*** empty log message ***

This commit is contained in:
dockes 2008-08-26 07:36:41 +00:00
parent a4498cdca8
commit b40dac4162
5 changed files with 507 additions and 76 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.5 2008-07-01 08:24:30 dockes Exp $ (C) 2007 J.F.Dockes";
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.6 2008-08-26 07:36:41 dockes Exp $ (C) 2007 J.F.Dockes";
#endif
#include <Python.h>
@ -35,6 +35,8 @@ PyObject *obj_Create(PyTypeObject *tp, PyObject *args, PyObject *kwargs)
return result;
}
//////////////////////////////////////////////////////
////// Python object definitions for Db, Query, and Doc
typedef struct {
PyObject_HEAD
/* Type-specific fields go here. */
@ -46,7 +48,7 @@ static PyTypeObject recollq_DbType = {
"recollq.Db", /*tp_name*/
sizeof(recollq_DbObject), /*tp_basicsize*/
0, /*tp_itemsize*/
0, /*tp_dealloc*/
0, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
@ -62,26 +64,27 @@ static PyTypeObject recollq_DbType = {
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
"Recollq Db objects", /* tp_doc */
"Recollq Db objects", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
0, /* tp_new */
};
typedef struct {
PyObject_HEAD
/* Type-specific fields go here. */
@ -111,24 +114,24 @@ static PyTypeObject recollq_QueryType = {
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
"Recollq Query objects", /* tp_doc */
"Recollq Query object", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
0, /* tp_new */
};
typedef struct {
PyObject_HEAD
@ -158,26 +161,28 @@ static PyTypeObject recollq_DocType = {
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
"Recollq Doc objects", /* tp_doc */
"Recollq Doc objects", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
0, /* tp_new */
};
///////////////////////////////////////////////
////// Db object code
static void
Db_dealloc(recollq_DbObject *self)
{
@ -206,12 +211,13 @@ static int
Db_init(recollq_DbObject *self, PyObject *args, PyObject *kwargs)
{
LOGDEB(("Db_init\n"));
static char *kwlist[] = {"confdir", "extra_dbs", NULL};
static char *kwlist[] = {"confdir", "extra_dbs", "writable", NULL};
PyObject *extradbs = 0;
char *confdir = 0;
int writable = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|sO", kwlist,
&confdir, &extradbs))
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|sOi", kwlist,
&confdir, &extradbs, &writable))
return -1;
// If the user creates several dbs, changing the confdir, we call
@ -239,9 +245,10 @@ Db_init(recollq_DbObject *self, PyObject *args, PyObject *kwargs)
self->db = new Rcl::Db;
string dbdir = rclconfig->getDbDir();
LOGDEB(("Db_init: getdbdir ok: [%s]\n", dbdir.c_str()));
if (!self->db->open(dbdir, rclconfig->getStopfile(), Rcl::Db::DbRO)) {
if (!self->db->open(dbdir, rclconfig->getStopfile(), writable ?
Rcl::Db::DbUpd : Rcl::Db::DbRO)) {
LOGDEB(("Db_init: db open error\n"));
PyErr_SetString(PyExc_EnvironmentError, "Cant open index");
PyErr_SetString(PyExc_EnvironmentError, "Can't open index");
return -1;
}
@ -355,20 +362,85 @@ Db_makeDocAbstract(recollq_DbObject* self, PyObject *args, PyObject *)
"UTF-8", "replace");
}
// Python method Db.needUpdate(udi, sig).
// Parses two string arguments (converted to UTF-8), checks that this
// object's db is still registered in the_dbs, and returns the result of
// Rcl::Db::needUpdate(udi, sig) as a Python int.
// Returns NULL with an exception set if the arguments are invalid or
// the db is not usable.
static PyObject *
Db_needUpdate(recollq_DbObject* self, PyObject *args, PyObject *kwds)
{
    char *udi = 0;
    char *sig = 0;
    LOGDEB(("Db_needUpdate\n"));
    if (!PyArg_ParseTuple(args, "eses:Db_needUpdate",
                          "utf-8", &udi, "utf-8", &sig)) {
        return 0;
    }

    PyObject *ret = 0;
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
        LOGERR(("Db_needUpdate: db not found %p\n", self->db));
        PyErr_SetString(PyExc_AttributeError, "db");
    } else {
        bool result = self->db->needUpdate(udi, sig);
        ret = Py_BuildValue("i", result);
    }

    // The "es" buffers were allocated by PyArg_ParseTuple and belong to
    // us: release them on every path, including the error one.
    PyMem_Free(udi);
    PyMem_Free(sig);
    return ret;
}
// Python method Db.addOrUpdate(udi, parent_udi, doc).
// Parses two string arguments (converted to UTF-8) and a recollq Doc
// object, checks that both the db and the doc are still registered
// (the_dbs / the_docs), then calls Rcl::Db::addOrUpdate().
// Returns None on success, NULL with AttributeError set on any failure.
static PyObject *
Db_addOrUpdate(recollq_DbObject* self, PyObject *args, PyObject *)
{
    LOGDEB(("Db_addOrUpdate\n"));
    char *udi = 0;
    char *parent_udi = 0;
    recollq_DocObject *pydoc;

    if (!PyArg_ParseTuple(args, "esesO!:Db_addOrUpdate",
                          "utf-8", &udi, "utf-8", &parent_udi,
                          &recollq_DocType, &pydoc)) {
        return 0;
    }

    bool ok = false;
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
        LOGERR(("Db_addOrUpdate: db not found %p\n", self->db));
        PyErr_SetString(PyExc_AttributeError, "db");
    } else if (pydoc->doc == 0 ||
               the_docs.find(pydoc->doc) == the_docs.end()) {
        LOGERR(("Db_addOrUpdate: doc not found %p\n", pydoc->doc));
        PyErr_SetString(PyExc_AttributeError, "doc");
    } else if (!self->db->addOrUpdate(udi, parent_udi, *pydoc->doc)) {
        LOGERR(("Db_addOrUpdate: rcldb error\n"));
        PyErr_SetString(PyExc_AttributeError, "rcldb error");
    } else {
        ok = true;
    }

    // The "es" buffers were allocated by PyArg_ParseTuple and belong to
    // us: release them whatever the outcome.
    PyMem_Free(udi);
    PyMem_Free(parent_udi);

    if (!ok)
        return 0;
    Py_RETURN_NONE;
}
// Method table for the Db type: Python name, C implementation,
// calling convention flags, docstring.
// Each entry carries exactly one docstring literal: adjacent string
// literals would be silently concatenated by the compiler.
static PyMethodDef Db_methods[] = {
    {"query", (PyCFunction)Db_query, METH_NOARGS,
     "Return a new, blank query for this index"
    },
    {"setAbstractParams", (PyCFunction)Db_setAbstractParams,
     METH_VARARGS|METH_KEYWORDS,
     "Set abstract build parameters: maxchars and contextwords"
    },
    {"makeDocAbstract", (PyCFunction)Db_makeDocAbstract, METH_VARARGS,
     "Build keyword in context abstract for document and query"
    },
    {"needUpdate", (PyCFunction)Db_needUpdate, METH_VARARGS,
     "Check index up to date"
    },
    {"addOrUpdate", (PyCFunction)Db_addOrUpdate, METH_VARARGS,
     "Add or update document in index"
    },
    {NULL} /* Sentinel */
};
/////////////////////////////////////////////
/// Query object method
static void
Query_dealloc(recollq_QueryObject *self)
{
@ -394,6 +466,9 @@ Query_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
return (PyObject *)self;
}
// Query_init creates an unusable object. The only way to create a
// valid Query object is through the Db "query" method (otherwise we
// would need to add a Db parameter to the Query creation method).
static int
Query_init(recollq_QueryObject *self, PyObject *, PyObject *)
{
@ -411,9 +486,8 @@ static PyObject *
Query_execute(recollq_QueryObject* self, PyObject *args, PyObject *kwds)
{
char *utf8 = 0;
int len = 0;
LOGDEB(("Query_execute\n"));
if (!PyArg_ParseTuple(args, "es#:Query_execute", "utf-8", &utf8, &len)) {
if (!PyArg_ParseTuple(args, "es:Query_execute", "utf-8", &utf8)) {
return 0;
}
@ -425,6 +499,7 @@ Query_execute(recollq_QueryObject* self, PyObject *args, PyObject *kwds)
}
string reason;
Rcl::SearchData *sd = wasaStringToRcl(utf8, reason);
PyMem_Free(utf8);
if (!sd) {
PyErr_SetString(PyExc_ValueError, reason.c_str());
return 0;
@ -451,24 +526,22 @@ Query_fetchone(recollq_QueryObject* self, PyObject *, PyObject *)
PyErr_SetString(PyExc_AttributeError, "query: no results");
return 0;
}
Rcl::Doc *doc = new Rcl::Doc;
recollq_DocObject *result =
(recollq_DocObject *)obj_Create(&recollq_DocType, 0, 0);
if (!result) {
LOGERR(("Query_fetchone: couldn't create doc object for result\n"));
return 0;
}
int percent;
if (!self->query->getDoc(self->next, *doc, &percent)) {
if (!self->query->getDoc(self->next, *result->doc, &percent)) {
PyErr_SetString(PyExc_EnvironmentError, "query: cant fetch result");
self->next = -1;
return 0;
}
self->next++;
recollq_DocObject *result =
(recollq_DocObject *)obj_Create(&recollq_DocType, 0, 0);
if (!result) {
delete doc;
return 0;
}
result->doc = doc;
the_docs.insert(result->doc);
// Move some data from the dedicated fields to the meta array to make
// fetching attributes easier
Rcl::Doc *doc = result->doc;
printableUrl(rclconfig->getDefCharset(), doc->url, doc->meta["url"]);
doc->meta["mimetype"] = doc->mimetype;
doc->meta["mtime"] = doc->dmtime.empty() ? doc->fmtime : doc->dmtime;
@ -502,7 +575,8 @@ static PyMemberDef Query_members[] = {
{NULL} /* Sentinel */
};
///////////////////////////////////////////////////////////////////////
///// Doc object methods
static void
Doc_dealloc(recollq_DocObject *self)
{
@ -534,14 +608,21 @@ Doc_init(recollq_DocObject *self, PyObject *, PyObject *)
if (self->doc)
the_docs.erase(self->doc);
delete self->doc;
self->doc = 0;
self->doc = new Rcl::Doc;
if (self->doc == 0)
return -1;
the_docs.insert(self->doc);
return 0;
}
// The "closure" thing is actually the meta field name. This is how
// Python allows one set of get/set functions to get/set different
// attributes: it passes them an additional parameter taken from the
// getseters table and calls it a "closure".
static PyObject *
Doc_getmeta(recollq_DocObject *self, void *closure)
{
LOGDEB(("Doc_getmeta\n"));
LOGDEB(("Doc_getmeta: [%s]\n", (const char *)closure));
if (self->doc == 0 ||
the_docs.find(self->doc) == the_docs.end()) {
PyErr_SetString(PyExc_AttributeError, "doc");
@ -568,32 +649,109 @@ Doc_getmeta(recollq_DocObject *self, void *closure)
// Attribute setter shared by all Doc attributes. "closure" is the
// attribute (meta field) name taken from the getseters table.
// The value must be a str or unicode object; it is converted to UTF-8
// and stored in doc->meta[key]. For the keys which also exist as
// dedicated Rcl::Doc fields (dbytes, fbytes, ipath, mimetype, mtime,
// sig, text, url) the field is set too.
// Returns 0 on success, -1 with an exception set on error.
static int
Doc_setmeta(recollq_DocObject *self, PyObject *value, void *closure)
{
    if (self->doc == 0 ||
        the_docs.find(self->doc) == the_docs.end()) {
        PyErr_SetString(PyExc_AttributeError, "doc??");
        return -1;
    }
    LOGDEB2(("Doc_setmeta: doc %p\n", self->doc));

    // Validate the key before acquiring any reference, so that this
    // error path has nothing to release.
    const char *key = (const char *)closure;
    if (key == 0) {
        PyErr_SetString(PyExc_AttributeError, "key??");
        return -1;
    }

    // The setter is called with a NULL value for "del doc.attr"
    if (value == 0) {
        PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
        return -1;
    }

    // Get a unicode object we own a reference to: either a new one
    // decoded from a str, or the caller's unicode with an extra ref.
    PyObject *uni = 0;
    if (PyString_Check(value)) {
        uni = PyUnicode_FromObject(value);
        if (uni == 0)
            return -1;
    } else if (PyUnicode_Check(value)) {
        uni = value;
        Py_INCREF(uni);
    } else {
        PyErr_SetString(PyExc_AttributeError, "value not str/unicode??");
        return -1;
    }

    PyObject* putf8 = PyUnicode_AsUTF8String(uni);
    Py_DECREF(uni);
    if (putf8 == 0) {
        LOGERR(("Doc_setmeta: encoding to utf8 failed\n"));
        PyErr_SetString(PyExc_AttributeError, "value??");
        return -1;
    }

    // uvalue points inside putf8: keep putf8 alive until all copies
    // into the doc are done.
    char* uvalue = PyString_AsString(putf8);
    LOGDEB(("Doc_setmeta: setting [%s] to [%s]\n", key, uvalue));
    self->doc->meta[key] = uvalue;

    switch (key[0]) {
    case 'd':
        if (!strcmp(key, "dbytes")) {
            self->doc->dbytes = uvalue;
        }
        break;
    case 'f':
        if (!strcmp(key, "fbytes")) {
            self->doc->fbytes = uvalue;
        }
        break;
    case 'i':
        if (!strcmp(key, "ipath")) {
            self->doc->ipath = uvalue;
        }
        break;
    case 'm':
        if (!strcmp(key, "mimetype")) {
            self->doc->mimetype = uvalue;
        } else if (!strcmp(key, "mtime")) {
            self->doc->dmtime = uvalue;
        }
        break;
    case 's':
        if (!strcmp(key, "sig")) {
            self->doc->sig = uvalue;
        }
        break;
    case 't':
        if (!strcmp(key, "text")) {
            self->doc->text = uvalue;
        }
        break;
    case 'u':
        if (!strcmp(key, "url")) {
            self->doc->url = uvalue;
        }
        break;
    default:
        // Other attributes only live in the meta array
        break;
    }

    Py_DECREF(putf8);
    return 0;
}
// Attribute table for the Doc type. All attributes share the same
// getter/setter pair; the last field (closure) is the meta field name
// passed to them. Each attribute name must appear only once.
static PyGetSetDef Doc_getseters[] = {
    // Name, get, set, doc, closure
    {"url", (getter)Doc_getmeta, (setter)Doc_setmeta,
     "url", (void *)"url"},
    {"ipath", (getter)Doc_getmeta, (setter)Doc_setmeta,
     "ipath", (void *)"ipath"},
    {"mimetype", (getter)Doc_getmeta, (setter)Doc_setmeta,
     "mimetype", (void *)"mimetype"},
    {"mtime", (getter)Doc_getmeta, (setter)Doc_setmeta,
     "mtime", (void *)"mtime"},
    {"fbytes", (getter)Doc_getmeta, (setter)Doc_setmeta,
     "fbytes", (void *)"fbytes"},
    {"dbytes", (getter)Doc_getmeta, (setter)Doc_setmeta,
     "dbytes", (void *)"dbytes"},
    {"relevance", (getter)Doc_getmeta, (setter)Doc_setmeta,
     "relevance", (void *)"relevance"},
    {"title", (getter)Doc_getmeta, (setter)Doc_setmeta,
     "title", (void *)"title"},
    {"keywords", (getter)Doc_getmeta, (setter)Doc_setmeta,
     "keywords", (void *)"keywords"},
    {"abstract", (getter)Doc_getmeta, (setter)Doc_setmeta,
     "abstract", (void *)"abstract"},
    {"author", (getter)Doc_getmeta, (setter)Doc_setmeta,
     "author", (void *)"author"},
    {"text", (getter)Doc_getmeta, (setter)Doc_setmeta,
     "text", (void *)"text"},
    {"sig", (getter)Doc_getmeta, (setter)Doc_setmeta,
     "sig", (void *)"sig"},
    {NULL} /* Sentinel */
};

View File

@ -1,4 +1,5 @@
from distutils.core import setup, Extension
top = '../../'
module1 = Extension('recollq',
define_macros = [('MAJOR_VERSION', '1'),
@ -9,37 +10,37 @@ module1 = Extension('recollq',
'"/usr/local/share/recoll"')
],
include_dirs = ['/usr/local/include',
'../utils',
'../common',
'../rcldb',
'../query',
'../unac'
top + 'utils',
top + 'common',
top + 'rcldb',
top + 'query',
top + 'unac'
],
libraries = ['xapian', 'iconv'],
library_dirs = ['/usr/local/lib'],
sources = ['recoll_query.cpp',
'../common/rclconfig.cpp',
'../common/rclinit.cpp',
'../common/textsplit.cpp',
'../common/unacpp.cpp',
'../query/wasastringtoquery.cpp',
'../query/wasatorcl.cpp',
'../rcldb/pathhash.cpp',
'../rcldb/rcldb.cpp',
'../rcldb/rclquery.cpp',
'../rcldb/searchdata.cpp',
'../rcldb/stemdb.cpp',
'../rcldb/stoplist.cpp',
'../unac/unac.c',
'../utils/base64.cpp',
'../utils/conftree.cpp',
'../utils/debuglog.cpp',
'../utils/md5.cpp',
'../utils/pathut.cpp',
'../utils/readfile.cpp',
'../utils/smallut.cpp',
'../utils/transcode.cpp',
'../utils/wipedir.cpp'
top + 'common/rclconfig.cpp',
top + 'common/rclinit.cpp',
top + 'common/textsplit.cpp',
top + 'common/unacpp.cpp',
top + 'query/wasastringtoquery.cpp',
top + 'query/wasatorcl.cpp',
top + 'rcldb/pathhash.cpp',
top + 'rcldb/rcldb.cpp',
top + 'rcldb/rclquery.cpp',
top + 'rcldb/searchdata.cpp',
top + 'rcldb/stemdb.cpp',
top + 'rcldb/stoplist.cpp',
top + 'unac/unac.c',
top + 'utils/base64.cpp',
top + 'utils/conftree.cpp',
top + 'utils/debuglog.cpp',
top + 'utils/md5.cpp',
top + 'utils/pathut.cpp',
top + 'utils/readfile.cpp',
top + 'utils/smallut.cpp',
top + 'utils/transcode.cpp',
top + 'utils/wipedir.cpp'
])

114
src/python/samples/rcldlkp.py Executable file
View File

@ -0,0 +1,114 @@
#!/usr/bin/env python
__doc__ = """
''Lookup'' notes file indexing
The file format has text notes separated by lines with a single '%' character
If the script is called with just the file name as an argument, it will
(re)index the contents.
If the script is called with second numeric argument, it will retrieve the
specified record and output it in html
"""
import os
import stat
import sys
import re
rclconf = "/Users/dockes/.recoll-test"
def udi(docfile, numrec):
    """Return the unique document identifier for record numrec of docfile.

    The identifier is the file name and the record number joined by '#'.
    """
    recnum = str(numrec)
    return docfile + "#" + recnum
###############################################################
# Build a recollq.Doc for one record of the notes file and store it in
# the index through db.addOrUpdate().
#   db:     index object providing addOrUpdate()
#   numrec: record number, used as ipath and as part of the udi
#   rec:    raw record text, decoded as iso-8859-1
# Reads the module-level docfile, fbytes and fmtime values set by the
# main script, and the recollq module imported there.
def index_rec(db, numrec, rec):
doc = recollq.Doc()
# url: the notes file itself
doc.url = "file://" + docfile
# utf8fn: not set
# ipath: the record number identifies the record inside the file
doc.ipath = str(numrec)
# mimetype
doc.mimetype = "text/plain"
# mtime: not set
# origcharset: not set
# title: second line of the record, or the third one if the second
# is blank
lines = rec.split("\n")
if len(lines) >= 2:
doc.title = unicode(lines[1], "iso-8859-1")
if len(doc.title.strip()) == 0 and len(lines) >= 3:
doc.title = unicode(lines[2], "iso-8859-1")
# keywords: not set
# abstract: not set
# author: not set
# fbytes: size of the whole notes file
doc.fbytes = str(fbytes)
# text
doc.text = unicode(rec, "iso-8859-1")
# dbytes: size of this record
doc.dbytes = str(len(rec))
# sig: only record 0 carries the file modification time; the main
# script passes the same udi/value pair to db.needUpdate()
if numrec == 0:
doc.sig = str(fmtime)
db.addOrUpdate(udi(docfile, numrec), u"", doc)
def output_rec(rec):
# Escape html
rec = unicode(rec, "iso-8859-1").encode("utf-8")
rec = rec.replace("<", "&lt;");
rec = rec.replace("&", "&amp;");
rec = rec.replace('"', "&dquot;");
print '<html><head>'
print '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
print '</head><body><pre>'
print rec
print '</pre></body></html>'
################################################################
def usage():
    """Print the command line synopsis on stderr and exit with status 1."""
    # Use the actual script name instead of a hard-coded one, and
    # sys.exit() rather than the interactive-only exit() helper
    progname = os.path.basename(sys.argv[0])
    sys.stderr.write("Usage: %s <filename> [<recnum>]\n" % progname)
    sys.exit(1)
# Main script: with only a file name, (re)index the file records; with
# an additional record number, print that record as html.
if len(sys.argv) < 2:
    usage()

docfile = sys.argv[1]
if len(sys.argv) > 2:
    targetnum = int(sys.argv[2])
else:
    targetnum = None
#print docfile, targetnum

stdata = os.stat(docfile)
fmtime = stdata[stat.ST_MTIME]
fbytes = stdata[stat.ST_SIZE]

if targetnum is None:
    # Indexing mode: the index is only needed (and opened) here
    import recollq
    db = recollq.connect(confdir=rclconf, writable=1)
    # Record 0 holds the file modification time as signature: nothing
    # to do if it is up to date
    if not db.needUpdate(udi(docfile, 0), str(fmtime)):
        sys.exit(0)

# Compiled once, outside of the line loop
separator = re.compile("^%[ \t]*")

rec = ""
numrec = 1
f = open(docfile, 'r')
try:
    for line in f:
        if separator.match(line):
            # End of current record
            if targetnum is None:
                index_rec(db, numrec, rec)
            elif targetnum == numrec:
                output_rec(rec)
                sys.exit(0)
            numrec += 1
            rec = ""
        else:
            rec += line
finally:
    # Also runs when leaving through sys.exit()
    f.close()

if targetnum is None:
    # Store the pseudo-record carrying the file signature
    index_rec(db, 0, "")

View File

@ -0,0 +1,109 @@
#!/usr/bin/env python
import mailbox
import email.header
import email.utils
#import sys
import recollq
import os
import stat
#mbfile = "/Users/dockes/projets/fulltext/testrecoll/mail/fred"
mbfile = "/Users/dockes/mail/outmail"
rclconf = "/Users/dockes/.recoll-test"
def header_value(msg, nm, to_utf = False):
    """Return the decoded value of header nm from message msg.

    Line breaks are removed, then the RFC 2047 encoded parts are decoded
    and concatenated, each followed by a space.

    Returns "" if the header is absent, else a unicode string, or its
    UTF-8 encoding if to_utf is true.
    """
    value = msg.get(nm)
    if value is None:
        return ""
    value = value.replace("\n", "")
    value = value.replace("\r", "")
    #print value
    parts = email.header.decode_header(value)
    #print parts
    univalue = u""
    for data, charset in parts:
        # Parts with no charset may still hold raw 8 bit bytes: decode
        # them explicitly instead of relying on the implicit ascii
        # conversion, which raises on such input. Never fail on bad
        # bytes or unknown charset names: one broken header should not
        # abort the indexing.
        try:
            decoded = unicode(data, charset or "iso-8859-1", "replace")
        except LookupError:
            decoded = unicode(data, "iso-8859-1", "replace")
        univalue += decoded + u" "
    if to_utf:
        return univalue.encode('utf-8')
    else:
        return univalue
class mbox_indexer:
    """Index the messages of one mbox file into a recoll index.

    Each message becomes one document. Its udi is the mbox path and the
    1-based message number; its signature is derived from the modification
    time and size of the mbox file, so all messages share the same one.
    """

    def __init__(self, mbfile):
        """Record the mbox path and the file time/size used as signature."""
        self.mbfile = mbfile
        stdata = os.stat(mbfile)
        self.fmtime = stdata[stat.ST_MTIME]
        self.fbytes = stdata[stat.ST_SIZE]
        self.msgnum = 1

    def sig(self):
        """Return the signature string: "<mtime>:<size>" of the mbox file."""
        return str(self.fmtime) + ":" + str(self.fbytes)

    def udi(self, msgnum):
        """Return the unique document identifier for message msgnum."""
        return self.mbfile + ":" + str(msgnum)

    def index(self, db):
        """Index all messages into db, unless the first one is up to date.

        Always returns None.
        """
        if not db.needUpdate(self.udi(1), self.sig()):
            return None
        # Restart numbering, so that calling index() again yields the
        # same udis instead of continuing after the previous run
        self.msgnum = 1
        mb = mailbox.mbox(self.mbfile)
        try:
            # Iterate instead of using values(), which would build the
            # list of all messages in memory first
            for msg in mb:
                self.index_message(db, msg)
                self.msgnum += 1
        finally:
            mb.close()

    def index_message(self, db, msg):
        """Build a recollq.Doc from msg and add it to db as self.msgnum."""
        doc = recollq.Doc()
        sender = header_value(msg, "From")
        subject = header_value(msg, "Subject")
        doc.author = sender
        # url
        doc.url = "file://" + self.mbfile
        # utf8fn
        # ipath
        doc.ipath = str(self.msgnum)
        # mimetype
        doc.mimetype = "message/rfc822"
        # mtime: from the Date header, or the file time if it can't be
        # parsed
        dte = header_value(msg, "Date")
        tm = email.utils.parsedate_tz(dte)
        if tm is None:
            doc.mtime = str(self.fmtime)
        else:
            doc.mtime = str(email.utils.mktime_tz(tm))
        # origcharset
        # title
        doc.title = subject
        # keywords
        # abstract
        # author
        # fbytes
        doc.fbytes = str(self.fbytes)
        # text: main headers, then all text/plain parts
        text = u""
        text += u"From: " + sender + u"\n"
        text += u"To: " + header_value(msg, "To") + u"\n"
        text += u"Subject: " + subject + u"\n"
        #text += u"Message-ID: " + header_value(msg, "Message-ID") + u"\n"
        text += u"\n"
        for part in msg.walk():
            if part.is_multipart():
                #print "Multipart: " + part.get_content_type()
                continue
            ct = part.get_content_type()
            #print "Simple: " + ct
            if ct.lower() != "text/plain":
                continue
            charset = part.get_content_charset("iso-8859-1")
            payload = part.get_payload(None, True)
            if payload is None:
                continue
            # A wrong or unknown charset declaration in one message
            # must not abort the indexing of the whole mailbox
            try:
                text += unicode(payload, charset, "replace")
            except LookupError:
                text += unicode(payload, "iso-8859-1", "replace")
        doc.text = text
        # dbytes
        doc.dbytes = str(len(text))
        # sig
        doc.sig = self.sig()
        udi = self.udi(self.msgnum)
        db.addOrUpdate(udi, u"", doc)
# Main script: connect to the index for the rclconf configuration,
# asking for write access, and index the mbfile mailbox into it.
db = recollq.connect(confdir=rclconf, writable=1)
mbidx = mbox_indexer(mbfile)
mbidx.index(db)

49
src/python/samples/recollq.py Executable file
View File

@ -0,0 +1,49 @@
#!/usr/bin/env python
import sys
import recollq
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
"ipath", "fbytes", "dbytes", "relevance")
# Run the query string q against index db and print the result count,
# then, for at most the 10 first results, the title and the abstract
# built by db.makeDocAbstract(), both encoded as UTF-8.
def dotest(db, q):
query = db.query()
#query1 = db.query()
nres = query.execute(q)
print "Result count: ", nres
# Only show the first 10 results
if nres > 10:
nres = 10
# query.next is read after each fetchone() as the loop condition
while query.next >= 0 and query.next < nres:
doc = query.fetchone()
print query.next
for k in ("title",):
print k, ":", getattr(doc, k).encode('utf-8')
abs = db.makeDocAbstract(doc, query).encode('utf-8')
print abs
print
# End dotest
# Main script: run the same test query against several configurations
q = "essaouira"
print "TESTING WITH .recoll"
db = recollq.connect()
db.setAbstractParams(maxchars=80, contextwords=2)
dotest(db, q)
# NOTE(review): the script exits here, so the tests below never run
sys.exit(0)
print "TESTING WITH .recoll-test"
db = recollq.connect(confdir="/Users/dockes/.recoll-test")
dotest(db, q)
print "TESTING WITH .recoll-doc"
db = recollq.connect(confdir="/y/home/dockes/.recoll-doc")
dotest(db, q)
print "TESTING WITH .recoll and .recoll-doc"
db = recollq.connect(confdir="/Users/dockes/.recoll",
extra_dbs=("/y/home/dockes/.recoll-doc",))
dotest(db, q)