*** empty log message ***
This commit is contained in:
parent
a4498cdca8
commit
b40dac4162
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.5 2008-07-01 08:24:30 dockes Exp $ (C) 2007 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.6 2008-08-26 07:36:41 dockes Exp $ (C) 2007 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <Python.h>
|
||||
@ -35,6 +35,8 @@ PyObject *obj_Create(PyTypeObject *tp, PyObject *args, PyObject *kwargs)
|
||||
return result;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
////// Python object definitions for Db, Query, and Doc
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
/* Type-specific fields go here. */
|
||||
@ -46,7 +48,7 @@ static PyTypeObject recollq_DbType = {
|
||||
"recollq.Db", /*tp_name*/
|
||||
sizeof(recollq_DbObject), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
0, /*tp_dealloc*/
|
||||
0, /*tp_dealloc*/
|
||||
0, /*tp_print*/
|
||||
0, /*tp_getattr*/
|
||||
0, /*tp_setattr*/
|
||||
@ -62,26 +64,27 @@ static PyTypeObject recollq_DbType = {
|
||||
0, /*tp_setattro*/
|
||||
0, /*tp_as_buffer*/
|
||||
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||
"Recollq Db objects", /* tp_doc */
|
||||
"Recollq Db objects", /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
0, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
0, /* tp_init */
|
||||
0, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
0, /* tp_new */
|
||||
0, /* tp_new */
|
||||
};
|
||||
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
/* Type-specific fields go here. */
|
||||
@ -111,24 +114,24 @@ static PyTypeObject recollq_QueryType = {
|
||||
0, /*tp_setattro*/
|
||||
0, /*tp_as_buffer*/
|
||||
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||
"Recollq Query objects", /* tp_doc */
|
||||
"Recollq Query object", /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
0, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
0, /* tp_init */
|
||||
0, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
0, /* tp_new */
|
||||
0, /* tp_new */
|
||||
};
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
@ -158,26 +161,28 @@ static PyTypeObject recollq_DocType = {
|
||||
0, /*tp_setattro*/
|
||||
0, /*tp_as_buffer*/
|
||||
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||
"Recollq Doc objects", /* tp_doc */
|
||||
"Recollq Doc objects", /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
0, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
0, /* tp_init */
|
||||
0, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
0, /* tp_new */
|
||||
0, /* tp_new */
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////
|
||||
////// Db object code
|
||||
static void
|
||||
Db_dealloc(recollq_DbObject *self)
|
||||
{
|
||||
@ -206,12 +211,13 @@ static int
|
||||
Db_init(recollq_DbObject *self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
LOGDEB(("Db_init\n"));
|
||||
static char *kwlist[] = {"confdir", "extra_dbs", NULL};
|
||||
static char *kwlist[] = {"confdir", "extra_dbs", "writable", NULL};
|
||||
PyObject *extradbs = 0;
|
||||
char *confdir = 0;
|
||||
int writable = 0;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|sO", kwlist,
|
||||
&confdir, &extradbs))
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|sOi", kwlist,
|
||||
&confdir, &extradbs, &writable))
|
||||
return -1;
|
||||
|
||||
// If the user creates several dbs, changing the confdir, we call
|
||||
@ -239,9 +245,10 @@ Db_init(recollq_DbObject *self, PyObject *args, PyObject *kwargs)
|
||||
self->db = new Rcl::Db;
|
||||
string dbdir = rclconfig->getDbDir();
|
||||
LOGDEB(("Db_init: getdbdir ok: [%s]\n", dbdir.c_str()));
|
||||
if (!self->db->open(dbdir, rclconfig->getStopfile(), Rcl::Db::DbRO)) {
|
||||
if (!self->db->open(dbdir, rclconfig->getStopfile(), writable ?
|
||||
Rcl::Db::DbUpd : Rcl::Db::DbRO)) {
|
||||
LOGDEB(("Db_init: db open error\n"));
|
||||
PyErr_SetString(PyExc_EnvironmentError, "Cant open index");
|
||||
PyErr_SetString(PyExc_EnvironmentError, "Can't open index");
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -355,20 +362,85 @@ Db_makeDocAbstract(recollq_DbObject* self, PyObject *args, PyObject *)
|
||||
"UTF-8", "replace");
|
||||
}
|
||||
|
||||
// Python method Db.needUpdate(udi, sig).
//
// Asks the index whether the document identified by 'udi' needs to be
// (re)indexed given its current signature 'sig'. Both arguments are
// converted to UTF-8.
// Returns a Python integer holding the boolean answer of
// Rcl::Db::needUpdate(), or NULL with an exception set if the arguments
// can't be parsed (exception set by PyArg_ParseTuple) or if the Db object
// is not valid (AttributeError).
static PyObject *
Db_needUpdate(recollq_DbObject* self, PyObject *args, PyObject *kwds)
{
    char *udi = 0;
    char *sig = 0;
    LOGDEB(("Db_needUpdate\n"));
    if (!PyArg_ParseTuple(args, "eses:Db_needUpdate", 
                          "utf-8", &udi, "utf-8", &sig)) {
        return 0;
    }

    // From here on, udi and sig are buffers allocated by the "es"
    // conversions: they must be released on every return path.
    PyObject *ret = 0;
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
        LOGERR(("Db_needUpdate: db not found %p\n", self->db));
        PyErr_SetString(PyExc_AttributeError, "db");
    } else {
        bool result = self->db->needUpdate(udi, sig);
        ret = Py_BuildValue("i", result);
    }
    PyMem_Free(udi);
    PyMem_Free(sig);
    return ret;
}
|
||||
|
||||
// Python method Db.addOrUpdate(udi, parent_udi, doc).
//
// Adds the Doc object 'doc' to the index, or updates it, under the unique
// document identifier 'udi'. 'parent_udi' is passed through to
// Rcl::Db::addOrUpdate(). Both identifiers are converted to UTF-8.
// Returns None on success, or NULL with an exception set if the arguments
// can't be parsed, if the Db or Doc object is not valid, or if the index
// update fails (AttributeError in the last three cases).
static PyObject *
Db_addOrUpdate(recollq_DbObject* self, PyObject *args, PyObject *)
{
    LOGDEB(("Db_addOrUpdate\n"));
    char *udi = 0;
    char *parent_udi = 0;
    recollq_DocObject *pydoc;

    if (!PyArg_ParseTuple(args, "esesO!:Db_addOrUpdate", 
                          "utf-8", &udi, "utf-8", &parent_udi, 
                          &recollq_DocType, &pydoc)) {
        return 0;
    }

    // From here on, udi and parent_udi are buffers allocated by the "es"
    // conversions: all paths go through the common release code below.
    PyObject *ret = 0;
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
        LOGERR(("Db_addOrUpdate: db not found %p\n", self->db));
        PyErr_SetString(PyExc_AttributeError, "db");
    } else if (pydoc->doc == 0 || 
               the_docs.find(pydoc->doc) == the_docs.end()) {
        LOGERR(("Db_addOrUpdate: doc not found %p\n", pydoc->doc));
        PyErr_SetString(PyExc_AttributeError, "doc");
    } else if (!self->db->addOrUpdate(udi, parent_udi, *pydoc->doc)) {
        LOGERR(("Db_addOrUpdate: rcldb error\n"));
        PyErr_SetString(PyExc_AttributeError, "rcldb error");
    } else {
        Py_INCREF(Py_None);
        ret = Py_None;
    }
    PyMem_Free(udi);
    PyMem_Free(parent_udi);
    return ret;
}
|
||||
|
||||
// Method table for the Db type. Each entry gives the name seen from
// Python, the implementing C function, the calling convention flags and
// the doc string. Each method has exactly one doc string: adjacent string
// literals would be concatenated by the compiler.
static PyMethodDef Db_methods[] = {
    {"query", (PyCFunction)Db_query, METH_NOARGS,
     "Return a new, blank query for this index"
    },
    {"setAbstractParams", (PyCFunction)Db_setAbstractParams, 
     METH_VARARGS|METH_KEYWORDS,
     "Set abstract build parameters: maxchars and contextwords"
    },
    {"makeDocAbstract", (PyCFunction)Db_makeDocAbstract, METH_VARARGS,
     "Build keyword in context abstract for document and query"
    },
    {"needUpdate", (PyCFunction)Db_needUpdate, METH_VARARGS,
     "Check index up to date"
    },
    {"addOrUpdate", (PyCFunction)Db_addOrUpdate, METH_VARARGS,
     "Add or update document in index"
    },
    {NULL}  /* Sentinel */
};
|
||||
|
||||
/////////////////////////////////////////////
|
||||
/// Query object method
|
||||
static void
|
||||
Query_dealloc(recollq_QueryObject *self)
|
||||
{
|
||||
@ -394,6 +466,9 @@ Query_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
||||
return (PyObject *)self;
|
||||
}
|
||||
|
||||
// Query_init creates an unusable object. The only way to create a
|
||||
// valid Query Object is through db_query(). (or we'd need to add a Db
|
||||
// parameter to the Query object creation method)
|
||||
static int
|
||||
Query_init(recollq_QueryObject *self, PyObject *, PyObject *)
|
||||
{
|
||||
@ -411,9 +486,8 @@ static PyObject *
|
||||
Query_execute(recollq_QueryObject* self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
char *utf8 = 0;
|
||||
int len = 0;
|
||||
LOGDEB(("Query_execute\n"));
|
||||
if (!PyArg_ParseTuple(args, "es#:Query_execute", "utf-8", &utf8, &len)) {
|
||||
if (!PyArg_ParseTuple(args, "es:Query_execute", "utf-8", &utf8)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -425,6 +499,7 @@ Query_execute(recollq_QueryObject* self, PyObject *args, PyObject *kwds)
|
||||
}
|
||||
string reason;
|
||||
Rcl::SearchData *sd = wasaStringToRcl(utf8, reason);
|
||||
PyMem_Free(utf8);
|
||||
if (!sd) {
|
||||
PyErr_SetString(PyExc_ValueError, reason.c_str());
|
||||
return 0;
|
||||
@ -451,24 +526,22 @@ Query_fetchone(recollq_QueryObject* self, PyObject *, PyObject *)
|
||||
PyErr_SetString(PyExc_AttributeError, "query: no results");
|
||||
return 0;
|
||||
}
|
||||
Rcl::Doc *doc = new Rcl::Doc;
|
||||
recollq_DocObject *result =
|
||||
(recollq_DocObject *)obj_Create(&recollq_DocType, 0, 0);
|
||||
if (!result) {
|
||||
LOGERR(("Query_fetchone: couldn't create doc object for result\n"));
|
||||
return 0;
|
||||
}
|
||||
int percent;
|
||||
if (!self->query->getDoc(self->next, *doc, &percent)) {
|
||||
if (!self->query->getDoc(self->next, *result->doc, &percent)) {
|
||||
PyErr_SetString(PyExc_EnvironmentError, "query: cant fetch result");
|
||||
self->next = -1;
|
||||
return 0;
|
||||
}
|
||||
self->next++;
|
||||
recollq_DocObject *result =
|
||||
(recollq_DocObject *)obj_Create(&recollq_DocType, 0, 0);
|
||||
if (!result) {
|
||||
delete doc;
|
||||
return 0;
|
||||
}
|
||||
result->doc = doc;
|
||||
the_docs.insert(result->doc);
|
||||
// Move some data from the dedicated fields to the meta array to make
|
||||
// fetching attributes easier
|
||||
Rcl::Doc *doc = result->doc;
|
||||
printableUrl(rclconfig->getDefCharset(), doc->url, doc->meta["url"]);
|
||||
doc->meta["mimetype"] = doc->mimetype;
|
||||
doc->meta["mtime"] = doc->dmtime.empty() ? doc->fmtime : doc->dmtime;
|
||||
@ -502,7 +575,8 @@ static PyMemberDef Query_members[] = {
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
///// Doc object methods
|
||||
static void
|
||||
Doc_dealloc(recollq_DocObject *self)
|
||||
{
|
||||
@ -534,14 +608,21 @@ Doc_init(recollq_DocObject *self, PyObject *, PyObject *)
|
||||
if (self->doc)
|
||||
the_docs.erase(self->doc);
|
||||
delete self->doc;
|
||||
self->doc = 0;
|
||||
self->doc = new Rcl::Doc;
|
||||
if (self->doc == 0)
|
||||
return -1;
|
||||
the_docs.insert(self->doc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// The "closure" thing is actually the meta field name. This is how
|
||||
// python allows one set of get/set functions to get/set different
|
||||
// attributes (it passes them an additional parameter taken from the
|
||||
// getseters table and calls it a "closure").
|
||||
static PyObject *
|
||||
Doc_getmeta(recollq_DocObject *self, void *closure)
|
||||
{
|
||||
LOGDEB(("Doc_getmeta\n"));
|
||||
LOGDEB(("Doc_getmeta: [%s]\n", (const char *)closure));
|
||||
if (self->doc == 0 ||
|
||||
the_docs.find(self->doc) == the_docs.end()) {
|
||||
PyErr_SetString(PyExc_AttributeError, "doc");
|
||||
@ -568,32 +649,109 @@ Doc_getmeta(recollq_DocObject *self, void *closure)
|
||||
static int
|
||||
Doc_setmeta(recollq_DocObject *self, PyObject *value, void *closure)
|
||||
{
|
||||
PyErr_SetString(PyExc_RuntimeError, "Cannot set attributes for now");
|
||||
return -1;
|
||||
if (self->doc == 0 ||
|
||||
the_docs.find(self->doc) == the_docs.end()) {
|
||||
PyErr_SetString(PyExc_AttributeError, "doc??");
|
||||
return -1;
|
||||
}
|
||||
LOGDEB2(("Doc_setmeta: doc %p\n", self->doc));
|
||||
if (PyString_Check(value)) {
|
||||
value = PyUnicode_FromObject(value);
|
||||
if (value == 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!PyUnicode_Check(value)) {
|
||||
PyErr_SetString(PyExc_AttributeError, "value not str/unicode??");
|
||||
return -1;
|
||||
}
|
||||
|
||||
PyObject* putf8 = PyUnicode_AsUTF8String(value);
|
||||
if (putf8 == 0) {
|
||||
LOGERR(("Doc_setmeta: encoding to utf8 failed\n"));
|
||||
PyErr_SetString(PyExc_AttributeError, "value??");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char* uvalue = PyString_AsString(putf8);
|
||||
const char *key = (const char *)closure;
|
||||
if (key == 0) {
|
||||
PyErr_SetString(PyExc_AttributeError, "key??");
|
||||
return -1;
|
||||
}
|
||||
|
||||
LOGDEB(("Doc_setmeta: setting [%s] to [%s]\n", key, uvalue));
|
||||
self->doc->meta[key] = uvalue;
|
||||
switch (key[0]) {
|
||||
case 'd':
|
||||
if (!strcmp(key, "dbytes")) {
|
||||
self->doc->dbytes = uvalue;
|
||||
}
|
||||
break;
|
||||
case 'f':
|
||||
if (!strcmp(key, "fbytes")) {
|
||||
self->doc->fbytes = uvalue;
|
||||
}
|
||||
break;
|
||||
case 'i':
|
||||
if (!strcmp(key, "ipath")) {
|
||||
self->doc->ipath = uvalue;
|
||||
}
|
||||
break;
|
||||
case 'm':
|
||||
if (!strcmp(key, "mimetype")) {
|
||||
self->doc->mimetype = uvalue;
|
||||
} else if (!strcmp(key, "mtime")) {
|
||||
self->doc->dmtime = uvalue;
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
if (!strcmp(key, "sig")) {
|
||||
self->doc->sig = uvalue;
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
if (!strcmp(key, "text")) {
|
||||
self->doc->text = uvalue;
|
||||
}
|
||||
break;
|
||||
case 'u':
|
||||
if (!strcmp(key, "url")) {
|
||||
self->doc->url = uvalue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Attribute table for the Doc type. All attributes share the same
// getter/setter pair, which receives the attribute name through the
// closure field. Each attribute name appears once.
static PyGetSetDef Doc_getseters[] = {
    // Name, get, set, doc, closure
    {"url", (getter)Doc_getmeta, (setter)Doc_setmeta, 
     "url", (void *)"url"},
    {"ipath", (getter)Doc_getmeta, (setter)Doc_setmeta, 
     "ipath", (void *)"ipath"},
    {"mimetype", (getter)Doc_getmeta, (setter)Doc_setmeta, 
     "mimetype", (void *)"mimetype"},
    {"mtime", (getter)Doc_getmeta, (setter)Doc_setmeta, 
     "mtime", (void *)"mtime"},
    {"fbytes", (getter)Doc_getmeta, (setter)Doc_setmeta, 
     "fbytes", (void *)"fbytes"},
    {"dbytes", (getter)Doc_getmeta, (setter)Doc_setmeta, 
     "dbytes", (void *)"dbytes"},
    {"relevance", (getter)Doc_getmeta, (setter)Doc_setmeta, 
     "relevance", (void *)"relevance"},
    {"title", (getter)Doc_getmeta, (setter)Doc_setmeta, 
     "title", (void *)"title"},
    {"keywords", (getter)Doc_getmeta, (setter)Doc_setmeta, 
     "keywords", (void *)"keywords"},
    {"abstract", (getter)Doc_getmeta, (setter)Doc_setmeta, 
     "abstract", (void *)"abstract"},
    {"author", (getter)Doc_getmeta, (setter)Doc_setmeta, 
     "author", (void *)"author"},
    {"text", (getter)Doc_getmeta, (setter)Doc_setmeta, 
     "text", (void *)"text"},
    {"sig", (getter)Doc_getmeta, (setter)Doc_setmeta, 
     "sig", (void *)"sig"},
    {NULL}  /* Sentinel */
};
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
from distutils.core import setup, Extension
|
||||
top = '../../'
|
||||
|
||||
module1 = Extension('recollq',
|
||||
define_macros = [('MAJOR_VERSION', '1'),
|
||||
@ -9,37 +10,37 @@ module1 = Extension('recollq',
|
||||
'"/usr/local/share/recoll"')
|
||||
],
|
||||
include_dirs = ['/usr/local/include',
|
||||
'../utils',
|
||||
'../common',
|
||||
'../rcldb',
|
||||
'../query',
|
||||
'../unac'
|
||||
top + 'utils',
|
||||
top + 'common',
|
||||
top + 'rcldb',
|
||||
top + 'query',
|
||||
top + 'unac'
|
||||
],
|
||||
libraries = ['xapian', 'iconv'],
|
||||
library_dirs = ['/usr/local/lib'],
|
||||
sources = ['recoll_query.cpp',
|
||||
'../common/rclconfig.cpp',
|
||||
'../common/rclinit.cpp',
|
||||
'../common/textsplit.cpp',
|
||||
'../common/unacpp.cpp',
|
||||
'../query/wasastringtoquery.cpp',
|
||||
'../query/wasatorcl.cpp',
|
||||
'../rcldb/pathhash.cpp',
|
||||
'../rcldb/rcldb.cpp',
|
||||
'../rcldb/rclquery.cpp',
|
||||
'../rcldb/searchdata.cpp',
|
||||
'../rcldb/stemdb.cpp',
|
||||
'../rcldb/stoplist.cpp',
|
||||
'../unac/unac.c',
|
||||
'../utils/base64.cpp',
|
||||
'../utils/conftree.cpp',
|
||||
'../utils/debuglog.cpp',
|
||||
'../utils/md5.cpp',
|
||||
'../utils/pathut.cpp',
|
||||
'../utils/readfile.cpp',
|
||||
'../utils/smallut.cpp',
|
||||
'../utils/transcode.cpp',
|
||||
'../utils/wipedir.cpp'
|
||||
top + 'common/rclconfig.cpp',
|
||||
top + 'common/rclinit.cpp',
|
||||
top + 'common/textsplit.cpp',
|
||||
top + 'common/unacpp.cpp',
|
||||
top + 'query/wasastringtoquery.cpp',
|
||||
top + 'query/wasatorcl.cpp',
|
||||
top + 'rcldb/pathhash.cpp',
|
||||
top + 'rcldb/rcldb.cpp',
|
||||
top + 'rcldb/rclquery.cpp',
|
||||
top + 'rcldb/searchdata.cpp',
|
||||
top + 'rcldb/stemdb.cpp',
|
||||
top + 'rcldb/stoplist.cpp',
|
||||
top + 'unac/unac.c',
|
||||
top + 'utils/base64.cpp',
|
||||
top + 'utils/conftree.cpp',
|
||||
top + 'utils/debuglog.cpp',
|
||||
top + 'utils/md5.cpp',
|
||||
top + 'utils/pathut.cpp',
|
||||
top + 'utils/readfile.cpp',
|
||||
top + 'utils/smallut.cpp',
|
||||
top + 'utils/transcode.cpp',
|
||||
top + 'utils/wipedir.cpp'
|
||||
])
|
||||
|
||||
|
||||
|
||||
114
src/python/samples/rcldlkp.py
Executable file
114
src/python/samples/rcldlkp.py
Executable file
@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python
|
||||
__doc__ = """
|
||||
''Lookup'' notes file indexing
|
||||
|
||||
The file format has text notes separated by lines with a single '%' character
|
||||
|
||||
If the script is called with just the file name as an argument, it will
|
||||
(re)index the contents.
|
||||
|
||||
If the script is called with a second, numeric, argument, it will retrieve
|
||||
the specified record and output it as HTML
|
||||
"""
|
||||
|
||||
import os
|
||||
import stat
|
||||
import sys
|
||||
import re
|
||||
|
||||
rclconf = "/Users/dockes/.recoll-test"
|
||||
|
||||
def udi(docfile, numrec):
    """Return the unique document identifier for a record of the notes file.

    The identifier is the file name and the record number, joined by '#'.
    """
    return "#".join([docfile, str(numrec)])
|
||||
|
||||
###############################################################
|
||||
def index_rec(db, numrec, rec):
    """Index one record of the notes file.

    db     -- index object, its addOrUpdate() method is called
    numrec -- record number, used as ipath and to build the udi
    rec    -- raw record text, decoded as iso-8859-1

    Uses the module-level docfile, fbytes and fmtime values. The utf8fn,
    mtime, origcharset, keywords, abstract and author fields are not set.
    """
    latin1 = "iso-8859-1"
    doc = recollq.Doc()

    doc.url = "file://" + docfile
    doc.ipath = str(numrec)
    doc.mimetype = "text/plain"

    # Title: second line of the record, or the third one if the title is
    # still blank at this point
    rows = rec.split("\n")
    if len(rows) >= 2:
        doc.title = unicode(rows[1], latin1)
    if not doc.title.strip() and len(rows) >= 3:
        doc.title = unicode(rows[2], latin1)

    doc.fbytes = str(fbytes)
    doc.text = unicode(rec, latin1)
    doc.dbytes = str(len(rec))

    # Only record number 0 gets a signature (the file modification time)
    if numrec == 0:
        doc.sig = str(fmtime)

    db.addOrUpdate(udi(docfile, numrec), u"", doc)
|
||||
|
||||
def output_rec(rec):
|
||||
# Escape html
|
||||
rec = unicode(rec, "iso-8859-1").encode("utf-8")
|
||||
rec = rec.replace("<", "<");
|
||||
rec = rec.replace("&", "&");
|
||||
rec = rec.replace('"', "&dquot;");
|
||||
print '<html><head>'
|
||||
print '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
||||
print '</head><body><pre>'
|
||||
print rec
|
||||
print '</pre></body></html>'
|
||||
|
||||
|
||||
################################################################
|
||||
|
||||
def usage():
    """Print the command line synopsis on stderr and exit with status 1."""
    sys.stderr.write("Usage: doclookup.py <filename> [<recnum>]\n")
    # sys.exit() and not exit(): the latter is only installed by the site
    # module, as a convenience for interactive sessions.
    sys.exit(1)
|
||||
|
||||
# Main program. With only a file name: (re)index the file records. With a
# file name and a record number: print this record as HTML.
if len(sys.argv) < 2:
    usage()

docfile = sys.argv[1]

if len(sys.argv) > 2:
    targetnum = int(sys.argv[2])
else:
    targetnum = None

stdata = os.stat(docfile)
fmtime = stdata[stat.ST_MTIME]
fbytes = stdata[stat.ST_SIZE]
f = open(docfile, 'r')

try:
    if targetnum is None:
        # The index module is only needed when indexing
        import recollq
        db = recollq.connect(confdir=rclconf, writable=1)
        # Record 0 holds the file signature: nothing to do if it is
        # up to date
        if not db.needUpdate(udi(docfile, 0), str(fmtime)):
            sys.exit(0)

    # A line beginning with '%' ends the current record
    separator = re.compile("^%[ \t]*")
    rec = ""
    numrec = 1
    for line in f:
        if separator.match(line):
            if targetnum is None:
                index_rec(db, numrec, rec)
            elif targetnum == numrec:
                output_rec(rec)
                sys.exit(0)
            numrec += 1
            rec = ""
        else:
            rec += line

    # Store the file signature as record 0 once all records are indexed
    if targetnum is None:
        index_rec(db, 0, "")
finally:
    # Also executed when leaving through sys.exit()
    f.close()
|
||||
|
||||
109
src/python/samples/rclmbox.py
Normal file
109
src/python/samples/rclmbox.py
Normal file
@ -0,0 +1,109 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import mailbox
|
||||
import email.header
|
||||
import email.utils
|
||||
#import sys
|
||||
import recollq
|
||||
import os
|
||||
import stat
|
||||
|
||||
#mbfile = "/Users/dockes/projets/fulltext/testrecoll/mail/fred"
|
||||
mbfile = "/Users/dockes/mail/outmail"
|
||||
rclconf = "/Users/dockes/.recoll-test"
|
||||
|
||||
def header_value(msg, nm, to_utf = False):
    """Return the decoded value of header nm from message msg.

    The value is stripped of its line breaks, then split into its encoded
    parts, and each part is decoded with its own charset and followed by a
    space in the result.

    Returns "" if the header is absent. Otherwise returns the decoded text,
    or its UTF-8 encoding if to_utf is true.

    Decoding never fails: bytes which are invalid for the charset are
    replaced, parts without a charset are taken as ASCII, and parts with a
    charset unknown to Python are decoded as iso-8859-1.
    """
    value = msg.get(nm)
    if value is None:
        return ""
    value = value.replace("\n", "").replace("\r", "")

    univalue = u""
    for text, charset in email.header.decode_header(value):
        # Parts which are already text have no decode() method
        if hasattr(text, "decode"):
            try:
                text = text.decode(charset or "ascii", "replace")
            except LookupError:
                text = text.decode("iso-8859-1")
        univalue += text + " "

    if to_utf:
        return univalue.encode('utf-8')
    return univalue
|
||||
|
||||
class mbox_indexer:
    """Index the messages of an mbox file, one index document per message.

    Each message gets the signature of the mailbox file (modification time
    and size), so that checking the first message is enough to decide if
    the file needs indexing.
    """

    def __init__(self, mbfile):
        """Record the mailbox path, and the file time and size."""
        self.mbfile = mbfile
        stdata = os.stat(mbfile)
        self.fmtime = stdata[stat.ST_MTIME]
        self.fbytes = stdata[stat.ST_SIZE]
        # Number of the next message to index, counted from 1
        self.msgnum = 1

    def sig(self):
        """Return the file signature: "<mtime>:<size>"."""
        return str(self.fmtime) + ":" + str(self.fbytes)

    def udi(self, msgnum):
        """Return the unique document identifier for message msgnum."""
        return self.mbfile + ":" + str(msgnum)

    def index(self, db):
        """Index all messages into db, unless the index is up to date.

        Always returns None.
        """
        if not db.needUpdate(self.udi(1), self.sig()):
            return None
        # Iterate on the mailbox instead of building the list of all
        # messages in memory
        for msg in mailbox.mbox(self.mbfile):
            self.index_message(db, msg)
            self.msgnum += 1

    def _decode(self, data, charset):
        """Decode body bytes to text without ever raising.

        Invalid bytes are replaced, and iso-8859-1 is used if the charset
        is unknown to Python.
        """
        try:
            return data.decode(charset, "replace")
        except LookupError:
            return data.decode("iso-8859-1")

    def index_message(self, db, msg):
        """Build the document for msg and add it to db.

        The document text is made of the From, To and Subject headers and
        of all the text/plain parts of the message. The utf8fn,
        origcharset, keywords and abstract fields are not set.
        """
        sender = header_value(msg, "From")
        subject = header_value(msg, "Subject")

        doc = recollq.Doc()
        doc.author = sender
        doc.url = "file://" + self.mbfile
        doc.ipath = str(self.msgnum)
        doc.mimetype = "message/rfc822"

        # mtime: from the Date header, else use the mailbox file time
        dte = header_value(msg, "Date")
        tm = email.utils.parsedate_tz(dte)
        if tm is None:
            doc.mtime = str(self.fmtime)
        else:
            doc.mtime = str(email.utils.mktime_tz(tm))

        doc.title = subject
        doc.fbytes = str(self.fbytes)

        text = u""
        text += u"From: " + sender + u"\n"
        text += u"To: " + header_value(msg, "To") + u"\n"
        text += u"Subject: " + subject + u"\n"
        text += u"\n"
        for part in msg.walk():
            if part.is_multipart():
                continue
            if part.get_content_type().lower() != "text/plain":
                continue
            payload = part.get_payload(None, True)
            if payload is None:
                continue
            charset = part.get_content_charset("iso-8859-1")
            text += self._decode(payload, charset)
        doc.text = text
        doc.dbytes = str(len(text))
        doc.sig = self.sig()

        db.addOrUpdate(self.udi(self.msgnum), u"", doc)
|
||||
|
||||
|
||||
# Open the index of the test configuration for writing, then index the
# mailbox file into it.
db = recollq.connect(writable=1, confdir=rclconf)
mbidx = mbox_indexer(mbfile)
mbidx.index(db)
|
||||
49
src/python/samples/recollq.py
Executable file
49
src/python/samples/recollq.py
Executable file
@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import recollq
|
||||
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
|
||||
"ipath", "fbytes", "dbytes", "relevance")
|
||||
|
||||
|
||||
def dotest(db, q):
|
||||
query = db.query()
|
||||
#query1 = db.query()
|
||||
|
||||
nres = query.execute(q)
|
||||
print "Result count: ", nres
|
||||
if nres > 10:
|
||||
nres = 10
|
||||
while query.next >= 0 and query.next < nres:
|
||||
doc = query.fetchone()
|
||||
print query.next
|
||||
for k in ("title",):
|
||||
print k, ":", getattr(doc, k).encode('utf-8')
|
||||
abs = db.makeDocAbstract(doc, query).encode('utf-8')
|
||||
print abs
|
||||
print
|
||||
|
||||
# End dotest
|
||||
|
||||
q = "essaouira"
|
||||
|
||||
print "TESTING WITH .recoll"
|
||||
db = recollq.connect()
|
||||
db.setAbstractParams(maxchars=80, contextwords=2)
|
||||
dotest(db, q)
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
print "TESTING WITH .recoll-test"
|
||||
db = recollq.connect(confdir="/Users/dockes/.recoll-test")
|
||||
dotest(db, q)
|
||||
|
||||
print "TESTING WITH .recoll-doc"
|
||||
db = recollq.connect(confdir="/y/home/dockes/.recoll-doc")
|
||||
dotest(db, q)
|
||||
|
||||
print "TESTING WITH .recoll and .recoll-doc"
|
||||
db = recollq.connect(confdir="/Users/dockes/.recoll",
|
||||
extra_dbs=("/y/home/dockes/.recoll-doc",))
|
||||
dotest(db, q)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user