*** empty log message ***
This commit is contained in:
parent
a4498cdca8
commit
b40dac4162
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.5 2008-07-01 08:24:30 dockes Exp $ (C) 2007 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.6 2008-08-26 07:36:41 dockes Exp $ (C) 2007 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
@ -35,6 +35,8 @@ PyObject *obj_Create(PyTypeObject *tp, PyObject *args, PyObject *kwargs)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////
|
||||||
|
////// Python object definitions for Db, Query, and Doc
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
/* Type-specific fields go here. */
|
/* Type-specific fields go here. */
|
||||||
@ -46,7 +48,7 @@ static PyTypeObject recollq_DbType = {
|
|||||||
"recollq.Db", /*tp_name*/
|
"recollq.Db", /*tp_name*/
|
||||||
sizeof(recollq_DbObject), /*tp_basicsize*/
|
sizeof(recollq_DbObject), /*tp_basicsize*/
|
||||||
0, /*tp_itemsize*/
|
0, /*tp_itemsize*/
|
||||||
0, /*tp_dealloc*/
|
0, /*tp_dealloc*/
|
||||||
0, /*tp_print*/
|
0, /*tp_print*/
|
||||||
0, /*tp_getattr*/
|
0, /*tp_getattr*/
|
||||||
0, /*tp_setattr*/
|
0, /*tp_setattr*/
|
||||||
@ -62,26 +64,27 @@ static PyTypeObject recollq_DbType = {
|
|||||||
0, /*tp_setattro*/
|
0, /*tp_setattro*/
|
||||||
0, /*tp_as_buffer*/
|
0, /*tp_as_buffer*/
|
||||||
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||||
"Recollq Db objects", /* tp_doc */
|
"Recollq Db objects", /* tp_doc */
|
||||||
0, /* tp_traverse */
|
0, /* tp_traverse */
|
||||||
0, /* tp_clear */
|
0, /* tp_clear */
|
||||||
0, /* tp_richcompare */
|
0, /* tp_richcompare */
|
||||||
0, /* tp_weaklistoffset */
|
0, /* tp_weaklistoffset */
|
||||||
0, /* tp_iter */
|
0, /* tp_iter */
|
||||||
0, /* tp_iternext */
|
0, /* tp_iternext */
|
||||||
0, /* tp_methods */
|
0, /* tp_methods */
|
||||||
0, /* tp_members */
|
0, /* tp_members */
|
||||||
0, /* tp_getset */
|
0, /* tp_getset */
|
||||||
0, /* tp_base */
|
0, /* tp_base */
|
||||||
0, /* tp_dict */
|
0, /* tp_dict */
|
||||||
0, /* tp_descr_get */
|
0, /* tp_descr_get */
|
||||||
0, /* tp_descr_set */
|
0, /* tp_descr_set */
|
||||||
0, /* tp_dictoffset */
|
0, /* tp_dictoffset */
|
||||||
0, /* tp_init */
|
0, /* tp_init */
|
||||||
0, /* tp_alloc */
|
0, /* tp_alloc */
|
||||||
0, /* tp_new */
|
0, /* tp_new */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
/* Type-specific fields go here. */
|
/* Type-specific fields go here. */
|
||||||
@ -111,24 +114,24 @@ static PyTypeObject recollq_QueryType = {
|
|||||||
0, /*tp_setattro*/
|
0, /*tp_setattro*/
|
||||||
0, /*tp_as_buffer*/
|
0, /*tp_as_buffer*/
|
||||||
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||||
"Recollq Query objects", /* tp_doc */
|
"Recollq Query object", /* tp_doc */
|
||||||
0, /* tp_traverse */
|
0, /* tp_traverse */
|
||||||
0, /* tp_clear */
|
0, /* tp_clear */
|
||||||
0, /* tp_richcompare */
|
0, /* tp_richcompare */
|
||||||
0, /* tp_weaklistoffset */
|
0, /* tp_weaklistoffset */
|
||||||
0, /* tp_iter */
|
0, /* tp_iter */
|
||||||
0, /* tp_iternext */
|
0, /* tp_iternext */
|
||||||
0, /* tp_methods */
|
0, /* tp_methods */
|
||||||
0, /* tp_members */
|
0, /* tp_members */
|
||||||
0, /* tp_getset */
|
0, /* tp_getset */
|
||||||
0, /* tp_base */
|
0, /* tp_base */
|
||||||
0, /* tp_dict */
|
0, /* tp_dict */
|
||||||
0, /* tp_descr_get */
|
0, /* tp_descr_get */
|
||||||
0, /* tp_descr_set */
|
0, /* tp_descr_set */
|
||||||
0, /* tp_dictoffset */
|
0, /* tp_dictoffset */
|
||||||
0, /* tp_init */
|
0, /* tp_init */
|
||||||
0, /* tp_alloc */
|
0, /* tp_alloc */
|
||||||
0, /* tp_new */
|
0, /* tp_new */
|
||||||
};
|
};
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
@ -158,26 +161,28 @@ static PyTypeObject recollq_DocType = {
|
|||||||
0, /*tp_setattro*/
|
0, /*tp_setattro*/
|
||||||
0, /*tp_as_buffer*/
|
0, /*tp_as_buffer*/
|
||||||
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||||
"Recollq Doc objects", /* tp_doc */
|
"Recollq Doc objects", /* tp_doc */
|
||||||
0, /* tp_traverse */
|
0, /* tp_traverse */
|
||||||
0, /* tp_clear */
|
0, /* tp_clear */
|
||||||
0, /* tp_richcompare */
|
0, /* tp_richcompare */
|
||||||
0, /* tp_weaklistoffset */
|
0, /* tp_weaklistoffset */
|
||||||
0, /* tp_iter */
|
0, /* tp_iter */
|
||||||
0, /* tp_iternext */
|
0, /* tp_iternext */
|
||||||
0, /* tp_methods */
|
0, /* tp_methods */
|
||||||
0, /* tp_members */
|
0, /* tp_members */
|
||||||
0, /* tp_getset */
|
0, /* tp_getset */
|
||||||
0, /* tp_base */
|
0, /* tp_base */
|
||||||
0, /* tp_dict */
|
0, /* tp_dict */
|
||||||
0, /* tp_descr_get */
|
0, /* tp_descr_get */
|
||||||
0, /* tp_descr_set */
|
0, /* tp_descr_set */
|
||||||
0, /* tp_dictoffset */
|
0, /* tp_dictoffset */
|
||||||
0, /* tp_init */
|
0, /* tp_init */
|
||||||
0, /* tp_alloc */
|
0, /* tp_alloc */
|
||||||
0, /* tp_new */
|
0, /* tp_new */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
///////////////////////////////////////////////
|
||||||
|
////// Db object code
|
||||||
static void
|
static void
|
||||||
Db_dealloc(recollq_DbObject *self)
|
Db_dealloc(recollq_DbObject *self)
|
||||||
{
|
{
|
||||||
@ -206,12 +211,13 @@ static int
|
|||||||
Db_init(recollq_DbObject *self, PyObject *args, PyObject *kwargs)
|
Db_init(recollq_DbObject *self, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
LOGDEB(("Db_init\n"));
|
LOGDEB(("Db_init\n"));
|
||||||
static char *kwlist[] = {"confdir", "extra_dbs", NULL};
|
static char *kwlist[] = {"confdir", "extra_dbs", "writable", NULL};
|
||||||
PyObject *extradbs = 0;
|
PyObject *extradbs = 0;
|
||||||
char *confdir = 0;
|
char *confdir = 0;
|
||||||
|
int writable = 0;
|
||||||
|
|
||||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|sO", kwlist,
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|sOi", kwlist,
|
||||||
&confdir, &extradbs))
|
&confdir, &extradbs, &writable))
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
// If the user creates several dbs, changing the confdir, we call
|
// If the user creates several dbs, changing the confdir, we call
|
||||||
@ -239,9 +245,10 @@ Db_init(recollq_DbObject *self, PyObject *args, PyObject *kwargs)
|
|||||||
self->db = new Rcl::Db;
|
self->db = new Rcl::Db;
|
||||||
string dbdir = rclconfig->getDbDir();
|
string dbdir = rclconfig->getDbDir();
|
||||||
LOGDEB(("Db_init: getdbdir ok: [%s]\n", dbdir.c_str()));
|
LOGDEB(("Db_init: getdbdir ok: [%s]\n", dbdir.c_str()));
|
||||||
if (!self->db->open(dbdir, rclconfig->getStopfile(), Rcl::Db::DbRO)) {
|
if (!self->db->open(dbdir, rclconfig->getStopfile(), writable ?
|
||||||
|
Rcl::Db::DbUpd : Rcl::Db::DbRO)) {
|
||||||
LOGDEB(("Db_init: db open error\n"));
|
LOGDEB(("Db_init: db open error\n"));
|
||||||
PyErr_SetString(PyExc_EnvironmentError, "Cant open index");
|
PyErr_SetString(PyExc_EnvironmentError, "Can't open index");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -355,20 +362,85 @@ Db_makeDocAbstract(recollq_DbObject* self, PyObject *args, PyObject *)
|
|||||||
"UTF-8", "replace");
|
"UTF-8", "replace");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Python method Db.needUpdate(udi, sig).
//
// Forwards to Rcl::Db::needUpdate() to ask whether the document
// identified by udi has to be (re)indexed given the signature sig.
//
// Both arguments are converted to UTF-8 by PyArg_ParseTuple() ("es"
// format), which allocates the two buffers: once parsing has succeeded
// they must be released with PyMem_Free() on every return path.
//
// Returns a Python integer built from the boolean result, or 0 with a
// Python exception set (bad arguments, or AttributeError if the Db
// object is not usable).
static PyObject *
Db_needUpdate(recollq_DbObject* self, PyObject *args, PyObject *kwds)
{
    char *udi = 0;
    char *sig = 0;
    LOGDEB(("Db_needUpdate\n"));
    if (!PyArg_ParseTuple(args, "eses:Db_needUpdate", 
                          "utf-8", &udi, "utf-8", &sig)) {
        return 0;
    }
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
        LOGERR(("Db_needUpdate: db not found %p\n", self->db));
        PyErr_SetString(PyExc_AttributeError, "db");
        // The encoded copies are ours even when we fail
        PyMem_Free(udi);
        PyMem_Free(sig);
        return 0;
    }
    bool result = self->db->needUpdate(udi, sig);
    PyMem_Free(udi);
    PyMem_Free(sig);
    return Py_BuildValue("i", result);
}
|
||||||
|
|
||||||
|
// Python method Db.addOrUpdate(udi, parent_udi, doc).
//
// Hands the Rcl::Doc held by the Doc object over to
// Rcl::Db::addOrUpdate(), under the given unique document identifier
// and parent identifier.
//
// udi and parent_udi are converted to UTF-8 by PyArg_ParseTuple() ("es"
// format), which allocates the buffers. All checks below funnel into a
// single exit so that both buffers are always released.
//
// Returns None on success, or 0 with a Python exception set (bad
// arguments, or AttributeError for an unusable Db or Doc object or an
// index error).
static PyObject *
Db_addOrUpdate(recollq_DbObject* self, PyObject *args, PyObject *)
{
    LOGDEB(("Db_addOrUpdate\n"));
    char *udi = 0;
    char *parent_udi = 0;
    recollq_DocObject *pydoc = 0;

    // The name after ':' is what Python shows in argument error messages
    if (!PyArg_ParseTuple(args, "esesO!:Db_addOrUpdate", 
                          "utf-8", &udi, "utf-8", &parent_udi,
                          &recollq_DocType, &pydoc)) {
        return 0;
    }

    PyObject *result = 0;
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
        LOGERR(("Db_addOrUpdate: db not found %p\n", self->db));
        PyErr_SetString(PyExc_AttributeError, "db");
    } else if (pydoc->doc == 0 || 
               the_docs.find(pydoc->doc) == the_docs.end()) {
        LOGERR(("Db_addOrUpdate: doc not found %p\n", pydoc->doc));
        PyErr_SetString(PyExc_AttributeError, "doc");
    } else if (!self->db->addOrUpdate(udi, parent_udi, *pydoc->doc)) {
        LOGERR(("Db_addOrUpdate: rcldb error\n"));
        PyErr_SetString(PyExc_AttributeError, "rcldb error");
    } else {
        Py_INCREF(Py_None);
        result = Py_None;
    }

    PyMem_Free(udi);
    PyMem_Free(parent_udi);
    return result;
}
|
||||||
|
|
||||||
static PyMethodDef Db_methods[] = {
|
static PyMethodDef Db_methods[] = {
|
||||||
{"query", (PyCFunction)Db_query, METH_NOARGS,
|
{"query", (PyCFunction)Db_query, METH_NOARGS,
|
||||||
"Return a new, blank query for this index"
|
"Return a new, blank query for this index"
|
||||||
},
|
},
|
||||||
{"setAbstractParams", (PyCFunction)Db_setAbstractParams,
|
{"setAbstractParams", (PyCFunction)Db_setAbstractParams,
|
||||||
METH_VARARGS|METH_KEYWORDS,
|
METH_VARARGS|METH_KEYWORDS,
|
||||||
"Set abstract build params: maxchars and contextwords"
|
"Set abstract build parameters: maxchars and contextwords"
|
||||||
},
|
},
|
||||||
{"makeDocAbstract", (PyCFunction)Db_makeDocAbstract, METH_VARARGS,
|
{"makeDocAbstract", (PyCFunction)Db_makeDocAbstract, METH_VARARGS,
|
||||||
"Return a new, blank query for this index"
|
"Build keyword in context abstract for document and query"
|
||||||
|
},
|
||||||
|
{"needUpdate", (PyCFunction)Db_needUpdate, METH_VARARGS,
|
||||||
|
"Check index up to date"
|
||||||
|
},
|
||||||
|
{"addOrUpdate", (PyCFunction)Db_addOrUpdate, METH_VARARGS,
|
||||||
|
"Add or update document in index"
|
||||||
},
|
},
|
||||||
{NULL} /* Sentinel */
|
{NULL} /* Sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/////////////////////////////////////////////
|
||||||
|
/// Query object method
|
||||||
static void
|
static void
|
||||||
Query_dealloc(recollq_QueryObject *self)
|
Query_dealloc(recollq_QueryObject *self)
|
||||||
{
|
{
|
||||||
@ -394,6 +466,9 @@ Query_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
|||||||
return (PyObject *)self;
|
return (PyObject *)self;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Query_init creates an unusable object. The only way to create a
|
||||||
|
// valid Query Object is through db_query(). (or we'd need to add a Db
|
||||||
|
// parameter to the Query object creation method)
|
||||||
static int
|
static int
|
||||||
Query_init(recollq_QueryObject *self, PyObject *, PyObject *)
|
Query_init(recollq_QueryObject *self, PyObject *, PyObject *)
|
||||||
{
|
{
|
||||||
@ -411,9 +486,8 @@ static PyObject *
|
|||||||
Query_execute(recollq_QueryObject* self, PyObject *args, PyObject *kwds)
|
Query_execute(recollq_QueryObject* self, PyObject *args, PyObject *kwds)
|
||||||
{
|
{
|
||||||
char *utf8 = 0;
|
char *utf8 = 0;
|
||||||
int len = 0;
|
|
||||||
LOGDEB(("Query_execute\n"));
|
LOGDEB(("Query_execute\n"));
|
||||||
if (!PyArg_ParseTuple(args, "es#:Query_execute", "utf-8", &utf8, &len)) {
|
if (!PyArg_ParseTuple(args, "es:Query_execute", "utf-8", &utf8)) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -425,6 +499,7 @@ Query_execute(recollq_QueryObject* self, PyObject *args, PyObject *kwds)
|
|||||||
}
|
}
|
||||||
string reason;
|
string reason;
|
||||||
Rcl::SearchData *sd = wasaStringToRcl(utf8, reason);
|
Rcl::SearchData *sd = wasaStringToRcl(utf8, reason);
|
||||||
|
PyMem_Free(utf8);
|
||||||
if (!sd) {
|
if (!sd) {
|
||||||
PyErr_SetString(PyExc_ValueError, reason.c_str());
|
PyErr_SetString(PyExc_ValueError, reason.c_str());
|
||||||
return 0;
|
return 0;
|
||||||
@ -451,24 +526,22 @@ Query_fetchone(recollq_QueryObject* self, PyObject *, PyObject *)
|
|||||||
PyErr_SetString(PyExc_AttributeError, "query: no results");
|
PyErr_SetString(PyExc_AttributeError, "query: no results");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
Rcl::Doc *doc = new Rcl::Doc;
|
recollq_DocObject *result =
|
||||||
|
(recollq_DocObject *)obj_Create(&recollq_DocType, 0, 0);
|
||||||
|
if (!result) {
|
||||||
|
LOGERR(("Query_fetchone: couldn't create doc object for result\n"));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
int percent;
|
int percent;
|
||||||
if (!self->query->getDoc(self->next, *doc, &percent)) {
|
if (!self->query->getDoc(self->next, *result->doc, &percent)) {
|
||||||
PyErr_SetString(PyExc_EnvironmentError, "query: cant fetch result");
|
PyErr_SetString(PyExc_EnvironmentError, "query: cant fetch result");
|
||||||
self->next = -1;
|
self->next = -1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
self->next++;
|
self->next++;
|
||||||
recollq_DocObject *result =
|
|
||||||
(recollq_DocObject *)obj_Create(&recollq_DocType, 0, 0);
|
|
||||||
if (!result) {
|
|
||||||
delete doc;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
result->doc = doc;
|
|
||||||
the_docs.insert(result->doc);
|
|
||||||
// Move some data from the dedicated fields to the meta array to make
|
// Move some data from the dedicated fields to the meta array to make
|
||||||
// fetching attributes easier
|
// fetching attributes easier
|
||||||
|
Rcl::Doc *doc = result->doc;
|
||||||
printableUrl(rclconfig->getDefCharset(), doc->url, doc->meta["url"]);
|
printableUrl(rclconfig->getDefCharset(), doc->url, doc->meta["url"]);
|
||||||
doc->meta["mimetype"] = doc->mimetype;
|
doc->meta["mimetype"] = doc->mimetype;
|
||||||
doc->meta["mtime"] = doc->dmtime.empty() ? doc->fmtime : doc->dmtime;
|
doc->meta["mtime"] = doc->dmtime.empty() ? doc->fmtime : doc->dmtime;
|
||||||
@ -502,7 +575,8 @@ static PyMemberDef Query_members[] = {
|
|||||||
{NULL} /* Sentinel */
|
{NULL} /* Sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
///// Doc object methods
|
||||||
static void
|
static void
|
||||||
Doc_dealloc(recollq_DocObject *self)
|
Doc_dealloc(recollq_DocObject *self)
|
||||||
{
|
{
|
||||||
@ -534,14 +608,21 @@ Doc_init(recollq_DocObject *self, PyObject *, PyObject *)
|
|||||||
if (self->doc)
|
if (self->doc)
|
||||||
the_docs.erase(self->doc);
|
the_docs.erase(self->doc);
|
||||||
delete self->doc;
|
delete self->doc;
|
||||||
self->doc = 0;
|
self->doc = new Rcl::Doc;
|
||||||
|
if (self->doc == 0)
|
||||||
|
return -1;
|
||||||
|
the_docs.insert(self->doc);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The "closure" thing is actually the meta field name. This is how
|
||||||
|
// Python allows one set of get/set functions to get/set different
|
||||||
|
// attributes: it passes them an additional parameter taken from the
|
||||||
|
// getseters table and calls it a "closure".
|
||||||
static PyObject *
|
static PyObject *
|
||||||
Doc_getmeta(recollq_DocObject *self, void *closure)
|
Doc_getmeta(recollq_DocObject *self, void *closure)
|
||||||
{
|
{
|
||||||
LOGDEB(("Doc_getmeta\n"));
|
LOGDEB(("Doc_getmeta: [%s]\n", (const char *)closure));
|
||||||
if (self->doc == 0 ||
|
if (self->doc == 0 ||
|
||||||
the_docs.find(self->doc) == the_docs.end()) {
|
the_docs.find(self->doc) == the_docs.end()) {
|
||||||
PyErr_SetString(PyExc_AttributeError, "doc");
|
PyErr_SetString(PyExc_AttributeError, "doc");
|
||||||
@ -568,32 +649,109 @@ Doc_getmeta(recollq_DocObject *self, void *closure)
|
|||||||
static int
|
static int
|
||||||
Doc_setmeta(recollq_DocObject *self, PyObject *value, void *closure)
|
Doc_setmeta(recollq_DocObject *self, PyObject *value, void *closure)
|
||||||
{
|
{
|
||||||
PyErr_SetString(PyExc_RuntimeError, "Cannot set attributes for now");
|
if (self->doc == 0 ||
|
||||||
return -1;
|
the_docs.find(self->doc) == the_docs.end()) {
|
||||||
|
PyErr_SetString(PyExc_AttributeError, "doc??");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
LOGDEB2(("Doc_setmeta: doc %p\n", self->doc));
|
||||||
|
if (PyString_Check(value)) {
|
||||||
|
value = PyUnicode_FromObject(value);
|
||||||
|
if (value == 0)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!PyUnicode_Check(value)) {
|
||||||
|
PyErr_SetString(PyExc_AttributeError, "value not str/unicode??");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject* putf8 = PyUnicode_AsUTF8String(value);
|
||||||
|
if (putf8 == 0) {
|
||||||
|
LOGERR(("Doc_setmeta: encoding to utf8 failed\n"));
|
||||||
|
PyErr_SetString(PyExc_AttributeError, "value??");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
char* uvalue = PyString_AsString(putf8);
|
||||||
|
const char *key = (const char *)closure;
|
||||||
|
if (key == 0) {
|
||||||
|
PyErr_SetString(PyExc_AttributeError, "key??");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOGDEB(("Doc_setmeta: setting [%s] to [%s]\n", key, uvalue));
|
||||||
|
self->doc->meta[key] = uvalue;
|
||||||
|
switch (key[0]) {
|
||||||
|
case 'd':
|
||||||
|
if (!strcmp(key, "dbytes")) {
|
||||||
|
self->doc->dbytes = uvalue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'f':
|
||||||
|
if (!strcmp(key, "fbytes")) {
|
||||||
|
self->doc->fbytes = uvalue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'i':
|
||||||
|
if (!strcmp(key, "ipath")) {
|
||||||
|
self->doc->ipath = uvalue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'm':
|
||||||
|
if (!strcmp(key, "mimetype")) {
|
||||||
|
self->doc->mimetype = uvalue;
|
||||||
|
} else if (!strcmp(key, "mtime")) {
|
||||||
|
self->doc->dmtime = uvalue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 's':
|
||||||
|
if (!strcmp(key, "sig")) {
|
||||||
|
self->doc->sig = uvalue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
if (!strcmp(key, "text")) {
|
||||||
|
self->doc->text = uvalue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'u':
|
||||||
|
if (!strcmp(key, "url")) {
|
||||||
|
self->doc->url = uvalue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyGetSetDef Doc_getseters[] = {
|
static PyGetSetDef Doc_getseters[] = {
|
||||||
// Name, get, set, doc, closure
|
// Name, get, set, doc, closure
|
||||||
{"title", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
|
||||||
"title", (void *)"title"},
|
|
||||||
{"keywords", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
|
||||||
"keywords", (void *)"keywords"},
|
|
||||||
{"abstract", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
|
||||||
"abstract", (void *)"abstract"},
|
|
||||||
{"url", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
{"url", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||||
"url", (void *)"url"},
|
"url", (void *)"url"},
|
||||||
|
{"ipath", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||||
|
"ipath", (void *)"ipath"},
|
||||||
{"mimetype", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
{"mimetype", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||||
"mimetype", (void *)"mimetype"},
|
"mimetype", (void *)"mimetype"},
|
||||||
{"mtime", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
{"mtime", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||||
"mtime", (void *)"mtime"},
|
"mtime", (void *)"mtime"},
|
||||||
{"ipath", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
|
||||||
"ipath", (void *)"ipath"},
|
|
||||||
{"fbytes", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
{"fbytes", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||||
"fbytes", (void *)"fbytes"},
|
"fbytes", (void *)"fbytes"},
|
||||||
{"dbytes", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
{"dbytes", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||||
"dbytes", (void *)"dbytes"},
|
"dbytes", (void *)"dbytes"},
|
||||||
{"relevance", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
{"relevance", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||||
"relevance", (void *)"relevance"},
|
"relevance", (void *)"relevance"},
|
||||||
|
{"title", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||||
|
"title", (void *)"title"},
|
||||||
|
{"keywords", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||||
|
"keywords", (void *)"keywords"},
|
||||||
|
{"abstract", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||||
|
"abstract", (void *)"abstract"},
|
||||||
|
{"author", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||||
|
"author", (void *)"author"},
|
||||||
|
{"text", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||||
|
"text", (void *)"text"},
|
||||||
|
{"sig", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||||
|
"sig", (void *)"sig"},
|
||||||
{NULL} /* Sentinel */
|
{NULL} /* Sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
from distutils.core import setup, Extension
|
from distutils.core import setup, Extension
|
||||||
|
top = '../../'
|
||||||
|
|
||||||
module1 = Extension('recollq',
|
module1 = Extension('recollq',
|
||||||
define_macros = [('MAJOR_VERSION', '1'),
|
define_macros = [('MAJOR_VERSION', '1'),
|
||||||
@ -9,37 +10,37 @@ module1 = Extension('recollq',
|
|||||||
'"/usr/local/share/recoll"')
|
'"/usr/local/share/recoll"')
|
||||||
],
|
],
|
||||||
include_dirs = ['/usr/local/include',
|
include_dirs = ['/usr/local/include',
|
||||||
'../utils',
|
top + 'utils',
|
||||||
'../common',
|
top + 'common',
|
||||||
'../rcldb',
|
top + 'rcldb',
|
||||||
'../query',
|
top + 'query',
|
||||||
'../unac'
|
top + 'unac'
|
||||||
],
|
],
|
||||||
libraries = ['xapian', 'iconv'],
|
libraries = ['xapian', 'iconv'],
|
||||||
library_dirs = ['/usr/local/lib'],
|
library_dirs = ['/usr/local/lib'],
|
||||||
sources = ['recoll_query.cpp',
|
sources = ['recoll_query.cpp',
|
||||||
'../common/rclconfig.cpp',
|
top + 'common/rclconfig.cpp',
|
||||||
'../common/rclinit.cpp',
|
top + 'common/rclinit.cpp',
|
||||||
'../common/textsplit.cpp',
|
top + 'common/textsplit.cpp',
|
||||||
'../common/unacpp.cpp',
|
top + 'common/unacpp.cpp',
|
||||||
'../query/wasastringtoquery.cpp',
|
top + 'query/wasastringtoquery.cpp',
|
||||||
'../query/wasatorcl.cpp',
|
top + 'query/wasatorcl.cpp',
|
||||||
'../rcldb/pathhash.cpp',
|
top + 'rcldb/pathhash.cpp',
|
||||||
'../rcldb/rcldb.cpp',
|
top + 'rcldb/rcldb.cpp',
|
||||||
'../rcldb/rclquery.cpp',
|
top + 'rcldb/rclquery.cpp',
|
||||||
'../rcldb/searchdata.cpp',
|
top + 'rcldb/searchdata.cpp',
|
||||||
'../rcldb/stemdb.cpp',
|
top + 'rcldb/stemdb.cpp',
|
||||||
'../rcldb/stoplist.cpp',
|
top + 'rcldb/stoplist.cpp',
|
||||||
'../unac/unac.c',
|
top + 'unac/unac.c',
|
||||||
'../utils/base64.cpp',
|
top + 'utils/base64.cpp',
|
||||||
'../utils/conftree.cpp',
|
top + 'utils/conftree.cpp',
|
||||||
'../utils/debuglog.cpp',
|
top + 'utils/debuglog.cpp',
|
||||||
'../utils/md5.cpp',
|
top + 'utils/md5.cpp',
|
||||||
'../utils/pathut.cpp',
|
top + 'utils/pathut.cpp',
|
||||||
'../utils/readfile.cpp',
|
top + 'utils/readfile.cpp',
|
||||||
'../utils/smallut.cpp',
|
top + 'utils/smallut.cpp',
|
||||||
'../utils/transcode.cpp',
|
top + 'utils/transcode.cpp',
|
||||||
'../utils/wipedir.cpp'
|
top + 'utils/wipedir.cpp'
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
114
src/python/samples/rcldlkp.py
Executable file
114
src/python/samples/rcldlkp.py
Executable file
@ -0,0 +1,114 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__doc__ = """
|
||||||
|
''Lookup'' notes file indexing
|
||||||
|
|
||||||
|
The file format has text notes separated by lines with a single '%' character
|
||||||
|
|
||||||
|
If the script is called with just the file name as an argument, it will
|
||||||
|
(re)index the contents.
|
||||||
|
|
||||||
|
If the script is called with a second numeric argument, it will retrieve the
|
||||||
|
specified record and output it in html
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import stat
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
|
||||||
|
rclconf = "/Users/dockes/.recoll-test"
|
||||||
|
|
||||||
|
def udi(docfile, numrec):
    # Unique document identifier for a record: the notes file path and
    # the record number, joined by a '#'
    return "#".join((docfile, str(numrec)))
|
||||||
|
|
||||||
|
###############################################################
|
||||||
|
def index_rec(db, numrec, rec):
|
||||||
|
doc = recollq.Doc()
|
||||||
|
# url
|
||||||
|
doc.url = "file://" + docfile
|
||||||
|
# utf8fn
|
||||||
|
# ipath
|
||||||
|
doc.ipath = str(numrec)
|
||||||
|
# mimetype
|
||||||
|
doc.mimetype = "text/plain"
|
||||||
|
# mtime
|
||||||
|
# origcharset
|
||||||
|
# title
|
||||||
|
lines = rec.split("\n")
|
||||||
|
if len(lines) >= 2:
|
||||||
|
doc.title = unicode(lines[1], "iso-8859-1")
|
||||||
|
if len(doc.title.strip()) == 0 and len(lines) >= 3:
|
||||||
|
doc.title = unicode(lines[2], "iso-8859-1")
|
||||||
|
# keywords
|
||||||
|
# abstract
|
||||||
|
# author
|
||||||
|
# fbytes
|
||||||
|
doc.fbytes = str(fbytes)
|
||||||
|
# text
|
||||||
|
doc.text = unicode(rec, "iso-8859-1")
|
||||||
|
# dbytes
|
||||||
|
doc.dbytes = str(len(rec))
|
||||||
|
# sig
|
||||||
|
if numrec == 0:
|
||||||
|
doc.sig = str(fmtime)
|
||||||
|
db.addOrUpdate(udi(docfile, numrec), u"", doc)
|
||||||
|
|
||||||
|
def output_rec(rec):
|
||||||
|
# Escape html
|
||||||
|
rec = unicode(rec, "iso-8859-1").encode("utf-8")
|
||||||
|
rec = rec.replace("<", "<");
|
||||||
|
rec = rec.replace("&", "&");
|
||||||
|
rec = rec.replace('"', "&dquot;");
|
||||||
|
print '<html><head>'
|
||||||
|
print '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
||||||
|
print '</head><body><pre>'
|
||||||
|
print rec
|
||||||
|
print '</pre></body></html>'
|
||||||
|
|
||||||
|
|
||||||
|
################################################################
|
||||||
|
|
||||||
|
def usage():
|
||||||
|
sys.stderr.write("Usage: doclookup.py <filename> [<recnum>]\n")
|
||||||
|
exit(1)
|
||||||
|
|
||||||
|
if len(sys.argv) < 2:
|
||||||
|
usage()
|
||||||
|
|
||||||
|
docfile = sys.argv[1]
|
||||||
|
|
||||||
|
if len(sys.argv) > 2:
|
||||||
|
targetnum = int(sys.argv[2])
|
||||||
|
else:
|
||||||
|
targetnum = None
|
||||||
|
|
||||||
|
#print docfile, targetnum
|
||||||
|
|
||||||
|
stdata = os.stat(docfile)
|
||||||
|
fmtime = stdata[stat.ST_MTIME]
|
||||||
|
fbytes = stdata[stat.ST_SIZE]
|
||||||
|
f = open(docfile, 'r')
|
||||||
|
|
||||||
|
if targetnum == None:
|
||||||
|
import recollq
|
||||||
|
db = recollq.connect(confdir=rclconf, writable=1)
|
||||||
|
if not db.needUpdate(udi(docfile, 0), str(fmtime)):
|
||||||
|
exit(0)
|
||||||
|
|
||||||
|
rec = ""
|
||||||
|
numrec = 1
|
||||||
|
for line in f:
|
||||||
|
if re.compile("^%[ \t]*").match(line):
|
||||||
|
if targetnum == None:
|
||||||
|
index_rec(db, numrec, rec)
|
||||||
|
elif targetnum == numrec:
|
||||||
|
output_rec(rec)
|
||||||
|
exit(0)
|
||||||
|
numrec += 1
|
||||||
|
rec = ""
|
||||||
|
else:
|
||||||
|
rec += line
|
||||||
|
|
||||||
|
if targetnum == None:
|
||||||
|
index_rec(db, 0, "")
|
||||||
|
|
||||||
109
src/python/samples/rclmbox.py
Normal file
109
src/python/samples/rclmbox.py
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import mailbox
|
||||||
|
import email.header
|
||||||
|
import email.utils
|
||||||
|
#import sys
|
||||||
|
import recollq
|
||||||
|
import os
|
||||||
|
import stat
|
||||||
|
|
||||||
|
#mbfile = "/Users/dockes/projets/fulltext/testrecoll/mail/fred"
|
||||||
|
mbfile = "/Users/dockes/mail/outmail"
|
||||||
|
rclconf = "/Users/dockes/.recoll-test"
|
||||||
|
|
||||||
|
def header_value(msg, nm, to_utf = False):
    # Return the decoded value of header nm from message msg.
    #
    # The header is unfolded (CR and LF removed), then split into its
    # RFC 2047 parts by email.header.decode_header(). Each part is
    # converted to unicode and followed by a space.
    #
    # Returns "" if the header is absent, else a unicode string, or its
    # UTF-8 encoding if to_utf is true.
    value = msg.get(nm)
    if value == None:
        return ""
    value = value.replace("\n", "")
    value = value.replace("\r", "")
    parts = email.header.decode_header(value)
    univalue = u""
    for text, charset in parts:
        # Parts with no declared charset should be ascii, but raw 8 bit
        # data does occur in real mail. Decode leniently so that one
        # bad header does not abort the indexing of the whole mailbox.
        try:
            univalue += unicode(text, charset or "ascii", "replace") + " "
        except LookupError:
            # Charset name unknown to python
            univalue += unicode(text, "ascii", "replace") + " "
    if to_utf:
        return univalue.encode('utf-8')
    else:
        return univalue
|
||||||
|
|
||||||
|
class mbox_indexer:
|
||||||
|
def __init__(self, mbfile):
|
||||||
|
self.mbfile = mbfile
|
||||||
|
stdata = os.stat(mbfile)
|
||||||
|
self.fmtime = stdata[stat.ST_MTIME]
|
||||||
|
self.fbytes = stdata[stat.ST_SIZE]
|
||||||
|
self.msgnum = 1
|
||||||
|
|
||||||
|
def sig(self):
|
||||||
|
return str(self.fmtime) + ":" + str(self.fbytes)
|
||||||
|
def udi(self, msgnum):
|
||||||
|
return self.mbfile + ":" + str(msgnum)
|
||||||
|
|
||||||
|
def index(self, db):
|
||||||
|
if not db.needUpdate(self.udi(1), self.sig()):
|
||||||
|
return None
|
||||||
|
mb = mailbox.mbox(self.mbfile)
|
||||||
|
for msg in mb.values():
|
||||||
|
self.index_message(db, msg)
|
||||||
|
self.msgnum += 1
|
||||||
|
|
||||||
|
def index_message(self, db, msg):
|
||||||
|
doc = recollq.Doc()
|
||||||
|
doc.author = header_value(msg, "From")
|
||||||
|
# url
|
||||||
|
doc.url = "file://" + self.mbfile
|
||||||
|
# utf8fn
|
||||||
|
# ipath
|
||||||
|
doc.ipath = str(self.msgnum)
|
||||||
|
# mimetype
|
||||||
|
doc.mimetype = "message/rfc822"
|
||||||
|
# mtime
|
||||||
|
dte = header_value(msg, "Date")
|
||||||
|
tm = email.utils.parsedate_tz(dte)
|
||||||
|
if tm == None:
|
||||||
|
doc.mtime = str(self.fmtime)
|
||||||
|
else:
|
||||||
|
doc.mtime = str(email.utils.mktime_tz(tm))
|
||||||
|
# origcharset
|
||||||
|
# title
|
||||||
|
doc.title = header_value(msg, "Subject")
|
||||||
|
# keywords
|
||||||
|
# abstract
|
||||||
|
# author
|
||||||
|
# fbytes
|
||||||
|
doc.fbytes = str(self.fbytes)
|
||||||
|
# text
|
||||||
|
text = u""
|
||||||
|
text += u"From: " + header_value(msg, "From") + u"\n"
|
||||||
|
text += u"To: " + header_value(msg, "To") + u"\n"
|
||||||
|
text += u"Subject: " + header_value(msg, "Subject") + u"\n"
|
||||||
|
#text += u"Message-ID: " + header_value(msg, "Message-ID") + u"\n"
|
||||||
|
text += u"\n"
|
||||||
|
for part in msg.walk():
|
||||||
|
if part.is_multipart():
|
||||||
|
pass #print "Multipart: " + part.get_content_type()
|
||||||
|
else:
|
||||||
|
ct = part.get_content_type()
|
||||||
|
#print "Simple: " + ct
|
||||||
|
if ct.lower() == "text/plain":
|
||||||
|
charset = part.get_content_charset("iso-8859-1")
|
||||||
|
text += unicode(part.get_payload(None, True), charset)
|
||||||
|
doc.text = text
|
||||||
|
# dbytes
|
||||||
|
doc.dbytes = str(len(text))
|
||||||
|
# sig
|
||||||
|
doc.sig = self.sig()
|
||||||
|
udi = self.udi(self.msgnum)
|
||||||
|
db.addOrUpdate(udi, u"", doc)
|
||||||
|
|
||||||
|
|
||||||
|
db = recollq.connect(confdir=rclconf, writable=1)
|
||||||
|
|
||||||
|
mbidx = mbox_indexer(mbfile)
|
||||||
|
mbidx.index(db)
|
||||||
49
src/python/samples/recollq.py
Executable file
49
src/python/samples/recollq.py
Executable file
@ -0,0 +1,49 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import recollq
|
||||||
|
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
|
||||||
|
"ipath", "fbytes", "dbytes", "relevance")
|
||||||
|
|
||||||
|
|
||||||
|
def dotest(db, q):
|
||||||
|
query = db.query()
|
||||||
|
#query1 = db.query()
|
||||||
|
|
||||||
|
nres = query.execute(q)
|
||||||
|
print "Result count: ", nres
|
||||||
|
if nres > 10:
|
||||||
|
nres = 10
|
||||||
|
while query.next >= 0 and query.next < nres:
|
||||||
|
doc = query.fetchone()
|
||||||
|
print query.next
|
||||||
|
for k in ("title",):
|
||||||
|
print k, ":", getattr(doc, k).encode('utf-8')
|
||||||
|
abs = db.makeDocAbstract(doc, query).encode('utf-8')
|
||||||
|
print abs
|
||||||
|
print
|
||||||
|
|
||||||
|
# End dotest
|
||||||
|
|
||||||
|
q = "essaouira"
|
||||||
|
|
||||||
|
print "TESTING WITH .recoll"
|
||||||
|
db = recollq.connect()
|
||||||
|
db.setAbstractParams(maxchars=80, contextwords=2)
|
||||||
|
dotest(db, q)
|
||||||
|
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
print "TESTING WITH .recoll-test"
|
||||||
|
db = recollq.connect(confdir="/Users/dockes/.recoll-test")
|
||||||
|
dotest(db, q)
|
||||||
|
|
||||||
|
print "TESTING WITH .recoll-doc"
|
||||||
|
db = recollq.connect(confdir="/y/home/dockes/.recoll-doc")
|
||||||
|
dotest(db, q)
|
||||||
|
|
||||||
|
print "TESTING WITH .recoll and .recoll-doc"
|
||||||
|
db = recollq.connect(confdir="/Users/dockes/.recoll",
|
||||||
|
extra_dbs=("/y/home/dockes/.recoll-doc",))
|
||||||
|
dotest(db, q)
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user