updated python searchdata object to match current C++ interface

This commit is contained in:
Jean-Francois Dockes 2013-03-01 11:50:35 +01:00
parent 66a5d9ab83
commit b0e296235f
3 changed files with 90 additions and 52 deletions

View File

@ -3711,6 +3711,8 @@ or
we can index new data through this we can index new data through this
connection.</listitem> connection.</listitem>
</itemizedlist> </itemizedlist>
This call initializes the recoll module, and it should
always be performed before any other call or object creation.
</listitem> </listitem>
</varlistentry> </varlistentry>

View File

@ -74,30 +74,46 @@ SearchData_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return (PyObject *)self; return (PyObject *)self;
} }
PyDoc_STRVAR(doc_SearchDataObject,
"SearchData([type=AND|OR], [stemlang=somelanguage|null])\n"
"\n"
"A SearchData object describes a query. It has a number of global\n"
"parameters and a chain of search clauses.\n"
);
static int static int
SearchData_init(recoll_SearchDataObject *self, PyObject *args, PyObject *kwargs) SearchData_init(recoll_SearchDataObject *self, PyObject *args, PyObject *kwargs)
{ {
LOGDEB(("SearchData_init\n")); LOGDEB(("SearchData_init\n"));
static const char* kwlist[] = {"type", NULL}; static const char* kwlist[] = {"type", "stemlang", NULL};
char *stp = 0; char *stp = 0;
char *steml = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s", (char**)kwlist, &stp)) if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|sz", (char**)kwlist,
&stp, &steml))
return -1; return -1;
Rcl::SClType tp = Rcl::SCLT_AND; Rcl::SClType tp = Rcl::SCLT_AND;
if (stp && strcasecmp(stp, "or")) { if (stp && strcasecmp(stp, "or")) {
tp = Rcl::SCLT_OR; tp = Rcl::SCLT_OR;
} }
self->sd = RefCntr<Rcl::SearchData>(new Rcl::SearchData(tp, "english")); string stemlang;
if (steml) {
stemlang = steml;
} else {
stemlang = "english";
}
self->sd = RefCntr<Rcl::SearchData>(new Rcl::SearchData(tp, stemlang));
return 0; return 0;
} }
/* Note: addclause necessite And/Or vient du fait que le string peut avoir /* Note: addclause necessite And/Or vient du fait que le string peut avoir
plusieurs mots. A transferer dans l'i/f Python ou pas ? */ plusieurs mots. A transferer dans l'i/f Python ou pas ? */
PyDoc_STRVAR(doc_addclause, PyDoc_STRVAR(doc_addclause,
"addclause(type='and'|'or'|'excl'|'phrase'|'near'|'sub',\n" "addclause(type='and'|'or'|'filename'|'phrase'|'near'|'path'|'sub',\n"
" qstring=string, slack=int, field=string, stemming=1|0,\n" " qstring=string, slack=int, field=string, stemming=1|0,\n"
" subSearch=SearchData)\n" " subSearch=SearchData, exclude=0|1, anchorstart=0|1, anchorend=0|1,\n"
" casesens=0|1, diacsens=0|1)\n"
"Adds a simple clause to the SearchData And/Or chain, or a subquery\n" "Adds a simple clause to the SearchData And/Or chain, or a subquery\n"
"defined by another SearchData object\n" "defined by another SearchData object\n"
); );
@ -107,20 +123,12 @@ static PyObject *
SearchData_addclause(recoll_SearchDataObject* self, PyObject *args, SearchData_addclause(recoll_SearchDataObject* self, PyObject *args,
PyObject *kwargs); PyObject *kwargs);
static PyMethodDef SearchData_methods[] = { static PyMethodDef SearchData_methods[] = {
{"addclause", (PyCFunction)SearchData_addclause, METH_VARARGS|METH_KEYWORDS, {"addclause", (PyCFunction)SearchData_addclause, METH_VARARGS|METH_KEYWORDS,
doc_addclause}, doc_addclause},
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };
PyDoc_STRVAR(doc_SearchDataObject,
"SearchData()\n"
"\n"
"A SearchData object describes a query. It has a number of global\n"
"parameters and a chain of search clauses.\n"
);
static PyTypeObject recoll_SearchDataType = { static PyTypeObject recoll_SearchDataType = {
PyObject_HEAD_INIT(NULL) PyObject_HEAD_INIT(NULL)
0, /*ob_size*/ 0, /*ob_size*/
@ -174,17 +182,29 @@ SearchData_addclause(recoll_SearchDataObject* self, PyObject *args,
return 0; return 0;
} }
static const char *kwlist[] = {"type", "qstring", "slack", "field", static const char *kwlist[] = {"type", "qstring", "slack", "field",
"stemming", "subsearch", NULL}; "stemming", "subsearch",
"exclude", "anchorstart", "anchorend",
"casesens", "diacsens",
NULL};
char *tp = 0; char *tp = 0;
char *qs = 0; // needs freeing char *qs = 0; // needs freeing
int slack = 0; int slack = 0;
char *fld = 0; // needs freeing char *fld = 0; // needs freeing
int dostem = 1; PyObject *dostem = 0;
recoll_SearchDataObject *sub = 0; recoll_SearchDataObject *sub = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "ses|iesiO!", (char**)kwlist, PyObject *exclude = 0;
PyObject *anchorstart = 0;
PyObject *anchorend = 0;
PyObject *casesens = 0;
PyObject *diacsens = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "ses|iesOO!OOOOO",
(char**)kwlist,
&tp, "utf-8", &qs, &slack, &tp, "utf-8", &qs, &slack,
"utf-8", &fld, &dostem, "utf-8", &fld, &dostem,
&recoll_SearchDataType, &sub)) &recoll_SearchDataType, &sub,
&exclude, &anchorstart, &anchorend,
&casesens, &diacsens
))
return 0; return 0;
Rcl::SearchDataClause *cl = 0; Rcl::SearchDataClause *cl = 0;
@ -196,19 +216,18 @@ SearchData_addclause(recoll_SearchDataObject* self, PyObject *args,
goto defaultcase; goto defaultcase;
cl = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, qs, fld?fld:""); cl = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, qs, fld?fld:"");
break; break;
case 'f':
case 'F':
if (strcasecmp(tp, "filename"))
goto defaultcase;
cl = new Rcl::SearchDataClauseFilename(qs);
break;
case 'o': case 'o':
case 'O': case 'O':
if (strcasecmp(tp, "or")) if (strcasecmp(tp, "or"))
goto defaultcase; goto defaultcase;
cl = new Rcl::SearchDataClauseSimple(Rcl::SCLT_OR, qs, fld?fld:""); cl = new Rcl::SearchDataClauseSimple(Rcl::SCLT_OR, qs, fld?fld:"");
break; break;
case 'e':
case 'E':
if (strcasecmp(tp, "excl"))
goto defaultcase;
cl = new Rcl::SearchDataClauseSimple(Rcl::SCLT_OR, qs, fld?fld:"");
cl->setexclude(true);
break;
case 'n': case 'n':
case 'N': case 'N':
if (strcasecmp(tp, "near")) if (strcasecmp(tp, "near"))
@ -218,10 +237,14 @@ SearchData_addclause(recoll_SearchDataObject* self, PyObject *args,
break; break;
case 'p': case 'p':
case 'P': case 'P':
if (strcasecmp(tp, "phrase")) if (!strcasecmp(tp, "phrase")) {
cl = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, qs, slack,
fld ? fld : "");
} else if (!strcasecmp(tp, "path")) {
cl = new Rcl::SearchDataClausePath(qs);
} else {
goto defaultcase; goto defaultcase;
cl = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, qs, slack, }
fld ? fld : "");
break; break;
case 's': case 's':
case 'S': case 'S':
@ -234,12 +257,27 @@ SearchData_addclause(recoll_SearchDataObject* self, PyObject *args,
PyErr_SetString(PyExc_AttributeError, "Bad tp arg"); PyErr_SetString(PyExc_AttributeError, "Bad tp arg");
return 0; return 0;
} }
if (dostem == 0) {
cl->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
}
PyMem_Free(qs); PyMem_Free(qs);
PyMem_Free(fld); PyMem_Free(fld);
if (dostem != 0 && !PyObject_IsTrue(dostem)) {
cl->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
}
if (exclude != 0 && !PyObject_IsTrue(exclude)) {
cl->setexclude(true);
}
if (anchorstart && PyObject_IsTrue(anchorstart)) {
cl->addModifier(Rcl::SearchDataClause::SDCM_ANCHORSTART);
}
if (anchorend && PyObject_IsTrue(anchorend)) {
cl->addModifier(Rcl::SearchDataClause::SDCM_ANCHOREND);
}
if (casesens && PyObject_IsTrue(casesens)) {
cl->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
}
if (diacsens && PyObject_IsTrue(diacsens)) {
cl->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
}
self->sd->addClause(cl); self->sd->addClause(cl);
Py_RETURN_NONE; Py_RETURN_NONE;
} }
@ -1426,7 +1464,6 @@ Db_dealloc(recoll_DbObject *self)
static PyObject * static PyObject *
Db_new(PyTypeObject *type, PyObject *args, PyObject *kwds) Db_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{ {
LOGDEB(("Db_new\n"));
recoll_DbObject *self; recoll_DbObject *self;
self = (recoll_DbObject *)type->tp_alloc(type, 0); self = (recoll_DbObject *)type->tp_alloc(type, 0);
@ -1439,7 +1476,6 @@ Db_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
static int static int
Db_init(recoll_DbObject *self, PyObject *args, PyObject *kwargs) Db_init(recoll_DbObject *self, PyObject *args, PyObject *kwargs)
{ {
LOGDEB(("Db_init\n"));
static const char *kwlist[] = {"confdir", "extra_dbs", "writable", NULL}; static const char *kwlist[] = {"confdir", "extra_dbs", "writable", NULL};
PyObject *extradbs = 0; PyObject *extradbs = 0;
char *confdir = 0; char *confdir = 0;
@ -1459,6 +1495,8 @@ Db_init(recoll_DbObject *self, PyObject *args, PyObject *kwargs)
} else { } else {
rclconfig = recollinit(0, 0, reason, 0); rclconfig = recollinit(0, 0, reason, 0);
} }
LOGDEB(("Db_init\n"));
if (rclconfig == 0) { if (rclconfig == 0) {
PyErr_SetString(PyExc_EnvironmentError, reason.c_str()); PyErr_SetString(PyExc_EnvironmentError, reason.c_str());
return -1; return -1;
@ -1786,7 +1824,6 @@ static PyTypeObject recoll_DbType = {
static PyObject * static PyObject *
recoll_connect(PyObject *self, PyObject *args, PyObject *kwargs) recoll_connect(PyObject *self, PyObject *args, PyObject *kwargs)
{ {
LOGDEB(("recoll_connect\n"));
recoll_DbObject *db = (recoll_DbObject *) recoll_DbObject *db = (recoll_DbObject *)
PyObject_Call((PyObject *)&recoll_DbType, args, kwargs); PyObject_Call((PyObject *)&recoll_DbType, args, kwargs);
return (PyObject *)db; return (PyObject *)db;
@ -1820,21 +1857,13 @@ PyDoc_STRVAR(pyrecoll_doc_string,
PyMODINIT_FUNC PyMODINIT_FUNC
initrecoll(void) initrecoll(void)
{ {
string reason; // Note: we can't call recollinit here, because the confdir is only really
rclconfig = recollinit(0, 0, reason, 0); // known when the first db object is created (it is an optional parameter).
if (rclconfig == 0) { // Using a default here may end up with variables such as stripchars being
PyErr_SetString(PyExc_EnvironmentError, reason.c_str()); // wrong
return;
}
if (!rclconfig->ok()) {
PyErr_SetString(PyExc_EnvironmentError,
"Recoll init error: bad environment ?");
return;
}
PyObject* m; PyObject* m;
m = Py_InitModule3("recoll", recollMethods, m = Py_InitModule3("recoll", recollMethods, "Recoll extension module.");
"Recoll extension module.");
if (PyType_Ready(&recoll_DbType) < 0) if (PyType_Ready(&recoll_DbType) < 0)
return; return;

View File

@ -3,7 +3,7 @@
Not good for anything except showing/trying the code.""" Not good for anything except showing/trying the code."""
import sys import sys
import recoll from recoll import recoll
def dotest(db, q): def dotest(db, q):
query = db.query() query = db.query()
@ -11,25 +11,32 @@ def dotest(db, q):
nres = query.executesd(q) nres = query.executesd(q)
print "Result count: ", nres print "Result count: ", nres
print "Query: ", query.getxquery().encode('utf-8')
sys.exit(0)
if nres > 10: if nres > 10:
nres = 10 nres = 10
for i in range(nres): for i in range(nres):
doc = query.fetchone() doc = query.fetchone()
print query.next if type(query.next) == int else query.rownumber print query.next if type(query.next) == int else query.rownumber
for k in ("url", "mtime", "title", "author", "abstract"): for k in ("url", "mtime", "title", "author", "abstract"):
print k, ":", getattr(doc, k).encode('utf-8') if getattr(doc, k):
print k, ":", getattr(doc, k).encode('utf-8')
else:
print k, ": None"
#abs = db.makeDocAbstract(doc, query).encode('utf-8') #abs = db.makeDocAbstract(doc, query).encode('utf-8')
#print abs #print abs
print print
# End dotest # End dotest
sd = recoll.SearchData()
sd.addclause("and", "essaouira maroc")
#sd.addclause("and", "dockes", field="author") #sd.addclause("and", "dockes", field="author")
#sd.addclause("phrase", "jean francois", 1) #sd.addclause("phrase", "jean francois", 1)
#sd.addclause("excl", "plage") #sd.addclause("excl", "plage")
db = recoll.connect() db = recoll.connect(confdir="/home/dockes/.recoll-prod")
sd = recoll.SearchData(stemlang="english")
sd.addclause("and", "dockes", stemming = False, casesens = False, diacsens = True)
dotest(db, sd) dotest(db, sd)
sys.exit(0) sys.exit(0)