added termMatch method to the Python API

This commit is contained in:
Jean-Francois Dockes 2013-09-16 19:05:11 +02:00
parent ebf9e8bfad
commit a15de0b9df
3 changed files with 95 additions and 1 deletions

View File

@ -3896,6 +3896,20 @@ or
terms are shown around the keyword.</listitem>
</varlistentry>
<varlistentry>
<term>Db.termMatch(match_type, expr, field='',
maxlen=-1, casesens=False, diacsens=False, lang='english')
</term>
<listitem>Expand an expression against the
index term list. Performs the basic function from the
GUI term explorer tool. <literal>match_type</literal>
can be one
of <literal>wildcard</literal>, <literal>regexp</literal>
or <literal>stem</literal>. Returns a list of terms
expanded from the input expression.
</listitem>
</varlistentry>
</variablelist>
</sect5>

View File

@ -1628,6 +1628,83 @@ Db_makeDocAbstract(recoll_DbObject* self, PyObject *args)
"UTF-8", "replace");
}
/* Help text for Db.termMatch(). The first parameter is spelled "type"
 * here because that is the keyword name registered in Db_termMatch()'s
 * kwlist; the name shown in help() must be the one callers can pass. */
PyDoc_STRVAR(doc_Db_termMatch,
"termMatch(type='wildcard|regexp|stem', expr, field='', "
"maxlen=-1, casesens=False, diacsens=False, lang='english')"
" returns the expanded term list\n"
"\n"
"Expands the input expression according to the mode and parameters and "
"returns the expanded term list.\n"
);
/*
 * Python method Db.termMatch(): expand an expression against the index
 * term list.
 *
 * Arguments (positional or keyword, keyword names come from kwlist):
 *   type     - "wildcard", "regexp" or "stem", compared case-insensitively
 *   expr     - expression to expand (encoded to UTF-8)
 *   field    - optional field name; "" is used when omitted
 *   maxlen   - optional int handed to Rcl::Db::termMatch(); default -1
 *   casesens - optional object; when true, ET_CASESENS is OR-ed into the type
 *   diacsens - optional object; when true, ET_DIACSENS is OR-ed into the type
 *   lang     - optional language; "english" is used when omitted
 *
 * Returns a new list of unicode strings, one per result entry, each term
 * passed through Rcl::strip_prefix(). Returns NULL with a Python exception
 * set on any failure (AttributeError for a bad db, a bad type argument or
 * an rcldb termMatch error).
 */
static PyObject *
Db_termMatch(recoll_DbObject* self, PyObject *args, PyObject *kwargs)
{
    LOGDEB(("Db_termMatch\n"));
    static const char *kwlist[] = {"type", "expr", "field", "maxlen",
                                   "casesens", "diacsens", "lang", NULL};
    /* All locals are declared before the first "goto out" so that no jump
     * bypasses an initialization. */
    char *tp = 0;
    char *expr = 0; // needs freeing
    char *field = 0; // needs freeing
    int maxlen = -1;
    PyObject *casesens = 0;
    PyObject *diacsens = 0;
    char *lang = 0; // needs freeing
    PyObject *ret = 0;
    int typ_sens = 0;
    int truth = 0;
    Rcl::TermMatchResult result;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "ses|esiOOes",
                                     (char**)kwlist,
                                     &tp, "utf-8", &expr, "utf-8", &field,
                                     &maxlen, &casesens,
                                     &diacsens, "utf-8", &lang))
        return 0;

    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
        LOGERR(("Db_termMatch: db not found %p\n", self->db));
        PyErr_SetString(PyExc_AttributeError, "db");
        goto out;
    }

    if (!strcasecmp(tp, "wildcard")) {
        typ_sens = Rcl::Db::ET_WILD;
    } else if (!strcasecmp(tp, "regexp")) {
        typ_sens = Rcl::Db::ET_REGEXP;
    } else if (!strcasecmp(tp, "stem")) {
        typ_sens = Rcl::Db::ET_STEM;
    } else {
        PyErr_SetString(PyExc_AttributeError, "Bad type arg");
        goto out;
    }

    /* PyObject_IsTrue() returns -1 when truth evaluation itself raises;
     * propagate that exception instead of treating it as "true". */
    if (casesens != 0) {
        truth = PyObject_IsTrue(casesens);
        if (truth < 0)
            goto out;
        if (truth)
            typ_sens |= Rcl::Db::ET_CASESENS;
    }
    if (diacsens != 0) {
        truth = PyObject_IsTrue(diacsens);
        if (truth < 0)
            goto out;
        if (truth)
            typ_sens |= Rcl::Db::ET_DIACSENS;
    }

    if (!self->db->termMatch(typ_sens, lang ? lang : "english",
                             expr, result, maxlen, field ? field : "")) {
        LOGERR(("Db_termMatch: db termMatch error\n"));
        PyErr_SetString(PyExc_AttributeError, "rcldb termMatch error");
        goto out;
    }

    ret = PyList_New(result.entries.size());
    if (ret == 0)
        goto out; // allocation failed, exception already set

    for (unsigned int i = 0; i < result.entries.size(); i++) {
        PyObject *term = PyUnicode_FromString(
            Rcl::strip_prefix(result.entries[i].term).c_str());
        if (term == 0) {
            /* Conversion failed with an exception set: drop the partial
             * list so we do not return a value with an error pending. */
            Py_DECREF(ret);
            ret = 0;
            break;
        }
        // PyList_SetItem() takes ownership of the reference to term.
        PyList_SetItem(ret, i, term);
    }

out:
    // Buffers produced by the "es" format units; freeing NULL is harmless.
    PyMem_Free(expr);
    PyMem_Free(field);
    PyMem_Free(lang);
    return ret;
}
static PyObject *
Db_needUpdate(recoll_DbObject* self, PyObject *args, PyObject *kwds)
{
@ -1738,6 +1815,9 @@ static PyMethodDef Db_methods[] = {
"Build and return 'keyword-in-context' abstract for document\n"
"and query."
},
{"termMatch", (PyCFunction)Db_termMatch, METH_VARARGS|METH_KEYWORDS,
doc_Db_termMatch
},
{"needUpdate", (PyCFunction)Db_needUpdate, METH_VARARGS,
"needUpdate(udi, sig) -> Bool.\n"
"Check if the index is up to date for the document defined by udi,\n"

View File

@ -56,7 +56,7 @@ logfilename = stderr
# chdir to some garbage bin. 3 possible values:
# - (literal) tmp : go to temp dir as set by env (RECOLL_TMPDIR else
# TMPDIR else /tmp)
# - Empty: stay were started
# - Empty: stay where started
# - Absolute path value: go there.
idxrundir = tmp