From a15de0b9dfc9ddb9aa855450840844baee43d35f Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes
Date: Mon, 16 Sep 2013 19:05:11 +0200
Subject: [PATCH] added termMatch method to the Python API

---
 src/doc/user/usermanual.sgml   |  14 ++++
 src/python/recoll/pyrecoll.cpp | 111 +++++++++++++++++++++++++++++++++
 src/sampleconf/recoll.conf.in  |   2 +-
 3 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml
index 95be92db..0b7fb187 100644
--- a/src/doc/user/usermanual.sgml
+++ b/src/doc/user/usermanual.sgml
@@ -3896,6 +3896,20 @@ or terms are shown around the keyword.
 
 
 
+
+ Db.termMatch(match_type, expr, field='',
+ maxlen=-1, casesens=False, diacsens=False, lang='english')
+
+ Expand an expression against the
+ index term list. Performs the basic function from the
+ GUI term explorer tool. match_type
+ can be either
+ of wildcard, regexp
+ or stem. Returns a list of terms
+ expanded from the input expression.
+
+
+
 
 
 
diff --git a/src/python/recoll/pyrecoll.cpp b/src/python/recoll/pyrecoll.cpp
index ac4cbf43..398297a3 100644
--- a/src/python/recoll/pyrecoll.cpp
+++ b/src/python/recoll/pyrecoll.cpp
@@ -1628,6 +1628,114 @@ Db_makeDocAbstract(recoll_DbObject* self, PyObject *args)
                             "UTF-8", "replace");
 }
 
+PyDoc_STRVAR(doc_Db_termMatch,
+ "termMatch(match_type='wildcard|regexp|stem', expr, field='', "
+ "maxlen=-1, casesens=False, diacsens=False, lang='english')"
+ " returns the expanded term list\n"
+"\n"
+"Expands the input expression according to the mode and parameters and "
+"returns the expanded term list.\n"
+);
+// Python method Db.termMatch(): expand an expression against the index
+// term list, in wildcard, regexp or stem mode, and return the matching
+// terms (after Rcl::strip_prefix()) as a Python list of unicode strings.
+//
+// Note that the keyword name for the first parameter is 'type' (see
+// kwlist), not 'match_type' as shown in the doc string.
+//
+// Returns a new reference, or NULL with a Python exception set on error.
+static PyObject *
+Db_termMatch(recoll_DbObject* self, PyObject *args, PyObject *kwargs)
+{
+    LOGDEB(("Db_termMatch\n"));
+    static const char *kwlist[] = {"type", "expr", "field", "maxlen",
+                                   "casesens", "diacsens", "lang", NULL};
+    char *tp = 0;
+    char *expr = 0; // needs freeing
+    char *field = 0; // needs freeing
+    int maxlen = -1;
+    PyObject *casesens = 0;
+    PyObject *diacsens = 0;
+    char *lang = 0; // needs freeing
+
+    PyObject *ret = 0;
+    int typ_sens = 0;
+    int sens = 0;
+    Rcl::TermMatchResult result;
+
+    // The "es" conversions allocate the expr, field and lang buffers:
+    // they are ours after a successful parse and are released at 'out'.
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "ses|esiOOes",
+                                     (char**)kwlist,
+                                     &tp, "utf-8", &expr, "utf-8", &field,
+                                     &maxlen, &casesens,
+                                     &diacsens, "utf-8", &lang))
+        return 0;
+
+    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
+        LOGERR(("Db_termMatch: db not found %p\n", self->db));
+        PyErr_SetString(PyExc_AttributeError, "db");
+        goto out;
+    }
+
+    if (!strcasecmp(tp, "wildcard")) {
+        typ_sens = Rcl::Db::ET_WILD;
+    } else if (!strcasecmp(tp, "regexp")) {
+        typ_sens = Rcl::Db::ET_REGEXP;
+    } else if (!strcasecmp(tp, "stem")) {
+        typ_sens = Rcl::Db::ET_STEM;
+    } else {
+        PyErr_SetString(PyExc_AttributeError, "Bad type arg");
+        goto out;
+    }
+
+    // PyObject_IsTrue() returns -1 with an exception set if the truth
+    // test itself fails: propagate the error instead of treating it as
+    // True.
+    if (casesens != 0) {
+        if ((sens = PyObject_IsTrue(casesens)) < 0)
+            goto out;
+        if (sens)
+            typ_sens |= Rcl::Db::ET_CASESENS;
+    }
+    if (diacsens != 0) {
+        if ((sens = PyObject_IsTrue(diacsens)) < 0)
+            goto out;
+        if (sens)
+            typ_sens |= Rcl::Db::ET_DIACSENS;
+    }
+
+    if (!self->db->termMatch(typ_sens, lang ? lang : "english",
+                             expr, result, maxlen, field ? field : "")) {
+        LOGERR(("Db_termMatch: db termMatch error\n"));
+        PyErr_SetString(PyExc_AttributeError, "rcldb termMatch error");
+        goto out;
+    }
+
+    ret = PyList_New(result.entries.size());
+    if (ret == 0)
+        goto out;
+    for (unsigned int i = 0; i < result.entries.size(); i++) {
+        PyObject *term = PyUnicode_FromString(
+            Rcl::strip_prefix(result.entries[i].term).c_str());
+        if (term == 0) {
+            // Conversion failed (exception set): drop the partial list
+            // rather than return it with NULL slots.
+            Py_DECREF(ret);
+            ret = 0;
+            goto out;
+        }
+        // PyList_SetItem() steals the reference to term.
+        PyList_SetItem(ret, i, term);
+    }
+
+out:
+    PyMem_Free(expr);
+    PyMem_Free(field);
+    PyMem_Free(lang);
+    return ret;
+}
+
 static PyObject *
 Db_needUpdate(recoll_DbObject* self, PyObject *args, PyObject *kwds)
 {
@@ -1738,6 +1846,9 @@ static PyMethodDef Db_methods[] = {
      "Build and return 'keyword-in-context' abstract for document\n"
      "and query."
     },
+    {"termMatch", (PyCFunction)Db_termMatch, METH_VARARGS|METH_KEYWORDS,
+     doc_Db_termMatch
+    },
     {"needUpdate", (PyCFunction)Db_needUpdate, METH_VARARGS,
      "needUpdate(udi, sig) -> Bool.\n"
      "Check if the index is up to date for the document defined by udi,\n"
diff --git a/src/sampleconf/recoll.conf.in b/src/sampleconf/recoll.conf.in
index 44052b9f..da4526c7 100644
--- a/src/sampleconf/recoll.conf.in
+++ b/src/sampleconf/recoll.conf.in
@@ -56,7 +56,7 @@ logfilename = stderr
 # chdir to some garbage bin. 3 possible values:
 # - (literal) tmp : go to temp dir as set by env (RECOLL_TMPDIR else
 # TMPDIR else /tmp)
-# - Empty: stay were started
+# - Empty: stay where started
 # - Absolute path value: go there.
 idxrundir = tmp
 