Merge branch 'resultstore'
This commit is contained in:
commit
1fc5e0db1d
@ -156,6 +156,8 @@ query/filtseq.cpp \
|
|||||||
query/filtseq.h \
|
query/filtseq.h \
|
||||||
query/plaintorich.cpp \
|
query/plaintorich.cpp \
|
||||||
query/plaintorich.h \
|
query/plaintorich.h \
|
||||||
|
query/qresultstore.cpp \
|
||||||
|
query/qresultstore.h \
|
||||||
query/recollq.cpp \
|
query/recollq.cpp \
|
||||||
query/recollq.h \
|
query/recollq.h \
|
||||||
query/reslistpager.cpp \
|
query/reslistpager.cpp \
|
||||||
|
|||||||
@ -327,11 +327,12 @@ Doc_init(recoll_DocObject *self, PyObject *, PyObject *)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(doc_Doc_getbinurl,
|
PyDoc_STRVAR(
|
||||||
"getbinurl(none) -> binary url\n"
|
doc_Doc_getbinurl,
|
||||||
"\n"
|
"getbinurl(none) -> binary url\n"
|
||||||
"Returns an URL with a path part which is a as bit for bit copy of the \n"
|
"\n"
|
||||||
"file system path, without encoding\n"
|
"Returns an URL with a path part which is a as bit for bit copy of the \n"
|
||||||
|
"file system path, without encoding\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
@ -339,17 +340,18 @@ Doc_getbinurl(recoll_DocObject *self)
|
|||||||
{
|
{
|
||||||
LOGDEB0("Doc_getbinurl\n");
|
LOGDEB0("Doc_getbinurl\n");
|
||||||
if (self->doc == 0) {
|
if (self->doc == 0) {
|
||||||
PyErr_SetString(PyExc_AttributeError, "doc");
|
PyErr_SetString(PyExc_AttributeError, "doc is NULL");
|
||||||
return 0;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
return PyBytes_FromStringAndSize(self->doc->url.c_str(),
|
return PyBytes_FromStringAndSize(self->doc->url.c_str(),
|
||||||
self->doc->url.size());
|
self->doc->url.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(doc_Doc_setbinurl,
|
PyDoc_STRVAR(
|
||||||
"setbinurl(url) -> binary url\n"
|
doc_Doc_setbinurl,
|
||||||
"\n"
|
"setbinurl(url) -> binary url\n"
|
||||||
"Set the URL from binary path like file://may/contain/unencodable/bytes\n"
|
"\n"
|
||||||
|
"Set the URL from binary path like file://may/contain/unencodable/bytes\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
@ -367,6 +369,8 @@ Doc_setbinurl(recoll_DocObject *self, PyObject *value)
|
|||||||
|
|
||||||
self->doc->url = string(PyByteArray_AsString(value),
|
self->doc->url = string(PyByteArray_AsString(value),
|
||||||
PyByteArray_Size(value));
|
PyByteArray_Size(value));
|
||||||
|
printableUrl(self->rclconfig->getDefCharset(), self->doc->url,
|
||||||
|
self->doc->meta[Rcl::Doc::keyurl]);
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -387,7 +391,7 @@ Doc_keys(recoll_DocObject *self)
|
|||||||
return 0;
|
return 0;
|
||||||
for (const auto& entry : self->doc->meta) {
|
for (const auto& entry : self->doc->meta) {
|
||||||
PyList_Append(pkeys,
|
PyList_Append(pkeys,
|
||||||
PyUnicode_Decode(entry.first.c_str(),entry.first.size(),
|
PyUnicode_Decode(entry.first.c_str(), entry.first.size(),
|
||||||
"UTF-8", "replace"));
|
"UTF-8", "replace"));
|
||||||
}
|
}
|
||||||
return pkeys;
|
return pkeys;
|
||||||
@ -537,6 +541,23 @@ static PyMethodDef Doc_methods[] = {
|
|||||||
{NULL} /* Sentinel */
|
{NULL} /* Sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Convert a Python str or bytes object to a C++ byte string.
//
// @param pyval  Python object to convert. A str is encoded as UTF-8, a bytes
//               object is copied as is. NULL (which CPython passes as the
//               value for attribute/item deletion) is rejected.
// @param out    Receives the converted bytes. Left untouched on failure.
// @return 0 on success, -1 if pyval is NULL, is neither str nor bytes, or
//         if the UTF-8 encoding fails (in which case the encoder has set a
//         Python exception; no exception is set for the other failures).
int pys2cpps(PyObject *pyval, std::string& out)
{
    if (pyval == nullptr) {
        // The type-check macros below dereference their argument.
        return -1;
    }
    if (PyUnicode_Check(pyval)) {
        PyObject* utf8o = PyUnicode_AsUTF8String(pyval);
        if (utf8o == nullptr) {
            return -1;
        }
        // Copy using the explicit size so that embedded NUL bytes do not
        // truncate the value.
        out.assign(PyBytes_AS_STRING(utf8o), PyBytes_GET_SIZE(utf8o));
        Py_DECREF(utf8o);
    } else if (PyBytes_Check(pyval)) {
        out.assign(PyBytes_AS_STRING(pyval), PyBytes_GET_SIZE(pyval));
    } else {
        return -1;
    }
    return 0;
}
|
||||||
|
|
||||||
// Note that this returns None if the attribute is not found instead of raising
|
// Note that this returns None if the attribute is not found instead of raising
|
||||||
// an exception as would be standard. We don't change it to keep existing code
|
// an exception as would be standard. We don't change it to keep existing code
|
||||||
// working.
|
// working.
|
||||||
@ -560,18 +581,7 @@ Doc_getattro(recoll_DocObject *self, PyObject *nameobj)
|
|||||||
PyErr_Clear();
|
PyErr_Clear();
|
||||||
|
|
||||||
string name;
|
string name;
|
||||||
if (PyUnicode_Check(nameobj)) {
|
if (pys2cpps(nameobj, name) < 0) {
|
||||||
PyObject* utf8o = PyUnicode_AsUTF8String(nameobj);
|
|
||||||
if (utf8o == 0) {
|
|
||||||
LOGERR("Doc_getattro: encoding name to utf8 failed\n");
|
|
||||||
PyErr_SetString(PyExc_AttributeError, "name??");
|
|
||||||
Py_RETURN_NONE;
|
|
||||||
}
|
|
||||||
name = PyBytes_AsString(utf8o);
|
|
||||||
Py_DECREF(utf8o);
|
|
||||||
} else if (PyBytes_Check(nameobj)) {
|
|
||||||
name = PyBytes_AsString(nameobj);
|
|
||||||
} else {
|
|
||||||
PyErr_SetString(PyExc_AttributeError, "name not unicode nor string??");
|
PyErr_SetString(PyExc_AttributeError, "name not unicode nor string??");
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
@ -588,7 +598,7 @@ Doc_getattro(recoll_DocObject *self, PyObject *nameobj)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
Doc_setattr(recoll_DocObject *self, char *name, PyObject *value)
|
Doc_setattro(recoll_DocObject *self, PyObject *nameobj, PyObject *value)
|
||||||
{
|
{
|
||||||
if (self->doc == 0) {
|
if (self->doc == 0) {
|
||||||
PyErr_SetString(PyExc_AttributeError, "doc??");
|
PyErr_SetString(PyExc_AttributeError, "doc??");
|
||||||
@ -599,84 +609,78 @@ Doc_setattr(recoll_DocObject *self, char *name, PyObject *value)
|
|||||||
"Configuration not initialized");
|
"Configuration not initialized");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (name == 0) {
|
string name;
|
||||||
PyErr_SetString(PyExc_AttributeError, "name??");
|
if (pys2cpps(nameobj, name) < 0) {
|
||||||
|
PyErr_SetString(PyExc_AttributeError, "name not unicode nor string??");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PyBytes_Check(value)) {
|
string uvalue;
|
||||||
value = PyUnicode_FromEncodedObject(value, "UTF-8", "strict");
|
if (pys2cpps(value, uvalue) < 0) {
|
||||||
if (value == 0)
|
PyErr_SetString(PyExc_AttributeError, "value neither bytes nor str");
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!PyUnicode_Check(value)) {
|
|
||||||
PyErr_SetString(PyExc_AttributeError, "value not unicode??");
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject* putf8 = PyUnicode_AsUTF8String(value);
|
|
||||||
if (putf8 == 0) {
|
|
||||||
LOGERR("Doc_setmeta: encoding to utf8 failed\n");
|
|
||||||
PyErr_SetString(PyExc_AttributeError, "value??");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
string uvalue = PyBytes_AsString(putf8);
|
|
||||||
Py_DECREF(putf8);
|
|
||||||
string key = self->rclconfig->fieldQCanon(name);
|
string key = self->rclconfig->fieldQCanon(name);
|
||||||
|
|
||||||
LOGDEB0("Doc_setattr: doc " << self->doc << " [" << key << "] (" << name <<
|
LOGDEB0("Doc_setattr: doc " << self->doc << " [" << key << "] (" << name <<
|
||||||
") -> [" << uvalue << "]\n");
|
") -> [" << uvalue << "]\n");
|
||||||
|
|
||||||
// We set the value in the meta array in all cases. Good idea ? or do it
|
// Note that some attributes are set both as struct fields and
|
||||||
// only for fields without a dedicated Doc:: entry?
|
// meta members, keep compat with movedocfields() used when
|
||||||
self->doc->meta[key] = uvalue;
|
// fetching from query.
|
||||||
switch (key.at(0)) {
|
switch (key.at(0)) {
|
||||||
case 't':
|
case 't':
|
||||||
if (!key.compare("text")) {
|
if (key == "text") {
|
||||||
self->doc->text.swap(uvalue);
|
self->doc->text.swap(uvalue);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'u':
|
case 'u':
|
||||||
if (!key.compare(Rcl::Doc::keyurl)) {
|
if (key == Rcl::Doc::keyurl) {
|
||||||
self->doc->url.swap(uvalue);
|
self->doc->url.swap(uvalue);
|
||||||
|
printableUrl(self->rclconfig->getDefCharset(), self->doc->url,
|
||||||
|
self->doc->meta[Rcl::Doc::keyurl]);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
if (!key.compare(Rcl::Doc::keyfs)) {
|
if (key == Rcl::Doc::keyfs) {
|
||||||
self->doc->fbytes.swap(uvalue);
|
self->doc->fbytes.swap(uvalue);
|
||||||
} else if (!key.compare(Rcl::Doc::keyfmt)) {
|
self->doc->meta[Rcl::Doc::keyfs] = self->doc->fbytes;
|
||||||
|
} else if (key == Rcl::Doc::keyfmt) {
|
||||||
self->doc->fmtime.swap(uvalue);
|
self->doc->fmtime.swap(uvalue);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'd':
|
case 'd':
|
||||||
if (!key.compare(Rcl::Doc::keyds)) {
|
if (key == Rcl::Doc::keyds) {
|
||||||
self->doc->dbytes.swap(uvalue);
|
self->doc->dbytes.swap(uvalue);
|
||||||
} else if (!key.compare(Rcl::Doc::keydmt)) {
|
self->doc->meta[Rcl::Doc::keyds] = self->doc->dbytes;
|
||||||
|
} else if (key == Rcl::Doc::keydmt) {
|
||||||
self->doc->dmtime.swap(uvalue);
|
self->doc->dmtime.swap(uvalue);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'i':
|
case 'i':
|
||||||
if (!key.compare(Rcl::Doc::keyipt)) {
|
if (key == Rcl::Doc::keyipt) {
|
||||||
self->doc->ipath.swap(uvalue);
|
self->doc->ipath.swap(uvalue);
|
||||||
|
self->doc->meta[Rcl::Doc::keyipt] = self->doc->ipath;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'm':
|
case 'm':
|
||||||
if (!key.compare(Rcl::Doc::keytp)) {
|
if (key == Rcl::Doc::keytp) {
|
||||||
self->doc->mimetype.swap(uvalue);
|
self->doc->mimetype.swap(uvalue);
|
||||||
} else if (!key.compare(Rcl::Doc::keymt)) {
|
self->doc->meta[Rcl::Doc::keytp] = self->doc->mimetype;
|
||||||
|
} else if (key == Rcl::Doc::keymt) {
|
||||||
self->doc->dmtime.swap(uvalue);
|
self->doc->dmtime.swap(uvalue);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'o':
|
case 'o':
|
||||||
if (!key.compare(Rcl::Doc::keyoc)) {
|
if (key == Rcl::Doc::keyoc) {
|
||||||
self->doc->origcharset.swap(uvalue);
|
self->doc->origcharset.swap(uvalue);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 's':
|
case 's':
|
||||||
if (!key.compare(Rcl::Doc::keysig)) {
|
if (key == Rcl::Doc::keysig) {
|
||||||
self->doc->sig.swap(uvalue);
|
self->doc->sig.swap(uvalue);
|
||||||
} else if (!key.compare(Rcl::Doc::keysz)) {
|
} else if (key == Rcl::Doc::keysz) {
|
||||||
self->doc->dbytes.swap(uvalue);
|
self->doc->dbytes.swap(uvalue);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -697,6 +701,7 @@ Doc_length(recoll_DocObject *self)
|
|||||||
static PyObject *
|
static PyObject *
|
||||||
Doc_subscript(recoll_DocObject *self, PyObject *key)
|
Doc_subscript(recoll_DocObject *self, PyObject *key)
|
||||||
{
|
{
|
||||||
|
// Can't just return getattro because this first checks for a method name
|
||||||
if (self->doc == 0) {
|
if (self->doc == 0) {
|
||||||
PyErr_SetString(PyExc_AttributeError, "doc??");
|
PyErr_SetString(PyExc_AttributeError, "doc??");
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -707,18 +712,7 @@ Doc_subscript(recoll_DocObject *self, PyObject *key)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
string name;
|
string name;
|
||||||
if (PyUnicode_Check(key)) {
|
if (pys2cpps(key, name) < 0) {
|
||||||
PyObject* utf8o = PyUnicode_AsUTF8String(key);
|
|
||||||
if (utf8o == 0) {
|
|
||||||
LOGERR("Doc_getitemo: encoding name to utf8 failed\n");
|
|
||||||
PyErr_SetString(PyExc_AttributeError, "name??");
|
|
||||||
Py_RETURN_NONE;
|
|
||||||
}
|
|
||||||
name = PyBytes_AsString(utf8o);
|
|
||||||
Py_DECREF(utf8o);
|
|
||||||
} else if (PyBytes_Check(key)) {
|
|
||||||
name = PyBytes_AsString(key);
|
|
||||||
} else {
|
|
||||||
PyErr_SetString(PyExc_AttributeError, "key not unicode nor string??");
|
PyErr_SetString(PyExc_AttributeError, "key not unicode nor string??");
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
@ -726,54 +720,61 @@ Doc_subscript(recoll_DocObject *self, PyObject *key)
|
|||||||
string skey = self->rclconfig->fieldQCanon(name);
|
string skey = self->rclconfig->fieldQCanon(name);
|
||||||
string value;
|
string value;
|
||||||
if (idocget(self, skey, value)) {
|
if (idocget(self, skey, value)) {
|
||||||
return PyUnicode_Decode(value.c_str(), value.size(), "UTF-8","replace");
|
return PyUnicode_Decode(value.c_str(), value.size(),
|
||||||
|
"UTF-8", "backslashreplace");
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
Doc_ass_subscript(recoll_DocObject *self, PyObject *key, PyObject *val)
|
||||||
|
{
|
||||||
|
return Doc_setattro(self, key, val);
|
||||||
|
}
|
||||||
|
|
||||||
static PyMappingMethods doc_as_mapping = {
|
static PyMappingMethods doc_as_mapping = {
|
||||||
(lenfunc)Doc_length, /*mp_length*/
|
(lenfunc)Doc_length, /*mp_length*/
|
||||||
(binaryfunc)Doc_subscript, /*mp_subscript*/
|
(binaryfunc)Doc_subscript, /*mp_subscript*/
|
||||||
(objobjargproc)0, /*mp_ass_subscript*/
|
(objobjargproc)Doc_ass_subscript, /*mp_ass_subscript*/
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
PyDoc_STRVAR(doc_DocObject,
|
PyDoc_STRVAR(
|
||||||
"Doc()\n"
|
doc_DocObject,
|
||||||
"\n"
|
"Doc()\n"
|
||||||
"A Doc object contains index data for a given document.\n"
|
"\n"
|
||||||
"The data is extracted from the index when searching, or set by the\n"
|
"A Doc object contains index data for a given document.\n"
|
||||||
"indexer program when updating. The Doc object has no useful methods but\n"
|
"The data is extracted from the index when searching, or set by the\n"
|
||||||
"many attributes to be read or set by its user. It matches exactly the\n"
|
"indexer program when updating. The Doc object has no useful methods but\n"
|
||||||
"Rcl::Doc c++ object. Some of the attributes are predefined, but, \n"
|
"many attributes to be read or set by its user. It matches exactly the\n"
|
||||||
"especially when indexing, others can be set, the name of which will be\n"
|
"Rcl::Doc c++ object. Some of the attributes are predefined, but, \n"
|
||||||
"processed as field names by the indexing configuration.\n"
|
"especially when indexing, others can be set, the name of which will be\n"
|
||||||
"Inputs can be specified as unicode or strings.\n"
|
"processed as field names by the indexing configuration.\n"
|
||||||
"Outputs are unicode objects.\n"
|
"Inputs can be specified as unicode or strings.\n"
|
||||||
"All dates are specified as unix timestamps, printed as strings\n"
|
"Outputs are unicode objects.\n"
|
||||||
"Predefined attributes (index/query/both):\n"
|
"All dates are specified as unix timestamps, printed as strings\n"
|
||||||
" text (index): document plain text\n"
|
"Predefined attributes (index/query/both):\n"
|
||||||
" url (both)\n"
|
" text (index): document plain text\n"
|
||||||
" fbytes (both) optional) file size in bytes\n"
|
" url (both)\n"
|
||||||
" filename (both)\n"
|
" fbytes (both) optional) file size in bytes\n"
|
||||||
" fmtime (both) optional file modification date. Unix time printed \n"
|
" filename (both)\n"
|
||||||
" as string\n"
|
" fmtime (both) optional file modification date. Unix time printed \n"
|
||||||
" dbytes (both) document text bytes\n"
|
" as string\n"
|
||||||
" dmtime (both) document creation/modification date\n"
|
" dbytes (both) document text bytes\n"
|
||||||
" ipath (both) value private to the app.: internal access path\n"
|
" dmtime (both) document creation/modification date\n"
|
||||||
" inside file\n"
|
" ipath (both) value private to the app.: internal access path\n"
|
||||||
" mtype (both) mime type for original document\n"
|
" inside file\n"
|
||||||
" mtime (query) dmtime if set else fmtime\n"
|
" mtype (both) mime type for original document\n"
|
||||||
" origcharset (both) charset the text was converted from\n"
|
" mtime (query) dmtime if set else fmtime\n"
|
||||||
" size (query) dbytes if set, else fbytes\n"
|
" origcharset (both) charset the text was converted from\n"
|
||||||
" sig (both) app-defined file modification signature. \n"
|
" size (query) dbytes if set, else fbytes\n"
|
||||||
" For up to date checks\n"
|
" sig (both) app-defined file modification signature. \n"
|
||||||
" relevancyrating (query)\n"
|
" For up to date checks\n"
|
||||||
" abstract (both)\n"
|
" relevancyrating (query)\n"
|
||||||
" author (both)\n"
|
" abstract (both)\n"
|
||||||
" title (both)\n"
|
" author (both)\n"
|
||||||
" keywords (both)\n"
|
" title (both)\n"
|
||||||
|
" keywords (both)\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
PyTypeObject recoll_DocType = {
|
PyTypeObject recoll_DocType = {
|
||||||
@ -784,7 +785,7 @@ PyTypeObject recoll_DocType = {
|
|||||||
(destructor)Doc_dealloc, /*tp_dealloc*/
|
(destructor)Doc_dealloc, /*tp_dealloc*/
|
||||||
0, /*tp_print*/
|
0, /*tp_print*/
|
||||||
0, /*tp_getattr*/
|
0, /*tp_getattr*/
|
||||||
(setattrfunc)Doc_setattr, /*tp_setattr*/
|
0, /*tp_setattr*/
|
||||||
0, /*tp_compare*/
|
0, /*tp_compare*/
|
||||||
0, /*tp_repr*/
|
0, /*tp_repr*/
|
||||||
0, /*tp_as_number*/
|
0, /*tp_as_number*/
|
||||||
@ -794,7 +795,7 @@ PyTypeObject recoll_DocType = {
|
|||||||
0, /*tp_call*/
|
0, /*tp_call*/
|
||||||
0, /*tp_str*/
|
0, /*tp_str*/
|
||||||
(getattrofunc)Doc_getattro,/*tp_getattro*/
|
(getattrofunc)Doc_getattro,/*tp_getattro*/
|
||||||
0, /*tp_setattro*/
|
(setattrofunc)Doc_setattro,/*tp_setattro*/
|
||||||
0, /*tp_as_buffer*/
|
0, /*tp_as_buffer*/
|
||||||
Py_TPFLAGS_DEFAULT, /*tp_flags*/
|
Py_TPFLAGS_DEFAULT, /*tp_flags*/
|
||||||
doc_DocObject, /* tp_doc */
|
doc_DocObject, /* tp_doc */
|
||||||
@ -828,19 +829,6 @@ typedef struct recoll_DbObject {
|
|||||||
std::shared_ptr<RclConfig> rclconfig;
|
std::shared_ptr<RclConfig> rclconfig;
|
||||||
} recoll_DbObject;
|
} recoll_DbObject;
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
PyObject_HEAD
|
|
||||||
/* Type-specific fields go here. */
|
|
||||||
Rcl::Query *query;
|
|
||||||
int next; // Index of result to be fetched next or -1 if uninit
|
|
||||||
int rowcount; // Number of records returned by last execute
|
|
||||||
string *sortfield; // Need to allocate in here, main program is C.
|
|
||||||
int ascending;
|
|
||||||
int arraysize; // Default size for fetchmany
|
|
||||||
recoll_DbObject* connection;
|
|
||||||
bool fetchtext;
|
|
||||||
} recoll_QueryObject;
|
|
||||||
|
|
||||||
PyDoc_STRVAR(doc_Query_close,
|
PyDoc_STRVAR(doc_Query_close,
|
||||||
"close(). Deallocate query. Object is unusable after the call."
|
"close(). Deallocate query. Object is unusable after the call."
|
||||||
);
|
);
|
||||||
@ -1521,7 +1509,7 @@ PyDoc_STRVAR(doc_QueryObject,
|
|||||||
"Recoll Query objects are used to execute index searches. \n"
|
"Recoll Query objects are used to execute index searches. \n"
|
||||||
"They must be created by the Db.query() method.\n"
|
"They must be created by the Db.query() method.\n"
|
||||||
);
|
);
|
||||||
static PyTypeObject recoll_QueryType = {
|
PyTypeObject recoll_QueryType = {
|
||||||
PyVarObject_HEAD_INIT(NULL, 0)
|
PyVarObject_HEAD_INIT(NULL, 0)
|
||||||
"_recoll.Query", /*tp_name*/
|
"_recoll.Query", /*tp_name*/
|
||||||
sizeof(recoll_QueryObject), /*tp_basicsize*/
|
sizeof(recoll_QueryObject), /*tp_basicsize*/
|
||||||
@ -2195,6 +2183,17 @@ PyInit__recoll(void)
|
|||||||
Py_INCREF(&rclx_ExtractorType);
|
Py_INCREF(&rclx_ExtractorType);
|
||||||
PyModule_AddObject(module, "Extractor", (PyObject *)&rclx_ExtractorType);
|
PyModule_AddObject(module, "Extractor", (PyObject *)&rclx_ExtractorType);
|
||||||
|
|
||||||
|
if (PyType_Ready(&recoll_QResultStoreType) < 0)
|
||||||
|
INITERROR;
|
||||||
|
Py_INCREF(&recoll_QResultStoreType);
|
||||||
|
PyModule_AddObject(module, "QResultStore", (PyObject *)&recoll_QResultStoreType);
|
||||||
|
|
||||||
|
if (PyType_Ready(&recoll_QRSDocType) < 0)
|
||||||
|
INITERROR;
|
||||||
|
Py_INCREF((PyObject*)&recoll_QRSDocType);
|
||||||
|
PyModule_AddObject(module, "QRSDoc",
|
||||||
|
(PyObject *)&recoll_QRSDocType);
|
||||||
|
|
||||||
#if PY_MAJOR_VERSION >= 3
|
#if PY_MAJOR_VERSION >= 3
|
||||||
return module;
|
return module;
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -22,8 +22,13 @@
|
|||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
class RclConfig;
|
class RclConfig;
|
||||||
|
namespace Rcl {
|
||||||
|
class Doc;
|
||||||
|
class Query;
|
||||||
|
};
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
@ -33,7 +38,27 @@ typedef struct {
|
|||||||
std::shared_ptr<RclConfig> rclconfig;
|
std::shared_ptr<RclConfig> rclconfig;
|
||||||
} recoll_DocObject;
|
} recoll_DocObject;
|
||||||
|
|
||||||
extern PyTypeObject rclx_ExtractorType;
|
struct recoll_DbObject;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
PyObject_HEAD
|
||||||
|
/* Type-specific fields go here. */
|
||||||
|
Rcl::Query *query;
|
||||||
|
int next; // Index of result to be fetched next or -1 if uninit
|
||||||
|
int rowcount; // Number of records returned by last execute
|
||||||
|
std::string *sortfield; // Need to allocate in here, main program is C.
|
||||||
|
int ascending;
|
||||||
|
int arraysize; // Default size for fetchmany
|
||||||
|
recoll_DbObject* connection;
|
||||||
|
bool fetchtext;
|
||||||
|
} recoll_QueryObject;
|
||||||
|
|
||||||
extern PyTypeObject recoll_DocType;
|
extern PyTypeObject recoll_DocType;
|
||||||
|
extern PyTypeObject recoll_QueryType;
|
||||||
|
extern PyTypeObject rclx_ExtractorType;
|
||||||
|
extern PyTypeObject recoll_QResultStoreType;
|
||||||
|
extern PyTypeObject recoll_QRSDocType;
|
||||||
|
|
||||||
|
extern int pys2cpps(PyObject *pyval, std::string& out);
|
||||||
|
|
||||||
#endif // _PYRECOLL_H_INCLUDED_
|
#endif // _PYRECOLL_H_INCLUDED_
|
||||||
|
|||||||
378
src/python/recoll/pyresultstore.cpp
Normal file
378
src/python/recoll/pyresultstore.cpp
Normal file
@ -0,0 +1,378 @@
|
|||||||
|
/* Copyright (C) 2007-2020 J.F.Dockes
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <Python.h>
|
||||||
|
#include <structmember.h>
|
||||||
|
#include <bytesobject.h>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
|
#include "qresultstore.h"
|
||||||
|
|
||||||
|
#include "pyrecoll.h"
|
||||||
|
#include "log.h"
|
||||||
|
#include "rclutil.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#if PY_MAJOR_VERSION >=3
|
||||||
|
# define Py_TPFLAGS_HAVE_ITER 0
|
||||||
|
#else
|
||||||
|
#define PyLong_FromLong PyInt_FromLong
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct recoll_QRSDocObject;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
PyObject_HEAD
|
||||||
|
/* Type-specific fields go here. */
|
||||||
|
Rcl::QResultStore *store;
|
||||||
|
} recoll_QResultStoreObject;
|
||||||
|
|
||||||
|
static void
|
||||||
|
QResultStore_dealloc(recoll_QResultStoreObject *self)
|
||||||
|
{
|
||||||
|
LOGDEB1("QResultStore_dealloc.\n");
|
||||||
|
delete self->store;
|
||||||
|
Py_TYPE(self)->tp_free((PyObject*)self);
|
||||||
|
}
|
||||||
|
|
||||||
|
// tp_new for QResultStore: allocate the Python object and attach a fresh,
// empty C++ store to it. Returns NULL if the allocation fails.
static PyObject *
QResultStore_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    LOGDEB1("QResultStore_new\n");
    PyObject *obj = type->tp_alloc(type, 0);
    if (obj != 0) {
        ((recoll_QResultStoreObject *)obj)->store = new Rcl::QResultStore();
    }
    return obj;
}
|
||||||
|
|
||||||
|
// Docstring for the QResultStore type (tp_doc).
PyDoc_STRVAR(
    qrs_doc_QResultStoreObject,
    "QResultStore()\n"
    "\n"
    "A QResultStore can efficiently store query result documents.\n"
    );
|
||||||
|
|
||||||
|
static int
|
||||||
|
QResultStore_init(
|
||||||
|
recoll_QResultStoreObject *self, PyObject *args, PyObject *kwargs)
|
||||||
|
{
|
||||||
|
LOGDEB("QResultStore_init\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Docstring for QResultStore.storeQuery().
PyDoc_STRVAR(qrs_doc_storeQuery,
             "storeQuery(query, fieldspec=[], isinc=False)\n"
             "\n"
             "Stores the results from the input query object, possibly "
             "excluding/including the specified fields.\n"
    );
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
QResultStore_storeQuery(recoll_QResultStoreObject* self, PyObject *args,
|
||||||
|
PyObject *kwargs)
|
||||||
|
{
|
||||||
|
LOGDEB0("QResultStore_storeQuery\n");
|
||||||
|
static const char* kwlist[] = {"query", "fieldspec", "isinc", NULL};
|
||||||
|
PyObject *q{nullptr};
|
||||||
|
PyObject *fieldspec{nullptr};
|
||||||
|
PyObject *isinco = 0;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!|OO", (char**)kwlist,
|
||||||
|
&recoll_QueryType, &q, &fieldspec, &isinco))
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
recoll_QueryObject *query = (recoll_QueryObject*)q;
|
||||||
|
if (nullptr == query->query) {
|
||||||
|
PyErr_SetString(PyExc_ValueError,
|
||||||
|
"query not initialised (null query ?)");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
bool isinc{false};
|
||||||
|
if (nullptr != isinco && PyObject_IsTrue(isinco))
|
||||||
|
isinc = true;
|
||||||
|
|
||||||
|
std::set<std::string> fldspec;
|
||||||
|
if (nullptr != fieldspec) {
|
||||||
|
// fieldspec must be either single string or list of strings
|
||||||
|
if (PyUnicode_Check(fieldspec)) {
|
||||||
|
PyObject *utf8o = PyUnicode_AsUTF8String(fieldspec);
|
||||||
|
if (nullptr == utf8o) {
|
||||||
|
PyErr_SetString(PyExc_AttributeError,
|
||||||
|
"storeQuery: can't encode field name??");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
fldspec.insert(PyBytes_AsString(utf8o));
|
||||||
|
Py_DECREF(utf8o);
|
||||||
|
} else if (PySequence_Check(fieldspec)) {
|
||||||
|
for (Py_ssize_t i = 0; i < PySequence_Size(fieldspec); i++) {
|
||||||
|
PyObject *utf8o =
|
||||||
|
PyUnicode_AsUTF8String(PySequence_GetItem(fieldspec, i));
|
||||||
|
if (nullptr == utf8o) {
|
||||||
|
PyErr_SetString(PyExc_AttributeError,
|
||||||
|
"storeQuery: can't encode field name??");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
fldspec.insert(PyBytes_AsString(utf8o));
|
||||||
|
Py_DECREF(utf8o);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
PyErr_SetString(PyExc_TypeError,
|
||||||
|
"fieldspec arg must be str or sequence of str");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self->store->storeQuery(*(query->query), fldspec, isinc);
|
||||||
|
Py_RETURN_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Docstring for QResultStore.getField().
PyDoc_STRVAR(
    qrs_doc_getField,
    "getField(index, fieldname)\n"
    "\n"
    "Retrieve the value of field <fieldname> from result at index <index>.\n"
    );
|
||||||
|
|
||||||
|
// QResultStore.getField(index, fieldname)
//
// Looks up one field of one stored result. Returns the raw value as a bytes
// object, or None when the store has no value for it. Returns NULL (with the
// exception set by the argument parser) if the arguments are not (int, str).
static PyObject *
QResultStore_getField(recoll_QResultStoreObject* self, PyObject *args)
{
    int docidx;
    const char *fldname;
    if (!PyArg_ParseTuple(args, "is", &docidx, &fldname))
        return nullptr;

    const char *fldvalue = self->store->fieldValue(docidx, fldname);
    if (nullptr != fldvalue)
        return PyBytes_FromString(fldvalue);
    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
// Method table for the QResultStore type (tp_methods).
static PyMethodDef QResultStore_methods[] = {
    {"storeQuery",
     (PyCFunction)QResultStore_storeQuery,
     METH_VARARGS|METH_KEYWORDS,
     qrs_doc_storeQuery},
    {"getField",
     (PyCFunction)QResultStore_getField,
     METH_VARARGS,
     qrs_doc_getField},
    // End of table marker
    {NULL}
};
|
||||||
|
|
||||||
|
// sq_length for QResultStore: len(store) is the count reported by the C++
// store.
static Py_ssize_t QResultStore_Size(PyObject *o)
{
    recoll_QResultStoreObject *pystore = (recoll_QResultStoreObject*)o;
    return pystore->store->getCount();
}
|
||||||
|
|
||||||
|
// sq_item for QResultStore: store[i] builds a QRSDoc object referencing
// result number i.
//
// An out of range index must raise IndexError: returning NULL with no
// exception set is an error for the interpreter, and IndexError is also what
// terminates "for doc in store" when iterating through the sequence protocol.
//
// @return a new QRSDoc reference, or NULL with a Python exception set.
static PyObject* QResultStore_GetItem(PyObject *o, Py_ssize_t i)
{
    if (i < 0 || i >= ((recoll_QResultStoreObject*)o)->store->getCount()) {
        PyErr_SetString(PyExc_IndexError, "QResultStore index out of range");
        return nullptr;
    }
    // "n" is the format unit for a Py_ssize_t ("i" expects a C int).
    PyObject *args = Py_BuildValue("On", o, i);
    if (nullptr == args) {
        return nullptr;
    }
    PyObject *res = PyObject_CallObject((PyObject *)&recoll_QRSDocType, args);
    Py_DECREF(args);
    return res;
}
|
||||||
|
|
||||||
|
// Sequence protocol for QResultStore: only len() and indexing are provided.
static PySequenceMethods resultstore_as_sequence = {
    /* sq_length */         (lenfunc)QResultStore_Size,
    /* sq_concat */         (binaryfunc)0,
    /* sq_repeat */         (ssizeargfunc)0,
    /* sq_item */           (ssizeargfunc)QResultStore_GetItem,
    /* was sq_slice */      0,
    /* sq_ass_item */       (ssizeobjargproc)0,
    /* was sq_ass_slice */  0,
    /* sq_contains */       (objobjproc)0,
    /* sq_inplace_concat */ (binaryfunc)0,
    /* sq_inplace_repeat */ (ssizeargfunc)0,
};
|
||||||
|
|
||||||
|
// Type object for _recoll.QResultStore. Positional initialisation: the
// order of the entries must follow the PyTypeObject slot order.
PyTypeObject recoll_QResultStoreType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* tp_name */           "_recoll.QResultStore",
    /* tp_basicsize */      sizeof(recoll_QResultStoreObject),
    /* tp_itemsize */       0,
    /* tp_dealloc */        (destructor)QResultStore_dealloc,
    /* tp_print */          0,
    /* tp_getattr */        0,
    /* tp_setattr */        0,
    /* tp_compare */        0,
    /* tp_repr */           0,
    /* tp_as_number */      0,
    /* tp_as_sequence */    &resultstore_as_sequence,
    /* tp_as_mapping */     0,
    /* tp_hash */           0,
    /* tp_call */           0,
    /* tp_str */            0,
    /* tp_getattro */       0,
    /* tp_setattro */       0,
    /* tp_as_buffer */      0,
    /* tp_flags */          Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
    /* tp_doc */            qrs_doc_QResultStoreObject,
    /* tp_traverse */       0,
    /* tp_clear */          0,
    /* tp_richcompare */    0,
    /* tp_weaklistoffset */ 0,
    /* tp_iter */           0,
    /* tp_iternext */       0,
    /* tp_methods */        QResultStore_methods,
    /* tp_members */        0,
    /* tp_getset */         0,
    /* tp_base */           0,
    /* tp_dict */           0,
    /* tp_descr_get */      0,
    /* tp_descr_set */      0,
    /* tp_dictoffset */     0,
    /* tp_init */           (initproc)QResultStore_init,
    /* tp_alloc */          0,
    /* tp_new */            QResultStore_new,
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
// QRSDoc iterator
|
||||||
|
typedef struct recoll_QRSDocObject {
|
||||||
|
PyObject_HEAD
|
||||||
|
/* Type-specific fields go here. */
|
||||||
|
recoll_QResultStoreObject *pystore;
|
||||||
|
int index;
|
||||||
|
} recoll_QRSDocObject;
|
||||||
|
|
||||||
|
static void
|
||||||
|
QRSDoc_dealloc(recoll_QRSDocObject *self)
|
||||||
|
{
|
||||||
|
LOGDEB1("QRSDoc_dealloc\n");
|
||||||
|
Py_DECREF(self->pystore);
|
||||||
|
Py_TYPE(self)->tp_free((PyObject*)self);
|
||||||
|
}
|
||||||
|
|
||||||
|
// tp_new for QRSDoc: plain allocation, the fields are set by QRSDoc_init.
// A failed allocation (NULL) is returned as is.
static PyObject *
QRSDoc_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    PyObject *obj = type->tp_alloc(type, 0);
    return obj;
}
|
||||||
|
|
||||||
|
// Docstring for the QRSDoc type (tp_doc).
PyDoc_STRVAR(
    qrs_doc_QRSDocObject,
    "QRSDoc(resultstore, index)\n"
    "\n"
    "A QRSDoc gives access to one result from a qresultstore.\n"
    );
|
||||||
|
|
||||||
|
static int
|
||||||
|
QRSDoc_init(
|
||||||
|
recoll_QRSDocObject *self, PyObject *args, PyObject *kwargs)
|
||||||
|
{
|
||||||
|
recoll_QResultStoreObject *pystore;
|
||||||
|
int index;
|
||||||
|
if (!PyArg_ParseTuple(args, "O!i",
|
||||||
|
&recoll_QResultStoreType, &pystore, &index)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_INCREF(pystore);
|
||||||
|
self->pystore = pystore;
|
||||||
|
self->index = index;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
QRSDoc_subscript(recoll_QRSDocObject *self, PyObject *key)
|
||||||
|
{
|
||||||
|
if (self->pystore == 0) {
|
||||||
|
PyErr_SetString(PyExc_AttributeError, "store??");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
string name;
|
||||||
|
if (pys2cpps(key, name) < 0) {
|
||||||
|
PyErr_SetString(PyExc_AttributeError, "name??");
|
||||||
|
Py_RETURN_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *value = self->pystore->store->fieldValue(self->index, name);
|
||||||
|
if (nullptr == value) {
|
||||||
|
Py_RETURN_NONE;
|
||||||
|
}
|
||||||
|
string urlstring;
|
||||||
|
if (name == "url") {
|
||||||
|
printableUrl("UTF-8", value, urlstring);
|
||||||
|
value = urlstring.c_str();
|
||||||
|
}
|
||||||
|
PyObject *bytes = PyBytes_FromString(value);
|
||||||
|
PyObject *u =
|
||||||
|
PyUnicode_FromEncodedObject(bytes, "UTF-8", "backslashreplace");
|
||||||
|
Py_DECREF(bytes);
|
||||||
|
return u;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyMappingMethods qrsdoc_as_mapping = {
|
||||||
|
(lenfunc)0, /*mp_length*/
|
||||||
|
(binaryfunc)QRSDoc_subscript, /*mp_subscript*/
|
||||||
|
(objobjargproc)0, /*mp_ass_subscript*/
|
||||||
|
};
|
||||||
|
|
||||||
|
static PyMethodDef QRSDoc_methods[] = {
|
||||||
|
{NULL} /* Sentinel */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
PyTypeObject recoll_QRSDocType = {
|
||||||
|
PyVarObject_HEAD_INIT(NULL, 0)
|
||||||
|
"_recoll.QRSDoc", /*tp_name*/
|
||||||
|
sizeof(recoll_QRSDocObject), /*tp_basicsize*/
|
||||||
|
0, /*tp_itemsize*/
|
||||||
|
(destructor)QRSDoc_dealloc, /*tp_dealloc*/
|
||||||
|
0, /*tp_print*/
|
||||||
|
0, /*tp_getattr*/
|
||||||
|
0, /*tp_setattr*/
|
||||||
|
0, /*tp_compare*/
|
||||||
|
0, /*tp_repr*/
|
||||||
|
0, /*tp_as_number*/
|
||||||
|
0, /*tp_as_sequence*/
|
||||||
|
&qrsdoc_as_mapping, /*tp_as_mapping*/
|
||||||
|
0, /*tp_hash */
|
||||||
|
0, /*tp_call*/
|
||||||
|
0, /*tp_str*/
|
||||||
|
0, /*tp_getattro*/
|
||||||
|
0, /*tp_setattro*/
|
||||||
|
0, /*tp_as_buffer*/
|
||||||
|
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||||
|
qrs_doc_QRSDocObject, /* tp_doc */
|
||||||
|
0, /* tp_traverse */
|
||||||
|
0, /* tp_clear */
|
||||||
|
0, /* tp_richcompare */
|
||||||
|
0, /* tp_weaklistoffset */
|
||||||
|
0, /* tp_iter */
|
||||||
|
0, /* tp_iternext */
|
||||||
|
QRSDoc_methods, /* tp_methods */
|
||||||
|
0, /* tp_members */
|
||||||
|
0, /* tp_getset */
|
||||||
|
0, /* tp_base */
|
||||||
|
0, /* tp_dict */
|
||||||
|
0, /* tp_descr_get */
|
||||||
|
0, /* tp_descr_set */
|
||||||
|
0, /* tp_dictoffset */
|
||||||
|
(initproc)QRSDoc_init, /* tp_init */
|
||||||
|
0, /* tp_alloc */
|
||||||
|
QRSDoc_new, /* tp_new */
|
||||||
|
};
|
||||||
23
src/python/recoll/recoll/qresultstore.py
Normal file
23
src/python/recoll/recoll/qresultstore.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Copyright (C) 2020 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
# We used to have two C extensions: recoll and rclextract, which was a really
|
||||||
|
# bad idea. They are now merged into the _recoll C extension module. The two
|
||||||
|
# python modules recoll.py and rclextract.py only exist for compatibility (for
|
||||||
|
# now: maybe we'll do something with them in the future).
|
||||||
|
|
||||||
|
|
||||||
|
from ._recoll import QResultStore, QRSDoc
|
||||||
@ -38,13 +38,15 @@ module1 = Extension('_recoll',
|
|||||||
os.path.join(top, 'internfile'),
|
os.path.join(top, 'internfile'),
|
||||||
os.path.join(top, 'rcldb'),
|
os.path.join(top, 'rcldb'),
|
||||||
os.path.join(top, 'query'),
|
os.path.join(top, 'query'),
|
||||||
os.path.join(top, 'unac')
|
os.path.join(top, 'unac'),
|
||||||
|
os.path.join(top, 'testmains')
|
||||||
],
|
],
|
||||||
extra_compile_args = extra_compile_args,
|
extra_compile_args = extra_compile_args,
|
||||||
libraries = libraries,
|
libraries = libraries,
|
||||||
library_dirs = library_dirs,
|
library_dirs = library_dirs,
|
||||||
runtime_library_dirs = runtime_library_dirs,
|
runtime_library_dirs = runtime_library_dirs,
|
||||||
sources = [os.path.join(pytop, 'pyrecoll.cpp'),
|
sources = [os.path.join(pytop, 'pyrecoll.cpp'),
|
||||||
|
os.path.join(pytop, 'pyresultstore.cpp'),
|
||||||
os.path.join(pytop, 'pyrclextract.cpp')
|
os.path.join(pytop, 'pyrclextract.cpp')
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|||||||
@ -32,15 +32,17 @@
|
|||||||
#include "rcldoc.h"
|
#include "rcldoc.h"
|
||||||
#include "rclquery.h"
|
#include "rclquery.h"
|
||||||
|
|
||||||
|
namespace Rcl {
|
||||||
|
|
||||||
class QResultStore::Internal {
|
class QResultStore::Internal {
|
||||||
public:
|
public:
|
||||||
bool testentry(const std::pair<std::string,std::string>& entry) {
|
bool testentry(const std::pair<std::string,std::string>& entry) {
|
||||||
return !entry.second.empty() &&
|
return !entry.second.empty() &&
|
||||||
excludedfields.find(entry.first) == excludedfields.end();
|
(isinc ? fieldspec.find(entry.first) != fieldspec.end() :
|
||||||
|
fieldspec.find(entry.first) == fieldspec.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::map<std::string, int> keyidx;
|
std::map<std::string, int> keyidx;
|
||||||
int ndocs{0};
|
|
||||||
// Notes: offsets[0] is always 0, not really useful, simpler this
|
// Notes: offsets[0] is always 0, not really useful, simpler this
|
||||||
// way. Also could use simple C array instead of c++ vector...
|
// way. Also could use simple C array instead of c++ vector...
|
||||||
struct docoffs {
|
struct docoffs {
|
||||||
@ -51,7 +53,8 @@ public:
|
|||||||
std::vector<int> offsets;
|
std::vector<int> offsets;
|
||||||
};
|
};
|
||||||
std::vector<struct docoffs> docs;
|
std::vector<struct docoffs> docs;
|
||||||
std::set<std::string> excludedfields;
|
std::set<std::string> fieldspec;
|
||||||
|
bool isinc{false};
|
||||||
};
|
};
|
||||||
|
|
||||||
QResultStore::QResultStore()
|
QResultStore::QResultStore()
|
||||||
@ -63,14 +66,17 @@ QResultStore::~QResultStore()
|
|||||||
delete m;
|
delete m;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For reference : Fields normally excluded by uprcl:
|
||||||
//{"author", "ipath", "rcludi", "relevancyrating", "sig", "abstract", "caption",
|
// {"author", "ipath", "rcludi", "relevancyrating", "sig", "abstract", "caption",
|
||||||
// "filename", "origcharset", "sig"};
|
// "filename", "origcharset", "sig"};
|
||||||
|
|
||||||
|
|
||||||
bool QResultStore::storeQuery(Rcl::Query& query, std::set<std::string> excl)
|
bool QResultStore::storeQuery(Rcl::Query& query, std::set<std::string> fldspec,
|
||||||
|
bool isinc)
|
||||||
{
|
{
|
||||||
m->excludedfields = excl;
|
m->fieldspec = fldspec;
|
||||||
|
m->isinc = isinc;
|
||||||
|
|
||||||
/////////////
|
/////////////
|
||||||
// Enumerate all existing keys and assign array indexes for
|
// Enumerate all existing keys and assign array indexes for
|
||||||
// them. Count documents while we are at it.
|
// them. Count documents while we are at it.
|
||||||
@ -81,10 +87,11 @@ bool QResultStore::storeQuery(Rcl::Query& query, std::set<std::string> excl)
|
|||||||
{"fbytes", 4},
|
{"fbytes", 4},
|
||||||
{"dbytes", 5}
|
{"dbytes", 5}
|
||||||
};
|
};
|
||||||
m->ndocs = 0;
|
|
||||||
for (;;m->ndocs++) {
|
int count = 0;
|
||||||
|
for (;;count++) {
|
||||||
Rcl::Doc doc;
|
Rcl::Doc doc;
|
||||||
if (!query.getDoc(m->ndocs, doc, false)) {
|
if (!query.getDoc(count, doc, false)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
for (const auto& entry : doc.meta) {
|
for (const auto& entry : doc.meta) {
|
||||||
@ -101,9 +108,9 @@ bool QResultStore::storeQuery(Rcl::Query& query, std::set<std::string> excl)
|
|||||||
///////
|
///////
|
||||||
// Populate the main array with doc-equivalent structures.
|
// Populate the main array with doc-equivalent structures.
|
||||||
|
|
||||||
m->docs.resize(m->ndocs);
|
m->docs.resize(count);
|
||||||
|
|
||||||
for (int i = 0; i < m->ndocs; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
Rcl::Doc doc;
|
Rcl::Doc doc;
|
||||||
if (!query.getDoc(i, doc, false)) {
|
if (!query.getDoc(i, doc, false)) {
|
||||||
break;
|
break;
|
||||||
@ -164,24 +171,34 @@ bool QResultStore::storeQuery(Rcl::Query& query, std::set<std::string> excl)
|
|||||||
STRINGCPCOPY(cp, entry.second);
|
STRINGCPCOPY(cp, entry.second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Point all empty entries to the final null byte
|
||||||
|
for (unsigned int i = 1; i < vdoc.offsets.size(); i++) {
|
||||||
|
if (vdoc.offsets[i] == 0) {
|
||||||
|
vdoc.offsets[i] = cp - 1 - vdoc.base;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *QResultStore::fieldvalue(int docindex, const std::string& fldname)
|
int QResultStore::getCount()
|
||||||
{
|
{
|
||||||
if (docindex < 0 || docindex >= m->ndocs) {
|
return int(m->docs.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *QResultStore::fieldValue(int docindex, const std::string& fldname)
|
||||||
|
{
|
||||||
|
if (docindex < 0 || docindex >= int(m->docs.size())) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto& vdoc = m->docs[docindex];
|
auto& vdoc = m->docs[docindex];
|
||||||
|
|
||||||
auto it = m->keyidx.find(fldname);
|
auto it = m->keyidx.find(fldname);
|
||||||
if (it == m->keyidx.end()) {
|
if (it == m->keyidx.end() ||
|
||||||
return nullptr;
|
it->second < 0 || it->second >= int(vdoc.offsets.size())) {
|
||||||
}
|
|
||||||
if (it->second < 0 || it->second >= int(vdoc.offsets.size())) {
|
|
||||||
//??
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
return vdoc.base + vdoc.offsets[it->second];
|
return vdoc.base + vdoc.offsets[it->second];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace Rcl
|
||||||
|
|||||||
@ -17,24 +17,49 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _QRESULTSTORE_H_INCLUDED_
|
#ifndef _QRESULTSTORE_H_INCLUDED_
|
||||||
#define _QRESULTSTORE_H_INCLUDED_
|
#define _QRESULTSTORE_H_INCLUDED_
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implement an efficient way to store the whole or part of a query result set.
|
||||||
|
* This would naturally be done as a vector<Rcl::Doc>, but the natural
|
||||||
|
* way leads to a huge space waste (8-10x), which may be a problem in
|
||||||
|
* some cases. This is mostly used by the uprcl Media Server.
|
||||||
|
*/
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
class Query;
|
class Query;
|
||||||
}
|
|
||||||
|
|
||||||
class QResultStore {
|
class QResultStore {
|
||||||
public:
|
public:
|
||||||
QResultStore();
|
QResultStore();
|
||||||
~QResultStore();
|
~QResultStore();
|
||||||
|
|
||||||
bool storeQuery(Rcl::Query& q, std::set<std::string> excluded = {});
|
/**
|
||||||
const char *fieldvalue(int docindex, const std::string& fldname);
|
* Fetch and store the results of the input query.
|
||||||
|
*
|
||||||
|
* @param q the executed query object to use for fetching results.
|
||||||
|
* @param fldspec list of fields to be excluded or included.
|
||||||
|
* @param isinc if true, the field list defines the fields to be stored,
|
||||||
|
* else, those to be excluded.
|
||||||
|
*/
|
||||||
|
bool storeQuery(Rcl::Query& q, std::set<std::string> fldspec = {},
|
||||||
|
bool isinc = false);
|
||||||
|
|
||||||
|
/** Retrieve count of stored results */
|
||||||
|
int getCount();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve field value.
|
||||||
|
*
|
||||||
|
* @param docindex index in query results.
|
||||||
|
* @param fldname field name.
|
||||||
|
*/
|
||||||
|
const char *fieldValue(int docindex, const std::string& fldname);
|
||||||
|
|
||||||
|
|
||||||
QResultStore(const QResultStore&) = delete;
|
QResultStore(const QResultStore&) = delete;
|
||||||
QResultStore& operator=(const QResultStore&) = delete;
|
QResultStore& operator=(const QResultStore&) = delete;
|
||||||
@ -43,4 +68,5 @@ private:
|
|||||||
Internal *m{nullptr};
|
Internal *m{nullptr};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
}
|
||||||
#endif /* _QRESULTSTORE_H_INCLUDED_ */
|
#endif /* _QRESULTSTORE_H_INCLUDED_ */
|
||||||
|
|||||||
@ -39,7 +39,7 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
|
|||||||
$(DEFS)
|
$(DEFS)
|
||||||
|
|
||||||
noinst_PROGRAMS = textsplit utf8iter fstreewalk rclconfig hldata unac mbox \
|
noinst_PROGRAMS = textsplit utf8iter fstreewalk rclconfig hldata unac mbox \
|
||||||
circache wipedir mimetype pathut fileudi x11mon rclqdocmem
|
circache wipedir mimetype pathut fileudi x11mon trqrstore
|
||||||
|
|
||||||
circache_SOURCES = trcircache.cpp
|
circache_SOURCES = trcircache.cpp
|
||||||
circache_LDADD = ../librecoll.la
|
circache_LDADD = ../librecoll.la
|
||||||
@ -80,5 +80,5 @@ wipedir_LDADD = ../librecoll.la
|
|||||||
x11mon_SOURCES = trx11mon.cpp
|
x11mon_SOURCES = trx11mon.cpp
|
||||||
x11mon_LDADD = ../utils/x11mon.o ../librecoll.la -lX11
|
x11mon_LDADD = ../utils/x11mon.o ../librecoll.la -lX11
|
||||||
|
|
||||||
rclqdocmem_SOURCES = rclqdocmem.cpp qresultstore.cpp
|
trqrstore_SOURCES = trqrstore.cpp
|
||||||
rclqdocmem_LDADD = ../librecoll.la
|
trqrstore_LDADD = ../librecoll.la
|
||||||
|
|||||||
@ -306,8 +306,7 @@ int main(int argc, char *argv[])
|
|||||||
// This uses 19 MB of storage for the audio index, and 72 MB for
|
// This uses 19 MB of storage for the audio index, and 72 MB for
|
||||||
// the main one (less keys->less gain)
|
// the main one (less keys->less gain)
|
||||||
{
|
{
|
||||||
#if 1
|
Rcl::QResultStore store;
|
||||||
QResultStore store;
|
|
||||||
bool result = store.storeQuery(
|
bool result = store.storeQuery(
|
||||||
query, {"author", "ipath", "rcludi", "relevancyrating",
|
query, {"author", "ipath", "rcludi", "relevancyrating",
|
||||||
"sig","abstract", "caption", "filename", "origcharset", "sig"});
|
"sig","abstract", "caption", "filename", "origcharset", "sig"});
|
||||||
@ -316,121 +315,7 @@ int main(int argc, char *argv[])
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
meminfo("After storing");
|
meminfo("After storing");
|
||||||
std::cerr << "url 20 " << store.fieldvalue(20, "url") << "\n";
|
std::cerr << "url 20 " << store.fieldValue(20, "url") << "\n";
|
||||||
#else
|
|
||||||
/////////////
|
|
||||||
// Enumerate all existing keys and assign array indexes for
|
|
||||||
// them. Count documents while we are at it.
|
|
||||||
std::map<std::string, int> keyidx {
|
|
||||||
{"url",0},
|
|
||||||
{"mimetype", 1},
|
|
||||||
{"fmtime", 2},
|
|
||||||
{"dmtime", 3},
|
|
||||||
{"fbytes", 4},
|
|
||||||
{"dbytes", 5},
|
|
||||||
};
|
|
||||||
int ndocs = 0;
|
|
||||||
for (;;ndocs++) {
|
|
||||||
Rcl::Doc doc;
|
|
||||||
if (!query.getDoc(ndocs, doc, false)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
for (const auto& entry : doc.meta) {
|
|
||||||
if (testentry(entry)) {
|
|
||||||
auto it = keyidx.find(entry.first);
|
|
||||||
if (it == keyidx.end()) {
|
|
||||||
int idx = keyidx.size();
|
|
||||||
keyidx.insert({entry.first, idx});
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// The audio db has 49 keys !
|
|
||||||
std::cerr << "Found " << keyidx.size() << " different keys\n";
|
|
||||||
|
|
||||||
///////
|
|
||||||
// Populate the main array with doc-equivalent structures.
|
|
||||||
|
|
||||||
// Notes: offsets[0] is always 0, not really useful, simpler this way. Also
|
|
||||||
// could use simple C array instead of c++ vector...
|
|
||||||
struct docoffs {
|
|
||||||
~docoffs() {
|
|
||||||
free(base);
|
|
||||||
}
|
|
||||||
char *base{nullptr};
|
|
||||||
std::vector<int> offsets;
|
|
||||||
};
|
|
||||||
std::vector<struct docoffs> docs;
|
|
||||||
docs.resize(ndocs);
|
|
||||||
meminfo("After resize");
|
|
||||||
|
|
||||||
for (int i = 0; i < ndocs; i++) {
|
|
||||||
Rcl::Doc doc;
|
|
||||||
if (!query.getDoc(i, doc, false)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
auto& vdoc = docs[i];
|
|
||||||
vdoc.offsets.resize(keyidx.size());
|
|
||||||
int nbytes =
|
|
||||||
doc.url.size() + 1 +
|
|
||||||
doc.mimetype.size() + 1 +
|
|
||||||
doc.fmtime.size() + 1 +
|
|
||||||
doc.dmtime.size() + 1 +
|
|
||||||
doc.fbytes.size() + 1 +
|
|
||||||
doc.dbytes.size() + 1;
|
|
||||||
for (const auto& entry : doc.meta) {
|
|
||||||
if (testentry(entry)) {
|
|
||||||
if (keyidx.find(entry.first) == keyidx.end()) {
|
|
||||||
std::cerr << "Unknown key: " << entry.first << "\n";
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
nbytes += entry.second.size() + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
char *cp = (char*)malloc(nbytes);
|
|
||||||
if (nullptr == cp) {
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
|
|
||||||
#define STRINGCPCOPY(CHARP, S) do { \
|
|
||||||
memcpy(CHARP, S.c_str(), S.size()+1); \
|
|
||||||
CHARP += S.size()+1; \
|
|
||||||
} while (false);
|
|
||||||
|
|
||||||
vdoc.base = cp;
|
|
||||||
vdoc.offsets[0] = cp - vdoc.base;
|
|
||||||
STRINGCPCOPY(cp, doc.url);
|
|
||||||
vdoc.offsets[1] = cp - vdoc.base;
|
|
||||||
STRINGCPCOPY(cp, doc.mimetype);
|
|
||||||
vdoc.offsets[2] = cp - vdoc.base;
|
|
||||||
STRINGCPCOPY(cp, doc.fmtime);
|
|
||||||
vdoc.offsets[3] = cp - vdoc.base;
|
|
||||||
STRINGCPCOPY(cp, doc.dmtime);
|
|
||||||
vdoc.offsets[4] = cp - vdoc.base;
|
|
||||||
STRINGCPCOPY(cp, doc.fbytes);
|
|
||||||
vdoc.offsets[5] = cp - vdoc.base;
|
|
||||||
STRINGCPCOPY(cp, doc.dbytes);
|
|
||||||
for (const auto& entry : doc.meta) {
|
|
||||||
if (testentry(entry)) {
|
|
||||||
auto it = keyidx.find(entry.first);
|
|
||||||
if (it == keyidx.end()) {
|
|
||||||
std::cerr << "Unknown key: " << entry.first << "\n";
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
if (it->second <= 5) {
|
|
||||||
// Already done ! Storing another address would be
|
|
||||||
// wasteful and crash when freeing...
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
vdoc.offsets[it->second] = cp - vdoc.base;
|
|
||||||
STRINGCPCOPY(cp, entry.second);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
meminfo("After storing");
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
#elif defined(STORE_ALLOBSTACK)
|
#elif defined(STORE_ALLOBSTACK)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user