Implement doc_ass_subscript, cleanup the Doc interface. Connect the qresultstore to the python module

This commit is contained in:
Jean-Francois Dockes 2020-12-18 11:20:39 +01:00
parent ea9b5ab9eb
commit 22f62216d2
4 changed files with 242 additions and 197 deletions

View File

@ -327,7 +327,8 @@ Doc_init(recoll_DocObject *self, PyObject *, PyObject *)
return 0; return 0;
} }
PyDoc_STRVAR(doc_Doc_getbinurl, PyDoc_STRVAR(
doc_Doc_getbinurl,
"getbinurl(none) -> binary url\n" "getbinurl(none) -> binary url\n"
"\n" "\n"
"Returns an URL with a path part which is a as bit for bit copy of the \n" "Returns an URL with a path part which is a as bit for bit copy of the \n"
@ -339,14 +340,15 @@ Doc_getbinurl(recoll_DocObject *self)
{ {
LOGDEB0("Doc_getbinurl\n"); LOGDEB0("Doc_getbinurl\n");
if (self->doc == 0) { if (self->doc == 0) {
PyErr_SetString(PyExc_AttributeError, "doc"); PyErr_SetString(PyExc_AttributeError, "doc is NULL");
return 0; Py_RETURN_NONE;
} }
return PyBytes_FromStringAndSize(self->doc->url.c_str(), return PyBytes_FromStringAndSize(self->doc->url.c_str(),
self->doc->url.size()); self->doc->url.size());
} }
PyDoc_STRVAR(doc_Doc_setbinurl, PyDoc_STRVAR(
doc_Doc_setbinurl,
"setbinurl(url) -> binary url\n" "setbinurl(url) -> binary url\n"
"\n" "\n"
"Set the URL from binary path like file://may/contain/unencodable/bytes\n" "Set the URL from binary path like file://may/contain/unencodable/bytes\n"
@ -387,7 +389,7 @@ Doc_keys(recoll_DocObject *self)
return 0; return 0;
for (const auto& entry : self->doc->meta) { for (const auto& entry : self->doc->meta) {
PyList_Append(pkeys, PyList_Append(pkeys,
PyUnicode_Decode(entry.first.c_str(),entry.first.size(), PyUnicode_Decode(entry.first.c_str(), entry.first.size(),
"UTF-8", "replace")); "UTF-8", "replace"));
} }
return pkeys; return pkeys;
@ -537,6 +539,23 @@ static PyMethodDef Doc_methods[] = {
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };
// Convert a Python str or bytes object into a C++ string.
//
// A str value is encoded to UTF-8, a bytes value is copied unchanged. The
// copy uses the explicit object length, so embedded NUL bytes are preserved
// instead of truncating the result.
//
// @param pyval the Python object to convert. May be null: the attribute and
//        item assignment slots receive a null value for a deletion request,
//        and this is reported as a conversion failure.
// @param out receives the converted data. Left untouched on failure.
// @return 0 on success, -1 if pyval is null, is neither str nor bytes, or
//         could not be encoded. No Python exception is set here for the
//         null / wrong type cases: callers set their own.
static int pys2cpps(PyObject *pyval, std::string& out)
{
    if (pyval == nullptr) {
        return -1;
    }

    // Get a bytes object which we own a reference to in both cases, so that
    // the extraction and cleanup code below is shared.
    PyObject *bytesobj = nullptr;
    if (PyUnicode_Check(pyval)) {
        bytesobj = PyUnicode_AsUTF8String(pyval);
        if (bytesobj == nullptr) {
            return -1;
        }
    } else if (PyBytes_Check(pyval)) {
        bytesobj = pyval;
        Py_INCREF(bytesobj);
    } else {
        return -1;
    }

    char *data = nullptr;
    Py_ssize_t len = 0;
    int ret = -1;
    if (PyBytes_AsStringAndSize(bytesobj, &data, &len) == 0 && data != nullptr) {
        out.assign(data, static_cast<std::string::size_type>(len));
        ret = 0;
    }
    Py_DECREF(bytesobj);
    return ret;
}
// Note that this returns None if the attribute is not found instead of raising // Note that this returns None if the attribute is not found instead of raising
// an exception as would be standard. We don't change it to keep existing code // an exception as would be standard. We don't change it to keep existing code
// working. // working.
@ -560,18 +579,7 @@ Doc_getattro(recoll_DocObject *self, PyObject *nameobj)
PyErr_Clear(); PyErr_Clear();
string name; string name;
if (PyUnicode_Check(nameobj)) { if (pys2cpps(nameobj, name) < 0) {
PyObject* utf8o = PyUnicode_AsUTF8String(nameobj);
if (utf8o == 0) {
LOGERR("Doc_getattro: encoding name to utf8 failed\n");
PyErr_SetString(PyExc_AttributeError, "name??");
Py_RETURN_NONE;
}
name = PyBytes_AsString(utf8o);
Py_DECREF(utf8o);
} else if (PyBytes_Check(nameobj)) {
name = PyBytes_AsString(nameobj);
} else {
PyErr_SetString(PyExc_AttributeError, "name not unicode nor string??"); PyErr_SetString(PyExc_AttributeError, "name not unicode nor string??");
Py_RETURN_NONE; Py_RETURN_NONE;
} }
@ -588,7 +596,7 @@ Doc_getattro(recoll_DocObject *self, PyObject *nameobj)
} }
static int static int
Doc_setattr(recoll_DocObject *self, char *name, PyObject *value) Doc_setattro(recoll_DocObject *self, PyObject *nameobj, PyObject *value)
{ {
if (self->doc == 0) { if (self->doc == 0) {
PyErr_SetString(PyExc_AttributeError, "doc??"); PyErr_SetString(PyExc_AttributeError, "doc??");
@ -599,84 +607,78 @@ Doc_setattr(recoll_DocObject *self, char *name, PyObject *value)
"Configuration not initialized"); "Configuration not initialized");
return -1; return -1;
} }
if (name == 0) { string name;
PyErr_SetString(PyExc_AttributeError, "name??"); if (pys2cpps(nameobj, name) < 0) {
PyErr_SetString(PyExc_AttributeError, "name not unicode nor string??");
return -1; return -1;
} }
if (PyBytes_Check(value)) { string uvalue;
value = PyUnicode_FromEncodedObject(value, "UTF-8", "strict"); if (pys2cpps(value, uvalue) < 0) {
if (value == 0) PyErr_SetString(PyExc_AttributeError, "value neither bytes nor str");
return -1; return -1;
} }
if (!PyUnicode_Check(value)) {
PyErr_SetString(PyExc_AttributeError, "value not unicode??");
return -1;
}
PyObject* putf8 = PyUnicode_AsUTF8String(value);
if (putf8 == 0) {
LOGERR("Doc_setmeta: encoding to utf8 failed\n");
PyErr_SetString(PyExc_AttributeError, "value??");
return -1;
}
string uvalue = PyBytes_AsString(putf8);
Py_DECREF(putf8);
string key = self->rclconfig->fieldQCanon(name); string key = self->rclconfig->fieldQCanon(name);
LOGDEB0("Doc_setattr: doc " << self->doc << " [" << key << "] (" << name << LOGDEB0("Doc_setattr: doc " << self->doc << " [" << key << "] (" << name <<
") -> [" << uvalue << "]\n"); ") -> [" << uvalue << "]\n");
// We set the value in the meta array in all cases. Good idea ? or do it // Note that some attributes are set both as struct fields and
// only for fields without a dedicated Doc:: entry? // meta members, keep compat with movedocfields() used when
self->doc->meta[key] = uvalue; // fetching from query.
switch (key.at(0)) { switch (key.at(0)) {
case 't': case 't':
if (!key.compare("text")) { if (key == "text") {
self->doc->text.swap(uvalue); self->doc->text.swap(uvalue);
} }
break; break;
case 'u': case 'u':
if (!key.compare(Rcl::Doc::keyurl)) { if (key == Rcl::Doc::keyurl) {
self->doc->url.swap(uvalue); self->doc->url.swap(uvalue);
printableUrl(self->rclconfig->getDefCharset(), self->doc->url,
self->doc->meta[Rcl::Doc::keyurl]);
} }
break; break;
case 'f': case 'f':
if (!key.compare(Rcl::Doc::keyfs)) { if (key == Rcl::Doc::keyfs) {
self->doc->fbytes.swap(uvalue); self->doc->fbytes.swap(uvalue);
} else if (!key.compare(Rcl::Doc::keyfmt)) { self->doc->meta[Rcl::Doc::keyfs] = self->doc->fbytes;
} else if (key == Rcl::Doc::keyfmt) {
self->doc->fmtime.swap(uvalue); self->doc->fmtime.swap(uvalue);
} }
break; break;
case 'd': case 'd':
if (!key.compare(Rcl::Doc::keyds)) { if (key == Rcl::Doc::keyds) {
self->doc->dbytes.swap(uvalue); self->doc->dbytes.swap(uvalue);
} else if (!key.compare(Rcl::Doc::keydmt)) { self->doc->meta[Rcl::Doc::keyds] = self->doc->dbytes;
} else if (key == Rcl::Doc::keydmt) {
self->doc->dmtime.swap(uvalue); self->doc->dmtime.swap(uvalue);
} }
break; break;
case 'i': case 'i':
if (!key.compare(Rcl::Doc::keyipt)) { if (key == Rcl::Doc::keyipt) {
self->doc->ipath.swap(uvalue); self->doc->ipath.swap(uvalue);
self->doc->meta[Rcl::Doc::keyipt] = self->doc->ipath;
} }
break; break;
case 'm': case 'm':
if (!key.compare(Rcl::Doc::keytp)) { if (key == Rcl::Doc::keytp) {
self->doc->mimetype.swap(uvalue); self->doc->mimetype.swap(uvalue);
} else if (!key.compare(Rcl::Doc::keymt)) { self->doc->meta[Rcl::Doc::keytp] = self->doc->mimetype;
} else if (key == Rcl::Doc::keymt) {
self->doc->dmtime.swap(uvalue); self->doc->dmtime.swap(uvalue);
} }
break; break;
case 'o': case 'o':
if (!key.compare(Rcl::Doc::keyoc)) { if (key == Rcl::Doc::keyoc) {
self->doc->origcharset.swap(uvalue); self->doc->origcharset.swap(uvalue);
} }
break; break;
case 's': case 's':
if (!key.compare(Rcl::Doc::keysig)) { if (key == Rcl::Doc::keysig) {
self->doc->sig.swap(uvalue); self->doc->sig.swap(uvalue);
} else if (!key.compare(Rcl::Doc::keysz)) { } else if (key == Rcl::Doc::keysz) {
self->doc->dbytes.swap(uvalue); self->doc->dbytes.swap(uvalue);
} }
break; break;
@ -697,6 +699,7 @@ Doc_length(recoll_DocObject *self)
static PyObject * static PyObject *
Doc_subscript(recoll_DocObject *self, PyObject *key) Doc_subscript(recoll_DocObject *self, PyObject *key)
{ {
// Can't just return getattro because this first checks for a method name
if (self->doc == 0) { if (self->doc == 0) {
PyErr_SetString(PyExc_AttributeError, "doc??"); PyErr_SetString(PyExc_AttributeError, "doc??");
return NULL; return NULL;
@ -707,18 +710,7 @@ Doc_subscript(recoll_DocObject *self, PyObject *key)
return NULL; return NULL;
} }
string name; string name;
if (PyUnicode_Check(key)) { if (pys2cpps(key, name) < 0) {
PyObject* utf8o = PyUnicode_AsUTF8String(key);
if (utf8o == 0) {
LOGERR("Doc_getitemo: encoding name to utf8 failed\n");
PyErr_SetString(PyExc_AttributeError, "name??");
Py_RETURN_NONE;
}
name = PyBytes_AsString(utf8o);
Py_DECREF(utf8o);
} else if (PyBytes_Check(key)) {
name = PyBytes_AsString(key);
} else {
PyErr_SetString(PyExc_AttributeError, "key not unicode nor string??"); PyErr_SetString(PyExc_AttributeError, "key not unicode nor string??");
Py_RETURN_NONE; Py_RETURN_NONE;
} }
@ -726,20 +718,27 @@ Doc_subscript(recoll_DocObject *self, PyObject *key)
string skey = self->rclconfig->fieldQCanon(name); string skey = self->rclconfig->fieldQCanon(name);
string value; string value;
if (idocget(self, skey, value)) { if (idocget(self, skey, value)) {
return PyUnicode_Decode(value.c_str(), value.size(), "UTF-8","replace"); return PyUnicode_Decode(value.c_str(), value.size(),
"UTF-8", "backslashreplace");
} }
Py_RETURN_NONE; Py_RETURN_NONE;
} }
// mp_ass_subscript slot for Doc objects: implements "doc[key] = val".
//
// Item assignment is the same operation as attribute assignment, so a real
// value is handed over to Doc_setattro() and its result is returned as is.
//
// The interpreter calls this slot with a null val for "del doc[key]".
// Deletion is not supported: it is rejected here with a TypeError rather
// than passing the null pointer down to the string conversion code.
//
// @return the Doc_setattro() result for an assignment, -1 with TypeError
//         set for a deletion request.
static int
Doc_ass_subscript(recoll_DocObject *self, PyObject *key, PyObject *val)
{
    if (val == nullptr) {
        PyErr_SetString(PyExc_TypeError,
                        "Doc object does not support item deletion");
        return -1;
    }
    return Doc_setattro(self, key, val);
}
static PyMappingMethods doc_as_mapping = { static PyMappingMethods doc_as_mapping = {
(lenfunc)Doc_length, /*mp_length*/ (lenfunc)Doc_length, /*mp_length*/
(binaryfunc)Doc_subscript, /*mp_subscript*/ (binaryfunc)Doc_subscript, /*mp_subscript*/
(objobjargproc)0, /*mp_ass_subscript*/ (objobjargproc)Doc_ass_subscript, /*mp_ass_subscript*/
}; };
PyDoc_STRVAR(doc_DocObject, PyDoc_STRVAR(
doc_DocObject,
"Doc()\n" "Doc()\n"
"\n" "\n"
"A Doc object contains index data for a given document.\n" "A Doc object contains index data for a given document.\n"
@ -784,7 +783,7 @@ PyTypeObject recoll_DocType = {
(destructor)Doc_dealloc, /*tp_dealloc*/ (destructor)Doc_dealloc, /*tp_dealloc*/
0, /*tp_print*/ 0, /*tp_print*/
0, /*tp_getattr*/ 0, /*tp_getattr*/
(setattrfunc)Doc_setattr, /*tp_setattr*/ 0, /*tp_setattr*/
0, /*tp_compare*/ 0, /*tp_compare*/
0, /*tp_repr*/ 0, /*tp_repr*/
0, /*tp_as_number*/ 0, /*tp_as_number*/
@ -794,7 +793,7 @@ PyTypeObject recoll_DocType = {
0, /*tp_call*/ 0, /*tp_call*/
0, /*tp_str*/ 0, /*tp_str*/
(getattrofunc)Doc_getattro,/*tp_getattro*/ (getattrofunc)Doc_getattro,/*tp_getattro*/
0, /*tp_setattro*/ (setattrofunc)Doc_setattro,/*tp_setattro*/
0, /*tp_as_buffer*/ 0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT, /*tp_flags*/ Py_TPFLAGS_DEFAULT, /*tp_flags*/
doc_DocObject, /* tp_doc */ doc_DocObject, /* tp_doc */
@ -2187,6 +2186,11 @@ PyInit__recoll(void)
Py_INCREF(&recoll_QResultStoreType); Py_INCREF(&recoll_QResultStoreType);
PyModule_AddObject(module, "QResultStore", (PyObject *)&recoll_QResultStoreType); PyModule_AddObject(module, "QResultStore", (PyObject *)&recoll_QResultStoreType);
if (PyType_Ready(&recoll_QRSDocType) < 0)
INITERROR;
Py_INCREF((PyObject*)&recoll_QRSDocType);
PyModule_AddObject(module, "QRSDoc",
(PyObject *)&recoll_QRSDocType);
#if PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3
return module; return module;

View File

@ -57,5 +57,6 @@ extern PyTypeObject recoll_DocType;
extern PyTypeObject recoll_QueryType; extern PyTypeObject recoll_QueryType;
extern PyTypeObject rclx_ExtractorType; extern PyTypeObject rclx_ExtractorType;
extern PyTypeObject recoll_QResultStoreType; extern PyTypeObject recoll_QResultStoreType;
extern PyTypeObject recoll_QRSDocType;
#endif // _PYRECOLL_H_INCLUDED_ #endif // _PYRECOLL_H_INCLUDED_

View File

@ -231,91 +231,131 @@ PyTypeObject recoll_QResultStoreType = {
QResultStore_new, /* tp_new */ QResultStore_new, /* tp_new */
}; };
////////////////////////////////////////////////////////////////////////
// QRSDoc: gives access to the fields of one result from a QResultStore
// Instance layout for the QRSDoc Python type: a reference to a result store
// and the index of one result inside it.
typedef struct {
PyObject_HEAD
/* Type-specific fields go here. */
// Store holding the result data. QRSDoc_init() takes a reference to it and
// QRSDoc_dealloc() releases it.
recoll_QResultStoreObject *pystore;
// Index of the result inside the store, as received by QRSDoc_init(). It is
// passed unchanged to the store fieldValue() call when reading a field.
int index;
} recoll_QRSDocObject;
////////////////////////////////////////////////////////////////////////// static void
// Module methods QRSDoc_dealloc(recoll_QRSDocObject *self)
static PyMethodDef rclrstore_methods[] = {
{NULL, NULL, 0, NULL} /* Sentinel */
};
PyDoc_STRVAR(pyrclrstore_doc_string,
"Utility module for efficiently storing many query results.\n");
struct module_state {
PyObject *error;
};
#if PY_MAJOR_VERSION >= 3
#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))
#else
#define GETSTATE(m) (&_state)
static struct module_state _state;
#endif
#if PY_MAJOR_VERSION >= 3
static int rclrstore_traverse(PyObject *m, visitproc visit, void *arg) {
Py_VISIT(GETSTATE(m)->error);
return 0;
}
static int rclrstore_clear(PyObject *m) {
Py_CLEAR(GETSTATE(m)->error);
return 0;
}
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"_rclrstore",
NULL,
sizeof(struct module_state),
rclrstore_methods,
NULL,
rclrstore_traverse,
rclrstore_clear,
NULL
};
#define INITERROR return NULL
extern "C" PyObject *
PyInit__rclrstore(void)
#else
#define INITERROR return
PyMODINIT_FUNC
init__rclrstore(void)
#endif
{ {
// Note: we can't call recollinit here, because the confdir is only really Py_DECREF(self->pystore);
// known when the first db object is created (it is an optional parameter). Py_TYPE(self)->tp_free((PyObject*)self);
// Using a default here may end up with variables such as stripchars being }
// wrong
#if PY_MAJOR_VERSION >= 3 static PyObject *
PyObject *module = PyModule_Create(&moduledef); QRSDoc_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
#else {
PyObject *module = Py_InitModule("_rclrstore", rclrstore_methods); recoll_QRSDocObject *self = (recoll_QRSDocObject *)type->tp_alloc(type, 0);
#endif if (self == 0)
if (module == NULL) return 0;
INITERROR; return (PyObject *)self;
}
struct module_state *st = GETSTATE(module); PyDoc_STRVAR(qrs_doc_QRSDocObject,
// The first parameter is a char *. Hopefully we don't initialize "QRSDoc(resultstore, index)\n"
// modules too often... "\n"
st->error = PyErr_NewException(strdup("_rclrstore.Error"), NULL, NULL); "A QRSDoc gives access to one result from a qresultstore.\n"
if (st->error == NULL) { );
Py_DECREF(module);
INITERROR; static int
QRSDoc_init(
recoll_QRSDocObject *self, PyObject *args, PyObject *kwargs)
{
recoll_QResultStoreObject *pystore;
int index;
if (!PyArg_ParseTuple(args, "O!i",
&recoll_QResultStoreType, &pystore, &index)) {
return -1;
} }
if (PyType_Ready(&recoll_QResultStoreType) < 0) Py_INCREF(pystore);
INITERROR; self->pystore = pystore;
Py_INCREF((PyObject*)&recoll_QResultStoreType); self->index = index;
PyModule_AddObject(module, "QResultStore", return 0;
(PyObject *)&recoll_QResultStoreType);
PyModule_AddStringConstant(module, "__doc__", pyrclrstore_doc_string);
#if PY_MAJOR_VERSION >= 3
return module;
#endif
} }
// mp_subscript slot for QRSDoc objects: implements "qrsdoc[key]".
//
// The key (str, encoded to UTF-8, or bytes) is used as a field name and
// looked up in the store for the result at self->index.
//
// @return a new bytes object holding the field value, None if the store has
//         no value for this field, or NULL with AttributeError set if the
//         object has no store or the key can't be converted to a field
//         name. The error paths must return NULL: returning None while an
//         exception is pending violates the C API calling convention.
static PyObject *
QRSDoc_subscript(recoll_QRSDocObject *self, PyObject *key)
{
    if (self->pystore == 0) {
        PyErr_SetString(PyExc_AttributeError, "store??");
        return NULL;
    }

    string name;
    if (PyUnicode_Check(key)) {
        PyObject* utf8o = PyUnicode_AsUTF8String(key);
        if (utf8o == 0) {
            PyErr_SetString(PyExc_AttributeError, "name??");
            return NULL;
        }
        name = PyBytes_AsString(utf8o);
        Py_DECREF(utf8o);
    } else if (PyBytes_Check(key)) {
        name = PyBytes_AsString(key);
    } else {
        PyErr_SetString(PyExc_AttributeError, "key not unicode nor string??");
        return NULL;
    }

    const char *value = self->pystore->store->fieldValue(self->index, name);
    if (nullptr == value) {
        // Missing field: not an error, reported as None.
        Py_RETURN_NONE;
    }
    return PyBytes_FromString(value);
}
// Mapping protocol table for QRSDoc: only item reading is provided, the
// length and item assignment slots are left empty.
static PyMappingMethods qrsdoc_as_mapping = {
    nullptr,                        /*mp_length*/
    (binaryfunc)QRSDoc_subscript,   /*mp_subscript*/
    nullptr,                        /*mp_ass_subscript*/
};
// Method table for QRSDoc: no methods, the table only holds the terminating
// all-null entry.
static PyMethodDef QRSDoc_methods[] = {
    {nullptr, nullptr, 0, nullptr}  /* Sentinel */
};
// Type object for QRSDoc, named "_recoll.QRSDoc".
// The slots are set positionally, so the order of the entries below must not
// be changed. The only behaviours filled in are: deallocation, the mapping
// protocol (item reading through qrsdoc_as_mapping), the doc string, the
// (empty) method table, and instance creation / initialisation.
PyTypeObject recoll_QRSDocType = {
PyVarObject_HEAD_INIT(NULL, 0)
"_recoll.QRSDoc", /*tp_name*/
sizeof(recoll_QRSDocObject), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)QRSDoc_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
&qrsdoc_as_mapping, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
// Py_TPFLAGS_BASETYPE: the type can be subclassed.
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
qrs_doc_QRSDocObject, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
QRSDoc_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)QRSDoc_init, /* tp_init */
0, /* tp_alloc */
QRSDoc_new, /* tp_new */
};

View File

@ -20,4 +20,4 @@
# now: maybe we'll do something with them in the future). # now: maybe we'll do something with them in the future).
from ._recoll import QResultStore from ._recoll import QResultStore, QRSDoc