python: connect() accept bytes or strings for extradbs items

This commit is contained in:
Jean-Francois Dockes 2021-12-07 16:18:13 +01:00
parent c51f6ee10d
commit 213589f00f

View File

@ -222,7 +222,7 @@ SearchData_addclause(recoll_SearchDataObject* self, PyObject *args,
case 'A': case 'A':
if (strcasecmp(tp, "and")) if (strcasecmp(tp, "and"))
goto defaultcase; goto defaultcase;
cl = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, qs, fld?fld:""); cl = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, qs, fld ? fld : "");
break; break;
case 'f': case 'f':
case 'F': case 'F':
@ -234,20 +234,18 @@ SearchData_addclause(recoll_SearchDataObject* self, PyObject *args,
case 'O': case 'O':
if (strcasecmp(tp, "or")) if (strcasecmp(tp, "or"))
goto defaultcase; goto defaultcase;
cl = new Rcl::SearchDataClauseSimple(Rcl::SCLT_OR, qs, fld?fld:""); cl = new Rcl::SearchDataClauseSimple(Rcl::SCLT_OR, qs, fld ? fld : "");
break; break;
case 'n': case 'n':
case 'N': case 'N':
if (strcasecmp(tp, "near")) if (strcasecmp(tp, "near"))
goto defaultcase; goto defaultcase;
cl = new Rcl::SearchDataClauseDist(Rcl::SCLT_NEAR, qs, slack, cl = new Rcl::SearchDataClauseDist(Rcl::SCLT_NEAR, qs, slack, fld ? fld : "");
fld ? fld : "");
break; break;
case 'p': case 'p':
case 'P': case 'P':
if (!strcasecmp(tp, "phrase")) { if (!strcasecmp(tp, "phrase")) {
cl = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, qs, slack, cl = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, qs, slack, fld ? fld : "");
fld ? fld : "");
} else if (!strcasecmp(tp, "path")) { } else if (!strcasecmp(tp, "path")) {
cl = new Rcl::SearchDataClausePath(qs); cl = new Rcl::SearchDataClausePath(qs);
} else { } else {
@ -367,10 +365,9 @@ Doc_setbinurl(recoll_DocObject *self, PyObject *value)
return 0; return 0;
} }
self->doc->url = string(PyByteArray_AsString(value), self->doc->url = string(PyByteArray_AsString(value), PyByteArray_Size(value));
PyByteArray_Size(value)); printableUrl(
printableUrl(self->rclconfig->getDefCharset(), self->doc->url, self->rclconfig->getDefCharset(), self->doc->url, self->doc->meta[Rcl::Doc::keyurl]);
self->doc->meta[Rcl::Doc::keyurl]);
Py_RETURN_NONE; Py_RETURN_NONE;
} }
@ -390,9 +387,8 @@ Doc_keys(recoll_DocObject *self)
if (!pkeys) if (!pkeys)
return 0; return 0;
for (const auto& entry : self->doc->meta) { for (const auto& entry : self->doc->meta) {
PyList_Append(pkeys, PyList_Append(
PyUnicode_Decode(entry.first.c_str(), entry.first.size(), pkeys, PyUnicode_Decode(entry.first.c_str(), entry.first.size(), "UTF-8", "replace"));
"UTF-8", "replace"));
} }
return pkeys; return pkeys;
} }
@ -414,12 +410,10 @@ Doc_items(recoll_DocObject *self)
return 0; return 0;
for (const auto& entry : self->doc->meta) { for (const auto& entry : self->doc->meta) {
PyDict_SetItem(pdict, PyDict_SetItem(pdict,
PyUnicode_Decode(entry.first.c_str(), PyUnicode_Decode(
entry.first.size(), entry.first.c_str(), entry.first.size(), "UTF-8", "replace"),
"UTF-8", "replace"), PyUnicode_Decode(
PyUnicode_Decode(entry.second.c_str(), entry.second.c_str(), entry.second.size(), "UTF-8", "replace"));
entry.second.size(),
"UTF-8", "replace"));
} }
return pdict; return pdict;
} }
@ -462,8 +456,7 @@ static bool idocget(recoll_DocObject *self, const string& key, string& value)
value = self->doc->mimetype; value = self->doc->mimetype;
return true; return true;
} else if (!key.compare(Rcl::Doc::keymt)) { } else if (!key.compare(Rcl::Doc::keymt)) {
value = self->doc->dmtime.empty() ? self->doc->fmtime : value = self->doc->dmtime.empty() ? self->doc->fmtime : self->doc->dmtime;
self->doc->dmtime;
return true; return true;
} }
break; break;
@ -478,8 +471,7 @@ static bool idocget(recoll_DocObject *self, const string& key, string& value)
value = self->doc->sig; value = self->doc->sig;
return true; return true;
} else if (!key.compare(Rcl::Doc::keysz)) { } else if (!key.compare(Rcl::Doc::keysz)) {
value = self->doc->dbytes.empty() ? self->doc->fbytes : value = self->doc->dbytes.empty() ? self->doc->fbytes : self->doc->dbytes;
self->doc->dbytes;
return true; return true;
} }
break; break;
@ -569,8 +561,7 @@ Doc_getattro(recoll_DocObject *self, PyObject *nameobj)
return 0; return 0;
} }
if (!self->rclconfig || !self->rclconfig->ok()) { if (!self->rclconfig || !self->rclconfig->ok()) {
PyErr_SetString(PyExc_AttributeError, PyErr_SetString(PyExc_AttributeError, "Configuration not initialized");
"Configuration not initialized");
return 0; return 0;
} }
@ -605,8 +596,7 @@ Doc_setattro(recoll_DocObject *self, PyObject *nameobj, PyObject *value)
return -1; return -1;
} }
if (!self->rclconfig || !self->rclconfig->ok()) { if (!self->rclconfig || !self->rclconfig->ok()) {
PyErr_SetString(PyExc_AttributeError, PyErr_SetString(PyExc_AttributeError, "Configuration not initialized");
"Configuration not initialized");
return -1; return -1;
} }
string name; string name;
@ -681,7 +671,7 @@ Doc_setattro(recoll_DocObject *self, PyObject *nameobj, PyObject *value)
case 's': case 's':
if (key == Rcl::Doc::keysig) { if (key == Rcl::Doc::keysig) {
self->doc->sig.swap(uvalue); self->doc->sig.swap(uvalue);
} else if (key == Rcl::Doc::keysz) { } else if (key == Rcl::Doc::keysz) {
self->doc->dbytes.swap(uvalue); self->doc->dbytes.swap(uvalue);
} }
break; break;
@ -708,8 +698,7 @@ Doc_subscript(recoll_DocObject *self, PyObject *key)
return NULL; return NULL;
} }
if (!self->rclconfig || !self->rclconfig->ok()) { if (!self->rclconfig || !self->rclconfig->ok()) {
PyErr_SetString(PyExc_AttributeError, PyErr_SetString(PyExc_AttributeError, "Configuration not initialized");
"Configuration not initialized");
return NULL; return NULL;
} }
string name; string name;
@ -721,8 +710,7 @@ Doc_subscript(recoll_DocObject *self, PyObject *key)
string skey = self->rclconfig->fieldQCanon(name); string skey = self->rclconfig->fieldQCanon(name);
string value; string value;
if (idocget(self, skey, value)) { if (idocget(self, skey, value)) {
return PyUnicode_Decode(value.c_str(), value.size(), return PyUnicode_Decode(value.c_str(), value.size(), "UTF-8", "backslashreplace");
"UTF-8", "backslashreplace");
} }
Py_RETURN_NONE; Py_RETURN_NONE;
} }
@ -913,9 +901,7 @@ Query_sortby(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
static const char *kwlist[] = {"field", "ascending", NULL}; static const char *kwlist[] = {"field", "ascending", NULL};
char *sfield = 0; char *sfield = 0;
PyObject *ascobj = 0; PyObject *ascobj = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O", (char**)kwlist, if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O", (char**)kwlist, &sfield, &ascobj))
&sfield,
&ascobj))
return 0; return 0;
if (sfield) { if (sfield) {
@ -1059,8 +1045,7 @@ Query_executesd(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
// one knows their name (e.g. xdocid). // one knows their name (e.g. xdocid).
static void movedocfields(const RclConfig* rclconfig, Rcl::Doc *doc) static void movedocfields(const RclConfig* rclconfig, Rcl::Doc *doc)
{ {
printableUrl(rclconfig->getDefCharset(), doc->url, printableUrl(rclconfig->getDefCharset(), doc->url, doc->meta[Rcl::Doc::keyurl]);
doc->meta[Rcl::Doc::keyurl]);
doc->meta[Rcl::Doc::keytp] = doc->mimetype; doc->meta[Rcl::Doc::keytp] = doc->mimetype;
doc->meta[Rcl::Doc::keyipt] = doc->ipath; doc->meta[Rcl::Doc::keyipt] = doc->ipath;
doc->meta[Rcl::Doc::keyfs] = doc->fbytes; doc->meta[Rcl::Doc::keyfs] = doc->fbytes;
@ -1082,7 +1067,7 @@ Query_iternext(PyObject *_self)
// This happens if there are no results and is not an error // This happens if there are no results and is not an error
return 0; return 0;
} }
recoll_DocObject *result = recoll_DocObject *result =
(recoll_DocObject *)PyObject_CallObject((PyObject *)&recoll_DocType, 0); (recoll_DocObject *)PyObject_CallObject((PyObject *)&recoll_DocType, 0);
if (!result) { if (!result) {
PyErr_SetString(PyExc_EnvironmentError, "doc create failed"); PyErr_SetString(PyExc_EnvironmentError, "doc create failed");
@ -1131,8 +1116,7 @@ Query_fetchmany(PyObject* _self, PyObject *args, PyObject *kwargs)
static const char *kwlist[] = {"size", NULL}; static const char *kwlist[] = {"size", NULL};
int size = 0; int size = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", (char**)kwlist, if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", (char**)kwlist, &size))
&size))
return 0; return 0;
if (size == 0) if (size == 0)
@ -1169,8 +1153,7 @@ Query_scroll(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
static const char *kwlist[] = {"position", "mode", NULL}; static const char *kwlist[] = {"position", "mode", NULL};
int pos = 0; int pos = 0;
char *smode = 0; char *smode = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", (char**)kwlist, if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", (char**)kwlist, &pos, &smode))
&pos, &smode))
return 0; return 0;
bool isrelative = 1; bool isrelative = 1;
@ -1219,8 +1202,7 @@ public:
virtual string startMatch(unsigned int idx) { virtual string startMatch(unsigned int idx) {
PyObject *res = 0; PyObject *res = 0;
if (m_methods) if (m_methods)
res = PyObject_CallMethod(m_methods, (char *)"startMatch", res = PyObject_CallMethod(m_methods, (char *)"startMatch", (char *)"(i)", idx);
(char *)"(i)", idx);
if (res == 0) if (res == 0)
return "<span class=\"rclmatch\">"; return "<span class=\"rclmatch\">";
PyObject *res1 = res; PyObject *res1 = res;
@ -1292,8 +1274,7 @@ Query_highlight(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
return 0; return 0;
} }
// cf python manual:The bytes will be interpreted as being UTF-8 encoded. // cf python manual:The bytes will be interpreted as being UTF-8 encoded.
PyObject* unicode = PyUnicode_FromStringAndSize(out.begin()->c_str(), PyObject* unicode = PyUnicode_FromStringAndSize(out.begin()->c_str(), out.begin()->size());
out.begin()->size());
// We used to return a copy of the unicode object. Can't see why any more // We used to return a copy of the unicode object. Can't see why any more
return unicode; return unicode;
} }
@ -1365,8 +1346,7 @@ Query_makedocabstract(recoll_QueryObject* self, PyObject *args,PyObject *kwargs)
} }
// Return a python unicode object // Return a python unicode object
return PyUnicode_Decode(abstract.c_str(), abstract.size(), return PyUnicode_Decode(abstract.c_str(), abstract.size(), "UTF-8", "replace");
"UTF-8", "replace");
} }
PyDoc_STRVAR(doc_Query_getsnippets, PyDoc_STRVAR(doc_Query_getsnippets,
@ -1511,16 +1491,14 @@ Query_getgroups(recoll_QueryObject* self, PyObject *, PyObject *)
// multiply_groups to using or-plists. TBD: check // multiply_groups to using or-plists. TBD: check
if (tg.kind == HighlightData::TermGroup::TGK_TERM) { if (tg.kind == HighlightData::TermGroup::TGK_TERM) {
xlist = PyList_New(1); xlist = PyList_New(1);
PyList_SetItem(xlist, 0, PyList_SetItem(xlist, 0,
PyUnicode_Decode(tg.term.c_str(), tg.term.size(), PyUnicode_Decode(tg.term.c_str(), tg.term.size(), "UTF-8", "replace"));
"UTF-8", "replace"));
} else { } else {
xlist = PyList_New(tg.orgroups.size()); xlist = PyList_New(tg.orgroups.size());
for (unsigned int j = 0; j < tg.orgroups.size(); j++) { for (unsigned int j = 0; j < tg.orgroups.size(); j++) {
PyList_SetItem(xlist, j, PyList_SetItem(xlist, j,
PyUnicode_Decode(tg.orgroups[j][0].c_str(), PyUnicode_Decode(tg.orgroups[j][0].c_str(),
tg.orgroups[j][0].size(), tg.orgroups[j][0].size(), "UTF-8", "replace"));
"UTF-8", "replace"));
} }
} }
PyList_Append(mainlist, Py_BuildValue("(OO)", ulist, xlist)); PyList_Append(mainlist, Py_BuildValue("(OO)", ulist, xlist));
@ -1717,19 +1695,26 @@ Db_init(recoll_DbObject *self, PyObject *args, PyObject *kwargs)
} }
for (int i = 0; i < dbcnt; i++) { for (int i = 0; i < dbcnt; i++) {
PyObject *item = PySequence_GetItem(extradbs, i); PyObject *item = PySequence_GetItem(extradbs, i);
const char *s = PyBytes_AsString(item); string dbname;
if (s == nullptr) { if (PyUnicode_Check(item)) {
PyErr_SetString(PyExc_TypeError, PyObject *utf8o = PyUnicode_AsUTF8String(item);
"extra_dbs must contain strings"); if (nullptr != utf8o) {
dbname = PyBytes_AsString(utf8o);
Py_DECREF(utf8o);
}
} else if (PyBytes_Check(item)) {
dbname = PyBytes_AsString(item);
}
if (dbname.empty()) {
PyErr_SetString(PyExc_TypeError, "extra_dbs items must be bytes or strings");
deleteZ(self->db); deleteZ(self->db);
Py_DECREF(item); Py_DECREF(item);
return -1; return -1;
} }
string dbname(s);
Py_DECREF(item); Py_DECREF(item);
string errmsg = string("extra db could not be opened: ") + dbname;
if (!self->db->addQueryDb(dbname)) { if (!self->db->addQueryDb(dbname)) {
PyErr_SetString(PyExc_EnvironmentError, PyErr_SetString(PyExc_EnvironmentError, errmsg.c_str());
"extra db could not be opened");
deleteZ(self->db); deleteZ(self->db);
return -1; return -1;
} }
@ -1792,8 +1777,7 @@ Db_setAbstractParams(recoll_DbObject *self, PyObject *args, PyObject *kwargs)
PyErr_SetString(PyExc_AttributeError, "db id not found"); PyErr_SetString(PyExc_AttributeError, "db id not found");
return 0; return 0;
} }
LOGDEB0("Db_setAbstractParams: mxchrs " << maxchars << ", ctxwrds " << LOGDEB0("Db_setAbstractParams: mxchrs " << maxchars << ", ctxwrds " << ctxwords << "\n");
ctxwords << "\n");
self->db->setAbstractParams(-1, maxchars, ctxwords); self->db->setAbstractParams(-1, maxchars, ctxwords);
Py_RETURN_NONE; Py_RETURN_NONE;
} }
@ -1830,8 +1814,7 @@ Db_makeDocAbstract(recoll_DbObject* self, PyObject *args)
return 0; return 0;
} }
// Return a python unicode object // Return a python unicode object
return PyUnicode_Decode(abstract.c_str(), abstract.size(), return PyUnicode_Decode(abstract.c_str(), abstract.size(), "UTF-8", "replace");
"UTF-8", "replace");
} }
PyDoc_STRVAR( PyDoc_STRVAR(
@ -1906,8 +1889,7 @@ Db_termMatch(recoll_DbObject* self, PyObject *args, PyObject *kwargs)
ret = PyList_New(result.entries.size()); ret = PyList_New(result.entries.size());
for (unsigned int i = 0; i < result.entries.size(); i++) { for (unsigned int i = 0; i < result.entries.size(); i++) {
PyObject *term = PyUnicode_FromString( PyObject *term = PyUnicode_FromString(Rcl::strip_prefix(result.entries[i].term).c_str());
Rcl::strip_prefix(result.entries[i].term).c_str());
if (showfreqs) { if (showfreqs) {
PyObject *totcnt = PyLong_FromLong(result.entries[i].wcf); PyObject *totcnt = PyLong_FromLong(result.entries[i].wcf);
PyObject *doccnt = PyLong_FromLong(result.entries[i].docs); PyObject *doccnt = PyLong_FromLong(result.entries[i].docs);
@ -1934,8 +1916,7 @@ Db_needUpdate(recoll_DbObject* self, PyObject *args, PyObject *kwds)
LOGDEB0("Db_needUpdate\n"); LOGDEB0("Db_needUpdate\n");
char *udi = 0; // needs freeing char *udi = 0; // needs freeing
char *sig = 0; // needs freeing char *sig = 0; // needs freeing
if (!PyArg_ParseTuple(args, "eses:Db_needUpdate", if (!PyArg_ParseTuple(args, "eses:Db_needUpdate", "utf-8", &udi, "utf-8", &sig)) {
"utf-8", &udi, "utf-8", &sig)) {
return 0; return 0;
} }
if (self->db == 0) { if (self->db == 0) {
@ -2244,8 +2225,7 @@ PyInit__recoll(void)
if (PyType_Ready(&recoll_SearchDataType) < 0) if (PyType_Ready(&recoll_SearchDataType) < 0)
INITERROR; INITERROR;
Py_INCREF((PyObject*)&recoll_SearchDataType); Py_INCREF((PyObject*)&recoll_SearchDataType);
PyModule_AddObject(module, "SearchData", PyModule_AddObject(module, "SearchData", (PyObject *)&recoll_SearchDataType);
(PyObject *)&recoll_SearchDataType);
PyModule_AddStringConstant(module, "__doc__", pyrecoll_doc_string); PyModule_AddStringConstant(module, "__doc__", pyrecoll_doc_string);
@ -2262,8 +2242,7 @@ PyInit__recoll(void)
if (PyType_Ready(&recoll_QRSDocType) < 0) if (PyType_Ready(&recoll_QRSDocType) < 0)
INITERROR; INITERROR;
Py_INCREF((PyObject*)&recoll_QRSDocType); Py_INCREF((PyObject*)&recoll_QRSDocType);
PyModule_AddObject(module, "QRSDoc", PyModule_AddObject(module, "QRSDoc", (PyObject *)&recoll_QRSDocType);
(PyObject *)&recoll_QRSDocType);
#if PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3
return module; return module;