python module: properly manage a per-db config
This commit is contained in:
parent
b6fb1f7f26
commit
5d22ad3bcb
@ -23,7 +23,7 @@
|
|||||||
#include <strings.h>
|
#include <strings.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
#include <memory>
|
||||||
|
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "rcldoc.h"
|
#include "rcldoc.h"
|
||||||
@ -33,6 +33,8 @@ using namespace std;
|
|||||||
|
|
||||||
#include "pyrecoll.h"
|
#include "pyrecoll.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
// Imported from pyrecoll
|
// Imported from pyrecoll
|
||||||
static PyObject *recoll_DocType;
|
static PyObject *recoll_DocType;
|
||||||
|
|
||||||
@ -42,7 +44,7 @@ typedef struct {
|
|||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
/* Type-specific fields go here. */
|
/* Type-specific fields go here. */
|
||||||
FileInterner *xtr;
|
FileInterner *xtr;
|
||||||
RclConfig *rclconfig;
|
std::shared_ptr<RclConfig> rclconfig;
|
||||||
recoll_DocObject *docobject;
|
recoll_DocObject *docobject;
|
||||||
} rclx_ExtractorObject;
|
} rclx_ExtractorObject;
|
||||||
|
|
||||||
@ -53,6 +55,7 @@ Extractor_dealloc(rclx_ExtractorObject *self)
|
|||||||
if (self->docobject) {
|
if (self->docobject) {
|
||||||
Py_DECREF(&self->docobject);
|
Py_DECREF(&self->docobject);
|
||||||
}
|
}
|
||||||
|
self->rclconfig.reset();
|
||||||
delete self->xtr;
|
delete self->xtr;
|
||||||
Py_TYPE(self)->tp_free((PyObject*)self);
|
Py_TYPE(self)->tp_free((PyObject*)self);
|
||||||
}
|
}
|
||||||
@ -66,7 +69,6 @@ Extractor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||||||
if (self == 0)
|
if (self == 0)
|
||||||
return 0;
|
return 0;
|
||||||
self->xtr = 0;
|
self->xtr = 0;
|
||||||
self->rclconfig = 0;
|
|
||||||
self->docobject = 0;
|
self->docobject = 0;
|
||||||
return (PyObject *)self;
|
return (PyObject *)self;
|
||||||
}
|
}
|
||||||
@ -90,7 +92,7 @@ Extractor_init(rclx_ExtractorObject *self, PyObject *args, PyObject *kwargs)
|
|||||||
Py_INCREF(dobj);
|
Py_INCREF(dobj);
|
||||||
|
|
||||||
self->rclconfig = dobj->rclconfig;
|
self->rclconfig = dobj->rclconfig;
|
||||||
self->xtr = new FileInterner(*dobj->doc, self->rclconfig,
|
self->xtr = new FileInterner(*dobj->doc, self->rclconfig.get(),
|
||||||
FileInterner::FIF_forPreview);
|
FileInterner::FIF_forPreview);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -198,7 +200,7 @@ Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args,
|
|||||||
mimetype << "] doc mimetype [" << self->docobject->doc->mimetype <<
|
mimetype << "] doc mimetype [" << self->docobject->doc->mimetype <<
|
||||||
"\n");
|
"\n");
|
||||||
if (ipath.empty() && !mimetype.compare(self->docobject->doc->mimetype)) {
|
if (ipath.empty() && !mimetype.compare(self->docobject->doc->mimetype)) {
|
||||||
status = FileInterner::idocToFile(temp, outfile, self->rclconfig,
|
status = FileInterner::idocToFile(temp, outfile, self->rclconfig.get(),
|
||||||
*self->docobject->doc);
|
*self->docobject->doc);
|
||||||
} else {
|
} else {
|
||||||
self->xtr->setTargetMType(mimetype);
|
self->xtr->setTargetMType(mimetype);
|
||||||
|
|||||||
@ -24,7 +24,6 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <set>
|
#include <set>
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
#include "rclinit.h"
|
#include "rclinit.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
@ -42,7 +41,7 @@ using namespace std;
|
|||||||
|
|
||||||
#include "pyrecoll.h"
|
#include "pyrecoll.h"
|
||||||
|
|
||||||
static RclConfig *rclconfig;
|
using namespace std;
|
||||||
|
|
||||||
#if PY_MAJOR_VERSION >=3
|
#if PY_MAJOR_VERSION >=3
|
||||||
# define Py_TPFLAGS_HAVE_ITER 0
|
# define Py_TPFLAGS_HAVE_ITER 0
|
||||||
@ -50,6 +49,13 @@ static RclConfig *rclconfig;
|
|||||||
#define PyLong_FromLong PyInt_FromLong
|
#define PyLong_FromLong PyInt_FromLong
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// To keep old code going after we moved the static rclconfig to the
|
||||||
|
// db object (to fix multiple dbs issues), we keep a copy of the last
|
||||||
|
// created rclconfig in RCLCONFIG. This is set into the doc object by
|
||||||
|
// doc_init, then reset to the db's by db::doc() or query::iter_next,
|
||||||
|
// the proper Doc creators.
|
||||||
|
static shared_ptr<RclConfig> RCLCONFIG;
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
/// SEARCHDATA SearchData code
|
/// SEARCHDATA SearchData code
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -308,7 +314,6 @@ Doc_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||||||
if (self == 0)
|
if (self == 0)
|
||||||
return 0;
|
return 0;
|
||||||
self->doc = 0;
|
self->doc = 0;
|
||||||
self->rclconfig = 0;
|
|
||||||
return (PyObject *)self;
|
return (PyObject *)self;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -320,7 +325,7 @@ Doc_init(recoll_DocObject *self, PyObject *, PyObject *)
|
|||||||
self->doc = new Rcl::Doc;
|
self->doc = new Rcl::Doc;
|
||||||
if (self->doc == 0)
|
if (self->doc == 0)
|
||||||
return -1;
|
return -1;
|
||||||
self->rclconfig = rclconfig;
|
self->rclconfig = RCLCONFIG;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -544,6 +549,11 @@ Doc_getattro(recoll_DocObject *self, PyObject *nameobj)
|
|||||||
PyErr_SetString(PyExc_AttributeError, "doc");
|
PyErr_SetString(PyExc_AttributeError, "doc");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
if (!self->rclconfig || !self->rclconfig->ok()) {
|
||||||
|
PyErr_SetString(PyExc_AttributeError,
|
||||||
|
"Configuration not initialized");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
PyObject *meth = PyObject_GenericGetAttr((PyObject*)self, nameobj);
|
PyObject *meth = PyObject_GenericGetAttr((PyObject*)self, nameobj);
|
||||||
if (meth) {
|
if (meth) {
|
||||||
@ -568,7 +578,7 @@ Doc_getattro(recoll_DocObject *self, PyObject *nameobj)
|
|||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
string key = rclconfig->fieldQCanon(name);
|
string key = self->rclconfig->fieldQCanon(name);
|
||||||
string value;
|
string value;
|
||||||
if (idocget(self, key, value)) {
|
if (idocget(self, key, value)) {
|
||||||
LOGDEB1("Doc_getattro: [" << key << "] -> [" << value << "]\n");
|
LOGDEB1("Doc_getattro: [" << key << "] -> [" << value << "]\n");
|
||||||
@ -586,8 +596,8 @@ Doc_setattr(recoll_DocObject *self, char *name, PyObject *value)
|
|||||||
PyErr_SetString(PyExc_AttributeError, "doc??");
|
PyErr_SetString(PyExc_AttributeError, "doc??");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (!rclconfig || !rclconfig->ok()) {
|
if (!self->rclconfig || !self->rclconfig->ok()) {
|
||||||
PyErr_SetString(PyExc_EnvironmentError,
|
PyErr_SetString(PyExc_AttributeError,
|
||||||
"Configuration not initialized");
|
"Configuration not initialized");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -615,7 +625,7 @@ Doc_setattr(recoll_DocObject *self, char *name, PyObject *value)
|
|||||||
}
|
}
|
||||||
string uvalue = PyBytes_AsString(putf8);
|
string uvalue = PyBytes_AsString(putf8);
|
||||||
Py_DECREF(putf8);
|
Py_DECREF(putf8);
|
||||||
string key = rclconfig->fieldQCanon(name);
|
string key = self->rclconfig->fieldQCanon(name);
|
||||||
|
|
||||||
LOGDEB0("Doc_setattr: doc " << self->doc << " [" << key << "] (" << name <<
|
LOGDEB0("Doc_setattr: doc " << self->doc << " [" << key << "] (" << name <<
|
||||||
") -> [" << uvalue << "]\n");
|
") -> [" << uvalue << "]\n");
|
||||||
@ -693,6 +703,11 @@ Doc_subscript(recoll_DocObject *self, PyObject *key)
|
|||||||
PyErr_SetString(PyExc_AttributeError, "doc??");
|
PyErr_SetString(PyExc_AttributeError, "doc??");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
if (!self->rclconfig || !self->rclconfig->ok()) {
|
||||||
|
PyErr_SetString(PyExc_AttributeError,
|
||||||
|
"Configuration not initialized");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
string name;
|
string name;
|
||||||
if (PyUnicode_Check(key)) {
|
if (PyUnicode_Check(key)) {
|
||||||
PyObject* utf8o = PyUnicode_AsUTF8String(key);
|
PyObject* utf8o = PyUnicode_AsUTF8String(key);
|
||||||
@ -710,7 +725,7 @@ Doc_subscript(recoll_DocObject *self, PyObject *key)
|
|||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
string skey = rclconfig->fieldQCanon(name);
|
string skey = self->rclconfig->fieldQCanon(name);
|
||||||
string value;
|
string value;
|
||||||
if (idocget(self, skey, value)) {
|
if (idocget(self, skey, value)) {
|
||||||
return PyUnicode_Decode(value.c_str(), value.size(), "UTF-8","replace");
|
return PyUnicode_Decode(value.c_str(), value.size(), "UTF-8","replace");
|
||||||
@ -807,7 +822,13 @@ static PyTypeObject recoll_DocType = {
|
|||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
/// QUERY Query object
|
/// QUERY Query object
|
||||||
|
|
||||||
struct recoll_DbObject;
|
// Python-level Db object. Declared here, before the Query section, presumably
// so that Query code (which reads connection->rclconfig) can see the complete
// type -- TODO confirm against the recoll_QueryObject definition.
typedef struct recoll_DbObject {
|
||||||
|
PyObject_HEAD
|
||||||
|
/* Type-specific fields go here. */
|
||||||
|
// Index handle: allocated in Db_init, deleted and zeroed in Db_close.
Rcl::Db *db;
|
||||||
|
// Configuration belonging to this db only: set in Db_init from recollinit(),
// released in Db_close. Rcl::Db is handed the raw pointer via .get().
std::shared_ptr<RclConfig> rclconfig;
|
||||||
|
} recoll_DbObject;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
/* Type-specific fields go here. */
|
/* Type-specific fields go here. */
|
||||||
@ -977,8 +998,8 @@ Query_execute(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
|
|||||||
// SearchData defaults to stemming in english
|
// SearchData defaults to stemming in english
|
||||||
// Use default for now but need to add way to specify language
|
// Use default for now but need to add way to specify language
|
||||||
string reason;
|
string reason;
|
||||||
Rcl::SearchData *sd = wasaStringToRcl(rclconfig, dostem ? stemlang : "",
|
Rcl::SearchData *sd = wasaStringToRcl(
|
||||||
utf8, reason);
|
self->connection->rclconfig.get(),dostem ? stemlang : "", utf8, reason);
|
||||||
|
|
||||||
if (!sd) {
|
if (!sd) {
|
||||||
PyErr_SetString(PyExc_ValueError, reason.c_str());
|
PyErr_SetString(PyExc_ValueError, reason.c_str());
|
||||||
@ -1034,7 +1055,7 @@ Query_executesd(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
|
|||||||
// array when enumerating keys. Also for url which is also formatted.
|
// array when enumerating keys. Also for url which is also formatted.
|
||||||
// But note that some fields are not copied, and are only reachable if
|
// But note that some fields are not copied, and are only reachable if
|
||||||
// one knows their name (e.g. xdocid).
|
// one knows their name (e.g. xdocid).
|
||||||
static void movedocfields(Rcl::Doc *doc)
|
static void movedocfields(const RclConfig* rclconfig, Rcl::Doc *doc)
|
||||||
{
|
{
|
||||||
printableUrl(rclconfig->getDefCharset(), doc->url,
|
printableUrl(rclconfig->getDefCharset(), doc->url,
|
||||||
doc->meta[Rcl::Doc::keyurl]);
|
doc->meta[Rcl::Doc::keyurl]);
|
||||||
@ -1065,7 +1086,7 @@ Query_iternext(PyObject *_self)
|
|||||||
PyErr_SetString(PyExc_EnvironmentError, "doc create failed");
|
PyErr_SetString(PyExc_EnvironmentError, "doc create failed");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
result->rclconfig = self->connection->rclconfig;
|
||||||
// We used to check against rowcount here, but this was wrong:
|
// We used to check against rowcount here, but this was wrong:
|
||||||
// xapian result count estimate are sometimes wrong, we must go on
|
// xapian result count estimate are sometimes wrong, we must go on
|
||||||
// fetching until we fail
|
// fetching until we fail
|
||||||
@ -1074,7 +1095,7 @@ Query_iternext(PyObject *_self)
|
|||||||
}
|
}
|
||||||
self->next++;
|
self->next++;
|
||||||
|
|
||||||
movedocfields(result->doc);
|
movedocfields(self->connection->rclconfig.get(), result->doc);
|
||||||
return (PyObject *)result;
|
return (PyObject *)result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1520,11 +1541,6 @@ static PyTypeObject recoll_QueryType = {
|
|||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
////// DB Db object code
|
////// DB Db object code
|
||||||
typedef struct recoll_DbObject {
|
|
||||||
PyObject_HEAD
|
|
||||||
/* Type-specific fields go here. */
|
|
||||||
Rcl::Db *db;
|
|
||||||
} recoll_DbObject;
|
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
Db_close(recoll_DbObject *self)
|
Db_close(recoll_DbObject *self)
|
||||||
@ -1534,6 +1550,7 @@ Db_close(recoll_DbObject *self)
|
|||||||
delete self->db;
|
delete self->db;
|
||||||
self->db = 0;
|
self->db = 0;
|
||||||
}
|
}
|
||||||
|
self->rclconfig.reset();
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1575,26 +1592,28 @@ Db_init(recoll_DbObject *self, PyObject *args, PyObject *kwargs)
|
|||||||
// recollinit repeatedly, which *should* be ok, except that it
|
// recollinit repeatedly, which *should* be ok, except that it
|
||||||
// resets the log file.
|
// resets the log file.
|
||||||
string reason;
|
string reason;
|
||||||
delete rclconfig;
|
|
||||||
if (confdir) {
|
if (confdir) {
|
||||||
string cfd = confdir;
|
string cfd = confdir;
|
||||||
rclconfig = recollinit(RCLINIT_PYTHON, 0, 0, reason, &cfd);
|
self->rclconfig = std::shared_ptr<RclConfig>(
|
||||||
|
recollinit(RCLINIT_PYTHON, 0, 0, reason, &cfd));
|
||||||
} else {
|
} else {
|
||||||
rclconfig = recollinit(RCLINIT_PYTHON, 0, 0, reason, 0);
|
self->rclconfig = std::shared_ptr<RclConfig>(
|
||||||
|
recollinit(RCLINIT_PYTHON, 0, 0, reason, 0));
|
||||||
}
|
}
|
||||||
|
RCLCONFIG = self->rclconfig;
|
||||||
LOGDEB("Db_init\n");
|
LOGDEB("Db_init\n");
|
||||||
|
|
||||||
if (rclconfig == 0) {
|
if (!self->rclconfig) {
|
||||||
PyErr_SetString(PyExc_EnvironmentError, reason.c_str());
|
PyErr_SetString(PyExc_EnvironmentError, reason.c_str());
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (!rclconfig->ok()) {
|
if (!self->rclconfig->ok()) {
|
||||||
PyErr_SetString(PyExc_EnvironmentError, "Bad config ?");
|
PyErr_SetString(PyExc_EnvironmentError, "Bad config ?");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
delete self->db;
|
delete self->db;
|
||||||
self->db = new Rcl::Db(rclconfig);
|
self->db = new Rcl::Db(self->rclconfig.get());
|
||||||
if (!self->db->open(writable ? Rcl::Db::DbUpd : Rcl::Db::DbRO)) {
|
if (!self->db->open(writable ? Rcl::Db::DbUpd : Rcl::Db::DbRO)) {
|
||||||
LOGERR("Db_init: db open error\n");
|
LOGERR("Db_init: db open error\n");
|
||||||
PyErr_SetString(PyExc_EnvironmentError, "Can't open index");
|
PyErr_SetString(PyExc_EnvironmentError, "Can't open index");
|
||||||
@ -1657,6 +1676,25 @@ Db_query(recoll_DbObject* self)
|
|||||||
return (PyObject *)result;
|
return (PyObject *)result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Db.doc(): create a new, blank Doc object which uses this db's configuration.
// Returns the new Doc, or 0 with a Python exception set: AttributeError when
// self->db is null, or whatever error the Doc constructor call left behind.
static PyObject *
|
||||||
|
Db_doc(recoll_DbObject* self)
|
||||||
|
{
|
||||||
|
LOGDEB("Db_doc\n");
|
||||||
|
if (self->db == 0) {
|
||||||
|
LOGERR("Db_doc: db not found " << self->db << "\n");
|
||||||
|
PyErr_SetString(PyExc_AttributeError, "db");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
// Instantiate the Doc type with no arguments.
recoll_DocObject *result = (recoll_DocObject *)
|
||||||
|
PyObject_CallObject((PyObject *)&recoll_DocType, 0);
|
||||||
|
if (!result)
|
||||||
|
return 0;
|
||||||
|
// Override the config installed by the Doc constructor with this db's own.
result->rclconfig = self->rclconfig;
|
||||||
|
// NOTE(review): this takes a reference on the Db, but no pointer to self is
// stored in the returned Doc (recoll_DocObject only holds doc and rclconfig),
// so nothing visible releases it -- looks like a reference leak; confirm.
Py_INCREF(self);
|
||||||
|
|
||||||
|
return (PyObject *)result;
|
||||||
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
Db_setAbstractParams(recoll_DbObject *self, PyObject *args, PyObject *kwargs)
|
Db_setAbstractParams(recoll_DbObject *self, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
@ -1667,7 +1705,7 @@ Db_setAbstractParams(recoll_DbObject *self, PyObject *args, PyObject *kwargs)
|
|||||||
&maxchars, &ctxwords))
|
&maxchars, &ctxwords))
|
||||||
return 0;
|
return 0;
|
||||||
if (self->db == 0) {
|
if (self->db == 0) {
|
||||||
LOGERR("Db_query: db not found " << self->db << "\n");
|
LOGERR("Db_setAbstractParams: db not found " << self->db << "\n");
|
||||||
PyErr_SetString(PyExc_AttributeError, "db id not found");
|
PyErr_SetString(PyExc_AttributeError, "db id not found");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -1905,6 +1943,9 @@ static PyMethodDef Db_methods[] = {
|
|||||||
{"query", (PyCFunction)Db_query, METH_NOARGS,
|
{"query", (PyCFunction)Db_query, METH_NOARGS,
|
||||||
"query() -> Query. Return a new, blank query object for this index."
|
"query() -> Query. Return a new, blank query object for this index."
|
||||||
},
|
},
|
||||||
|
{"doc", (PyCFunction)Db_doc, METH_NOARGS,
|
||||||
|
"doc() -> Doc. Return a new, blank doc object for this index."
|
||||||
|
},
|
||||||
{"cursor", (PyCFunction)Db_query, METH_NOARGS,
|
{"cursor", (PyCFunction)Db_query, METH_NOARGS,
|
||||||
"cursor() -> Query. Alias for query(). Return query object."
|
"cursor() -> Query. Alias for query(). Return query object."
|
||||||
},
|
},
|
||||||
|
|||||||
@ -19,13 +19,16 @@
|
|||||||
|
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
class RclConfig;
|
class RclConfig;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
/* Type-specific fields go here. */
|
/* Type-specific fields go here. */
|
||||||
Rcl::Doc *doc;
|
Rcl::Doc *doc;
|
||||||
/* Each doc object has a pointer to the global config, for convenience */
|
/* Each doc object shares ownership of the config of the db which created it (or of the last created config when built directly) */
|
||||||
RclConfig *rclconfig;
|
std::shared_ptr<RclConfig> rclconfig;
|
||||||
} recoll_DocObject;
|
} recoll_DocObject;
|
||||||
|
|
||||||
#define PYRECOLL_PACKAGE "recoll."
|
#define PYRECOLL_PACKAGE "recoll."
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user