resultstore: 1st working. Interface needs improvements
This commit is contained in:
parent
3479e7cd85
commit
ea9b5ab9eb
@ -156,6 +156,8 @@ query/filtseq.cpp \
|
||||
query/filtseq.h \
|
||||
query/plaintorich.cpp \
|
||||
query/plaintorich.h \
|
||||
query/qresultstore.cpp \
|
||||
query/qresultstore.h \
|
||||
query/recollq.cpp \
|
||||
query/recollq.h \
|
||||
query/reslistpager.cpp \
|
||||
|
||||
@ -828,19 +828,6 @@ typedef struct recoll_DbObject {
|
||||
std::shared_ptr<RclConfig> rclconfig;
|
||||
} recoll_DbObject;
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
/* Type-specific fields go here. */
|
||||
Rcl::Query *query;
|
||||
int next; // Index of result to be fetched next or -1 if uninit
|
||||
int rowcount; // Number of records returned by last execute
|
||||
string *sortfield; // Need to allocate in here, main program is C.
|
||||
int ascending;
|
||||
int arraysize; // Default size for fetchmany
|
||||
recoll_DbObject* connection;
|
||||
bool fetchtext;
|
||||
} recoll_QueryObject;
|
||||
|
||||
PyDoc_STRVAR(doc_Query_close,
|
||||
"close(). Deallocate query. Object is unusable after the call."
|
||||
);
|
||||
@ -1521,7 +1508,7 @@ PyDoc_STRVAR(doc_QueryObject,
|
||||
"Recoll Query objects are used to execute index searches. \n"
|
||||
"They must be created by the Db.query() method.\n"
|
||||
);
|
||||
static PyTypeObject recoll_QueryType = {
|
||||
PyTypeObject recoll_QueryType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"_recoll.Query", /*tp_name*/
|
||||
sizeof(recoll_QueryObject), /*tp_basicsize*/
|
||||
@ -2195,6 +2182,12 @@ PyInit__recoll(void)
|
||||
Py_INCREF(&rclx_ExtractorType);
|
||||
PyModule_AddObject(module, "Extractor", (PyObject *)&rclx_ExtractorType);
|
||||
|
||||
if (PyType_Ready(&recoll_QResultStoreType) < 0)
|
||||
INITERROR;
|
||||
Py_INCREF(&recoll_QResultStoreType);
|
||||
PyModule_AddObject(module, "QResultStore", (PyObject *)&recoll_QResultStoreType);
|
||||
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
return module;
|
||||
#endif
|
||||
|
||||
@ -22,8 +22,13 @@
|
||||
#include <Python.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
class RclConfig;
|
||||
namespace Rcl {
|
||||
class Doc;
|
||||
class Query;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
@ -33,7 +38,24 @@ typedef struct {
|
||||
std::shared_ptr<RclConfig> rclconfig;
|
||||
} recoll_DocObject;
|
||||
|
||||
extern PyTypeObject rclx_ExtractorType;
|
||||
struct recoll_DbObject;
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
/* Type-specific fields go here. */
|
||||
Rcl::Query *query;
|
||||
int next; // Index of result to be fetched next or -1 if uninit
|
||||
int rowcount; // Number of records returned by last execute
|
||||
std::string *sortfield; // Need to allocate in here, main program is C.
|
||||
int ascending;
|
||||
int arraysize; // Default size for fetchmany
|
||||
recoll_DbObject* connection;
|
||||
bool fetchtext;
|
||||
} recoll_QueryObject;
|
||||
|
||||
extern PyTypeObject recoll_DocType;
|
||||
extern PyTypeObject recoll_QueryType;
|
||||
extern PyTypeObject rclx_ExtractorType;
|
||||
extern PyTypeObject recoll_QResultStoreType;
|
||||
|
||||
#endif // _PYRECOLL_H_INCLUDED_
|
||||
|
||||
321
src/python/recoll/pyresultstore.cpp
Normal file
321
src/python/recoll/pyresultstore.cpp
Normal file
@ -0,0 +1,321 @@
|
||||
/* Copyright (C) 2007-2020 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
#include <bytesobject.h>
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
|
||||
#include "qresultstore.h"
|
||||
|
||||
#include "pyrecoll.h"
|
||||
#include "log.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Python 2/3 source compatibility shims for this file.
#if PY_MAJOR_VERSION < 3
// Python 2 build: map the PyLong constructor used below to its PyInt
// equivalent.
#define PyLong_FromLong PyInt_FromLong
#else
// Python 3 build: give the iterator flag a neutral value of 0.
#define Py_TPFLAGS_HAVE_ITER 0
#endif
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
/* Type-specific fields go here. */
|
||||
Rcl::QResultStore *store;
|
||||
} recoll_QResultStoreObject;
|
||||
|
||||
static void
|
||||
QResultStore_dealloc(recoll_QResultStoreObject *self)
|
||||
{
|
||||
LOGDEB("QResultStore_dealloc.\n");
|
||||
delete self->store;
|
||||
Py_TYPE(self)->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
// tp_new slot: allocate the Python object and attach a fresh, empty native
// store to it.
//
// Returns a new reference, or NULL with a Python exception set if either the
// Python object or the native store could not be created.
static PyObject *
QResultStore_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    LOGDEB("QResultStore_new\n");

    recoll_QResultStoreObject *self =
        (recoll_QResultStoreObject *)type->tp_alloc(type, 0);
    if (nullptr == self)
        return nullptr;

    // Make sure the deallocator sees a null pointer if construction fails.
    self->store = nullptr;
    try {
        self->store = new Rcl::QResultStore();
    } catch (...) {
        // A C++ exception must not unwind through the interpreter's C
        // frames. Release the half-built object and report the failure as a
        // MemoryError (allocation failure being the most likely cause).
        Py_DECREF(self);
        return PyErr_NoMemory();
    }
    return (PyObject *)self;
}
|
||||
|
||||
PyDoc_STRVAR(qrs_doc_QResultStoreObject,
|
||||
"QResultStore()\n"
|
||||
"\n"
|
||||
"A QResultStore can efficiently store query result documents.\n"
|
||||
);
|
||||
|
||||
static int
|
||||
QResultStore_init(
|
||||
recoll_QResultStoreObject *self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
LOGDEB("QResultStore_init\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(
|
||||
qrs_doc_storeQuery,
|
||||
"storeQuery(query, fieldspec=[], isinc=False)\n"
|
||||
"\n"
|
||||
"Stores the results from the input query object, possibly "
|
||||
"excluding/including the specified fields.\n"
|
||||
);
|
||||
|
||||
static PyObject *
|
||||
QResultStore_storeQuery(recoll_QResultStoreObject* self, PyObject *args,
|
||||
PyObject *kwargs)
|
||||
{
|
||||
static const char* kwlist[] = {"query", "fieldspec", "isinc", NULL};
|
||||
PyObject *q{nullptr};
|
||||
PyObject *fieldspec{nullptr};
|
||||
PyObject *isinco = 0;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!|OO", (char**)kwlist,
|
||||
&recoll_QueryType, &q, &fieldspec, &isinco))
|
||||
return nullptr;
|
||||
|
||||
recoll_QueryObject *query = (recoll_QueryObject*)q;
|
||||
if (nullptr == query->query) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"query not initialised (null query ?)");
|
||||
return nullptr;
|
||||
}
|
||||
bool isinc{false};
|
||||
if (nullptr != isinco && PyObject_IsTrue(isinco))
|
||||
isinc = true;
|
||||
|
||||
std::set<std::string> fldspec;
|
||||
if (nullptr != fieldspec) {
|
||||
// fieldspec must be either single string or list of strings
|
||||
if (PyUnicode_Check(fieldspec)) {
|
||||
PyObject *utf8o = PyUnicode_AsUTF8String(fieldspec);
|
||||
if (nullptr == utf8o) {
|
||||
PyErr_SetString(PyExc_AttributeError,
|
||||
"storeQuery: can't encode field name??");
|
||||
return nullptr;
|
||||
}
|
||||
fldspec.insert(PyBytes_AsString(utf8o));
|
||||
Py_DECREF(utf8o);
|
||||
} else if (PySequence_Check(fieldspec)) {
|
||||
for (Py_ssize_t i = 0; i < PySequence_Size(fieldspec); i++) {
|
||||
PyObject *utf8o =
|
||||
PyUnicode_AsUTF8String(PySequence_GetItem(fieldspec, i));
|
||||
if (nullptr == utf8o) {
|
||||
PyErr_SetString(PyExc_AttributeError,
|
||||
"storeQuery: can't encode field name??");
|
||||
return nullptr;
|
||||
}
|
||||
fldspec.insert(PyBytes_AsString(utf8o));
|
||||
Py_DECREF(utf8o);
|
||||
}
|
||||
} else {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"fieldspec arg must be str or sequence of str");
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
self->store->storeQuery(*(query->query), fldspec, isinc);
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(
|
||||
qrs_doc_getCount,
|
||||
"getCount()\n"
|
||||
"\n"
|
||||
"Return the stored results count.\n"
|
||||
);
|
||||
|
||||
static PyObject *
|
||||
QResultStore_getCount(recoll_QResultStoreObject* self, PyObject *args)
|
||||
{
|
||||
return PyLong_FromLong(self->store->getCount());
|
||||
}
|
||||
|
||||
|
||||
PyDoc_STRVAR(
|
||||
qrs_doc_getField,
|
||||
"getField(index, fieldname)\n"
|
||||
"\n"
|
||||
"Retrieve tha value of field <fieldname> from result at index <index>.\n"
|
||||
);
|
||||
|
||||
static PyObject *
|
||||
QResultStore_getField(recoll_QResultStoreObject* self, PyObject *args)
|
||||
{
|
||||
int index;
|
||||
const char *fieldname;
|
||||
if (!PyArg_ParseTuple(args, "is", &index, &fieldname)) {
|
||||
return nullptr;
|
||||
}
|
||||
const char *result = self->store->fieldValue(index, fieldname);
|
||||
if (nullptr == result) {
|
||||
Py_RETURN_NONE;
|
||||
} else {
|
||||
return PyBytes_FromString(result);
|
||||
}
|
||||
}
|
||||
|
||||
// Method table for the QResultStore type. The calling-convention flags must
// match the C signature of each function:
//  - storeQuery takes (self, args, kwargs): METH_VARARGS|METH_KEYWORDS
//  - getCount takes no Python arguments:    METH_NOARGS
//  - getField takes (self, args):           METH_VARARGS
static PyMethodDef QResultStore_methods[] = {
    {"storeQuery", (PyCFunction)QResultStore_storeQuery,
     METH_VARARGS|METH_KEYWORDS, qrs_doc_storeQuery},
    {"getCount", (PyCFunction)QResultStore_getCount,
     METH_NOARGS, qrs_doc_getCount},
    {"getField", (PyCFunction)QResultStore_getField,
     METH_VARARGS, qrs_doc_getField},

    {NULL}  /* Sentinel */
};
|
||||
|
||||
// Type object for _recoll.QResultStore. Non-static: it is declared extern in
// pyrecoll.h. The initializer is positional, so the order of the entries
// below must not change.
PyTypeObject recoll_QResultStoreType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_recoll.QResultStore",                 /* tp_name */
    sizeof(recoll_QResultStoreObject),      /* tp_basicsize */
    0,                                      /* tp_itemsize */
    (destructor)QResultStore_dealloc,       /* tp_dealloc */
    0,                                      /* tp_print */
    0,                                      /* tp_getattr */
    0,                                      /* tp_setattr */
    0,                                      /* tp_compare */
    0,                                      /* tp_repr */
    0,                                      /* tp_as_number */
    0,                                      /* tp_as_sequence */
    0,                                      /* tp_as_mapping */
    0,                                      /* tp_hash */
    0,                                      /* tp_call */
    0,                                      /* tp_str */
    0,                                      /* tp_getattro */
    0,                                      /* tp_setattro */
    0,                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /* tp_flags */
    qrs_doc_QResultStoreObject,             /* tp_doc */
    0,                                      /* tp_traverse */
    0,                                      /* tp_clear */
    0,                                      /* tp_richcompare */
    0,                                      /* tp_weaklistoffset */
    0,                                      /* tp_iter */
    0,                                      /* tp_iternext */
    QResultStore_methods,                   /* tp_methods */
    0,                                      /* tp_members */
    0,                                      /* tp_getset */
    0,                                      /* tp_base */
    0,                                      /* tp_dict */
    0,                                      /* tp_descr_get */
    0,                                      /* tp_descr_set */
    0,                                      /* tp_dictoffset */
    (initproc)QResultStore_init,            /* tp_init */
    0,                                      /* tp_alloc */
    QResultStore_new,                       /* tp_new */
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Module methods
|
||||
static PyMethodDef rclrstore_methods[] = {
|
||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
|
||||
PyDoc_STRVAR(pyrclrstore_doc_string,
|
||||
"Utility module for efficiently storing many query results.\n");
|
||||
|
||||
struct module_state {
|
||||
PyObject *error;
|
||||
};
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))
|
||||
#else
|
||||
#define GETSTATE(m) (&_state)
|
||||
static struct module_state _state;
|
||||
#endif
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
// m_traverse hook: visit the exception object held in the module state.
// The parameter names visit and arg are required by the Py_VISIT macro.
static int rclrstore_traverse(PyObject *m, visitproc visit, void *arg)
{
    struct module_state *state = GETSTATE(m);
    Py_VISIT(state->error);
    return 0;
}
|
||||
|
||||
// m_clear hook: release the exception object held in the module state.
static int rclrstore_clear(PyObject *m)
{
    struct module_state *state = GETSTATE(m);
    Py_CLEAR(state->error);
    return 0;
}
|
||||
|
||||
// Python 3 module definition for _rclrstore. Positional initializer: keep
// the entries in this order.
static struct PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT,
    "_rclrstore",                   /* module name */
    NULL,                           /* no docstring here: __doc__ is added
                                       by the init function */
    sizeof(struct module_state),    /* size of the per-module state */
    rclrstore_methods,              /* module-level functions (none) */
    NULL,
    rclrstore_traverse,             /* traverse hook */
    rclrstore_clear,                /* clear hook */
    NULL
};
|
||||
|
||||
#define INITERROR return NULL
|
||||
extern "C" PyObject *
|
||||
PyInit__rclrstore(void)
|
||||
#else
|
||||
#define INITERROR return
|
||||
PyMODINIT_FUNC
|
||||
init__rclrstore(void)
|
||||
#endif
|
||||
{
|
||||
// Note: we can't call recollinit here, because the confdir is only really
|
||||
// known when the first db object is created (it is an optional parameter).
|
||||
// Using a default here may end up with variables such as stripchars being
|
||||
// wrong
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
PyObject *module = PyModule_Create(&moduledef);
|
||||
#else
|
||||
PyObject *module = Py_InitModule("_rclrstore", rclrstore_methods);
|
||||
#endif
|
||||
if (module == NULL)
|
||||
INITERROR;
|
||||
|
||||
struct module_state *st = GETSTATE(module);
|
||||
// The first parameter is a char *. Hopefully we don't initialize
|
||||
// modules too often...
|
||||
st->error = PyErr_NewException(strdup("_rclrstore.Error"), NULL, NULL);
|
||||
if (st->error == NULL) {
|
||||
Py_DECREF(module);
|
||||
INITERROR;
|
||||
}
|
||||
|
||||
if (PyType_Ready(&recoll_QResultStoreType) < 0)
|
||||
INITERROR;
|
||||
Py_INCREF((PyObject*)&recoll_QResultStoreType);
|
||||
PyModule_AddObject(module, "QResultStore",
|
||||
(PyObject *)&recoll_QResultStoreType);
|
||||
|
||||
PyModule_AddStringConstant(module, "__doc__", pyrclrstore_doc_string);
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
return module;
|
||||
#endif
|
||||
}
|
||||
23
src/python/recoll/recoll/qresultstore.py
Normal file
23
src/python/recoll/recoll/qresultstore.py
Normal file
@ -0,0 +1,23 @@
|
||||
# Copyright (C) 2020 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
# We used to have two C extensions: recoll and rclextract, which was a really
|
||||
# bad idea. They are now merged into the _recoll C extension module. The two
|
||||
# python modules recoll.py and rclextract.py only exist for compatibility (for
|
||||
# now: maybe we'll do something with them in the future).
|
||||
|
||||
|
||||
from ._recoll import QResultStore
|
||||
@ -38,13 +38,15 @@ module1 = Extension('_recoll',
|
||||
os.path.join(top, 'internfile'),
|
||||
os.path.join(top, 'rcldb'),
|
||||
os.path.join(top, 'query'),
|
||||
os.path.join(top, 'unac')
|
||||
os.path.join(top, 'unac'),
|
||||
os.path.join(top, 'testmains')
|
||||
],
|
||||
extra_compile_args = extra_compile_args,
|
||||
libraries = libraries,
|
||||
library_dirs = library_dirs,
|
||||
runtime_library_dirs = runtime_library_dirs,
|
||||
sources = [os.path.join(pytop, 'pyrecoll.cpp'),
|
||||
os.path.join(pytop, 'pyresultstore.cpp'),
|
||||
os.path.join(pytop, 'pyrclextract.cpp')
|
||||
])
|
||||
|
||||
|
||||
@ -32,15 +32,17 @@
|
||||
#include "rcldoc.h"
|
||||
#include "rclquery.h"
|
||||
|
||||
namespace Rcl {
|
||||
|
||||
class QResultStore::Internal {
|
||||
public:
|
||||
bool testentry(const std::pair<std::string,std::string>& entry) {
|
||||
return !entry.second.empty() &&
|
||||
excludedfields.find(entry.first) == excludedfields.end();
|
||||
(isinc ? fieldspec.find(entry.first) != fieldspec.end() :
|
||||
fieldspec.find(entry.first) == fieldspec.end());
|
||||
}
|
||||
|
||||
std::map<std::string, int> keyidx;
|
||||
int ndocs{0};
|
||||
// Notes: offsets[0] is always 0, not really useful, simpler this
|
||||
// way. Also could use simple C array instead of c++ vector...
|
||||
struct docoffs {
|
||||
@ -51,7 +53,8 @@ public:
|
||||
std::vector<int> offsets;
|
||||
};
|
||||
std::vector<struct docoffs> docs;
|
||||
std::set<std::string> excludedfields;
|
||||
std::set<std::string> fieldspec;
|
||||
bool isinc{false};
|
||||
};
|
||||
|
||||
QResultStore::QResultStore()
|
||||
@ -63,14 +66,17 @@ QResultStore::~QResultStore()
|
||||
delete m;
|
||||
}
|
||||
|
||||
|
||||
//{"author", "ipath", "rcludi", "relevancyrating", "sig", "abstract", "caption",
|
||||
// "filename", "origcharset", "sig"};
|
||||
// For reference : Fields normally excluded by uprcl:
|
||||
// {"author", "ipath", "rcludi", "relevancyrating", "sig", "abstract", "caption",
|
||||
// "filename", "origcharset", "sig"};
|
||||
|
||||
|
||||
bool QResultStore::storeQuery(Rcl::Query& query, std::set<std::string> excl)
|
||||
bool QResultStore::storeQuery(Rcl::Query& query, std::set<std::string> fldspec,
|
||||
bool isinc)
|
||||
{
|
||||
m->excludedfields = excl;
|
||||
m->fieldspec = fldspec;
|
||||
m->isinc = isinc;
|
||||
|
||||
/////////////
|
||||
// Enumerate all existing keys and assign array indexes for
|
||||
// them. Count documents while we are at it.
|
||||
@ -81,10 +87,11 @@ bool QResultStore::storeQuery(Rcl::Query& query, std::set<std::string> excl)
|
||||
{"fbytes", 4},
|
||||
{"dbytes", 5}
|
||||
};
|
||||
m->ndocs = 0;
|
||||
for (;;m->ndocs++) {
|
||||
|
||||
int count = 0;
|
||||
for (;;count++) {
|
||||
Rcl::Doc doc;
|
||||
if (!query.getDoc(m->ndocs, doc, false)) {
|
||||
if (!query.getDoc(count, doc, false)) {
|
||||
break;
|
||||
}
|
||||
for (const auto& entry : doc.meta) {
|
||||
@ -101,9 +108,9 @@ bool QResultStore::storeQuery(Rcl::Query& query, std::set<std::string> excl)
|
||||
///////
|
||||
// Populate the main array with doc-equivalent structures.
|
||||
|
||||
m->docs.resize(m->ndocs);
|
||||
m->docs.resize(count);
|
||||
|
||||
for (int i = 0; i < m->ndocs; i++) {
|
||||
for (int i = 0; i < count; i++) {
|
||||
Rcl::Doc doc;
|
||||
if (!query.getDoc(i, doc, false)) {
|
||||
break;
|
||||
@ -168,20 +175,25 @@ bool QResultStore::storeQuery(Rcl::Query& query, std::set<std::string> excl)
|
||||
return true;
|
||||
}
|
||||
|
||||
const char *QResultStore::fieldvalue(int docindex, const std::string& fldname)
|
||||
int QResultStore::getCount()
|
||||
{
|
||||
if (docindex < 0 || docindex >= m->ndocs) {
|
||||
return int(m->docs.size());
|
||||
}
|
||||
|
||||
const char *QResultStore::fieldValue(int docindex, const std::string& fldname)
|
||||
{
|
||||
if (docindex < 0 || docindex >= int(m->docs.size())) {
|
||||
return nullptr;
|
||||
}
|
||||
auto& vdoc = m->docs[docindex];
|
||||
|
||||
auto it = m->keyidx.find(fldname);
|
||||
if (it == m->keyidx.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
if (it->second < 0 || it->second >= int(vdoc.offsets.size())) {
|
||||
if (it == m->keyidx.end() ||
|
||||
it->second < 0 || it->second >= int(vdoc.offsets.size())) {
|
||||
//??
|
||||
return nullptr;
|
||||
}
|
||||
return vdoc.base + vdoc.offsets[it->second];
|
||||
}
|
||||
|
||||
} // namespace Rcl
|
||||
|
||||
@ -17,24 +17,49 @@
|
||||
* Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#ifndef _QRESULTSTORE_H_INCLUDED_
|
||||
#define _QRESULTSTORE_H_INCLUDED_
|
||||
|
||||
/**
|
||||
* Implement an efficient way to store the whole or part of a query result set.
|
||||
* This would naturally be done as a vector<Rcl::Doc>, but the natural
|
||||
* way leads to a huge space waste (8-10x), which may be a problem in
|
||||
* some cases. This is mostly used by the uprcl Media Server.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <set>
|
||||
|
||||
namespace Rcl {
|
||||
class Query;
|
||||
}
|
||||
|
||||
class QResultStore {
|
||||
public:
|
||||
QResultStore();
|
||||
~QResultStore();
|
||||
|
||||
bool storeQuery(Rcl::Query& q, std::set<std::string> excluded = {});
|
||||
const char *fieldvalue(int docindex, const std::string& fldname);
|
||||
/**
|
||||
* Fetch and store the results of the input query.
|
||||
*
|
||||
* @param q the executed query object to use for fetching results.
|
||||
* @param fldspec list of fields to be excluded or included.
|
||||
* @param isinc if true, the field list defines the fields to be stored,
|
||||
* else, those to be excluded.
|
||||
*/
|
||||
bool storeQuery(Rcl::Query& q, std::set<std::string> fldspec = {},
|
||||
bool isinc = false);
|
||||
|
||||
/** Retrieve count of stored results */
|
||||
int getCount();
|
||||
|
||||
/**
|
||||
* Retrieve field value.
|
||||
*
|
||||
* @param docindex index in query results.
|
||||
* @param fldname field name.
|
||||
*/
|
||||
const char *fieldValue(int docindex, const std::string& fldname);
|
||||
|
||||
|
||||
QResultStore(const QResultStore&) = delete;
|
||||
QResultStore& operator=(const QResultStore&) = delete;
|
||||
@ -43,4 +68,5 @@ private:
|
||||
Internal *m{nullptr};
|
||||
};
|
||||
|
||||
}
|
||||
#endif /* _QRESULTSTORE_H_INCLUDED_ */
|
||||
|
||||
@ -39,7 +39,7 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
|
||||
$(DEFS)
|
||||
|
||||
noinst_PROGRAMS = textsplit utf8iter fstreewalk rclconfig hldata unac mbox \
|
||||
circache wipedir mimetype pathut fileudi x11mon rclqdocmem
|
||||
circache wipedir mimetype pathut fileudi x11mon trqrstore
|
||||
|
||||
circache_SOURCES = trcircache.cpp
|
||||
circache_LDADD = ../librecoll.la
|
||||
@ -80,5 +80,5 @@ wipedir_LDADD = ../librecoll.la
|
||||
x11mon_SOURCES = trx11mon.cpp
|
||||
x11mon_LDADD = ../utils/x11mon.o ../librecoll.la -lX11
|
||||
|
||||
rclqdocmem_SOURCES = rclqdocmem.cpp qresultstore.cpp
|
||||
rclqdocmem_LDADD = ../librecoll.la
|
||||
trqrstore_SOURCES = trqrstore.cpp
|
||||
trqrstore_LDADD = ../librecoll.la
|
||||
|
||||
@ -306,8 +306,7 @@ int main(int argc, char *argv[])
|
||||
// This uses 19 MB of storage for the audio index, and 72 MB for
|
||||
// the main one (less keys->less gain)
|
||||
{
|
||||
#if 1
|
||||
QResultStore store;
|
||||
Rcl::QResultStore store;
|
||||
bool result = store.storeQuery(
|
||||
query, {"author", "ipath", "rcludi", "relevancyrating",
|
||||
"sig","abstract", "caption", "filename", "origcharset", "sig"});
|
||||
@ -316,121 +315,7 @@ int main(int argc, char *argv[])
|
||||
return 1;
|
||||
}
|
||||
meminfo("After storing");
|
||||
std::cerr << "url 20 " << store.fieldvalue(20, "url") << "\n";
|
||||
#else
|
||||
/////////////
|
||||
// Enumerate all existing keys and assign array indexes for
|
||||
// them. Count documents while we are at it.
|
||||
std::map<std::string, int> keyidx {
|
||||
{"url",0},
|
||||
{"mimetype", 1},
|
||||
{"fmtime", 2},
|
||||
{"dmtime", 3},
|
||||
{"fbytes", 4},
|
||||
{"dbytes", 5},
|
||||
};
|
||||
int ndocs = 0;
|
||||
for (;;ndocs++) {
|
||||
Rcl::Doc doc;
|
||||
if (!query.getDoc(ndocs, doc, false)) {
|
||||
break;
|
||||
}
|
||||
for (const auto& entry : doc.meta) {
|
||||
if (testentry(entry)) {
|
||||
auto it = keyidx.find(entry.first);
|
||||
if (it == keyidx.end()) {
|
||||
int idx = keyidx.size();
|
||||
keyidx.insert({entry.first, idx});
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
// The audio db has 49 keys !
|
||||
std::cerr << "Found " << keyidx.size() << " different keys\n";
|
||||
|
||||
///////
|
||||
// Populate the main array with doc-equivalent structures.
|
||||
|
||||
// Notes: offsets[0] is always 0, not really useful, simpler this way. Also
|
||||
// could use simple C array instead of c++ vector...
|
||||
struct docoffs {
|
||||
~docoffs() {
|
||||
free(base);
|
||||
}
|
||||
char *base{nullptr};
|
||||
std::vector<int> offsets;
|
||||
};
|
||||
std::vector<struct docoffs> docs;
|
||||
docs.resize(ndocs);
|
||||
meminfo("After resize");
|
||||
|
||||
for (int i = 0; i < ndocs; i++) {
|
||||
Rcl::Doc doc;
|
||||
if (!query.getDoc(i, doc, false)) {
|
||||
break;
|
||||
}
|
||||
auto& vdoc = docs[i];
|
||||
vdoc.offsets.resize(keyidx.size());
|
||||
int nbytes =
|
||||
doc.url.size() + 1 +
|
||||
doc.mimetype.size() + 1 +
|
||||
doc.fmtime.size() + 1 +
|
||||
doc.dmtime.size() + 1 +
|
||||
doc.fbytes.size() + 1 +
|
||||
doc.dbytes.size() + 1;
|
||||
for (const auto& entry : doc.meta) {
|
||||
if (testentry(entry)) {
|
||||
if (keyidx.find(entry.first) == keyidx.end()) {
|
||||
std::cerr << "Unknown key: " << entry.first << "\n";
|
||||
abort();
|
||||
}
|
||||
nbytes += entry.second.size() + 1;
|
||||
}
|
||||
}
|
||||
|
||||
char *cp = (char*)malloc(nbytes);
|
||||
if (nullptr == cp) {
|
||||
abort();
|
||||
}
|
||||
|
||||
#define STRINGCPCOPY(CHARP, S) do { \
|
||||
memcpy(CHARP, S.c_str(), S.size()+1); \
|
||||
CHARP += S.size()+1; \
|
||||
} while (false);
|
||||
|
||||
vdoc.base = cp;
|
||||
vdoc.offsets[0] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, doc.url);
|
||||
vdoc.offsets[1] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, doc.mimetype);
|
||||
vdoc.offsets[2] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, doc.fmtime);
|
||||
vdoc.offsets[3] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, doc.dmtime);
|
||||
vdoc.offsets[4] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, doc.fbytes);
|
||||
vdoc.offsets[5] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, doc.dbytes);
|
||||
for (const auto& entry : doc.meta) {
|
||||
if (testentry(entry)) {
|
||||
auto it = keyidx.find(entry.first);
|
||||
if (it == keyidx.end()) {
|
||||
std::cerr << "Unknown key: " << entry.first << "\n";
|
||||
abort();
|
||||
}
|
||||
if (it->second <= 5) {
|
||||
// Already done ! Storing another address would be
|
||||
// wasteful and crash when freeing...
|
||||
continue;
|
||||
}
|
||||
vdoc.offsets[it->second] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, entry.second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
meminfo("After storing");
|
||||
#endif
|
||||
std::cerr << "url 20 " << store.fieldValue(20, "url") << "\n";
|
||||
}
|
||||
#elif defined(STORE_ALLOBSTACK)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user