general field name handling cleanup + sort facility in rclquery
This commit is contained in:
parent
5cc1de9aad
commit
7d30485f87
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.57 2008-09-08 16:49:10 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.58 2008-09-16 08:18:30 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -467,27 +467,21 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
|
||||
|
||||
// Build a direct map avoiding all indirections for field to
|
||||
// prefix translation
|
||||
// Add direct prefixes
|
||||
// Add direct prefixes from the [prefixes] section
|
||||
list<string>tps = m_fields->getNames("prefixes");
|
||||
for (list<string>::const_iterator it = tps.begin(); it != tps.end();it++) {
|
||||
string val;
|
||||
m_fields->get(*it, val, "prefixes");
|
||||
m_fldtopref[*it] = val;
|
||||
m_fldtopfx[stringtolower(*it)] = val;
|
||||
}
|
||||
// Add prefixes for aliases:
|
||||
// Add prefixes for aliases (build alias-to-canonic map while we're at it)
|
||||
tps = m_fields->getNames("aliases");
|
||||
for (list<string>::const_iterator it = tps.begin(); it != tps.end();it++) {
|
||||
string canonic = *it; // canonic name
|
||||
string canonic = stringtolower(*it); // canonic name
|
||||
string pfx;
|
||||
map<string,string>::const_iterator pit = m_fldtopref.find(canonic);
|
||||
if (pit != m_fldtopref.end()) {
|
||||
map<string,string>::const_iterator pit = m_fldtopfx.find(canonic);
|
||||
if (pit != m_fldtopfx.end()) {
|
||||
pfx = pit->second;
|
||||
} else {
|
||||
// Note: it's perfectly normal to have no prefix for the canonic
|
||||
// name, this could be a stored, not indexed field
|
||||
LOGDEB2(("RclConfig::readFieldsConfig: no pfx for canonic [%s]\n",
|
||||
canonic.c_str()));
|
||||
continue;
|
||||
}
|
||||
string aliases;
|
||||
m_fields->get(canonic, aliases, "aliases");
|
||||
@ -495,12 +489,14 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
|
||||
stringToStrings(aliases, l);
|
||||
for (list<string>::const_iterator ait = l.begin();
|
||||
ait != l.end(); ait++) {
|
||||
m_fldtopref[*ait] = pfx;
|
||||
if (!pfx.empty())
|
||||
m_fldtopfx[stringtolower(*ait)] = pfx;
|
||||
m_aliastocanon[stringtolower(*ait)] = canonic;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
for (map<string,string>::const_iterator it = m_fldtopref.begin();
|
||||
it != m_fldtopref.end(); it++) {
|
||||
for (map<string,string>::const_iterator it = m_fldtopfx.begin();
|
||||
it != m_fldtopfx.end(); it++) {
|
||||
LOGDEB(("RclConfig::readFieldsConfig: [%s] => [%s]\n",
|
||||
it->first.c_str(), it->second.c_str()));
|
||||
}
|
||||
@ -512,8 +508,9 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
|
||||
stringToStrings(ss, sl);
|
||||
for (list<string>::const_iterator it = sl.begin();
|
||||
it != sl.end(); it++) {
|
||||
LOGDEB(("Inserting [%s] in stored list\n", (*it).c_str()));
|
||||
m_storedFields.insert(*it);
|
||||
string fld = fieldCanon(stringtolower(*it));
|
||||
LOGDEB(("Inserting [%s] in stored list\n", fld.c_str()));
|
||||
m_storedFields.insert(fld);
|
||||
}
|
||||
}
|
||||
|
||||
@ -521,10 +518,11 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
|
||||
}
|
||||
|
||||
// Return term indexing prefix for field name (ie: "filename" -> "XSFN")
|
||||
// The input must be a canonical field name (alias translation done already)
|
||||
bool RclConfig::getFieldPrefix(const string& fld, string &pfx)
|
||||
{
|
||||
map<string,string>::const_iterator pit = m_fldtopref.find(fld);
|
||||
if (pit != m_fldtopref.end()) {
|
||||
map<string,string>::const_iterator pit = m_fldtopfx.find(fld);
|
||||
if (pit != m_fldtopfx.end()) {
|
||||
pfx = pit->second;
|
||||
return true;
|
||||
} else {
|
||||
@ -572,10 +570,13 @@ bool RclConfig::getFieldSpecialisationPrefixes(const string& fld,
|
||||
pfxes.unique();
|
||||
return true;
|
||||
}
|
||||
bool RclConfig::fieldIsStored(const string& fld)
|
||||
|
||||
// Translate a possible field alias into its canonic field name.
// The lookup in m_aliastocanon is exact. readFieldsConfig() stores the
// alias keys lowercased, so callers are expected to pass a lowercased name.
// A name which is not a known alias is returned unchanged.
string RclConfig::fieldCanon(const string& fld)
{
    map<string, string>::const_iterator canon = m_aliastocanon.find(fld);
    return canon == m_aliastocanon.end() ? fld : canon->second;
}
|
||||
|
||||
string RclConfig::getMimeViewerDef(const string &mtype)
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _RCLCONFIG_H_INCLUDED_
|
||||
#define _RCLCONFIG_H_INCLUDED_
|
||||
/* @(#$Id: rclconfig.h,v 1.40 2008-09-08 16:49:10 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rclconfig.h,v 1.41 2008-09-16 08:18:30 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <list>
|
||||
#include <string>
|
||||
@ -144,7 +144,7 @@ class RclConfig {
|
||||
/** mimeconf: get list of mime types for category */
|
||||
bool getMimeCatTypes(const string& cat, list<string>&);
|
||||
|
||||
/** mimeconf: get field prefix from field name */
|
||||
/** fields: get field prefix from field name */
|
||||
bool getFieldPrefix(const string& fldname, string &pfx);
|
||||
/** Get implied meanings for field name (ie: author->[author, from]) */
|
||||
bool getFieldSpecialisations(const string& fld,
|
||||
@ -152,8 +152,9 @@ class RclConfig {
|
||||
/** Get prefixes for specialisations of field name */
|
||||
bool getFieldSpecialisationPrefixes(const string& fld,
|
||||
list<string>& pfxes);
|
||||
bool fieldIsStored(const string& fld);
|
||||
const set<string>& getStoredFields() {return m_storedFields;}
|
||||
/** Get canonic name for possible alias */
|
||||
string fieldCanon(const string& fld);
|
||||
|
||||
/** mimeview: get/set external viewer exec string(s) for mimetype(s) */
|
||||
string getMimeViewerDef(const string &mimetype);
|
||||
@ -196,7 +197,8 @@ class RclConfig {
|
||||
ConfStack<ConfSimple> *mimeconf; // but their content may depend on it.
|
||||
ConfStack<ConfSimple> *mimeview; //
|
||||
ConfStack<ConfSimple> *m_fields;
|
||||
map<string, string> m_fldtopref;
|
||||
map<string, string> m_fldtopfx;
|
||||
map<string, string> m_aliastocanon;
|
||||
set<string> m_storedFields;
|
||||
|
||||
void *m_stopsuffixes;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.41 2008-09-08 16:49:10 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.42 2008-09-16 08:18:30 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -296,7 +296,6 @@ static const string keyds("description");
|
||||
static const string keyfn("filename");
|
||||
static const string keymd("modificationdate");
|
||||
static const string keymt("mimetype");
|
||||
static const string keyoc("origcharset");
|
||||
static const string keytt("title");
|
||||
|
||||
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
||||
@ -310,7 +309,7 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
||||
doc.text = it->second;
|
||||
} else if (it->first == keymd) {
|
||||
doc.dmtime = it->second;
|
||||
} else if (it->first == keyoc) {
|
||||
} else if (it->first == Rcl::Doc::keyoc) {
|
||||
doc.origcharset = it->second;
|
||||
} else if (it->first == keymt || it->first == keycs) {
|
||||
// don't need these.
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.11 2008-09-08 16:49:10 dockes Exp $ (C) 2007 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.12 2008-09-16 08:18:30 dockes Exp $ (C) 2007 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
|
||||
@ -84,31 +84,33 @@ SearchData_init(recoll_SearchDataObject *self, PyObject *args, PyObject *kwargs)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Note: addclause necessite And/Or vient du fait que le string peut avoir
|
||||
plusieurs mots. A transferer dans l'i/f Python ou pas ? */
|
||||
PyDoc_STRVAR(doc_addClause,
|
||||
"addClause(type='and'|'or'|'excl'|'phrase'|'near'|'sub', qstring=string,\n"
|
||||
" slack=int, field=string, subSearch=SearchData,\n"
|
||||
" slack=int, field=string, subSearch=SearchData)\n"
|
||||
"Adds a simple clause to the SearchData And/Or chain, or a subquery\n"
|
||||
"defined by another SearchData object\n"
|
||||
);
|
||||
/* Note: the need for And/Or arises because the string may hold
|
||||
several words. Should this be moved to the Python interface or not? */
|
||||
|
||||
/* Forward decl, def needs recoll_searchDataTyep */
|
||||
/* Forward declaration only, definition needs recoll_searchDataType */
|
||||
static PyObject *
|
||||
SearchData_addClause(recoll_SearchDataObject* self, PyObject *args,
|
||||
PyObject *kwargs);
|
||||
|
||||
|
||||
|
||||
static PyMethodDef SearchData_methods[] = {
|
||||
{"addClause", (PyCFunction)SearchData_addClause, METH_VARARGS|METH_KEYWORDS,
|
||||
doc_addClause
|
||||
},
|
||||
doc_addClause},
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
PyDoc_STRVAR(doc_SearchDataObject,
|
||||
"SearchData()\n"
|
||||
"\n"
|
||||
"A SearchData object describes a query.\n"
|
||||
"A SearchData object describes a query. It has a number of global parameters\n"
|
||||
"and a chain of search clauses.\n"
|
||||
);
|
||||
static PyTypeObject recoll_SearchDataType = {
|
||||
PyObject_HEAD_INIT(NULL)
|
||||
@ -165,9 +167,9 @@ SearchData_addClause(recoll_SearchDataObject* self, PyObject *args,
|
||||
static char *kwlist[] = {"type", "qstring", "slack", "field",
|
||||
"subsearch", NULL};
|
||||
char *tp = 0;
|
||||
char *qs = 0;
|
||||
char *qs = 0; // needs freeing
|
||||
int slack = 0;
|
||||
char *fld = 0;
|
||||
char *fld = 0; // needs freeing
|
||||
recoll_SearchDataObject *sub = 0;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "ses|iesO!", kwlist,
|
||||
&tp, "utf-8", &qs, &slack,
|
||||
@ -221,11 +223,13 @@ SearchData_addClause(recoll_SearchDataObject* self, PyObject *args,
|
||||
PyErr_SetString(PyExc_AttributeError, "Bad tp arg");
|
||||
return 0;
|
||||
}
|
||||
|
||||
PyMem_Free(qs);
|
||||
PyMem_Free(fld);
|
||||
self->sd->addClause(cl);
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
///// Doc code
|
||||
typedef struct {
|
||||
@ -272,38 +276,86 @@ Doc_init(recoll_DocObject *self, PyObject *, PyObject *)
|
||||
return 0;
|
||||
}
|
||||
|
||||
// The "closure" thing is actually the meta field name. This is how
|
||||
// python allows one set of get/set functions to get/set different
|
||||
// attributes (pass them an additional parameters as from the
|
||||
// getseters table and call it a "closure"
|
||||
// tp_getattr handler for Doc objects: return the value of attribute 'name'
// as a Python unicode object.
//
// The name is lowercased and translated to its canonic field name. A few
// canonic names are served from dedicated Rcl::Doc members, everything
// else is looked up in the doc's meta[] array. A name with no value
// yields an empty unicode string (no AttributeError), as before.
//
// Returns a new reference, or 0 with a Python exception set if the Doc
// object is invalid or the name is empty.
static PyObject *
Doc_getattr(recoll_DocObject *self, char *name)
{
    LOGDEB(("Doc_getattr: name [%s]\n", name));
    if (self->doc == 0 ||
        the_docs.find(self->doc) == the_docs.end()) {
        PyErr_SetString(PyExc_AttributeError, "doc");
        return 0;
    }
#if 0
    for (map<string,string>::const_iterator it = self->doc->meta.begin();
         it != self->doc->meta.end(); it++) {
        LOGDEB(("meta[%s] -> [%s]\n", it->first.c_str(), it->second.c_str()));
    }
#endif
    string key = rclconfig->fieldCanon(stringtolower(string(name)));
    if (key.empty()) {
        // Looking at key[0] below needs at least one character. Letting a
        // C++ exception escape into the Python interpreter would abort.
        PyErr_SetString(PyExc_AttributeError, "name??");
        return 0;
    }

    // Handle special cases, then try retrieving key value from meta
    // array
    string value;
    bool special = true;
    switch (key[0]) {
    case 'f':
        if (!key.compare(Rcl::Doc::keyfs)) {
            value = self->doc->fbytes;
        } else if (!key.compare(Rcl::Doc::keyfn)) {
            value = self->doc->utf8fn;
        } else if (!key.compare(Rcl::Doc::keyfmt)) {
            value = self->doc->fmtime;
        } else {
            special = false;
        }
        break;
    case 'd':
        if (!key.compare(Rcl::Doc::keyds)) {
            value = self->doc->dbytes;
        } else if (!key.compare(Rcl::Doc::keydmt)) {
            value = self->doc->dmtime;
        } else {
            special = false;
        }
        break;
    case 'i':
        if (!key.compare(Rcl::Doc::keyipt)) {
            value = self->doc->ipath;
        } else {
            special = false;
        }
        break;
    case 'm':
        if (!key.compare(Rcl::Doc::keytp)) {
            value = self->doc->mimetype;
        } else if (!key.compare(Rcl::Doc::keymt)) {
            // Document time if set, else file time
            value = self->doc->dmtime.empty() ? self->doc->fmtime :
                self->doc->dmtime;
        } else {
            special = false;
        }
        break;
    case 'o':
        if (!key.compare(Rcl::Doc::keyoc)) {
            value = self->doc->origcharset;
        } else {
            special = false;
        }
        break;
    case 's':
        if (!key.compare(Rcl::Doc::keysig)) {
            value = self->doc->sig;
        } else if (!key.compare(Rcl::Doc::keysz)) {
            // Document size if set, else file size
            value = self->doc->dbytes.empty() ? self->doc->fbytes :
                self->doc->dbytes;
        } else {
            special = false;
        }
        break;
    default:
        special = false;
    }

    if (!special) {
        // Not one of the dedicated members: use the meta array. find()
        // rather than operator[] so that reading an unknown attribute
        // does not insert an empty entry into the document.
        map<string,string>::const_iterator mit = self->doc->meta.find(key);
        if (mit != self->doc->meta.end())
            value = mit->second;
    }

    LOGDEB(("Doc_getattr: [%s] (%s) -> [%s]\n",
            name, key.c_str(), value.c_str()));
    // Return a python unicode object
    PyObject* res = PyUnicode_Decode(value.c_str(), value.size(), "UTF-8",
                                     "replace");
    return res;
}
|
||||
|
||||
static int
|
||||
Doc_setmeta(recoll_DocObject *self, PyObject *value, void *closure)
|
||||
Doc_setattr(recoll_DocObject *self, char *name, PyObject *value)
|
||||
{
|
||||
if (self->doc == 0 ||
|
||||
the_docs.find(self->doc) == the_docs.end()) {
|
||||
@ -330,56 +382,55 @@ Doc_setmeta(recoll_DocObject *self, PyObject *value, void *closure)
|
||||
}
|
||||
|
||||
char* uvalue = PyString_AsString(putf8);
|
||||
const char *key = (const char *)closure;
|
||||
if (key == 0) {
|
||||
PyErr_SetString(PyExc_AttributeError, "key??");
|
||||
if (name == 0) {
|
||||
PyErr_SetString(PyExc_AttributeError, "name??");
|
||||
return -1;
|
||||
}
|
||||
|
||||
LOGDEB0(("Doc_setmeta: setting [%s] to [%s]\n", key, uvalue));
|
||||
self->doc->meta[key] = uvalue;
|
||||
switch (key[0]) {
|
||||
LOGDEB0(("Doc_setattr: setting [%s] to [%s]\n", name, uvalue));
|
||||
self->doc->meta[name] = uvalue;
|
||||
switch (name[0]) {
|
||||
case 'd':
|
||||
if (!strcmp(key, "dbytes")) {
|
||||
if (!strcmp(name, "dbytes")) {
|
||||
self->doc->dbytes = uvalue;
|
||||
}
|
||||
break;
|
||||
case 'f':
|
||||
if (!strcmp(key, "fbytes")) {
|
||||
if (!strcmp(name, "fbytes")) {
|
||||
self->doc->fbytes = uvalue;
|
||||
}
|
||||
break;
|
||||
case 'i':
|
||||
if (!strcmp(key, "ipath")) {
|
||||
if (!strcmp(name, "ipath")) {
|
||||
self->doc->ipath = uvalue;
|
||||
}
|
||||
break;
|
||||
case 'm':
|
||||
if (!strcmp(key, "mimetype")) {
|
||||
if (!strcmp(name, "mimetype")) {
|
||||
self->doc->mimetype = uvalue;
|
||||
} else if (!strcmp(key, "mtime")) {
|
||||
} else if (!strcmp(name, "mtime")) {
|
||||
self->doc->dmtime = uvalue;
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
if (!strcmp(key, "sig")) {
|
||||
if (!strcmp(name, "sig")) {
|
||||
self->doc->sig = uvalue;
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
if (!strcmp(key, "text")) {
|
||||
if (!strcmp(name, "text")) {
|
||||
self->doc->text = uvalue;
|
||||
}
|
||||
break;
|
||||
case 'u':
|
||||
if (!strcmp(key, "url")) {
|
||||
if (!strcmp(name, "url")) {
|
||||
self->doc->url = uvalue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static PyGetSetDef Doc_getseters[] = {
|
||||
// Name, get, set, doc, closure
|
||||
{"url", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||
@ -410,6 +461,7 @@ static PyGetSetDef Doc_getseters[] = {
|
||||
"sig", (void *)"sig"},
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
#endif
|
||||
|
||||
PyDoc_STRVAR(doc_DocObject,
|
||||
"Doc()\n"
|
||||
@ -427,8 +479,8 @@ static PyTypeObject recoll_DocType = {
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)Doc_dealloc, /*tp_dealloc*/
|
||||
0, /*tp_print*/
|
||||
0, /*tp_getattr*/
|
||||
0, /*tp_setattr*/
|
||||
(getattrfunc)Doc_getattr, /*tp_getattr*/
|
||||
(setattrfunc)Doc_setattr, /*tp_setattr*/
|
||||
0, /*tp_compare*/
|
||||
0, /*tp_repr*/
|
||||
0, /*tp_as_number*/
|
||||
@ -450,7 +502,7 @@ static PyTypeObject recoll_DocType = {
|
||||
0, /* tp_iternext */
|
||||
0, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
Doc_getseters, /* tp_getset */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
@ -470,7 +522,10 @@ typedef struct {
|
||||
/* Type-specific fields go here. */
|
||||
Rcl::Query *query;
|
||||
int next; // Index of result to be fetched next or -1 if uninit
|
||||
char *sortfield;
|
||||
int ascending;
|
||||
} recoll_QueryObject;
|
||||
|
||||
/////////////////////////////////////////////
|
||||
/// Query object
|
||||
static void
|
||||
@ -481,6 +536,7 @@ Query_dealloc(recoll_QueryObject *self)
|
||||
the_queries.erase(self->query);
|
||||
delete self->query;
|
||||
self->query = 0;
|
||||
self->sortfield = 0;
|
||||
self->ob_type->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
@ -495,6 +551,7 @@ Query_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
||||
return 0;
|
||||
self->query = 0;
|
||||
self->next = -1;
|
||||
self->sortfield = 0;
|
||||
return (PyObject *)self;
|
||||
}
|
||||
|
||||
@ -511,9 +568,29 @@ Query_init(recoll_QueryObject *self, PyObject *, PyObject *)
|
||||
delete self->query;
|
||||
self->query = 0;
|
||||
self->next = -1;
|
||||
self->sortfield = 0;
|
||||
self->ascending = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(doc_Query_sortby,
|
||||
"sortby(field=fieldname, ascending=true)\n"
|
||||
"Sort results by 'fieldname', in ascending or descending order.\n"
|
||||
"Only one field can be used, no subsorts for now.\n"
|
||||
);
|
||||
|
||||
// Python method Query.sortby(field, ascending): record the sort
// specification which the execute methods later pass to setSortBy().
//
// 'ascending' is optional; when omitted the current setting is kept.
// Returns None, or 0 with a Python exception set on bad arguments or
// memory exhaustion, in which case the stored settings are unchanged.
//
// NOTE(review): self->sortfield now points to memory from PyMem_Malloc().
// Query_init() and Query_dealloc() reset the pointer without freeing it
// and should call PyMem_Free() on it first.
static PyObject *
Query_sortby(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
{
    LOGDEB(("Query_sortby\n"));
    static char *kwlist[] = {"field", "ascending", NULL};

    // Parse into locals so that a failed parse cannot leave the object
    // with a half-updated sort specification.
    char *fld = 0; // borrowed from the argument object, must not be freed
    int ascending = self->ascending;
    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|i", kwlist,
                                     &fld, &ascending))
        return 0;

    // The "s" format returns a pointer into the argument object, which
    // may be released as soon as the caller drops it. Keep our own copy.
    size_t len = strlen(fld) + 1;
    char *copy = static_cast<char *>(PyMem_Malloc(len));
    if (copy == 0)
        return PyErr_NoMemory();
    memcpy(copy, fld, len);

    PyMem_Free(self->sortfield); // no-op when no field was set yet
    self->sortfield = copy;
    self->ascending = ascending;
    Py_RETURN_NONE;
}
|
||||
|
||||
PyDoc_STRVAR(doc_Query_execute,
|
||||
"execute(query_string, stemmming=1|0)\n"
|
||||
"\n"
|
||||
@ -527,14 +604,17 @@ Query_execute(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
LOGDEB(("Query_execute\n"));
|
||||
static char *kwlist[] = {"query_string", "stemming", NULL};
|
||||
char *utf8 = 0;
|
||||
char *sutf8 = 0; // needs freeing
|
||||
int dostem = 1;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "es|i:Query_execute",
|
||||
kwlist, "utf-8", &utf8,
|
||||
kwlist, "utf-8", &sutf8,
|
||||
&dostem)) {
|
||||
return 0;
|
||||
}
|
||||
LOGDEB(("Query_execute: [%s]\n", utf8));
|
||||
LOGDEB(("Query_execute: [%s]\n", sutf8));
|
||||
|
||||
string utf8(sutf8);
|
||||
PyMem_Free(sutf8);
|
||||
if (self->query == 0 ||
|
||||
the_queries.find(self->query) == the_queries.end()) {
|
||||
PyErr_SetString(PyExc_AttributeError, "query");
|
||||
@ -542,11 +622,12 @@ Query_execute(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
|
||||
}
|
||||
string reason;
|
||||
Rcl::SearchData *sd = wasaStringToRcl(utf8, reason);
|
||||
PyMem_Free(utf8);
|
||||
|
||||
if (!sd) {
|
||||
PyErr_SetString(PyExc_ValueError, reason.c_str());
|
||||
return 0;
|
||||
}
|
||||
sd->setSortBy(self->sortfield, self->ascending);
|
||||
RefCntr<Rcl::SearchData> rq(sd);
|
||||
self->query->setQuery(rq, dostem?Rcl::Query::QO_STEM:Rcl::Query::QO_NONE);
|
||||
int cnt = self->query->getResCnt();
|
||||
@ -557,7 +638,7 @@ Query_execute(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
|
||||
PyDoc_STRVAR(doc_Query_executesd,
|
||||
"execute(SearchData, stemming=1|0)\n"
|
||||
"\n"
|
||||
"Starts a search for the query defined by SearchData.\n"
|
||||
"Starts a search for the query defined by the SearchData object.\n"
|
||||
);
|
||||
|
||||
static PyObject *
|
||||
@ -576,6 +657,7 @@ Query_executesd(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
|
||||
PyErr_SetString(PyExc_AttributeError, "query");
|
||||
return 0;
|
||||
}
|
||||
pysd->sd->setSortBy(self->sortfield, self->ascending);
|
||||
self->query->setQuery(pysd->sd, dostem ? Rcl::Query::QO_STEM :
|
||||
Rcl::Query::QO_NONE);
|
||||
int cnt = self->query->getResCnt();
|
||||
@ -616,18 +698,22 @@ Query_fetchone(recoll_QueryObject* self, PyObject *, PyObject *)
|
||||
return 0;
|
||||
}
|
||||
self->next++;
|
||||
|
||||
// Move some data from the dedicated fields to the meta array to make
|
||||
// fetching attributes easier
|
||||
// fetching attributes easier. Is this actually needed ? Useful for
|
||||
// url and relevancy rating which are also formatted .
|
||||
Rcl::Doc *doc = result->doc;
|
||||
printableUrl(rclconfig->getDefCharset(), doc->url, doc->meta["url"]);
|
||||
doc->meta["mimetype"] = doc->mimetype;
|
||||
doc->meta["mtime"] = doc->dmtime.empty() ? doc->fmtime : doc->dmtime;
|
||||
doc->meta["ipath"] = doc->ipath;
|
||||
doc->meta["fbytes"] = doc->fbytes;
|
||||
doc->meta["dbytes"] = doc->dbytes;
|
||||
printableUrl(rclconfig->getDefCharset(), doc->url,
|
||||
doc->meta[Rcl::Doc::keyurl]);
|
||||
doc->meta[Rcl::Doc::keytp] = doc->mimetype;
|
||||
doc->meta[Rcl::Doc::keymt] = doc->dmtime.empty() ?
|
||||
doc->fmtime : doc->dmtime;
|
||||
doc->meta[Rcl::Doc::keyipt] = doc->ipath;
|
||||
doc->meta[Rcl::Doc::keyfs] = doc->fbytes;
|
||||
doc->meta[Rcl::Doc::keyds] = doc->dbytes;
|
||||
char pc[20];
|
||||
sprintf(pc, "%02d %%", percent);
|
||||
doc->meta["relevance"] = pc;
|
||||
doc->meta[Rcl::Doc::keyrr] = pc;
|
||||
|
||||
return (PyObject *)result;
|
||||
}
|
||||
@ -637,7 +723,10 @@ static PyMethodDef Query_methods[] = {
|
||||
doc_Query_execute},
|
||||
{"executesd", (PyCFunction)Query_executesd, METH_VARARGS|METH_KEYWORDS,
|
||||
doc_Query_executesd},
|
||||
{"fetchone", (PyCFunction)Query_fetchone, METH_VARARGS,doc_Query_fetchone},
|
||||
{"fetchone", (PyCFunction)Query_fetchone, METH_VARARGS,
|
||||
doc_Query_fetchone},
|
||||
{"sortby", (PyCFunction)Query_sortby, METH_VARARGS|METH_KEYWORDS,
|
||||
doc_Query_sortby},
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
@ -881,8 +970,8 @@ Db_makeDocAbstract(recoll_DbObject* self, PyObject *args, PyObject *)
|
||||
static PyObject *
|
||||
Db_needUpdate(recoll_DbObject* self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
char *udi = 0;
|
||||
char *sig = 0;
|
||||
char *udi = 0; // needs freeing
|
||||
char *sig = 0; // needs freeing
|
||||
LOGDEB(("Db_needUpdate\n"));
|
||||
if (!PyArg_ParseTuple(args, "eses:Db_needUpdate",
|
||||
"utf-8", &udi, "utf-8", &sig)) {
|
||||
@ -891,6 +980,8 @@ Db_needUpdate(recoll_DbObject* self, PyObject *args, PyObject *kwds)
|
||||
if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
|
||||
LOGERR(("Db_needUpdate: db not found %p\n", self->db));
|
||||
PyErr_SetString(PyExc_AttributeError, "db");
|
||||
PyMem_Free(udi);
|
||||
PyMem_Free(sig);
|
||||
return 0;
|
||||
}
|
||||
bool result = self->db->needUpdate(udi, sig);
|
||||
@ -903,16 +994,20 @@ static PyObject *
|
||||
Db_addOrUpdate(recoll_DbObject* self, PyObject *args, PyObject *)
|
||||
{
|
||||
LOGDEB(("Db_addOrUpdate\n"));
|
||||
char *udi = 0;
|
||||
char *parent_udi = 0;
|
||||
|
||||
char *sudi = 0; // needs freeing
|
||||
char *sparent_udi = 0; // needs freeing
|
||||
recoll_DocObject *pydoc;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "esO!|es:Db_addOrUpdate",
|
||||
"utf-8", &udi, &recoll_DocType, &pydoc,
|
||||
"utf-8", &parent_udi)) {
|
||||
"utf-8", &sudi, &recoll_DocType, &pydoc,
|
||||
"utf-8", &sparent_udi)) {
|
||||
return 0;
|
||||
}
|
||||
string udi(sudi);
|
||||
string parent_udi(sparent_udi ? sparent_udi : "");
|
||||
PyMem_Free(sudi);
|
||||
PyMem_Free(sparent_udi);
|
||||
|
||||
if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
|
||||
LOGERR(("Db_addOrUpdate: db not found %p\n", self->db));
|
||||
PyErr_SetString(PyExc_AttributeError, "db");
|
||||
@ -923,16 +1018,11 @@ Db_addOrUpdate(recoll_DbObject* self, PyObject *args, PyObject *)
|
||||
PyErr_SetString(PyExc_AttributeError, "doc");
|
||||
return 0;
|
||||
}
|
||||
if (!self->db->addOrUpdate(udi, parent_udi?parent_udi:"", *pydoc->doc)) {
|
||||
if (!self->db->addOrUpdate(udi, parent_udi, *pydoc->doc)) {
|
||||
LOGERR(("Db_addOrUpdate: rcldb error\n"));
|
||||
PyErr_SetString(PyExc_AttributeError, "rcldb error");
|
||||
PyMem_Free(udi);
|
||||
PyMem_Free(parent_udi);
|
||||
return 0;
|
||||
}
|
||||
PyMem_Free(udi);
|
||||
if (parent_udi)
|
||||
PyMem_Free(parent_udi);
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
|
||||
@ -27,6 +27,7 @@ module1 = Extension('recoll',
|
||||
top + 'query/wasatorcl.cpp',
|
||||
top + 'rcldb/pathhash.cpp',
|
||||
top + 'rcldb/rcldb.cpp',
|
||||
top + 'rcldb/rcldoc.cpp',
|
||||
top + 'rcldb/rclquery.cpp',
|
||||
top + 'rcldb/searchdata.cpp',
|
||||
top + 'rcldb/stemdb.cpp',
|
||||
|
||||
33
src/python/samples/recollqsd.py
Normal file
33
src/python/samples/recollqsd.py
Normal file
@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import recoll
|
||||
|
||||
def dotest(db, q):
|
||||
query = db.query()
|
||||
query.sortby("title", 1)
|
||||
|
||||
nres = query.executesd(q, stemming = 1)
|
||||
print "Result count: ", nres
|
||||
if nres > 10:
|
||||
nres = 10
|
||||
while query.next >= 0 and query.next < nres:
|
||||
doc = query.fetchone()
|
||||
print query.next
|
||||
for k in ("url", "mtime", "title", "author", "abstract"):
|
||||
print k, ":", getattr(doc, k).encode('utf-8')
|
||||
#abs = db.makeDocAbstract(doc, query).encode('utf-8')
|
||||
#print abs
|
||||
print
|
||||
# End dotest
|
||||
|
||||
sd = recoll.SearchData()
|
||||
sd.addClause("and", "essaouira maroc")
|
||||
#sd.addClause("and", "dockes", field="author")
|
||||
#sd.addClause("phrase", "jean francois", 1)
|
||||
#sd.addClause("excl", "plage")
|
||||
|
||||
db = recoll.connect()
|
||||
dotest(db, sd)
|
||||
|
||||
sys.exit(0)
|
||||
@ -1,24 +1,25 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Demo implementation of a xesam server. Run it like
|
||||
Recoll implementation of a xesam server.
|
||||
Based on the example in the xesam-tools package by:
|
||||
Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
|
||||
|
||||
demo/xesam-dummy-service [-s|--state-messages]
|
||||
Run it like
|
||||
|
||||
xesam-recoll-service
|
||||
|
||||
And launch a search on it via
|
||||
|
||||
./xesam-tool search hello
|
||||
xesam-tool search hello
|
||||
|
||||
You can use the -s or --state-messages switch to enable StateChanged
|
||||
signal monitoring in xesam-tool as well as in xesam-dummy-service.
|
||||
"""
|
||||
|
||||
|
||||
# Sets up path to uninstalled xesam module
|
||||
import demo
|
||||
|
||||
import xesam
|
||||
import xesam.query
|
||||
from xesam.server import *
|
||||
import xesam.server
|
||||
import gobject
|
||||
import sys
|
||||
|
||||
@ -34,15 +35,16 @@ class RecollServer (xesam.server.Searcher):
|
||||
"""
|
||||
|
||||
def __init__ (self):
|
||||
h_fact = HandleFactory ()
|
||||
fact = ClientFactory (self, h_fact, RecollSession, RecollSearch)
|
||||
h_fact = xesam.server.HandleFactory ()
|
||||
fact = xesam.server.ClientFactory (self, h_fact,
|
||||
RecollSession, RecollSearch)
|
||||
xesam.server.Searcher.__init__ (self, h_fact, fact)
|
||||
self.set_echo_queries (True)
|
||||
self.rcldb = recoll.connect()
|
||||
|
||||
def start (self):
|
||||
# Export our selves via a SearchServerStub
|
||||
SearchServerStub(self).start()
|
||||
xesam.server.SearchServerStub(self).start()
|
||||
|
||||
def GetProperty (self, shandle, name):
|
||||
prop = xesam.server.Searcher.GetProperty(self, shandle, name)
|
||||
@ -54,33 +56,24 @@ class RecollServer (xesam.server.Searcher):
|
||||
xesam.debug ("Set property request for '%s=%s', on session '%s', returning %s" % (name, value, shandle,val))
|
||||
return val
|
||||
|
||||
class RecollSession (Session):
|
||||
class RecollSession (xesam.server.Session):
|
||||
"""
|
||||
|
||||
"""
|
||||
def __init__ (self, searcher, session_handle):
|
||||
Session.__init__ (self, searcher, session_handle)
|
||||
xesam.server.Session.__init__ (self, searcher, session_handle)
|
||||
self.set_property ("recoll.org", "xesam-recoll-service")
|
||||
|
||||
class RecollSearch (Search):
|
||||
class RecollSearch (xesam.server.Search):
|
||||
"""
|
||||
|
||||
"""
|
||||
# Translation from known xesam/whatever field names to Recoll Doc elements
|
||||
FLDTRANS = \
|
||||
{
|
||||
"xesam:title" : lambda doc : doc.title,
|
||||
"xesam:summary" : lambda doc : doc.abstract,
|
||||
"xesam:mimeType" : lambda doc : doc.mimetype,
|
||||
"xesam:contentModified" : lambda doc : \
|
||||
timestampToIso8601(doc.dmtime or doc.fmtime),
|
||||
"xesam:url" : lambda doc : doc.url
|
||||
}
|
||||
|
||||
SLICE = 10
|
||||
|
||||
def __init__ (self, searcher, session, search_handle, \
|
||||
query=None, xml=None) :
|
||||
Search.__init__ (self, searcher, session, search_handle, \
|
||||
xesam.server.Search.__init__ (self, searcher, session, search_handle, \
|
||||
query=query, xml=xml)
|
||||
|
||||
self._hit_fields = session.get_property (xesam.SESSION_HIT_FIELDS)
|
||||
@ -88,10 +81,7 @@ class RecollSearch (Search):
|
||||
xesam.error ("Got property hit.fields as None."
|
||||
" Setting default xesam:url")
|
||||
self._hit_fields = ["xesam:url"]
|
||||
print "RecollSearch: fields:", self._hit_fields
|
||||
# TOBEDONE: if fields includes "snippet" we need to generate
|
||||
# the synthetic abstract for each returned doc
|
||||
# Also relevancyRating, ContentCategory et SourceCategory
|
||||
xesam.debug("RecollSearch: fields:" % self._hit_fields)
|
||||
xesam.debug ("Created %s with handle %s and query:\n%s" %
|
||||
(self.__class__, self.get_handle(), self.get_query()))
|
||||
|
||||
@ -99,6 +89,21 @@ class RecollSearch (Search):
|
||||
if not isinstance(self.get_query(), xesam.query.UserQuery):
|
||||
raise Exception ("Only UserQuery supported ATM, sorry.")
|
||||
self.rclquery = self._searcher.rcldb.query()
|
||||
|
||||
# In the latest version (>0.95), primary/secondary is replaced by
|
||||
# a field list.
|
||||
sortfield = session.get_property(xesam.SESSION_SORT_PRIMARY)
|
||||
order = session.get_property(xesam.SESSION_SORT_ORDER)
|
||||
|
||||
# xesam-tool does not know how to set these for now, so let's
|
||||
# TEST here
|
||||
sortfield = "contentModified"
|
||||
order = "descending"
|
||||
xesam.debug("Session sort primary %s order %s" % (sortfield, order))
|
||||
# END TEST
|
||||
|
||||
if sortfield:
|
||||
self.rclquery.sortby(sortfield, order == "ascending" and 1 or 0)
|
||||
|
||||
def start (self):
|
||||
xesam.debug ("RecollSearch '%s' got [%s]" %
|
||||
@ -110,10 +115,16 @@ class RecollSearch (Search):
|
||||
doc = self.rclquery.fetchone()
|
||||
data = []
|
||||
for fld in self._hit_fields:
|
||||
if self.FLDTRANS.has_key (fld):
|
||||
data.append(self.FLDTRANS[fld](doc))
|
||||
# Need to handle ContentCategory and SourceCategory
|
||||
fld = fld.lower().replace("xesam:", "")
|
||||
xesam.debug("Adding data for fld %s" % (fld))
|
||||
if fld == "snippet":
|
||||
data.append(self._searcher.rcldb.makeDocAbstract(doc,
|
||||
self.rclquery))
|
||||
elif fld == "contentmodified":
|
||||
data.append(timestampToIso8601(getattr(doc, "mtime")))
|
||||
else:
|
||||
data.append("")
|
||||
data.append(getattr(doc, fld, ""))
|
||||
self.add_new_hit (self._hit_fields, data)
|
||||
hits += 1
|
||||
if hits >= self.SLICE:
|
||||
@ -135,7 +146,7 @@ class RecollSearch (Search):
|
||||
xesam.debug ("RecollSearch get_hits")
|
||||
|
||||
if self._stopped:
|
||||
return Search.get_hits(self, num_hits)
|
||||
return xesam.server.Search.get_hits(self, num_hits)
|
||||
|
||||
hits = 0
|
||||
done = 0;
|
||||
@ -163,7 +174,7 @@ class RecollSearch (Search):
|
||||
xesam.debug ("Search '%s' emitted 'done'" % self.get_handle())
|
||||
self.stop()
|
||||
|
||||
return Search.get_hits(self, num_hits)
|
||||
return xesam.server.Search.get_hits(self, num_hits)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: recollq.cpp,v 1.14 2008-09-08 16:49:10 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: recollq.cpp,v 1.15 2008-09-16 08:18:30 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -42,6 +42,33 @@ using namespace std;
|
||||
#include "internfile.h"
|
||||
#include "wipedir.h"
|
||||
|
||||
// Extract and print the text contents of one result document.
//
// The file path is obtained from doc.url by dropping its first 7
// characters (the length of the "file://" prefix). tmpdir is created on
// demand when it is empty or not accessible, and is wiped before each
// extraction.
//
// Returns false only when the temporary directory cannot be created. A
// missing file or a failed extraction is reported on stdout and still
// returns true.
bool dump_contents(RclConfig *rclconfig, string& tmpdir, Rcl::Doc& doc)
{
    // substr() throws std::out_of_range when the start position is past
    // the end of the string, so only strip the prefix when the url is
    // long enough. An empty path fails the stat() below and is reported
    // there like any other missing file.
    static const string::size_type urlpfxlen = 7;
    string fn;
    if (doc.url.length() >= urlpfxlen)
        fn = doc.url.substr(urlpfxlen);

    struct stat st;
    if (stat(fn.c_str(), &st) != 0) {
        cout << "No such file: " << fn << endl;
        return true;
    }

    // (Re)create the temporary directory if we have none yet or if it
    // disappeared since the last call.
    if (tmpdir.empty() || access(tmpdir.c_str(), 0) < 0) {
        string reason;
        if (!maketmpdir(tmpdir, reason)) {
            cerr << "Cannot create temporary directory: "
                 << reason << endl;
            return false;
        }
    }
    // Start from a clean directory for each document
    wipedir(tmpdir);

    FileInterner interner(fn, &st, rclconfig, tmpdir, &doc.mimetype);
    if (interner.internfile(doc, doc.ipath)) {
        cout << doc.text << endl;
    } else {
        cout << "Cant intern: " << fn << endl;
    }
    return true;
}
|
||||
|
||||
|
||||
static char *thisprog;
|
||||
static char usage [] =
|
||||
" [-o|-a|-f] <query string>\n"
|
||||
@ -60,6 +87,8 @@ static char usage [] =
|
||||
" -n <cnt> limit the maximum number of results (0->no limit, default 2000)\n"
|
||||
" -b : basic. Just output urls, no mime types or titles\n"
|
||||
" -m : dump the whole document meta[] array\n"
|
||||
" -S fld : sort by field name\n"
|
||||
" -D : sort descending\n"
|
||||
;
|
||||
static void
|
||||
Usage(void)
|
||||
@ -82,10 +111,15 @@ static int op_flags;
|
||||
#define OPT_q 0x200
|
||||
#define OPT_t 0x400
|
||||
#define OPT_m 0x800
|
||||
#define OPT_D 0x1000
|
||||
#define OPT_S 0x2000
|
||||
|
||||
|
||||
int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
{
|
||||
string a_config;
|
||||
string sortfield;
|
||||
|
||||
int limit = 2000;
|
||||
thisprog = argv[0];
|
||||
argc--; argv++;
|
||||
@ -103,6 +137,7 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
a_config = *(++argv);
|
||||
argc--; goto b1;
|
||||
case 'd': op_flags |= OPT_d; break;
|
||||
case 'D': op_flags |= OPT_D; break;
|
||||
case 'f': op_flags |= OPT_f; break;
|
||||
case 'l': op_flags |= OPT_l; break;
|
||||
case 'm': op_flags |= OPT_m; break;
|
||||
@ -112,6 +147,9 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
argc--; goto b1;
|
||||
case 'o': op_flags |= OPT_o; break;
|
||||
case 'q': op_flags |= OPT_q; break;
|
||||
case 'S': op_flags |= OPT_S; if (argc < 2) Usage();
|
||||
sortfield = *(++argv);
|
||||
argc--; goto b1;
|
||||
case 't': op_flags |= OPT_t; break;
|
||||
default: Usage(); break;
|
||||
}
|
||||
@ -168,6 +206,10 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (op_flags & OPT_S) {
|
||||
sd->setSortBy(sortfield, (op_flags & OPT_D) ? false : true);
|
||||
}
|
||||
|
||||
RefCntr<Rcl::SearchData> rq(sd);
|
||||
Rcl::Query query(&rcldb);
|
||||
query.setQuery(rq, Rcl::Query::QO_STEM);
|
||||
@ -197,6 +239,7 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
<< "[" << doc.url.c_str() << "]" << "\t"
|
||||
<< "[" << doc.meta[Rcl::Doc::keytt].c_str() << "]" << "\t"
|
||||
<< doc.fbytes.c_str() << "\tbytes" << "\t"
|
||||
<< doc.dmtime.c_str() << "\tSecs" << "\t"
|
||||
<< endl;
|
||||
if (op_flags & OPT_m) {
|
||||
for (map<string,string>::const_iterator it = doc.meta.begin();
|
||||
@ -204,32 +247,11 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
cout << it->first << " = " << it->second << endl;
|
||||
}
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
if (op_flags & OPT_d) {
|
||||
string fn = doc.url.substr(7);
|
||||
struct stat st;
|
||||
if (stat(fn.c_str(), &st) != 0) {
|
||||
cout << "No such file: " << fn << endl;
|
||||
continue;
|
||||
}
|
||||
if (tmpdir.empty() || access(tmpdir.c_str(), 0) < 0) {
|
||||
string reason;
|
||||
if (!maketmpdir(tmpdir, reason)) {
|
||||
cerr << "Cannot create temporary directory: "
|
||||
<< reason << endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
wipedir(tmpdir);
|
||||
FileInterner interner(fn, &st, rclconfig, tmpdir, &doc.mimetype);
|
||||
if (interner.internfile(doc, doc.ipath)) {
|
||||
cout << doc.text << endl;
|
||||
} else {
|
||||
cout << "Cant intern: " << fn << endl;
|
||||
}
|
||||
}
|
||||
|
||||
dump_contents(rclconfig, tmpdir, doc);
|
||||
}
|
||||
}
|
||||
|
||||
// Maybe clean up temporary directory
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.144 2008-09-09 12:58:23 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.145 2008-09-16 08:18:30 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -129,12 +129,10 @@ bool Db::Native::subDocs(const string &udi, vector<Xapian::docid>& docids)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Only ONE field name inside the index data record differs from the
|
||||
// Rcl::Doc ones: caption<->title, for a remnant of compatibility with
|
||||
// omega
|
||||
static const string keycap("caption");
|
||||
static const string keymtp("mtype");
|
||||
static const string keyfmt("fmtime");
|
||||
static const string keydmt("dmtime");
|
||||
static const string keyoc("origcharset");
|
||||
static const string keyurl("url");
|
||||
|
||||
// Turn data record from db into document fields
|
||||
bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
|
||||
@ -144,11 +142,11 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
|
||||
ConfSimple parms(&data);
|
||||
if (!parms.ok())
|
||||
return false;
|
||||
parms.get(keyurl, doc.url);
|
||||
parms.get(keymtp, doc.mimetype);
|
||||
parms.get(keyfmt, doc.fmtime);
|
||||
parms.get(keydmt, doc.dmtime);
|
||||
parms.get(keyoc, doc.origcharset);
|
||||
parms.get(Doc::keyurl, doc.url);
|
||||
parms.get(Doc::keytp, doc.mimetype);
|
||||
parms.get(Doc::keyfmt, doc.fmtime);
|
||||
parms.get(Doc::keydmt, doc.dmtime);
|
||||
parms.get(Doc::keyoc, doc.origcharset);
|
||||
parms.get(keycap, doc.meta[Doc::keytt]);
|
||||
parms.get(Doc::keykw, doc.meta[Doc::keykw]);
|
||||
parms.get(Doc::keyabs, doc.meta[Doc::keyabs]);
|
||||
@ -162,10 +160,10 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
|
||||
char buf[20];
|
||||
sprintf(buf,"%.2f", float(percent) / 100.0);
|
||||
doc.meta[Doc::keyrr] = buf;
|
||||
parms.get(string("ipath"), doc.ipath);
|
||||
parms.get(string("fbytes"), doc.fbytes);
|
||||
parms.get(string("dbytes"), doc.dbytes);
|
||||
parms.get(string("sig"), doc.sig);
|
||||
parms.get(Doc::keyipt, doc.ipath);
|
||||
parms.get(Doc::keyfs, doc.fbytes);
|
||||
parms.get(Doc::keyds, doc.dbytes);
|
||||
parms.get(Doc::keysig, doc.sig);
|
||||
doc.xdocid = docid;
|
||||
|
||||
// Other, not predefined meta fields:
|
||||
@ -691,24 +689,25 @@ bool Db::isopen()
|
||||
// indexed with no prefix (ie: abstract)
|
||||
bool Db::fieldToPrefix(const string& fldname, string &pfx)
|
||||
{
|
||||
// This is the default table
|
||||
// This is the default table. We prefer the data from rclconfig if
|
||||
// available
|
||||
static map<string, string> fldToPrefs;
|
||||
if (fldToPrefs.empty()) {
|
||||
fldToPrefs[Doc::keyabs] = string();
|
||||
fldToPrefs["ext"] = "XE";
|
||||
fldToPrefs["filename"] = "XSFN";
|
||||
fldToPrefs[Doc::keyfn] = "XSFN";
|
||||
|
||||
fldToPrefs["title"] = "S";
|
||||
fldToPrefs[keycap] = "S";
|
||||
fldToPrefs[Doc::keytt] = "S";
|
||||
fldToPrefs["subject"] = "S";
|
||||
|
||||
fldToPrefs[Doc::keyau] = "A";
|
||||
fldToPrefs["creator"] = "A";
|
||||
fldToPrefs["from"] = "A";
|
||||
|
||||
fldToPrefs[Doc::keykw] = "K";
|
||||
fldToPrefs["keyword"] = "K";
|
||||
fldToPrefs["tag"] = "K";
|
||||
fldToPrefs[Doc::keykw] = "K";
|
||||
fldToPrefs["tags"] = "K";
|
||||
}
|
||||
|
||||
@ -719,6 +718,7 @@ bool Db::fieldToPrefix(const string& fldname, string &pfx)
|
||||
if (config && config->getFieldPrefix(fld, pfx))
|
||||
return true;
|
||||
|
||||
// No data in rclconfig? Check default values
|
||||
map<string, string>::const_iterator it = fldToPrefs.find(fld);
|
||||
if (it != fldToPrefs.end()) {
|
||||
pfx = it->second;
|
||||
@ -816,9 +816,17 @@ void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen)
|
||||
m_synthAbsWordCtxLen = syntctxlen;
|
||||
}
|
||||
|
||||
// Left-pad a non-empty string with '0' characters so that it is at least
// len characters long. Empty strings, and strings which are already len
// characters or longer, are left unchanged.
static inline void leftzeropad(std::string& s, unsigned len)
{
    // insert() modifies the string in place: no need to assign the result
    // back to s.
    if (!s.empty() && s.length() < len)
        s.insert(0, len - s.length(), '0');
}
|
||||
|
||||
static const int MB = 1024 * 1024;
|
||||
static const string nc("\n\r\x0c");
|
||||
|
||||
// Append one "name=value\n" line to the data record string R.
// The parameters are parenthesized so that expression arguments (ie: a
// conditional expression) keep their meaning after expansion.
#define RECORD_APPEND(R, NM, VAL) {(R) += (NM) + "=" + (VAL) + "\n";}
|
||||
|
||||
// Add document in internal form to the database: index the terms in
|
||||
// the title abstract and body and add special terms for file name,
|
||||
// date, mime type ... , create the document data record (more
|
||||
@ -958,39 +966,43 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||
// reasonable lengths and suppress newlines (so that the data
|
||||
// record can keep a simple syntax)
|
||||
|
||||
string record = "url=" + doc.url;
|
||||
record += "\nmtype=" + doc.mimetype;
|
||||
record += "\nfmtime=" + doc.fmtime;
|
||||
string record;
|
||||
RECORD_APPEND(record, Doc::keyurl, doc.url);
|
||||
RECORD_APPEND(record, Doc::keytp, doc.mimetype);
|
||||
// We left-zero-pad the times so that they are lexico-sortable
|
||||
leftzeropad(doc.fmtime, 11);
|
||||
RECORD_APPEND(record, Doc::keyfmt, doc.fmtime);
|
||||
if (!doc.dmtime.empty()) {
|
||||
record += "\ndmtime=" + doc.dmtime;
|
||||
leftzeropad(doc.dmtime, 11);
|
||||
RECORD_APPEND(record, Doc::keydmt, doc.dmtime);
|
||||
}
|
||||
record += "\norigcharset=" + doc.origcharset;
|
||||
RECORD_APPEND(record, Doc::keyoc, doc.origcharset);
|
||||
|
||||
if (!doc.fbytes.empty())
|
||||
record += string("\nfbytes=") + doc.fbytes;
|
||||
RECORD_APPEND(record, Doc::keyfs, doc.fbytes);
|
||||
// Note that we add the signature both as a value and in the data record
|
||||
if (!doc.sig.empty())
|
||||
record += string("\nsig=") + doc.sig;
|
||||
RECORD_APPEND(record, Doc::keysig, doc.sig);
|
||||
newdocument.add_value(VALUE_SIG, doc.sig);
|
||||
|
||||
char sizebuf[30];
|
||||
sprintf(sizebuf, "%u", (unsigned int)doc.text.length());
|
||||
record += string("\ndbytes=") + sizebuf;
|
||||
RECORD_APPEND(record, Doc::keyds, sizebuf);
|
||||
|
||||
if (!doc.ipath.empty())
|
||||
record += "\nipath=" + doc.ipath;
|
||||
RECORD_APPEND(record, Doc::keyipt, doc.ipath);
|
||||
|
||||
if (doc.meta[Doc::keytt].empty())
|
||||
doc.meta[Doc::keytt] = doc.utf8fn;
|
||||
doc.meta[Doc::keytt] =
|
||||
neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), nc);
|
||||
if (!doc.meta[Doc::keytt].empty())
|
||||
record += "\n" + keycap + "=" + doc.meta[Doc::keytt];
|
||||
RECORD_APPEND(record, keycap, doc.meta[Doc::keytt]);
|
||||
|
||||
doc.meta[Doc::keykw] =
|
||||
neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), nc);
|
||||
if (!doc.meta[Doc::keykw].empty())
|
||||
record += "\n" + Doc::keykw + "=" + doc.meta[Doc::keykw];
|
||||
RECORD_APPEND(record, Doc::keykw, doc.meta[Doc::keykw]);
|
||||
|
||||
// If abstract is empty, we make up one with the beginning of the
|
||||
// document. This is then not indexed, but part of the doc data so
|
||||
@ -1010,22 +1022,23 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||
nc);
|
||||
}
|
||||
if (!doc.meta[Doc::keyabs].empty())
|
||||
record += "\n" + Doc::keyabs + "=" + doc.meta[Doc::keyabs];
|
||||
RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]);
|
||||
|
||||
RclConfig *config = RclConfig::getMainConfig();
|
||||
if (config) {
|
||||
const set<string>& stored = config->getStoredFields();
|
||||
for (set<string>::const_iterator it = stored.begin();
|
||||
it != stored.end(); it++) {
|
||||
string nm = stringtolower(config->fieldCanon(*it));
|
||||
if (!doc.meta[*it].empty()) {
|
||||
string value =
|
||||
neutchars(truncate_to_word(doc.meta[*it], 150), nc);
|
||||
record += "\n" + *it + "=" + value;
|
||||
RECORD_APPEND(record, nm, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
record += "\n";
|
||||
LOGDEB0(("Rcl::Db::add: new doc record:\n %s\n", record.c_str()));
|
||||
|
||||
LOGDEB0(("Rcl::Db::add: new doc record:\n%s\n", record.c_str()));
|
||||
newdocument.set_data(record);
|
||||
|
||||
const char *fnc = udi.c_str();
|
||||
@ -1105,21 +1118,6 @@ bool Db::needUpdate(const string &udi, const string& sig)
|
||||
|
||||
// Retrieve old file/doc signature from value
|
||||
string osig = doc.get_value(VALUE_SIG);
|
||||
#if 0
|
||||
// Get old sig from data record
|
||||
string data = doc.get_data();
|
||||
string::size_type i1, i2;
|
||||
i1 = data.find("sig=");
|
||||
if (i1 == string::npos)
|
||||
return true;
|
||||
i1 += 4;
|
||||
if (i1 >= data.length())
|
||||
return true;
|
||||
i2 = data.find_first_of("\n\r", i1);
|
||||
if (i2 == string::npos)
|
||||
return true;
|
||||
string osig = data.substr(i1, i2-i1);
|
||||
#endif
|
||||
LOGDEB2(("Db::needUpdate: oldsig [%s] new [%s]\n",
|
||||
osig.c_str(), sig.c_str()));
|
||||
// Compare new/old sig
|
||||
@ -1287,14 +1285,12 @@ bool Db::purgeFile(const string &udi)
|
||||
return false;
|
||||
}
|
||||
|
||||
// File name wild card expansion. This is a specialisation of termMatch
|
||||
bool Db::filenameWildExp(const string& fnexp, list<string>& names)
|
||||
{
|
||||
// File name search, with possible wildcards.
|
||||
// We expand wildcards by scanning the filename terms (prefixed
|
||||
// with XSFN) from the database.
|
||||
// We build an OR query with the expanded values if any.
|
||||
string pattern;
|
||||
dumb_string(fnexp, pattern);
|
||||
names.clear();
|
||||
|
||||
// If pattern is not quoted, and has no wildcards, we add * at
|
||||
// each end: match any substring
|
||||
@ -1303,33 +1299,14 @@ bool Db::filenameWildExp(const string& fnexp, list<string>& names)
|
||||
} else if (pattern.find_first_of("*?[") == string::npos) {
|
||||
pattern = "*" + pattern + "*";
|
||||
} // else let it be
|
||||
LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str()));
|
||||
|
||||
LOGDEB((" pattern: [%s]\n", pattern.c_str()));
|
||||
|
||||
// Match pattern against all file names in the db
|
||||
string ermsg;
|
||||
try {
|
||||
Xapian::TermIterator it = m_ndb->db.allterms_begin();
|
||||
it.skip_to("XSFN");
|
||||
for (;it != m_ndb->db.allterms_end(); it++) {
|
||||
if ((*it).find("XSFN") != 0)
|
||||
break;
|
||||
string fn = (*it).substr(4);
|
||||
LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
|
||||
if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
|
||||
names.push_back((*it).c_str());
|
||||
}
|
||||
// Limit the match count
|
||||
if (names.size() > 1000) {
|
||||
LOGERR(("Db::filenameWildExp: too many matched file names\n"));
|
||||
break;
|
||||
}
|
||||
}
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
LOGERR(("filenameWildExp: xapian error: %s\n", ermsg.c_str()));
|
||||
list<TermMatchEntry> entries;
|
||||
if (!termMatch(ET_WILD, string(), pattern, entries, 1000, Doc::keyfn))
|
||||
return false;
|
||||
}
|
||||
for (list<TermMatchEntry>::const_iterator it = entries.begin();
|
||||
it != entries.end(); it++)
|
||||
names.push_back("XSFN"+it->term);
|
||||
|
||||
if (names.empty()) {
|
||||
// Build an impossible query: we know it's impossible because we
|
||||
@ -1385,11 +1362,11 @@ const string regSpecChars = "(.[{";
|
||||
bool Db::termMatch(MatchType typ, const string &lang,
|
||||
const string &root,
|
||||
list<TermMatchEntry>& res,
|
||||
int max)
|
||||
int max,
|
||||
const string& field)
|
||||
{
|
||||
if (!m_ndb || !m_ndb->m_isopen)
|
||||
return false;
|
||||
|
||||
Xapian::Database db = m_ndb->m_iswritable ? m_ndb->wdb: m_ndb->db;
|
||||
|
||||
res.clear();
|
||||
@ -1399,6 +1376,11 @@ bool Db::termMatch(MatchType typ, const string &lang,
|
||||
dumb_string(root, droot);
|
||||
string nochars = typ == ET_WILD ? wildSpecChars : regSpecChars;
|
||||
|
||||
string prefix;
|
||||
if (!field.empty()) {
|
||||
(void)fieldToPrefix(field, prefix);
|
||||
}
|
||||
|
||||
if (typ == ET_STEM) {
|
||||
if (!stemExpand(lang, root, res, max))
|
||||
return false;
|
||||
@ -1429,33 +1411,43 @@ bool Db::termMatch(MatchType typ, const string &lang,
|
||||
string::size_type es = droot.find_first_of(nochars);
|
||||
string is;
|
||||
switch (es) {
|
||||
case string::npos: is = droot;break;
|
||||
case 0: break;
|
||||
default: is = droot.substr(0, es);break;
|
||||
case string::npos: is = prefix + droot; break;
|
||||
case 0: is = prefix; break;
|
||||
default: is = prefix + droot.substr(0, es); break;
|
||||
}
|
||||
LOGDEB(("termMatch: initsec: [%s]\n", is.c_str()));
|
||||
|
||||
Xapian::TermIterator it = db.allterms_begin();
|
||||
if (!is.empty())
|
||||
it.skip_to(is.c_str());
|
||||
for (int n = 0;it != db.allterms_end(); it++) {
|
||||
// If we're beyond the terms matching the initial string, end
|
||||
if (!is.empty() && (*it).find(is) != 0)
|
||||
break;
|
||||
// Don't match special internal terms beginning with uppercase ascii
|
||||
if ((*it).at(0) >= 'A' && (*it).at(0) <= 'Z')
|
||||
continue;
|
||||
if (typ == ET_WILD) {
|
||||
if (fnmatch(droot.c_str(), (*it).c_str(), 0) == FNM_NOMATCH)
|
||||
continue;
|
||||
} else {
|
||||
if (regexec(®, (*it).c_str(), 0, 0, 0))
|
||||
continue;
|
||||
string ermsg;
|
||||
try {
|
||||
Xapian::TermIterator it = db.allterms_begin();
|
||||
if (!is.empty())
|
||||
it.skip_to(is.c_str());
|
||||
for (int n = 0; it != db.allterms_end(); it++) {
|
||||
// If we're beyond the terms matching the initial string, end
|
||||
if (!is.empty() && (*it).find(is) != 0)
|
||||
break;
|
||||
string term;
|
||||
if (!prefix.empty())
|
||||
term = (*it).substr(prefix.length());
|
||||
else
|
||||
term = *it;
|
||||
if (typ == ET_WILD) {
|
||||
if (fnmatch(droot.c_str(), term.c_str(), 0) == FNM_NOMATCH)
|
||||
continue;
|
||||
} else {
|
||||
if (regexec(®, term.c_str(), 0, 0, 0))
|
||||
continue;
|
||||
}
|
||||
// Do we want stem expansion here? We don't do it for now
|
||||
res.push_back(TermMatchEntry(term, it.get_termfreq()));
|
||||
++n;
|
||||
}
|
||||
// Do we want stem expansion here? We don't do it for now
|
||||
res.push_back(TermMatchEntry(*it, it.get_termfreq()));
|
||||
++n;
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
LOGERR(("termMatch: %s\n", ermsg.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (typ == ET_REGEXP) {
|
||||
regfree(®);
|
||||
}
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.61 2008-08-26 07:38:29 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rcldb.h,v 1.62 2008-09-16 08:18:30 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
@ -156,9 +156,11 @@ class Db {
|
||||
* Stem expansion is performed if lang is not empty */
|
||||
enum MatchType {ET_WILD, ET_REGEXP, ET_STEM};
|
||||
bool termMatch(MatchType typ, const string &lang, const string &s,
|
||||
list<TermMatchEntry>& result, int max = -1);
|
||||
list<TermMatchEntry>& result, int max = -1,
|
||||
const string& field = "");
|
||||
|
||||
/** Specific filename wildcard expansion */
|
||||
/** Special filename wildcard to XSFN terms expansion.
|
||||
internal/searchdata use only */
|
||||
bool filenameWildExp(const string& exp, list<string>& names);
|
||||
|
||||
/** Set parameters for synthetic abstract generation */
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
#include "xapian.h"
|
||||
|
||||
namespace Rcl {
|
||||
/* @(#$Id: rcldb_p.h,v 1.4 2008-09-05 10:34:17 dockes Exp $ (C) 2007 J.F.Dockes */
|
||||
/* @(#$Id: rcldb_p.h,v 1.5 2008-09-16 08:18:30 dockes Exp $ (C) 2007 J.F.Dockes */
|
||||
|
||||
// Generic Xapian exception catching code. We do this quite often,
|
||||
// and I have no idea how to do this except for a macro
|
||||
@ -70,5 +70,14 @@ class Db::Native {
|
||||
bool subDocs(const string &udi, vector<Xapian::docid>& docids);
|
||||
|
||||
};
|
||||
|
||||
// Field names inside the index data record may differ from the rcldoc ones
|
||||
// (esp.: caption / title)
|
||||
// Translate an Rcl::Doc field name into the name used for the same field
// inside the index data record. Only Doc::keytt is translated (to
// "caption"); for any other name the argument itself is returned, so the
// result must not outlive it.
inline const string& docfToDatf(const string& df)
{
    static const string keycap("caption");
    if (df == Doc::keytt)
        return keycap;
    return df;
}
|
||||
|
||||
}
|
||||
#endif /* _rcldb_p_h_included_ */
|
||||
|
||||
@ -1,14 +1,25 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldoc.cpp,v 1.1 2008-09-08 16:49:10 dockes Exp $ (C) 2007 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldoc.cpp,v 1.2 2008-09-16 08:18:30 dockes Exp $ (C) 2007 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
|
||||
#include "rcldoc.h"
|
||||
namespace Rcl {
|
||||
const string Doc::keyabs("abstract");
|
||||
const string Doc::keyau("author");
|
||||
const string Doc::keyfn("filename");
|
||||
const string Doc::keykw("keywords");
|
||||
const string Doc::keyrr("relevancyrating");
|
||||
const string Doc::keytt("title");
|
||||
const string Doc::keyurl("url");
|
||||
const string Doc::keyfn("filename");
|
||||
const string Doc::keyipt("ipath");
|
||||
const string Doc::keytp("mtype");
|
||||
const string Doc::keyfmt("fmtime");
|
||||
const string Doc::keydmt("dmtime");
|
||||
const string Doc::keymt("mtime");
|
||||
const string Doc::keyoc("origcharset");
|
||||
const string Doc::keyfs("fbytes");
|
||||
const string Doc::keyds("dbytes");
|
||||
const string Doc::keysz("size");
|
||||
const string Doc::keysig("sig");
|
||||
const string Doc::keyrr("relevancyrating");
|
||||
const string Doc::keyabs("abstract");
|
||||
const string Doc::keyau("author");
|
||||
const string Doc::keytt("title");
|
||||
const string Doc::keykw("keywords");
|
||||
}
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _RCLDOC_H_INCLUDED_
|
||||
#define _RCLDOC_H_INCLUDED_
|
||||
/* @(#$Id: rcldoc.h,v 1.9 2008-09-08 16:49:10 dockes Exp $ (C) 2006 J.F.Dockes */
|
||||
/* @(#$Id: rcldoc.h,v 1.10 2008-09-16 08:18:30 dockes Exp $ (C) 2006 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
@ -44,9 +44,9 @@ class Doc {
|
||||
// can be accessed after a query without fetching the actual document).
|
||||
// We indicate the routine that sets them up during indexing
|
||||
|
||||
// This is just "file://" + binary filename. No transcoding: this
|
||||
// is used to access files
|
||||
// Index: computed from fn by Db::add caller. Query: from doc data.
|
||||
// This is just "file://" + binary or url-encoded filename. No
|
||||
// transcoding: this is used to access files Index: computed from
|
||||
// fn by Db::add caller. Query: from doc data.
|
||||
string url;
|
||||
|
||||
// Transcoded version of the simple file name for SFN-prefixed
|
||||
@ -134,12 +134,29 @@ class Doc {
|
||||
pc = 0;
|
||||
xdocid = 0;
|
||||
}
|
||||
static const string keyfn;
|
||||
static const string keyrr;
|
||||
static const string keyabs;
|
||||
static const string keyau;
|
||||
static const string keytt;
|
||||
static const string keykw;
|
||||
|
||||
// The official names for recoll native fields when used in a text
|
||||
// context (ie: the python interface duplicates some of the fixed
|
||||
// fields in the meta array, these are the names used). Defined in
|
||||
// rcldoc.cpp. For fields stored in the meta[] array (ie, title,
|
||||
// author), filters _must_ use these values
|
||||
static const string keyurl; // url
|
||||
static const string keyfn; // file name
|
||||
static const string keyipt; // ipath
|
||||
static const string keytp; // mime type
|
||||
static const string keyfmt; // file mtime
|
||||
static const string keydmt; // document mtime
|
||||
static const string keymt; // mtime dmtime if set else fmtime
|
||||
static const string keyoc; // original charset
|
||||
static const string keyfs; // file size
|
||||
static const string keyds; // document size
|
||||
static const string keysz; // dbytes if set else fbytes
|
||||
static const string keysig; // sig
|
||||
static const string keyrr; // relevancy rating
|
||||
static const string keyabs; // abstract
|
||||
static const string keyau; // author
|
||||
static const string keytt; // title
|
||||
static const string keykw; // keywords
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclquery.cpp,v 1.5 2008-09-05 11:45:16 dockes Exp $ (C) 2008 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rclquery.cpp,v 1.6 2008-09-16 08:18:30 dockes Exp $ (C) 2008 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
@ -8,6 +8,8 @@ static char rcsid[] = "@(#$Id: rclquery.cpp,v 1.5 2008-09-05 11:45:16 dockes Exp
|
||||
#include <list>
|
||||
#include <vector>
|
||||
|
||||
#include "xapian/sorter.h"
|
||||
|
||||
#include "rcldb.h"
|
||||
#include "rcldb_p.h"
|
||||
#include "rclquery.h"
|
||||
@ -20,6 +22,8 @@ static char rcsid[] = "@(#$Id: rclquery.cpp,v 1.5 2008-09-05 11:45:16 dockes Exp
|
||||
#ifndef NO_NAMESPACES
|
||||
namespace Rcl {
|
||||
#endif
|
||||
|
||||
|
||||
class FilterMatcher : public Xapian::MatchDecider {
|
||||
public:
|
||||
FilterMatcher(const string &topdir)
|
||||
@ -41,7 +45,7 @@ public:
|
||||
|
||||
// The only filtering for now is on file path (subtree)
|
||||
string url;
|
||||
parms.get(string("url"), url);
|
||||
parms.get(Doc::keyurl, url);
|
||||
LOGDEB2(("FilterMatcher topdir [%s] url [%s]\n",
|
||||
m_topdir.c_str(), url.c_str()));
|
||||
if (url.find(m_topdir, 7) == 7) {
|
||||
@ -55,14 +59,46 @@ private:
|
||||
string m_topdir;
|
||||
};
|
||||
|
||||
// Sort helper class
|
||||
class QSorter : public Xapian::Sorter {
|
||||
public:
|
||||
QSorter(const string& f) : m_fld(docfToDatf(f) + "=") {}
|
||||
|
||||
virtual std::string operator()(const Xapian::Document& xdoc) const {
|
||||
string data = xdoc.get_data();
|
||||
|
||||
// It would be simpler to do the record->Rcl::Doc thing, but
|
||||
// hand-doing this will be faster. It makes more assumptions
|
||||
// about the format than a ConfTree though:
|
||||
string::size_type i1, i2;
|
||||
i1 = data.find(m_fld);
|
||||
if (i1 == string::npos)
|
||||
return string();
|
||||
i1 += m_fld.length();
|
||||
if (i1 >= data.length())
|
||||
return string();
|
||||
i2 = data.find_first_of("\n\r", i1);
|
||||
if (i2 == string::npos)
|
||||
return string();
|
||||
return data.substr(i1, i2-i1);
|
||||
}
|
||||
|
||||
private:
|
||||
string m_fld;
|
||||
};
|
||||
|
||||
Query::Query(Db *db)
|
||||
: m_nq(new Native(this)), m_db(db)
|
||||
: m_nq(new Native(this)), m_db(db), m_sorter(0)
|
||||
{
|
||||
}
|
||||
|
||||
// Release the native query object and the sorter, if one was allocated.
Query::~Query()
{
    deleteZ(m_nq);
    // m_sorter is kept as a void pointer: cast it back to its actual type
    // before deleting. Deleting a null pointer does nothing.
    delete static_cast<QSorter*>(m_sorter);
    m_sorter = 0;
}
|
||||
|
||||
string Query::getReason() const
|
||||
@ -75,6 +111,7 @@ Db *Query::whatDb()
|
||||
return m_db;
|
||||
}
|
||||
|
||||
|
||||
//#define ISNULL(X) (X).isNull()
|
||||
#define ISNULL(X) !(X)
|
||||
|
||||
@ -114,6 +151,17 @@ bool Query::setQuery(RefCntr<SearchData> sdata, int opts,
|
||||
try {
|
||||
m_nq->enquire = new Xapian::Enquire(m_db->m_ndb->db);
|
||||
m_nq->enquire->set_query(m_nq->query);
|
||||
if (!sdata->getSortBy().empty()) {
|
||||
if (m_sorter) {
|
||||
delete (QSorter*)m_sorter;
|
||||
m_sorter = 0;
|
||||
}
|
||||
m_sorter = new QSorter(sdata->getSortBy());
|
||||
// It really seems there is a xapian bug about sort order, we
|
||||
// invert here.
|
||||
m_nq->enquire->set_sort_by_key((QSorter*)m_sorter,
|
||||
!sdata->getSortAscending());
|
||||
}
|
||||
m_nq->mset = Xapian::MSet();
|
||||
// Get the query description and trim the "Xapian::Query"
|
||||
d = m_nq->query.get_description();
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#ifndef _rclquery_h_included_
|
||||
#define _rclquery_h_included_
|
||||
/* @(#$Id: rclquery.h,v 1.2 2008-07-01 08:31:08 dockes Exp $ (C) 2008 J.F.Dockes */
|
||||
/* @(#$Id: rclquery.h,v 1.3 2008-09-16 08:18:30 dockes Exp $ (C) 2008 J.F.Dockes */
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@ -87,6 +87,7 @@ private:
|
||||
string m_filterTopDir; // Current query filter on subtree top directory
|
||||
string m_reason; // Error explanation
|
||||
Db *m_db;
|
||||
void *m_sorter;
|
||||
unsigned int m_qOpts;
|
||||
/* Copy constructor and assignment are private and forbidden */
|
||||
Query(const Query &) {}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.22 2008-08-28 15:42:43 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.23 2008-09-16 08:18:30 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -153,6 +153,14 @@ void SearchData::erase() {
|
||||
m_reason.erase();
|
||||
}
|
||||
|
||||
void SearchData::setSortBy(const string& fld, bool ascending) {
|
||||
RclConfig *cfg = RclConfig::getMainConfig();
|
||||
m_sortField = cfg->fieldCanon(stringtolower(fld));
|
||||
m_sortAscending = ascending;
|
||||
LOGDEB0(("SearchData::setSortBy: [%s] %s\n", m_sortField.c_str(),
|
||||
m_sortAscending ? "ascending" : "descending"));
|
||||
}
|
||||
|
||||
// Am I a file name only search ? This is to turn off term highlighting
|
||||
bool SearchData::fileNameOnly()
|
||||
{
|
||||
@ -572,9 +580,9 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
|
||||
list<string> names;
|
||||
for (list<string>::iterator it = patterns.begin();
|
||||
it != patterns.end(); it++) {
|
||||
// This relies on filenameWildExp not resetting and always
|
||||
// adding to the input
|
||||
db.filenameWildExp(*it, names);
|
||||
list<string> more;
|
||||
db.filenameWildExp(*it, more);
|
||||
names.splice(names.end(), more);
|
||||
}
|
||||
// Build a query out of the matching file name terms.
|
||||
*qp = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _SEARCHDATA_H_INCLUDED_
|
||||
#define _SEARCHDATA_H_INCLUDED_
|
||||
/* @(#$Id: searchdata.h,v 1.17 2008-09-08 15:47:44 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: searchdata.h,v 1.18 2008-09-16 08:18:30 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
/**
|
||||
* Structures to hold data coming almost directly from the gui
|
||||
@ -110,11 +110,17 @@ public:
|
||||
/** Add file type for filtering results */
|
||||
void addFiletype(const string& ft) {m_filetypes.push_back(ft);}
|
||||
|
||||
/** Choose sort order. Should this be in RclQuery instead ? */
|
||||
void setSortBy(const string& fld, bool ascending = true);
|
||||
const string& getSortBy() const {return m_sortField;}
|
||||
bool getSortAscending() const {return m_sortAscending;}
|
||||
private:
|
||||
SClType m_tp; // Only SCLT_AND or SCLT_OR here
|
||||
vector<SearchDataClause *> m_query;
|
||||
vector<string> m_filetypes; // Restrict to filetypes if set.
|
||||
string m_topdir; // Restrict to subtree.
|
||||
SClType m_tp; // Only SCLT_AND or SCLT_OR here
|
||||
vector<SearchDataClause*> m_query;
|
||||
vector<string> m_filetypes; // Restrict to filetypes if set.
|
||||
string m_topdir; // Restrict to subtree.
|
||||
string m_sortField;
|
||||
bool m_sortAscending;
|
||||
// Printable expanded version of the complete query, retrieved/set
|
||||
// from rcldb after the Xapian::setQuery() call
|
||||
string m_description;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# @(#$Id: fields,v 1.2 2008-09-15 08:03:37 dockes Exp $ (C) 2007 J.F.Dockes
|
||||
# @(#$Id: fields,v 1.3 2008-09-16 08:18:30 dockes Exp $ (C) 2007 J.F.Dockes
|
||||
# Field names configuration. This defines how one may search ie for
|
||||
# author:Hemingway
|
||||
# Important:
|
||||
@ -43,13 +43,18 @@ stored = author
|
||||
##########################
|
||||
# This section defines field names aliases or synonyms. Any right hand side
|
||||
# value will be turned into the lhs canonic name before further treatment
|
||||
# Left-hand values must match names in the prefixes section or
|
||||
# data-record fields.
|
||||
# Note to filter writers: only canonic names should be used when indexing.
|
||||
#
|
||||
# The left-hand values in the recoll distribution file are well known and
|
||||
# must match names used in the c++ code, or even the index data
|
||||
# record. They can't change! But you can add others.
|
||||
#
|
||||
# Filters should only add canonic names to the meta array when indexing,
|
||||
# not aliases.
|
||||
|
||||
[aliases]
|
||||
abstract = summary dc:summary description xesam:description
|
||||
author = creator dc:creator xesam:author xesam:creator
|
||||
caption = title dc:title subject
|
||||
title = title dc:title subject
|
||||
# catg = dc:type contentCategory
|
||||
dbytes = size xesam:size
|
||||
dmtime = date dc:date dc:datemodified datemodified contentmodified \
|
||||
@ -64,5 +69,6 @@ url = dc:identifier xesam:url
|
||||
#########################
|
||||
# This section defines a hierarchy for field names. Searching for a lhs
|
||||
# ancestor will be expanded to a search for itself and all rhs descendants
|
||||
# This is not used for now
|
||||
[specialisations]
|
||||
author = from
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user