separated rcldb and rclquery
This commit is contained in:
parent
e5e8249ad3
commit
0e7a78d688
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: kio_recoll.cpp,v 1.7 2007-11-09 15:46:17 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: kio_recoll.cpp,v 1.8 2008-06-13 18:22:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
@ -109,18 +109,20 @@ void RecollProtocol::get(const KURL & url)
|
||||
RefCntr<Rcl::SearchData> sdata(new Rcl::SearchData(Rcl::SCLT_OR));
|
||||
sdata->addClause(new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND,
|
||||
(const char *)u8));
|
||||
|
||||
if (!m_rcldb->setQuery(sdata, Rcl::Db::QO_STEM, "english")) {
|
||||
Rcl::Query *query = new Rcl::Query(m_rcldb);
|
||||
if (!query->setQuery(sdata, Rcl::Db::QO_STEM, "english")) {
|
||||
m_reason = "Internal Error: setQuery failed";
|
||||
outputError(m_reason.c_str());
|
||||
finished();
|
||||
delete query;
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_docsource)
|
||||
delete m_docsource;
|
||||
|
||||
m_docsource = new DocSequenceDb(m_rcldb, "Query results", sdata);
|
||||
m_docsource = new DocSequenceDb(RefCntr<Rcl::Query>(query),
|
||||
"Query results", sdata);
|
||||
|
||||
QByteArray output;
|
||||
QTextStream os(output, IO_WriteOnly );
|
||||
|
||||
@ -8,8 +8,8 @@ LIBS = librcl.a
|
||||
|
||||
all: $(LIBS)
|
||||
|
||||
OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o history.o recollq.o sortseq.o wasastringtoquery.o wasatorcl.o pathhash.o rcldb.o searchdata.o stemdb.o stoplist.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o
|
||||
DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp history.dep.stamp recollq.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp pathhash.dep.stamp rcldb.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp
|
||||
OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o history.o recollq.o sortseq.o wasastringtoquery.o wasatorcl.o pathhash.o rcldb.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o
|
||||
DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp history.dep.stamp recollq.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp pathhash.dep.stamp rcldb.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp
|
||||
|
||||
librcl.a : $(DEPS) $(OBJS) unac.o
|
||||
ar ru librcl.a $(OBJS) unac.o
|
||||
@ -71,6 +71,8 @@ pathhash.o : ../rcldb/pathhash.cpp
|
||||
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/pathhash.cpp
|
||||
rcldb.o : ../rcldb/rcldb.cpp
|
||||
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/rcldb.cpp
|
||||
rclquery.o : ../rcldb/rclquery.cpp
|
||||
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/rclquery.cpp
|
||||
searchdata.o : ../rcldb/searchdata.cpp
|
||||
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/searchdata.cpp
|
||||
stemdb.o : ../rcldb/stemdb.cpp
|
||||
@ -194,6 +196,9 @@ pathhash.dep.stamp : ../rcldb/pathhash.cpp
|
||||
rcldb.dep.stamp : ../rcldb/rcldb.cpp
|
||||
$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/rcldb.cpp > rcldb.dep
|
||||
touch rcldb.dep.stamp
|
||||
rclquery.dep.stamp : ../rcldb/rclquery.cpp
|
||||
$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/rclquery.cpp > rclquery.dep
|
||||
touch rclquery.dep.stamp
|
||||
searchdata.dep.stamp : ../rcldb/searchdata.cpp
|
||||
$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/searchdata.cpp > searchdata.dep
|
||||
touch searchdata.dep.stamp
|
||||
@ -275,6 +280,7 @@ include wasastringtoquery.dep
|
||||
include wasatorcl.dep
|
||||
include pathhash.dep
|
||||
include rcldb.dep
|
||||
include rclquery.dep
|
||||
include searchdata.dep
|
||||
include stemdb.dep
|
||||
include stoplist.dep
|
||||
|
||||
@ -31,6 +31,7 @@ ${depth}/query/wasastringtoquery.cpp \
|
||||
${depth}/query/wasatorcl.cpp \
|
||||
${depth}/rcldb/pathhash.cpp \
|
||||
${depth}/rcldb/rcldb.cpp \
|
||||
${depth}/rcldb/rclquery.cpp \
|
||||
${depth}/rcldb/searchdata.cpp \
|
||||
${depth}/rcldb/stemdb.cpp \
|
||||
${depth}/rcldb/stoplist.cpp \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.2 2008-05-27 10:45:59 dockes Exp $ (C) 2007 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.3 2008-06-13 18:22:46 dockes Exp $ (C) 2007 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <Python.h>
|
||||
@ -11,6 +11,7 @@ using namespace std;
|
||||
#include "rclinit.h"
|
||||
#include "rclconfig.h"
|
||||
#include "rcldb.h"
|
||||
#include "rclquery.h"
|
||||
#include "pathut.h"
|
||||
#include "wasastringtoquery.h"
|
||||
#include "wasatorcl.h"
|
||||
@ -31,7 +32,7 @@ recollq_question(PyObject *self, PyObject *args)
|
||||
string reason;
|
||||
string dbdir = config->getDbDir();
|
||||
rcldb.open(dbdir, config->getStopfile(),
|
||||
Rcl::Db::DbRO, Rcl::Db::QO_STEM);
|
||||
Rcl::Db::DbRO);
|
||||
|
||||
Rcl::SearchData *sd = wasaStringToRcl(qs, reason);
|
||||
if (!sd) {
|
||||
@ -40,8 +41,9 @@ recollq_question(PyObject *self, PyObject *args)
|
||||
}
|
||||
|
||||
RefCntr<Rcl::SearchData> rq(sd);
|
||||
rcldb.setQuery(rq, Rcl::Db::QO_STEM);
|
||||
int cnt = rcldb.getResCnt();
|
||||
RefCntr<Rcl::Query> query(new Rcl::Query(&rcldb));
|
||||
query->setQuery(rq, Rcl::Query::QO_STEM);
|
||||
int cnt = query->getResCnt();
|
||||
cout << "Recoll query: " << rq->getDescription() << endl;
|
||||
if (cnt <= limit)
|
||||
cout << cnt << " results" << endl;
|
||||
@ -51,7 +53,7 @@ recollq_question(PyObject *self, PyObject *args)
|
||||
for (int i = 0; i < limit; i++) {
|
||||
int pc;
|
||||
Rcl::Doc doc;
|
||||
if (!rcldb.getDoc(i, doc, &pc))
|
||||
if (!query->getDoc(i, doc, &pc))
|
||||
break;
|
||||
char cpc[20];
|
||||
sprintf(cpc, "%d", pc);
|
||||
|
||||
@ -3,8 +3,6 @@ from distutils.core import setup, Extension
|
||||
module1 = Extension('recollq',
|
||||
define_macros = [('MAJOR_VERSION', '1'),
|
||||
('MINOR_VERSION', '0'),
|
||||
('HAVE_MKDTEMP', '1'),
|
||||
('HAVE_VASPRINTF', '1'),
|
||||
('UNAC_VERSION', '"1.0.7"'),
|
||||
('STATFS_INCLUDE', '"sys/mount.h"'),
|
||||
('RECOLL_DATADIR',
|
||||
@ -27,6 +25,7 @@ module1 = Extension('recollq',
|
||||
'../query/wasastringtoquery.cpp',
|
||||
'../query/wasatorcl.cpp',
|
||||
'../rcldb/rcldb.cpp',
|
||||
'../rcldb/rclquery.cpp',
|
||||
'../rcldb/searchdata.cpp',
|
||||
'../rcldb/stemdb.cpp',
|
||||
'../rcldb/pathhash.cpp',
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: main.cpp,v 1.66 2008-02-19 08:02:20 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: main.cpp,v 1.67 2008-06-13 18:22:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -100,9 +100,6 @@ bool maybeOpenDb(string &reason, bool force)
|
||||
return false;
|
||||
}
|
||||
|
||||
int qopts = Rcl::Db::QO_NONE;
|
||||
if (prefs.queryStemLang.length() > 0)
|
||||
qopts |= Rcl::Db::QO_STEM;
|
||||
if (force)
|
||||
rcldb->close();
|
||||
rcldb->rmQueryDb("");
|
||||
@ -112,7 +109,7 @@ bool maybeOpenDb(string &reason, bool force)
|
||||
rcldb->addQueryDb(*it);
|
||||
}
|
||||
if (!rcldb->isopen() && !rcldb->open(dbdir, rclconfig->getStopfile(),
|
||||
Rcl::Db::DbRO, qopts)) {
|
||||
Rcl::Db::DbRO)) {
|
||||
reason = "Could not open database in " +
|
||||
dbdir + " wait for indexing to complete?";
|
||||
return false;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.48 2008-02-19 08:02:01 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.49 2008-06-13 18:22:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -440,23 +440,25 @@ void RclMain::startSearch(RefCntr<Rcl::SearchData> sdata)
|
||||
|
||||
int qopts = 0;
|
||||
if (!prefs.queryStemLang.length() == 0)
|
||||
qopts |= Rcl::Db::QO_STEM;
|
||||
qopts |= Rcl::Query::QO_STEM;
|
||||
QApplication::setOverrideCursor(QCursor(Qt::WaitCursor));
|
||||
|
||||
string stemLang = (const char *)prefs.queryStemLang.ascii();
|
||||
if (stemLang == "ALL") {
|
||||
rclconfig->getConfParam("indexstemminglanguages", stemLang);
|
||||
}
|
||||
Rcl::Query *query = new Rcl::Query(rcldb);
|
||||
|
||||
if (!rcldb->setQuery(sdata, qopts, stemLang)) {
|
||||
if (!query || !query->setQuery(sdata, qopts, stemLang)) {
|
||||
QMessageBox::warning(0, "Recoll", tr("Cant start query: ") +
|
||||
QString::fromAscii(rcldb->getReason().c_str()));
|
||||
QString::fromAscii(query->getReason().c_str()));
|
||||
QApplication::restoreOverrideCursor();
|
||||
return;
|
||||
}
|
||||
curPreview = 0;
|
||||
DocSequenceDb *src =
|
||||
new DocSequenceDb(rcldb, string(tr("Query results").utf8()), sdata);
|
||||
new DocSequenceDb(RefCntr<Rcl::Query>(query),
|
||||
string(tr("Query results").utf8()), sdata);
|
||||
m_docSource = RefCntr<DocSequence>(src);
|
||||
m_searchData = sdata;
|
||||
setDocSequence();
|
||||
@ -921,7 +923,8 @@ void RclMain::docExpand(int docnum)
|
||||
if (!resList->getDoc(docnum, doc))
|
||||
return;
|
||||
list<string> terms;
|
||||
terms = rcldb->expand(doc);
|
||||
if (!m_docSource.isNull())
|
||||
terms = m_docSource->expand(doc);
|
||||
if (terms.empty())
|
||||
return;
|
||||
// Do we keep the original query. I think we'd better not.
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _DOCSEQ_H_INCLUDED_
|
||||
#define _DOCSEQ_H_INCLUDED_
|
||||
/* @(#$Id: docseq.h,v 1.12 2007-06-19 08:36:24 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: docseq.h,v 1.13 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
@ -89,7 +89,7 @@ class DocSequence {
|
||||
vector<int>& gslks) const {
|
||||
terms.clear(); groups.clear(); gslks.clear(); return true;
|
||||
}
|
||||
|
||||
virtual list<string> expand(Rcl::Doc &) {list<string> e; return e;}
|
||||
private:
|
||||
string m_title;
|
||||
};
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: docseqdb.cpp,v 1.3 2007-06-19 08:36:24 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: docseqdb.cpp,v 1.4 2008-06-13 18:22:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -23,28 +23,53 @@ static char rcsid[] = "@(#$Id: docseqdb.cpp,v 1.3 2007-06-19 08:36:24 dockes Exp
|
||||
#include "docseqdb.h"
|
||||
#include "rcldb.h"
|
||||
|
||||
// Build a document sequence on top of an already prepared query.
//  q     : shared handle on the query which will supply the documents
//  t     : sequence title, handed to the DocSequence base class
//  sdata : search data the query was built from (used for terms/description)
// The cached result count starts out negative, meaning "not fetched yet".
DocSequenceDb::DocSequenceDb(RefCntr<Rcl::Query> q, const string &t,
                             RefCntr<Rcl::SearchData> sdata)
    : DocSequence(t),
      m_q(q),
      m_sdata(sdata),
      m_rescnt(-1)
{
}
|
||||
|
||||
// Nothing to release explicitly: the members clean up after themselves.
DocSequenceDb::~DocSequenceDb()
{
}
|
||||
|
||||
// Retrieve the search terms, term groups and group slacks for this
// sequence. The work is delegated to the search data object.
bool DocSequenceDb::getTerms(vector<string>& terms,
                             vector<vector<string> >& groups,
                             vector<int>& gslks) const
{
    const bool status = m_sdata.getptr()->getTerms(terms, groups, gslks);
    return status;
}
|
||||
|
||||
// Return the description string held by the search data object.
string DocSequenceDb::getDescription()
{
    string description = m_sdata->getDescription();
    return description;
}
|
||||
|
||||
bool DocSequenceDb::getDoc(int num, Rcl::Doc &doc, int *percent, string *sh)
|
||||
{
|
||||
if (sh) sh->erase();
|
||||
return m_db ? m_db->getDoc(num, doc, percent) : false;
|
||||
return m_q->getDoc(num, doc, percent);
|
||||
}
|
||||
|
||||
int DocSequenceDb::getResCnt()
|
||||
{
|
||||
if (!m_db)
|
||||
return -1;
|
||||
if (m_rescnt < 0) {
|
||||
m_rescnt= m_db->getResCnt();
|
||||
m_rescnt= m_q->getResCnt();
|
||||
}
|
||||
return m_rescnt;
|
||||
}
|
||||
|
||||
string DocSequenceDb::getAbstract(Rcl::Doc &doc)
|
||||
{
|
||||
if (!m_db)
|
||||
if (!m_q->whatDb())
|
||||
return doc.meta["abstract"];
|
||||
string abstract;
|
||||
m_db->makeDocAbstract(doc, abstract);
|
||||
m_q->whatDb()->makeDocAbstract(doc, m_q.getptr(), abstract);
|
||||
return abstract.empty() ? doc.meta["abstract"] : abstract;
|
||||
}
|
||||
|
||||
// Compute expansion terms for a document by asking the underlying query.
list<string> DocSequenceDb::expand(Rcl::Doc &doc)
{
    list<string> expansion = m_q->expand(doc);
    return expansion;
}
|
||||
|
||||
|
||||
@ -16,33 +16,30 @@
|
||||
*/
|
||||
#ifndef _DOCSEQDB_H_INCLUDED_
|
||||
#define _DOCSEQDB_H_INCLUDED_
|
||||
/* @(#$Id: docseqdb.h,v 1.2 2007-01-19 15:22:50 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: docseqdb.h,v 1.3 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
#include "docseq.h"
|
||||
#include "refcntr.h"
|
||||
|
||||
#include "searchdata.h"
|
||||
#include "rclquery.h"
|
||||
|
||||
/** A DocSequence from a Db query (there should be one active for this
|
||||
to make sense) */
|
||||
class DocSequenceDb : public DocSequence {
|
||||
public:
|
||||
DocSequenceDb(Rcl::Db *d, const string &t, RefCntr<Rcl::SearchData> sdata)
|
||||
: DocSequence(t), m_db(d), m_sdata(sdata), m_rescnt(-1)
|
||||
{}
|
||||
virtual ~DocSequenceDb() {}
|
||||
DocSequenceDb(RefCntr<Rcl::Query> q, const string &t,
|
||||
RefCntr<Rcl::SearchData> sdata);
|
||||
virtual ~DocSequenceDb();
|
||||
virtual bool getDoc(int num, Rcl::Doc &doc, int *percent, string * = 0);
|
||||
virtual int getResCnt();
|
||||
virtual bool getTerms(vector<string>& terms,
|
||||
vector<vector<string> >& groups,
|
||||
vector<int>& gslks) const {
|
||||
return m_sdata.getptr()->getTerms(terms, groups, gslks);
|
||||
}
|
||||
|
||||
vector<int>& gslks) const;
|
||||
virtual string getAbstract(Rcl::Doc &doc);
|
||||
virtual string getDescription() {return m_sdata->getDescription();}
|
||||
|
||||
virtual string getDescription();
|
||||
virtual list<string> expand(Rcl::Doc &doc);
|
||||
private:
|
||||
Rcl::Db *m_db;
|
||||
RefCntr<Rcl::Query> m_q;
|
||||
RefCntr<Rcl::SearchData> m_sdata;
|
||||
int m_rescnt;
|
||||
};
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: recollq.cpp,v 1.12 2007-12-13 06:58:21 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: recollq.cpp,v 1.13 2008-06-13 18:22:46 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -32,6 +32,7 @@ static char rcsid[] = "@(#$Id: recollq.cpp,v 1.12 2007-12-13 06:58:21 dockes Exp
|
||||
using namespace std;
|
||||
|
||||
#include "rcldb.h"
|
||||
#include "rclquery.h"
|
||||
#include "rclconfig.h"
|
||||
#include "pathut.h"
|
||||
#include "rclinit.h"
|
||||
@ -132,8 +133,7 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
exit(1);
|
||||
}
|
||||
dbdir = rclconfig->getDbDir();
|
||||
rcldb.open(dbdir, rclconfig->getStopfile(),
|
||||
Rcl::Db::DbRO, Rcl::Db::QO_STEM);
|
||||
rcldb.open(dbdir, rclconfig->getStopfile(), Rcl::Db::DbRO);
|
||||
|
||||
Rcl::SearchData *sd = 0;
|
||||
|
||||
@ -166,8 +166,9 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
}
|
||||
|
||||
RefCntr<Rcl::SearchData> rq(sd);
|
||||
rcldb.setQuery(rq, Rcl::Db::QO_STEM);
|
||||
int cnt = rcldb.getResCnt();
|
||||
Rcl::Query query(&rcldb);
|
||||
query.setQuery(rq, Rcl::Query::QO_STEM);
|
||||
int cnt = query.getResCnt();
|
||||
if (!(op_flags & OPT_b)) {
|
||||
cout << "Recoll query: " << rq->getDescription() << endl;
|
||||
if (cnt <= limit)
|
||||
@ -180,7 +181,7 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
for (int i = 0; i < limit; i++) {
|
||||
int pc;
|
||||
Rcl::Doc doc;
|
||||
if (!rcldb.getDoc(i, doc, &pc))
|
||||
if (!query.getDoc(i, doc, &pc))
|
||||
break;
|
||||
|
||||
if (op_flags & OPT_b) {
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.132 2008-05-20 10:09:54 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.133 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -36,6 +36,7 @@ using namespace std;
|
||||
|
||||
#include "rclconfig.h"
|
||||
#include "rcldb.h"
|
||||
#include "rcldb_p.h"
|
||||
#include "stemdb.h"
|
||||
#include "textsplit.h"
|
||||
#include "transcode.h"
|
||||
@ -47,8 +48,9 @@ using namespace std;
|
||||
#include "pathhash.h"
|
||||
#include "utf8iter.h"
|
||||
#include "searchdata.h"
|
||||
#include "rclquery.h"
|
||||
#include "rclquery_p.h"
|
||||
|
||||
#include "xapian.h"
|
||||
|
||||
#ifndef MAX
|
||||
/* Largest of two values. Arguments are parenthesized so that expressions
 * using low-precedence operators (e.g. MAX(a & b, c)) expand correctly.
 * As with any such macro, each argument may be evaluated twice. */
#define MAX(A,B) ((A) > (B) ? (A) : (B))
|
||||
@ -88,125 +90,8 @@ namespace Rcl {
|
||||
const static string rclSyntAbs = "?!#@";
|
||||
const static string emptystring;
|
||||
|
||||
// A class for data and methods that would have to expose
|
||||
// Xapian-specific stuff if they were in Rcl::Db. There could actually be
|
||||
// 2 different ones for indexing or query as there is not much in
|
||||
// common.
|
||||
class Native {
|
||||
public:
|
||||
Db *m_db;
|
||||
bool m_isopen;
|
||||
bool m_iswritable;
|
||||
|
||||
// Indexing
|
||||
Xapian::WritableDatabase wdb;
|
||||
|
||||
// Querying
|
||||
Xapian::Database db;
|
||||
Xapian::Query query; // query descriptor: terms and subqueries
|
||||
// joined by operators (or/and etc...)
|
||||
|
||||
// Filtering results on location. There are 2 possible approaches
|
||||
// for this:
|
||||
// - Set a "MatchDecider" to be used by Xapian during the query
|
||||
// - Filter the results out of Xapian (this also uses a
|
||||
// Xapian::MatchDecider object, but applied to the results by Recoll.
|
||||
//
|
||||
// The result filtering approach was the first implemented.
|
||||
//
|
||||
// The efficiency of both methods depend on the searches, so the code
|
||||
// for both has been kept. A nice point for the Xapian approach is that
|
||||
// the result count estimate are correct (they are wrong with
|
||||
// the postfilter approach). It is also faster in some worst case scenarios
|
||||
// so this now the default (but the post-filtering is faster in many common
|
||||
// cases).
|
||||
//
|
||||
// Which is used is decided in SetQuery(), by setting either of
|
||||
// the two following members. This in turn is controlled by a
|
||||
// preprocessor directive.
|
||||
|
||||
#define XAPIAN_FILTERING 1
|
||||
|
||||
Xapian::MatchDecider *decider; // Xapian does the filtering
|
||||
Xapian::MatchDecider *postfilter; // Result filtering done by Recoll
|
||||
|
||||
Xapian::Enquire *enquire; // Open query descriptor.
|
||||
Xapian::MSet mset; // Partial result set
|
||||
|
||||
// Term frequencies for current query. See makeAbstract, setQuery
|
||||
map<string, double> m_termfreqs;
|
||||
|
||||
Native(Db *db)
|
||||
: m_db(db),
|
||||
m_isopen(false), m_iswritable(false), decider(0), postfilter(0),
|
||||
enquire(0)
|
||||
{ }
|
||||
|
||||
~Native() {
|
||||
delete decider;
|
||||
delete postfilter;
|
||||
delete enquire;
|
||||
}
|
||||
|
||||
string makeAbstract(Xapian::docid id, const list<string>& terms);
|
||||
|
||||
bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);
|
||||
|
||||
/** Compute list of subdocuments for a given path (given by hash)
|
||||
* We look for all Q terms beginning with the path/hash
|
||||
* As suggested by James Aylett, a better method would be to add
|
||||
* a single term (ie: XP/path/to/file) to all subdocs, then finding
|
||||
* them would be a simple matter of retrieving the posting list for the
|
||||
* term. There would still be a need for the current Qterm though, as a
|
||||
* unique term for replace_document, and for retrieving by
|
||||
* path/ipath (history)
|
||||
*/
|
||||
bool subDocs(const string &hash, vector<Xapian::docid>& docids);
|
||||
|
||||
};
|
||||
|
||||
class FilterMatcher : public Xapian::MatchDecider {
|
||||
public:
|
||||
FilterMatcher(const string &topdir)
|
||||
: m_topdir(topdir)
|
||||
{}
|
||||
virtual ~FilterMatcher() {}
|
||||
|
||||
virtual
|
||||
#if XAPIAN_MAJOR_VERSION < 1
|
||||
int
|
||||
#else
|
||||
bool
|
||||
#endif
|
||||
operator()(const Xapian::Document &xdoc) const
|
||||
{
|
||||
m_cnt++;
|
||||
// Parse xapian document's data and populate doc fields
|
||||
string data = xdoc.get_data();
|
||||
ConfSimple parms(&data);
|
||||
|
||||
// The only filtering for now is on file path (subtree)
|
||||
string url;
|
||||
parms.get(string("url"), url);
|
||||
LOGDEB2(("FilterMatcher topdir [%s] url [%s]\n",
|
||||
m_topdir.c_str(), url.c_str()));
|
||||
if (url.find(m_topdir, 7) == 7) {
|
||||
LOGDEB2(("FilterMatcher: MATCH %d\n", m_cnt));
|
||||
return true;
|
||||
} else {
|
||||
LOGDEB2(("FilterMatcher: NO MATCH %d\n", m_cnt));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
static int m_cnt;
|
||||
|
||||
private:
|
||||
string m_topdir;
|
||||
};
|
||||
int FilterMatcher::m_cnt;
|
||||
|
||||
/* See comment in class declaration */
|
||||
bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
|
||||
bool Db::Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
|
||||
{
|
||||
docids.clear();
|
||||
string qterm = "Q"+ hash + "|";
|
||||
@ -250,7 +135,7 @@ bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
|
||||
}
|
||||
|
||||
// Turn data record from db into document fields
|
||||
bool Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc)
|
||||
bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc)
|
||||
{
|
||||
LOGDEB1(("Db::dbDataToRclDoc: data: %s\n", data.c_str()));
|
||||
ConfSimple parms(&data);
|
||||
@ -306,26 +191,29 @@ static list<string> noPrefixList(const list<string>& in)
|
||||
|
||||
// Build a document abstract by extracting text chunks around the query terms
|
||||
// This uses the db termlists, not the original document.
|
||||
string Native::makeAbstract(Xapian::docid docid, const list<string>& iterms)
|
||||
string Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||
{
|
||||
Chrono chron;
|
||||
LOGDEB(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
|
||||
m_db->m_synthAbsLen, m_db->m_synthAbsWordCtxLen));
|
||||
|
||||
list<string> iterms;
|
||||
query->getQueryTerms(iterms);
|
||||
|
||||
list<string> terms = noPrefixList(iterms);
|
||||
if (terms.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// Retrieve db-wide frequencies for the query terms
|
||||
if (m_termfreqs.empty()) {
|
||||
if (query->m_nq->termfreqs.empty()) {
|
||||
double doccnt = db.get_doccount();
|
||||
if (doccnt == 0) doccnt = 1;
|
||||
for (list<string>::const_iterator qit = terms.begin();
|
||||
qit != terms.end(); qit++) {
|
||||
m_termfreqs[*qit] = db.get_termfreq(*qit) / doccnt;
|
||||
query->m_nq->termfreqs[*qit] = db.get_termfreq(*qit) / doccnt;
|
||||
LOGABS(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(),
|
||||
m_termfreqs[*qit]));
|
||||
query->m_nq->termfreqs[*qit]));
|
||||
}
|
||||
LOGABS(("makeAbstract:%d: got termfreqs\n", chron.ms()));
|
||||
}
|
||||
@ -343,7 +231,7 @@ string Native::makeAbstract(Xapian::docid docid, const list<string>& iterms)
|
||||
Xapian::TermIterator term = db.termlist_begin(docid);
|
||||
term.skip_to(*qit);
|
||||
if (term != db.termlist_end(docid) && *term == *qit) {
|
||||
double q = (term.get_wdf() / doclen) * m_termfreqs[*qit];
|
||||
double q = (term.get_wdf() / doclen) * query->m_nq->termfreqs[*qit];
|
||||
q = -log10(q);
|
||||
if (q < 3) {
|
||||
q = 0.05;
|
||||
@ -556,7 +444,7 @@ string Native::makeAbstract(Xapian::docid docid, const list<string>& iterms)
|
||||
/* Rcl::Db methods ///////////////////////////////// */
|
||||
|
||||
Db::Db()
|
||||
: m_ndb(0), m_qOpts(QO_NONE), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
||||
: m_ndb(0), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
||||
m_synthAbsWordCtxLen(4), m_flushMb(-1),
|
||||
m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0),
|
||||
m_maxFsOccupPc(0), m_mode(Db::DbRO)
|
||||
@ -586,28 +474,9 @@ Db::~Db()
|
||||
return res;
|
||||
}
|
||||
|
||||
// Shared tail for try blocks around Xapian calls: every exception kind we
// know about is turned into a non-empty message stored in MSG (a string).
// MSG is left untouched when nothing is thrown. This is needed in many
// places and a macro is the only practical way to share catch clauses.
#define XCATCHERROR(MSG)                                               \
    catch (const Xapian::Error &e) {                                   \
        MSG = e.get_msg();                                             \
        if (MSG.empty())                                               \
            MSG = "Empty error message";                               \
    } catch (const string &s) {                                        \
        MSG = s.empty() ? string("Empty error message") : s;           \
    } catch (const char *s) {                                          \
        MSG = s;                                                       \
        if (MSG.empty())                                               \
            MSG = "Empty error message";                               \
    } catch (...) {                                                    \
        MSG = "Caught unknown xapian exception";                       \
    }
|
||||
|
||||
|
||||
bool Db::open(const string& dir, const string &stops, OpenMode mode, int qops)
|
||||
bool Db::open(const string& dir, const string &stops, OpenMode mode,
|
||||
bool keep_updated)
|
||||
{
|
||||
bool keep_updated = (qops & QO_KEEP_UPDATED) != 0;
|
||||
qops &= ~QO_KEEP_UPDATED;
|
||||
|
||||
if (m_ndb == 0)
|
||||
return false;
|
||||
LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
||||
@ -724,7 +593,7 @@ bool Db::reOpen()
|
||||
if (m_ndb && m_ndb->m_isopen) {
|
||||
if (!close())
|
||||
return false;
|
||||
if (!open(m_basedir, "", m_mode, m_qOpts | QO_KEEP_UPDATED)) {
|
||||
if (!open(m_basedir, "", m_mode, true)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -1467,64 +1336,6 @@ bool Db::filenameWildExp(const string& fnexp, list<string>& names)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Prepare query out of user search data
|
||||
bool Db::setQuery(RefCntr<SearchData> sdata, int opts,
|
||||
const string& stemlang)
|
||||
{
|
||||
if (!m_ndb) {
|
||||
LOGERR(("Db::setQuery: no db!\n"));
|
||||
return false;
|
||||
}
|
||||
m_reason.erase();
|
||||
LOGDEB(("Db::setQuery:\n"));
|
||||
|
||||
m_filterTopDir = sdata->getTopdir();
|
||||
deleteZ(m_ndb->decider);
|
||||
deleteZ(m_ndb->postfilter);
|
||||
if (!m_filterTopDir.empty()) {
|
||||
#if XAPIAN_FILTERING
|
||||
m_ndb->decider =
|
||||
#else
|
||||
m_ndb->postfilter =
|
||||
#endif
|
||||
new FilterMatcher(m_filterTopDir);
|
||||
}
|
||||
m_dbindices.clear();
|
||||
m_qOpts = opts;
|
||||
m_ndb->m_termfreqs.clear();
|
||||
FilterMatcher::m_cnt = 0;
|
||||
Xapian::Query xq;
|
||||
if (!sdata->toNativeQuery(*this, &xq,
|
||||
(opts & Db::QO_STEM) ? stemlang : "")) {
|
||||
m_reason += sdata->getReason();
|
||||
return false;
|
||||
}
|
||||
m_ndb->query = xq;
|
||||
string ermsg;
|
||||
string d;
|
||||
try {
|
||||
delete m_ndb->enquire;
|
||||
m_ndb->enquire = new Xapian::Enquire(m_ndb->db);
|
||||
m_ndb->enquire->set_query(m_ndb->query);
|
||||
m_ndb->mset = Xapian::MSet();
|
||||
// Get the query description and trim the "Xapian::Query"
|
||||
d = m_ndb->query.get_description();
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
LOGDEB(("Db::SetQuery: xapian error %s\n", ermsg.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (d.find("Xapian::Query") == 0)
|
||||
d.erase(0, strlen("Xapian::Query"));
|
||||
if (!m_filterTopDir.empty()) {
|
||||
d += string(" [dir: ") + m_filterTopDir + "]";
|
||||
}
|
||||
sdata->setDescription(d);
|
||||
LOGDEB(("Db::SetQuery: Q: %s\n", sdata->getDescription().c_str()));
|
||||
return true;
|
||||
}
|
||||
|
||||
class TermMatchCmpByWcf {
|
||||
public:
|
||||
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
||||
@ -1735,195 +1546,15 @@ bool Db::stemDiffers(const string& lang, const string& word,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Fill 'terms' with the terms of the current native query.
// The list is cleared first. Returns false if there is no native db
// object or if Xapian raised an error while walking the terms.
bool Db::getQueryTerms(list<string>& terms)
{
    if (m_ndb == 0) {
        return false;
    }
    terms.clear();

    Xapian::TermIterator cur;
    string xaperr;
    try {
        cur = m_ndb->query.get_terms_begin();
        while (cur != m_ndb->query.get_terms_end()) {
            terms.push_back(*cur);
            cur++;
        }
    } XCATCHERROR(xaperr);

    if (xaperr.empty()) {
        return true;
    }
    LOGERR(("getQueryTerms: xapian error: %s\n", xaperr.c_str()));
    return false;
}
|
||||
|
||||
bool Db::getMatchTerms(const Doc& doc, list<string>& terms)
|
||||
{
|
||||
if (!m_ndb || !m_ndb->enquire) {
|
||||
LOGERR(("Db::getMatchTerms: no query opened\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
terms.clear();
|
||||
Xapian::TermIterator it;
|
||||
Xapian::docid id = Xapian::docid(doc.xdocid);
|
||||
string ermsg;
|
||||
try {
|
||||
for (it=m_ndb->enquire->get_matching_terms_begin(id);
|
||||
it != m_ndb->enquire->get_matching_terms_end(id); it++) {
|
||||
terms.push_back(*it);
|
||||
}
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
LOGERR(("getQueryTerms: xapian error: %s\n", ermsg.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Mset size
|
||||
static const int qquantum = 30;
|
||||
|
||||
int Db::getResCnt()
|
||||
{
|
||||
if (!m_ndb || !m_ndb->enquire) {
|
||||
LOGERR(("Db::getResCnt: no query opened\n"));
|
||||
return -1;
|
||||
}
|
||||
string ermsg;
|
||||
if (m_ndb->mset.size() <= 0) {
|
||||
try {
|
||||
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum,
|
||||
0, m_ndb->decider);
|
||||
} catch (const Xapian::DatabaseModifiedError &error) {
|
||||
m_ndb->db.reopen();
|
||||
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum,
|
||||
0, m_ndb->decider);
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
LOGERR(("enquire->get_mset: exception: %s\n", ermsg.c_str()));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
int ret = -1;
|
||||
try {
|
||||
ret = m_ndb->mset.get_matches_lower_bound();
|
||||
} catch (...) {}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
// Get document at rank i in query (i is the index in the whole result
|
||||
// set, as in the enquire class. We check if the current mset has the
|
||||
// doc, else ask for another one. We use msets of qquantum documents. Don't
|
||||
// know if the whole thing makes sense at all but it seems to work.
|
||||
//
|
||||
// If there is a postquery filter (ie: file names), we have to
|
||||
// maintain a correspondence from the sequential external index
|
||||
// sequence to the internal Xapian hole-y one (the holes being the documents
|
||||
// that don't match the filter).
|
||||
bool Db::getDoc(int exti, Doc &doc, int *percent)
|
||||
{
|
||||
LOGDEB1(("Db::getDoc: exti %d\n", exti));
|
||||
if (!m_ndb || !m_ndb->enquire) {
|
||||
LOGERR(("Db::getDoc: no query opened\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
int xapi;
|
||||
if (m_ndb->postfilter) {
|
||||
// There is a postquery filter, does this fall in already known area ?
|
||||
if (exti >= (int)m_dbindices.size()) {
|
||||
// Have to fetch xapian docs and filter until we get
|
||||
// enough or fail
|
||||
m_dbindices.reserve(exti+1);
|
||||
// First xapian doc we fetch is the one after last stored
|
||||
int first = m_dbindices.size() > 0 ? m_dbindices.back() + 1 : 0;
|
||||
// Loop until we get enough docs
|
||||
while (exti >= (int)m_dbindices.size()) {
|
||||
LOGDEB(("Db::getDoc: fetching %d starting at %d\n",
|
||||
qquantum, first));
|
||||
try {
|
||||
m_ndb->mset = m_ndb->enquire->get_mset(first, qquantum);
|
||||
} catch (const Xapian::DatabaseModifiedError &error) {
|
||||
m_ndb->db.reopen();
|
||||
m_ndb->mset = m_ndb->enquire->get_mset(first, qquantum);
|
||||
} catch (const Xapian::Error & error) {
|
||||
LOGERR(("enquire->get_mset: exception: %s\n",
|
||||
error.get_msg().c_str()));
|
||||
abort();
|
||||
}
|
||||
|
||||
if (m_ndb->mset.empty()) {
|
||||
LOGDEB(("Db::getDoc: got empty mset\n"));
|
||||
return false;
|
||||
}
|
||||
first = m_ndb->mset.get_firstitem();
|
||||
for (unsigned int i = 0; i < m_ndb->mset.size() ; i++) {
|
||||
LOGDEB(("Db::getDoc: [%d]\n", i));
|
||||
Xapian::Document xdoc = m_ndb->mset[i].get_document();
|
||||
if ((*m_ndb->postfilter)(xdoc)) {
|
||||
m_dbindices.push_back(first + i);
|
||||
}
|
||||
}
|
||||
first = first + m_ndb->mset.size();
|
||||
}
|
||||
}
|
||||
xapi = m_dbindices[exti];
|
||||
} else {
|
||||
xapi = exti;
|
||||
}
|
||||
|
||||
// From there on, we work with a xapian enquire item number. Fetch it
|
||||
int first = m_ndb->mset.get_firstitem();
|
||||
int last = first + m_ndb->mset.size() -1;
|
||||
|
||||
if (!(xapi >= first && xapi <= last)) {
|
||||
LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
|
||||
try {
|
||||
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum,
|
||||
0, m_ndb->decider);
|
||||
} catch (const Xapian::DatabaseModifiedError &error) {
|
||||
m_ndb->db.reopen();
|
||||
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum,
|
||||
0, m_ndb->decider);
|
||||
|
||||
} catch (const Xapian::Error & error) {
|
||||
LOGERR(("enquire->get_mset: exception: %s\n",
|
||||
error.get_msg().c_str()));
|
||||
abort();
|
||||
}
|
||||
if (m_ndb->mset.empty())
|
||||
return false;
|
||||
first = m_ndb->mset.get_firstitem();
|
||||
last = first + m_ndb->mset.size() -1;
|
||||
}
|
||||
|
||||
LOGDEB1(("Db::getDoc: Qry [%s] win [%d-%d] Estimated results: %d",
|
||||
m_ndb->query.get_description().c_str(),
|
||||
first, last,
|
||||
m_ndb->mset.get_matches_lower_bound()));
|
||||
|
||||
Xapian::Document xdoc = m_ndb->mset[xapi-first].get_document();
|
||||
Xapian::docid docid = *(m_ndb->mset[xapi-first]);
|
||||
if (percent)
|
||||
*percent = m_ndb->mset.convert_to_percent(m_ndb->mset[xapi-first]);
|
||||
|
||||
// Parse xapian document's data and populate doc fields
|
||||
string data = xdoc.get_data();
|
||||
return m_ndb->dbDataToRclDoc(docid, data, doc);
|
||||
}
|
||||
|
||||
bool Db::makeDocAbstract(Doc &doc, string& abstract)
|
||||
bool Db::makeDocAbstract(Doc &doc, Query *query, string& abstract)
|
||||
{
|
||||
LOGDEB1(("Db::makeDocAbstract: exti %d\n", exti));
|
||||
if (!m_ndb || !m_ndb->enquire) {
|
||||
LOGERR(("Db::makeDocAbstract: no query opened\n"));
|
||||
if (!m_ndb) {
|
||||
LOGERR(("Db::makeDocAbstract: no db\n"));
|
||||
return false;
|
||||
}
|
||||
list<string> terms;
|
||||
getQueryTerms(terms);
|
||||
abstract = m_ndb->makeAbstract(doc.xdocid, terms);
|
||||
abstract = m_ndb->makeAbstract(doc.xdocid, query);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1969,45 +1600,6 @@ bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Query expansion: return up to 10 terms that Xapian suggests from the
 * given document (used as a one-document relevance set).
 *
 * Terms beginning with an uppercase ASCII letter are skipped ("special
 * terms"). The list is empty if no query is open or on Xapian error.
 */
list<string> Db::expand(const Doc &doc)
{
    list<string> res;
    if (!m_ndb || !m_ndb->enquire) {
        LOGERR(("Db::expand: no query opened\n"));
        return res;
    }
    string ermsg;
    // Two attempts at most: if the index was modified under us, reopen it
    // before retrying (retrying without a reopen would just fail again).
    for (int tries = 0; tries < 2; tries++) {
        bool modified = false;
        ermsg.erase();
        // Drop anything collected by an interrupted first attempt
        res.clear();
        try {
            if (tries > 0)
                m_ndb->db.reopen();
            Xapian::RSet rset;
            rset.add_document(Xapian::docid(doc.xdocid));
            // We don't exclude the original query terms.
            Xapian::ESet eset = m_ndb->enquire->get_eset(20, rset, false);
            LOGDEB(("ESet terms:\n"));
            // We filter out the special terms
            for (Xapian::ESetIterator it = eset.begin();
                 it != eset.end(); it++) {
                LOGDEB((" [%s]\n", (*it).c_str()));
                if ((*it).empty() || ((*it).at(0)>='A' && (*it).at(0)<='Z'))
                    continue;
                res.push_back(*it);
                if (res.size() >= 10)
                    break;
            }
        } catch (const Xapian::DatabaseModifiedError &error) {
            modified = true;
            ermsg = error.get_msg();
            if (ermsg.empty())
                ermsg = "Database modified";
        } XCATCHERROR(ermsg);
        if (!modified)
            break;
    }
    if (!ermsg.empty()) {
        LOGERR(("Db::expand: xapian error %s\n", ermsg.c_str()));
        res.clear();
    }

    return res;
}
|
||||
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.54 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rcldb.h,v 1.55 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
@ -52,8 +52,8 @@ namespace Rcl {
|
||||
#endif
|
||||
|
||||
class SearchData;
|
||||
class Native;
|
||||
class TermIter;
|
||||
class Query;
|
||||
|
||||
class TermMatchEntry {
|
||||
public:
|
||||
@ -71,17 +71,17 @@ public:
|
||||
*/
|
||||
class Db {
|
||||
public:
|
||||
// A place for things we don't want visible here.
|
||||
class Native;
|
||||
friend class Native;
|
||||
|
||||
/* General stuff (valid for query or update) ****************************/
|
||||
Db();
|
||||
~Db();
|
||||
|
||||
enum OpenMode {DbRO, DbUpd, DbTrunc};
|
||||
// KEEP_UPDATED is internal use by reOpen() only
|
||||
enum QueryOpts {QO_NONE=0, QO_STEM = 1, QO_KEEP_UPDATED = 8};
|
||||
|
||||
bool open(const string &dbdir, const string &stoplistfn,
|
||||
OpenMode mode, int qops = QO_NONE);
|
||||
OpenMode mode, bool keep_updated = false);
|
||||
bool close();
|
||||
bool isopen();
|
||||
|
||||
@ -130,11 +130,12 @@ class Db {
|
||||
/** Return total docs in db */
|
||||
int docCnt();
|
||||
|
||||
// Parse query string and initialize query
|
||||
bool setQuery(RefCntr<SearchData> q, int opts = QO_NONE,
|
||||
const string& stemlang = "english");
|
||||
bool getQueryTerms(list<string>& terms);
|
||||
bool getMatchTerms(const Doc& doc, list<string>& terms);
|
||||
/** Add extra database for querying */
|
||||
bool addQueryDb(const string &dir);
|
||||
/** Remove extra database. if dir == "", remove all. */
|
||||
bool rmQueryDb(const string &dir);
|
||||
/** Tell if directory seems to hold xapian db */
|
||||
static bool testDbDir(const string &dir);
|
||||
|
||||
/** Return a list of index terms that match the input string
|
||||
* Expansion is performed with either wildcard or regexp processing
|
||||
@ -143,33 +144,12 @@ class Db {
|
||||
bool termMatch(MatchType typ, const string &lang, const string &s,
|
||||
list<TermMatchEntry>& result, int max = -1);
|
||||
|
||||
/** Add extra database for querying */
|
||||
bool addQueryDb(const string &dir);
|
||||
/** Remove extra database. if dir == "", remove all. */
|
||||
bool rmQueryDb(const string &dir);
|
||||
/** Tell if directory seems to hold xapian db */
|
||||
static bool testDbDir(const string &dir);
|
||||
|
||||
/** Get document at rank i in current query.
|
||||
|
||||
This is probably vastly inferior to the type of interface in
|
||||
Xapian, but we have to start with something simple to
|
||||
experiment with the GUI. i is sequential from 0 to some value.
|
||||
*/
|
||||
bool getDoc(int i, Doc &doc, int *percent = 0);
|
||||
|
||||
/* Build synthetic abstract out of query terms and term position data */
|
||||
bool makeDocAbstract(Doc &doc, string& abstract);
|
||||
bool makeDocAbstract(Doc &doc, Query *query, string& abstract);
|
||||
|
||||
/** Get document for given filename and ipath */
|
||||
bool getDoc(const string &fn, const string &ipath, Doc &doc, int *percent);
|
||||
|
||||
/** Expand query */
|
||||
list<string> expand(const Doc &doc);
|
||||
|
||||
/** Get results count for current query */
|
||||
int getResCnt();
|
||||
|
||||
/** Get a list of existing stemming databases */
|
||||
std::list<std::string> getStemLangs();
|
||||
|
||||
@ -189,22 +169,16 @@ class Db {
|
||||
/** Filename wildcard expansion */
|
||||
bool filenameWildExp(const string& exp, list<string>& names);
|
||||
|
||||
/** This has to be public for access by embedded Query::Native */
|
||||
Native *m_ndb;
|
||||
|
||||
private:
|
||||
// Internal form of close, can be called during destruction
|
||||
bool i_close(bool final);
|
||||
|
||||
string m_filterTopDir; // Current query filter on subtree top directory
|
||||
vector<int> m_dbindices; // In case there is a postq filter: sequence of
|
||||
// db indices that match
|
||||
|
||||
string m_reason; // Error explanation
|
||||
|
||||
// A place for things we don't want visible here.
|
||||
friend class Native;
|
||||
Native *m_ndb;
|
||||
|
||||
unsigned int m_qOpts;
|
||||
|
||||
/* Parameters cached out of the configuration files */
|
||||
// This is how long an abstract we keep or build from beginning of
|
||||
// text when indexing. It only has an influence on the size of the
|
||||
// db as we are free to shorten it again when displaying
|
||||
@ -215,7 +189,6 @@ private:
|
||||
// This is how many words (context size) we keep around query terms
|
||||
// when building the abstract
|
||||
int m_synthAbsWordCtxLen;
|
||||
|
||||
// Flush threshold. Megabytes of text indexed before we flush.
|
||||
int m_flushMb;
|
||||
// Text bytes indexed since beginning
|
||||
@ -224,7 +197,6 @@ private:
|
||||
long long m_flushtxtsz;
|
||||
// Text bytes at last fsoccup check
|
||||
long long m_occtxtsz;
|
||||
|
||||
// Maximum file system occupation percentage
|
||||
int m_maxFsOccupPc;
|
||||
|
||||
|
||||
67
src/rcldb/rcldb_p.h
Normal file
67
src/rcldb/rcldb_p.h
Normal file
@ -0,0 +1,67 @@
|
||||
#ifndef _rcldb_p_h_included_
|
||||
#define _rcldb_p_h_included_
|
||||
|
||||
#include "xapian.h"
|
||||
|
||||
namespace Rcl {
|
||||
/* @(#$Id: rcldb_p.h,v 1.1 2008-06-13 18:22:46 dockes Exp $ (C) 2007 J.F.Dockes */
|
||||
|
||||
// Generic Xapian exception catching code. We do this quite often,
// and I have no idea how to do this except for a macro.
//
// Usage: try { ... } XCATCHERROR(msg);
// MSG must be a string lvalue. It is set to a non-empty explanation if an
// exception was caught, and left untouched otherwise, so callers test
// MSG.empty() to detect success.
// std::string is spelled out so that the macro does not depend on a
// using-declaration being in effect where it is expanded.
#define XCATCHERROR(MSG) \
    catch (const Xapian::Error &e) {                    \
        MSG = e.get_msg();                              \
        if (MSG.empty()) MSG = "Empty error message";   \
    } catch (const std::string &s) {                    \
        MSG = s;                                        \
        if (MSG.empty()) MSG = "Empty error message";   \
    } catch (const char *s) {                           \
        MSG = s;                                        \
        if (MSG.empty()) MSG = "Empty error message";   \
    } catch (...) {                                     \
        MSG = "Caught unknown xapian exception";        \
    }
|
||||
|
||||
class Query;
|
||||
|
||||
// A class for data and methods that would have to expose
|
||||
// Xapian-specific stuff if they were in Rcl::Db. There could actually be
|
||||
// 2 different ones for indexing or query as there is not much in
|
||||
// common.
|
||||
class Db::Native {
|
||||
public:
|
||||
Db *m_db;
|
||||
bool m_isopen;
|
||||
bool m_iswritable;
|
||||
|
||||
// Indexing
|
||||
Xapian::WritableDatabase wdb;
|
||||
|
||||
// Querying
|
||||
Xapian::Database db;
|
||||
|
||||
Native(Db *db)
|
||||
: m_db(db), m_isopen(false), m_iswritable(false)
|
||||
{ }
|
||||
|
||||
~Native() {
|
||||
}
|
||||
|
||||
string makeAbstract(Xapian::docid id, Query *query);
|
||||
|
||||
bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);
|
||||
|
||||
/** Compute list of subdocuments for a given path (given by hash)
|
||||
* We look for all Q terms beginning with the path/hash
|
||||
* As suggested by James Aylett, a better method would be to add
|
||||
* a single term (ie: XP/path/to/file) to all subdocs, then finding
|
||||
* them would be a simple matter of retrieving the posting list for the
|
||||
* term. There would still be a need for the current Qterm though, as a
|
||||
* unique term for replace_document, and for retrieving by
|
||||
* path/ipath (history)
|
||||
*/
|
||||
bool subDocs(const string &hash, vector<Xapian::docid>& docids);
|
||||
|
||||
};
|
||||
}
|
||||
#endif /* _rcldb_p_h_included_ */
|
||||
354
src/rcldb/rclquery.cpp
Normal file
354
src/rcldb/rclquery.cpp
Normal file
@ -0,0 +1,354 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclquery.cpp,v 1.1 2008-06-13 18:22:46 dockes Exp $ (C) 2008 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <list>
|
||||
#include <vector>
|
||||
|
||||
#include "rcldb.h"
|
||||
#include "rcldb_p.h"
|
||||
#include "rclquery.h"
|
||||
#include "rclquery_p.h"
|
||||
#include "debuglog.h"
|
||||
#include "conftree.h"
|
||||
#include "smallut.h"
|
||||
#include "searchdata.h"
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
namespace Rcl {
|
||||
#endif
|
||||
class FilterMatcher : public Xapian::MatchDecider {
|
||||
public:
|
||||
FilterMatcher(const string &topdir)
|
||||
: m_topdir(topdir)
|
||||
{}
|
||||
virtual ~FilterMatcher() {}
|
||||
|
||||
virtual
|
||||
#if XAPIAN_MAJOR_VERSION < 1
|
||||
int
|
||||
#else
|
||||
bool
|
||||
#endif
|
||||
operator()(const Xapian::Document &xdoc) const
|
||||
{
|
||||
// Parse xapian document's data and populate doc fields
|
||||
string data = xdoc.get_data();
|
||||
ConfSimple parms(&data);
|
||||
|
||||
// The only filtering for now is on file path (subtree)
|
||||
string url;
|
||||
parms.get(string("url"), url);
|
||||
LOGDEB2(("FilterMatcher topdir [%s] url [%s]\n",
|
||||
m_topdir.c_str(), url.c_str()));
|
||||
if (url.find(m_topdir, 7) == 7) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
string m_topdir;
|
||||
};
|
||||
|
||||
// Create a query object for the given database. No query is active until
// setQuery() succeeds. m_qOpts is initialised here so that it never holds
// an indeterminate value before the first setQuery() call.
Query::Query(Db *db)
    : m_nq(new Native(this)), m_db(db), m_qOpts(QO_NONE)
{
}
|
||||
|
||||
// Dispose of the Xapian-specific state held through m_nq
Query::~Query()
{
    deleteZ(m_nq);
}
|
||||
|
||||
// Return a copy of the current error explanation string
string Query::getReason() const
{
    return this->m_reason;
}
|
||||
|
||||
// Return the database pointer which was given to the constructor
Db *Query::whatDb()
{
    return this->m_db;
}
|
||||
|
||||
// Null test for the native object pointer. Alternative for a reference
// counted holder:
//#define ISNULL(X) (X).isNull()
#define ISNULL(X) (!(X))
|
||||
|
||||
// Prepare query out of user search data
|
||||
bool Query::setQuery(RefCntr<SearchData> sdata, int opts,
|
||||
const string& stemlang)
|
||||
{
|
||||
if (!m_db || ISNULL(m_nq)) {
|
||||
LOGERR(("Query::setQuery: not initialised!\n"));
|
||||
return false;
|
||||
}
|
||||
m_reason.erase();
|
||||
LOGDEB(("Query::setQuery:\n"));
|
||||
|
||||
m_filterTopDir = sdata->getTopdir();
|
||||
deleteZ(m_nq->decider);
|
||||
deleteZ(m_nq->postfilter);
|
||||
if (!m_filterTopDir.empty()) {
|
||||
#if XAPIAN_FILTERING
|
||||
m_nq->decider =
|
||||
#else
|
||||
m_nq->postfilter =
|
||||
#endif
|
||||
new FilterMatcher(m_filterTopDir);
|
||||
}
|
||||
m_nq->m_dbindices.clear();
|
||||
m_qOpts = opts;
|
||||
m_nq->termfreqs.clear();
|
||||
Xapian::Query xq;
|
||||
if (!sdata->toNativeQuery(*m_db, &xq,
|
||||
(opts & QO_STEM) ? stemlang : "")) {
|
||||
m_reason += sdata->getReason();
|
||||
return false;
|
||||
}
|
||||
m_nq->query = xq;
|
||||
string ermsg;
|
||||
string d;
|
||||
try {
|
||||
delete m_nq->enquire;
|
||||
m_nq->enquire = new Xapian::Enquire(m_db->m_ndb->db);
|
||||
m_nq->enquire->set_query(m_nq->query);
|
||||
m_nq->mset = Xapian::MSet();
|
||||
// Get the query description and trim the "Xapian::Query"
|
||||
d = m_nq->query.get_description();
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
LOGDEB(("Query::SetQuery: xapian error %s\n", ermsg.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (d.find("Xapian::Query") == 0)
|
||||
d.erase(0, strlen("Xapian::Query"));
|
||||
if (!m_filterTopDir.empty()) {
|
||||
d += string(" [dir: ") + m_filterTopDir + "]";
|
||||
}
|
||||
sdata->setDescription(d);
|
||||
LOGDEB(("Query::SetQuery: Q: %s\n", sdata->getDescription().c_str()));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// Replace the contents of 'terms' with the terms of the current Xapian
// query. Returns false if there is no native query object or if Xapian
// raised an error while listing the terms.
bool Query::getQueryTerms(list<string>& terms)
{
    if (ISNULL(m_nq))
        return false;

    terms.clear();
    string xerr;
    try {
        Xapian::TermIterator term = m_nq->query.get_terms_begin();
        while (term != m_nq->query.get_terms_end()) {
            terms.push_back(*term);
            term++;
        }
    } XCATCHERROR(xerr);

    if (xerr.empty())
        return true;

    LOGERR(("getQueryTerms: xapian error: %s\n", xerr.c_str()));
    return false;
}
|
||||
|
||||
bool Query::getMatchTerms(const Doc& doc, list<string>& terms)
|
||||
{
|
||||
if (ISNULL(m_nq) || !m_nq->enquire) {
|
||||
LOGERR(("Query::getMatchTerms: no query opened\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
terms.clear();
|
||||
Xapian::TermIterator it;
|
||||
Xapian::docid id = Xapian::docid(doc.xdocid);
|
||||
string ermsg;
|
||||
try {
|
||||
for (it=m_nq->enquire->get_matching_terms_begin(id);
|
||||
it != m_nq->enquire->get_matching_terms_end(id); it++) {
|
||||
terms.push_back(*it);
|
||||
}
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
LOGERR(("getQueryTerms: xapian error: %s\n", ermsg.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Size of the result window (Xapian MSet) requested by each get_mset() call
static const int qquantum = 30;
|
||||
|
||||
int Query::getResCnt()
|
||||
{
|
||||
if (ISNULL(m_nq) || !m_nq->enquire) {
|
||||
LOGERR(("Query::getResCnt: no query opened\n"));
|
||||
return -1;
|
||||
}
|
||||
string ermsg;
|
||||
if (m_nq->mset.size() <= 0) {
|
||||
try {
|
||||
m_nq->mset = m_nq->enquire->get_mset(0, qquantum,
|
||||
0, m_nq->decider);
|
||||
} catch (const Xapian::DatabaseModifiedError &error) {
|
||||
m_db->m_ndb->db.reopen();
|
||||
m_nq->mset = m_nq->enquire->get_mset(0, qquantum,
|
||||
0, m_nq->decider);
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
LOGERR(("enquire->get_mset: exception: %s\n", ermsg.c_str()));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
int ret = -1;
|
||||
try {
|
||||
ret = m_nq->mset.get_matches_lower_bound();
|
||||
} catch (...) {}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
// Get document at rank i in query (i is the index in the whole result
|
||||
// set, as in the enquire class. We check if the current mset has the
|
||||
// doc, else ask for an other one. We use msets of 10 documents. Don't
|
||||
// know if the whole thing makes sense at all but it seems to work.
|
||||
//
|
||||
// If there is a postquery filter (ie: file names), we have to
|
||||
// maintain a correspondance from the sequential external index
|
||||
// sequence to the internal Xapian hole-y one (the holes being the documents
|
||||
// that dont match the filter).
|
||||
bool Query::getDoc(int exti, Doc &doc, int *percent)
|
||||
{
|
||||
LOGDEB1(("Query::getDoc: exti %d\n", exti));
|
||||
if (ISNULL(m_nq) || !m_nq->enquire) {
|
||||
LOGERR(("Query::getDoc: no query opened\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
int xapi;
|
||||
if (m_nq->postfilter) {
|
||||
// There is a postquery filter, does this fall in already known area ?
|
||||
if (exti >= (int)m_nq->m_dbindices.size()) {
|
||||
// Have to fetch xapian docs and filter until we get
|
||||
// enough or fail
|
||||
m_nq->m_dbindices.reserve(exti+1);
|
||||
// First xapian doc we fetch is the one after last stored
|
||||
int first = m_nq->m_dbindices.size() > 0 ?
|
||||
m_nq->m_dbindices.back() + 1 : 0;
|
||||
// Loop until we get enough docs
|
||||
while (exti >= (int)m_nq->m_dbindices.size()) {
|
||||
LOGDEB(("Query::getDoc: fetching %d starting at %d\n",
|
||||
qquantum, first));
|
||||
try {
|
||||
m_nq->mset = m_nq->enquire->get_mset(first, qquantum);
|
||||
} catch (const Xapian::DatabaseModifiedError &error) {
|
||||
m_db->m_ndb->db.reopen();
|
||||
m_nq->mset = m_nq->enquire->get_mset(first, qquantum);
|
||||
} catch (const Xapian::Error & error) {
|
||||
LOGERR(("enquire->get_mset: exception: %s\n",
|
||||
error.get_msg().c_str()));
|
||||
abort();
|
||||
}
|
||||
|
||||
if (m_nq->mset.empty()) {
|
||||
LOGDEB(("Query::getDoc: got empty mset\n"));
|
||||
return false;
|
||||
}
|
||||
first = m_nq->mset.get_firstitem();
|
||||
for (unsigned int i = 0; i < m_nq->mset.size() ; i++) {
|
||||
LOGDEB(("Query::getDoc: [%d]\n", i));
|
||||
Xapian::Document xdoc = m_nq->mset[i].get_document();
|
||||
if ((*m_nq->postfilter)(xdoc)) {
|
||||
m_nq->m_dbindices.push_back(first + i);
|
||||
}
|
||||
}
|
||||
first = first + m_nq->mset.size();
|
||||
}
|
||||
}
|
||||
xapi = m_nq->m_dbindices[exti];
|
||||
} else {
|
||||
xapi = exti;
|
||||
}
|
||||
|
||||
// From there on, we work with a xapian enquire item number. Fetch it
|
||||
int first = m_nq->mset.get_firstitem();
|
||||
int last = first + m_nq->mset.size() -1;
|
||||
|
||||
if (!(xapi >= first && xapi <= last)) {
|
||||
LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
|
||||
try {
|
||||
m_nq->mset = m_nq->enquire->get_mset(xapi, qquantum,
|
||||
0, m_nq->decider);
|
||||
} catch (const Xapian::DatabaseModifiedError &error) {
|
||||
m_db->m_ndb->db.reopen();
|
||||
m_nq->mset = m_nq->enquire->get_mset(xapi, qquantum,
|
||||
0, m_nq->decider);
|
||||
|
||||
} catch (const Xapian::Error & error) {
|
||||
LOGERR(("enquire->get_mset: exception: %s\n",
|
||||
error.get_msg().c_str()));
|
||||
abort();
|
||||
}
|
||||
if (m_nq->mset.empty())
|
||||
return false;
|
||||
first = m_nq->mset.get_firstitem();
|
||||
last = first + m_nq->mset.size() -1;
|
||||
}
|
||||
|
||||
LOGDEB1(("Query::getDoc: Qry [%s] win [%d-%d] Estimated results: %d",
|
||||
m_nq->query.get_description().c_str(),
|
||||
first, last,
|
||||
m_nq->mset.get_matches_lower_bound()));
|
||||
|
||||
Xapian::Document xdoc = m_nq->mset[xapi-first].get_document();
|
||||
Xapian::docid docid = *(m_nq->mset[xapi-first]);
|
||||
if (percent)
|
||||
*percent = m_nq->mset.convert_to_percent(m_nq->mset[xapi-first]);
|
||||
|
||||
// Parse xapian document's data and populate doc fields
|
||||
string data = xdoc.get_data();
|
||||
return m_db->m_ndb->dbDataToRclDoc(docid, data, doc);
|
||||
}
|
||||
|
||||
/**
 * Query expansion: return up to 10 terms that Xapian suggests from the
 * given document (used as a one-document relevance set).
 *
 * Terms beginning with an uppercase ASCII letter are skipped ("special
 * terms"). The list is empty if no query is open or on Xapian error.
 */
list<string> Query::expand(const Doc &doc)
{
    list<string> res;
    if (ISNULL(m_nq) || !m_nq->enquire) {
        LOGERR(("Query::expand: no query opened\n"));
        return res;
    }
    string ermsg;
    // Two attempts at most: if the index was modified under us, reopen it
    // before retrying (retrying without a reopen would just fail again).
    for (int tries = 0; tries < 2; tries++) {
        bool modified = false;
        ermsg.erase();
        // Drop anything collected by an interrupted first attempt
        res.clear();
        try {
            if (tries > 0 && m_db && m_db->m_ndb)
                m_db->m_ndb->db.reopen();
            Xapian::RSet rset;
            rset.add_document(Xapian::docid(doc.xdocid));
            // We don't exclude the original query terms.
            Xapian::ESet eset = m_nq->enquire->get_eset(20, rset, false);
            LOGDEB(("ESet terms:\n"));
            // We filter out the special terms
            for (Xapian::ESetIterator it = eset.begin();
                 it != eset.end(); it++) {
                LOGDEB((" [%s]\n", (*it).c_str()));
                if ((*it).empty() || ((*it).at(0)>='A' && (*it).at(0)<='Z'))
                    continue;
                res.push_back(*it);
                if (res.size() >= 10)
                    break;
            }
        } catch (const Xapian::DatabaseModifiedError &error) {
            modified = true;
            ermsg = error.get_msg();
            if (ermsg.empty())
                ermsg = "Database modified";
        } XCATCHERROR(ermsg);
        if (!modified)
            break;
    }
    if (!ermsg.empty()) {
        LOGERR(("Query::expand: xapian error %s\n", ermsg.c_str()));
        res.clear();
    }

    return res;
}
|
||||
|
||||
}
|
||||
92
src/rcldb/rclquery.h
Normal file
92
src/rcldb/rclquery.h
Normal file
@ -0,0 +1,92 @@
|
||||
#ifndef _rclquery_h_included_
|
||||
#define _rclquery_h_included_
|
||||
/* @(#$Id: rclquery.h,v 1.1 2008-06-13 18:22:46 dockes Exp $ (C) 2008 J.F.Dockes */
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
using std::string;
|
||||
using std::list;
|
||||
using std::vector;
|
||||
#endif
|
||||
|
||||
#include "refcntr.h"
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
namespace Rcl {
|
||||
#endif
|
||||
|
||||
class SearchData;
|
||||
class Db;
|
||||
class Doc;
|
||||
|
||||
/**
|
||||
* An Rcl::Query is a question (SearchData) applied to a
|
||||
* database. Handles access to the results. Somewhat equivalent to a
|
||||
* cursor in an rdb.
|
||||
*/
|
||||
class Query {
|
||||
public:
|
||||
enum QueryOpts {QO_NONE=0, QO_STEM = 1};
|
||||
|
||||
Query(Db *db);
|
||||
|
||||
~Query();
|
||||
|
||||
/** Get explanation about last error */
|
||||
string getReason() const;
|
||||
|
||||
/** Parse query string and initialize query */
|
||||
bool setQuery(RefCntr<SearchData> q, int opts = QO_NONE,
|
||||
const string& stemlang = "english");
|
||||
bool getQueryTerms(list<string>& terms);
|
||||
bool getMatchTerms(const Doc& doc, list<string>& terms);
|
||||
|
||||
/** Get document at rank i in current query. */
|
||||
bool getDoc(int i, Doc &doc, int *percent = 0);
|
||||
|
||||
/** Expand query */
|
||||
list<string> expand(const Doc &doc);
|
||||
|
||||
/** Get results count for current query */
|
||||
int getResCnt();
|
||||
|
||||
Db *whatDb();
|
||||
|
||||
/** make this public for access from embedded Db::Native */
|
||||
class Native;
|
||||
Native *m_nq;
|
||||
|
||||
private:
|
||||
string m_filterTopDir; // Current query filter on subtree top directory
|
||||
string m_reason; // Error explanation
|
||||
Db *m_db;
|
||||
unsigned int m_qOpts;
|
||||
/* Copyconst and assignemt private and forbidden */
|
||||
Query(const Query &) {}
|
||||
Query & operator=(const Query &) {return *this;};
|
||||
};
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
}
|
||||
#endif // NO_NAMESPACES
|
||||
|
||||
|
||||
#endif /* _rclquery_h_included_ */
|
||||
66
src/rcldb/rclquery_p.h
Normal file
66
src/rcldb/rclquery_p.h
Normal file
@ -0,0 +1,66 @@
|
||||
#ifndef _rclquery_p_h_included_
|
||||
#define _rclquery_p_h_included_
|
||||
/* @(#$Id: rclquery_p.h,v 1.1 2008-06-13 18:22:46 dockes Exp $ (C) 2007 J.F.Dockes */
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
using std::map;
|
||||
using std::vector;
|
||||
|
||||
#include <xapian.h>
|
||||
#include "rclquery.h"
|
||||
|
||||
namespace Rcl {
|
||||
|
||||
// Xapian-specific state of an Rcl::Query. The decider, postfilter and
// enquire objects are owned by this object and deleted by its destructor,
// so copying is forbidden (a copy would delete them a second time).
class Query::Native {
public:
    Xapian::Query query; // query descriptor: terms and subqueries
                         // joined by operators (or/and etc...)

    vector<int> m_dbindices; // In case there is a postq filter: sequence of
                             // db indices that match

    // Filtering results on location. There are 2 possible approaches
    // for this:
    //   - Set a "MatchDecider" to be used by Xapian during the query
    //   - Filter the results out of Xapian (this also uses a
    //     Xapian::MatchDecider object, but applied to the results by Recoll).
    //
    // The result filtering approach was the first implemented.
    //
    // The efficiency of both methods depends on the searches, so the code
    // for both has been kept. A nice point for the Xapian approach is that
    // the result count estimates are correct (they are wrong with
    // the postfilter approach). It is also faster in some worst case
    // scenarios so this is now the default (but the post-filtering is
    // faster in many common cases).
    //
    // Which is used is decided in SetQuery(), by setting either of
    // the two following members. This in turn is controlled by a
    // preprocessor directive.

#define XAPIAN_FILTERING 1

    Xapian::MatchDecider *decider;    // Xapian does the filtering
    Xapian::MatchDecider *postfilter; // Result filtering done by Recoll

    Xapian::Enquire *enquire; // Open query descriptor.
    Xapian::MSet mset;        // Partial result set
    Query *m_q;               // The Rcl::Query this object was created for
    // Term frequencies for current query. See makeAbstract, setQuery
    map<string, double> termfreqs;

    Native(Query *q)
        : decider(0), postfilter(0), enquire(0), m_q(q)
    { }

    ~Native() {
        delete decider;
        delete postfilter;
        delete enquire;
    }

private:
    /* Copying is forbidden: declared but deliberately not defined */
    Native(const Native &);
    Native& operator=(const Native &);
};
|
||||
|
||||
}
|
||||
#endif /* _rclquery_p_h_included_ */
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _SEARCHDATA_H_INCLUDED_
|
||||
#define _SEARCHDATA_H_INCLUDED_
|
||||
/* @(#$Id: searchdata.h,v 1.13 2008-05-08 10:00:20 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: searchdata.h,v 1.14 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
/**
|
||||
* Structures to hold data coming almost directly from the gui
|
||||
@ -47,7 +47,7 @@ class SearchDataClause;
|
||||
|
||||
/**
|
||||
Data structure representing a Recoll user query, for translation
|
||||
into a Xapian query tree.
|
||||
into a Xapian query tree. This could probably be better called a 'question'.
|
||||
|
||||
This is a list of search clauses combined through either OR or AND.
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: pathut.cpp,v 1.19 2008-05-27 06:18:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: pathut.cpp,v 1.20 2008-06-13 18:22:47 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -36,6 +36,7 @@ using std::list;
|
||||
using std::stack;
|
||||
#endif /* NO_NAMESPACES */
|
||||
|
||||
#include "autoconfig.h"
|
||||
#include "pathut.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
@ -42,6 +42,7 @@ public:
|
||||
X *operator->() {return rep;}
|
||||
int getcnt() const {return pcount ? *pcount : 0;}
|
||||
const X *getptr() const {return rep;}
|
||||
bool isNull() const {return rep == 0;}
|
||||
};
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user