separated rcldb and rclquery
This commit is contained in:
parent
e5e8249ad3
commit
0e7a78d688
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: kio_recoll.cpp,v 1.7 2007-11-09 15:46:17 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: kio_recoll.cpp,v 1.8 2008-06-13 18:22:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@ -109,18 +109,20 @@ void RecollProtocol::get(const KURL & url)
|
|||||||
RefCntr<Rcl::SearchData> sdata(new Rcl::SearchData(Rcl::SCLT_OR));
|
RefCntr<Rcl::SearchData> sdata(new Rcl::SearchData(Rcl::SCLT_OR));
|
||||||
sdata->addClause(new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND,
|
sdata->addClause(new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND,
|
||||||
(const char *)u8));
|
(const char *)u8));
|
||||||
|
Rcl::Query *query = new Rcl::Query(m_rcldb);
|
||||||
if (!m_rcldb->setQuery(sdata, Rcl::Db::QO_STEM, "english")) {
|
if (!query->setQuery(sdata, Rcl::Db::QO_STEM, "english")) {
|
||||||
m_reason = "Internal Error: setQuery failed";
|
m_reason = "Internal Error: setQuery failed";
|
||||||
outputError(m_reason.c_str());
|
outputError(m_reason.c_str());
|
||||||
finished();
|
finished();
|
||||||
|
delete query;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_docsource)
|
if (m_docsource)
|
||||||
delete m_docsource;
|
delete m_docsource;
|
||||||
|
|
||||||
m_docsource = new DocSequenceDb(m_rcldb, "Query results", sdata);
|
m_docsource = new DocSequenceDb(RefCntr<Rcl::Query>(query),
|
||||||
|
"Query results", sdata);
|
||||||
|
|
||||||
QByteArray output;
|
QByteArray output;
|
||||||
QTextStream os(output, IO_WriteOnly );
|
QTextStream os(output, IO_WriteOnly );
|
||||||
|
|||||||
@ -8,8 +8,8 @@ LIBS = librcl.a
|
|||||||
|
|
||||||
all: $(LIBS)
|
all: $(LIBS)
|
||||||
|
|
||||||
OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o history.o recollq.o sortseq.o wasastringtoquery.o wasatorcl.o pathhash.o rcldb.o searchdata.o stemdb.o stoplist.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o
|
OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o history.o recollq.o sortseq.o wasastringtoquery.o wasatorcl.o pathhash.o rcldb.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o
|
||||||
DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp history.dep.stamp recollq.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp pathhash.dep.stamp rcldb.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp
|
DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp history.dep.stamp recollq.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp pathhash.dep.stamp rcldb.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp
|
||||||
|
|
||||||
librcl.a : $(DEPS) $(OBJS) unac.o
|
librcl.a : $(DEPS) $(OBJS) unac.o
|
||||||
ar ru librcl.a $(OBJS) unac.o
|
ar ru librcl.a $(OBJS) unac.o
|
||||||
@ -71,6 +71,8 @@ pathhash.o : ../rcldb/pathhash.cpp
|
|||||||
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/pathhash.cpp
|
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/pathhash.cpp
|
||||||
rcldb.o : ../rcldb/rcldb.cpp
|
rcldb.o : ../rcldb/rcldb.cpp
|
||||||
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/rcldb.cpp
|
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/rcldb.cpp
|
||||||
|
rclquery.o : ../rcldb/rclquery.cpp
|
||||||
|
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/rclquery.cpp
|
||||||
searchdata.o : ../rcldb/searchdata.cpp
|
searchdata.o : ../rcldb/searchdata.cpp
|
||||||
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/searchdata.cpp
|
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/searchdata.cpp
|
||||||
stemdb.o : ../rcldb/stemdb.cpp
|
stemdb.o : ../rcldb/stemdb.cpp
|
||||||
@ -194,6 +196,9 @@ pathhash.dep.stamp : ../rcldb/pathhash.cpp
|
|||||||
rcldb.dep.stamp : ../rcldb/rcldb.cpp
|
rcldb.dep.stamp : ../rcldb/rcldb.cpp
|
||||||
$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/rcldb.cpp > rcldb.dep
|
$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/rcldb.cpp > rcldb.dep
|
||||||
touch rcldb.dep.stamp
|
touch rcldb.dep.stamp
|
||||||
|
rclquery.dep.stamp : ../rcldb/rclquery.cpp
|
||||||
|
$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/rclquery.cpp > rclquery.dep
|
||||||
|
touch rclquery.dep.stamp
|
||||||
searchdata.dep.stamp : ../rcldb/searchdata.cpp
|
searchdata.dep.stamp : ../rcldb/searchdata.cpp
|
||||||
$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/searchdata.cpp > searchdata.dep
|
$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/searchdata.cpp > searchdata.dep
|
||||||
touch searchdata.dep.stamp
|
touch searchdata.dep.stamp
|
||||||
@ -275,6 +280,7 @@ include wasastringtoquery.dep
|
|||||||
include wasatorcl.dep
|
include wasatorcl.dep
|
||||||
include pathhash.dep
|
include pathhash.dep
|
||||||
include rcldb.dep
|
include rcldb.dep
|
||||||
|
include rclquery.dep
|
||||||
include searchdata.dep
|
include searchdata.dep
|
||||||
include stemdb.dep
|
include stemdb.dep
|
||||||
include stoplist.dep
|
include stoplist.dep
|
||||||
|
|||||||
@ -31,6 +31,7 @@ ${depth}/query/wasastringtoquery.cpp \
|
|||||||
${depth}/query/wasatorcl.cpp \
|
${depth}/query/wasatorcl.cpp \
|
||||||
${depth}/rcldb/pathhash.cpp \
|
${depth}/rcldb/pathhash.cpp \
|
||||||
${depth}/rcldb/rcldb.cpp \
|
${depth}/rcldb/rcldb.cpp \
|
||||||
|
${depth}/rcldb/rclquery.cpp \
|
||||||
${depth}/rcldb/searchdata.cpp \
|
${depth}/rcldb/searchdata.cpp \
|
||||||
${depth}/rcldb/stemdb.cpp \
|
${depth}/rcldb/stemdb.cpp \
|
||||||
${depth}/rcldb/stoplist.cpp \
|
${depth}/rcldb/stoplist.cpp \
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.2 2008-05-27 10:45:59 dockes Exp $ (C) 2007 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.3 2008-06-13 18:22:46 dockes Exp $ (C) 2007 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
@ -11,6 +11,7 @@ using namespace std;
|
|||||||
#include "rclinit.h"
|
#include "rclinit.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
|
#include "rclquery.h"
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
#include "wasastringtoquery.h"
|
#include "wasastringtoquery.h"
|
||||||
#include "wasatorcl.h"
|
#include "wasatorcl.h"
|
||||||
@ -31,7 +32,7 @@ recollq_question(PyObject *self, PyObject *args)
|
|||||||
string reason;
|
string reason;
|
||||||
string dbdir = config->getDbDir();
|
string dbdir = config->getDbDir();
|
||||||
rcldb.open(dbdir, config->getStopfile(),
|
rcldb.open(dbdir, config->getStopfile(),
|
||||||
Rcl::Db::DbRO, Rcl::Db::QO_STEM);
|
Rcl::Db::DbRO);
|
||||||
|
|
||||||
Rcl::SearchData *sd = wasaStringToRcl(qs, reason);
|
Rcl::SearchData *sd = wasaStringToRcl(qs, reason);
|
||||||
if (!sd) {
|
if (!sd) {
|
||||||
@ -40,8 +41,9 @@ recollq_question(PyObject *self, PyObject *args)
|
|||||||
}
|
}
|
||||||
|
|
||||||
RefCntr<Rcl::SearchData> rq(sd);
|
RefCntr<Rcl::SearchData> rq(sd);
|
||||||
rcldb.setQuery(rq, Rcl::Db::QO_STEM);
|
RefCntr<Rcl::Query> query(new Rcl::Query(&rcldb));
|
||||||
int cnt = rcldb.getResCnt();
|
query->setQuery(rq, Rcl::Query::QO_STEM);
|
||||||
|
int cnt = query->getResCnt();
|
||||||
cout << "Recoll query: " << rq->getDescription() << endl;
|
cout << "Recoll query: " << rq->getDescription() << endl;
|
||||||
if (cnt <= limit)
|
if (cnt <= limit)
|
||||||
cout << cnt << " results" << endl;
|
cout << cnt << " results" << endl;
|
||||||
@ -51,7 +53,7 @@ recollq_question(PyObject *self, PyObject *args)
|
|||||||
for (int i = 0; i < limit; i++) {
|
for (int i = 0; i < limit; i++) {
|
||||||
int pc;
|
int pc;
|
||||||
Rcl::Doc doc;
|
Rcl::Doc doc;
|
||||||
if (!rcldb.getDoc(i, doc, &pc))
|
if (!query->getDoc(i, doc, &pc))
|
||||||
break;
|
break;
|
||||||
char cpc[20];
|
char cpc[20];
|
||||||
sprintf(cpc, "%d", pc);
|
sprintf(cpc, "%d", pc);
|
||||||
|
|||||||
@ -3,8 +3,6 @@ from distutils.core import setup, Extension
|
|||||||
module1 = Extension('recollq',
|
module1 = Extension('recollq',
|
||||||
define_macros = [('MAJOR_VERSION', '1'),
|
define_macros = [('MAJOR_VERSION', '1'),
|
||||||
('MINOR_VERSION', '0'),
|
('MINOR_VERSION', '0'),
|
||||||
('HAVE_MKDTEMP', '1'),
|
|
||||||
('HAVE_VASPRINTF', '1'),
|
|
||||||
('UNAC_VERSION', '"1.0.7"'),
|
('UNAC_VERSION', '"1.0.7"'),
|
||||||
('STATFS_INCLUDE', '"sys/mount.h"'),
|
('STATFS_INCLUDE', '"sys/mount.h"'),
|
||||||
('RECOLL_DATADIR',
|
('RECOLL_DATADIR',
|
||||||
@ -27,6 +25,7 @@ module1 = Extension('recollq',
|
|||||||
'../query/wasastringtoquery.cpp',
|
'../query/wasastringtoquery.cpp',
|
||||||
'../query/wasatorcl.cpp',
|
'../query/wasatorcl.cpp',
|
||||||
'../rcldb/rcldb.cpp',
|
'../rcldb/rcldb.cpp',
|
||||||
|
'../rcldb/rclquery.cpp',
|
||||||
'../rcldb/searchdata.cpp',
|
'../rcldb/searchdata.cpp',
|
||||||
'../rcldb/stemdb.cpp',
|
'../rcldb/stemdb.cpp',
|
||||||
'../rcldb/pathhash.cpp',
|
'../rcldb/pathhash.cpp',
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: main.cpp,v 1.66 2008-02-19 08:02:20 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: main.cpp,v 1.67 2008-06-13 18:22:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -100,9 +100,6 @@ bool maybeOpenDb(string &reason, bool force)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int qopts = Rcl::Db::QO_NONE;
|
|
||||||
if (prefs.queryStemLang.length() > 0)
|
|
||||||
qopts |= Rcl::Db::QO_STEM;
|
|
||||||
if (force)
|
if (force)
|
||||||
rcldb->close();
|
rcldb->close();
|
||||||
rcldb->rmQueryDb("");
|
rcldb->rmQueryDb("");
|
||||||
@ -112,7 +109,7 @@ bool maybeOpenDb(string &reason, bool force)
|
|||||||
rcldb->addQueryDb(*it);
|
rcldb->addQueryDb(*it);
|
||||||
}
|
}
|
||||||
if (!rcldb->isopen() && !rcldb->open(dbdir, rclconfig->getStopfile(),
|
if (!rcldb->isopen() && !rcldb->open(dbdir, rclconfig->getStopfile(),
|
||||||
Rcl::Db::DbRO, qopts)) {
|
Rcl::Db::DbRO)) {
|
||||||
reason = "Could not open database in " +
|
reason = "Could not open database in " +
|
||||||
dbdir + " wait for indexing to complete?";
|
dbdir + " wait for indexing to complete?";
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.48 2008-02-19 08:02:01 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.49 2008-06-13 18:22:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -440,23 +440,25 @@ void RclMain::startSearch(RefCntr<Rcl::SearchData> sdata)
|
|||||||
|
|
||||||
int qopts = 0;
|
int qopts = 0;
|
||||||
if (!prefs.queryStemLang.length() == 0)
|
if (!prefs.queryStemLang.length() == 0)
|
||||||
qopts |= Rcl::Db::QO_STEM;
|
qopts |= Rcl::Query::QO_STEM;
|
||||||
QApplication::setOverrideCursor(QCursor(Qt::WaitCursor));
|
QApplication::setOverrideCursor(QCursor(Qt::WaitCursor));
|
||||||
|
|
||||||
string stemLang = (const char *)prefs.queryStemLang.ascii();
|
string stemLang = (const char *)prefs.queryStemLang.ascii();
|
||||||
if (stemLang == "ALL") {
|
if (stemLang == "ALL") {
|
||||||
rclconfig->getConfParam("indexstemminglanguages", stemLang);
|
rclconfig->getConfParam("indexstemminglanguages", stemLang);
|
||||||
}
|
}
|
||||||
|
Rcl::Query *query = new Rcl::Query(rcldb);
|
||||||
|
|
||||||
if (!rcldb->setQuery(sdata, qopts, stemLang)) {
|
if (!query || !query->setQuery(sdata, qopts, stemLang)) {
|
||||||
QMessageBox::warning(0, "Recoll", tr("Cant start query: ") +
|
QMessageBox::warning(0, "Recoll", tr("Cant start query: ") +
|
||||||
QString::fromAscii(rcldb->getReason().c_str()));
|
QString::fromAscii(query->getReason().c_str()));
|
||||||
QApplication::restoreOverrideCursor();
|
QApplication::restoreOverrideCursor();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
curPreview = 0;
|
curPreview = 0;
|
||||||
DocSequenceDb *src =
|
DocSequenceDb *src =
|
||||||
new DocSequenceDb(rcldb, string(tr("Query results").utf8()), sdata);
|
new DocSequenceDb(RefCntr<Rcl::Query>(query),
|
||||||
|
string(tr("Query results").utf8()), sdata);
|
||||||
m_docSource = RefCntr<DocSequence>(src);
|
m_docSource = RefCntr<DocSequence>(src);
|
||||||
m_searchData = sdata;
|
m_searchData = sdata;
|
||||||
setDocSequence();
|
setDocSequence();
|
||||||
@ -921,7 +923,8 @@ void RclMain::docExpand(int docnum)
|
|||||||
if (!resList->getDoc(docnum, doc))
|
if (!resList->getDoc(docnum, doc))
|
||||||
return;
|
return;
|
||||||
list<string> terms;
|
list<string> terms;
|
||||||
terms = rcldb->expand(doc);
|
if (!m_docSource.isNull())
|
||||||
|
terms = m_docSource->expand(doc);
|
||||||
if (terms.empty())
|
if (terms.empty())
|
||||||
return;
|
return;
|
||||||
// Do we keep the original query. I think we'd better not.
|
// Do we keep the original query. I think we'd better not.
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _DOCSEQ_H_INCLUDED_
|
#ifndef _DOCSEQ_H_INCLUDED_
|
||||||
#define _DOCSEQ_H_INCLUDED_
|
#define _DOCSEQ_H_INCLUDED_
|
||||||
/* @(#$Id: docseq.h,v 1.12 2007-06-19 08:36:24 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: docseq.h,v 1.13 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -89,7 +89,7 @@ class DocSequence {
|
|||||||
vector<int>& gslks) const {
|
vector<int>& gslks) const {
|
||||||
terms.clear(); groups.clear(); gslks.clear(); return true;
|
terms.clear(); groups.clear(); gslks.clear(); return true;
|
||||||
}
|
}
|
||||||
|
/** Default query expansion: sequences that cannot expand a document
 *  report no terms at all. */
virtual list<string> expand(Rcl::Doc &)
{
    return list<string>();
}
|
||||||
private:
|
private:
|
||||||
string m_title;
|
string m_title;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: docseqdb.cpp,v 1.3 2007-06-19 08:36:24 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: docseqdb.cpp,v 1.4 2008-06-13 18:22:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -23,28 +23,53 @@ static char rcsid[] = "@(#$Id: docseqdb.cpp,v 1.3 2007-06-19 08:36:24 dockes Exp
|
|||||||
#include "docseqdb.h"
|
#include "docseqdb.h"
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
|
|
||||||
|
/**
 * Build a document sequence on top of a query object.
 *
 * @param q      shared handle on the Rcl::Query the results are read from
 * @param t      sequence title, passed on to the DocSequence base class
 * @param sdata  search data associated with the query; used by getTerms()
 *               and getDescription()
 *
 * The cached result count starts at -1 so that getResCnt() knows it still
 * has to ask the query.
 */
DocSequenceDb::DocSequenceDb(RefCntr<Rcl::Query> q,
                             const string &t,
                             RefCntr<Rcl::SearchData> sdata)
    : DocSequence(t),
      m_q(q),
      m_sdata(sdata),
      m_rescnt(-1)
{}
|
||||||
|
|
||||||
|
// No explicit cleanup needed here: the members are released by their own
// destructors.
DocSequenceDb::~DocSequenceDb() {}
|
||||||
|
|
||||||
|
/**
 * Retrieve the search terms for this sequence.
 *
 * Pure forwarding call: the three output containers and the return value
 * are whatever the stored Rcl::SearchData::getTerms() produces.
 */
bool DocSequenceDb::getTerms(vector<string>& terms,
                             vector<vector<string> >& groups,
                             vector<int>& gslks) const
{
    // getptr() is used (rather than operator->) as in the rest of this
    // const method.
    Rcl::SearchData *searchData = m_sdata.getptr();
    return searchData->getTerms(terms, groups, gslks);
}
|
||||||
|
|
||||||
|
/** Return the description of the search, as provided by the search data. */
string DocSequenceDb::getDescription()
{
    string description = m_sdata->getDescription();
    return description;
}
|
||||||
|
|
||||||
bool DocSequenceDb::getDoc(int num, Rcl::Doc &doc, int *percent, string *sh)
|
bool DocSequenceDb::getDoc(int num, Rcl::Doc &doc, int *percent, string *sh)
|
||||||
{
|
{
|
||||||
if (sh) sh->erase();
|
if (sh) sh->erase();
|
||||||
return m_db ? m_db->getDoc(num, doc, percent) : false;
|
return m_q->getDoc(num, doc, percent);
|
||||||
}
|
}
|
||||||
|
|
||||||
int DocSequenceDb::getResCnt()
|
int DocSequenceDb::getResCnt()
|
||||||
{
|
{
|
||||||
if (!m_db)
|
|
||||||
return -1;
|
|
||||||
if (m_rescnt < 0) {
|
if (m_rescnt < 0) {
|
||||||
m_rescnt= m_db->getResCnt();
|
m_rescnt= m_q->getResCnt();
|
||||||
}
|
}
|
||||||
return m_rescnt;
|
return m_rescnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Return an abstract for the given document.
 *
 * Asks the query's Db to build one through makeDocAbstract(); the abstract
 * already stored in doc.meta["abstract"] is used instead when there is no
 * query object, no Db behind it, or when the built abstract comes back empty.
 */
string DocSequenceDb::getAbstract(Rcl::Doc &doc)
{
    // Check the handle itself before asking it for its Db.
    if (m_q.isNull() || !m_q->whatDb())
        return doc.meta["abstract"];

    string abstract;
    m_q->whatDb()->makeDocAbstract(doc, m_q.getptr(), abstract);
    if (abstract.empty())
        return doc.meta["abstract"];
    return abstract;
}
|
||||||
|
|
||||||
|
/**
 * Return expansion terms for a document, as computed by the query object.
 *
 * @return the list from Rcl::Query::expand(), or an empty list when there is
 *         no query object
 */
list<string> DocSequenceDb::expand(Rcl::Doc &doc)
{
    if (m_q.isNull())
        return list<string>();
    return m_q->expand(doc);
}
|
||||||
|
|
||||||
|
|||||||
@ -16,33 +16,30 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _DOCSEQDB_H_INCLUDED_
|
#ifndef _DOCSEQDB_H_INCLUDED_
|
||||||
#define _DOCSEQDB_H_INCLUDED_
|
#define _DOCSEQDB_H_INCLUDED_
|
||||||
/* @(#$Id: docseqdb.h,v 1.2 2007-01-19 15:22:50 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: docseqdb.h,v 1.3 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
#include "docseq.h"
|
#include "docseq.h"
|
||||||
#include "refcntr.h"
|
#include "refcntr.h"
|
||||||
|
|
||||||
#include "searchdata.h"
|
#include "searchdata.h"
|
||||||
|
#include "rclquery.h"
|
||||||
|
|
||||||
/** A DocSequence from a Db query (there should be one active for this
|
/** A DocSequence from a Db query (there should be one active for this
|
||||||
to make sense) */
|
to make sense) */
|
||||||
class DocSequenceDb : public DocSequence {
|
class DocSequenceDb : public DocSequence {
|
||||||
public:
|
public:
|
||||||
DocSequenceDb(Rcl::Db *d, const string &t, RefCntr<Rcl::SearchData> sdata)
|
DocSequenceDb(RefCntr<Rcl::Query> q, const string &t,
|
||||||
: DocSequence(t), m_db(d), m_sdata(sdata), m_rescnt(-1)
|
RefCntr<Rcl::SearchData> sdata);
|
||||||
{}
|
virtual ~DocSequenceDb();
|
||||||
virtual ~DocSequenceDb() {}
|
|
||||||
virtual bool getDoc(int num, Rcl::Doc &doc, int *percent, string * = 0);
|
virtual bool getDoc(int num, Rcl::Doc &doc, int *percent, string * = 0);
|
||||||
virtual int getResCnt();
|
virtual int getResCnt();
|
||||||
virtual bool getTerms(vector<string>& terms,
|
virtual bool getTerms(vector<string>& terms,
|
||||||
vector<vector<string> >& groups,
|
vector<vector<string> >& groups,
|
||||||
vector<int>& gslks) const {
|
vector<int>& gslks) const;
|
||||||
return m_sdata.getptr()->getTerms(terms, groups, gslks);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual string getAbstract(Rcl::Doc &doc);
|
virtual string getAbstract(Rcl::Doc &doc);
|
||||||
virtual string getDescription() {return m_sdata->getDescription();}
|
virtual string getDescription();
|
||||||
|
virtual list<string> expand(Rcl::Doc &doc);
|
||||||
private:
|
private:
|
||||||
Rcl::Db *m_db;
|
RefCntr<Rcl::Query> m_q;
|
||||||
RefCntr<Rcl::SearchData> m_sdata;
|
RefCntr<Rcl::SearchData> m_sdata;
|
||||||
int m_rescnt;
|
int m_rescnt;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: recollq.cpp,v 1.12 2007-12-13 06:58:21 dockes Exp $ (C) 2006 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: recollq.cpp,v 1.13 2008-06-13 18:22:46 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -32,6 +32,7 @@ static char rcsid[] = "@(#$Id: recollq.cpp,v 1.12 2007-12-13 06:58:21 dockes Exp
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
|
#include "rclquery.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
#include "rclinit.h"
|
#include "rclinit.h"
|
||||||
@ -132,8 +133,7 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
dbdir = rclconfig->getDbDir();
|
dbdir = rclconfig->getDbDir();
|
||||||
rcldb.open(dbdir, rclconfig->getStopfile(),
|
rcldb.open(dbdir, rclconfig->getStopfile(), Rcl::Db::DbRO);
|
||||||
Rcl::Db::DbRO, Rcl::Db::QO_STEM);
|
|
||||||
|
|
||||||
Rcl::SearchData *sd = 0;
|
Rcl::SearchData *sd = 0;
|
||||||
|
|
||||||
@ -166,8 +166,9 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
RefCntr<Rcl::SearchData> rq(sd);
|
RefCntr<Rcl::SearchData> rq(sd);
|
||||||
rcldb.setQuery(rq, Rcl::Db::QO_STEM);
|
Rcl::Query query(&rcldb);
|
||||||
int cnt = rcldb.getResCnt();
|
query.setQuery(rq, Rcl::Query::QO_STEM);
|
||||||
|
int cnt = query.getResCnt();
|
||||||
if (!(op_flags & OPT_b)) {
|
if (!(op_flags & OPT_b)) {
|
||||||
cout << "Recoll query: " << rq->getDescription() << endl;
|
cout << "Recoll query: " << rq->getDescription() << endl;
|
||||||
if (cnt <= limit)
|
if (cnt <= limit)
|
||||||
@ -180,7 +181,7 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
|||||||
for (int i = 0; i < limit; i++) {
|
for (int i = 0; i < limit; i++) {
|
||||||
int pc;
|
int pc;
|
||||||
Rcl::Doc doc;
|
Rcl::Doc doc;
|
||||||
if (!rcldb.getDoc(i, doc, &pc))
|
if (!query.getDoc(i, doc, &pc))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (op_flags & OPT_b) {
|
if (op_flags & OPT_b) {
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.132 2008-05-20 10:09:54 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.133 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -36,6 +36,7 @@ using namespace std;
|
|||||||
|
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
|
#include "rcldb_p.h"
|
||||||
#include "stemdb.h"
|
#include "stemdb.h"
|
||||||
#include "textsplit.h"
|
#include "textsplit.h"
|
||||||
#include "transcode.h"
|
#include "transcode.h"
|
||||||
@ -47,8 +48,9 @@ using namespace std;
|
|||||||
#include "pathhash.h"
|
#include "pathhash.h"
|
||||||
#include "utf8iter.h"
|
#include "utf8iter.h"
|
||||||
#include "searchdata.h"
|
#include "searchdata.h"
|
||||||
|
#include "rclquery.h"
|
||||||
|
#include "rclquery_p.h"
|
||||||
|
|
||||||
#include "xapian.h"
|
|
||||||
|
|
||||||
#ifndef MAX
|
#ifndef MAX
|
||||||
#define MAX(A,B) (A>B?A:B)
|
#define MAX(A,B) (A>B?A:B)
|
||||||
@ -88,125 +90,8 @@ namespace Rcl {
|
|||||||
const static string rclSyntAbs = "?!#@";
|
const static string rclSyntAbs = "?!#@";
|
||||||
const static string emptystring;
|
const static string emptystring;
|
||||||
|
|
||||||
// A class for data and methods that would have to expose
|
|
||||||
// Xapian-specific stuff if they were in Rcl::Db. There could actually be
|
|
||||||
// 2 different ones for indexing or query as there is not much in
|
|
||||||
// common.
|
|
||||||
class Native {
|
|
||||||
public:
|
|
||||||
Db *m_db;
|
|
||||||
bool m_isopen;
|
|
||||||
bool m_iswritable;
|
|
||||||
|
|
||||||
// Indexing
|
|
||||||
Xapian::WritableDatabase wdb;
|
|
||||||
|
|
||||||
// Querying
|
|
||||||
Xapian::Database db;
|
|
||||||
Xapian::Query query; // query descriptor: terms and subqueries
|
|
||||||
// joined by operators (or/and etc...)
|
|
||||||
|
|
||||||
// Filtering results on location. There are 2 possible approaches
|
|
||||||
// for this:
|
|
||||||
// - Set a "MatchDecider" to be used by Xapian during the query
|
|
||||||
// - Filter the results out of Xapian (this also uses a
|
|
||||||
// Xapian::MatchDecider object, but applied to the results by Recoll.
|
|
||||||
//
|
|
||||||
// The result filtering approach was the first implemented.
|
|
||||||
//
|
|
||||||
// The efficiency of both methods depend on the searches, so the code
|
|
||||||
// for both has been kept. A nice point for the Xapian approach is that
|
|
||||||
// the result count estimate are correct (they are wrong with
|
|
||||||
// the postfilter approach). It is also faster in some worst case scenarios
|
|
||||||
// so this now the default (but the post-filtering is faster in many common
|
|
||||||
// cases).
|
|
||||||
//
|
|
||||||
// Which is used is decided in SetQuery(), by setting either of
|
|
||||||
// the two following members. This in turn is controlled by a
|
|
||||||
// preprocessor directive.
|
|
||||||
|
|
||||||
#define XAPIAN_FILTERING 1
|
|
||||||
|
|
||||||
Xapian::MatchDecider *decider; // Xapian does the filtering
|
|
||||||
Xapian::MatchDecider *postfilter; // Result filtering done by Recoll
|
|
||||||
|
|
||||||
Xapian::Enquire *enquire; // Open query descriptor.
|
|
||||||
Xapian::MSet mset; // Partial result set
|
|
||||||
|
|
||||||
// Term frequencies for current query. See makeAbstract, setQuery
|
|
||||||
map<string, double> m_termfreqs;
|
|
||||||
|
|
||||||
Native(Db *db)
|
|
||||||
: m_db(db),
|
|
||||||
m_isopen(false), m_iswritable(false), decider(0), postfilter(0),
|
|
||||||
enquire(0)
|
|
||||||
{ }
|
|
||||||
|
|
||||||
~Native() {
|
|
||||||
delete decider;
|
|
||||||
delete postfilter;
|
|
||||||
delete enquire;
|
|
||||||
}
|
|
||||||
|
|
||||||
string makeAbstract(Xapian::docid id, const list<string>& terms);
|
|
||||||
|
|
||||||
bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);
|
|
||||||
|
|
||||||
/** Compute list of subdocuments for a given path (given by hash)
|
|
||||||
* We look for all Q terms beginning with the path/hash
|
|
||||||
* As suggested by James Aylett, a better method would be to add
|
|
||||||
* a single term (ie: XP/path/to/file) to all subdocs, then finding
|
|
||||||
* them would be a simple matter of retrieving the posting list for the
|
|
||||||
* term. There would still be a need for the current Qterm though, as a
|
|
||||||
* unique term for replace_document, and for retrieving by
|
|
||||||
* path/ipath (history)
|
|
||||||
*/
|
|
||||||
bool subDocs(const string &hash, vector<Xapian::docid>& docids);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
class FilterMatcher : public Xapian::MatchDecider {
|
|
||||||
public:
|
|
||||||
FilterMatcher(const string &topdir)
|
|
||||||
: m_topdir(topdir)
|
|
||||||
{}
|
|
||||||
virtual ~FilterMatcher() {}
|
|
||||||
|
|
||||||
virtual
|
|
||||||
#if XAPIAN_MAJOR_VERSION < 1
|
|
||||||
int
|
|
||||||
#else
|
|
||||||
bool
|
|
||||||
#endif
|
|
||||||
operator()(const Xapian::Document &xdoc) const
|
|
||||||
{
|
|
||||||
m_cnt++;
|
|
||||||
// Parse xapian document's data and populate doc fields
|
|
||||||
string data = xdoc.get_data();
|
|
||||||
ConfSimple parms(&data);
|
|
||||||
|
|
||||||
// The only filtering for now is on file path (subtree)
|
|
||||||
string url;
|
|
||||||
parms.get(string("url"), url);
|
|
||||||
LOGDEB2(("FilterMatcher topdir [%s] url [%s]\n",
|
|
||||||
m_topdir.c_str(), url.c_str()));
|
|
||||||
if (url.find(m_topdir, 7) == 7) {
|
|
||||||
LOGDEB2(("FilterMatcher: MATCH %d\n", m_cnt));
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
LOGDEB2(("FilterMatcher: NO MATCH %d\n", m_cnt));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
static int m_cnt;
|
|
||||||
|
|
||||||
private:
|
|
||||||
string m_topdir;
|
|
||||||
};
|
|
||||||
int FilterMatcher::m_cnt;
|
|
||||||
|
|
||||||
/* See comment in class declaration */
|
/* See comment in class declaration */
|
||||||
bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
|
bool Db::Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
|
||||||
{
|
{
|
||||||
docids.clear();
|
docids.clear();
|
||||||
string qterm = "Q"+ hash + "|";
|
string qterm = "Q"+ hash + "|";
|
||||||
@ -250,7 +135,7 @@ bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Turn data record from db into document fields
|
// Turn data record from db into document fields
|
||||||
bool Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc)
|
bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc)
|
||||||
{
|
{
|
||||||
LOGDEB1(("Db::dbDataToRclDoc: data: %s\n", data.c_str()));
|
LOGDEB1(("Db::dbDataToRclDoc: data: %s\n", data.c_str()));
|
||||||
ConfSimple parms(&data);
|
ConfSimple parms(&data);
|
||||||
@ -306,26 +191,29 @@ static list<string> noPrefixList(const list<string>& in)
|
|||||||
|
|
||||||
// Build a document abstract by extracting text chunks around the query terms
|
// Build a document abstract by extracting text chunks around the query terms
|
||||||
// This uses the db termlists, not the original document.
|
// This uses the db termlists, not the original document.
|
||||||
string Native::makeAbstract(Xapian::docid docid, const list<string>& iterms)
|
string Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||||
{
|
{
|
||||||
Chrono chron;
|
Chrono chron;
|
||||||
LOGDEB(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
|
LOGDEB(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
|
||||||
m_db->m_synthAbsLen, m_db->m_synthAbsWordCtxLen));
|
m_db->m_synthAbsLen, m_db->m_synthAbsWordCtxLen));
|
||||||
|
|
||||||
|
list<string> iterms;
|
||||||
|
query->getQueryTerms(iterms);
|
||||||
|
|
||||||
list<string> terms = noPrefixList(iterms);
|
list<string> terms = noPrefixList(iterms);
|
||||||
if (terms.empty()) {
|
if (terms.empty()) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
// Retrieve db-wide frequencies for the query terms
|
// Retrieve db-wide frequencies for the query terms
|
||||||
if (m_termfreqs.empty()) {
|
if (query->m_nq->termfreqs.empty()) {
|
||||||
double doccnt = db.get_doccount();
|
double doccnt = db.get_doccount();
|
||||||
if (doccnt == 0) doccnt = 1;
|
if (doccnt == 0) doccnt = 1;
|
||||||
for (list<string>::const_iterator qit = terms.begin();
|
for (list<string>::const_iterator qit = terms.begin();
|
||||||
qit != terms.end(); qit++) {
|
qit != terms.end(); qit++) {
|
||||||
m_termfreqs[*qit] = db.get_termfreq(*qit) / doccnt;
|
query->m_nq->termfreqs[*qit] = db.get_termfreq(*qit) / doccnt;
|
||||||
LOGABS(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(),
|
LOGABS(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(),
|
||||||
m_termfreqs[*qit]));
|
query->m_nq->termfreqs[*qit]));
|
||||||
}
|
}
|
||||||
LOGABS(("makeAbstract:%d: got termfreqs\n", chron.ms()));
|
LOGABS(("makeAbstract:%d: got termfreqs\n", chron.ms()));
|
||||||
}
|
}
|
||||||
@ -343,7 +231,7 @@ string Native::makeAbstract(Xapian::docid docid, const list<string>& iterms)
|
|||||||
Xapian::TermIterator term = db.termlist_begin(docid);
|
Xapian::TermIterator term = db.termlist_begin(docid);
|
||||||
term.skip_to(*qit);
|
term.skip_to(*qit);
|
||||||
if (term != db.termlist_end(docid) && *term == *qit) {
|
if (term != db.termlist_end(docid) && *term == *qit) {
|
||||||
double q = (term.get_wdf() / doclen) * m_termfreqs[*qit];
|
double q = (term.get_wdf() / doclen) * query->m_nq->termfreqs[*qit];
|
||||||
q = -log10(q);
|
q = -log10(q);
|
||||||
if (q < 3) {
|
if (q < 3) {
|
||||||
q = 0.05;
|
q = 0.05;
|
||||||
@ -556,7 +444,7 @@ string Native::makeAbstract(Xapian::docid docid, const list<string>& iterms)
|
|||||||
/* Rcl::Db methods ///////////////////////////////// */
|
/* Rcl::Db methods ///////////////////////////////// */
|
||||||
|
|
||||||
Db::Db()
|
Db::Db()
|
||||||
: m_ndb(0), m_qOpts(QO_NONE), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
: m_ndb(0), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
||||||
m_synthAbsWordCtxLen(4), m_flushMb(-1),
|
m_synthAbsWordCtxLen(4), m_flushMb(-1),
|
||||||
m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0),
|
m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0),
|
||||||
m_maxFsOccupPc(0), m_mode(Db::DbRO)
|
m_maxFsOccupPc(0), m_mode(Db::DbRO)
|
||||||
@ -586,28 +474,9 @@ Db::~Db()
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generic Xapian exception catching code. We do this quite often,
|
bool Db::open(const string& dir, const string &stops, OpenMode mode,
|
||||||
// and I have no idea how to do this except for a macro
|
bool keep_updated)
|
||||||
#define XCATCHERROR(MSG) \
|
|
||||||
catch (const Xapian::Error &e) { \
|
|
||||||
MSG = e.get_msg(); \
|
|
||||||
if (MSG.empty()) MSG = "Empty error message"; \
|
|
||||||
} catch (const string &s) { \
|
|
||||||
MSG = s; \
|
|
||||||
if (MSG.empty()) MSG = "Empty error message"; \
|
|
||||||
} catch (const char *s) { \
|
|
||||||
MSG = s; \
|
|
||||||
if (MSG.empty()) MSG = "Empty error message"; \
|
|
||||||
} catch (...) { \
|
|
||||||
MSG = "Caught unknown xapian exception"; \
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool Db::open(const string& dir, const string &stops, OpenMode mode, int qops)
|
|
||||||
{
|
{
|
||||||
bool keep_updated = (qops & QO_KEEP_UPDATED) != 0;
|
|
||||||
qops &= ~QO_KEEP_UPDATED;
|
|
||||||
|
|
||||||
if (m_ndb == 0)
|
if (m_ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
||||||
@ -724,7 +593,7 @@ bool Db::reOpen()
|
|||||||
if (m_ndb && m_ndb->m_isopen) {
|
if (m_ndb && m_ndb->m_isopen) {
|
||||||
if (!close())
|
if (!close())
|
||||||
return false;
|
return false;
|
||||||
if (!open(m_basedir, "", m_mode, m_qOpts | QO_KEEP_UPDATED)) {
|
if (!open(m_basedir, "", m_mode, true)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1467,64 +1336,6 @@ bool Db::filenameWildExp(const string& fnexp, list<string>& names)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prepare query out of user search data
|
|
||||||
bool Db::setQuery(RefCntr<SearchData> sdata, int opts,
|
|
||||||
const string& stemlang)
|
|
||||||
{
|
|
||||||
if (!m_ndb) {
|
|
||||||
LOGERR(("Db::setQuery: no db!\n"));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
m_reason.erase();
|
|
||||||
LOGDEB(("Db::setQuery:\n"));
|
|
||||||
|
|
||||||
m_filterTopDir = sdata->getTopdir();
|
|
||||||
deleteZ(m_ndb->decider);
|
|
||||||
deleteZ(m_ndb->postfilter);
|
|
||||||
if (!m_filterTopDir.empty()) {
|
|
||||||
#if XAPIAN_FILTERING
|
|
||||||
m_ndb->decider =
|
|
||||||
#else
|
|
||||||
m_ndb->postfilter =
|
|
||||||
#endif
|
|
||||||
new FilterMatcher(m_filterTopDir);
|
|
||||||
}
|
|
||||||
m_dbindices.clear();
|
|
||||||
m_qOpts = opts;
|
|
||||||
m_ndb->m_termfreqs.clear();
|
|
||||||
FilterMatcher::m_cnt = 0;
|
|
||||||
Xapian::Query xq;
|
|
||||||
if (!sdata->toNativeQuery(*this, &xq,
|
|
||||||
(opts & Db::QO_STEM) ? stemlang : "")) {
|
|
||||||
m_reason += sdata->getReason();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
m_ndb->query = xq;
|
|
||||||
string ermsg;
|
|
||||||
string d;
|
|
||||||
try {
|
|
||||||
delete m_ndb->enquire;
|
|
||||||
m_ndb->enquire = new Xapian::Enquire(m_ndb->db);
|
|
||||||
m_ndb->enquire->set_query(m_ndb->query);
|
|
||||||
m_ndb->mset = Xapian::MSet();
|
|
||||||
// Get the query description and trim the "Xapian::Query"
|
|
||||||
d = m_ndb->query.get_description();
|
|
||||||
} XCATCHERROR(ermsg);
|
|
||||||
if (!ermsg.empty()) {
|
|
||||||
LOGDEB(("Db::SetQuery: xapian error %s\n", ermsg.c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (d.find("Xapian::Query") == 0)
|
|
||||||
d.erase(0, strlen("Xapian::Query"));
|
|
||||||
if (!m_filterTopDir.empty()) {
|
|
||||||
d += string(" [dir: ") + m_filterTopDir + "]";
|
|
||||||
}
|
|
||||||
sdata->setDescription(d);
|
|
||||||
LOGDEB(("Db::SetQuery: Q: %s\n", sdata->getDescription().c_str()));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
class TermMatchCmpByWcf {
|
class TermMatchCmpByWcf {
|
||||||
public:
|
public:
|
||||||
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
||||||
@ -1735,195 +1546,15 @@ bool Db::stemDiffers(const string& lang, const string& word,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Db::getQueryTerms(list<string>& terms)
|
|
||||||
{
|
|
||||||
if (!m_ndb)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
terms.clear();
|
bool Db::makeDocAbstract(Doc &doc, Query *query, string& abstract)
|
||||||
Xapian::TermIterator it;
|
|
||||||
string ermsg;
|
|
||||||
try {
|
|
||||||
for (it = m_ndb->query.get_terms_begin();
|
|
||||||
it != m_ndb->query.get_terms_end(); it++) {
|
|
||||||
terms.push_back(*it);
|
|
||||||
}
|
|
||||||
} XCATCHERROR(ermsg);
|
|
||||||
if (!ermsg.empty()) {
|
|
||||||
LOGERR(("getQueryTerms: xapian error: %s\n", ermsg.c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Db::getMatchTerms(const Doc& doc, list<string>& terms)
|
|
||||||
{
|
|
||||||
if (!m_ndb || !m_ndb->enquire) {
|
|
||||||
LOGERR(("Db::getMatchTerms: no query opened\n"));
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
terms.clear();
|
|
||||||
Xapian::TermIterator it;
|
|
||||||
Xapian::docid id = Xapian::docid(doc.xdocid);
|
|
||||||
string ermsg;
|
|
||||||
try {
|
|
||||||
for (it=m_ndb->enquire->get_matching_terms_begin(id);
|
|
||||||
it != m_ndb->enquire->get_matching_terms_end(id); it++) {
|
|
||||||
terms.push_back(*it);
|
|
||||||
}
|
|
||||||
} XCATCHERROR(ermsg);
|
|
||||||
if (!ermsg.empty()) {
|
|
||||||
LOGERR(("getQueryTerms: xapian error: %s\n", ermsg.c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mset size
|
|
||||||
static const int qquantum = 30;
|
|
||||||
|
|
||||||
int Db::getResCnt()
|
|
||||||
{
|
|
||||||
if (!m_ndb || !m_ndb->enquire) {
|
|
||||||
LOGERR(("Db::getResCnt: no query opened\n"));
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
string ermsg;
|
|
||||||
if (m_ndb->mset.size() <= 0) {
|
|
||||||
try {
|
|
||||||
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum,
|
|
||||||
0, m_ndb->decider);
|
|
||||||
} catch (const Xapian::DatabaseModifiedError &error) {
|
|
||||||
m_ndb->db.reopen();
|
|
||||||
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum,
|
|
||||||
0, m_ndb->decider);
|
|
||||||
} XCATCHERROR(ermsg);
|
|
||||||
if (!ermsg.empty()) {
|
|
||||||
LOGERR(("enquire->get_mset: exception: %s\n", ermsg.c_str()));
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int ret = -1;
|
|
||||||
try {
|
|
||||||
ret = m_ndb->mset.get_matches_lower_bound();
|
|
||||||
} catch (...) {}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Get document at rank i in query (i is the index in the whole result
|
|
||||||
// set, as in the enquire class. We check if the current mset has the
|
|
||||||
// doc, else ask for an other one. We use msets of 10 documents. Don't
|
|
||||||
// know if the whole thing makes sense at all but it seems to work.
|
|
||||||
//
|
|
||||||
// If there is a postquery filter (ie: file names), we have to
|
|
||||||
// maintain a correspondance from the sequential external index
|
|
||||||
// sequence to the internal Xapian hole-y one (the holes being the documents
|
|
||||||
// that dont match the filter).
|
|
||||||
bool Db::getDoc(int exti, Doc &doc, int *percent)
|
|
||||||
{
|
|
||||||
LOGDEB1(("Db::getDoc: exti %d\n", exti));
|
|
||||||
if (!m_ndb || !m_ndb->enquire) {
|
|
||||||
LOGERR(("Db::getDoc: no query opened\n"));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
int xapi;
|
|
||||||
if (m_ndb->postfilter) {
|
|
||||||
// There is a postquery filter, does this fall in already known area ?
|
|
||||||
if (exti >= (int)m_dbindices.size()) {
|
|
||||||
// Have to fetch xapian docs and filter until we get
|
|
||||||
// enough or fail
|
|
||||||
m_dbindices.reserve(exti+1);
|
|
||||||
// First xapian doc we fetch is the one after last stored
|
|
||||||
int first = m_dbindices.size() > 0 ? m_dbindices.back() + 1 : 0;
|
|
||||||
// Loop until we get enough docs
|
|
||||||
while (exti >= (int)m_dbindices.size()) {
|
|
||||||
LOGDEB(("Db::getDoc: fetching %d starting at %d\n",
|
|
||||||
qquantum, first));
|
|
||||||
try {
|
|
||||||
m_ndb->mset = m_ndb->enquire->get_mset(first, qquantum);
|
|
||||||
} catch (const Xapian::DatabaseModifiedError &error) {
|
|
||||||
m_ndb->db.reopen();
|
|
||||||
m_ndb->mset = m_ndb->enquire->get_mset(first, qquantum);
|
|
||||||
} catch (const Xapian::Error & error) {
|
|
||||||
LOGERR(("enquire->get_mset: exception: %s\n",
|
|
||||||
error.get_msg().c_str()));
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (m_ndb->mset.empty()) {
|
|
||||||
LOGDEB(("Db::getDoc: got empty mset\n"));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
first = m_ndb->mset.get_firstitem();
|
|
||||||
for (unsigned int i = 0; i < m_ndb->mset.size() ; i++) {
|
|
||||||
LOGDEB(("Db::getDoc: [%d]\n", i));
|
|
||||||
Xapian::Document xdoc = m_ndb->mset[i].get_document();
|
|
||||||
if ((*m_ndb->postfilter)(xdoc)) {
|
|
||||||
m_dbindices.push_back(first + i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
first = first + m_ndb->mset.size();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
xapi = m_dbindices[exti];
|
|
||||||
} else {
|
|
||||||
xapi = exti;
|
|
||||||
}
|
|
||||||
|
|
||||||
// From there on, we work with a xapian enquire item number. Fetch it
|
|
||||||
int first = m_ndb->mset.get_firstitem();
|
|
||||||
int last = first + m_ndb->mset.size() -1;
|
|
||||||
|
|
||||||
if (!(xapi >= first && xapi <= last)) {
|
|
||||||
LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
|
|
||||||
try {
|
|
||||||
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum,
|
|
||||||
0, m_ndb->decider);
|
|
||||||
} catch (const Xapian::DatabaseModifiedError &error) {
|
|
||||||
m_ndb->db.reopen();
|
|
||||||
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum,
|
|
||||||
0, m_ndb->decider);
|
|
||||||
|
|
||||||
} catch (const Xapian::Error & error) {
|
|
||||||
LOGERR(("enquire->get_mset: exception: %s\n",
|
|
||||||
error.get_msg().c_str()));
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
if (m_ndb->mset.empty())
|
|
||||||
return false;
|
|
||||||
first = m_ndb->mset.get_firstitem();
|
|
||||||
last = first + m_ndb->mset.size() -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
LOGDEB1(("Db::getDoc: Qry [%s] win [%d-%d] Estimated results: %d",
|
|
||||||
m_ndb->query.get_description().c_str(),
|
|
||||||
first, last,
|
|
||||||
m_ndb->mset.get_matches_lower_bound()));
|
|
||||||
|
|
||||||
Xapian::Document xdoc = m_ndb->mset[xapi-first].get_document();
|
|
||||||
Xapian::docid docid = *(m_ndb->mset[xapi-first]);
|
|
||||||
if (percent)
|
|
||||||
*percent = m_ndb->mset.convert_to_percent(m_ndb->mset[xapi-first]);
|
|
||||||
|
|
||||||
// Parse xapian document's data and populate doc fields
|
|
||||||
string data = xdoc.get_data();
|
|
||||||
return m_ndb->dbDataToRclDoc(docid, data, doc);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Db::makeDocAbstract(Doc &doc, string& abstract)
|
|
||||||
{
|
{
|
||||||
LOGDEB1(("Db::makeDocAbstract: exti %d\n", exti));
|
LOGDEB1(("Db::makeDocAbstract: exti %d\n", exti));
|
||||||
if (!m_ndb || !m_ndb->enquire) {
|
if (!m_ndb) {
|
||||||
LOGERR(("Db::makeDocAbstract: no query opened\n"));
|
LOGERR(("Db::makeDocAbstract: no db\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
list<string> terms;
|
abstract = m_ndb->makeAbstract(doc.xdocid, query);
|
||||||
getQueryTerms(terms);
|
|
||||||
abstract = m_ndb->makeAbstract(doc.xdocid, terms);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1969,45 +1600,6 @@ bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
list<string> Db::expand(const Doc &doc)
|
|
||||||
{
|
|
||||||
list<string> res;
|
|
||||||
if (!m_ndb || !m_ndb->enquire) {
|
|
||||||
LOGERR(("Db::expand: no query opened\n"));
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
string ermsg;
|
|
||||||
for (int tries = 0; tries < 2; tries++) {
|
|
||||||
try {
|
|
||||||
Xapian::RSet rset;
|
|
||||||
rset.add_document(Xapian::docid(doc.xdocid));
|
|
||||||
// We don't exclude the original query terms.
|
|
||||||
Xapian::ESet eset = m_ndb->enquire->get_eset(20, rset, false);
|
|
||||||
LOGDEB(("ESet terms:\n"));
|
|
||||||
// We filter out the special terms
|
|
||||||
for (Xapian::ESetIterator it = eset.begin();
|
|
||||||
it != eset.end(); it++) {
|
|
||||||
LOGDEB((" [%s]\n", (*it).c_str()));
|
|
||||||
if ((*it).empty() || ((*it).at(0)>='A' && (*it).at(0)<='Z'))
|
|
||||||
continue;
|
|
||||||
res.push_back(*it);
|
|
||||||
if (res.size() >= 10)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} catch (const Xapian::DatabaseModifiedError &error) {
|
|
||||||
continue;
|
|
||||||
} XCATCHERROR(ermsg);
|
|
||||||
if (!ermsg.empty()) {
|
|
||||||
LOGERR(("Db::expand: xapian error %s\n", ermsg.c_str()));
|
|
||||||
res.clear();
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
#ifndef NO_NAMESPACES
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _DB_H_INCLUDED_
|
#ifndef _DB_H_INCLUDED_
|
||||||
#define _DB_H_INCLUDED_
|
#define _DB_H_INCLUDED_
|
||||||
/* @(#$Id: rcldb.h,v 1.54 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rcldb.h,v 1.55 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
@ -52,8 +52,8 @@ namespace Rcl {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
class SearchData;
|
class SearchData;
|
||||||
class Native;
|
|
||||||
class TermIter;
|
class TermIter;
|
||||||
|
class Query;
|
||||||
|
|
||||||
class TermMatchEntry {
|
class TermMatchEntry {
|
||||||
public:
|
public:
|
||||||
@ -71,17 +71,17 @@ public:
|
|||||||
*/
|
*/
|
||||||
class Db {
|
class Db {
|
||||||
public:
|
public:
|
||||||
|
// A place for things we don't want visible here.
|
||||||
|
class Native;
|
||||||
|
friend class Native;
|
||||||
|
|
||||||
/* General stuff (valid for query or update) ****************************/
|
/* General stuff (valid for query or update) ****************************/
|
||||||
Db();
|
Db();
|
||||||
~Db();
|
~Db();
|
||||||
|
|
||||||
enum OpenMode {DbRO, DbUpd, DbTrunc};
|
enum OpenMode {DbRO, DbUpd, DbTrunc};
|
||||||
// KEEP_UPDATED is internal use by reOpen() only
|
|
||||||
enum QueryOpts {QO_NONE=0, QO_STEM = 1, QO_KEEP_UPDATED = 8};
|
|
||||||
|
|
||||||
bool open(const string &dbdir, const string &stoplistfn,
|
bool open(const string &dbdir, const string &stoplistfn,
|
||||||
OpenMode mode, int qops = QO_NONE);
|
OpenMode mode, bool keep_updated = false);
|
||||||
bool close();
|
bool close();
|
||||||
bool isopen();
|
bool isopen();
|
||||||
|
|
||||||
@ -130,11 +130,12 @@ class Db {
|
|||||||
/** Return total docs in db */
|
/** Return total docs in db */
|
||||||
int docCnt();
|
int docCnt();
|
||||||
|
|
||||||
// Parse query string and initialize query
|
/** Add extra database for querying */
|
||||||
bool setQuery(RefCntr<SearchData> q, int opts = QO_NONE,
|
bool addQueryDb(const string &dir);
|
||||||
const string& stemlang = "english");
|
/** Remove extra database. if dir == "", remove all. */
|
||||||
bool getQueryTerms(list<string>& terms);
|
bool rmQueryDb(const string &dir);
|
||||||
bool getMatchTerms(const Doc& doc, list<string>& terms);
|
/** Tell if directory seems to hold xapian db */
|
||||||
|
static bool testDbDir(const string &dir);
|
||||||
|
|
||||||
/** Return a list of index terms that match the input string
|
/** Return a list of index terms that match the input string
|
||||||
* Expansion is performed with either wildcard or regexp processing
|
* Expansion is performed with either wildcard or regexp processing
|
||||||
@ -143,33 +144,12 @@ class Db {
|
|||||||
bool termMatch(MatchType typ, const string &lang, const string &s,
|
bool termMatch(MatchType typ, const string &lang, const string &s,
|
||||||
list<TermMatchEntry>& result, int max = -1);
|
list<TermMatchEntry>& result, int max = -1);
|
||||||
|
|
||||||
/** Add extra database for querying */
|
|
||||||
bool addQueryDb(const string &dir);
|
|
||||||
/** Remove extra database. if dir == "", remove all. */
|
|
||||||
bool rmQueryDb(const string &dir);
|
|
||||||
/** Tell if directory seems to hold xapian db */
|
|
||||||
static bool testDbDir(const string &dir);
|
|
||||||
|
|
||||||
/** Get document at rank i in current query.
|
|
||||||
|
|
||||||
This is probably vastly inferior to the type of interface in
|
|
||||||
Xapian, but we have to start with something simple to
|
|
||||||
experiment with the GUI. i is sequential from 0 to some value.
|
|
||||||
*/
|
|
||||||
bool getDoc(int i, Doc &doc, int *percent = 0);
|
|
||||||
|
|
||||||
/* Build synthetic abstract out of query terms and term position data */
|
/* Build synthetic abstract out of query terms and term position data */
|
||||||
bool makeDocAbstract(Doc &doc, string& abstract);
|
bool makeDocAbstract(Doc &doc, Query *query, string& abstract);
|
||||||
|
|
||||||
/** Get document for given filename and ipath */
|
/** Get document for given filename and ipath */
|
||||||
bool getDoc(const string &fn, const string &ipath, Doc &doc, int *percent);
|
bool getDoc(const string &fn, const string &ipath, Doc &doc, int *percent);
|
||||||
|
|
||||||
/** Expand query */
|
|
||||||
list<string> expand(const Doc &doc);
|
|
||||||
|
|
||||||
/** Get results count for current query */
|
|
||||||
int getResCnt();
|
|
||||||
|
|
||||||
/** Get a list of existing stemming databases */
|
/** Get a list of existing stemming databases */
|
||||||
std::list<std::string> getStemLangs();
|
std::list<std::string> getStemLangs();
|
||||||
|
|
||||||
@ -189,22 +169,16 @@ class Db {
|
|||||||
/** Filename wildcard expansion */
|
/** Filename wildcard expansion */
|
||||||
bool filenameWildExp(const string& exp, list<string>& names);
|
bool filenameWildExp(const string& exp, list<string>& names);
|
||||||
|
|
||||||
|
/** This has to be public for access by embedded Query::Native */
|
||||||
|
Native *m_ndb;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Internal form of close, can be called during destruction
|
// Internal form of close, can be called during destruction
|
||||||
bool i_close(bool final);
|
bool i_close(bool final);
|
||||||
|
|
||||||
string m_filterTopDir; // Current query filter on subtree top directory
|
|
||||||
vector<int> m_dbindices; // In case there is a postq filter: sequence of
|
|
||||||
// db indices that match
|
|
||||||
|
|
||||||
string m_reason; // Error explanation
|
string m_reason; // Error explanation
|
||||||
|
|
||||||
// A place for things we don't want visible here.
|
/* Parameters cached out of the configuration files */
|
||||||
friend class Native;
|
|
||||||
Native *m_ndb;
|
|
||||||
|
|
||||||
unsigned int m_qOpts;
|
|
||||||
|
|
||||||
// This is how long an abstract we keep or build from beginning of
|
// This is how long an abstract we keep or build from beginning of
|
||||||
// text when indexing. It only has an influence on the size of the
|
// text when indexing. It only has an influence on the size of the
|
||||||
// db as we are free to shorten it again when displaying
|
// db as we are free to shorten it again when displaying
|
||||||
@ -215,7 +189,6 @@ private:
|
|||||||
// This is how many words (context size) we keep around query terms
|
// This is how many words (context size) we keep around query terms
|
||||||
// when building the abstract
|
// when building the abstract
|
||||||
int m_synthAbsWordCtxLen;
|
int m_synthAbsWordCtxLen;
|
||||||
|
|
||||||
// Flush threshold. Megabytes of text indexed before we flush.
|
// Flush threshold. Megabytes of text indexed before we flush.
|
||||||
int m_flushMb;
|
int m_flushMb;
|
||||||
// Text bytes indexed since beginning
|
// Text bytes indexed since beginning
|
||||||
@ -224,7 +197,6 @@ private:
|
|||||||
long long m_flushtxtsz;
|
long long m_flushtxtsz;
|
||||||
// Text bytes at last fsoccup check
|
// Text bytes at last fsoccup check
|
||||||
long long m_occtxtsz;
|
long long m_occtxtsz;
|
||||||
|
|
||||||
// Maximum file system occupation percentage
|
// Maximum file system occupation percentage
|
||||||
int m_maxFsOccupPc;
|
int m_maxFsOccupPc;
|
||||||
|
|
||||||
|
|||||||
67
src/rcldb/rcldb_p.h
Normal file
67
src/rcldb/rcldb_p.h
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
#ifndef _rcldb_p_h_included_
|
||||||
|
#define _rcldb_p_h_included_
|
||||||
|
|
||||||
|
#include "xapian.h"
|
||||||
|
|
||||||
|
namespace Rcl {
|
||||||
|
/* @(#$Id: rcldb_p.h,v 1.1 2008-06-13 18:22:46 dockes Exp $ (C) 2007 J.F.Dockes */
|
||||||
|
|
||||||
|
// Generic Xapian exception catching code. We do this quite often,
|
||||||
|
// and I have no idea how to do this except for a macro
|
||||||
|
#define XCATCHERROR(MSG) \
|
||||||
|
catch (const Xapian::Error &e) { \
|
||||||
|
MSG = e.get_msg(); \
|
||||||
|
if (MSG.empty()) MSG = "Empty error message"; \
|
||||||
|
} catch (const string &s) { \
|
||||||
|
MSG = s; \
|
||||||
|
if (MSG.empty()) MSG = "Empty error message"; \
|
||||||
|
} catch (const char *s) { \
|
||||||
|
MSG = s; \
|
||||||
|
if (MSG.empty()) MSG = "Empty error message"; \
|
||||||
|
} catch (...) { \
|
||||||
|
MSG = "Caught unknown xapian exception"; \
|
||||||
|
}
|
||||||
|
|
||||||
|
class Query;
|
||||||
|
|
||||||
|
// A class for data and methods that would have to expose
|
||||||
|
// Xapian-specific stuff if they were in Rcl::Db. There could actually be
|
||||||
|
// 2 different ones for indexing or query as there is not much in
|
||||||
|
// common.
|
||||||
|
class Db::Native {
public:
    // Back pointer to the Rcl::Db object this native part belongs to
    Db *m_db;
    // Both flags start out false (see constructor)
    bool m_isopen;
    bool m_iswritable;

    // Xapian handle used for indexing
    Xapian::WritableDatabase wdb;

    // Xapian handle used for querying
    Xapian::Database db;

    Native(Db *db)
        : m_db(db), m_isopen(false), m_iswritable(false)
    { }

    ~Native() {
    }

    // Build an abstract for document 'id', using data held by 'query'
    string makeAbstract(Xapian::docid id, Query *query);

    // Turn the data record stored for 'docid' into Rcl::Doc fields
    bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);

    /** Compute the list of subdocuments for a given path (given by hash).
     * We look for all Q terms beginning with the path/hash.
     * As suggested by James Aylett, a better method would be to add
     * a single term (i.e. XP/path/to/file) to all subdocs; finding
     * them would then be a simple matter of retrieving the posting list
     * for the term. The current Q term would still be needed though, as a
     * unique term for replace_document, and for retrieving by
     * path/ipath (history).
     */
    bool subDocs(const string &hash, vector<Xapian::docid>& docids);

};
|
||||||
|
}
|
||||||
|
#endif /* _rcldb_p_h_included_ */
|
||||||
354
src/rcldb/rclquery.cpp
Normal file
354
src/rcldb/rclquery.cpp
Normal file
@ -0,0 +1,354 @@
|
|||||||
|
#ifndef lint
|
||||||
|
static char rcsid[] = "@(#$Id: rclquery.cpp,v 1.1 2008-06-13 18:22:46 dockes Exp $ (C) 2008 J.F.Dockes";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <list>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "rcldb.h"
|
||||||
|
#include "rcldb_p.h"
|
||||||
|
#include "rclquery.h"
|
||||||
|
#include "rclquery_p.h"
|
||||||
|
#include "debuglog.h"
|
||||||
|
#include "conftree.h"
|
||||||
|
#include "smallut.h"
|
||||||
|
#include "searchdata.h"
|
||||||
|
|
||||||
|
#ifndef NO_NAMESPACES
|
||||||
|
namespace Rcl {
|
||||||
|
#endif
|
||||||
|
class FilterMatcher : public Xapian::MatchDecider {
|
||||||
|
public:
|
||||||
|
FilterMatcher(const string &topdir)
|
||||||
|
: m_topdir(topdir)
|
||||||
|
{}
|
||||||
|
virtual ~FilterMatcher() {}
|
||||||
|
|
||||||
|
virtual
|
||||||
|
#if XAPIAN_MAJOR_VERSION < 1
|
||||||
|
int
|
||||||
|
#else
|
||||||
|
bool
|
||||||
|
#endif
|
||||||
|
operator()(const Xapian::Document &xdoc) const
|
||||||
|
{
|
||||||
|
// Parse xapian document's data and populate doc fields
|
||||||
|
string data = xdoc.get_data();
|
||||||
|
ConfSimple parms(&data);
|
||||||
|
|
||||||
|
// The only filtering for now is on file path (subtree)
|
||||||
|
string url;
|
||||||
|
parms.get(string("url"), url);
|
||||||
|
LOGDEB2(("FilterMatcher topdir [%s] url [%s]\n",
|
||||||
|
m_topdir.c_str(), url.c_str()));
|
||||||
|
if (url.find(m_topdir, 7) == 7) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
string m_topdir;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create a query object attached to database 'db', along with its
// private native (Xapian-specific) part.
Query::Query(Db *db)
    : m_nq(new Native(this)),
      m_db(db)
{
}
|
||||||
|
|
||||||
|
// Release the native part owned by this query.
Query::~Query()
{
    deleteZ(m_nq);
}
|
||||||
|
|
||||||
|
// Return the error explanation text accumulated in m_reason.
string Query::getReason() const
{
    return m_reason;
}
|
||||||
|
|
||||||
|
// Return the database object this query was created for.
Db *Query::whatDb()
{
    return m_db;
}
|
||||||
|
|
||||||
|
//#define ISNULL(X) (X).isNull()
|
||||||
|
#define ISNULL(X) !(X)
|
||||||
|
|
||||||
|
// Prepare query out of user search data
|
||||||
|
bool Query::setQuery(RefCntr<SearchData> sdata, int opts,
|
||||||
|
const string& stemlang)
|
||||||
|
{
|
||||||
|
if (!m_db || ISNULL(m_nq)) {
|
||||||
|
LOGERR(("Query::setQuery: not initialised!\n"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
m_reason.erase();
|
||||||
|
LOGDEB(("Query::setQuery:\n"));
|
||||||
|
|
||||||
|
m_filterTopDir = sdata->getTopdir();
|
||||||
|
deleteZ(m_nq->decider);
|
||||||
|
deleteZ(m_nq->postfilter);
|
||||||
|
if (!m_filterTopDir.empty()) {
|
||||||
|
#if XAPIAN_FILTERING
|
||||||
|
m_nq->decider =
|
||||||
|
#else
|
||||||
|
m_nq->postfilter =
|
||||||
|
#endif
|
||||||
|
new FilterMatcher(m_filterTopDir);
|
||||||
|
}
|
||||||
|
m_nq->m_dbindices.clear();
|
||||||
|
m_qOpts = opts;
|
||||||
|
m_nq->termfreqs.clear();
|
||||||
|
Xapian::Query xq;
|
||||||
|
if (!sdata->toNativeQuery(*m_db, &xq,
|
||||||
|
(opts & QO_STEM) ? stemlang : "")) {
|
||||||
|
m_reason += sdata->getReason();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
m_nq->query = xq;
|
||||||
|
string ermsg;
|
||||||
|
string d;
|
||||||
|
try {
|
||||||
|
delete m_nq->enquire;
|
||||||
|
m_nq->enquire = new Xapian::Enquire(m_db->m_ndb->db);
|
||||||
|
m_nq->enquire->set_query(m_nq->query);
|
||||||
|
m_nq->mset = Xapian::MSet();
|
||||||
|
// Get the query description and trim the "Xapian::Query"
|
||||||
|
d = m_nq->query.get_description();
|
||||||
|
} XCATCHERROR(ermsg);
|
||||||
|
if (!ermsg.empty()) {
|
||||||
|
LOGDEB(("Query::SetQuery: xapian error %s\n", ermsg.c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (d.find("Xapian::Query") == 0)
|
||||||
|
d.erase(0, strlen("Xapian::Query"));
|
||||||
|
if (!m_filterTopDir.empty()) {
|
||||||
|
d += string(" [dir: ") + m_filterTopDir + "]";
|
||||||
|
}
|
||||||
|
sdata->setDescription(d);
|
||||||
|
LOGDEB(("Query::SetQuery: Q: %s\n", sdata->getDescription().c_str()));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Copy the terms of the current native query into 'terms' (which is
// cleared first). Returns false if there is no native part or if Xapian
// raised an error while walking the term list.
bool Query::getQueryTerms(list<string>& terms)
{
    if (ISNULL(m_nq))
        return false;

    terms.clear();
    string ermsg;
    try {
        Xapian::TermIterator cur = m_nq->query.get_terms_begin();
        while (cur != m_nq->query.get_terms_end()) {
            terms.push_back(*cur);
            cur++;
        }
    } XCATCHERROR(ermsg);

    if (ermsg.empty())
        return true;

    LOGERR(("getQueryTerms: xapian error: %s\n", ermsg.c_str()));
    return false;
}
|
||||||
|
|
||||||
|
bool Query::getMatchTerms(const Doc& doc, list<string>& terms)
|
||||||
|
{
|
||||||
|
if (ISNULL(m_nq) || !m_nq->enquire) {
|
||||||
|
LOGERR(("Query::getMatchTerms: no query opened\n"));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
terms.clear();
|
||||||
|
Xapian::TermIterator it;
|
||||||
|
Xapian::docid id = Xapian::docid(doc.xdocid);
|
||||||
|
string ermsg;
|
||||||
|
try {
|
||||||
|
for (it=m_nq->enquire->get_matching_terms_begin(id);
|
||||||
|
it != m_nq->enquire->get_matching_terms_end(id); it++) {
|
||||||
|
terms.push_back(*it);
|
||||||
|
}
|
||||||
|
} XCATCHERROR(ermsg);
|
||||||
|
if (!ermsg.empty()) {
|
||||||
|
LOGERR(("getQueryTerms: xapian error: %s\n", ermsg.c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mset size
|
||||||
|
static const int qquantum = 30;
|
||||||
|
|
||||||
|
int Query::getResCnt()
|
||||||
|
{
|
||||||
|
if (ISNULL(m_nq) || !m_nq->enquire) {
|
||||||
|
LOGERR(("Query::getResCnt: no query opened\n"));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
string ermsg;
|
||||||
|
if (m_nq->mset.size() <= 0) {
|
||||||
|
try {
|
||||||
|
m_nq->mset = m_nq->enquire->get_mset(0, qquantum,
|
||||||
|
0, m_nq->decider);
|
||||||
|
} catch (const Xapian::DatabaseModifiedError &error) {
|
||||||
|
m_db->m_ndb->db.reopen();
|
||||||
|
m_nq->mset = m_nq->enquire->get_mset(0, qquantum,
|
||||||
|
0, m_nq->decider);
|
||||||
|
} XCATCHERROR(ermsg);
|
||||||
|
if (!ermsg.empty()) {
|
||||||
|
LOGERR(("enquire->get_mset: exception: %s\n", ermsg.c_str()));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int ret = -1;
|
||||||
|
try {
|
||||||
|
ret = m_nq->mset.get_matches_lower_bound();
|
||||||
|
} catch (...) {}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Get document at rank i in query (i is the index in the whole result
|
||||||
|
// set, as in the enquire class. We check if the current mset has the
|
||||||
|
// doc, else ask for an other one. We use msets of 10 documents. Don't
|
||||||
|
// know if the whole thing makes sense at all but it seems to work.
|
||||||
|
//
|
||||||
|
// If there is a postquery filter (ie: file names), we have to
|
||||||
|
// maintain a correspondance from the sequential external index
|
||||||
|
// sequence to the internal Xapian hole-y one (the holes being the documents
|
||||||
|
// that dont match the filter).
|
||||||
|
bool Query::getDoc(int exti, Doc &doc, int *percent)
|
||||||
|
{
|
||||||
|
LOGDEB1(("Query::getDoc: exti %d\n", exti));
|
||||||
|
if (ISNULL(m_nq) || !m_nq->enquire) {
|
||||||
|
LOGERR(("Query::getDoc: no query opened\n"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int xapi;
|
||||||
|
if (m_nq->postfilter) {
|
||||||
|
// There is a postquery filter, does this fall in already known area ?
|
||||||
|
if (exti >= (int)m_nq->m_dbindices.size()) {
|
||||||
|
// Have to fetch xapian docs and filter until we get
|
||||||
|
// enough or fail
|
||||||
|
m_nq->m_dbindices.reserve(exti+1);
|
||||||
|
// First xapian doc we fetch is the one after last stored
|
||||||
|
int first = m_nq->m_dbindices.size() > 0 ?
|
||||||
|
m_nq->m_dbindices.back() + 1 : 0;
|
||||||
|
// Loop until we get enough docs
|
||||||
|
while (exti >= (int)m_nq->m_dbindices.size()) {
|
||||||
|
LOGDEB(("Query::getDoc: fetching %d starting at %d\n",
|
||||||
|
qquantum, first));
|
||||||
|
try {
|
||||||
|
m_nq->mset = m_nq->enquire->get_mset(first, qquantum);
|
||||||
|
} catch (const Xapian::DatabaseModifiedError &error) {
|
||||||
|
m_db->m_ndb->db.reopen();
|
||||||
|
m_nq->mset = m_nq->enquire->get_mset(first, qquantum);
|
||||||
|
} catch (const Xapian::Error & error) {
|
||||||
|
LOGERR(("enquire->get_mset: exception: %s\n",
|
||||||
|
error.get_msg().c_str()));
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_nq->mset.empty()) {
|
||||||
|
LOGDEB(("Query::getDoc: got empty mset\n"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
first = m_nq->mset.get_firstitem();
|
||||||
|
for (unsigned int i = 0; i < m_nq->mset.size() ; i++) {
|
||||||
|
LOGDEB(("Query::getDoc: [%d]\n", i));
|
||||||
|
Xapian::Document xdoc = m_nq->mset[i].get_document();
|
||||||
|
if ((*m_nq->postfilter)(xdoc)) {
|
||||||
|
m_nq->m_dbindices.push_back(first + i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
first = first + m_nq->mset.size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
xapi = m_nq->m_dbindices[exti];
|
||||||
|
} else {
|
||||||
|
xapi = exti;
|
||||||
|
}
|
||||||
|
|
||||||
|
// From there on, we work with a xapian enquire item number. Fetch it
|
||||||
|
int first = m_nq->mset.get_firstitem();
|
||||||
|
int last = first + m_nq->mset.size() -1;
|
||||||
|
|
||||||
|
if (!(xapi >= first && xapi <= last)) {
|
||||||
|
LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
|
||||||
|
try {
|
||||||
|
m_nq->mset = m_nq->enquire->get_mset(xapi, qquantum,
|
||||||
|
0, m_nq->decider);
|
||||||
|
} catch (const Xapian::DatabaseModifiedError &error) {
|
||||||
|
m_db->m_ndb->db.reopen();
|
||||||
|
m_nq->mset = m_nq->enquire->get_mset(xapi, qquantum,
|
||||||
|
0, m_nq->decider);
|
||||||
|
|
||||||
|
} catch (const Xapian::Error & error) {
|
||||||
|
LOGERR(("enquire->get_mset: exception: %s\n",
|
||||||
|
error.get_msg().c_str()));
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
if (m_nq->mset.empty())
|
||||||
|
return false;
|
||||||
|
first = m_nq->mset.get_firstitem();
|
||||||
|
last = first + m_nq->mset.size() -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOGDEB1(("Query::getDoc: Qry [%s] win [%d-%d] Estimated results: %d",
|
||||||
|
m_nq->query.get_description().c_str(),
|
||||||
|
first, last,
|
||||||
|
m_nq->mset.get_matches_lower_bound()));
|
||||||
|
|
||||||
|
Xapian::Document xdoc = m_nq->mset[xapi-first].get_document();
|
||||||
|
Xapian::docid docid = *(m_nq->mset[xapi-first]);
|
||||||
|
if (percent)
|
||||||
|
*percent = m_nq->mset.convert_to_percent(m_nq->mset[xapi-first]);
|
||||||
|
|
||||||
|
// Parse xapian document's data and populate doc fields
|
||||||
|
string data = xdoc.get_data();
|
||||||
|
return m_db->m_ndb->dbDataToRclDoc(docid, data, doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Compute a list of expansion terms for the current query, using a given
 * document as the relevance set.
 *
 * At most 20 terms are requested from Xapian and at most 10 are
 * returned. Empty terms and terms beginning with an ASCII uppercase
 * letter are skipped (the latter are presumably the prefixed special
 * terms -- TODO confirm against the indexing code).
 *
 * If the index was modified during the operation, the database is
 * reopened and the whole operation is tried once more.
 *
 * @param doc Document used as relevance set. Only xdocid is used here.
 * @return the expansion terms, or an empty list if no query is open or
 *         if an error occurred.
 */
list<string> Query::expand(const Doc &doc)
{
    list<string> res;
    if (ISNULL(m_nq) || !m_nq->enquire) {
        LOGERR(("Query::expand: no query opened\n"));
        return res;
    }

    string ermsg;
    for (int tries = 0; tries < 2; tries++) {
        // Each attempt starts from scratch, so that a retry can neither
        // duplicate the terms collected by an interrupted attempt nor
        // report a stale error.
        res.clear();
        ermsg.erase();
        try {
            if (tries > 0) {
                // The previous attempt failed because the index changed
                // under us: refresh our view before trying again, else
                // the retry would just fail the same way.
                m_db->m_ndb->db.reopen();
            }
            Xapian::RSet rset;
            rset.add_document(Xapian::docid(doc.xdocid));
            // We don't exclude the original query terms.
            Xapian::ESet eset = m_nq->enquire->get_eset(20, rset, false);
            LOGDEB(("ESet terms:\n"));
            // We filter out the special terms
            for (Xapian::ESetIterator it = eset.begin();
                 it != eset.end(); it++) {
                LOGDEB((" [%s]\n", (*it).c_str()));
                if ((*it).empty() || ((*it).at(0)>='A' && (*it).at(0)<='Z'))
                    continue;
                res.push_back(*it);
                if (res.size() >= 10)
                    break;
            }
        } catch (const Xapian::DatabaseModifiedError &error) {
            // Remember the message: if this was the last attempt it gets
            // reported below, else it is reset at the top of the loop.
            ermsg = error.get_msg();
            if (ermsg.empty())
                ermsg = "Database modified";
            continue;
        } XCATCHERROR(ermsg);
        break;
    }

    if (!ermsg.empty()) {
        LOGERR(("Query::expand: xapian error %s\n", ermsg.c_str()));
        res.clear();
    }

    return res;
}
|
||||||
|
|
||||||
|
}
|
||||||
92
src/rcldb/rclquery.h
Normal file
92
src/rcldb/rclquery.h
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
#ifndef _rclquery_h_included_
|
||||||
|
#define _rclquery_h_included_
|
||||||
|
/* @(#$Id: rclquery.h,v 1.1 2008-06-13 18:22:46 dockes Exp $ (C) 2008 J.F.Dockes */
|
||||||
|
/*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
#include <string>
|
||||||
|
#include <list>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#ifndef NO_NAMESPACES
|
||||||
|
using std::string;
|
||||||
|
using std::list;
|
||||||
|
using std::vector;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "refcntr.h"
|
||||||
|
|
||||||
|
#ifndef NO_NAMESPACES
|
||||||
|
namespace Rcl {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
class SearchData;
|
||||||
|
class Db;
|
||||||
|
class Doc;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An Rcl::Query is a question (SearchData) applied to a
|
||||||
|
* database. Handles access to the results. Somewhat equivalent to a
|
||||||
|
* cursor in an rdb.
|
||||||
|
*/
|
||||||
|
class Query {
|
||||||
|
public:
|
||||||
|
enum QueryOpts {QO_NONE=0, QO_STEM = 1};
|
||||||
|
|
||||||
|
Query(Db *db);
|
||||||
|
|
||||||
|
~Query();
|
||||||
|
|
||||||
|
/** Get explanation about last error */
|
||||||
|
string getReason() const;
|
||||||
|
|
||||||
|
/** Parse query string and initialize query */
|
||||||
|
bool setQuery(RefCntr<SearchData> q, int opts = QO_NONE,
|
||||||
|
const string& stemlang = "english");
|
||||||
|
bool getQueryTerms(list<string>& terms);
|
||||||
|
bool getMatchTerms(const Doc& doc, list<string>& terms);
|
||||||
|
|
||||||
|
/** Get document at rank i in current query. */
|
||||||
|
bool getDoc(int i, Doc &doc, int *percent = 0);
|
||||||
|
|
||||||
|
/** Expand query */
|
||||||
|
list<string> expand(const Doc &doc);
|
||||||
|
|
||||||
|
/** Get results count for current query */
|
||||||
|
int getResCnt();
|
||||||
|
|
||||||
|
Db *whatDb();
|
||||||
|
|
||||||
|
/** make this public for access from embedded Db::Native */
|
||||||
|
class Native;
|
||||||
|
Native *m_nq;
|
||||||
|
|
||||||
|
private:
|
||||||
|
string m_filterTopDir; // Current query filter on subtree top directory
|
||||||
|
string m_reason; // Error explanation
|
||||||
|
Db *m_db;
|
||||||
|
unsigned int m_qOpts;
|
||||||
|
/* Copyconst and assignemt private and forbidden */
|
||||||
|
Query(const Query &) {}
|
||||||
|
Query & operator=(const Query &) {return *this;};
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifndef NO_NAMESPACES
|
||||||
|
}
|
||||||
|
#endif // NO_NAMESPACES
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* _rclquery_h_included_ */
|
||||||
66
src/rcldb/rclquery_p.h
Normal file
66
src/rcldb/rclquery_p.h
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
#ifndef _rclquery_p_h_included_
|
||||||
|
#define _rclquery_p_h_included_
|
||||||
|
/* @(#$Id: rclquery_p.h,v 1.1 2008-06-13 18:22:46 dockes Exp $ (C) 2007 J.F.Dockes */
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
using std::map;
|
||||||
|
using std::vector;
|
||||||
|
|
||||||
|
#include <xapian.h>
|
||||||
|
#include "rclquery.h"
|
||||||
|
|
||||||
|
namespace Rcl {
|
||||||
|
|
||||||
|
class Query::Native {
public:
    // Query descriptor: terms and subqueries joined by operators
    // (or/and etc...)
    Xapian::Query query;

    // Only used when there is a postquery filter: sequence of the db
    // indices which matched the filter so far.
    vector<int> m_dbindices;

    // Results can be filtered on location in two ways:
    //  - Hand a "MatchDecider" to Xapian, which applies it while running
    //    the query.
    //  - Let Recoll filter what comes out of Xapian (this also uses a
    //    Xapian::MatchDecider object, but Recoll applies it to the
    //    results).
    //
    // Result filtering was implemented first.
    //
    // Which method is more efficient depends on the search, so the code
    // for both has been kept. An advantage of the Xapian approach is that
    // the result count estimates are correct (they are wrong with the
    // postfilter approach). It is also faster in some worst case
    // scenarios, so it is now the default (but post-filtering is faster
    // in many common cases).
    //
    // The choice is made in SetQuery(), by setting one or the other of
    // the two members below. This in turn is controlled by a
    // preprocessor directive.

#define XAPIAN_FILTERING 1

    // Filter applied by Xapian during the query
    Xapian::MatchDecider *decider;
    // Filter applied by Recoll to the results
    Xapian::MatchDecider *postfilter;

    // Open query descriptor.
    Xapian::Enquire *enquire;
    // Partial result set
    Xapian::MSet mset;
    // Query object passed to the constructor
    Query *m_q;
    // Term frequencies for current query. See makeAbstract, setQuery
    map<string, double> termfreqs;

    // All the pointer members start out null, except for the Query
    // pointer, which is stored as received.
    Native(Query *q)
        : decider(0),
          postfilter(0),
          enquire(0),
          m_q(q)
    {
    }

    // Release the objects reached through the pointer members, except
    // for m_q which is not deleted here.
    ~Native()
    {
        delete decider;
        delete postfilter;
        delete enquire;
    }
};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif /* _rclquery_p_h_included_ */
|
||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _SEARCHDATA_H_INCLUDED_
|
#ifndef _SEARCHDATA_H_INCLUDED_
|
||||||
#define _SEARCHDATA_H_INCLUDED_
|
#define _SEARCHDATA_H_INCLUDED_
|
||||||
/* @(#$Id: searchdata.h,v 1.13 2008-05-08 10:00:20 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: searchdata.h,v 1.14 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Structures to hold data coming almost directly from the gui
|
* Structures to hold data coming almost directly from the gui
|
||||||
@ -47,7 +47,7 @@ class SearchDataClause;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
Data structure representing a Recoll user query, for translation
|
Data structure representing a Recoll user query, for translation
|
||||||
into a Xapian query tree.
|
into a Xapian query tree. This could probably be better called a 'question'.
|
||||||
|
|
||||||
This is a list of search clauses combined through either OR or AND.
|
This is a list of search clauses combined through either OR or AND.
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: pathut.cpp,v 1.19 2008-05-27 06:18:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: pathut.cpp,v 1.20 2008-06-13 18:22:47 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -36,6 +36,7 @@ using std::list;
|
|||||||
using std::stack;
|
using std::stack;
|
||||||
#endif /* NO_NAMESPACES */
|
#endif /* NO_NAMESPACES */
|
||||||
|
|
||||||
|
#include "autoconfig.h"
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|||||||
@ -42,6 +42,7 @@ public:
|
|||||||
X *operator->() {return rep;}
|
X *operator->() {return rep;}
|
||||||
int getcnt() const {return pcount ? *pcount : 0;}
|
int getcnt() const {return pcount ? *pcount : 0;}
|
||||||
const X *getptr() const {return rep;}
|
const X *getptr() const {return rep;}
|
||||||
|
bool isNull() const {return rep == 0;}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user