use a Xapian MatchDecider to filter on dir path
This commit is contained in:
parent
f56d3849dd
commit
2b5593887f
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.124 2007-10-24 08:42:59 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.125 2007-10-24 15:38:53 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -104,7 +104,31 @@ class Native {
|
|||||||
Xapian::Database db;
|
Xapian::Database db;
|
||||||
Xapian::Query query; // query descriptor: terms and subqueries
|
Xapian::Query query; // query descriptor: terms and subqueries
|
||||||
// joined by operators (or/and etc...)
|
// joined by operators (or/and etc...)
|
||||||
Xapian::MatchDecider *decider;
|
|
||||||
|
// Filtering results on location. There are 2 possible approaches
|
||||||
|
// for this:
|
||||||
|
// - Set a "MatchDecider" to be used by Xapian during the query
|
||||||
|
// - Filter the results out of Xapian (this also uses a
|
||||||
|
// Xapian::MatchDecider object, but applied to the results by Recoll.
|
||||||
|
//
|
||||||
|
// The result filtering approach was the first implemented.
|
||||||
|
//
|
||||||
|
// The efficiency of both methods depend on the searches, so the code
|
||||||
|
// for both has been kept. A nice point for the Xapian approach is that
|
||||||
|
// the result count estimate are correct (they are wrong with
|
||||||
|
// the postfilter approach). It is also faster in some worst case scenarios
|
||||||
|
// so this now the default (but the post-filtering is faster in many common
|
||||||
|
// cases).
|
||||||
|
//
|
||||||
|
// Which is used is decided in SetQuery(), by setting either of
|
||||||
|
// the two following members. This in turn is controlled by a
|
||||||
|
// preprocessor directive.
|
||||||
|
|
||||||
|
#define XAPIAN_FILTERING 1
|
||||||
|
|
||||||
|
Xapian::MatchDecider *decider; // Xapian does the filtering
|
||||||
|
Xapian::MatchDecider *postfilter; // Result filtering done by Recoll
|
||||||
|
|
||||||
Xapian::Enquire *enquire; // Open query descriptor.
|
Xapian::Enquire *enquire; // Open query descriptor.
|
||||||
Xapian::MSet mset; // Partial result set
|
Xapian::MSet mset; // Partial result set
|
||||||
|
|
||||||
@ -113,11 +137,13 @@ class Native {
|
|||||||
|
|
||||||
Native(Db *db)
|
Native(Db *db)
|
||||||
: m_db(db),
|
: m_db(db),
|
||||||
m_isopen(false), m_iswritable(false), decider(0), enquire(0)
|
m_isopen(false), m_iswritable(false), decider(0), postfilter(0),
|
||||||
|
enquire(0)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
~Native() {
|
~Native() {
|
||||||
delete decider;
|
delete decider;
|
||||||
|
delete postfilter;
|
||||||
delete enquire;
|
delete enquire;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -145,7 +171,9 @@ public:
|
|||||||
{}
|
{}
|
||||||
virtual ~FilterMatcher() {}
|
virtual ~FilterMatcher() {}
|
||||||
|
|
||||||
virtual bool operator()(const Xapian::Document &xdoc) const {
|
virtual bool operator()(const Xapian::Document &xdoc) const
|
||||||
|
{
|
||||||
|
m_cnt++;
|
||||||
// Parse xapian document's data and populate doc fields
|
// Parse xapian document's data and populate doc fields
|
||||||
string data = xdoc.get_data();
|
string data = xdoc.get_data();
|
||||||
ConfSimple parms(&data);
|
ConfSimple parms(&data);
|
||||||
@ -156,17 +184,19 @@ public:
|
|||||||
LOGDEB2(("FilterMatcher topdir [%s] url [%s]\n",
|
LOGDEB2(("FilterMatcher topdir [%s] url [%s]\n",
|
||||||
m_topdir.c_str(), url.c_str()));
|
m_topdir.c_str(), url.c_str()));
|
||||||
if (url.find(m_topdir, 7) == 7) {
|
if (url.find(m_topdir, 7) == 7) {
|
||||||
LOGDEB(("FilterMatcher: MATCH\n"));
|
LOGDEB2(("FilterMatcher: MATCH %d\n", m_cnt));
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
LOGDEB(("FilterMatcher: NO MATCH\n"));
|
LOGDEB2(("FilterMatcher: NO MATCH %d\n", m_cnt));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
static int m_cnt;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
string m_topdir;
|
string m_topdir;
|
||||||
};
|
};
|
||||||
|
int FilterMatcher::m_cnt;
|
||||||
|
|
||||||
/* See comment in class declaration */
|
/* See comment in class declaration */
|
||||||
bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
|
bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
|
||||||
@ -664,8 +694,7 @@ bool Db::i_close(bool final)
|
|||||||
if (w)
|
if (w)
|
||||||
LOGDEB(("Rcl::Db:close: xapian will close. May take some time\n"));
|
LOGDEB(("Rcl::Db:close: xapian will close. May take some time\n"));
|
||||||
// Used to do a flush here. Cant see why it should be necessary.
|
// Used to do a flush here. Cant see why it should be necessary.
|
||||||
delete m_ndb;
|
deleteZ(m_ndb);
|
||||||
m_ndb = 0;
|
|
||||||
if (w)
|
if (w)
|
||||||
LOGDEB(("Rcl::Db:close() xapian close done.\n"));
|
LOGDEB(("Rcl::Db:close() xapian close done.\n"));
|
||||||
if (final) {
|
if (final) {
|
||||||
@ -1440,14 +1469,20 @@ bool Db::setQuery(RefCntr<SearchData> sdata, int opts,
|
|||||||
LOGDEB(("Db::setQuery:\n"));
|
LOGDEB(("Db::setQuery:\n"));
|
||||||
|
|
||||||
m_filterTopDir = sdata->getTopdir();
|
m_filterTopDir = sdata->getTopdir();
|
||||||
delete m_ndb->decider;
|
deleteZ(m_ndb->decider);
|
||||||
m_ndb->decider = 0;
|
deleteZ(m_ndb->postfilter);
|
||||||
if (!m_filterTopDir.empty())
|
if (!m_filterTopDir.empty()) {
|
||||||
m_ndb->decider = new FilterMatcher(m_filterTopDir);
|
#if XAPIAN_FILTERING
|
||||||
|
m_ndb->decider =
|
||||||
|
#else
|
||||||
|
m_ndb->postfilter =
|
||||||
|
#endif
|
||||||
|
new FilterMatcher(m_filterTopDir);
|
||||||
|
}
|
||||||
m_dbindices.clear();
|
m_dbindices.clear();
|
||||||
m_qOpts = opts;
|
m_qOpts = opts;
|
||||||
m_ndb->m_termfreqs.clear();
|
m_ndb->m_termfreqs.clear();
|
||||||
|
FilterMatcher::m_cnt = 0;
|
||||||
Xapian::Query xq;
|
Xapian::Query xq;
|
||||||
if (!sdata->toNativeQuery(*this, &xq,
|
if (!sdata->toNativeQuery(*this, &xq,
|
||||||
(opts & Db::QO_STEM) ? stemlang : "")) {
|
(opts & Db::QO_STEM) ? stemlang : "")) {
|
||||||
@ -1745,10 +1780,12 @@ int Db::getResCnt()
|
|||||||
string ermsg;
|
string ermsg;
|
||||||
if (m_ndb->mset.size() <= 0) {
|
if (m_ndb->mset.size() <= 0) {
|
||||||
try {
|
try {
|
||||||
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum);
|
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum,
|
||||||
|
0, m_ndb->decider);
|
||||||
} catch (const Xapian::DatabaseModifiedError &error) {
|
} catch (const Xapian::DatabaseModifiedError &error) {
|
||||||
m_ndb->db.reopen();
|
m_ndb->db.reopen();
|
||||||
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum);
|
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum,
|
||||||
|
0, m_ndb->decider);
|
||||||
} XCATCHERROR(ermsg);
|
} XCATCHERROR(ermsg);
|
||||||
if (!ermsg.empty()) {
|
if (!ermsg.empty()) {
|
||||||
LOGERR(("enquire->get_mset: exception: %s\n", ermsg.c_str()));
|
LOGERR(("enquire->get_mset: exception: %s\n", ermsg.c_str()));
|
||||||
@ -1781,7 +1818,7 @@ bool Db::getDoc(int exti, Doc &doc, int *percent)
|
|||||||
}
|
}
|
||||||
|
|
||||||
int xapi;
|
int xapi;
|
||||||
if (m_ndb->decider) {
|
if (m_ndb->postfilter) {
|
||||||
// There is a postquery filter, does this fall in already known area ?
|
// There is a postquery filter, does this fall in already known area ?
|
||||||
if (exti >= (int)m_dbindices.size()) {
|
if (exti >= (int)m_dbindices.size()) {
|
||||||
// Have to fetch xapian docs and filter until we get
|
// Have to fetch xapian docs and filter until we get
|
||||||
@ -1812,7 +1849,7 @@ bool Db::getDoc(int exti, Doc &doc, int *percent)
|
|||||||
for (unsigned int i = 0; i < m_ndb->mset.size() ; i++) {
|
for (unsigned int i = 0; i < m_ndb->mset.size() ; i++) {
|
||||||
LOGDEB(("Db::getDoc: [%d]\n", i));
|
LOGDEB(("Db::getDoc: [%d]\n", i));
|
||||||
Xapian::Document xdoc = m_ndb->mset[i].get_document();
|
Xapian::Document xdoc = m_ndb->mset[i].get_document();
|
||||||
if ((*m_ndb->decider)(xdoc)) {
|
if ((*m_ndb->postfilter)(xdoc)) {
|
||||||
m_dbindices.push_back(first + i);
|
m_dbindices.push_back(first + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1831,10 +1868,13 @@ bool Db::getDoc(int exti, Doc &doc, int *percent)
|
|||||||
if (!(xapi >= first && xapi <= last)) {
|
if (!(xapi >= first && xapi <= last)) {
|
||||||
LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
|
LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
|
||||||
try {
|
try {
|
||||||
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum);
|
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum,
|
||||||
|
0, m_ndb->decider);
|
||||||
} catch (const Xapian::DatabaseModifiedError &error) {
|
} catch (const Xapian::DatabaseModifiedError &error) {
|
||||||
m_ndb->db.reopen();
|
m_ndb->db.reopen();
|
||||||
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum);
|
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum,
|
||||||
|
0, m_ndb->decider);
|
||||||
|
|
||||||
} catch (const Xapian::Error & error) {
|
} catch (const Xapian::Error & error) {
|
||||||
LOGERR(("enquire->get_mset: exception: %s\n",
|
LOGERR(("enquire->get_mset: exception: %s\n",
|
||||||
error.get_msg().c_str()));
|
error.get_msg().c_str()));
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user