use a Xapian MatchDecider to filter on dir path

This commit is contained in:
dockes 2007-10-24 15:38:53 +00:00
parent f56d3849dd
commit 2b5593887f

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.124 2007-10-24 08:42:59 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.125 2007-10-24 15:38:53 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -104,7 +104,31 @@ class Native {
Xapian::Database db;
Xapian::Query query; // query descriptor: terms and subqueries
// joined by operators (or/and etc...)
Xapian::MatchDecider *decider;
// Filtering results on location. There are 2 possible approaches
// for this:
// - Set a "MatchDecider" to be used by Xapian during the query
// - Filter the results outside of Xapian (this also uses a
// Xapian::MatchDecider object, but applied to the results by Recoll).
//
// The result filtering approach was the first implemented.
//
// The efficiency of both methods depends on the searches, so the code
// for both has been kept. A nice point for the Xapian approach is that
// the result count estimates are correct (they are wrong with
// the postfilter approach). It is also faster in some worst case scenarios
// so this is now the default (but the post-filtering is faster in many common
// cases).
//
// Which one is used is decided in Db::setQuery(), by setting either of
// the two following members. This in turn is controlled by a
// preprocessor directive.
#define XAPIAN_FILTERING 1
Xapian::MatchDecider *decider; // Xapian does the filtering
Xapian::MatchDecider *postfilter; // Result filtering done by Recoll
Xapian::Enquire *enquire; // Open query descriptor.
Xapian::MSet mset; // Partial result set
@ -113,11 +137,13 @@ class Native {
Native(Db *db)
: m_db(db),
m_isopen(false), m_iswritable(false), decider(0), enquire(0)
m_isopen(false), m_iswritable(false), decider(0), postfilter(0),
enquire(0)
{ }
// Release the heap objects held through the raw pointer members.
// All three pointers are initialized to 0 by the constructor, so the
// deletes are safe even if no query was ever set.
~Native() {
delete decider;
delete postfilter;
delete enquire;
}
@ -145,7 +171,9 @@ public:
{}
virtual ~FilterMatcher() {}
virtual bool operator()(const Xapian::Document &xdoc) const {
virtual bool operator()(const Xapian::Document &xdoc) const
{
m_cnt++;
// Parse xapian document's data and populate doc fields
string data = xdoc.get_data();
ConfSimple parms(&data);
@ -156,17 +184,19 @@ public:
LOGDEB2(("FilterMatcher topdir [%s] url [%s]\n",
m_topdir.c_str(), url.c_str()));
if (url.find(m_topdir, 7) == 7) {
LOGDEB(("FilterMatcher: MATCH\n"));
LOGDEB2(("FilterMatcher: MATCH %d\n", m_cnt));
return true;
} else {
LOGDEB(("FilterMatcher: NO MATCH\n"));
LOGDEB2(("FilterMatcher: NO MATCH %d\n", m_cnt));
return false;
}
}
static int m_cnt;
private:
string m_topdir;
};
// Storage for the static counter of FilterMatcher::operator() calls.
// It is incremented on every call, reset to 0 in Db::setQuery(), and
// only used in debug log messages.
int FilterMatcher::m_cnt;
/* See comment in class declaration */
bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
@ -664,8 +694,7 @@ bool Db::i_close(bool final)
if (w)
LOGDEB(("Rcl::Db:close: xapian will close. May take some time\n"));
// Used to do a flush here. Can't see why it should be necessary.
delete m_ndb;
m_ndb = 0;
deleteZ(m_ndb);
if (w)
LOGDEB(("Rcl::Db:close() xapian close done.\n"));
if (final) {
@ -1440,14 +1469,20 @@ bool Db::setQuery(RefCntr<SearchData> sdata, int opts,
LOGDEB(("Db::setQuery:\n"));
m_filterTopDir = sdata->getTopdir();
delete m_ndb->decider;
m_ndb->decider = 0;
if (!m_filterTopDir.empty())
m_ndb->decider = new FilterMatcher(m_filterTopDir);
deleteZ(m_ndb->decider);
deleteZ(m_ndb->postfilter);
if (!m_filterTopDir.empty()) {
#if XAPIAN_FILTERING
m_ndb->decider =
#else
m_ndb->postfilter =
#endif
new FilterMatcher(m_filterTopDir);
}
m_dbindices.clear();
m_qOpts = opts;
m_ndb->m_termfreqs.clear();
FilterMatcher::m_cnt = 0;
Xapian::Query xq;
if (!sdata->toNativeQuery(*this, &xq,
(opts & Db::QO_STEM) ? stemlang : "")) {
@ -1745,10 +1780,12 @@ int Db::getResCnt()
string ermsg;
if (m_ndb->mset.size() <= 0) {
try {
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum);
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum,
0, m_ndb->decider);
} catch (const Xapian::DatabaseModifiedError &error) {
m_ndb->db.reopen();
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum);
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum,
0, m_ndb->decider);
} XCATCHERROR(ermsg);
if (!ermsg.empty()) {
LOGERR(("enquire->get_mset: exception: %s\n", ermsg.c_str()));
@ -1781,7 +1818,7 @@ bool Db::getDoc(int exti, Doc &doc, int *percent)
}
int xapi;
if (m_ndb->decider) {
if (m_ndb->postfilter) {
// There is a postquery filter, does this fall in already known area ?
if (exti >= (int)m_dbindices.size()) {
// Have to fetch xapian docs and filter until we get
@ -1812,7 +1849,7 @@ bool Db::getDoc(int exti, Doc &doc, int *percent)
for (unsigned int i = 0; i < m_ndb->mset.size() ; i++) {
LOGDEB(("Db::getDoc: [%d]\n", i));
Xapian::Document xdoc = m_ndb->mset[i].get_document();
if ((*m_ndb->decider)(xdoc)) {
if ((*m_ndb->postfilter)(xdoc)) {
m_dbindices.push_back(first + i);
}
}
@ -1831,10 +1868,13 @@ bool Db::getDoc(int exti, Doc &doc, int *percent)
if (!(xapi >= first && xapi <= last)) {
LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
try {
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum);
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum,
0, m_ndb->decider);
} catch (const Xapian::DatabaseModifiedError &error) {
m_ndb->db.reopen();
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum);
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum,
0, m_ndb->decider);
} catch (const Xapian::Error & error) {
LOGERR(("enquire->get_mset: exception: %s\n",
error.get_msg().c_str()));