simplified class structure

This commit is contained in:
dockes 2006-04-05 06:26:56 +00:00
parent 14194e30d8
commit daa37c68f7
2 changed files with 113 additions and 126 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.59 2006-03-29 11:18:14 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.60 2006-04-05 06:26:56 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -52,7 +52,9 @@ using namespace std;
#ifndef MIN #ifndef MIN
#define MIN(A,B) (A<B?A:B) #define MIN(A,B) (A<B?A:B)
#endif #endif
#ifndef NO_NAMESPACES
namespace Rcl {
#endif
// This is how long an abstract we keep or build from beginning of text when // This is how long an abstract we keep or build from beginning of text when
// indexing. It only has an influence on the size of the db as we are free // indexing. It only has an influence on the size of the db as we are free
// to shorten it again when displaying // to shorten it again when displaying
@ -81,11 +83,11 @@ class Native {
Xapian::Database db; Xapian::Database db;
Xapian::Query query; // query descriptor: terms and subqueries Xapian::Query query; // query descriptor: terms and subqueries
// joined by operators (or/and etc...) // joined by operators (or/and etc...)
Xapian::Enquire *enquire; Xapian::Enquire *enquire; // Open query descriptor.
Xapian::MSet mset; Xapian::MSet mset; // Partial result set
string makeAbstract(Xapian::docid id, const list<string>& terms); string makeAbstract(Xapian::docid id, const list<string>& terms);
bool dbDataToRclDoc(std::string &data, Rcl::Doc &doc, bool dbDataToRclDoc(std::string &data, Doc &doc,
int qopts, int qopts,
Xapian::docid docid, Xapian::docid docid,
const list<string>& terms); const list<string>& terms);
@ -94,27 +96,39 @@ class Native {
~Native() { ~Native() {
delete enquire; delete enquire;
} }
/**
 * Post-query filter: tell whether a Xapian document lies inside the
 * subtree selected by the current advanced search data.
 *
 * @param rdb  Db object whose m_asdata.topdir is the path prefix to match.
 * @param xdoc Xapian document; its data record is parsed with ConfSimple
 *             and the "url" entry is read from it.
 * @return true if the document path begins with topdir (an empty topdir
 *         string matches everything), false otherwise.
 */
bool filterMatch(Db *rdb, Xapian::Document &xdoc) {
    // Parse xapian document's data and extract the url field
    string data = xdoc.get_data();
    ConfSimple parms(&data);

    // The only filtering for now is on file path (subtree)
    string url;
    parms.get(string("url"), url);

    // Drop the 7-character url prefix (presumably "file://" -- confirm
    // against the indexing code). A missing or truncated url would make
    // substr(7) throw std::out_of_range, so treat it as an empty path.
    static const string::size_type prefixlen = 7;
    string path = url.length() > prefixlen ? url.substr(prefixlen) : string();

    // rfind(str, 0) can only match at position 0: same result as
    // find(str) == 0, without scanning the whole path on a mismatch.
    return path.rfind(rdb->m_asdata.topdir, 0) == 0;
}
}; };
Rcl::Db::Db() Db::Db()
{ {
pdata = new Native; ndb = new Native;
m_qOpts = 0; m_qOpts = 0;
} }
Rcl::Db::~Db() Db::~Db()
{ {
LOGDEB1(("Rcl::Db::~Db\n")); LOGDEB1(("Db::~Db\n"));
if (pdata == 0) if (ndb == 0)
return; return;
Native *ndb = (Native *)pdata;
LOGDEB(("Db::~Db: isopen %d iswritable %d\n", ndb->isopen, LOGDEB(("Db::~Db: isopen %d iswritable %d\n", ndb->isopen,
ndb->iswritable)); ndb->iswritable));
if (ndb->isopen == false) if (ndb->isopen == false)
return; return;
const char *ermsg = "Unknown error"; const char *ermsg = "Unknown error";
try { try {
LOGDEB(("Rcl::Db::~Db: closing native database\n")); LOGDEB(("Db::~Db: closing native database\n"));
if (ndb->iswritable == true) { if (ndb->iswritable == true) {
ndb->wdb.flush(); ndb->wdb.flush();
} }
@ -129,20 +143,19 @@ Rcl::Db::~Db()
} catch (...) { } catch (...) {
ermsg = "Caught unknown exception"; ermsg = "Caught unknown exception";
} }
LOGERR(("Rcl::Db::~Db: got exception: %s\n", ermsg)); LOGERR(("Db::~Db: got exception: %s\n", ermsg));
} }
bool Rcl::Db::open(const string& dir, OpenMode mode, int qops) bool Db::open(const string& dir, OpenMode mode, int qops)
{ {
if (pdata == 0) if (ndb == 0)
return false; return false;
Native *ndb = (Native *)pdata;
LOGDEB(("Db::open: isopen %d iswritable %d\n", ndb->isopen, LOGDEB(("Db::open: isopen %d iswritable %d\n", ndb->isopen,
ndb->iswritable)); ndb->iswritable));
m_qOpts = qops; m_qOpts = qops;
if (ndb->isopen) { if (ndb->isopen) {
LOGERR(("Rcl::Db::open: already open\n")); LOGERR(("Db::open: already open\n"));
return false; return false;
} }
const char *ermsg = "Unknown"; const char *ermsg = "Unknown";
@ -154,7 +167,7 @@ bool Rcl::Db::open(const string& dir, OpenMode mode, int qops)
int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN : int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
Xapian::DB_CREATE_OR_OVERWRITE; Xapian::DB_CREATE_OR_OVERWRITE;
ndb->wdb = Xapian::WritableDatabase(dir, action); ndb->wdb = Xapian::WritableDatabase(dir, action);
LOGDEB(("Rcl::Db::open: lastdocid: %d\n", LOGDEB(("Db::open: lastdocid: %d\n",
ndb->wdb.get_lastdocid())); ndb->wdb.get_lastdocid()));
ndb->updated.resize(ndb->wdb.get_lastdocid() + 1); ndb->updated.resize(ndb->wdb.get_lastdocid() + 1);
for (unsigned int i = 0; i < ndb->updated.size(); i++) for (unsigned int i = 0; i < ndb->updated.size(); i++)
@ -180,17 +193,16 @@ bool Rcl::Db::open(const string& dir, OpenMode mode, int qops)
} catch (...) { } catch (...) {
ermsg = "Caught unknown exception"; ermsg = "Caught unknown exception";
} }
LOGERR(("Rcl::Db::open: exception while opening [%s]: %s\n", LOGERR(("Db::open: exception while opening [%s]: %s\n",
dir.c_str(), ermsg)); dir.c_str(), ermsg));
return false; return false;
} }
// Note: xapian has no close call, we delete and recreate the db // Note: xapian has no close call, we delete and recreate the db
bool Rcl::Db::close() bool Db::close()
{ {
if (pdata == 0) if (ndb == 0)
return false; return false;
Native *ndb = (Native *)pdata;
LOGDEB(("Db::close(): isopen %d iswritable %d\n", ndb->isopen, LOGDEB(("Db::close(): isopen %d iswritable %d\n", ndb->isopen,
ndb->iswritable)); ndb->iswritable));
if (ndb->isopen == false) if (ndb->isopen == false)
@ -202,8 +214,8 @@ bool Rcl::Db::close()
LOGDEB(("Rcl:Db: Called xapian flush\n")); LOGDEB(("Rcl:Db: Called xapian flush\n"));
} }
delete ndb; delete ndb;
pdata = new Native; ndb = new Native;
if (pdata) if (ndb)
return true; return true;
} catch (const Xapian::Error &e) { } catch (const Xapian::Error &e) {
ermsg = e.get_msg().c_str(); ermsg = e.get_msg().c_str();
@ -214,15 +226,14 @@ bool Rcl::Db::close()
} catch (...) { } catch (...) {
ermsg = "Caught unknown exception"; ermsg = "Caught unknown exception";
} }
LOGERR(("Rcl::Db:close: exception while deleting db: %s\n", ermsg)); LOGERR(("Db:close: exception while deleting db: %s\n", ermsg));
return false; return false;
} }
bool Rcl::Db::isopen() bool Db::isopen()
{ {
if (pdata == 0) if (ndb == 0)
return false; return false;
Native *ndb = (Native *)pdata;
return ndb->isopen; return ndb->isopen;
} }
@ -261,7 +272,7 @@ bool mySplitterCB::takeword(const std::string &term, int pos, int, int)
} catch (...) { } catch (...) {
ermsg= "Unknown error"; ermsg= "Unknown error";
} }
LOGERR(("Rcl::Db: xapian add_posting error %s\n", ermsg)); LOGERR(("Db: xapian add_posting error %s\n", ermsg));
return false; return false;
} }
@ -271,7 +282,7 @@ bool mySplitterCB::takeword(const std::string &term, int pos, int, int)
// //
// Note that we always return true (but set out to "" on error). We don't // Note that we always return true (but set out to "" on error). We don't
// want to stop indexation because of a bad string // want to stop indexation because of a bad string
bool Rcl::dumb_string(const string &in, string &out) bool dumb_string(const string &in, string &out)
{ {
out.erase(); out.erase();
if (in.empty()) if (in.empty())
@ -357,15 +368,14 @@ const static string rclSyntAbs = "?!#@";
// the title abstract and body and add special terms for file name, // the title abstract and body and add special terms for file name,
// date, mime type ... , create the document data record (more // date, mime type ... , create the document data record (more
// metadata), and update database // metadata), and update database
bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc, bool Db::add(const string &fn, const Doc &idoc,
const struct stat *stp) const struct stat *stp)
{ {
LOGDEB1(("Rcl::Db::add: fn %s\n", fn.c_str())); LOGDEB1(("Db::add: fn %s\n", fn.c_str()));
if (pdata == 0) if (ndb == 0)
return false; return false;
Native *ndb = (Native *)pdata;
Rcl::Doc doc = idoc; Doc doc = idoc;
// Truncate abstract, title and keywords to reasonable lengths. If // Truncate abstract, title and keywords to reasonable lengths. If
// abstract is currently empty, we make up one with the beginning // abstract is currently empty, we make up one with the beginning
@ -397,7 +407,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
// Split and index title // Split and index title
if (!dumb_string(doc.title, noacc)) { if (!dumb_string(doc.title, noacc)) {
LOGERR(("Rcl::Db::add: dumb_string failed\n")); LOGERR(("Db::add: dumb_string failed\n"));
return false; return false;
} }
splitter.text_to_words(noacc); splitter.text_to_words(noacc);
@ -405,7 +415,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
// Split and index body // Split and index body
if (!dumb_string(doc.text, noacc)) { if (!dumb_string(doc.text, noacc)) {
LOGERR(("Rcl::Db::add: dumb_string failed\n")); LOGERR(("Db::add: dumb_string failed\n"));
return false; return false;
} }
splitter.text_to_words(noacc); splitter.text_to_words(noacc);
@ -413,7 +423,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
// Split and index keywords // Split and index keywords
if (!dumb_string(doc.keywords, noacc)) { if (!dumb_string(doc.keywords, noacc)) {
LOGERR(("Rcl::Db::add: dumb_string failed\n")); LOGERR(("Db::add: dumb_string failed\n"));
return false; return false;
} }
splitter.text_to_words(noacc); splitter.text_to_words(noacc);
@ -421,7 +431,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
// Split and index abstract // Split and index abstract
if (!dumb_string(doc.abstract, noacc)) { if (!dumb_string(doc.abstract, noacc)) {
LOGERR(("Rcl::Db::add: dumb_string failed\n")); LOGERR(("Db::add: dumb_string failed\n"));
return false; return false;
} }
splitter.text_to_words(noacc); splitter.text_to_words(noacc);
@ -434,7 +444,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
// Path name term. This is used for existence/uptodate checks // Path name term. This is used for existence/uptodate checks
string hash; string hash;
pathHash(fn, hash, PATHHASHLEN); pathHash(fn, hash, PATHHASHLEN);
LOGDEB2(("Rcl::Db::add: pathhash [%s]\n", hash.c_str())); LOGDEB2(("Db::add: pathhash [%s]\n", hash.c_str()));
string pathterm = "P" + hash; string pathterm = "P" + hash;
newdocument.add_term(pathterm); newdocument.add_term(pathterm);
@ -507,20 +517,20 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
newdocument); newdocument);
if (did < ndb->updated.size()) { if (did < ndb->updated.size()) {
ndb->updated[did] = true; ndb->updated[did] = true;
LOGDEB(("Rcl::Db::add: docid %d updated [%s , %s]\n", did, fnc, LOGDEB(("Db::add: docid %d updated [%s , %s]\n", did, fnc,
doc.ipath.c_str())); doc.ipath.c_str()));
} else { } else {
LOGDEB(("Rcl::Db::add: docid %d added [%s , %s]\n", did, fnc, LOGDEB(("Db::add: docid %d added [%s , %s]\n", did, fnc,
doc.ipath.c_str())); doc.ipath.c_str()));
} }
} catch (...) { } catch (...) {
// FIXME: is this ever actually needed? // FIXME: is this ever actually needed?
try { try {
ndb->wdb.add_document(newdocument); ndb->wdb.add_document(newdocument);
LOGDEB(("Rcl::Db::add: %s added (failed re-seek for duplicate)\n", LOGDEB(("Db::add: %s added (failed re-seek for duplicate)\n",
fnc)); fnc));
} catch (...) { } catch (...) {
LOGERR(("Rcl::Db::add: failed again after replace_document\n")); LOGERR(("Db::add: failed again after replace_document\n"));
return false; return false;
} }
} }
@ -528,11 +538,10 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
} }
// Test if given filename has changed since last indexed: // Test if given filename has changed since last indexed:
bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp) bool Db::needUpdate(const string &filename, const struct stat *stp)
{ {
if (pdata == 0) if (ndb == 0)
return false; return false;
Native *ndb = (Native *)pdata;
// If no document exists with this path, we do need an update // If no document exists with this path, we do need an update
string hash; string hash;
@ -615,12 +624,11 @@ p_notlowerorutf(unsigned int c)
/** /**
* Delete stem db for given language * Delete stem db for given language
*/ */
bool Rcl::Db::deleteStemDb(const string& lang) bool Db::deleteStemDb(const string& lang)
{ {
LOGDEB(("Rcl::Db::deleteStemDb(%s)\n", lang.c_str())); LOGDEB(("Db::deleteStemDb(%s)\n", lang.c_str()));
if (pdata == 0) if (ndb == 0)
return false; return false;
Native *ndb = (Native *)pdata;
if (ndb->isopen == false) if (ndb->isopen == false)
return false; return false;
@ -636,12 +644,11 @@ bool Rcl::Db::deleteStemDb(const string& lang)
* with documents indexed by a single term (the stem), and with the list of * with documents indexed by a single term (the stem), and with the list of
* parent terms in the document data. * parent terms in the document data.
*/ */
bool Rcl::Db::createStemDb(const string& lang) bool Db::createStemDb(const string& lang)
{ {
LOGDEB(("Rcl::Db::createStemDb(%s)\n", lang.c_str())); LOGDEB(("Db::createStemDb(%s)\n", lang.c_str()));
if (pdata == 0) if (ndb == 0)
return false; return false;
Native *ndb = (Native *)pdata;
if (ndb->isopen == false) if (ndb->isopen == false)
return false; return false;
@ -719,7 +726,7 @@ bool Rcl::Db::createStemDb(const string& lang)
ermsg = "Caught unknown exception"; ermsg = "Caught unknown exception";
} }
if (ermsg != "NOERROR") { if (ermsg != "NOERROR") {
LOGERR(("Rcl::Db::createstemdb: exception while opening [%s]: %s\n", LOGERR(("Db::createstemdb: exception while opening [%s]: %s\n",
stemdbdir.c_str(), ermsg)); stemdbdir.c_str(), ermsg));
return false; return false;
} }
@ -754,7 +761,7 @@ bool Rcl::Db::createStemDb(const string& lang)
try { try {
sdb.replace_document(stem, newdocument); sdb.replace_document(stem, newdocument);
} catch (...) { } catch (...) {
LOGERR(("Rcl::Db::createstemdb: replace failed\n")); LOGERR(("Db::createstemdb: replace failed\n"));
return false; return false;
} }
} }
@ -770,13 +777,12 @@ bool Rcl::Db::createStemDb(const string& lang)
return true; return true;
} }
list<string> Rcl::Db::getStemLangs() list<string> Db::getStemLangs()
{ {
list<string> dirs; list<string> dirs;
LOGDEB(("Rcl::Db::getStemLang\n")); LOGDEB(("Db::getStemLang\n"));
if (pdata == 0) if (ndb == 0)
return dirs; return dirs;
Native *ndb = (Native *)pdata;
string pattern = stemdirstem + "*"; string pattern = stemdirstem + "*";
dirs = path_dirglob(ndb->basedir, pattern); dirs = path_dirglob(ndb->basedir, pattern);
for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) { for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
@ -792,13 +798,12 @@ list<string> Rcl::Db::getStemLangs()
* documents for files that are no longer there. We also build the * documents for files that are no longer there. We also build the
* stem database while we are at it. * stem database while we are at it.
*/ */
bool Rcl::Db::purge() bool Db::purge()
{ {
LOGDEB(("Rcl::Db::purge\n")); LOGDEB(("Db::purge\n"));
if (pdata == 0) if (ndb == 0)
return false; return false;
Native *ndb = (Native *)pdata; LOGDEB(("Db::purge: isopen %d iswritable %d\n", ndb->isopen,
LOGDEB(("Rcl::Db::purge: isopen %d iswritable %d\n", ndb->isopen,
ndb->iswritable)); ndb->iswritable));
if (ndb->isopen == false || ndb->iswritable == false) if (ndb->isopen == false || ndb->iswritable == false)
return false; return false;
@ -813,22 +818,22 @@ bool Rcl::Db::purge()
try { try {
ndb->wdb.flush(); ndb->wdb.flush();
} catch (...) { } catch (...) {
LOGDEB(("Rcl::Db::purge: 1st flush failed\n")); LOGDEB(("Db::purge: 1st flush failed\n"));
} }
for (Xapian::docid docid = 1; docid < ndb->updated.size(); ++docid) { for (Xapian::docid docid = 1; docid < ndb->updated.size(); ++docid) {
if (!ndb->updated[docid]) { if (!ndb->updated[docid]) {
try { try {
ndb->wdb.delete_document(docid); ndb->wdb.delete_document(docid);
LOGDEB(("Rcl::Db::purge: deleted document #%d\n", docid)); LOGDEB(("Db::purge: deleted document #%d\n", docid));
} catch (const Xapian::DocNotFoundError &) { } catch (const Xapian::DocNotFoundError &) {
LOGDEB(("Rcl::Db::purge: document #%d not found\n", docid)); LOGDEB(("Db::purge: document #%d not found\n", docid));
} }
} }
} }
try { try {
ndb->wdb.flush(); ndb->wdb.flush();
} catch (...) { } catch (...) {
LOGDEB(("Rcl::Db::purge: 2nd flush failed\n")); LOGDEB(("Db::purge: 2nd flush failed\n"));
} }
return true; return true;
} }
@ -849,7 +854,7 @@ static list<string> stemexpand(Native *ndb, string term, const string& lang)
LOGDEB1(("stemexpand: %s lastdocid: %d\n", LOGDEB1(("stemexpand: %s lastdocid: %d\n",
stemdbdir.c_str(), sdb.get_lastdocid())); stemdbdir.c_str(), sdb.get_lastdocid()));
if (!sdb.term_exists(stem)) { if (!sdb.term_exists(stem)) {
LOGDEB1(("Rcl::Db::stemexpand: no term for %s\n", stem.c_str())); LOGDEB1(("Db::stemexpand: no term for %s\n", stem.c_str()));
explist.push_back(term); explist.push_back(term);
return explist; return explist;
} }
@ -903,7 +908,7 @@ class wsQData : public TextSplitCB {
void dumball() { void dumball() {
for (vector<string>::iterator it=terms.begin(); it !=terms.end();it++){ for (vector<string>::iterator it=terms.begin(); it !=terms.end();it++){
string dumb; string dumb;
Rcl::dumb_string(*it, dumb); dumb_string(*it, dumb);
*it = dumb; *it = dumb;
} }
} }
@ -922,7 +927,7 @@ static void stringToXapianQueries(const string &iq,
const string& stemlang, const string& stemlang,
Native *ndb, Native *ndb,
list<Xapian::Query> &pqueries, list<Xapian::Query> &pqueries,
Rcl::Db::QueryOpts opts = Rcl::Db::QO_NONE) Db::QueryOpts opts = Db::QO_NONE)
{ {
string qstring = iq; string qstring = iq;
@ -965,9 +970,9 @@ static void stringToXapianQueries(const string &iq,
list<string> exp; list<string> exp;
string term1; string term1;
Rcl::dumb_string(term, term1); dumb_string(term, term1);
// Possibly perform stem compression/expansion // Possibly perform stem compression/expansion
if (!nostemexp && (opts & Rcl::Db::QO_STEM)) { if (!nostemexp && (opts & Db::QO_STEM)) {
exp = stemexpand(ndb, term1, stemlang); exp = stemexpand(ndb, term1, stemlang);
} else { } else {
exp.push_back(term1); exp.push_back(term1);
@ -991,12 +996,11 @@ static void stringToXapianQueries(const string &iq,
} }
// Prepare query out of simple query string // Prepare query out of simple query string
bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts, bool Db::setQuery(const std::string &iqstring, QueryOpts opts,
const string& stemlang) const string& stemlang)
{ {
LOGDEB(("Rcl::Db::setQuery: q: [%s], opts 0x%x, stemlang %s\n", LOGDEB(("Db::setQuery: q: [%s], opts 0x%x, stemlang %s\n",
iqstring.c_str(), (unsigned int)opts, stemlang.c_str())); iqstring.c_str(), (unsigned int)opts, stemlang.c_str()));
Native *ndb = (Native *)pdata;
if (!ndb) if (!ndb)
return false; return false;
m_asdata.erase(); m_asdata.erase();
@ -1013,10 +1017,10 @@ bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
} }
// Prepare query out of "advanced search" data // Prepare query out of "advanced search" data
bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts, bool Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
const string& stemlang) const string& stemlang)
{ {
LOGDEB(("Rcl::Db::setQuery: adv:\n")); LOGDEB(("Db::setQuery: adv:\n"));
LOGDEB((" allwords: %s\n", sdata.allwords.c_str())); LOGDEB((" allwords: %s\n", sdata.allwords.c_str()));
LOGDEB((" phrase: %s\n", sdata.phrase.c_str())); LOGDEB((" phrase: %s\n", sdata.phrase.c_str()));
LOGDEB((" orwords: %s\n", sdata.orwords.c_str())); LOGDEB((" orwords: %s\n", sdata.orwords.c_str()));
@ -1033,7 +1037,6 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
m_asdata = sdata; m_asdata = sdata;
dbindices.clear(); dbindices.clear();
Native *ndb = (Native *)pdata;
if (!ndb) if (!ndb)
return false; return false;
list<Xapian::Query> pqueries; list<Xapian::Query> pqueries;
@ -1072,7 +1075,7 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
} }
// Limit the match count // Limit the match count
if (names.size() > 1000) { if (names.size() > 1000) {
LOGERR(("Rcl::Db::SetQuery: too many matched file names\n")); LOGERR(("Db::SetQuery: too many matched file names\n"));
break; break;
} }
} }
@ -1155,13 +1158,12 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
sdata.description = ndb->query.get_description(); sdata.description = ndb->query.get_description();
if (sdata.description.find("Xapian::Query") == 0) if (sdata.description.find("Xapian::Query") == 0)
sdata.description = sdata.description.substr(strlen("Xapian::Query")); sdata.description = sdata.description.substr(strlen("Xapian::Query"));
LOGDEB(("Rcl::Db::SetQuery: Q: %s\n", sdata.description.c_str())); LOGDEB(("Db::SetQuery: Q: %s\n", sdata.description.c_str()));
return true; return true;
} }
bool Rcl::Db::getQueryTerms(list<string>& terms) bool Db::getQueryTerms(list<string>& terms)
{ {
Native *ndb = (Native *)pdata;
if (!ndb) if (!ndb)
return false; return false;
@ -1176,11 +1178,10 @@ bool Rcl::Db::getQueryTerms(list<string>& terms)
static const int qquantum = 30; static const int qquantum = 30;
int Rcl::Db::getResCnt() int Db::getResCnt()
{ {
Native *ndb = (Native *)pdata;
if (!ndb || !ndb->enquire) { if (!ndb || !ndb->enquire) {
LOGERR(("Rcl::Db::getResCnt: no query opened\n")); LOGERR(("Db::getResCnt: no query opened\n"));
return -1; return -1;
} }
if (ndb->mset.size() <= 0) { if (ndb->mset.size() <= 0) {
@ -1202,28 +1203,15 @@ int Rcl::Db::getResCnt()
// This class (friend to RclDb) exists so that we can have functions that // This class (friend to RclDb) exists so that we can have functions that
// access private RclDb data and have Xapian-specific parameters (so that we // access private RclDb data and have Xapian-specific parameters (so that we
// don't want them to appear in the public rcldb.h). // don't want them to appear in the public rcldb.h).
class Rcl::DbPops { class DbPops {
public: public:
static bool filterMatch(Rcl::Db *rdb, Xapian::Document &xdoc) {
// Parse xapian document's data and populate doc fields
string data = xdoc.get_data();
ConfSimple parms(&data);
// The only filtering for now is on file path (subtree)
string url;
parms.get(string("url"), url);
url = url.substr(7);
if (url.find(rdb->m_asdata.topdir) == 0)
return true;
return false;
}
}; };
bool Native::dbDataToRclDoc(std::string &data, Rcl::Doc &doc, bool Native::dbDataToRclDoc(std::string &data, Doc &doc,
int qopts, int qopts,
Xapian::docid docid, const list<string>& terms) Xapian::docid docid, const list<string>& terms)
{ {
LOGDEB1(("Rcl::Db::dbDataToRclDoc: data: %s\n", data.c_str())); LOGDEB1(("Db::dbDataToRclDoc: data: %s\n", data.c_str()));
ConfSimple parms(&data); ConfSimple parms(&data);
if (!parms.ok()) if (!parms.ok())
return false; return false;
@ -1240,10 +1228,10 @@ bool Native::dbDataToRclDoc(std::string &data, Rcl::Doc &doc,
doc.abstract = doc.abstract.substr(rclSyntAbs.length()); doc.abstract = doc.abstract.substr(rclSyntAbs.length());
syntabs = true; syntabs = true;
} }
if ((qopts && Rcl::Db::QO_BUILD_ABSTRACT) && !terms.empty()) { if ((qopts && Db::QO_BUILD_ABSTRACT) && !terms.empty()) {
LOGDEB1(("dbDataToRclDoc:: building abstract from position data\n")); LOGDEB1(("dbDataToRclDoc:: building abstract from position data\n"));
if (doc.abstract.empty() || syntabs || if (doc.abstract.empty() || syntabs ||
(qopts & Rcl::Db::QO_REPLACE_ABSTRACT)) (qopts & Db::QO_REPLACE_ABSTRACT))
doc.abstract = makeAbstract(docid, terms); doc.abstract = makeAbstract(docid, terms);
} }
parms.get(string("ipath"), doc.ipath); parms.get(string("ipath"), doc.ipath);
@ -1261,12 +1249,11 @@ bool Native::dbDataToRclDoc(std::string &data, Rcl::Doc &doc,
// maintain a correspondence from the sequential external index // maintain a correspondence from the sequential external index
// sequence to the internal Xapian hole-y one (the holes being the documents // sequence to the internal Xapian hole-y one (the holes being the documents
// that don't match the filter). // that don't match the filter).
bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent) bool Db::getDoc(int exti, Doc &doc, int *percent)
{ {
LOGDEB1(("Rcl::Db::getDoc: exti %d\n", exti)); LOGDEB1(("Db::getDoc: exti %d\n", exti));
Native *ndb = (Native *)pdata;
if (!ndb || !ndb->enquire) { if (!ndb || !ndb->enquire) {
LOGERR(("Rcl::Db::getDoc: no query opened\n")); LOGERR(("Db::getDoc: no query opened\n"));
return false; return false;
} }
@ -1285,7 +1272,7 @@ bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
int first = dbindices.size() > 0 ? dbindices.back() + 1 : 0; int first = dbindices.size() > 0 ? dbindices.back() + 1 : 0;
// Loop until we get enough docs // Loop until we get enough docs
while (exti >= (int)dbindices.size()) { while (exti >= (int)dbindices.size()) {
LOGDEB(("Rcl::Db::getDoc: fetching %d starting at %d\n", LOGDEB(("Db::getDoc: fetching %d starting at %d\n",
qquantum, first)); qquantum, first));
try { try {
ndb->mset = ndb->enquire->get_mset(first, qquantum); ndb->mset = ndb->enquire->get_mset(first, qquantum);
@ -1299,14 +1286,14 @@ bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
} }
if (ndb->mset.empty()) { if (ndb->mset.empty()) {
LOGDEB(("Rcl::Db::getDoc: got empty mset\n")); LOGDEB(("Db::getDoc: got empty mset\n"));
return false; return false;
} }
first = ndb->mset.get_firstitem(); first = ndb->mset.get_firstitem();
for (unsigned int i = 0; i < ndb->mset.size() ; i++) { for (unsigned int i = 0; i < ndb->mset.size() ; i++) {
LOGDEB(("Rcl::Db::getDoc: [%d]\n", i)); LOGDEB(("Db::getDoc: [%d]\n", i));
Xapian::Document xdoc = ndb->mset[i].get_document(); Xapian::Document xdoc = ndb->mset[i].get_document();
if (Rcl::DbPops::filterMatch(this, xdoc)) { if (ndb->filterMatch(this, xdoc)) {
dbindices.push_back(first + i); dbindices.push_back(first + i);
} }
} }
@ -1341,7 +1328,7 @@ bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
last = first + ndb->mset.size() -1; last = first + ndb->mset.size() -1;
} }
LOGDEB1(("Rcl::Db::getDoc: Qry [%s] win [%d-%d] Estimated results: %d", LOGDEB1(("Db::getDoc: Qry [%s] win [%d-%d] Estimated results: %d",
ndb->query.get_description().c_str(), ndb->query.get_description().c_str(),
first, last, first, last,
ndb->mset.get_matches_lower_bound())); ndb->mset.get_matches_lower_bound()));
@ -1361,13 +1348,12 @@ bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
// Retrieve document defined by file name and internal path. Very inefficient, // Retrieve document defined by file name and internal path. Very inefficient,
// used only for history display. We'd need to enter path+ipath terms in the // used only for history display. We'd need to enter path+ipath terms in the
// db if we wanted to make this more efficient. // db if we wanted to make this more efficient.
bool Rcl::Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc) bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
{ {
LOGDEB(("Rcl::Db:getDoc: [%s] (%d) [%s]\n", fn.c_str(), fn.length(), LOGDEB(("Db:getDoc: [%s] (%d) [%s]\n", fn.c_str(), fn.length(),
ipath.c_str())); ipath.c_str()));
if (pdata == 0) if (ndb == 0)
return false; return false;
Native *ndb = (Native *)pdata;
// Initialize what we can in any case. If this is history, caller // Initialize what we can in any case. If this is history, caller
// will make partial display in case of error // will make partial display in case of error
@ -1389,7 +1375,7 @@ bool Rcl::Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
// but indicate the error with pc = -1 // but indicate the error with pc = -1
if (*pc) if (*pc)
*pc = -1; *pc = -1;
LOGINFO(("Rcl::Db:getDoc: path inexistant: [%s] length %d\n", LOGINFO(("Db:getDoc: path inexistant: [%s] length %d\n",
pathterm.c_str(), pathterm.length())); pathterm.c_str(), pathterm.length()));
return true; return true;
} }
@ -1414,7 +1400,7 @@ bool Rcl::Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
ermsg = "Caught unknown exception"; ermsg = "Caught unknown exception";
} }
if (*ermsg) { if (*ermsg) {
LOGERR(("Rcl::Db::getDoc: %s\n", ermsg)); LOGERR(("Db::getDoc: %s\n", ermsg));
} }
return false; return false;
} }
@ -1537,3 +1523,6 @@ string Native::makeAbstract(Xapian::docid docid, const list<string>& terms)
LOGDEB(("Abtract: done in %d mS\n", chron.millis())); LOGDEB(("Abtract: done in %d mS\n", chron.millis()));
return abstract; return abstract;
} }
#ifndef NO_NAMESPACES
}
#endif

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _DB_H_INCLUDED_ #ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_ #define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.27 2006-03-29 11:18:14 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: rcldb.h,v 1.28 2006-04-05 06:26:56 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
@ -124,8 +124,6 @@ class AdvSearchData {
} }
}; };
class DbPops;
/** /**
* Wrapper class for the native database. * Wrapper class for the native database.
*/ */
@ -176,14 +174,14 @@ class Db {
std::list<std::string> getStemLangs(); std::list<std::string> getStemLangs();
/** Things we don't want to have here. */ /** Things we don't want to have here. */
friend class Rcl::DbPops; friend class Native;
private: private:
AdvSearchData m_asdata; AdvSearchData m_asdata;
vector<int> dbindices; // In case there is a postq filter: sequence of vector<int> dbindices; // In case there is a postq filter: sequence of
// db indices that match // db indices that match
void *pdata; // Pointer to private data. We don't want db(ie Native *ndb; // Pointer to private data. We don't want db(ie
// xapian)-specific defs to show in here // xapian)-specific defs to show in here
unsigned int m_qOpts; unsigned int m_qOpts;