simplified class structure
This commit is contained in:
parent
14194e30d8
commit
daa37c68f7
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.59 2006-03-29 11:18:14 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.60 2006-04-05 06:26:56 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -52,7 +52,9 @@ using namespace std;
|
|||||||
#ifndef MIN
// Smaller of A and B. Both arguments and the whole expansion are
// parenthesized so that operands containing lower-precedence operators
// (e.g. MIN(a | b, c) or MIN(x ? y : z, w)) are compared as a unit.
// This is still a macro: the selected argument is evaluated twice, so do not
// pass expressions with side effects.
#define MIN(A,B) ((A) < (B) ? (A) : (B))
#endif
||||||
|
#ifndef NO_NAMESPACES
|
||||||
|
namespace Rcl {
|
||||||
|
#endif
|
||||||
// This is how long an abstract we keep or build from beginning of text when
|
// This is how long an abstract we keep or build from beginning of text when
|
||||||
// indexing. It only has an influence on the size of the db as we are free
|
// indexing. It only has an influence on the size of the db as we are free
|
||||||
// to shorten it again when displaying
|
// to shorten it again when displaying
|
||||||
@ -81,11 +83,11 @@ class Native {
|
|||||||
Xapian::Database db;
|
Xapian::Database db;
|
||||||
Xapian::Query query; // query descriptor: terms and subqueries
|
Xapian::Query query; // query descriptor: terms and subqueries
|
||||||
// joined by operators (or/and etc...)
|
// joined by operators (or/and etc...)
|
||||||
Xapian::Enquire *enquire;
|
Xapian::Enquire *enquire; // Open query descriptor.
|
||||||
Xapian::MSet mset;
|
Xapian::MSet mset; // Partial result set
|
||||||
|
|
||||||
string makeAbstract(Xapian::docid id, const list<string>& terms);
|
string makeAbstract(Xapian::docid id, const list<string>& terms);
|
||||||
bool dbDataToRclDoc(std::string &data, Rcl::Doc &doc,
|
bool dbDataToRclDoc(std::string &data, Doc &doc,
|
||||||
int qopts,
|
int qopts,
|
||||||
Xapian::docid docid,
|
Xapian::docid docid,
|
||||||
const list<string>& terms);
|
const list<string>& terms);
|
||||||
@ -94,27 +96,39 @@ class Native {
|
|||||||
~Native() {
|
~Native() {
|
||||||
delete enquire;
|
delete enquire;
|
||||||
}
|
}
|
||||||
|
// Decide whether a result document passes the current search filter.
// The document's data record is parsed as a ConfSimple parameter set and its
// "url" entry is checked against the top directory held in the query's
// advanced search data (rdb->m_asdata.topdir).
// Returns true when the path part of the url begins with topdir (an empty
// topdir therefore matches every document), false otherwise.
bool filterMatch(Db *rdb, Xapian::Document &xdoc) {
    // Parse xapian document's data and populate doc fields
    string data = xdoc.get_data();
    ConfSimple parms(&data);

    // The only filtering for now is on file path (subtree)
    string url;
    parms.get(string("url"), url);

    // Drop the first 7 characters of the url (presumably the "file://"
    // scheme prefix -- TODO confirm against the indexing code).
    // string::substr() throws std::out_of_range when the offset is past the
    // end of the string, which would happen for a record with a missing or
    // truncated url, so fall back to an empty path in that case.
    const string::size_type prefixlen = 7;
    string path;
    if (url.length() > prefixlen)
        path = url.substr(prefixlen);

    // Prefix test: compare only the leading topdir.length() characters
    // instead of letting find() scan the whole path when there is no match
    // at position 0.
    const string &topdir = rdb->m_asdata.topdir;
    return path.compare(0, topdir.length(), topdir) == 0;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
Rcl::Db::Db()
|
Db::Db()
|
||||||
{
|
{
|
||||||
pdata = new Native;
|
ndb = new Native;
|
||||||
m_qOpts = 0;
|
m_qOpts = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
Rcl::Db::~Db()
|
Db::~Db()
|
||||||
{
|
{
|
||||||
LOGDEB1(("Rcl::Db::~Db\n"));
|
LOGDEB1(("Db::~Db\n"));
|
||||||
if (pdata == 0)
|
if (ndb == 0)
|
||||||
return;
|
return;
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
LOGDEB(("Db::~Db: isopen %d iswritable %d\n", ndb->isopen,
|
LOGDEB(("Db::~Db: isopen %d iswritable %d\n", ndb->isopen,
|
||||||
ndb->iswritable));
|
ndb->iswritable));
|
||||||
if (ndb->isopen == false)
|
if (ndb->isopen == false)
|
||||||
return;
|
return;
|
||||||
const char *ermsg = "Unknown error";
|
const char *ermsg = "Unknown error";
|
||||||
try {
|
try {
|
||||||
LOGDEB(("Rcl::Db::~Db: closing native database\n"));
|
LOGDEB(("Db::~Db: closing native database\n"));
|
||||||
if (ndb->iswritable == true) {
|
if (ndb->iswritable == true) {
|
||||||
ndb->wdb.flush();
|
ndb->wdb.flush();
|
||||||
}
|
}
|
||||||
@ -129,20 +143,19 @@ Rcl::Db::~Db()
|
|||||||
} catch (...) {
|
} catch (...) {
|
||||||
ermsg = "Caught unknown exception";
|
ermsg = "Caught unknown exception";
|
||||||
}
|
}
|
||||||
LOGERR(("Rcl::Db::~Db: got exception: %s\n", ermsg));
|
LOGERR(("Db::~Db: got exception: %s\n", ermsg));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Rcl::Db::open(const string& dir, OpenMode mode, int qops)
|
bool Db::open(const string& dir, OpenMode mode, int qops)
|
||||||
{
|
{
|
||||||
if (pdata == 0)
|
if (ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
LOGDEB(("Db::open: isopen %d iswritable %d\n", ndb->isopen,
|
LOGDEB(("Db::open: isopen %d iswritable %d\n", ndb->isopen,
|
||||||
ndb->iswritable));
|
ndb->iswritable));
|
||||||
m_qOpts = qops;
|
m_qOpts = qops;
|
||||||
|
|
||||||
if (ndb->isopen) {
|
if (ndb->isopen) {
|
||||||
LOGERR(("Rcl::Db::open: already open\n"));
|
LOGERR(("Db::open: already open\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const char *ermsg = "Unknown";
|
const char *ermsg = "Unknown";
|
||||||
@ -154,7 +167,7 @@ bool Rcl::Db::open(const string& dir, OpenMode mode, int qops)
|
|||||||
int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
|
int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
|
||||||
Xapian::DB_CREATE_OR_OVERWRITE;
|
Xapian::DB_CREATE_OR_OVERWRITE;
|
||||||
ndb->wdb = Xapian::WritableDatabase(dir, action);
|
ndb->wdb = Xapian::WritableDatabase(dir, action);
|
||||||
LOGDEB(("Rcl::Db::open: lastdocid: %d\n",
|
LOGDEB(("Db::open: lastdocid: %d\n",
|
||||||
ndb->wdb.get_lastdocid()));
|
ndb->wdb.get_lastdocid()));
|
||||||
ndb->updated.resize(ndb->wdb.get_lastdocid() + 1);
|
ndb->updated.resize(ndb->wdb.get_lastdocid() + 1);
|
||||||
for (unsigned int i = 0; i < ndb->updated.size(); i++)
|
for (unsigned int i = 0; i < ndb->updated.size(); i++)
|
||||||
@ -180,17 +193,16 @@ bool Rcl::Db::open(const string& dir, OpenMode mode, int qops)
|
|||||||
} catch (...) {
|
} catch (...) {
|
||||||
ermsg = "Caught unknown exception";
|
ermsg = "Caught unknown exception";
|
||||||
}
|
}
|
||||||
LOGERR(("Rcl::Db::open: exception while opening [%s]: %s\n",
|
LOGERR(("Db::open: exception while opening [%s]: %s\n",
|
||||||
dir.c_str(), ermsg));
|
dir.c_str(), ermsg));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: xapian has no close call, we delete and recreate the db
|
// Note: xapian has no close call, we delete and recreate the db
|
||||||
bool Rcl::Db::close()
|
bool Db::close()
|
||||||
{
|
{
|
||||||
if (pdata == 0)
|
if (ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
LOGDEB(("Db::close(): isopen %d iswritable %d\n", ndb->isopen,
|
LOGDEB(("Db::close(): isopen %d iswritable %d\n", ndb->isopen,
|
||||||
ndb->iswritable));
|
ndb->iswritable));
|
||||||
if (ndb->isopen == false)
|
if (ndb->isopen == false)
|
||||||
@ -202,8 +214,8 @@ bool Rcl::Db::close()
|
|||||||
LOGDEB(("Rcl:Db: Called xapian flush\n"));
|
LOGDEB(("Rcl:Db: Called xapian flush\n"));
|
||||||
}
|
}
|
||||||
delete ndb;
|
delete ndb;
|
||||||
pdata = new Native;
|
ndb = new Native;
|
||||||
if (pdata)
|
if (ndb)
|
||||||
return true;
|
return true;
|
||||||
} catch (const Xapian::Error &e) {
|
} catch (const Xapian::Error &e) {
|
||||||
ermsg = e.get_msg().c_str();
|
ermsg = e.get_msg().c_str();
|
||||||
@ -214,15 +226,14 @@ bool Rcl::Db::close()
|
|||||||
} catch (...) {
|
} catch (...) {
|
||||||
ermsg = "Caught unknown exception";
|
ermsg = "Caught unknown exception";
|
||||||
}
|
}
|
||||||
LOGERR(("Rcl::Db:close: exception while deleting db: %s\n", ermsg));
|
LOGERR(("Db:close: exception while deleting db: %s\n", ermsg));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Rcl::Db::isopen()
|
bool Db::isopen()
|
||||||
{
|
{
|
||||||
if (pdata == 0)
|
if (ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
return ndb->isopen;
|
return ndb->isopen;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -261,7 +272,7 @@ bool mySplitterCB::takeword(const std::string &term, int pos, int, int)
|
|||||||
} catch (...) {
|
} catch (...) {
|
||||||
ermsg= "Unknown error";
|
ermsg= "Unknown error";
|
||||||
}
|
}
|
||||||
LOGERR(("Rcl::Db: xapian add_posting error %s\n", ermsg));
|
LOGERR(("Db: xapian add_posting error %s\n", ermsg));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -271,7 +282,7 @@ bool mySplitterCB::takeword(const std::string &term, int pos, int, int)
|
|||||||
//
|
//
|
||||||
// Note that we always return true (but set out to "" on error). We don't
|
// Note that we always return true (but set out to "" on error). We don't
|
||||||
// want to stop indexation because of a bad string
|
// want to stop indexation because of a bad string
|
||||||
bool Rcl::dumb_string(const string &in, string &out)
|
bool dumb_string(const string &in, string &out)
|
||||||
{
|
{
|
||||||
out.erase();
|
out.erase();
|
||||||
if (in.empty())
|
if (in.empty())
|
||||||
@ -357,15 +368,14 @@ const static string rclSyntAbs = "?!#@";
|
|||||||
// the title abstract and body and add special terms for file name,
|
// the title abstract and body and add special terms for file name,
|
||||||
// date, mime type ... , create the document data record (more
|
// date, mime type ... , create the document data record (more
|
||||||
// metadata), and update database
|
// metadata), and update database
|
||||||
bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
|
bool Db::add(const string &fn, const Doc &idoc,
|
||||||
const struct stat *stp)
|
const struct stat *stp)
|
||||||
{
|
{
|
||||||
LOGDEB1(("Rcl::Db::add: fn %s\n", fn.c_str()));
|
LOGDEB1(("Db::add: fn %s\n", fn.c_str()));
|
||||||
if (pdata == 0)
|
if (ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
|
|
||||||
Rcl::Doc doc = idoc;
|
Doc doc = idoc;
|
||||||
|
|
||||||
// Truncate abstract, title and keywords to reasonable lengths. If
|
// Truncate abstract, title and keywords to reasonable lengths. If
|
||||||
// abstract is currently empty, we make up one with the beginning
|
// abstract is currently empty, we make up one with the beginning
|
||||||
@ -397,7 +407,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
|
|||||||
|
|
||||||
// Split and index title
|
// Split and index title
|
||||||
if (!dumb_string(doc.title, noacc)) {
|
if (!dumb_string(doc.title, noacc)) {
|
||||||
LOGERR(("Rcl::Db::add: dumb_string failed\n"));
|
LOGERR(("Db::add: dumb_string failed\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
splitter.text_to_words(noacc);
|
splitter.text_to_words(noacc);
|
||||||
@ -405,7 +415,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
|
|||||||
|
|
||||||
// Split and index body
|
// Split and index body
|
||||||
if (!dumb_string(doc.text, noacc)) {
|
if (!dumb_string(doc.text, noacc)) {
|
||||||
LOGERR(("Rcl::Db::add: dumb_string failed\n"));
|
LOGERR(("Db::add: dumb_string failed\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
splitter.text_to_words(noacc);
|
splitter.text_to_words(noacc);
|
||||||
@ -413,7 +423,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
|
|||||||
|
|
||||||
// Split and index keywords
|
// Split and index keywords
|
||||||
if (!dumb_string(doc.keywords, noacc)) {
|
if (!dumb_string(doc.keywords, noacc)) {
|
||||||
LOGERR(("Rcl::Db::add: dumb_string failed\n"));
|
LOGERR(("Db::add: dumb_string failed\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
splitter.text_to_words(noacc);
|
splitter.text_to_words(noacc);
|
||||||
@ -421,7 +431,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
|
|||||||
|
|
||||||
// Split and index abstract
|
// Split and index abstract
|
||||||
if (!dumb_string(doc.abstract, noacc)) {
|
if (!dumb_string(doc.abstract, noacc)) {
|
||||||
LOGERR(("Rcl::Db::add: dumb_string failed\n"));
|
LOGERR(("Db::add: dumb_string failed\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
splitter.text_to_words(noacc);
|
splitter.text_to_words(noacc);
|
||||||
@ -434,7 +444,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
|
|||||||
// Path name term. This is used for existence/uptodate checks
|
// Path name term. This is used for existence/uptodate checks
|
||||||
string hash;
|
string hash;
|
||||||
pathHash(fn, hash, PATHHASHLEN);
|
pathHash(fn, hash, PATHHASHLEN);
|
||||||
LOGDEB2(("Rcl::Db::add: pathhash [%s]\n", hash.c_str()));
|
LOGDEB2(("Db::add: pathhash [%s]\n", hash.c_str()));
|
||||||
string pathterm = "P" + hash;
|
string pathterm = "P" + hash;
|
||||||
newdocument.add_term(pathterm);
|
newdocument.add_term(pathterm);
|
||||||
|
|
||||||
@ -507,20 +517,20 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
|
|||||||
newdocument);
|
newdocument);
|
||||||
if (did < ndb->updated.size()) {
|
if (did < ndb->updated.size()) {
|
||||||
ndb->updated[did] = true;
|
ndb->updated[did] = true;
|
||||||
LOGDEB(("Rcl::Db::add: docid %d updated [%s , %s]\n", did, fnc,
|
LOGDEB(("Db::add: docid %d updated [%s , %s]\n", did, fnc,
|
||||||
doc.ipath.c_str()));
|
doc.ipath.c_str()));
|
||||||
} else {
|
} else {
|
||||||
LOGDEB(("Rcl::Db::add: docid %d added [%s , %s]\n", did, fnc,
|
LOGDEB(("Db::add: docid %d added [%s , %s]\n", did, fnc,
|
||||||
doc.ipath.c_str()));
|
doc.ipath.c_str()));
|
||||||
}
|
}
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
// FIXME: is this ever actually needed?
|
// FIXME: is this ever actually needed?
|
||||||
try {
|
try {
|
||||||
ndb->wdb.add_document(newdocument);
|
ndb->wdb.add_document(newdocument);
|
||||||
LOGDEB(("Rcl::Db::add: %s added (failed re-seek for duplicate)\n",
|
LOGDEB(("Db::add: %s added (failed re-seek for duplicate)\n",
|
||||||
fnc));
|
fnc));
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
LOGERR(("Rcl::Db::add: failed again after replace_document\n"));
|
LOGERR(("Db::add: failed again after replace_document\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -528,11 +538,10 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Test if given filename has changed since last indexed:
|
// Test if given filename has changed since last indexed:
|
||||||
bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
|
bool Db::needUpdate(const string &filename, const struct stat *stp)
|
||||||
{
|
{
|
||||||
if (pdata == 0)
|
if (ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
|
|
||||||
// If no document exist with this path, we do need update
|
// If no document exist with this path, we do need update
|
||||||
string hash;
|
string hash;
|
||||||
@ -615,12 +624,11 @@ p_notlowerorutf(unsigned int c)
|
|||||||
/**
|
/**
|
||||||
* Delete stem db for given language
|
* Delete stem db for given language
|
||||||
*/
|
*/
|
||||||
bool Rcl::Db::deleteStemDb(const string& lang)
|
bool Db::deleteStemDb(const string& lang)
|
||||||
{
|
{
|
||||||
LOGDEB(("Rcl::Db::deleteStemDb(%s)\n", lang.c_str()));
|
LOGDEB(("Db::deleteStemDb(%s)\n", lang.c_str()));
|
||||||
if (pdata == 0)
|
if (ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
if (ndb->isopen == false)
|
if (ndb->isopen == false)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -636,12 +644,11 @@ bool Rcl::Db::deleteStemDb(const string& lang)
|
|||||||
* with documents indexed by a single term (the stem), and with the list of
|
* with documents indexed by a single term (the stem), and with the list of
|
||||||
* parent terms in the document data.
|
* parent terms in the document data.
|
||||||
*/
|
*/
|
||||||
bool Rcl::Db::createStemDb(const string& lang)
|
bool Db::createStemDb(const string& lang)
|
||||||
{
|
{
|
||||||
LOGDEB(("Rcl::Db::createStemDb(%s)\n", lang.c_str()));
|
LOGDEB(("Db::createStemDb(%s)\n", lang.c_str()));
|
||||||
if (pdata == 0)
|
if (ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
if (ndb->isopen == false)
|
if (ndb->isopen == false)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -719,7 +726,7 @@ bool Rcl::Db::createStemDb(const string& lang)
|
|||||||
ermsg = "Caught unknown exception";
|
ermsg = "Caught unknown exception";
|
||||||
}
|
}
|
||||||
// Compare the message text rather than the pointers: testing a char pointer
// against a string literal with != only compares addresses, and whether two
// identical literals share an address is unspecified.
if (string(ermsg) != "NOERROR") {
    LOGERR(("Db::createstemdb: exception while opening [%s]: %s\n",
            stemdbdir.c_str(), ermsg));
    return false;
}
|
||||||
@ -754,7 +761,7 @@ bool Rcl::Db::createStemDb(const string& lang)
|
|||||||
try {
|
try {
|
||||||
sdb.replace_document(stem, newdocument);
|
sdb.replace_document(stem, newdocument);
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
LOGERR(("Rcl::Db::createstemdb: replace failed\n"));
|
LOGERR(("Db::createstemdb: replace failed\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -770,13 +777,12 @@ bool Rcl::Db::createStemDb(const string& lang)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
list<string> Rcl::Db::getStemLangs()
|
list<string> Db::getStemLangs()
|
||||||
{
|
{
|
||||||
list<string> dirs;
|
list<string> dirs;
|
||||||
LOGDEB(("Rcl::Db::getStemLang\n"));
|
LOGDEB(("Db::getStemLang\n"));
|
||||||
if (pdata == 0)
|
if (ndb == 0)
|
||||||
return dirs;
|
return dirs;
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
string pattern = stemdirstem + "*";
|
string pattern = stemdirstem + "*";
|
||||||
dirs = path_dirglob(ndb->basedir, pattern);
|
dirs = path_dirglob(ndb->basedir, pattern);
|
||||||
for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
|
for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
|
||||||
@ -792,13 +798,12 @@ list<string> Rcl::Db::getStemLangs()
|
|||||||
* documents for files that are no longer there. We also build the
|
* documents for files that are no longer there. We also build the
|
||||||
* stem database while we are at it.
|
* stem database while we are at it.
|
||||||
*/
|
*/
|
||||||
bool Rcl::Db::purge()
|
bool Db::purge()
|
||||||
{
|
{
|
||||||
LOGDEB(("Rcl::Db::purge\n"));
|
LOGDEB(("Db::purge\n"));
|
||||||
if (pdata == 0)
|
if (ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
Native *ndb = (Native *)pdata;
|
LOGDEB(("Db::purge: isopen %d iswritable %d\n", ndb->isopen,
|
||||||
LOGDEB(("Rcl::Db::purge: isopen %d iswritable %d\n", ndb->isopen,
|
|
||||||
ndb->iswritable));
|
ndb->iswritable));
|
||||||
if (ndb->isopen == false || ndb->iswritable == false)
|
if (ndb->isopen == false || ndb->iswritable == false)
|
||||||
return false;
|
return false;
|
||||||
@ -813,22 +818,22 @@ bool Rcl::Db::purge()
|
|||||||
try {
|
try {
|
||||||
ndb->wdb.flush();
|
ndb->wdb.flush();
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
LOGDEB(("Rcl::Db::purge: 1st flush failed\n"));
|
LOGDEB(("Db::purge: 1st flush failed\n"));
|
||||||
}
|
}
|
||||||
for (Xapian::docid docid = 1; docid < ndb->updated.size(); ++docid) {
|
for (Xapian::docid docid = 1; docid < ndb->updated.size(); ++docid) {
|
||||||
if (!ndb->updated[docid]) {
|
if (!ndb->updated[docid]) {
|
||||||
try {
|
try {
|
||||||
ndb->wdb.delete_document(docid);
|
ndb->wdb.delete_document(docid);
|
||||||
LOGDEB(("Rcl::Db::purge: deleted document #%d\n", docid));
|
LOGDEB(("Db::purge: deleted document #%d\n", docid));
|
||||||
} catch (const Xapian::DocNotFoundError &) {
|
} catch (const Xapian::DocNotFoundError &) {
|
||||||
LOGDEB(("Rcl::Db::purge: document #%d not found\n", docid));
|
LOGDEB(("Db::purge: document #%d not found\n", docid));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
ndb->wdb.flush();
|
ndb->wdb.flush();
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
LOGDEB(("Rcl::Db::purge: 2nd flush failed\n"));
|
LOGDEB(("Db::purge: 2nd flush failed\n"));
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -849,7 +854,7 @@ static list<string> stemexpand(Native *ndb, string term, const string& lang)
|
|||||||
LOGDEB1(("stemexpand: %s lastdocid: %d\n",
|
LOGDEB1(("stemexpand: %s lastdocid: %d\n",
|
||||||
stemdbdir.c_str(), sdb.get_lastdocid()));
|
stemdbdir.c_str(), sdb.get_lastdocid()));
|
||||||
if (!sdb.term_exists(stem)) {
|
if (!sdb.term_exists(stem)) {
|
||||||
LOGDEB1(("Rcl::Db::stemexpand: no term for %s\n", stem.c_str()));
|
LOGDEB1(("Db::stemexpand: no term for %s\n", stem.c_str()));
|
||||||
explist.push_back(term);
|
explist.push_back(term);
|
||||||
return explist;
|
return explist;
|
||||||
}
|
}
|
||||||
@ -903,7 +908,7 @@ class wsQData : public TextSplitCB {
|
|||||||
void dumball() {
|
void dumball() {
|
||||||
for (vector<string>::iterator it=terms.begin(); it !=terms.end();it++){
|
for (vector<string>::iterator it=terms.begin(); it !=terms.end();it++){
|
||||||
string dumb;
|
string dumb;
|
||||||
Rcl::dumb_string(*it, dumb);
|
dumb_string(*it, dumb);
|
||||||
*it = dumb;
|
*it = dumb;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -922,7 +927,7 @@ static void stringToXapianQueries(const string &iq,
|
|||||||
const string& stemlang,
|
const string& stemlang,
|
||||||
Native *ndb,
|
Native *ndb,
|
||||||
list<Xapian::Query> &pqueries,
|
list<Xapian::Query> &pqueries,
|
||||||
Rcl::Db::QueryOpts opts = Rcl::Db::QO_NONE)
|
Db::QueryOpts opts = Db::QO_NONE)
|
||||||
{
|
{
|
||||||
string qstring = iq;
|
string qstring = iq;
|
||||||
|
|
||||||
@ -965,9 +970,9 @@ static void stringToXapianQueries(const string &iq,
|
|||||||
|
|
||||||
list<string> exp;
|
list<string> exp;
|
||||||
string term1;
|
string term1;
|
||||||
Rcl::dumb_string(term, term1);
|
dumb_string(term, term1);
|
||||||
// Possibly perform stem compression/expansion
|
// Possibly perform stem compression/expansion
|
||||||
if (!nostemexp && (opts & Rcl::Db::QO_STEM)) {
|
if (!nostemexp && (opts & Db::QO_STEM)) {
|
||||||
exp = stemexpand(ndb, term1, stemlang);
|
exp = stemexpand(ndb, term1, stemlang);
|
||||||
} else {
|
} else {
|
||||||
exp.push_back(term1);
|
exp.push_back(term1);
|
||||||
@ -991,12 +996,11 @@ static void stringToXapianQueries(const string &iq,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Prepare query out of simple query string
|
// Prepare query out of simple query string
|
||||||
bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
bool Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
||||||
const string& stemlang)
|
const string& stemlang)
|
||||||
{
|
{
|
||||||
LOGDEB(("Rcl::Db::setQuery: q: [%s], opts 0x%x, stemlang %s\n",
|
LOGDEB(("Db::setQuery: q: [%s], opts 0x%x, stemlang %s\n",
|
||||||
iqstring.c_str(), (unsigned int)opts, stemlang.c_str()));
|
iqstring.c_str(), (unsigned int)opts, stemlang.c_str()));
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
if (!ndb)
|
if (!ndb)
|
||||||
return false;
|
return false;
|
||||||
m_asdata.erase();
|
m_asdata.erase();
|
||||||
@ -1013,10 +1017,10 @@ bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Prepare query out of "advanced search" data
|
// Prepare query out of "advanced search" data
|
||||||
bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
|
bool Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
|
||||||
const string& stemlang)
|
const string& stemlang)
|
||||||
{
|
{
|
||||||
LOGDEB(("Rcl::Db::setQuery: adv:\n"));
|
LOGDEB(("Db::setQuery: adv:\n"));
|
||||||
LOGDEB((" allwords: %s\n", sdata.allwords.c_str()));
|
LOGDEB((" allwords: %s\n", sdata.allwords.c_str()));
|
||||||
LOGDEB((" phrase: %s\n", sdata.phrase.c_str()));
|
LOGDEB((" phrase: %s\n", sdata.phrase.c_str()));
|
||||||
LOGDEB((" orwords: %s\n", sdata.orwords.c_str()));
|
LOGDEB((" orwords: %s\n", sdata.orwords.c_str()));
|
||||||
@ -1033,7 +1037,6 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
|
|||||||
m_asdata = sdata;
|
m_asdata = sdata;
|
||||||
dbindices.clear();
|
dbindices.clear();
|
||||||
|
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
if (!ndb)
|
if (!ndb)
|
||||||
return false;
|
return false;
|
||||||
list<Xapian::Query> pqueries;
|
list<Xapian::Query> pqueries;
|
||||||
@ -1072,7 +1075,7 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
|
|||||||
}
|
}
|
||||||
// Limit the match count
|
// Limit the match count
|
||||||
if (names.size() > 1000) {
|
if (names.size() > 1000) {
|
||||||
LOGERR(("Rcl::Db::SetQuery: too many matched file names\n"));
|
LOGERR(("Db::SetQuery: too many matched file names\n"));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1155,13 +1158,12 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
|
|||||||
sdata.description = ndb->query.get_description();
|
sdata.description = ndb->query.get_description();
|
||||||
if (sdata.description.find("Xapian::Query") == 0)
|
if (sdata.description.find("Xapian::Query") == 0)
|
||||||
sdata.description = sdata.description.substr(strlen("Xapian::Query"));
|
sdata.description = sdata.description.substr(strlen("Xapian::Query"));
|
||||||
LOGDEB(("Rcl::Db::SetQuery: Q: %s\n", sdata.description.c_str()));
|
LOGDEB(("Db::SetQuery: Q: %s\n", sdata.description.c_str()));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Rcl::Db::getQueryTerms(list<string>& terms)
|
bool Db::getQueryTerms(list<string>& terms)
|
||||||
{
|
{
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
if (!ndb)
|
if (!ndb)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -1176,11 +1178,10 @@ bool Rcl::Db::getQueryTerms(list<string>& terms)
|
|||||||
|
|
||||||
static const int qquantum = 30;
|
static const int qquantum = 30;
|
||||||
|
|
||||||
int Rcl::Db::getResCnt()
|
int Db::getResCnt()
|
||||||
{
|
{
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
if (!ndb || !ndb->enquire) {
|
if (!ndb || !ndb->enquire) {
|
||||||
LOGERR(("Rcl::Db::getResCnt: no query opened\n"));
|
LOGERR(("Db::getResCnt: no query opened\n"));
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (ndb->mset.size() <= 0) {
|
if (ndb->mset.size() <= 0) {
|
||||||
@ -1202,28 +1203,15 @@ int Rcl::Db::getResCnt()
|
|||||||
// This class (friend to RclDb) exists so that we can have functions that
|
// This class (friend to RclDb) exists so that we can have functions that
|
||||||
// access private RclDb data and have Xapian-specific parameters (so that we
|
// access private RclDb data and have Xapian-specific parameters (so that we
|
||||||
// don't want them to appear in the public rcldb.h).
|
// don't want them to appear in the public rcldb.h).
|
||||||
class Rcl::DbPops {
|
class DbPops {
|
||||||
public:
|
public:
|
||||||
static bool filterMatch(Rcl::Db *rdb, Xapian::Document &xdoc) {
|
|
||||||
// Parse xapian document's data and populate doc fields
|
|
||||||
string data = xdoc.get_data();
|
|
||||||
ConfSimple parms(&data);
|
|
||||||
|
|
||||||
// The only filtering for now is on file path (subtree)
|
|
||||||
string url;
|
|
||||||
parms.get(string("url"), url);
|
|
||||||
url = url.substr(7);
|
|
||||||
if (url.find(rdb->m_asdata.topdir) == 0)
|
|
||||||
return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
bool Native::dbDataToRclDoc(std::string &data, Rcl::Doc &doc,
|
bool Native::dbDataToRclDoc(std::string &data, Doc &doc,
|
||||||
int qopts,
|
int qopts,
|
||||||
Xapian::docid docid, const list<string>& terms)
|
Xapian::docid docid, const list<string>& terms)
|
||||||
{
|
{
|
||||||
LOGDEB1(("Rcl::Db::dbDataToRclDoc: data: %s\n", data.c_str()));
|
LOGDEB1(("Db::dbDataToRclDoc: data: %s\n", data.c_str()));
|
||||||
ConfSimple parms(&data);
|
ConfSimple parms(&data);
|
||||||
if (!parms.ok())
|
if (!parms.ok())
|
||||||
return false;
|
return false;
|
||||||
@ -1240,10 +1228,10 @@ bool Native::dbDataToRclDoc(std::string &data, Rcl::Doc &doc,
|
|||||||
doc.abstract = doc.abstract.substr(rclSyntAbs.length());
|
doc.abstract = doc.abstract.substr(rclSyntAbs.length());
|
||||||
syntabs = true;
|
syntabs = true;
|
||||||
}
|
}
|
||||||
// Build an abstract from the query terms only when the QO_BUILD_ABSTRACT bit
// is set in qopts. This must be a bitwise test: a logical && would be true
// for any non-zero qopts, whatever flags it actually contains.
if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) {
    LOGDEB1(("dbDataToRclDoc:: building abstract from position data\n"));
    // Replace the stored abstract only if there is none, if it was
    // synthesized at indexing time, or if replacement was explicitly asked.
    if (doc.abstract.empty() || syntabs ||
        (qopts & Db::QO_REPLACE_ABSTRACT))
        doc.abstract = makeAbstract(docid, terms);
}
|
||||||
parms.get(string("ipath"), doc.ipath);
|
parms.get(string("ipath"), doc.ipath);
|
||||||
@ -1261,12 +1249,11 @@ bool Native::dbDataToRclDoc(std::string &data, Rcl::Doc &doc,
|
|||||||
// maintain a correspondence from the sequential external index
|
// maintain a correspondence from the sequential external index
|
||||||
// sequence to the internal Xapian hole-y one (the holes being the documents
|
// sequence to the internal Xapian hole-y one (the holes being the documents
|
||||||
// that don't match the filter).
|
// that don't match the filter).
|
||||||
bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
|
bool Db::getDoc(int exti, Doc &doc, int *percent)
|
||||||
{
|
{
|
||||||
LOGDEB1(("Rcl::Db::getDoc: exti %d\n", exti));
|
LOGDEB1(("Db::getDoc: exti %d\n", exti));
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
if (!ndb || !ndb->enquire) {
|
if (!ndb || !ndb->enquire) {
|
||||||
LOGERR(("Rcl::Db::getDoc: no query opened\n"));
|
LOGERR(("Db::getDoc: no query opened\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1285,7 +1272,7 @@ bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
|
|||||||
int first = dbindices.size() > 0 ? dbindices.back() + 1 : 0;
|
int first = dbindices.size() > 0 ? dbindices.back() + 1 : 0;
|
||||||
// Loop until we get enough docs
|
// Loop until we get enough docs
|
||||||
while (exti >= (int)dbindices.size()) {
|
while (exti >= (int)dbindices.size()) {
|
||||||
LOGDEB(("Rcl::Db::getDoc: fetching %d starting at %d\n",
|
LOGDEB(("Db::getDoc: fetching %d starting at %d\n",
|
||||||
qquantum, first));
|
qquantum, first));
|
||||||
try {
|
try {
|
||||||
ndb->mset = ndb->enquire->get_mset(first, qquantum);
|
ndb->mset = ndb->enquire->get_mset(first, qquantum);
|
||||||
@ -1299,14 +1286,14 @@ bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (ndb->mset.empty()) {
|
if (ndb->mset.empty()) {
|
||||||
LOGDEB(("Rcl::Db::getDoc: got empty mset\n"));
|
LOGDEB(("Db::getDoc: got empty mset\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
first = ndb->mset.get_firstitem();
|
first = ndb->mset.get_firstitem();
|
||||||
for (unsigned int i = 0; i < ndb->mset.size() ; i++) {
|
for (unsigned int i = 0; i < ndb->mset.size() ; i++) {
|
||||||
LOGDEB(("Rcl::Db::getDoc: [%d]\n", i));
|
LOGDEB(("Db::getDoc: [%d]\n", i));
|
||||||
Xapian::Document xdoc = ndb->mset[i].get_document();
|
Xapian::Document xdoc = ndb->mset[i].get_document();
|
||||||
if (Rcl::DbPops::filterMatch(this, xdoc)) {
|
if (ndb->filterMatch(this, xdoc)) {
|
||||||
dbindices.push_back(first + i);
|
dbindices.push_back(first + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1341,7 +1328,7 @@ bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
|
|||||||
last = first + ndb->mset.size() -1;
|
last = first + ndb->mset.size() -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOGDEB1(("Rcl::Db::getDoc: Qry [%s] win [%d-%d] Estimated results: %d",
|
LOGDEB1(("Db::getDoc: Qry [%s] win [%d-%d] Estimated results: %d",
|
||||||
ndb->query.get_description().c_str(),
|
ndb->query.get_description().c_str(),
|
||||||
first, last,
|
first, last,
|
||||||
ndb->mset.get_matches_lower_bound()));
|
ndb->mset.get_matches_lower_bound()));
|
||||||
@ -1361,13 +1348,12 @@ bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
|
|||||||
// Retrieve document defined by file name and internal path. Very inefficient,
|
// Retrieve document defined by file name and internal path. Very inefficient,
|
||||||
// used only for history display. We'd need to enter path+ipath terms in the
|
// used only for history display. We'd need to enter path+ipath terms in the
|
||||||
// db if we wanted to make this more efficient.
|
// db if we wanted to make this more efficient.
|
||||||
bool Rcl::Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
||||||
{
|
{
|
||||||
LOGDEB(("Rcl::Db:getDoc: [%s] (%d) [%s]\n", fn.c_str(), fn.length(),
|
LOGDEB(("Db:getDoc: [%s] (%d) [%s]\n", fn.c_str(), fn.length(),
|
||||||
ipath.c_str()));
|
ipath.c_str()));
|
||||||
if (pdata == 0)
|
if (ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
Native *ndb = (Native *)pdata;
|
|
||||||
|
|
||||||
// Initialize what we can in any case. If this is history, caller
|
// Initialize what we can in any case. If this is history, caller
|
||||||
// will make partial display in case of error
|
// will make partial display in case of error
|
||||||
@ -1389,7 +1375,7 @@ bool Rcl::Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
|||||||
// but indicate the error with pc = -1
|
// but indicate the error with pc = -1
|
||||||
if (*pc)
|
if (*pc)
|
||||||
*pc = -1;
|
*pc = -1;
|
||||||
LOGINFO(("Rcl::Db:getDoc: path inexistant: [%s] length %d\n",
|
LOGINFO(("Db:getDoc: path inexistant: [%s] length %d\n",
|
||||||
pathterm.c_str(), pathterm.length()));
|
pathterm.c_str(), pathterm.length()));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -1414,7 +1400,7 @@ bool Rcl::Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
|||||||
ermsg = "Caught unknown exception";
|
ermsg = "Caught unknown exception";
|
||||||
}
|
}
|
||||||
if (*ermsg) {
|
if (*ermsg) {
|
||||||
LOGERR(("Rcl::Db::getDoc: %s\n", ermsg));
|
LOGERR(("Db::getDoc: %s\n", ermsg));
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1537,3 +1523,6 @@ string Native::makeAbstract(Xapian::docid docid, const list<string>& terms)
|
|||||||
LOGDEB(("Abtract: done in %d mS\n", chron.millis()));
|
LOGDEB(("Abtract: done in %d mS\n", chron.millis()));
|
||||||
return abstract;
|
return abstract;
|
||||||
}
|
}
|
||||||
|
#ifndef NO_NAMESPACES
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _DB_H_INCLUDED_
|
#ifndef _DB_H_INCLUDED_
|
||||||
#define _DB_H_INCLUDED_
|
#define _DB_H_INCLUDED_
|
||||||
/* @(#$Id: rcldb.h,v 1.27 2006-03-29 11:18:14 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rcldb.h,v 1.28 2006-04-05 06:26:56 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
@ -124,8 +124,6 @@ class AdvSearchData {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class DbPops;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wrapper class for the native database.
|
* Wrapper class for the native database.
|
||||||
*/
|
*/
|
||||||
@ -176,14 +174,14 @@ class Db {
|
|||||||
std::list<std::string> getStemLangs();
|
std::list<std::string> getStemLangs();
|
||||||
|
|
||||||
/** Things we don't want to have here. */
|
/** Things we don't want to have here. */
|
||||||
friend class Rcl::DbPops;
|
friend class Native;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
AdvSearchData m_asdata;
|
AdvSearchData m_asdata;
|
||||||
vector<int> dbindices; // In case there is a postq filter: sequence of
|
vector<int> dbindices; // In case there is a postq filter: sequence of
|
||||||
// db indices that match
|
// db indices that match
|
||||||
void *pdata; // Pointer to private data. We don't want db(ie
|
Native *ndb; // Pointer to private data. We don't want db(ie
|
||||||
// xapian)-specific defs to show in here
|
// xapian)-specific defs to show in here
|
||||||
unsigned int m_qOpts;
|
unsigned int m_qOpts;
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user