fix slowness in needUpdate by using Database instead of WritableDatabase
This commit is contained in:
parent
d8f8dd851e
commit
c808c9437f
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.82 2006-10-22 15:54:23 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.83 2006-10-24 09:28:30 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -53,6 +53,9 @@ using namespace std;
|
||||
#ifndef MIN
|
||||
#define MIN(A,B) (A<B?A:B)
|
||||
#endif
|
||||
|
||||
#undef MTIME_IN_VALUE
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
namespace Rcl {
|
||||
#endif
|
||||
@ -76,15 +79,9 @@ class Native {
|
||||
Db *m_db;
|
||||
bool m_isopen;
|
||||
bool m_iswritable;
|
||||
Db::OpenMode m_mode;
|
||||
string m_basedir;
|
||||
|
||||
// List of directories for additional databases to query
|
||||
list<string> m_extraDbs;
|
||||
|
||||
// Indexing
|
||||
Xapian::WritableDatabase wdb;
|
||||
vector<bool> updated;
|
||||
|
||||
// Querying
|
||||
Xapian::Database db;
|
||||
@ -93,48 +90,34 @@ class Native {
|
||||
Xapian::Enquire *enquire; // Open query descriptor.
|
||||
Xapian::MSet mset; // Partial result set
|
||||
|
||||
Native(Db *db)
|
||||
: m_db(db),
|
||||
m_isopen(false), m_iswritable(false), enquire(0)
|
||||
{ }
|
||||
|
||||
~Native() {
|
||||
delete enquire;
|
||||
}
|
||||
|
||||
string makeAbstract(Xapian::docid id, const list<string>& terms);
|
||||
|
||||
bool dbDataToRclDoc(std::string &data, Doc &doc,
|
||||
int qopts,
|
||||
Xapian::docid docid,
|
||||
const list<string>& terms);
|
||||
|
||||
/** Compute list of subdocuments for a given path (given by hash) */
|
||||
bool subDocs(const string &hash, vector<Xapian::docid>& docids) {
|
||||
/** Compute list of subdocuments for a given path (given by hash)
|
||||
* We look for all Q terms beginning with the path/hash
|
||||
* As suggested by James Aylett, a better method would be to add
|
||||
* a single term (ie: XP/path/to/file) to all subdocs, then finding
|
||||
* them would be a simple matter of retrieving the posting list for the
|
||||
* term. There would still be a need for the current Qterm though, as a
|
||||
* unique term for replace_document, and for retrieving by
|
||||
* path/ipath (history)
|
||||
*/
|
||||
bool subDocs(const string &hash, vector<Xapian::docid>& docids);
|
||||
|
||||
docids.clear();
|
||||
string qterm = "Q"+ hash + "|";
|
||||
Xapian::Database db = m_iswritable ? wdb: db;
|
||||
Xapian::TermIterator it = db.allterms_begin();
|
||||
it.skip_to(qterm);
|
||||
string ermsg;
|
||||
try {
|
||||
for (;it != db.allterms_end(); it++) {
|
||||
// If current term does not begin with qterm or has
|
||||
// another |, not the same file
|
||||
if ((*it).find(qterm) != 0 ||
|
||||
(*it).find_last_of("|") != qterm.length() -1)
|
||||
break;
|
||||
docids.push_back(*(db.postlist_begin(*it)));
|
||||
}
|
||||
return true;
|
||||
} catch (const Xapian::Error &e) {
|
||||
ermsg = e.get_msg().c_str();
|
||||
} catch (...) {
|
||||
ermsg= "Unknown error";
|
||||
}
|
||||
LOGERR(("Rcl::Db::subDocs: %s\n", ermsg.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
Native(Db *db)
|
||||
: m_db(db),
|
||||
m_isopen(false), m_iswritable(false), m_mode(Db::DbRO), enquire(0)
|
||||
{ }
|
||||
~Native() {
|
||||
delete enquire;
|
||||
}
|
||||
/** Keep this inline */
|
||||
bool filterMatch(Db *rdb, Xapian::Document &xdoc) {
|
||||
// Parse xapian document's data and populate doc fields
|
||||
string data = xdoc.get_data();
|
||||
@ -150,32 +133,50 @@ class Native {
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Perform stem expansion across all dbs configured for searching
|
||||
list<string> stemExpand(const string& lang,
|
||||
const string& term)
|
||||
{
|
||||
list<string> dirs = m_extraDbs;
|
||||
dirs.push_front(m_basedir);
|
||||
list<string> exp;
|
||||
for (list<string>::iterator it = dirs.begin();
|
||||
it != dirs.end(); it++) {
|
||||
list<string> more = StemDb::stemExpand(*it, lang, term);
|
||||
LOGDEB1(("Native::stemExpand: Got %d from %s\n",
|
||||
more.size(), it->c_str()));
|
||||
exp.splice(exp.end(), more);
|
||||
}
|
||||
exp.sort();
|
||||
exp.unique();
|
||||
LOGDEB1(("Native::stemExpand: final count %d \n", exp.size()));
|
||||
return exp;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/* See comment in class declaration */
|
||||
bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
|
||||
{
|
||||
docids.clear();
|
||||
string qterm = "Q"+ hash + "|";
|
||||
string ermsg;
|
||||
|
||||
for (int tries = 0; tries < 2; tries++) {
|
||||
try {
|
||||
Xapian::TermIterator it = db.allterms_begin();
|
||||
it.skip_to(qterm);
|
||||
for (;it != db.allterms_end(); it++) {
|
||||
// If current term does not begin with qterm or has
|
||||
// another |, not the same file
|
||||
if ((*it).find(qterm) != 0 ||
|
||||
(*it).find_last_of("|") != qterm.length() -1)
|
||||
break;
|
||||
docids.push_back(*(db.postlist_begin(*it)));
|
||||
}
|
||||
return true;
|
||||
} catch (const Xapian::DatabaseModifiedError &e) {
|
||||
LOGDEB(("Db::subDocs: got modified error. reopen/retry\n"));
|
||||
// Can't use reOpen here, it would delete *me*
|
||||
db = Xapian::Database(m_db->m_basedir);
|
||||
} catch (const Xapian::Error &e) {
|
||||
ermsg = e.get_msg().c_str();
|
||||
break;
|
||||
} catch (...) {
|
||||
ermsg= "Unknown error";
|
||||
break;
|
||||
}
|
||||
}
|
||||
LOGERR(("Rcl::Db::subDocs: %s\n", ermsg.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/* Rcl::Db methods ///////////////////////////////// */
|
||||
|
||||
Db::Db()
|
||||
: m_qOpts(QO_NONE), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
||||
m_synthAbsWordCtxLen(4)
|
||||
: m_ndb(0), m_qOpts(QO_NONE), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
||||
m_synthAbsWordCtxLen(4), m_mode(Db::DbRO)
|
||||
{
|
||||
m_ndb = new Native(this);
|
||||
}
|
||||
@ -187,14 +188,9 @@ Db::~Db()
|
||||
return;
|
||||
LOGDEB(("Db::~Db: isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
||||
m_ndb->m_iswritable));
|
||||
if (m_ndb->m_isopen == false)
|
||||
return;
|
||||
const char *ermsg = "Unknown error";
|
||||
try {
|
||||
LOGDEB(("Db::~Db: closing native database\n"));
|
||||
if (m_ndb->m_iswritable == true) {
|
||||
m_ndb->wdb.flush();
|
||||
}
|
||||
// Used to do a flush here, but doesnt seem necessary
|
||||
delete m_ndb;
|
||||
m_ndb = 0;
|
||||
return;
|
||||
@ -212,6 +208,9 @@ Db::~Db()
|
||||
|
||||
bool Db::open(const string& dir, OpenMode mode, int qops)
|
||||
{
|
||||
bool keep_updated = (qops & QO_KEEP_UPDATED) != 0;
|
||||
qops &= ~QO_KEEP_UPDATED;
|
||||
|
||||
if (m_ndb == 0)
|
||||
return false;
|
||||
LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
||||
@ -231,20 +230,28 @@ bool Db::open(const string& dir, OpenMode mode, int qops)
|
||||
int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
|
||||
Xapian::DB_CREATE_OR_OVERWRITE;
|
||||
m_ndb->wdb = Xapian::WritableDatabase(dir, action);
|
||||
m_ndb->m_iswritable = true;
|
||||
// We open a readonly object in addition to the r/w
|
||||
// one because some operations are faster when
|
||||
// performed through a Database (no forced flushes on
|
||||
// allterms_begin(), ie, used in subDocs()
|
||||
m_ndb->db = Xapian::Database(dir);
|
||||
LOGDEB(("Db::open: lastdocid: %d\n",
|
||||
m_ndb->wdb.get_lastdocid()));
|
||||
m_ndb->updated.resize(m_ndb->wdb.get_lastdocid() + 1);
|
||||
for (unsigned int i = 0; i < m_ndb->updated.size(); i++)
|
||||
m_ndb->updated[i] = false;
|
||||
m_ndb->m_iswritable = true;
|
||||
if (!keep_updated) {
|
||||
LOGDEB2(("Db::open: resetting updated\n"));
|
||||
updated.resize(m_ndb->wdb.get_lastdocid() + 1);
|
||||
for (unsigned int i = 0; i < updated.size(); i++)
|
||||
updated[i] = false;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case DbRO:
|
||||
default:
|
||||
m_ndb->m_iswritable = false;
|
||||
m_ndb->db = Xapian::Database(dir);
|
||||
for (list<string>::iterator it = m_ndb->m_extraDbs.begin();
|
||||
it != m_ndb->m_extraDbs.end(); it++) {
|
||||
for (list<string>::iterator it = m_extraDbs.begin();
|
||||
it != m_extraDbs.end(); it++) {
|
||||
string aerr;
|
||||
LOGDEB(("Db::Open: adding query db [%s]\n", it->c_str()));
|
||||
aerr.erase();
|
||||
@ -266,10 +273,9 @@ bool Db::open(const string& dir, OpenMode mode, int qops)
|
||||
}
|
||||
break;
|
||||
}
|
||||
m_qOpts = qops;
|
||||
m_ndb->m_mode = mode;
|
||||
m_mode = mode;
|
||||
m_ndb->m_isopen = true;
|
||||
m_ndb->m_basedir = dir;
|
||||
m_basedir = dir;
|
||||
return true;
|
||||
} catch (const Xapian::Error &e) {
|
||||
ermsg = e.get_msg().c_str();
|
||||
@ -287,9 +293,7 @@ bool Db::open(const string& dir, OpenMode mode, int qops)
|
||||
|
||||
string Db::getDbDir()
|
||||
{
|
||||
if (m_ndb == 0)
|
||||
return "";
|
||||
return m_ndb->m_basedir;
|
||||
return m_basedir;
|
||||
}
|
||||
|
||||
// Note: xapian has no close call, we delete and recreate the db
|
||||
@ -303,10 +307,7 @@ bool Db::close()
|
||||
return true;
|
||||
const char *ermsg = "Unknown";
|
||||
try {
|
||||
if (m_ndb->m_iswritable == true) {
|
||||
m_ndb->wdb.flush();
|
||||
LOGDEB(("Rcl:Db: Called xapian flush\n"));
|
||||
}
|
||||
// Used to do a flush here. Cant see why it should be necessary.
|
||||
delete m_ndb;
|
||||
m_ndb = new Native(this);
|
||||
if (m_ndb)
|
||||
@ -329,7 +330,7 @@ bool Db::reOpen()
|
||||
if (m_ndb && m_ndb->m_isopen) {
|
||||
if (!close())
|
||||
return false;
|
||||
if (!open(m_ndb->m_basedir, m_ndb->m_mode, m_qOpts)) {
|
||||
if (!open(m_basedir, m_mode, m_qOpts | QO_KEEP_UPDATED)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -353,9 +354,8 @@ bool Db::addQueryDb(const string &dir)
|
||||
return false;
|
||||
if (m_ndb->m_iswritable)
|
||||
return false;
|
||||
if (find(m_ndb->m_extraDbs.begin(), m_ndb->m_extraDbs.end(), dir) ==
|
||||
m_ndb->m_extraDbs.end()) {
|
||||
m_ndb->m_extraDbs.push_back(dir);
|
||||
if (find(m_extraDbs.begin(), m_extraDbs.end(), dir) == m_extraDbs.end()) {
|
||||
m_extraDbs.push_back(dir);
|
||||
}
|
||||
return reOpen();
|
||||
}
|
||||
@ -367,12 +367,12 @@ bool Db::rmQueryDb(const string &dir)
|
||||
if (m_ndb->m_iswritable)
|
||||
return false;
|
||||
if (dir.empty()) {
|
||||
m_ndb->m_extraDbs.clear();
|
||||
m_extraDbs.clear();
|
||||
} else {
|
||||
list<string>::iterator it = find(m_ndb->m_extraDbs.begin(),
|
||||
m_ndb->m_extraDbs.end(), dir);
|
||||
if (it != m_ndb->m_extraDbs.end()) {
|
||||
m_ndb->m_extraDbs.erase(it);
|
||||
list<string>::iterator it = find(m_extraDbs.begin(),
|
||||
m_extraDbs.end(), dir);
|
||||
if (it != m_extraDbs.end()) {
|
||||
m_extraDbs.erase(it);
|
||||
}
|
||||
}
|
||||
return reOpen();
|
||||
@ -600,6 +600,9 @@ bool Db::add(const string &fn, const Doc &idoc,
|
||||
string uniterm;
|
||||
if (doc.ipath.empty()) {
|
||||
uniterm = "P" + hash;
|
||||
#ifdef MTIME_IN_VALUE
|
||||
newdocument.add_value(1, doc.fmtime);
|
||||
#endif
|
||||
} else {
|
||||
uniterm = "Q" + hash + "|" + doc.ipath;
|
||||
}
|
||||
@ -655,8 +658,8 @@ bool Db::add(const string &fn, const Doc &idoc,
|
||||
try {
|
||||
Xapian::docid did =
|
||||
m_ndb->wdb.replace_document(uniterm, newdocument);
|
||||
if (did < m_ndb->updated.size()) {
|
||||
m_ndb->updated[did] = true;
|
||||
if (did < updated.size()) {
|
||||
updated[did] = true;
|
||||
LOGDEB(("Db::add: docid %d updated [%s , %s]\n", did, fnc,
|
||||
doc.ipath.c_str()));
|
||||
} else {
|
||||
@ -687,68 +690,82 @@ bool Db::needUpdate(const string &filename, const struct stat *stp)
|
||||
string hash;
|
||||
pathHash(filename, hash, PATHHASHLEN);
|
||||
string pterm = "P" + hash;
|
||||
const char *ermsg;
|
||||
string ermsg;
|
||||
|
||||
// Look for all documents with this path. We need to look at all
|
||||
// to set their existence flag. We check the update time on the
|
||||
// fmtime field which will be identical for all docs inside a
|
||||
// multi-document file (we currently always reindex all if the
|
||||
// file changed)
|
||||
Xapian::PostingIterator doc;
|
||||
try {
|
||||
// Check the date using the Pterm doc or pseudo-doc
|
||||
Xapian::PostingIterator docid = m_ndb->wdb.postlist_begin(pterm);
|
||||
if (docid == m_ndb->wdb.postlist_end(pterm)) {
|
||||
// If no document exist with this path, we do need update
|
||||
LOGDEB2(("Db::needUpdate: no such path: [%s]\n", pterm.c_str()));
|
||||
return true;
|
||||
}
|
||||
Xapian::Document doc = m_ndb->wdb.get_document(*docid);
|
||||
string data = doc.get_data();
|
||||
const char *cp = strstr(data.c_str(), "fmtime=");
|
||||
if (cp) {
|
||||
cp += 7;
|
||||
} else {
|
||||
cp = strstr(data.c_str(), "mtime=");
|
||||
if (cp)
|
||||
cp+= 6;
|
||||
}
|
||||
long mtime = cp ? atol(cp) : 0;
|
||||
if (mtime < stp->st_mtime) {
|
||||
LOGDEB2(("Db::needUpdate: yes: mtime: Db %ld file %ld\n",
|
||||
(long)mtime, (long)stp->st_mtime));
|
||||
// Db is not up to date. Let's index the file
|
||||
return true;
|
||||
}
|
||||
|
||||
LOGDEB2(("Db::needUpdate: uptodate: [%s]\n", pterm.c_str()));
|
||||
|
||||
// Up to date.
|
||||
|
||||
// Set the uptodate flag for doc / pseudo doc
|
||||
m_ndb->updated[*docid] = true;
|
||||
|
||||
// Set the existence flag for all the subdocs (if any)
|
||||
vector<Xapian::docid> docids;
|
||||
if (!m_ndb->subDocs(hash, docids)) {
|
||||
LOGERR(("Rcl::Db::needUpdate: can't get subdocs list\n"));
|
||||
return true;
|
||||
}
|
||||
for (vector<Xapian::docid>::iterator it = docids.begin();
|
||||
it != docids.end(); it++) {
|
||||
if (*it < m_ndb->updated.size()) {
|
||||
LOGDEB2(("Db::needUpdate: set flag for docid %d\n", *it));
|
||||
m_ndb->updated[*it] = true;
|
||||
for (int tries = 0; tries < 2; tries++) {
|
||||
try {
|
||||
// Check the date using the Pterm doc or pseudo-doc
|
||||
Xapian::PostingIterator docid = m_ndb->db.postlist_begin(pterm);
|
||||
if (docid == m_ndb->db.postlist_end(pterm)) {
|
||||
// If no document exist with this path, we do need update
|
||||
LOGDEB2(("Db::needUpdate: no path: [%s]\n", pterm.c_str()));
|
||||
return true;
|
||||
}
|
||||
Xapian::Document doc = m_ndb->db.get_document(*docid);
|
||||
#ifdef MTIME_IN_VALUE
|
||||
// This is slightly faster, but we'd need to setup a conversion
|
||||
// for old dbs, and it's not really worth it
|
||||
string value = doc.get_value(1);
|
||||
const char *cp = value.c_str();
|
||||
#else
|
||||
string data = doc.get_data();
|
||||
const char *cp = strstr(data.c_str(), "fmtime=");
|
||||
if (cp) {
|
||||
cp += 7;
|
||||
} else {
|
||||
cp = strstr(data.c_str(), "mtime=");
|
||||
if (cp)
|
||||
cp+= 6;
|
||||
}
|
||||
#endif
|
||||
long mtime = cp ? atol(cp) : 0;
|
||||
if (mtime < stp->st_mtime) {
|
||||
LOGDEB2(("Db::needUpdate: yes: mtime: Db %ld file %ld\n",
|
||||
(long)mtime, (long)stp->st_mtime));
|
||||
// Db is not up to date. Let's index the file
|
||||
return true;
|
||||
}
|
||||
|
||||
LOGDEB2(("Db::needUpdate: uptodate: [%s]\n", pterm.c_str()));
|
||||
|
||||
// Up to date.
|
||||
|
||||
// Set the uptodate flag for doc / pseudo doc
|
||||
updated[*docid] = true;
|
||||
|
||||
// Set the existence flag for all the subdocs (if any)
|
||||
vector<Xapian::docid> docids;
|
||||
if (!m_ndb->subDocs(hash, docids)) {
|
||||
LOGERR(("Rcl::Db::needUpdate: can't get subdocs list\n"));
|
||||
return true;
|
||||
}
|
||||
for (vector<Xapian::docid>::iterator it = docids.begin();
|
||||
it != docids.end(); it++) {
|
||||
if (*it < updated.size()) {
|
||||
LOGDEB2(("Db::needUpdate: set flag for docid %d\n", *it));
|
||||
updated[*it] = true;
|
||||
}
|
||||
}
|
||||
// LOGDEB(("Db::needUpdate: used %d mS\n", chron.millis()));
|
||||
return false;
|
||||
} catch (const Xapian::DatabaseModifiedError &e) {
|
||||
LOGDEB(("Db::needUpdate: got modified error. reopen/retry\n"));
|
||||
reOpen();
|
||||
} catch (const Xapian::Error &e) {
|
||||
ermsg = e.get_msg();
|
||||
break;
|
||||
} catch (...) {
|
||||
ermsg= "Unknown error";
|
||||
break;
|
||||
}
|
||||
// LOGDEB(("Db::needUpdate: used %d mS\n", chron.millis()));
|
||||
return false;
|
||||
} catch (const Xapian::Error &e) {
|
||||
ermsg = e.get_msg().c_str();
|
||||
} catch (...) {
|
||||
ermsg= "Unknown error";
|
||||
}
|
||||
LOGERR(("Db::needUpdate: error while checking existence: %s\n", ermsg));
|
||||
LOGERR(("Db::needUpdate: error while checking existence: %s\n",
|
||||
ermsg.c_str()));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -760,7 +777,7 @@ list<string> Db::getStemLangs()
|
||||
list<string> dirs;
|
||||
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
||||
return dirs;
|
||||
dirs = StemDb::getLangs(m_ndb->m_basedir);
|
||||
dirs = StemDb::getLangs(m_basedir);
|
||||
return dirs;
|
||||
}
|
||||
|
||||
@ -772,7 +789,7 @@ bool Db::deleteStemDb(const string& lang)
|
||||
LOGDEB(("Db::deleteStemDb(%s)\n", lang.c_str()));
|
||||
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
||||
return false;
|
||||
return StemDb::deleteDb(m_ndb->m_basedir, lang);
|
||||
return StemDb::deleteDb(m_basedir, lang);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -788,7 +805,7 @@ bool Db::createStemDb(const string& lang)
|
||||
return false;
|
||||
|
||||
return StemDb:: createDb(m_ndb->m_iswritable ? m_ndb->wdb : m_ndb->db,
|
||||
m_ndb->m_basedir, lang);
|
||||
m_basedir, lang);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -804,7 +821,7 @@ bool Db::purge()
|
||||
return false;
|
||||
LOGDEB(("Db::purge: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
||||
m_ndb->m_iswritable));
|
||||
if (m_ndb->m_isopen == false || m_ndb->m_iswritable == false)
|
||||
if (m_ndb->m_isopen == false || m_ndb->m_iswritable == false)
|
||||
return false;
|
||||
|
||||
// There seems to be problems with the document delete code, when
|
||||
@ -819,8 +836,8 @@ bool Db::purge()
|
||||
} catch (...) {
|
||||
LOGDEB(("Db::purge: 1st flush failed\n"));
|
||||
}
|
||||
for (Xapian::docid docid = 1; docid < m_ndb->updated.size(); ++docid) {
|
||||
if (!m_ndb->updated[docid]) {
|
||||
for (Xapian::docid docid = 1; docid < updated.size(); ++docid) {
|
||||
if (!updated[docid]) {
|
||||
try {
|
||||
m_ndb->wdb.delete_document(docid);
|
||||
LOGDEB(("Db::purge: deleted document #%d\n", docid));
|
||||
@ -914,7 +931,7 @@ class wsQData : public TextSplitCB {
|
||||
// phrase terms (no stem expansion in this case)
|
||||
static void stringToXapianQueries(const string &iq,
|
||||
const string& stemlang,
|
||||
Native *m_ndb,
|
||||
Db *db,
|
||||
list<Xapian::Query> &pqueries,
|
||||
unsigned int opts = Db::QO_NONE)
|
||||
{
|
||||
@ -962,7 +979,7 @@ static void stringToXapianQueries(const string &iq,
|
||||
dumb_string(term, term1);
|
||||
// Possibly perform stem compression/expansion
|
||||
if (!nostemexp && (opts & Db::QO_STEM)) {
|
||||
exp = m_ndb->stemExpand(stemlang, term1);
|
||||
exp = db->stemExpand(stemlang, term1);
|
||||
} else {
|
||||
exp.push_back(term1);
|
||||
}
|
||||
@ -1061,7 +1078,7 @@ bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
|
||||
}
|
||||
|
||||
if (!sdata.allwords.empty()) {
|
||||
stringToXapianQueries(sdata.allwords, stemlang, m_ndb, pqueries, m_qOpts);
|
||||
stringToXapianQueries(sdata.allwords, stemlang, this,pqueries,m_qOpts);
|
||||
if (!pqueries.empty()) {
|
||||
Xapian::Query nq =
|
||||
Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
|
||||
@ -1073,7 +1090,7 @@ bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
|
||||
}
|
||||
|
||||
if (!sdata.orwords.empty()) {
|
||||
stringToXapianQueries(sdata.orwords, stemlang, m_ndb, pqueries, m_qOpts);
|
||||
stringToXapianQueries(sdata.orwords, stemlang, this,pqueries,m_qOpts);
|
||||
if (!pqueries.empty()) {
|
||||
Xapian::Query nq =
|
||||
Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
||||
@ -1085,7 +1102,7 @@ bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
|
||||
}
|
||||
|
||||
if (!sdata.orwords1.empty()) {
|
||||
stringToXapianQueries(sdata.orwords1, stemlang, m_ndb, pqueries, m_qOpts);
|
||||
stringToXapianQueries(sdata.orwords1, stemlang, this,pqueries,m_qOpts);
|
||||
if (!pqueries.empty()) {
|
||||
Xapian::Query nq =
|
||||
Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
||||
@ -1099,7 +1116,7 @@ bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
|
||||
if (!sdata.phrase.empty()) {
|
||||
Xapian::Query nq;
|
||||
string s = string("\"") + sdata.phrase + string("\"");
|
||||
stringToXapianQueries(s, stemlang, m_ndb, pqueries);
|
||||
stringToXapianQueries(s, stemlang, this, pqueries);
|
||||
if (!pqueries.empty()) {
|
||||
// There should be a single list element phrase query.
|
||||
xq = xq.empty() ? *pqueries.begin() :
|
||||
@ -1124,7 +1141,7 @@ bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
|
||||
// the only term in the query. We do no stem expansion on 'No'
|
||||
// words. Should we ?
|
||||
if (!sdata.nowords.empty()) {
|
||||
stringToXapianQueries(sdata.nowords, stemlang, m_ndb, pqueries);
|
||||
stringToXapianQueries(sdata.nowords, stemlang, this, pqueries);
|
||||
if (!pqueries.empty()) {
|
||||
Xapian::Query nq;
|
||||
nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
||||
@ -1172,7 +1189,7 @@ list<string> Db::completions(const string &root, const string &lang, int max)
|
||||
res.push_back(*it);
|
||||
++n;
|
||||
} else {
|
||||
list<string> stemexps = m_ndb->stemExpand(lang, *it);
|
||||
list<string> stemexps = stemExpand(lang, *it);
|
||||
unsigned int cnt =
|
||||
(int)stemexps.size() > max - n ? max - n : stemexps.size();
|
||||
list<string>::iterator sit = stemexps.begin();
|
||||
@ -1231,6 +1248,24 @@ bool Db::termExists(const string& word)
|
||||
return true;
|
||||
}
|
||||
|
||||
list<string> Db::stemExpand(const string& lang, const string& term)
|
||||
{
|
||||
list<string> dirs = m_extraDbs;
|
||||
dirs.push_front(m_basedir);
|
||||
list<string> exp;
|
||||
for (list<string>::iterator it = dirs.begin();
|
||||
it != dirs.end(); it++) {
|
||||
list<string> more = StemDb::stemExpand(*it, lang, term);
|
||||
LOGDEB1(("Db::stemExpand: Got %d from %s\n",
|
||||
more.size(), it->c_str()));
|
||||
exp.splice(exp.end(), more);
|
||||
}
|
||||
exp.sort();
|
||||
exp.unique();
|
||||
LOGDEB1(("Db:::stemExpand: final count %d \n", exp.size()));
|
||||
return exp;
|
||||
}
|
||||
|
||||
bool Db::stemDiffers(const string& lang, const string& word,
|
||||
const string& base)
|
||||
{
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.38 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rcldb.h,v 1.39 2006-10-24 09:28:31 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
@ -116,8 +116,9 @@ class Db {
|
||||
~Db();
|
||||
|
||||
enum OpenMode {DbRO, DbUpd, DbTrunc};
|
||||
// KEEP_UPDATED is internal use by reOpen() only
|
||||
enum QueryOpts {QO_NONE=0, QO_STEM = 1, QO_BUILD_ABSTRACT = 2,
|
||||
QO_REPLACE_ABSTRACT = 4};
|
||||
QO_REPLACE_ABSTRACT = 4, QO_KEEP_UPDATED = 8};
|
||||
|
||||
bool open(const string &dbdir, OpenMode mode, int qops = QO_NONE);
|
||||
bool close();
|
||||
@ -205,6 +206,9 @@ class Db {
|
||||
/** Test if terms stem to different roots. */
|
||||
bool stemDiffers(const string& lang, const string& term,
|
||||
const string& base);
|
||||
|
||||
/** Perform stem expansion across all dbs configured for searching */
|
||||
list<string> stemExpand(const string& lang, const string& term);
|
||||
|
||||
private:
|
||||
|
||||
@ -230,6 +234,16 @@ private:
|
||||
// when building the abstract
|
||||
int m_synthAbsWordCtxLen;
|
||||
|
||||
// Database directory
|
||||
string m_basedir;
|
||||
|
||||
// List of directories for additional databases to query
|
||||
list<string> m_extraDbs;
|
||||
|
||||
OpenMode m_mode;
|
||||
|
||||
vector<bool> updated;
|
||||
|
||||
bool reOpen(); // Close/open, same mode/opts
|
||||
/* Copyconst and assignemt private and forbidden */
|
||||
Db(const Db &) {}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user