fix slowness in needUpdate by using Database instead of WritableDatabase
This commit is contained in:
parent
d8f8dd851e
commit
c808c9437f
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.82 2006-10-22 15:54:23 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.83 2006-10-24 09:28:30 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -53,6 +53,9 @@ using namespace std;
|
|||||||
#ifndef MIN
|
#ifndef MIN
|
||||||
#define MIN(A,B) (A<B?A:B)
|
#define MIN(A,B) (A<B?A:B)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#undef MTIME_IN_VALUE
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
#ifndef NO_NAMESPACES
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
#endif
|
#endif
|
||||||
@ -76,15 +79,9 @@ class Native {
|
|||||||
Db *m_db;
|
Db *m_db;
|
||||||
bool m_isopen;
|
bool m_isopen;
|
||||||
bool m_iswritable;
|
bool m_iswritable;
|
||||||
Db::OpenMode m_mode;
|
|
||||||
string m_basedir;
|
|
||||||
|
|
||||||
// List of directories for additional databases to query
|
|
||||||
list<string> m_extraDbs;
|
|
||||||
|
|
||||||
// Indexing
|
// Indexing
|
||||||
Xapian::WritableDatabase wdb;
|
Xapian::WritableDatabase wdb;
|
||||||
vector<bool> updated;
|
|
||||||
|
|
||||||
// Querying
|
// Querying
|
||||||
Xapian::Database db;
|
Xapian::Database db;
|
||||||
@ -93,48 +90,34 @@ class Native {
|
|||||||
Xapian::Enquire *enquire; // Open query descriptor.
|
Xapian::Enquire *enquire; // Open query descriptor.
|
||||||
Xapian::MSet mset; // Partial result set
|
Xapian::MSet mset; // Partial result set
|
||||||
|
|
||||||
|
Native(Db *db)
|
||||||
|
: m_db(db),
|
||||||
|
m_isopen(false), m_iswritable(false), enquire(0)
|
||||||
|
{ }
|
||||||
|
|
||||||
|
~Native() {
|
||||||
|
delete enquire;
|
||||||
|
}
|
||||||
|
|
||||||
string makeAbstract(Xapian::docid id, const list<string>& terms);
|
string makeAbstract(Xapian::docid id, const list<string>& terms);
|
||||||
|
|
||||||
bool dbDataToRclDoc(std::string &data, Doc &doc,
|
bool dbDataToRclDoc(std::string &data, Doc &doc,
|
||||||
int qopts,
|
int qopts,
|
||||||
Xapian::docid docid,
|
Xapian::docid docid,
|
||||||
const list<string>& terms);
|
const list<string>& terms);
|
||||||
|
|
||||||
/** Compute list of subdocuments for a given path (given by hash) */
|
/** Compute list of subdocuments for a given path (given by hash)
|
||||||
bool subDocs(const string &hash, vector<Xapian::docid>& docids) {
|
* We look for all Q terms beginning with the path/hash
|
||||||
|
* As suggested by James Aylett, a better method would be to add
|
||||||
|
* a single term (ie: XP/path/to/file) to all subdocs, then finding
|
||||||
|
* them would be a simple matter of retrieving the posting list for the
|
||||||
|
* term. There would still be a need for the current Qterm though, as a
|
||||||
|
* unique term for replace_document, and for retrieving by
|
||||||
|
* path/ipath (history)
|
||||||
|
*/
|
||||||
|
bool subDocs(const string &hash, vector<Xapian::docid>& docids);
|
||||||
|
|
||||||
docids.clear();
|
/** Keep this inline */
|
||||||
string qterm = "Q"+ hash + "|";
|
|
||||||
Xapian::Database db = m_iswritable ? wdb: db;
|
|
||||||
Xapian::TermIterator it = db.allterms_begin();
|
|
||||||
it.skip_to(qterm);
|
|
||||||
string ermsg;
|
|
||||||
try {
|
|
||||||
for (;it != db.allterms_end(); it++) {
|
|
||||||
// If current term does not begin with qterm or has
|
|
||||||
// another |, not the same file
|
|
||||||
if ((*it).find(qterm) != 0 ||
|
|
||||||
(*it).find_last_of("|") != qterm.length() -1)
|
|
||||||
break;
|
|
||||||
docids.push_back(*(db.postlist_begin(*it)));
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
} catch (const Xapian::Error &e) {
|
|
||||||
ermsg = e.get_msg().c_str();
|
|
||||||
} catch (...) {
|
|
||||||
ermsg= "Unknown error";
|
|
||||||
}
|
|
||||||
LOGERR(("Rcl::Db::subDocs: %s\n", ermsg.c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
Native(Db *db)
|
|
||||||
: m_db(db),
|
|
||||||
m_isopen(false), m_iswritable(false), m_mode(Db::DbRO), enquire(0)
|
|
||||||
{ }
|
|
||||||
~Native() {
|
|
||||||
delete enquire;
|
|
||||||
}
|
|
||||||
bool filterMatch(Db *rdb, Xapian::Document &xdoc) {
|
bool filterMatch(Db *rdb, Xapian::Document &xdoc) {
|
||||||
// Parse xapian document's data and populate doc fields
|
// Parse xapian document's data and populate doc fields
|
||||||
string data = xdoc.get_data();
|
string data = xdoc.get_data();
|
||||||
@ -150,32 +133,50 @@ class Native {
|
|||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Perform stem expansion across all dbs configured for searching
|
|
||||||
list<string> stemExpand(const string& lang,
|
|
||||||
const string& term)
|
|
||||||
{
|
|
||||||
list<string> dirs = m_extraDbs;
|
|
||||||
dirs.push_front(m_basedir);
|
|
||||||
list<string> exp;
|
|
||||||
for (list<string>::iterator it = dirs.begin();
|
|
||||||
it != dirs.end(); it++) {
|
|
||||||
list<string> more = StemDb::stemExpand(*it, lang, term);
|
|
||||||
LOGDEB1(("Native::stemExpand: Got %d from %s\n",
|
|
||||||
more.size(), it->c_str()));
|
|
||||||
exp.splice(exp.end(), more);
|
|
||||||
}
|
|
||||||
exp.sort();
|
|
||||||
exp.unique();
|
|
||||||
LOGDEB1(("Native::stemExpand: final count %d \n", exp.size()));
|
|
||||||
return exp;
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* See comment in class declaration */
|
||||||
|
bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
|
||||||
|
{
|
||||||
|
docids.clear();
|
||||||
|
string qterm = "Q"+ hash + "|";
|
||||||
|
string ermsg;
|
||||||
|
|
||||||
|
for (int tries = 0; tries < 2; tries++) {
|
||||||
|
try {
|
||||||
|
Xapian::TermIterator it = db.allterms_begin();
|
||||||
|
it.skip_to(qterm);
|
||||||
|
for (;it != db.allterms_end(); it++) {
|
||||||
|
// If current term does not begin with qterm or has
|
||||||
|
// another |, not the same file
|
||||||
|
if ((*it).find(qterm) != 0 ||
|
||||||
|
(*it).find_last_of("|") != qterm.length() -1)
|
||||||
|
break;
|
||||||
|
docids.push_back(*(db.postlist_begin(*it)));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} catch (const Xapian::DatabaseModifiedError &e) {
|
||||||
|
LOGDEB(("Db::subDocs: got modified error. reopen/retry\n"));
|
||||||
|
// Can't use reOpen here, it would delete *me*
|
||||||
|
db = Xapian::Database(m_db->m_basedir);
|
||||||
|
} catch (const Xapian::Error &e) {
|
||||||
|
ermsg = e.get_msg().c_str();
|
||||||
|
break;
|
||||||
|
} catch (...) {
|
||||||
|
ermsg= "Unknown error";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOGERR(("Rcl::Db::subDocs: %s\n", ermsg.c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Rcl::Db methods ///////////////////////////////// */
|
||||||
|
|
||||||
Db::Db()
|
Db::Db()
|
||||||
: m_qOpts(QO_NONE), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
: m_ndb(0), m_qOpts(QO_NONE), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
||||||
m_synthAbsWordCtxLen(4)
|
m_synthAbsWordCtxLen(4), m_mode(Db::DbRO)
|
||||||
{
|
{
|
||||||
m_ndb = new Native(this);
|
m_ndb = new Native(this);
|
||||||
}
|
}
|
||||||
@ -187,14 +188,9 @@ Db::~Db()
|
|||||||
return;
|
return;
|
||||||
LOGDEB(("Db::~Db: isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
LOGDEB(("Db::~Db: isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
||||||
m_ndb->m_iswritable));
|
m_ndb->m_iswritable));
|
||||||
if (m_ndb->m_isopen == false)
|
|
||||||
return;
|
|
||||||
const char *ermsg = "Unknown error";
|
const char *ermsg = "Unknown error";
|
||||||
try {
|
try {
|
||||||
LOGDEB(("Db::~Db: closing native database\n"));
|
// Used to do a flush here, but doesnt seem necessary
|
||||||
if (m_ndb->m_iswritable == true) {
|
|
||||||
m_ndb->wdb.flush();
|
|
||||||
}
|
|
||||||
delete m_ndb;
|
delete m_ndb;
|
||||||
m_ndb = 0;
|
m_ndb = 0;
|
||||||
return;
|
return;
|
||||||
@ -212,6 +208,9 @@ Db::~Db()
|
|||||||
|
|
||||||
bool Db::open(const string& dir, OpenMode mode, int qops)
|
bool Db::open(const string& dir, OpenMode mode, int qops)
|
||||||
{
|
{
|
||||||
|
bool keep_updated = (qops & QO_KEEP_UPDATED) != 0;
|
||||||
|
qops &= ~QO_KEEP_UPDATED;
|
||||||
|
|
||||||
if (m_ndb == 0)
|
if (m_ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
||||||
@ -231,20 +230,28 @@ bool Db::open(const string& dir, OpenMode mode, int qops)
|
|||||||
int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
|
int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
|
||||||
Xapian::DB_CREATE_OR_OVERWRITE;
|
Xapian::DB_CREATE_OR_OVERWRITE;
|
||||||
m_ndb->wdb = Xapian::WritableDatabase(dir, action);
|
m_ndb->wdb = Xapian::WritableDatabase(dir, action);
|
||||||
|
m_ndb->m_iswritable = true;
|
||||||
|
// We open a readonly object in addition to the r/w
|
||||||
|
// one because some operations are faster when
|
||||||
|
// performed through a Database (no forced flushes on
|
||||||
|
// allterms_begin(), ie, used in subDocs()
|
||||||
|
m_ndb->db = Xapian::Database(dir);
|
||||||
LOGDEB(("Db::open: lastdocid: %d\n",
|
LOGDEB(("Db::open: lastdocid: %d\n",
|
||||||
m_ndb->wdb.get_lastdocid()));
|
m_ndb->wdb.get_lastdocid()));
|
||||||
m_ndb->updated.resize(m_ndb->wdb.get_lastdocid() + 1);
|
if (!keep_updated) {
|
||||||
for (unsigned int i = 0; i < m_ndb->updated.size(); i++)
|
LOGDEB2(("Db::open: resetting updated\n"));
|
||||||
m_ndb->updated[i] = false;
|
updated.resize(m_ndb->wdb.get_lastdocid() + 1);
|
||||||
m_ndb->m_iswritable = true;
|
for (unsigned int i = 0; i < updated.size(); i++)
|
||||||
|
updated[i] = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case DbRO:
|
case DbRO:
|
||||||
default:
|
default:
|
||||||
m_ndb->m_iswritable = false;
|
m_ndb->m_iswritable = false;
|
||||||
m_ndb->db = Xapian::Database(dir);
|
m_ndb->db = Xapian::Database(dir);
|
||||||
for (list<string>::iterator it = m_ndb->m_extraDbs.begin();
|
for (list<string>::iterator it = m_extraDbs.begin();
|
||||||
it != m_ndb->m_extraDbs.end(); it++) {
|
it != m_extraDbs.end(); it++) {
|
||||||
string aerr;
|
string aerr;
|
||||||
LOGDEB(("Db::Open: adding query db [%s]\n", it->c_str()));
|
LOGDEB(("Db::Open: adding query db [%s]\n", it->c_str()));
|
||||||
aerr.erase();
|
aerr.erase();
|
||||||
@ -266,10 +273,9 @@ bool Db::open(const string& dir, OpenMode mode, int qops)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
m_qOpts = qops;
|
m_mode = mode;
|
||||||
m_ndb->m_mode = mode;
|
|
||||||
m_ndb->m_isopen = true;
|
m_ndb->m_isopen = true;
|
||||||
m_ndb->m_basedir = dir;
|
m_basedir = dir;
|
||||||
return true;
|
return true;
|
||||||
} catch (const Xapian::Error &e) {
|
} catch (const Xapian::Error &e) {
|
||||||
ermsg = e.get_msg().c_str();
|
ermsg = e.get_msg().c_str();
|
||||||
@ -287,9 +293,7 @@ bool Db::open(const string& dir, OpenMode mode, int qops)
|
|||||||
|
|
||||||
string Db::getDbDir()
|
string Db::getDbDir()
|
||||||
{
|
{
|
||||||
if (m_ndb == 0)
|
return m_basedir;
|
||||||
return "";
|
|
||||||
return m_ndb->m_basedir;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: xapian has no close call, we delete and recreate the db
|
// Note: xapian has no close call, we delete and recreate the db
|
||||||
@ -303,10 +307,7 @@ bool Db::close()
|
|||||||
return true;
|
return true;
|
||||||
const char *ermsg = "Unknown";
|
const char *ermsg = "Unknown";
|
||||||
try {
|
try {
|
||||||
if (m_ndb->m_iswritable == true) {
|
// Used to do a flush here. Cant see why it should be necessary.
|
||||||
m_ndb->wdb.flush();
|
|
||||||
LOGDEB(("Rcl:Db: Called xapian flush\n"));
|
|
||||||
}
|
|
||||||
delete m_ndb;
|
delete m_ndb;
|
||||||
m_ndb = new Native(this);
|
m_ndb = new Native(this);
|
||||||
if (m_ndb)
|
if (m_ndb)
|
||||||
@ -329,7 +330,7 @@ bool Db::reOpen()
|
|||||||
if (m_ndb && m_ndb->m_isopen) {
|
if (m_ndb && m_ndb->m_isopen) {
|
||||||
if (!close())
|
if (!close())
|
||||||
return false;
|
return false;
|
||||||
if (!open(m_ndb->m_basedir, m_ndb->m_mode, m_qOpts)) {
|
if (!open(m_basedir, m_mode, m_qOpts | QO_KEEP_UPDATED)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -353,9 +354,8 @@ bool Db::addQueryDb(const string &dir)
|
|||||||
return false;
|
return false;
|
||||||
if (m_ndb->m_iswritable)
|
if (m_ndb->m_iswritable)
|
||||||
return false;
|
return false;
|
||||||
if (find(m_ndb->m_extraDbs.begin(), m_ndb->m_extraDbs.end(), dir) ==
|
if (find(m_extraDbs.begin(), m_extraDbs.end(), dir) == m_extraDbs.end()) {
|
||||||
m_ndb->m_extraDbs.end()) {
|
m_extraDbs.push_back(dir);
|
||||||
m_ndb->m_extraDbs.push_back(dir);
|
|
||||||
}
|
}
|
||||||
return reOpen();
|
return reOpen();
|
||||||
}
|
}
|
||||||
@ -367,12 +367,12 @@ bool Db::rmQueryDb(const string &dir)
|
|||||||
if (m_ndb->m_iswritable)
|
if (m_ndb->m_iswritable)
|
||||||
return false;
|
return false;
|
||||||
if (dir.empty()) {
|
if (dir.empty()) {
|
||||||
m_ndb->m_extraDbs.clear();
|
m_extraDbs.clear();
|
||||||
} else {
|
} else {
|
||||||
list<string>::iterator it = find(m_ndb->m_extraDbs.begin(),
|
list<string>::iterator it = find(m_extraDbs.begin(),
|
||||||
m_ndb->m_extraDbs.end(), dir);
|
m_extraDbs.end(), dir);
|
||||||
if (it != m_ndb->m_extraDbs.end()) {
|
if (it != m_extraDbs.end()) {
|
||||||
m_ndb->m_extraDbs.erase(it);
|
m_extraDbs.erase(it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return reOpen();
|
return reOpen();
|
||||||
@ -600,6 +600,9 @@ bool Db::add(const string &fn, const Doc &idoc,
|
|||||||
string uniterm;
|
string uniterm;
|
||||||
if (doc.ipath.empty()) {
|
if (doc.ipath.empty()) {
|
||||||
uniterm = "P" + hash;
|
uniterm = "P" + hash;
|
||||||
|
#ifdef MTIME_IN_VALUE
|
||||||
|
newdocument.add_value(1, doc.fmtime);
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
uniterm = "Q" + hash + "|" + doc.ipath;
|
uniterm = "Q" + hash + "|" + doc.ipath;
|
||||||
}
|
}
|
||||||
@ -655,8 +658,8 @@ bool Db::add(const string &fn, const Doc &idoc,
|
|||||||
try {
|
try {
|
||||||
Xapian::docid did =
|
Xapian::docid did =
|
||||||
m_ndb->wdb.replace_document(uniterm, newdocument);
|
m_ndb->wdb.replace_document(uniterm, newdocument);
|
||||||
if (did < m_ndb->updated.size()) {
|
if (did < updated.size()) {
|
||||||
m_ndb->updated[did] = true;
|
updated[did] = true;
|
||||||
LOGDEB(("Db::add: docid %d updated [%s , %s]\n", did, fnc,
|
LOGDEB(("Db::add: docid %d updated [%s , %s]\n", did, fnc,
|
||||||
doc.ipath.c_str()));
|
doc.ipath.c_str()));
|
||||||
} else {
|
} else {
|
||||||
@ -687,68 +690,82 @@ bool Db::needUpdate(const string &filename, const struct stat *stp)
|
|||||||
string hash;
|
string hash;
|
||||||
pathHash(filename, hash, PATHHASHLEN);
|
pathHash(filename, hash, PATHHASHLEN);
|
||||||
string pterm = "P" + hash;
|
string pterm = "P" + hash;
|
||||||
const char *ermsg;
|
string ermsg;
|
||||||
|
|
||||||
// Look for all documents with this path. We need to look at all
|
// Look for all documents with this path. We need to look at all
|
||||||
// to set their existence flag. We check the update time on the
|
// to set their existence flag. We check the update time on the
|
||||||
// fmtime field which will be identical for all docs inside a
|
// fmtime field which will be identical for all docs inside a
|
||||||
// multi-document file (we currently always reindex all if the
|
// multi-document file (we currently always reindex all if the
|
||||||
// file changed)
|
// file changed)
|
||||||
Xapian::PostingIterator doc;
|
for (int tries = 0; tries < 2; tries++) {
|
||||||
try {
|
try {
|
||||||
// Check the date using the Pterm doc or pseudo-doc
|
// Check the date using the Pterm doc or pseudo-doc
|
||||||
Xapian::PostingIterator docid = m_ndb->wdb.postlist_begin(pterm);
|
Xapian::PostingIterator docid = m_ndb->db.postlist_begin(pterm);
|
||||||
if (docid == m_ndb->wdb.postlist_end(pterm)) {
|
if (docid == m_ndb->db.postlist_end(pterm)) {
|
||||||
// If no document exist with this path, we do need update
|
// If no document exist with this path, we do need update
|
||||||
LOGDEB2(("Db::needUpdate: no such path: [%s]\n", pterm.c_str()));
|
LOGDEB2(("Db::needUpdate: no path: [%s]\n", pterm.c_str()));
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
Xapian::Document doc = m_ndb->wdb.get_document(*docid);
|
|
||||||
string data = doc.get_data();
|
|
||||||
const char *cp = strstr(data.c_str(), "fmtime=");
|
|
||||||
if (cp) {
|
|
||||||
cp += 7;
|
|
||||||
} else {
|
|
||||||
cp = strstr(data.c_str(), "mtime=");
|
|
||||||
if (cp)
|
|
||||||
cp+= 6;
|
|
||||||
}
|
|
||||||
long mtime = cp ? atol(cp) : 0;
|
|
||||||
if (mtime < stp->st_mtime) {
|
|
||||||
LOGDEB2(("Db::needUpdate: yes: mtime: Db %ld file %ld\n",
|
|
||||||
(long)mtime, (long)stp->st_mtime));
|
|
||||||
// Db is not up to date. Let's index the file
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
LOGDEB2(("Db::needUpdate: uptodate: [%s]\n", pterm.c_str()));
|
|
||||||
|
|
||||||
// Up to date.
|
|
||||||
|
|
||||||
// Set the uptodate flag for doc / pseudo doc
|
|
||||||
m_ndb->updated[*docid] = true;
|
|
||||||
|
|
||||||
// Set the existence flag for all the subdocs (if any)
|
|
||||||
vector<Xapian::docid> docids;
|
|
||||||
if (!m_ndb->subDocs(hash, docids)) {
|
|
||||||
LOGERR(("Rcl::Db::needUpdate: can't get subdocs list\n"));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
for (vector<Xapian::docid>::iterator it = docids.begin();
|
|
||||||
it != docids.end(); it++) {
|
|
||||||
if (*it < m_ndb->updated.size()) {
|
|
||||||
LOGDEB2(("Db::needUpdate: set flag for docid %d\n", *it));
|
|
||||||
m_ndb->updated[*it] = true;
|
|
||||||
}
|
}
|
||||||
|
Xapian::Document doc = m_ndb->db.get_document(*docid);
|
||||||
|
#ifdef MTIME_IN_VALUE
|
||||||
|
// This is slightly faster, but we'd need to setup a conversion
|
||||||
|
// for old dbs, and it's not really worth it
|
||||||
|
string value = doc.get_value(1);
|
||||||
|
const char *cp = value.c_str();
|
||||||
|
#else
|
||||||
|
string data = doc.get_data();
|
||||||
|
const char *cp = strstr(data.c_str(), "fmtime=");
|
||||||
|
if (cp) {
|
||||||
|
cp += 7;
|
||||||
|
} else {
|
||||||
|
cp = strstr(data.c_str(), "mtime=");
|
||||||
|
if (cp)
|
||||||
|
cp+= 6;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
long mtime = cp ? atol(cp) : 0;
|
||||||
|
if (mtime < stp->st_mtime) {
|
||||||
|
LOGDEB2(("Db::needUpdate: yes: mtime: Db %ld file %ld\n",
|
||||||
|
(long)mtime, (long)stp->st_mtime));
|
||||||
|
// Db is not up to date. Let's index the file
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOGDEB2(("Db::needUpdate: uptodate: [%s]\n", pterm.c_str()));
|
||||||
|
|
||||||
|
// Up to date.
|
||||||
|
|
||||||
|
// Set the uptodate flag for doc / pseudo doc
|
||||||
|
updated[*docid] = true;
|
||||||
|
|
||||||
|
// Set the existence flag for all the subdocs (if any)
|
||||||
|
vector<Xapian::docid> docids;
|
||||||
|
if (!m_ndb->subDocs(hash, docids)) {
|
||||||
|
LOGERR(("Rcl::Db::needUpdate: can't get subdocs list\n"));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
for (vector<Xapian::docid>::iterator it = docids.begin();
|
||||||
|
it != docids.end(); it++) {
|
||||||
|
if (*it < updated.size()) {
|
||||||
|
LOGDEB2(("Db::needUpdate: set flag for docid %d\n", *it));
|
||||||
|
updated[*it] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// LOGDEB(("Db::needUpdate: used %d mS\n", chron.millis()));
|
||||||
|
return false;
|
||||||
|
} catch (const Xapian::DatabaseModifiedError &e) {
|
||||||
|
LOGDEB(("Db::needUpdate: got modified error. reopen/retry\n"));
|
||||||
|
reOpen();
|
||||||
|
} catch (const Xapian::Error &e) {
|
||||||
|
ermsg = e.get_msg();
|
||||||
|
break;
|
||||||
|
} catch (...) {
|
||||||
|
ermsg= "Unknown error";
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
// LOGDEB(("Db::needUpdate: used %d mS\n", chron.millis()));
|
|
||||||
return false;
|
|
||||||
} catch (const Xapian::Error &e) {
|
|
||||||
ermsg = e.get_msg().c_str();
|
|
||||||
} catch (...) {
|
|
||||||
ermsg= "Unknown error";
|
|
||||||
}
|
}
|
||||||
LOGERR(("Db::needUpdate: error while checking existence: %s\n", ermsg));
|
LOGERR(("Db::needUpdate: error while checking existence: %s\n",
|
||||||
|
ermsg.c_str()));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -760,7 +777,7 @@ list<string> Db::getStemLangs()
|
|||||||
list<string> dirs;
|
list<string> dirs;
|
||||||
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
||||||
return dirs;
|
return dirs;
|
||||||
dirs = StemDb::getLangs(m_ndb->m_basedir);
|
dirs = StemDb::getLangs(m_basedir);
|
||||||
return dirs;
|
return dirs;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -772,7 +789,7 @@ bool Db::deleteStemDb(const string& lang)
|
|||||||
LOGDEB(("Db::deleteStemDb(%s)\n", lang.c_str()));
|
LOGDEB(("Db::deleteStemDb(%s)\n", lang.c_str()));
|
||||||
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
||||||
return false;
|
return false;
|
||||||
return StemDb::deleteDb(m_ndb->m_basedir, lang);
|
return StemDb::deleteDb(m_basedir, lang);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -788,7 +805,7 @@ bool Db::createStemDb(const string& lang)
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
return StemDb:: createDb(m_ndb->m_iswritable ? m_ndb->wdb : m_ndb->db,
|
return StemDb:: createDb(m_ndb->m_iswritable ? m_ndb->wdb : m_ndb->db,
|
||||||
m_ndb->m_basedir, lang);
|
m_basedir, lang);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -804,7 +821,7 @@ bool Db::purge()
|
|||||||
return false;
|
return false;
|
||||||
LOGDEB(("Db::purge: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
LOGDEB(("Db::purge: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
||||||
m_ndb->m_iswritable));
|
m_ndb->m_iswritable));
|
||||||
if (m_ndb->m_isopen == false || m_ndb->m_iswritable == false)
|
if (m_ndb->m_isopen == false || m_ndb->m_iswritable == false)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// There seems to be problems with the document delete code, when
|
// There seems to be problems with the document delete code, when
|
||||||
@ -819,8 +836,8 @@ bool Db::purge()
|
|||||||
} catch (...) {
|
} catch (...) {
|
||||||
LOGDEB(("Db::purge: 1st flush failed\n"));
|
LOGDEB(("Db::purge: 1st flush failed\n"));
|
||||||
}
|
}
|
||||||
for (Xapian::docid docid = 1; docid < m_ndb->updated.size(); ++docid) {
|
for (Xapian::docid docid = 1; docid < updated.size(); ++docid) {
|
||||||
if (!m_ndb->updated[docid]) {
|
if (!updated[docid]) {
|
||||||
try {
|
try {
|
||||||
m_ndb->wdb.delete_document(docid);
|
m_ndb->wdb.delete_document(docid);
|
||||||
LOGDEB(("Db::purge: deleted document #%d\n", docid));
|
LOGDEB(("Db::purge: deleted document #%d\n", docid));
|
||||||
@ -914,7 +931,7 @@ class wsQData : public TextSplitCB {
|
|||||||
// phrase terms (no stem expansion in this case)
|
// phrase terms (no stem expansion in this case)
|
||||||
static void stringToXapianQueries(const string &iq,
|
static void stringToXapianQueries(const string &iq,
|
||||||
const string& stemlang,
|
const string& stemlang,
|
||||||
Native *m_ndb,
|
Db *db,
|
||||||
list<Xapian::Query> &pqueries,
|
list<Xapian::Query> &pqueries,
|
||||||
unsigned int opts = Db::QO_NONE)
|
unsigned int opts = Db::QO_NONE)
|
||||||
{
|
{
|
||||||
@ -962,7 +979,7 @@ static void stringToXapianQueries(const string &iq,
|
|||||||
dumb_string(term, term1);
|
dumb_string(term, term1);
|
||||||
// Possibly perform stem compression/expansion
|
// Possibly perform stem compression/expansion
|
||||||
if (!nostemexp && (opts & Db::QO_STEM)) {
|
if (!nostemexp && (opts & Db::QO_STEM)) {
|
||||||
exp = m_ndb->stemExpand(stemlang, term1);
|
exp = db->stemExpand(stemlang, term1);
|
||||||
} else {
|
} else {
|
||||||
exp.push_back(term1);
|
exp.push_back(term1);
|
||||||
}
|
}
|
||||||
@ -1061,7 +1078,7 @@ bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!sdata.allwords.empty()) {
|
if (!sdata.allwords.empty()) {
|
||||||
stringToXapianQueries(sdata.allwords, stemlang, m_ndb, pqueries, m_qOpts);
|
stringToXapianQueries(sdata.allwords, stemlang, this,pqueries,m_qOpts);
|
||||||
if (!pqueries.empty()) {
|
if (!pqueries.empty()) {
|
||||||
Xapian::Query nq =
|
Xapian::Query nq =
|
||||||
Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
|
Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
|
||||||
@ -1073,7 +1090,7 @@ bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!sdata.orwords.empty()) {
|
if (!sdata.orwords.empty()) {
|
||||||
stringToXapianQueries(sdata.orwords, stemlang, m_ndb, pqueries, m_qOpts);
|
stringToXapianQueries(sdata.orwords, stemlang, this,pqueries,m_qOpts);
|
||||||
if (!pqueries.empty()) {
|
if (!pqueries.empty()) {
|
||||||
Xapian::Query nq =
|
Xapian::Query nq =
|
||||||
Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
||||||
@ -1085,7 +1102,7 @@ bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!sdata.orwords1.empty()) {
|
if (!sdata.orwords1.empty()) {
|
||||||
stringToXapianQueries(sdata.orwords1, stemlang, m_ndb, pqueries, m_qOpts);
|
stringToXapianQueries(sdata.orwords1, stemlang, this,pqueries,m_qOpts);
|
||||||
if (!pqueries.empty()) {
|
if (!pqueries.empty()) {
|
||||||
Xapian::Query nq =
|
Xapian::Query nq =
|
||||||
Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
||||||
@ -1099,7 +1116,7 @@ bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
|
|||||||
if (!sdata.phrase.empty()) {
|
if (!sdata.phrase.empty()) {
|
||||||
Xapian::Query nq;
|
Xapian::Query nq;
|
||||||
string s = string("\"") + sdata.phrase + string("\"");
|
string s = string("\"") + sdata.phrase + string("\"");
|
||||||
stringToXapianQueries(s, stemlang, m_ndb, pqueries);
|
stringToXapianQueries(s, stemlang, this, pqueries);
|
||||||
if (!pqueries.empty()) {
|
if (!pqueries.empty()) {
|
||||||
// There should be a single list element phrase query.
|
// There should be a single list element phrase query.
|
||||||
xq = xq.empty() ? *pqueries.begin() :
|
xq = xq.empty() ? *pqueries.begin() :
|
||||||
@ -1124,7 +1141,7 @@ bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
|
|||||||
// the only term in the query. We do no stem expansion on 'No'
|
// the only term in the query. We do no stem expansion on 'No'
|
||||||
// words. Should we ?
|
// words. Should we ?
|
||||||
if (!sdata.nowords.empty()) {
|
if (!sdata.nowords.empty()) {
|
||||||
stringToXapianQueries(sdata.nowords, stemlang, m_ndb, pqueries);
|
stringToXapianQueries(sdata.nowords, stemlang, this, pqueries);
|
||||||
if (!pqueries.empty()) {
|
if (!pqueries.empty()) {
|
||||||
Xapian::Query nq;
|
Xapian::Query nq;
|
||||||
nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
||||||
@ -1172,7 +1189,7 @@ list<string> Db::completions(const string &root, const string &lang, int max)
|
|||||||
res.push_back(*it);
|
res.push_back(*it);
|
||||||
++n;
|
++n;
|
||||||
} else {
|
} else {
|
||||||
list<string> stemexps = m_ndb->stemExpand(lang, *it);
|
list<string> stemexps = stemExpand(lang, *it);
|
||||||
unsigned int cnt =
|
unsigned int cnt =
|
||||||
(int)stemexps.size() > max - n ? max - n : stemexps.size();
|
(int)stemexps.size() > max - n ? max - n : stemexps.size();
|
||||||
list<string>::iterator sit = stemexps.begin();
|
list<string>::iterator sit = stemexps.begin();
|
||||||
@ -1231,6 +1248,24 @@ bool Db::termExists(const string& word)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
list<string> Db::stemExpand(const string& lang, const string& term)
|
||||||
|
{
|
||||||
|
list<string> dirs = m_extraDbs;
|
||||||
|
dirs.push_front(m_basedir);
|
||||||
|
list<string> exp;
|
||||||
|
for (list<string>::iterator it = dirs.begin();
|
||||||
|
it != dirs.end(); it++) {
|
||||||
|
list<string> more = StemDb::stemExpand(*it, lang, term);
|
||||||
|
LOGDEB1(("Db::stemExpand: Got %d from %s\n",
|
||||||
|
more.size(), it->c_str()));
|
||||||
|
exp.splice(exp.end(), more);
|
||||||
|
}
|
||||||
|
exp.sort();
|
||||||
|
exp.unique();
|
||||||
|
LOGDEB1(("Db:::stemExpand: final count %d \n", exp.size()));
|
||||||
|
return exp;
|
||||||
|
}
|
||||||
|
|
||||||
bool Db::stemDiffers(const string& lang, const string& word,
|
bool Db::stemDiffers(const string& lang, const string& word,
|
||||||
const string& base)
|
const string& base)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _DB_H_INCLUDED_
|
#ifndef _DB_H_INCLUDED_
|
||||||
#define _DB_H_INCLUDED_
|
#define _DB_H_INCLUDED_
|
||||||
/* @(#$Id: rcldb.h,v 1.38 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rcldb.h,v 1.39 2006-10-24 09:28:31 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
@ -116,8 +116,9 @@ class Db {
|
|||||||
~Db();
|
~Db();
|
||||||
|
|
||||||
enum OpenMode {DbRO, DbUpd, DbTrunc};
|
enum OpenMode {DbRO, DbUpd, DbTrunc};
|
||||||
|
// KEEP_UPDATED is internal use by reOpen() only
|
||||||
enum QueryOpts {QO_NONE=0, QO_STEM = 1, QO_BUILD_ABSTRACT = 2,
|
enum QueryOpts {QO_NONE=0, QO_STEM = 1, QO_BUILD_ABSTRACT = 2,
|
||||||
QO_REPLACE_ABSTRACT = 4};
|
QO_REPLACE_ABSTRACT = 4, QO_KEEP_UPDATED = 8};
|
||||||
|
|
||||||
bool open(const string &dbdir, OpenMode mode, int qops = QO_NONE);
|
bool open(const string &dbdir, OpenMode mode, int qops = QO_NONE);
|
||||||
bool close();
|
bool close();
|
||||||
@ -205,6 +206,9 @@ class Db {
|
|||||||
/** Test if terms stem to different roots. */
|
/** Test if terms stem to different roots. */
|
||||||
bool stemDiffers(const string& lang, const string& term,
|
bool stemDiffers(const string& lang, const string& term,
|
||||||
const string& base);
|
const string& base);
|
||||||
|
|
||||||
|
/** Perform stem expansion across all dbs configured for searching */
|
||||||
|
list<string> stemExpand(const string& lang, const string& term);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
@ -230,6 +234,16 @@ private:
|
|||||||
// when building the abstract
|
// when building the abstract
|
||||||
int m_synthAbsWordCtxLen;
|
int m_synthAbsWordCtxLen;
|
||||||
|
|
||||||
|
// Database directory
|
||||||
|
string m_basedir;
|
||||||
|
|
||||||
|
// List of directories for additional databases to query
|
||||||
|
list<string> m_extraDbs;
|
||||||
|
|
||||||
|
OpenMode m_mode;
|
||||||
|
|
||||||
|
vector<bool> updated;
|
||||||
|
|
||||||
bool reOpen(); // Close/open, same mode/opts
|
bool reOpen(); // Close/open, same mode/opts
|
||||||
/* Copyconst and assignemt private and forbidden */
|
/* Copyconst and assignemt private and forbidden */
|
||||||
Db(const Db &) {}
|
Db(const Db &) {}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user