Fix problems which occurred when using functions like open-parents with multiple indexes containing identical paths (udis)
This commit is contained in:
parent
11e0ccaa4e
commit
a1b7018cfd
@ -120,14 +120,12 @@ void FileInterner::reapXAttrs(const string& path)
|
||||
|
||||
// This is used when the user wants to retrieve a search result doc's parent
|
||||
// (ie message having a given attachment)
|
||||
bool FileInterner::getEnclosing(const string &url, const string &ipath,
|
||||
string &eurl, string &eipath, string& udi)
|
||||
bool FileInterner::getEnclosingUDI(const Rcl::Doc &doc, string& udi)
|
||||
{
|
||||
eurl = url;
|
||||
eipath = ipath;
|
||||
LOGDEB(("FileInterner::getEnclosingUDI(): url [%s] ipath [%s]\n",
|
||||
doc.url.c_str(), doc.ipath.c_str()));
|
||||
string eipath = doc.ipath;
|
||||
string::size_type colon;
|
||||
LOGDEB(("FileInterner::getEnclosing(): url [%s] ipath [%s]\n",
|
||||
url.c_str(), eipath.c_str()));
|
||||
if (eipath.empty())
|
||||
return false;
|
||||
if ((colon = eipath.find_last_of(cstr_isep)) != string::npos) {
|
||||
@ -135,10 +133,8 @@ bool FileInterner::getEnclosing(const string &url, const string &ipath,
|
||||
} else {
|
||||
eipath.erase();
|
||||
}
|
||||
make_udi(url_gpath(eurl), eipath, udi);
|
||||
|
||||
LOGDEB(("FileInterner::getEnclosing() after: [%s]\n", eipath.c_str()));
|
||||
return true;
|
||||
|
||||
make_udi(url_gpath(doc.idxurl.empty() ? doc.url : doc.idxurl), eipath, udi);
|
||||
}
|
||||
|
||||
string FileInterner::getLastIpathElt(const string& ipath)
|
||||
|
||||
@ -195,14 +195,14 @@ class FileInterner {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get immediate parent for document.
|
||||
* Get UDI for immediate parent for document.
|
||||
*
|
||||
* This is not in general the same as the "parent" document used
|
||||
* with Rcl::Db::addOrUpdate(). The latter is the enclosing file,
|
||||
* this would be for exemple the email containing the attachment.
|
||||
* This is in internfile because of the ipath computation.
|
||||
*/
|
||||
static bool getEnclosing(const string &url, const string &ipath,
|
||||
string &eurl, string &eipath, string& udi);
|
||||
static bool getEnclosingUDI(const Rcl::Doc &doc, string& udi);
|
||||
|
||||
/** Return last element in ipath, like basename */
|
||||
static std::string getLastIpathElt(const std::string& ipath);
|
||||
|
||||
@ -430,7 +430,8 @@ void RclMain::viewUrl()
|
||||
(const char *)qurl.fragment().toLocal8Bit(), udi);
|
||||
|
||||
Rcl::Doc doc;
|
||||
if (!rcldb->getDoc(udi, doc) || doc.pc == -1)
|
||||
Rcl::Doc idxdoc; // idxdoc.idxi == 0 -> works with base index only
|
||||
if (!rcldb->getDoc(udi, idxdoc, doc) || doc.pc == -1)
|
||||
return;
|
||||
|
||||
// StartNativeViewer needs a db source to call getEnclosing() on.
|
||||
|
||||
@ -38,14 +38,16 @@ int DocSequence::getSeqSlice(int offs, int cnt, vector<ResListEntry>& result)
|
||||
|
||||
bool DocSequence::getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc)
|
||||
{
|
||||
// Note: no need for setQuery here, we're just passing through a
|
||||
// query-independant request
|
||||
|
||||
Rcl::Db *db = getDb();
|
||||
if (db == 0) {
|
||||
LOGERR(("DocSequence::getEnclosing: no db\n"));
|
||||
return false;
|
||||
}
|
||||
string udi;
|
||||
if (!FileInterner::getEnclosing(doc.url, doc.ipath, pdoc.url, pdoc.ipath,
|
||||
udi))
|
||||
if (!FileInterner::getEnclosingUDI(doc, udi))
|
||||
return false;
|
||||
bool dbret = getDb()->getDoc(udi, pdoc);
|
||||
|
||||
bool dbret = db->getDoc(udi, doc, pdoc);
|
||||
return dbret && pdoc.pc != -1;
|
||||
}
|
||||
|
||||
|
||||
@ -136,7 +136,10 @@ bool DocSequenceHistory::getDoc(int num, Rcl::Doc &doc, string *sh)
|
||||
} else
|
||||
sh->erase();
|
||||
}
|
||||
bool ret = m_db->getDoc(m_it->udi, doc);
|
||||
|
||||
// For now history does not store an index id. Use empty doc as ref.
|
||||
Rcl::Doc idxdoc;
|
||||
bool ret = m_db->getDoc(m_it->udi, idxdoc, doc);
|
||||
if (!ret || doc.pc == -1) {
|
||||
doc.url = "UNKNOWN";
|
||||
doc.ipath = "";
|
||||
|
||||
@ -219,20 +219,25 @@ void Db::Native::maybeStartThreads()
|
||||
/* See comment in class declaration: return all subdocuments of a
|
||||
* document given by its unique id.
|
||||
*/
|
||||
bool Db::Native::subDocs(const string &udi, vector<Xapian::docid>& docids)
|
||||
bool Db::Native::subDocs(const string &udi, int idxi,
|
||||
vector<Xapian::docid>& docids)
|
||||
{
|
||||
LOGDEB2(("subDocs: [%s]\n", uniterm.c_str()));
|
||||
string pterm = make_parentterm(udi);
|
||||
|
||||
vector<Xapian::docid> candidates;
|
||||
XAPTRY(docids.clear();
|
||||
docids.insert(docids.begin(), xrdb.postlist_begin(pterm),
|
||||
xrdb.postlist_end(pterm)),
|
||||
candidates.insert(candidates.begin(), xrdb.postlist_begin(pterm),
|
||||
xrdb.postlist_end(pterm)),
|
||||
xrdb, m_rcldb->m_reason);
|
||||
|
||||
if (!m_rcldb->m_reason.empty()) {
|
||||
LOGERR(("Rcl::Db::subDocs: %s\n", m_rcldb->m_reason.c_str()));
|
||||
return false;
|
||||
} else {
|
||||
for (unsigned int i = 0; i < candidates.size(); i++) {
|
||||
if (whatDbIdx(candidates[i]) == (size_t)idxi) {
|
||||
docids.push_back(candidates[i]);
|
||||
}
|
||||
}
|
||||
LOGDEB0(("Db::Native::subDocs: returning %d ids\n", docids.size()));
|
||||
return true;
|
||||
}
|
||||
@ -259,11 +264,11 @@ bool Db::Native::xdocToUdi(Xapian::Document& xdoc, string &udi)
|
||||
}
|
||||
|
||||
// Check if doc given by udi is indexed by term
|
||||
bool Db::Native::hasTerm(const string& udi, const string& term)
|
||||
bool Db::Native::hasTerm(const string& udi, int idxi, const string& term)
|
||||
{
|
||||
LOGDEB2(("Native::hasTerm: udi [%s] term [%s]\n",udi.c_str(),term.c_str()));
|
||||
Xapian::Document xdoc;
|
||||
if (getDoc(udi, xdoc)) {
|
||||
if (getDoc(udi, idxi, xdoc)) {
|
||||
Xapian::TermIterator xit;
|
||||
XAPTRY(xit = xdoc.termlist_begin();
|
||||
xit.skip_to(term);,
|
||||
@ -279,20 +284,23 @@ bool Db::Native::hasTerm(const string& udi, const string& term)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Retrieve Xapian document, given udi
|
||||
Xapian::docid Db::Native::getDoc(const string& udi, Xapian::Document& xdoc)
|
||||
// Retrieve Xapian document, given udi. There may be several identical udis
|
||||
// if we are using multiple indexes.
|
||||
Xapian::docid Db::Native::getDoc(const string& udi, int idxi,
|
||||
Xapian::Document& xdoc)
|
||||
{
|
||||
string uniterm = make_uniterm(udi);
|
||||
for (int tries = 0; tries < 2; tries++) {
|
||||
try {
|
||||
Xapian::PostingIterator docid = xrdb.postlist_begin(uniterm);
|
||||
if (docid == xrdb.postlist_end(uniterm)) {
|
||||
// Udi not in Db.
|
||||
return 0;
|
||||
} else {
|
||||
Xapian::PostingIterator docid;
|
||||
for (docid = xrdb.postlist_begin(uniterm);
|
||||
docid != xrdb.postlist_end(uniterm); docid++) {
|
||||
xdoc = xrdb.get_document(*docid);
|
||||
return *docid;
|
||||
if (whatDbIdx(*docid) == (size_t)idxi)
|
||||
return *docid;
|
||||
}
|
||||
// Udi not in Db.
|
||||
return 0;
|
||||
} catch (const Xapian::DatabaseModifiedError &e) {
|
||||
m_rcldb->m_reason = e.get_msg();
|
||||
xrdb.reopen();
|
||||
@ -314,23 +322,27 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
|
||||
if (!parms.ok())
|
||||
return false;
|
||||
|
||||
// Set xdocid at once so that we can call whatDbIdx()
|
||||
doc.xdocid = docid;
|
||||
doc.haspages = hasPages(docid);
|
||||
|
||||
// Compute what index this comes from, and check for path translations
|
||||
string dbdir = m_rcldb->m_basedir;
|
||||
doc.idxi = 0;
|
||||
if (!m_rcldb->m_extraDbs.empty()) {
|
||||
unsigned int idxi = m_rcldb->whatDbIdx(doc);
|
||||
unsigned int idxi = whatDbIdx(docid);
|
||||
|
||||
// idxi is in [0, extraDbs.size()]. 0 is for the main index,
|
||||
// idxi-1 indexes into the additional dbs array.
|
||||
if (idxi) {
|
||||
dbdir = m_rcldb->m_extraDbs[idxi - 1];
|
||||
doc.idxi = idxi;
|
||||
}
|
||||
}
|
||||
parms.get(Doc::keyurl, doc.url);
|
||||
parms.get(Doc::keyurl, doc.idxurl);
|
||||
doc.url = doc.idxurl;
|
||||
m_rcldb->m_config->urlrewrite(dbdir, doc.url);
|
||||
if (!doc.url.compare(doc.idxurl))
|
||||
doc.idxurl.clear();
|
||||
|
||||
// Special cases:
|
||||
parms.get(Doc::keytp, doc.mimetype);
|
||||
@ -549,7 +561,7 @@ bool Db::Native::purgeFileWrite(bool orphansOnly, const string& udi,
|
||||
xwdb.delete_document(*docid);
|
||||
}
|
||||
vector<Xapian::docid> docids;
|
||||
subDocs(udi, docids);
|
||||
subDocs(udi, 0, docids);
|
||||
LOGDEB(("purgeFile: subdocs cnt %d\n", docids.size()));
|
||||
for (vector<Xapian::docid>::iterator it = docids.begin();
|
||||
it != docids.end(); it++) {
|
||||
@ -864,14 +876,19 @@ bool Db::rmQueryDb(const string &dir)
|
||||
// modulo of the docid against the db count. Ref:
|
||||
// http://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID
|
||||
size_t Db::whatDbIdx(const Doc& doc)
|
||||
{
|
||||
return m_ndb->whatDbIdx(doc.xdocid);
|
||||
}
|
||||
|
||||
size_t Db::Native::whatDbIdx(Xapian::docid id)
|
||||
{
|
||||
LOGDEB1(("Db::whatDbIdx: xdocid %lu, %u extraDbs\n",
|
||||
(unsigned long)doc.xdocid, m_extraDbs.size()));
|
||||
if (doc.xdocid == 0)
|
||||
(unsigned long)id, m_extraDbs.size()));
|
||||
if (id == 0)
|
||||
return (size_t)-1;
|
||||
if (m_extraDbs.size() == 0)
|
||||
if (m_rcldb->m_extraDbs.size() == 0)
|
||||
return 0;
|
||||
return (doc.xdocid - 1) % (m_extraDbs.size() + 1);
|
||||
return (id - 1) % (m_rcldb->m_extraDbs.size() + 1);
|
||||
}
|
||||
|
||||
bool Db::testDbDir(const string &dir, bool *stripped_p)
|
||||
@ -1556,7 +1573,7 @@ bool Db::needUpdate(const string &udi, const string& sig, bool *existed)
|
||||
|
||||
// Set the existence flag for all the subdocs (if any)
|
||||
vector<Xapian::docid> docids;
|
||||
if (!m_ndb->subDocs(udi, docids)) {
|
||||
if (!m_ndb->subDocs(udi, 0, docids)) {
|
||||
LOGERR(("Rcl::Db::needUpdate: can't get subdocs\n"));
|
||||
return true;
|
||||
}
|
||||
@ -1808,7 +1825,7 @@ bool Db::dbStats(DbStats& res)
|
||||
// by the GUI history feature and by open parent/getenclosing
|
||||
// ! The return value is always true except for fatal errors. Document
|
||||
// existence should be tested by looking at doc.pc
|
||||
bool Db::getDoc(const string &udi, Doc &doc)
|
||||
bool Db::getDoc(const string &udi, const Doc& idxdoc, Doc &doc)
|
||||
{
|
||||
LOGDEB(("Db:getDoc: [%s]\n", udi.c_str()));
|
||||
if (m_ndb == 0)
|
||||
@ -1820,7 +1837,8 @@ bool Db::getDoc(const string &udi, Doc &doc)
|
||||
doc.pc = 100;
|
||||
Xapian::Document xdoc;
|
||||
Xapian::docid docid;
|
||||
if ((docid = m_ndb->getDoc(udi, xdoc))) {
|
||||
int idxi = idxdoc.idxi;
|
||||
if ((docid = m_ndb->getDoc(udi, idxi, xdoc))) {
|
||||
string data = xdoc.get_data();
|
||||
doc.meta[Rcl::Doc::keyudi] = udi;
|
||||
return m_ndb->dbDataToRclDoc(docid, data, doc);
|
||||
@ -1845,7 +1863,7 @@ bool Db::hasSubDocs(const Doc &idoc)
|
||||
return false;
|
||||
}
|
||||
vector<Xapian::docid> docids;
|
||||
if (!m_ndb->subDocs(inudi, docids)) {
|
||||
if (!m_ndb->subDocs(inudi, idoc.idxi, docids)) {
|
||||
LOGDEB(("Db:getSubDocs: lower level subdocs failed\n"));
|
||||
return false;
|
||||
}
|
||||
@ -1853,7 +1871,7 @@ bool Db::hasSubDocs(const Doc &idoc)
|
||||
return true;
|
||||
|
||||
// Check if doc has an has_children term
|
||||
if (m_ndb->hasTerm(inudi, has_children_term))
|
||||
if (m_ndb->hasTerm(inudi, idoc.idxi, has_children_term))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
@ -1879,7 +1897,7 @@ bool Db::getSubDocs(const Doc &idoc, vector<Doc>& subdocs)
|
||||
} else {
|
||||
// See if we have a parent term
|
||||
Xapian::Document xdoc;
|
||||
if (!m_ndb->getDoc(inudi, xdoc)) {
|
||||
if (!m_ndb->getDoc(inudi, idoc.idxi, xdoc)) {
|
||||
LOGERR(("Db::getSubDocs: can't get Xapian document\n"));
|
||||
return false;
|
||||
}
|
||||
@ -1902,7 +1920,7 @@ bool Db::getSubDocs(const Doc &idoc, vector<Doc>& subdocs)
|
||||
|
||||
// Retrieve all subdoc xapian ids for the root
|
||||
vector<Xapian::docid> docids;
|
||||
if (!m_ndb->subDocs(rootudi, docids)) {
|
||||
if (!m_ndb->subDocs(rootudi, idoc.idxi, docids)) {
|
||||
LOGDEB(("Db:getSubDocs: lower level subdocs failed\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -228,9 +228,12 @@ class Db {
|
||||
/* Update-related methods ******************************************/
|
||||
|
||||
/** Test if the db entry for the given udi is up to date (by
|
||||
* comparing the input and stored sigs).
|
||||
* Side-effect: set the existence flag for the file document
|
||||
* and all subdocs if any (for later use by 'purge()')
|
||||
* comparing the input and stored sigs). This is used both when
|
||||
* indexing and querying (before opening a document using stale info),
|
||||
* **This assumes that the udi pertains to the main index (idxi==0).**
|
||||
* Side-effect when the db is writeable: set the existence flag
|
||||
* for the file document and all subdocs if any (for later use by
|
||||
* 'purge()')
|
||||
*/
|
||||
bool needUpdate(const string &udi, const string& sig, bool *existed=0);
|
||||
|
||||
@ -355,8 +358,13 @@ class Db {
|
||||
/** Get document for given udi
|
||||
*
|
||||
* Used by the 'history' feature, and to retrieve ancestor documents.
|
||||
* @param udi the unique document identifier
|
||||
* @param idxdoc used when there are several index as an opaque way to pass
|
||||
* the index id. Use a doc from the same index
|
||||
* (e.g.: when looking for parent),
|
||||
* @param doc the output doc
|
||||
*/
|
||||
bool getDoc(const string &udi, Doc &doc);
|
||||
bool getDoc(const string &udi, const Doc& idxdoc, Doc &doc);
|
||||
|
||||
/** Test if documents has sub-documents.
|
||||
*
|
||||
|
||||
@ -120,12 +120,14 @@ class Db::Native {
|
||||
|
||||
bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);
|
||||
|
||||
size_t whatDbIdx(Xapian::docid id);
|
||||
|
||||
/** Retrieve Xapian::docid, given unique document identifier,
|
||||
* using the posting list for the derived term.
|
||||
*
|
||||
* @return 0 if not found
|
||||
*/
|
||||
Xapian::docid getDoc(const string& udi, Xapian::Document& xdoc);
|
||||
Xapian::docid getDoc(const string& udi, int idxi, Xapian::Document& xdoc);
|
||||
|
||||
/** Retrieve unique document identifier for given Xapian document,
|
||||
* using the document termlist
|
||||
@ -133,7 +135,7 @@ class Db::Native {
|
||||
bool xdocToUdi(Xapian::Document& xdoc, string &udi);
|
||||
|
||||
/** Check if doc is indexed by term */
|
||||
bool hasTerm(const string& udi, const string& term);
|
||||
bool hasTerm(const string& udi, int idxi, const string& term);
|
||||
|
||||
/** Compute list of subdocuments for a given udi. We look for documents
|
||||
* indexed by a parent term matching the udi, the posting list for the
|
||||
@ -149,7 +151,7 @@ class Db::Native {
|
||||
* indexer (rcldb user), using the ipath.
|
||||
*
|
||||
*/
|
||||
bool subDocs(const string &udi, vector<Xapian::docid>& docids);
|
||||
bool subDocs(const string &udi, int idxi, vector<Xapian::docid>& docids);
|
||||
|
||||
/** Check if a page position list is defined */
|
||||
bool hasPages(Xapian::docid id);
|
||||
|
||||
@ -48,6 +48,7 @@ namespace Rcl {
|
||||
void Doc::dump(bool dotext) const
|
||||
{
|
||||
LOGDEB(("Rcl::Doc::dump: url: [%s]\n", url.c_str()));
|
||||
LOGDEB(("Rcl::Doc::dump: idxurl: [%s]\n", idxurl.c_str()));
|
||||
LOGDEB(("Rcl::Doc::dump: ipath: [%s]\n", ipath.c_str()));
|
||||
LOGDEB(("Rcl::Doc::dump: mimetype: [%s]\n", mimetype.c_str()));
|
||||
LOGDEB(("Rcl::Doc::dump: fmtime: [%s]\n", fmtime.c_str()));
|
||||
|
||||
@ -48,6 +48,12 @@ class Doc {
|
||||
// Query: from doc data.
|
||||
string url;
|
||||
|
||||
// When we do path translation for documents from external indexes, we
|
||||
// save the original path:
|
||||
string idxurl;
|
||||
// And the originating db. 0 is base, 1 first external etc.
|
||||
int idxi;
|
||||
|
||||
// Internal path for multi-doc files. Ascii
|
||||
// Set by FsIndexer::processone
|
||||
string ipath;
|
||||
@ -142,13 +148,16 @@ class Doc {
|
||||
sig.erase();
|
||||
|
||||
text.erase();
|
||||
|
||||
pc = 0;
|
||||
xdocid = 0;
|
||||
idxi = 0;
|
||||
haspages = false;
|
||||
haschildren = false;
|
||||
}
|
||||
Doc()
|
||||
: syntabs(false), pc(0), xdocid(0), haspages(false), haschildren(false)
|
||||
: idxi(0), syntabs(false), pc(0), xdocid(0),
|
||||
haspages(false), haschildren(false)
|
||||
{
|
||||
}
|
||||
/** Get value for named field. If value pointer is 0, just test existence */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user