Store the origin dbdir inside the GUI doc history, so we can later fetch documents from external indexes

This commit is contained in:
Jean-Francois Dockes 2018-05-31 15:01:17 +02:00
parent 3b6dd445e3
commit 6441eea8aa
9 changed files with 137 additions and 54 deletions

View File

@ -876,10 +876,7 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
// Enter document in document history
string udi;
if (idoc.getmeta(Rcl::Doc::keyudi, &udi)) {
historyEnterDoc(g_dynconf, udi);
}
historyEnterDoc(rcldb, g_dynconf, idoc);
editor->setFocus();
emit(previewExposed(this, m_searchId, docnum));

View File

@ -80,7 +80,7 @@ bool RclMain::containerUpToDate(Rcl::Doc& doc)
}
// We can only run indexing on the main index (dbidx 0)
bool ismainidx = rcldb->whatDbIdx(doc) == 0;
bool ismainidx = rcldb->fromMainIndex(doc);
// Indexer already running?
bool ixnotact = (m_indexerState == IXST_NOTRUNNING);

View File

@ -310,7 +310,9 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
}
}
bool enterHistory = false;
// Can't remember what enterHistory was actually for. Set it to
// true always for now
bool enterHistory = true;
bool istempfile = false;
LOGDEB("StartNativeViewer: groksipath " << groksipath << " wantsf " <<
@ -451,8 +453,8 @@ void RclMain::execViewer(const map<string, string>& subs, bool enterHistory,
stb->showMessage(msg, 10000);
}
if (!enterHistory)
historyEnterDoc(g_dynconf, doc.meta[Rcl::Doc::keyudi]);
if (enterHistory)
historyEnterDoc(rcldb, g_dynconf, doc);
// Do the zeitgeist thing
zg_send_event(ZGSEND_OPEN, doc);

View File

@ -34,21 +34,24 @@ using std::vector;
// The U distinguishes udi-based entries from older fn+ipath ones
bool RclDHistoryEntry::encode(string& value)
{
string budi;
string budi, bdir;
base64_encode(udi, budi);
value = string("U ") + lltodecstr(unixtime) + " " + budi;
base64_encode(dbdir, bdir);
value = string("V ") + lltodecstr(unixtime) + " " + budi + " " + bdir;
return true;
}
// Decode. We support historical entries which were like "time b64fn [b64ipath]"
// Current entry format is "U time b64udi"
// Previous entry format is "U time b64udi"
// Current entry format is "V time b64udi [b64dir]"
bool RclDHistoryEntry::decode(const string &value)
{
vector<string> vall;
stringToStrings(value, vall);
vector<string>::const_iterator it = vall.begin();
udi.erase();
udi.clear();
dbdir.clear();
string fn, ipath;
switch (vall.size()) {
case 2:
@ -57,8 +60,8 @@ bool RclDHistoryEntry::decode(const string &value)
base64_decode(*it++, fn);
break;
case 3:
if (!it->compare("U")) {
// New udi-based entry
if (!it->compare("U") || !it->compare("V")) {
// New udi-based entry, no dir
it++;
unixtime = atoll((*it++).c_str());
base64_decode(*it++, udi);
@ -69,6 +72,13 @@ bool RclDHistoryEntry::decode(const string &value)
base64_decode(*it, ipath);
}
break;
case 4:
// New udi-based entry, with directory
it++;
unixtime = atoll((*it++).c_str());
base64_decode(*it++, udi);
base64_decode(*it++, dbdir);
break;
default:
return false;
}
@ -77,23 +87,31 @@ bool RclDHistoryEntry::decode(const string &value)
// Old style entry found, make an udi, using the fs udi maker
make_udi(fn, ipath, udi);
}
LOGDEB1("RclDHistoryEntry::decode: udi [" << udi << "]\n");
LOGDEB1("RclDHistoryEntry::decode: udi [" << udi << "] dbdir [" <<
dbdir << "]\n");
return true;
}
bool RclDHistoryEntry::equal(const DynConfEntry& other)
{
const RclDHistoryEntry& e = dynamic_cast<const RclDHistoryEntry&>(other);
return e.udi == udi;
return e.udi == udi && e.dbdir == dbdir;
}
bool historyEnterDoc(RclDynConf *dncf, const string& udi)
bool historyEnterDoc(Rcl::Db *db, RclDynConf *dncf, const Rcl::Doc& doc)
{
LOGDEB1("historyEnterDoc: [" << udi << "] into " << dncf->getFilename() <<
"\n");
RclDHistoryEntry ne(time(0), udi);
RclDHistoryEntry scratch;
return dncf->insertNew(docHistSubKey, ne, scratch, 200);
string udi;
if (db && doc.getmeta(Rcl::Doc::keyudi, &udi)) {
std::string dbdir = db->whatIndexForResultDoc(doc);
LOGDEB("historyEnterDoc: [" << udi << ", " << dbdir << "] into " <<
dncf->getFilename() << "\n");
RclDHistoryEntry ne(time(0), udi, dbdir);
RclDHistoryEntry scratch;
return dncf->insertNew(docHistSubKey, ne, scratch, 200);
} else {
LOGDEB("historyEnterDoc: doc has no udi\n");
}
return false;
}
vector<RclDHistoryEntry> getDocHistory(RclDynConf* dncf)
@ -111,8 +129,10 @@ bool DocSequenceHistory::getDoc(int num, Rcl::Doc &doc, string *sh)
if (num < 0 || num >= (int)m_history.size())
return false;
// We get the history oldest first, but our users expect newest first
RclDHistoryEntry& hentry = m_history[m_history.size() - 1 - num];
if (sh) {
if (m_prevtime < 0 ||
abs (float(m_prevtime) - float(hentry.unixtime)) > 86400) {
@ -126,9 +146,7 @@ bool DocSequenceHistory::getDoc(int num, Rcl::Doc &doc, string *sh)
}
}
// For now history does not store an index id. Use empty doc as ref.
Rcl::Doc idxdoc;
bool ret = m_db->getDoc(hentry.udi, idxdoc, doc);
bool ret = m_db->getDoc(hentry.udi, hentry.dbdir, doc);
if (!ret || doc.pc == -1) {
doc.url = "UNKNOWN";
doc.ipath = "";

View File

@ -31,14 +31,15 @@ namespace Rcl {
class RclDHistoryEntry : public DynConfEntry {
public:
RclDHistoryEntry() : unixtime(0) {}
RclDHistoryEntry(time_t t, const string& u)
: unixtime(t), udi(u) {}
RclDHistoryEntry(time_t t, const std::string& u, const std::string& d)
: unixtime(t), udi(u), dbdir(d) {}
virtual ~RclDHistoryEntry() {}
virtual bool decode(const string &value);
virtual bool encode(string& value);
virtual bool decode(const std::string &value);
virtual bool encode(std::string& value);
virtual bool equal(const DynConfEntry& other);
time_t unixtime;
string udi;
std::string udi;
std::string dbdir;
};
/** A DocSequence coming from the history file.
@ -46,14 +47,14 @@ class RclDHistoryEntry : public DynConfEntry {
* metadata for an url key */
class DocSequenceHistory : public DocSequence {
public:
DocSequenceHistory(Rcl::Db *d, RclDynConf *h, const string &t)
DocSequenceHistory(Rcl::Db *d, RclDynConf *h, const std::string &t)
: DocSequence(t), m_db(d), m_hist(h) {}
virtual ~DocSequenceHistory() {}
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0);
virtual bool getDoc(int num, Rcl::Doc &doc, std::string *sh = 0);
virtual int getResCnt();
virtual string getDescription() {return m_description;}
void setDescription(const string& desc) {m_description = desc;}
virtual std::string getDescription() {return m_description;}
void setDescription(const std::string& desc) {m_description = desc;}
protected:
virtual Rcl::Db *getDb();
private:
@ -64,6 +65,6 @@ private:
std::vector<RclDHistoryEntry> m_history;
};
extern bool historyEnterDoc(RclDynConf *dncf, const string& udi);
extern bool historyEnterDoc(Rcl::Db *db, RclDynConf *dncf, const Rcl::Doc& doc);
#endif /* _DOCSEQ_H_INCLUDED_ */

View File

@ -98,7 +98,7 @@ bool RclDynConf::insertNew(const string &sk, DynConfEntry &n, DynConfEntry &s,
n.encode(value);
LOGDEB1("Encoded value [" << value << "] (" << value.size() << ")\n");
if (!m_data.set(string(nname), value, sk)) {
LOGERR("RclDHistory::insertNew: set failed\n");
LOGERR("RclDynConf::insertNew: set failed\n");
return false;
}
return true;

View File

@ -147,7 +147,7 @@ Container<Type, std::allocator<Type>>
out.push_back(entry);
}
}
return std::move(out);
return out;
}
template <template <class, class> class Container>
@ -159,7 +159,7 @@ Container<std::string, std::allocator<std::string>>
for (const auto& entry : el) {
sl.push_back(entry.value);
}
return std::move(sl);
return sl;
}
// Defined subkeys. Values in dynconf.cpp

View File

@ -1123,9 +1123,26 @@ bool Db::rmQueryDb(const string &dir)
// Determining what index a doc result comes from is based on the
// modulo of the docid against the db count. Ref:
// http://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID
size_t Db::whatDbIdx(const Doc& doc)
bool Db::fromMainIndex(const Doc& doc)
{
return m_ndb->whatDbIdx(doc.xdocid);
return m_ndb->whatDbIdx(doc.xdocid) == 0;
}
std::string Db::whatIndexForResultDoc(const Doc& doc)
{
size_t idx = m_ndb->whatDbIdx(doc.xdocid);
if (idx == (size_t)-1) {
LOGERR("whatIndexForResultDoc: whatDbIdx returned -1 for " <<
doc.xdocid << endl);
return string();
}
// idx is [0..m_extraDbs.size()] 0 is for the main index, else
// idx-1 indexes into m_extraDbs
if (idx == 0) {
return m_basedir;
} else {
return m_extraDbs[idx-1];
}
}
size_t Db::Native::whatDbIdx(Xapian::docid id)
@ -2352,18 +2369,44 @@ bool Db::dbStats(DbStats& res, bool listfailed)
// existence should be tested by looking at doc.pc
bool Db::getDoc(const string &udi, const Doc& idxdoc, Doc &doc)
{
LOGDEB("Db:getDoc: [" << udi << "]\n");
if (m_ndb == 0)
return false;
LOGDEB1("Db:getDoc: [" << udi << "]\n");
int idxi = idxdoc.idxi;
return getDoc(udi, idxi, doc);
}
/**
 * Fetch a document from its udi and the directory of the index it lives in.
 *
 * The directory is translated to an index number: an empty value or the
 * main index directory maps to 0, and the entry at position i of the
 * extra index list maps to i + 1. The actual fetch is then delegated to
 * the (udi, index number) overload.
 *
 * @param udi the unique document identifier.
 * @param dbdir the index directory. Can be empty to mean the main index.
 * @param[out] doc the output document.
 * @return false if dbdir matches none of the current indexes, else the
 *   result of the (udi, index number) overload.
 */
bool Db::getDoc(const string &udi, const std::string& dbdir, Doc &doc)
{
    LOGDEB1("Db::getDoc(udi, dbdir): (" << udi << ", " << dbdir << ")\n");

    // -1 means "not found yet"
    int idxi = -1;
    const bool ismain = dbdir.empty() || dbdir == m_basedir;
    if (ismain) {
        idxi = 0;
    } else {
        // Look for the first extra index with a matching directory
        unsigned int pos = 0;
        while (pos < m_extraDbs.size() && !(dbdir == m_extraDbs[pos])) {
            pos++;
        }
        if (pos < m_extraDbs.size()) {
            idxi = int(pos + 1);
        }
    }

    LOGDEB1("Db::getDoc(udi, dbdir): idxi: " << idxi << endl);
    if (idxi >= 0) {
        return getDoc(udi, idxi, doc);
    }
    LOGERR("Db::getDoc(udi, dbdir): dbdir not in current extra dbs\n");
    return false;
}
bool Db::getDoc(const string& udi, int idxi, Doc& doc)
{
// Initialize what we can in any case. If this is history, caller
// will make partial display in case of error
if (m_ndb == 0)
return false;
doc.meta[Rcl::Doc::keyrr] = "100%";
doc.pc = 100;
Xapian::Document xdoc;
Xapian::docid docid;
int idxi = idxdoc.idxi;
if ((docid = m_ndb->getDoc(udi, idxi, xdoc))) {
if (idxi >= 0 && (docid = m_ndb->getDoc(udi, idxi, xdoc))) {
string data = xdoc.get_data();
doc.meta[Rcl::Doc::keyudi] = udi;
return m_ndb->dbDataToRclDoc(docid, data, doc);
@ -2373,7 +2416,7 @@ bool Db::getDoc(const string &udi, const Doc& idxdoc, Doc &doc)
// other ok docs further) but indicate the error with
// pc = -1
doc.pc = -1;
LOGINFO("Db:getDoc: no such doc in index: [" << udi << "]\n");
LOGINFO("Db:getDoc: no such doc in current index: [" << udi << "]\n");
return true;
}
}

View File

@ -336,13 +336,18 @@ public:
bool addQueryDb(const string &dir);
/** Remove extra database. if dir == "", remove all. */
bool rmQueryDb(const string &dir);
/** Look where the doc result comes from.
* @param doc must come from a db query so that "opaque" xdocid is set.
* @return: 0 main index, (size_t)-1 don't know,
* other: order of database in add_database() sequence.
*/
size_t whatDbIdx(const Doc& doc);
/** Check if document comes from the main index (this is used to
decide if we can update the index for it). */
bool fromMainIndex(const Doc& doc);
/** Retrieve an index designator for the document result. This is used
* by the GUI document history feature for remembering where a
* doc comes from and allowing later retrieval (if the ext index
* is still active...).
*/
std::string whatIndexForResultDoc(const Doc& doc);
/** Tell if directory seems to hold xapian db */
static bool testDbDir(const string &dir, bool *stripped = 0);
@ -391,9 +396,10 @@ public:
int getAbsLen() const {
return m_synthAbsLen;
}
/** Get document for given udi
/** Get document for given udi and db index
*
* Used by the 'history' feature, and to retrieve ancestor documents.
* Used to retrieve ancestor documents.
* @param udi The unique document identifier.
* @param idxdoc A document from the same database as an opaque way to pass
* the database id (e.g.: when looking for parent in a multi-database
@ -403,6 +409,20 @@ public:
*/
bool getDoc(const string &udi, const Doc& idxdoc, Doc &doc);
/** Get document for given udi and index directory.
*
* Used by the 'history' feature. This supposes that the extra db
* is still active.
* @param udi The unique document identifier.
* @param dbdir The index directory, from storage, as returned by
* whatIndexForResultDoc() at the time of the query. Can be
* empty to mean "main index" (allows the history to avoid
* storing the main dbdir value).
* @param[out] doc The output Recoll document.
* @return True for success.
*/
bool getDoc(const string &udi, const std::string& dbdir, Doc &doc);
/** Test if document has sub-documents.
*
* This can always be detected for file-level documents, using the
@ -548,6 +568,8 @@ private:
bool maybeflush(int64_t moretext);
bool docExists(const string& uniterm);
bool getDoc(const std::string& udi, int idxi, Doc& doc);
/* Copy constructor and assignment are private and forbidden */
Db(const Db &) {}
Db& operator=(const Db &) {return *this;};