diff --git a/src/index/beaglequeue.cpp b/src/index/beaglequeue.cpp index a4911876..23781d77 100644 --- a/src/index/beaglequeue.cpp +++ b/src/index/beaglequeue.cpp @@ -175,7 +175,7 @@ BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db, if (!m_config->getConfParam("beaglequeuedir", m_queuedir)) m_queuedir = path_tildexpand("~/.beagle/ToIndex"); - if (m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0) { + if (m_db && m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0) { string reason; if (!maketmpdir(m_tmpdir, reason)) { LOGERR(("DbIndexer: cannot create temporary directory: %s\n", @@ -212,26 +212,23 @@ BeagleQueueIndexer::~BeagleQueueIndexer() deleteZ(m_cache); } -bool BeagleQueueIndexer::indexFromCache(const string& udi) +bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc, + string& data, string *htt) { - string dict, data; + string dict; - // This is horribly inefficient and needs fixing either by saving - // the offsets during the forward scan, or using an auxiliary isam - // map + // This is horribly inefficient, especially while reindexing from + // cache, and needs fixing either by saving the offsets during the + // forward scan, or using an auxiliary isam map if (!m_cache->get(udi, dict, data)) return false; ConfSimple cf(dict, 1); - - string hittype; - if (!cf.get(keybght, hittype, "")) { - LOGERR(("BeagleIndexer::index: cc entry has no hit type\n")); - return false; - } + + if (htt) + cf.get(keybght, *htt, ""); // Build a doc from saved metadata - Rcl::Doc dotdoc; cf.get("url", dotdoc.url, ""); cf.get("mimetype", dotdoc.mimetype, ""); cf.get("fmtime", dotdoc.fmtime, ""); @@ -242,9 +239,29 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi) it != names.end(); it++) { cf.get(*it, dotdoc.meta[*it], ""); } + return true; +} +bool BeagleQueueIndexer::indexFromCache(const string& udi) +{ + if (!m_db) + return false; + + Rcl::Doc dotdoc; + string data; + string hittype; + + if (!getFromCache(udi, dotdoc, data, &hittype)) + return false; + + if (hittype.empty()) { + LOGERR(("BeagleIndexer::index: cc entry has no hit type\n")); + return false; + } + if (!stringlowercmp("bookmark", hittype)) { // Just index the dotdoc + dotdoc.meta[Rcl::Doc::keybcknd] = "BGL"; return m_db->addOrUpdate(udi, "", dotdoc); } else if (stringlowercmp("webhistory", dotdoc.meta[keybght]) || (dotdoc.mimetype.compare("text/html") && @@ -269,13 +286,15 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi) doc.url = dotdoc.url; doc.fbytes = dotdoc.fbytes; doc.sig = ""; - + doc.meta[Rcl::Doc::keybcknd] = "BGL"; return m_db->addOrUpdate(udi, "", doc); } } bool BeagleQueueIndexer::index() { + if (!m_db) + return false; LOGDEB(("BeagleQueueIndexer::processqueue: dir: [%s]\n", m_queuedir.c_str())); m_config->setKeyDir(m_queuedir); @@ -322,6 +341,9 @@ BeagleQueueIndexer::processone(const string &path, const struct stat *stp, FsTreeWalker::CbFlag flg) { + if (!m_db) //?? + return FsTreeWalker::FtwError; + bool dounlink = false; if (flg != FsTreeWalker::FtwRegular) @@ -365,6 +387,7 @@ BeagleQueueIndexer::processone(const string &path, dotfile.m_fields.set("fmtime", dotdoc.fmtime, ""); dotfile.m_fields.set("fbytes", dotdoc.fbytes, ""); + dotdoc.meta[Rcl::Doc::keybcknd] = "BGL"; if (!m_db->addOrUpdate(udi, "", dotdoc)) return FsTreeWalker::FtwError; @@ -402,6 +425,7 @@ BeagleQueueIndexer::processone(const string &path, dotfile.m_fields.set("fmtime", dotdoc.fmtime, ""); dotfile.m_fields.set("fbytes", dotdoc.fbytes, ""); + doc.meta[Rcl::Doc::keybcknd] = "BGL"; if (!m_db->addOrUpdate(udi, "", doc)) return FsTreeWalker::FtwError; diff --git a/src/index/beaglequeue.h b/src/index/beaglequeue.h index 31447344..5b2dc129 100644 --- a/src/index/beaglequeue.h +++ b/src/index/beaglequeue.h @@ -19,21 +19,30 @@ /* @(#$Id: $ (C) 2009 J.F.Dockes */ /** - * Code to process the Beagle indexing queue. Beagle MUST NOT be - * running, else mayhem will ensue. Interesting to reuse the beagle - * firefox visited page indexing plugin for example. + * Process the Beagle indexing queue. + * + * Beagle MUST NOT be running, else mayhem will ensue. + * + * This is mainly written to reuse the Beagle Firefox plug-in (which + * copies visited pages and bookmarks to the queue). */ -#include "rclconfig.h" #include "fstreewalk.h" -#include "rcldb.h" +#include "rcldoc.h" class DbIxStatusUpdater; class CirCache; +class RclConfig; +namespace Rcl { + class Db; +} class BeagleQueueIndexer : public FsTreeWalkerCB { public: - BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db, + /** + * @para db can be null when using readonly for calling getFromCache() + */ + BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db = 0, DbIxStatusUpdater *updfunc = 0); ~BeagleQueueIndexer(); @@ -42,6 +51,8 @@ public: FsTreeWalker::Status processone(const string &, const struct stat *, FsTreeWalker::CbFlag); + bool getFromCache(const string& udi, Rcl::Doc &doc, string& data, + string *hittype = 0); private: RclConfig *m_config; Rcl::Db *m_db; @@ -51,7 +62,6 @@ private: DbIxStatusUpdater *m_updater; bool indexFromCache(const string& udi); - }; #endif /* _beaglequeue_h_included_ */ diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index 4747357b..312d258c 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -45,6 +45,7 @@ using namespace std; #include "rclconfig.h" #include "mh_html.h" #include "fileudi.h" +#include "beaglequeue.h" #ifdef RCL_USE_XATTR #include "pxattr.h" @@ -166,12 +167,23 @@ void FileInterner::tmpcleanup() // // Empty handler on return says that we're in error, this will be // processed by the first call to internfile(). +// Split into "constructor calls init()" to allow use from other constructor FileInterner::FileInterner(const string &f, const struct stat *stp, RclConfig *cnf, const string& td, int flags, const string *imime) - : m_cfg(cnf), m_fn(f), m_forPreview(flags & FIF_forPreview), - m_tdir(td) + : m_tdir(td) { + initcommon(cnf, flags); + init(f, stp, cnf, td, flags, imime); +} + +void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf, + const string& td, int flags, const string *imime) +{ + m_fn = f; + + cnf->setKeyDir(path_getfather(m_fn)); + string l_mime; bool usfci = false; cnf->getConfParam("usesystemfilecommand", &usfci); @@ -237,8 +249,8 @@ FileInterner::FileInterner(const string &f, const struct stat *stp, if (!df) { // No handler for this type, for now :( if indexallfilenames // is set in the config, this normally wont happen (we get mh_unknown) - LOGERR(("FileInterner:: ignored: [%s] mime [%s]\n", - f.c_str(), l_mime.c_str())); + LOGINFO(("FileInterner:: ignored: [%s] mime [%s]\n", + f.c_str(), l_mime.c_str())); return; } df->set_property(Dijon::Filter::OPERATING_MODE, @@ -258,15 +270,143 @@ FileInterner::FileInterner(const string &f, const struct stat *stp, LOGERR(("FileInterner:: error parsing %s\n", m_fn.c_str())); return; } - m_handlers.reserve(MAXHANDLERS); - for (unsigned int i = 0; i < MAXHANDLERS; i++) - m_tmpflgs[i] = false; + m_handlers.push_back(df); LOGDEB(("FileInterner::FileInterner: %s [%s]\n", l_mime.c_str(), m_fn.c_str())); +} + +// Setup from memory data (ie: out of the web cache). imime needs to be set. +FileInterner::FileInterner(const string &data, RclConfig *cnf, + const string& td, int flags, const string& imime) + : m_tdir(td) +{ + initcommon(cnf, flags); + init(data, cnf, td, flags, imime); +} + +void FileInterner::init(const string &data, RclConfig *cnf, + const string& td, int flags, const string& imime) +{ + if (imime.empty()) { + LOGERR(("FileInterner: inmemory constructor needs input mime type\n")); + return; + } + m_mimetype = imime; + + // Look for appropriate handler (might still return empty) + Dijon::Filter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview); + + if (!df) { + // No handler for this type, for now :( if indexallfilenames + // is set in the config, this normally wont happen (we get mh_unknown) + LOGINFO(("FileInterner:: ignored: mime [%s]\n", m_mimetype.c_str())); + return; + } + df->set_property(Dijon::Filter::OPERATING_MODE, + m_forPreview ? "view" : "index"); + + string charset = m_cfg->getDefCharset(); + df->set_property(Dijon::Filter::DEFAULT_CHARSET, charset); + + bool setres = false; + if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) { + setres = df->set_document_string(data); + } else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) { + setres = df->set_document_data(data.c_str(), data.length()); + } else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) { + string filename; + if (dataToTempFile(data, m_mimetype, filename)) { + if (!(setres=df->set_document_file(filename))) { + m_tmpflgs[0] = false; + m_tempfiles.pop_back(); + } + } + } + if (!setres) { + LOGINFO(("FileInterner:: set_doc failed inside for mtype %s\n", + m_mimetype.c_str())); + delete df; + return; + } + m_handlers.push_back(df); +} + +void FileInterner::initcommon(RclConfig *cnf, int flags) +{ + m_cfg = cnf; + m_forPreview = ((flags & FIF_forPreview) != 0); + // Initialize handler stack. + m_handlers.reserve(MAXHANDLERS); + for (unsigned int i = 0; i < MAXHANDLERS; i++) + m_tmpflgs[i] = false; m_targetMType = stxtplain; } +FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, + const string& td, int flags) + : m_tdir(td) +{ + initcommon(cnf, flags); + + // We do insist on having an url... + if (idoc.url.empty()) { + LOGERR(("FileInterner::FileInterner:: no url!\n")); + return; + } + + // This stuff will be moved to some kind of generic function: + // get(idoc, ofn, odata, ometa) + // and use some kind of backstore object factory next time we add a + // backend (if ever). + string backend; + map::const_iterator it; + if ((it = idoc.meta.find(Rcl::Doc::keybcknd)) != idoc.meta.end()) + backend = it->second; + + if (backend.empty() || !backend.compare("FS")) { + // The url has to be like file:// + if (idoc.url.find("file://") != 0) { + LOGERR(("FileInterner: FS backend and non fs url: [%s]\n", + idoc.url.c_str())); + return; + } + string fn = idoc.url.substr(7, string::npos); + struct stat st; + if (stat(fn.c_str(), &st) < 0) { + LOGERR(("InternFile: cannot access document file: [%s]\n", + fn.c_str())); + return; + } + init(fn, &st, cnf, td, flags, &idoc.mimetype); + } else if (!backend.compare("BGL")) { + // Retrieve from our webcache (beagle data) + BeagleQueueIndexer beagler(cnf); + string data; + Rcl::Doc dotdoc; + map::const_iterator it = + idoc.meta.find(Rcl::Doc::keyudi); + if (it == idoc.meta.end() || it->second.empty()) { + LOGERR(("Internfile: no udi in idoc\n")); + return; + } + string udi = it->second; + if (!beagler.getFromCache(udi, dotdoc, data)) { + LOGINFO(("Internfile: failed fetch from Beagle cache for [%s]\n", + udi.c_str())); + return; + } + if (dotdoc.mimetype.compare(idoc.mimetype)) { + LOGINFO(("Internfile: udi [%s], mimetype mismatch: in: [%s], bgl " + "[%s]\n", idoc.mimetype.c_str(), dotdoc.mimetype.c_str())); + } + init(data, cnf, td, flags, dotdoc.mimetype); + } else { + LOGERR(("InternFile: unknown backend: [%s]\n", backend.c_str())); + return; + } +} + FileInterner::~FileInterner() { tmpcleanup(); @@ -286,7 +426,10 @@ bool FileInterner::dataToTempFile(const string& dt, const string& mt, // Find appropriate suffix for mime type TempFile temp(new TempFileInternal(m_cfg->getSuffixFromMimeType(mt))); if (temp->ok()) { - m_tmpflgs[m_handlers.size()-1] = true; + // We are called before the handler is actually on the stack, so the + // index is m_handlers.size(). m_tmpflgs is a static array, so this is + // no problem + m_tmpflgs[m_handlers.size()] = true; m_tempfiles.push_back(temp); } else { LOGERR(("FileInterner::dataToTempFile: cant create tempfile: %s\n", @@ -550,7 +693,7 @@ int FileInterner::addHandler() string filename; if (dataToTempFile(*txt, mimetype, filename)) { if (!(setres = newflt->set_document_file(filename))) { - m_tmpflgs[m_handlers.size()-1] = false; + m_tmpflgs[m_handlers.size()] = false; m_tempfiles.pop_back(); } } @@ -711,6 +854,12 @@ class DirWiper { } }; +// Temporary while we fix backend things +static string urltolocalpath(string url) +{ + return url.substr(7, string::npos); +} + // Extract subdoc out of multidoc into temporary file. // We do the usual internfile stuff: create a temporary directory, // then create an interner and call internfile. The target mtype is set to @@ -722,11 +871,13 @@ class DirWiper { // - The output temporary file which is held in a reference-counted // object and will be deleted when done with. bool FileInterner::idocToFile(TempFile& otemp, const string& tofile, - RclConfig *cnf, - const string& fn, - const string& ipath, - const string& mtype) + RclConfig *cnf, const Rcl::Doc& idoc) { + LOGDEB(("FileInterner::idocToFile\n")); + idoc.dump(); + string fn = urltolocalpath(idoc.url); + string ipath = idoc.ipath; + string mtype = idoc.mimetype; struct stat st; if (stat(fn.c_str(), &st) < 0) { LOGERR(("FileInterner::idocToFile: can't stat [%s]\n", fn.c_str())); diff --git a/src/internfile/internfile.h b/src/internfile/internfile.h index 5d4e8674..43c59a9f 100644 --- a/src/internfile/internfile.h +++ b/src/internfile/internfile.h @@ -50,7 +50,8 @@ class FileInterner { * Get immediate parent for document. * * This is not in general the same as the "parent" document used - * with Rcl::Db::addOrUpdate(). The latter is generally the enclosing file. + * with Rcl::Db::addOrUpdate(). The latter is generally the enclosing file, + * this would be for exemple the email containing the attachment. */ static bool getEnclosing(const string &url, const string &ipath, string &eurl, string &eipath, string& udi); @@ -58,9 +59,9 @@ class FileInterner { enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype}; /** - * Identify and possibly decompress file, create adequate - * handler. The mtype parameter is only set when the object is - * created for previewing a file. Filter output may be + * Identify and possibly decompress file, and create the top filter + * The mtype parameter is not always set (it is when the object is + * created for previewing a file). Filter output may be * different for previewing and indexing. * * @param fn file name @@ -69,12 +70,27 @@ class FileInterner { * @param td temporary directory to use as working space if * decompression needed. Must be private and will be wiped clean. * @param mtype mime type if known. For a compressed file this is the - * mime type for the uncompressed version. This currently doubles up - * to indicate that this object is for previewing (not indexing). + * mime type for the uncompressed version. */ FileInterner(const string &fn, const struct stat *stp, RclConfig *cnf, const string& td, int flags, const string *mtype = 0); + + /** + * Alternate constructor for the case where the data is in memory. + * This is mainly for data extracted from the web cache. The mime type + * must be set, input must be uncompressed. + */ + FileInterner(const string &data, RclConfig *cnf, const string& td, + int flags, const string& mtype); + + /** + * Alternate constructor for the case where it is not known where + * the data will come from. We'll use the doc fields and try our + * best... + */ + FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, const string& td, + int flags); ~FileInterner(); @@ -121,8 +137,7 @@ class FileInterner { * @param mtype The target mime type (we don't want to decode to text!) */ static bool idocToFile(TempFile& temp, const string& tofile, - RclConfig *cnf, const string& fn, - const string& ipath, const string& mtype); + RclConfig *cnf, const Rcl::Doc& doc); const string& getReason() const {return m_reason;} static void getMissingExternal(string& missing); @@ -160,6 +175,14 @@ class FileInterner { static set o_missingExternal; static map > o_typesForMissing; + // Pseudo-constructors + void init(const string &fn, const struct stat *stp, + RclConfig *cnf, const string& td, int flags, + const string *mtype = 0); + void init(const string &data, RclConfig *cnf, const string& td, + int flags, const string& mtype); + void initcommon(RclConfig *cnf, int flags); + void tmpcleanup(); bool dijontorcl(Rcl::Doc&); void collectIpathAndMT(Rcl::Doc&, string& ipath) const; diff --git a/src/qtgui/confgui/confguiindex.cpp b/src/qtgui/confgui/confguiindex.cpp index 1cce8d49..5049787c 100644 --- a/src/qtgui/confgui/confguiindex.cpp +++ b/src/qtgui/confgui/confguiindex.cpp @@ -156,10 +156,10 @@ ConfBeaglePanelW::ConfBeaglePanelW(QWidget *parent, ConfNull *config) ConfLink lnk1(new ConfLinkRclRep(config, "processbeaglequeue")); ConfParamBoolW* cp1 = - new ConfParamBoolW(gb1, lnk1, tr("Process Beagle queue"), + new ConfParamBoolW(gb1, lnk1, tr("Steal Beagle indexing queue"), tr("Beagle MUST NOT be running. Enables processing " - "the beagle queue to index Firefox web history.
" - "(you must also install the Firefox Beagle Plugin)" + "the beagle queue to index Firefox web history.
" + "(you should also install the Firefox Beagle plugin)" )); ConfLink lnk2(new ConfLinkRclRep(config, "webcachedir")); diff --git a/src/qtgui/preview_w.cpp b/src/qtgui/preview_w.cpp index 471a6c08..6fbf2d00 100644 --- a/src/qtgui/preview_w.cpp +++ b/src/qtgui/preview_w.cpp @@ -538,14 +538,16 @@ PreviewTextEdit *Preview::addEditorTab() return editor; } -void Preview::setCurTabProps(const string &fn, const Rcl::Doc &doc, - int docnum) +void Preview::setCurTabProps(const Rcl::Doc &doc, int docnum) { QString title; map::const_iterator meta_it; - if ((meta_it = doc.meta.find("title")) != doc.meta.end()) { + if ((meta_it = doc.meta.find(Rcl::Doc::keytt)) != doc.meta.end() + && !meta_it->second.empty()) { title = QString::fromUtf8(meta_it->second.c_str(), meta_it->second.length()); + } else { + title = QString::fromLocal8Bit(path_getsimple(doc.url).c_str()); } if (title.length() > 20) { title = title.left(10) + "..." + title.right(10); @@ -572,16 +574,15 @@ void Preview::setCurTabProps(const string &fn, const Rcl::Doc &doc, PreviewTextEdit *e = currentEditor(); if (e) { - e->m_data.fn = fn; + e->m_data.url = doc.url; e->m_data.ipath = doc.ipath; e->m_data.docnum = docnum; } } -bool Preview::makeDocCurrent(const string &fn, size_t sz, - const Rcl::Doc& doc, int docnum, bool sametab) +bool Preview::makeDocCurrent(const Rcl::Doc& doc, int docnum, bool sametab) { - LOGDEB(("Preview::makeDocCurrent: %s\n", fn.c_str())); + LOGDEB(("Preview::makeDocCurrent: %s\n", doc.url.c_str())); /* Check if we already have this page */ for (int i = 0; i < pvTab->count(); i++) { @@ -593,7 +594,7 @@ bool Preview::makeDocCurrent(const string &fn, size_t sz, if (tw) { PreviewTextEdit *edit = dynamic_cast(tw->child("pvEdit")); - if (edit && !edit->m_data.fn.compare(fn) && + if (edit && !edit->m_data.url.compare(doc.url) && !edit->m_data.ipath.compare(doc.ipath)) { pvTab->showPage(tw); return true; @@ -606,7 +607,7 @@ bool Preview::makeDocCurrent(const string &fn, size_t sz, return false; } m_justCreated = false; - if (!loadFileInCurrentTab(fn, sz, doc, docnum)) { + if (!loadDocInCurrentTab(doc, docnum)) { closeCurrentTab(); return false; } @@ -637,16 +638,15 @@ bool Preview::makeDocCurrent(const string &fn, size_t sz, /* A thread to to the file reading / format conversion */ class LoadThread : public QThread { int *statusp; - Rcl::Doc *out; + Rcl::Doc& out; + const Rcl::Doc& idoc; string filename; - string ipath; - string *mtype; string tmpdir; int loglevel; public: string missing; - LoadThread(int *stp, Rcl::Doc *odoc, string fn, string ip, string *mt) - : statusp(stp), out(odoc), filename(fn), ipath(ip), mtype(mt) + LoadThread(int *stp, Rcl::Doc& odoc, const Rcl::Doc& idc) + : statusp(stp), out(odoc), idoc(idc) { loglevel = DebugLog::getdbl()->getlevel(); } @@ -666,40 +666,35 @@ class LoadThread : public QThread { *statusp = -1; return; } - struct stat st; - if (stat(filename.c_str(), &st) < 0) { - LOGERR(("Preview: can't stat [%s]\n", filename.c_str())); - QMessageBox::critical(0, "Recoll", - Preview::tr("File does not exist")); - *statusp = -1; - return; - } + + // QMessageBox::critical(0, "Recoll", Preview::tr("File does not exist")); - FileInterner interner(filename, &st, rclconfig, tmpdir, - FileInterner::FIF_forPreview, - mtype); + FileInterner interner(idoc, rclconfig, tmpdir, + FileInterner::FIF_forPreview); + // We don't set the interner's target mtype to html because we // do want the html filter to do its work: we won't use the // text, but we need the conversion to utf-8 // interner.setTargetMType("text/html"); try { - FileInterner::Status ret = interner.internfile(*out, ipath); + string ipath = idoc.ipath; + FileInterner::Status ret = interner.internfile(out, ipath); if (ret == FileInterner::FIDone || ret == FileInterner::FIAgain) { // FIAgain is actually not nice here. It means that the record // for the *file* of a multidoc was selected. Actually this // shouldn't have had a preview link at all, but we don't know // how to handle it now. Better to show the first doc than // a mysterious error. Happens when the file name matches a - // a search term of course. + // a search term. *statusp = 0; // If we prefer html and it is available, replace the // text/plain document text if (prefs.previewHtml && !interner.get_html().empty()) { - out->text = interner.get_html(); - out->mimetype = "text/html"; + out.text = interner.get_html(); + out.mimetype = "text/html"; } } else { - out->mimetype = interner.getMimetype(); + out.mimetype = interner.getMimetype(); interner.getMissingExternal(missing); *statusp = -1; } @@ -754,8 +749,7 @@ public: ~LoadGuard() {*m_bp = false; CancelCheck::instance().setCancel(false);} }; -bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, - int docnum) +bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum) { if (m_loading) { LOGERR(("ALready loading\n")); @@ -767,18 +761,11 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, m_haveAnchors = false; - Rcl::Doc doc = idoc; + setCurTabProps(idoc, docnum); - if (doc.meta[Rcl::Doc::keytt].empty()) - doc.meta[Rcl::Doc::keytt] = path_getsimple(doc.url); - - setCurTabProps(fn, doc, docnum); - - char csz[20]; - sprintf(csz, "%lu", (unsigned long)sz); QString msg = QString("Loading: %1 (size %2 bytes)") - .arg(QString::fromLocal8Bit(fn.c_str())) - .arg(csz); + .arg(QString::fromLocal8Bit(idoc.url.c_str())) + .arg(QString::fromAscii(idoc.fbytes.c_str())); // Create progress dialog and aux objects const int nsteps = 20; @@ -786,12 +773,12 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, progress.setMinimumDuration(2000); WaiterThread waiter(100); - // Load and convert file + // Load and convert document + // idoc came out of the index data (main text and other fields missing). + // foc is the complete one what we are going to extract from storage. Rcl::Doc fdoc; - // Need to setup config to retrieve possibly local parameters - rclconfig->setKeyDir(path_getfather(fn)); int status = 1; - LoadThread lthr(&status, &fdoc, fn, doc.ipath, &doc.mimetype); + LoadThread lthr(&status, fdoc, idoc); lthr.start(); int prog; for (prog = 1;;prog++) { @@ -963,7 +950,7 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, } // Enter document in document history - g_dynconf->enterDoc(fn, doc.ipath); + g_dynconf->enterDoc(idoc.url, idoc.ipath); editor->setFocus(); emit(previewExposed(this, m_searchId, docnum)); @@ -998,7 +985,7 @@ void PreviewTextEdit::toggleFields() // Else display fields m_dspflds = true; QString txt = "\n"; - txt += "" + QString::fromLocal8Bit(m_data.fn.c_str()); + txt += "" + QString::fromLocal8Bit(m_data.url.c_str()); if (!m_data.ipath.empty()) txt += "|" + QString::fromUtf8(m_data.ipath.c_str()); txt += "

"; diff --git a/src/qtgui/preview_w.h b/src/qtgui/preview_w.h index 9402f240..7de6b972 100644 --- a/src/qtgui/preview_w.h +++ b/src/qtgui/preview_w.h @@ -51,7 +51,7 @@ class Q3PopupMenu; // We keep a list of data associated to each tab class TabData { public: - string fn; // filename for this tab + string url; // filename for this tab string ipath; // Internal doc path inside file int docnum; // Index of doc in db search results. // doc out of internfile (previous fields come from the index) with @@ -133,8 +133,13 @@ public: virtual void closeEvent(QCloseEvent *e ); virtual bool eventFilter(QObject *target, QEvent *event ); - virtual bool makeDocCurrent(const string &fn, size_t sz, - const Rcl::Doc& idoc, int docnum, + /** + * Arrange for the document to be displayed either by exposing the tab + * if already loaded, or by creating a new tab and loading it. + * @para docnum is used to link back to the result list (to highlight + * paragraph when tab exposed etc. + */ + virtual bool makeDocCurrent(const Rcl::Doc& idoc, int docnum, bool sametab = false); friend class PreviewTextEdit; public slots: @@ -182,12 +187,10 @@ private: QCheckBox* matchCheck; void init(); - virtual void setCurTabProps(const string& fn, const Rcl::Doc& doc, - int docnum); + virtual void setCurTabProps(const Rcl::Doc& doc, int docnum); virtual PreviewTextEdit *currentEditor(); virtual PreviewTextEdit *addEditorTab(); - virtual bool loadFileInCurrentTab(string fn, size_t sz, - const Rcl::Doc& idoc, int dnm); + virtual bool loadDocInCurrentTab(const Rcl::Doc& idoc, int dnm); }; #endif /* _PREVIEW_W_H_INCLUDED_ */ diff --git a/src/qtgui/rclmain_w.cpp b/src/qtgui/rclmain_w.cpp index 2b8b50ed..a9371803 100644 --- a/src/qtgui/rclmain_w.cpp +++ b/src/qtgui/rclmain_w.cpp @@ -498,12 +498,6 @@ void RclMain::toggleIndexing() fileToggleIndexingAction->setEnabled(FALSE); } -// Note that all our 'urls' are like : file://... -static string urltolocalpath(string url) -{ - return url.substr(7, string::npos); -} - // Start a db query and set the reslist docsource void RclMain::startSearch(RefCntr sdata) { @@ -688,14 +682,6 @@ void RclMain::startPreview(int docnum, int mod) return; } - // Check file exists in file system - string fn = urltolocalpath(doc.url); - struct stat st; - if (stat(fn.c_str(), &st) < 0) { - QMessageBox::warning(0, "Recoll", tr("Cannot access document file: ") + - fn.c_str()); - return; - } if (mod & Qt::ShiftButton) { // User wants new preview window curPreview = 0; @@ -724,7 +710,7 @@ void RclMain::startPreview(int docnum, int mod) curPreview->setCaption(resList->getDescription()); curPreview->show(); } - curPreview->makeDocCurrent(fn, st.st_size, doc, docnum); + curPreview->makeDocCurrent(doc, docnum); } /** @@ -736,14 +722,6 @@ void RclMain::startPreview(int docnum, int mod) */ void RclMain::startPreview(Rcl::Doc doc) { - // Check file exists in file system - string fn = urltolocalpath(doc.url); - struct stat st; - if (stat(fn.c_str(), &st) < 0) { - QMessageBox::warning(0, "Recoll", tr("Cannot access document file: ") + - fn.c_str()); - return; - } Preview *preview = new Preview(0, HiliteData()); if (preview == 0) { QMessageBox::warning(0, tr("Warning"), @@ -755,7 +733,7 @@ void RclMain::startPreview(Rcl::Doc doc) connect(preview, SIGNAL(wordSelect(QString)), this, SLOT(ssearchAddTerm(QString))); preview->show(); - preview->makeDocCurrent(fn, st.st_size, doc, 0); + preview->makeDocCurrent(doc, 0); } // Show next document from result list in current preview tab @@ -802,15 +780,7 @@ void RclMain::previewPrevOrNextInTab(Preview * w, int sid, int docnum, bool nxt) } // Check that file exists in file system - string fn = urltolocalpath(doc.url); - struct stat st; - if (stat(fn.c_str(), &st) < 0) { - QMessageBox::warning(0, "Recoll", tr("Cannot access document file: ") + - fn.c_str()); - return; - } - - w->makeDocCurrent(fn, st.st_size, doc, docnum, true); + w->makeDocCurrent(doc, docnum, true); } // Preview tab exposed: if the preview comes from the currently @@ -862,7 +832,6 @@ void RclMain::saveDocToFile(int docnum) " from database")); return; } - string fn = urltolocalpath(doc.url); QString s = QFileDialog::getSaveFileName(path_home().c_str(), "", this, @@ -870,8 +839,7 @@ void RclMain::saveDocToFile(int docnum) tr("Choose a file name to save under")); string tofile((const char *)s.local8Bit()); TempFile temp; // not used - if (!FileInterner::idocToFile(temp, tofile, rclconfig, fn, - doc.ipath, doc.mimetype)) { + if (!FileInterner::idocToFile(temp, tofile, rclconfig, doc)) { QMessageBox::warning(0, "Recoll", tr("Cannot extract document or create " "temporary file")); @@ -913,6 +881,14 @@ void RclMain::startNativeViewer(int docnum) startNativeViewer(doc); } +// Convert to file path if url is like file:// +static string fileurltolocalpath(string url) +{ + if (url.find("file://") == 0) + return url.substr(7, string::npos); + return string(); +} + void RclMain::startNativeViewer(Rcl::Doc doc) { // Look for appropriate viewer @@ -993,22 +969,20 @@ void RclMain::startNativeViewer(Rcl::Doc doc) return; } - // For files with an ipath, we do things differently depending if - // the configured command seems to be able to grok it or not: if - // not, create a temporary file + // We may need a temp file, or not depending on the command arguments + // and the fact that this is a subdoc or not. bool wantsipath = cmd.find("%i") != string::npos; + bool wantsfile = cmd.find("%f") != string::npos; bool istempfile = false; - string fn = urltolocalpath(doc.url); - string url; + string fn = fileurltolocalpath(doc.url); + string url = doc.url; + + // If the command wants a file but this is not a file url, or + // there is an ipath that it won't understand, we need a temp file: rclconfig->setKeyDir(path_getfather(fn)); - if (doc.ipath.empty() || wantsipath) { - url = doc.url; - } else { - // There is an ipath and the command does not know about - // them. We need a temp file. + if ((wantsfile && fn.empty()) || (!wantsipath && !doc.ipath.empty())) { TempFile temp; - if (!FileInterner::idocToFile(temp, string(), rclconfig, fn, - doc.ipath, doc.mimetype)) { + if (!FileInterner::idocToFile(temp, string(), rclconfig, doc)) { QMessageBox::warning(0, "Recoll", tr("Cannot extract document or create " "temporary file")); diff --git a/src/rcldb/rcldoc.cpp b/src/rcldb/rcldoc.cpp index b86e9f16..f7ecf325 100644 --- a/src/rcldb/rcldoc.cpp +++ b/src/rcldb/rcldoc.cpp @@ -40,6 +40,8 @@ namespace Rcl { const string Doc::keytt("title"); const string Doc::keykw("keywords"); const string Doc::keymd5("md5"); + const string Doc::keybcknd("rclbes"); + const string Doc::keyudi("rcludi"); void Doc::dump(bool dotext) const { diff --git a/src/rcldb/rcldoc.h b/src/rcldb/rcldoc.h index cf344361..5e3dc1f9 100644 --- a/src/rcldb/rcldoc.h +++ b/src/rcldb/rcldoc.h @@ -44,9 +44,9 @@ class Doc { // can be accessed after a query without fetching the actual document). // We indicate the routine that sets them up during indexing - // This is just "file://" + binary or url-encoded filename. No - // transcoding: this is used to access files Index: computed from - // fn by Db::add caller. Query: from doc data. + // Binary or url-encoded url. No transcoding: this is used to access files + // Index: computed by Db::add caller. + // Query: from doc data. string url; // Transcoded version of the simple file name for SFN-prefixed @@ -160,6 +160,9 @@ class Doc { static const string keytt; // title static const string keykw; // keywords static const string keymd5; // file md5 checksum + static const string keybcknd; // backend type for data not from the filesys + // udi back from index. Only set by Rcl::Query::getdoc(). + static const string keyudi; }; diff --git a/src/rcldb/rclquery.cpp b/src/rcldb/rclquery.cpp index 42a04807..9bd24853 100644 --- a/src/rcldb/rclquery.cpp +++ b/src/rcldb/rclquery.cpp @@ -369,6 +369,7 @@ bool Query::getDoc(int exti, Doc &doc) Xapian::docid docid = 0; int pc = 0; string data; + string udi; m_reason.erase(); for (int xaptries=0; xaptries < 2; xaptries++) { try { @@ -377,6 +378,16 @@ bool Query::getDoc(int exti, Doc &doc) pc = m_nq->xmset.convert_to_percent(m_nq->xmset[xapi-first]); data = xdoc.get_data(); m_reason.erase(); + Chrono chron; + Xapian::TermIterator it = xdoc.termlist_begin(); + it.skip_to("Q"); + if (it != xdoc.termlist_end()) { + udi = *it; + if (!udi.empty()) + udi = udi.substr(1); + } + LOGDEB2(("Query::getDoc: %d ms to get udi [%s]\n", chron.millis(), + udi.c_str())); break; } catch (Xapian::DatabaseModifiedError &error) { // retry or end of loop @@ -390,6 +401,7 @@ bool Query::getDoc(int exti, Doc &doc) LOGERR(("Query::getDoc: %s\n", m_reason.c_str())); return false; } + doc.meta[Rcl::Doc::keyudi] = udi; // Parse xapian document's data and populate doc fields return m_db->m_ndb->dbDataToRclDoc(docid, data, doc, pc); } diff --git a/src/sampleconf/fields b/src/sampleconf/fields index b46dc858..7288df37 100644 --- a/src/sampleconf/fields +++ b/src/sampleconf/fields @@ -43,7 +43,7 @@ recipient = XTO # "author" used to be stored by default, now set here as optional # "apptag" is used for viewer specialization (depending on local config) [stored] -stored = author apptag +stored = author apptag rclbes ########################## # This section defines field names aliases or synonyms. Any right hand side