1st beagle version with index/preview working

This commit is contained in:
dockes 2009-11-13 13:29:34 +00:00
parent efa501f06d
commit bbba826c06
12 changed files with 341 additions and 152 deletions

View File

@ -175,7 +175,7 @@ BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
if (!m_config->getConfParam("beaglequeuedir", m_queuedir)) if (!m_config->getConfParam("beaglequeuedir", m_queuedir))
m_queuedir = path_tildexpand("~/.beagle/ToIndex"); m_queuedir = path_tildexpand("~/.beagle/ToIndex");
if (m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0) { if (m_db && m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0) {
string reason; string reason;
if (!maketmpdir(m_tmpdir, reason)) { if (!maketmpdir(m_tmpdir, reason)) {
LOGERR(("DbIndexer: cannot create temporary directory: %s\n", LOGERR(("DbIndexer: cannot create temporary directory: %s\n",
@ -212,26 +212,23 @@ BeagleQueueIndexer::~BeagleQueueIndexer()
deleteZ(m_cache); deleteZ(m_cache);
} }
bool BeagleQueueIndexer::indexFromCache(const string& udi) bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc,
string& data, string *htt)
{ {
string dict, data; string dict;
// This is horribly inefficient and needs fixing either by saving // This is horribly inefficient, especially while reindexing from
// the offsets during the forward scan, or using an auxiliary isam // cache, and needs fixing either by saving the offsets during the
// map // forward scan, or using an auxiliary isam map
if (!m_cache->get(udi, dict, data)) if (!m_cache->get(udi, dict, data))
return false; return false;
ConfSimple cf(dict, 1); ConfSimple cf(dict, 1);
string hittype; if (htt)
if (!cf.get(keybght, hittype, "")) { cf.get(keybght, *htt, "");
LOGERR(("BeagleIndexer::index: cc entry has no hit type\n"));
return false;
}
// Build a doc from saved metadata // Build a doc from saved metadata
Rcl::Doc dotdoc;
cf.get("url", dotdoc.url, ""); cf.get("url", dotdoc.url, "");
cf.get("mimetype", dotdoc.mimetype, ""); cf.get("mimetype", dotdoc.mimetype, "");
cf.get("fmtime", dotdoc.fmtime, ""); cf.get("fmtime", dotdoc.fmtime, "");
@ -242,9 +239,29 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
it != names.end(); it++) { it != names.end(); it++) {
cf.get(*it, dotdoc.meta[*it], ""); cf.get(*it, dotdoc.meta[*it], "");
} }
return true;
}
bool BeagleQueueIndexer::indexFromCache(const string& udi)
{
if (!m_db)
return false;
Rcl::Doc dotdoc;
string data;
string hittype;
if (!getFromCache(udi, dotdoc, data, &hittype))
return false;
if (hittype.empty()) {
LOGERR(("BeagleIndexer::index: cc entry has no hit type\n"));
return false;
}
if (!stringlowercmp("bookmark", hittype)) { if (!stringlowercmp("bookmark", hittype)) {
// Just index the dotdoc // Just index the dotdoc
dotdoc.meta[Rcl::Doc::keybcknd] = "BGL";
return m_db->addOrUpdate(udi, "", dotdoc); return m_db->addOrUpdate(udi, "", dotdoc);
} else if (stringlowercmp("webhistory", dotdoc.meta[keybght]) || } else if (stringlowercmp("webhistory", dotdoc.meta[keybght]) ||
(dotdoc.mimetype.compare("text/html") && (dotdoc.mimetype.compare("text/html") &&
@ -269,13 +286,15 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
doc.url = dotdoc.url; doc.url = dotdoc.url;
doc.fbytes = dotdoc.fbytes; doc.fbytes = dotdoc.fbytes;
doc.sig = ""; doc.sig = "";
doc.meta[Rcl::Doc::keybcknd] = "BGL";
return m_db->addOrUpdate(udi, "", doc); return m_db->addOrUpdate(udi, "", doc);
} }
} }
bool BeagleQueueIndexer::index() bool BeagleQueueIndexer::index()
{ {
if (!m_db)
return false;
LOGDEB(("BeagleQueueIndexer::processqueue: dir: [%s]\n", LOGDEB(("BeagleQueueIndexer::processqueue: dir: [%s]\n",
m_queuedir.c_str())); m_queuedir.c_str()));
m_config->setKeyDir(m_queuedir); m_config->setKeyDir(m_queuedir);
@ -322,6 +341,9 @@ BeagleQueueIndexer::processone(const string &path,
const struct stat *stp, const struct stat *stp,
FsTreeWalker::CbFlag flg) FsTreeWalker::CbFlag flg)
{ {
if (!m_db) //??
return FsTreeWalker::FtwError;
bool dounlink = false; bool dounlink = false;
if (flg != FsTreeWalker::FtwRegular) if (flg != FsTreeWalker::FtwRegular)
@ -365,6 +387,7 @@ BeagleQueueIndexer::processone(const string &path,
dotfile.m_fields.set("fmtime", dotdoc.fmtime, ""); dotfile.m_fields.set("fmtime", dotdoc.fmtime, "");
dotfile.m_fields.set("fbytes", dotdoc.fbytes, ""); dotfile.m_fields.set("fbytes", dotdoc.fbytes, "");
dotdoc.meta[Rcl::Doc::keybcknd] = "BGL";
if (!m_db->addOrUpdate(udi, "", dotdoc)) if (!m_db->addOrUpdate(udi, "", dotdoc))
return FsTreeWalker::FtwError; return FsTreeWalker::FtwError;
@ -402,6 +425,7 @@ BeagleQueueIndexer::processone(const string &path,
dotfile.m_fields.set("fmtime", dotdoc.fmtime, ""); dotfile.m_fields.set("fmtime", dotdoc.fmtime, "");
dotfile.m_fields.set("fbytes", dotdoc.fbytes, ""); dotfile.m_fields.set("fbytes", dotdoc.fbytes, "");
doc.meta[Rcl::Doc::keybcknd] = "BGL";
if (!m_db->addOrUpdate(udi, "", doc)) if (!m_db->addOrUpdate(udi, "", doc))
return FsTreeWalker::FtwError; return FsTreeWalker::FtwError;

View File

@ -19,21 +19,30 @@
/* @(#$Id: $ (C) 2009 J.F.Dockes */ /* @(#$Id: $ (C) 2009 J.F.Dockes */
/** /**
* Code to process the Beagle indexing queue. Beagle MUST NOT be * Process the Beagle indexing queue.
* running, else mayhem will ensue. Interesting to reuse the beagle *
* firefox visited page indexing plugin for example. * Beagle MUST NOT be running, else mayhem will ensue.
*
* This is mainly written to reuse the Beagle Firefox plug-in (which
* copies visited pages and bookmarks to the queue).
*/ */
#include "rclconfig.h"
#include "fstreewalk.h" #include "fstreewalk.h"
#include "rcldb.h" #include "rcldoc.h"
class DbIxStatusUpdater; class DbIxStatusUpdater;
class CirCache; class CirCache;
class RclConfig;
namespace Rcl {
class Db;
}
class BeagleQueueIndexer : public FsTreeWalkerCB { class BeagleQueueIndexer : public FsTreeWalkerCB {
public: public:
BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db, /**
* @param db can be null when the object is only used read-only, to call getFromCache()
*/
BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db = 0,
DbIxStatusUpdater *updfunc = 0); DbIxStatusUpdater *updfunc = 0);
~BeagleQueueIndexer(); ~BeagleQueueIndexer();
@ -42,6 +51,8 @@ public:
FsTreeWalker::Status FsTreeWalker::Status
processone(const string &, const struct stat *, FsTreeWalker::CbFlag); processone(const string &, const struct stat *, FsTreeWalker::CbFlag);
bool getFromCache(const string& udi, Rcl::Doc &doc, string& data,
string *hittype = 0);
private: private:
RclConfig *m_config; RclConfig *m_config;
Rcl::Db *m_db; Rcl::Db *m_db;
@ -51,7 +62,6 @@ private:
DbIxStatusUpdater *m_updater; DbIxStatusUpdater *m_updater;
bool indexFromCache(const string& udi); bool indexFromCache(const string& udi);
}; };
#endif /* _beaglequeue_h_included_ */ #endif /* _beaglequeue_h_included_ */

View File

@ -45,6 +45,7 @@ using namespace std;
#include "rclconfig.h" #include "rclconfig.h"
#include "mh_html.h" #include "mh_html.h"
#include "fileudi.h" #include "fileudi.h"
#include "beaglequeue.h"
#ifdef RCL_USE_XATTR #ifdef RCL_USE_XATTR
#include "pxattr.h" #include "pxattr.h"
@ -166,12 +167,23 @@ void FileInterner::tmpcleanup()
// //
// Empty handler on return says that we're in error, this will be // Empty handler on return says that we're in error, this will be
// processed by the first call to internfile(). // processed by the first call to internfile().
// Split into "constructor calls init()" to allow use from other constructor
FileInterner::FileInterner(const string &f, const struct stat *stp, FileInterner::FileInterner(const string &f, const struct stat *stp,
RclConfig *cnf, RclConfig *cnf,
const string& td, int flags, const string *imime) const string& td, int flags, const string *imime)
: m_cfg(cnf), m_fn(f), m_forPreview(flags & FIF_forPreview), : m_tdir(td)
m_tdir(td)
{ {
initcommon(cnf, flags);
init(f, stp, cnf, td, flags, imime);
}
void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
const string& td, int flags, const string *imime)
{
m_fn = f;
cnf->setKeyDir(path_getfather(m_fn));
string l_mime; string l_mime;
bool usfci = false; bool usfci = false;
cnf->getConfParam("usesystemfilecommand", &usfci); cnf->getConfParam("usesystemfilecommand", &usfci);
@ -237,8 +249,8 @@ FileInterner::FileInterner(const string &f, const struct stat *stp,
if (!df) { if (!df) {
// No handler for this type, for now :( if indexallfilenames // No handler for this type, for now :( if indexallfilenames
// is set in the config, this normally wont happen (we get mh_unknown) // is set in the config, this normally wont happen (we get mh_unknown)
LOGERR(("FileInterner:: ignored: [%s] mime [%s]\n", LOGINFO(("FileInterner:: ignored: [%s] mime [%s]\n",
f.c_str(), l_mime.c_str())); f.c_str(), l_mime.c_str()));
return; return;
} }
df->set_property(Dijon::Filter::OPERATING_MODE, df->set_property(Dijon::Filter::OPERATING_MODE,
@ -258,15 +270,143 @@ FileInterner::FileInterner(const string &f, const struct stat *stp,
LOGERR(("FileInterner:: error parsing %s\n", m_fn.c_str())); LOGERR(("FileInterner:: error parsing %s\n", m_fn.c_str()));
return; return;
} }
m_handlers.reserve(MAXHANDLERS);
for (unsigned int i = 0; i < MAXHANDLERS; i++)
m_tmpflgs[i] = false;
m_handlers.push_back(df); m_handlers.push_back(df);
LOGDEB(("FileInterner::FileInterner: %s [%s]\n", l_mime.c_str(), LOGDEB(("FileInterner::FileInterner: %s [%s]\n", l_mime.c_str(),
m_fn.c_str())); m_fn.c_str()));
}
// In-memory flavour of the constructor: the document body is handed over
// directly in 'data' (ie: taken out of the web cache) instead of being read
// from a file. The caller must supply the mime type in imime.
FileInterner::FileInterner(const string &data, RclConfig *cnf,
                           const string& td, int flags, const string& imime)
    : m_tdir(td)
{
    // Common member setup first, then the data-specific handler setup.
    initcommon(cnf, flags);
    init(data, cnf, td, flags, imime);
}
void FileInterner::init(const string &data, RclConfig *cnf,
const string& td, int flags, const string& imime)
{
if (imime.empty()) {
LOGERR(("FileInterner: inmemory constructor needs input mime type\n"));
return;
}
m_mimetype = imime;
// Look for appropriate handler (might still return empty)
Dijon::Filter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview);
if (!df) {
// No handler for this type, for now :( if indexallfilenames
// is set in the config, this normally wont happen (we get mh_unknown)
LOGINFO(("FileInterner:: ignored: mime [%s]\n", m_mimetype.c_str()));
return;
}
df->set_property(Dijon::Filter::OPERATING_MODE,
m_forPreview ? "view" : "index");
string charset = m_cfg->getDefCharset();
df->set_property(Dijon::Filter::DEFAULT_CHARSET, charset);
bool setres = false;
if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
setres = df->set_document_string(data);
} else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) {
setres = df->set_document_data(data.c_str(), data.length());
} else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
string filename;
if (dataToTempFile(data, m_mimetype, filename)) {
if (!(setres=df->set_document_file(filename))) {
m_tmpflgs[0] = false;
m_tempfiles.pop_back();
}
}
}
if (!setres) {
LOGINFO(("FileInterner:: set_doc failed inside for mtype %s\n",
m_mimetype.c_str()));
delete df;
return;
}
m_handlers.push_back(df);
}
void FileInterner::initcommon(RclConfig *cnf, int flags)
{
m_cfg = cnf;
m_forPreview = ((flags & FIF_forPreview) != 0);
// Initialize handler stack.
m_handlers.reserve(MAXHANDLERS);
for (unsigned int i = 0; i < MAXHANDLERS; i++)
m_tmpflgs[i] = false;
m_targetMType = stxtplain; m_targetMType = stxtplain;
} }
/**
 * Constructor for the case where only the index document is known: the
 * storage backend is taken from idoc.meta[Rcl::Doc::keybcknd].
 *
 *  - empty or "FS": the url must begin with file://, the file is stat()ed
 *    and the file-based init() is used with idoc.mimetype.
 *  - "BGL": the data is fetched from the Beagle web cache using the udi
 *    found in idoc.meta[Rcl::Doc::keyudi], and the in-memory init() is used
 *    with the mime type stored in the cache entry.
 *
 * On any failure the problem is logged and the constructor returns without
 * calling init(), so no handler gets set up.
 *
 * @param idoc document from the index (url, mimetype and meta are used)
 * @param cnf configuration, passed to initcommon(), init() and the cache
 *        reader
 * @param td temporary directory, stored in m_tdir and passed to init()
 * @param flags FileInterner flags, passed to initcommon() and init()
 */
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf,
                           const string& td, int flags)
    : m_tdir(td)
{
    initcommon(cnf, flags);

    // We do insist on having an url...
    if (idoc.url.empty()) {
        LOGERR(("FileInterner::FileInterner:: no url!\n"));
        return;
    }

    // This stuff will be moved to some kind of generic function:
    //   get(idoc, ofn, odata, ometa)
    // and use some kind of backstore object factory next time we add a
    // backend (if ever).
    string backend;
    map<string, string>::const_iterator it;
    if ((it = idoc.meta.find(Rcl::Doc::keybcknd)) != idoc.meta.end())
        backend = it->second;

    if (backend.empty() || !backend.compare("FS")) {
        // The url has to be like file://
        if (idoc.url.find("file://") != 0) {
            LOGERR(("FileInterner: FS backend and non fs url: [%s]\n",
                    idoc.url.c_str()));
            return;
        }
        // Strip the 7 characters of the "file://" prefix
        string fn = idoc.url.substr(7, string::npos);
        struct stat st;
        if (stat(fn.c_str(), &st) < 0) {
            LOGERR(("InternFile: cannot access document file: [%s]\n",
                    fn.c_str()));
            return;
        }
        init(fn, &st, cnf, td, flags, &idoc.mimetype);
    } else if (!backend.compare("BGL")) {
        // Retrieve from our webcache (beagle data). No db is passed: the
        // indexer object is only used to call getFromCache().
        BeagleQueueIndexer beagler(cnf);
        string data;
        Rcl::Doc dotdoc;
        // Reuse the outer iterator instead of shadowing it.
        it = idoc.meta.find(Rcl::Doc::keyudi);
        if (it == idoc.meta.end() || it->second.empty()) {
            LOGERR(("Internfile: no udi in idoc\n"));
            return;
        }
        string udi = it->second;
        if (!beagler.getFromCache(udi, dotdoc, data)) {
            LOGINFO(("Internfile: failed fetch from Beagle cache for [%s]\n",
                     udi.c_str()));
            return;
        }
        if (dotdoc.mimetype.compare(idoc.mimetype)) {
            // Only reported: the mime type from the cache is the one used
            // below. The format has three %s: udi, index type, cache type.
            LOGINFO(("Internfile: udi [%s], mimetype mismatch: in: [%s], bgl "
                     "[%s]\n", udi.c_str(), idoc.mimetype.c_str(),
                     dotdoc.mimetype.c_str()));
        }
        init(data, cnf, td, flags, dotdoc.mimetype);
    } else {
        LOGERR(("InternFile: unknown backend: [%s]\n", backend.c_str()));
        return;
    }
}
FileInterner::~FileInterner() FileInterner::~FileInterner()
{ {
tmpcleanup(); tmpcleanup();
@ -286,7 +426,10 @@ bool FileInterner::dataToTempFile(const string& dt, const string& mt,
// Find appropriate suffix for mime type // Find appropriate suffix for mime type
TempFile temp(new TempFileInternal(m_cfg->getSuffixFromMimeType(mt))); TempFile temp(new TempFileInternal(m_cfg->getSuffixFromMimeType(mt)));
if (temp->ok()) { if (temp->ok()) {
m_tmpflgs[m_handlers.size()-1] = true; // We are called before the handler is actually on the stack, so the
// index is m_handlers.size(). m_tmpflgs is a static array, so this is
// no problem
m_tmpflgs[m_handlers.size()] = true;
m_tempfiles.push_back(temp); m_tempfiles.push_back(temp);
} else { } else {
LOGERR(("FileInterner::dataToTempFile: cant create tempfile: %s\n", LOGERR(("FileInterner::dataToTempFile: cant create tempfile: %s\n",
@ -550,7 +693,7 @@ int FileInterner::addHandler()
string filename; string filename;
if (dataToTempFile(*txt, mimetype, filename)) { if (dataToTempFile(*txt, mimetype, filename)) {
if (!(setres = newflt->set_document_file(filename))) { if (!(setres = newflt->set_document_file(filename))) {
m_tmpflgs[m_handlers.size()-1] = false; m_tmpflgs[m_handlers.size()] = false;
m_tempfiles.pop_back(); m_tempfiles.pop_back();
} }
} }
@ -711,6 +854,12 @@ class DirWiper {
} }
}; };
// Temporary while we fix backend things.
// Convert a file:// url to a local file system path by stripping the
// "file://" prefix.
// Returns an empty string when the url does not begin with "file://"
// (including urls shorter than the prefix). Blindly dropping the first 7
// characters would throw std::out_of_range on short input, and would turn
// other urls into bogus paths ("http://etc/passwd" -> "etc/passwd").
static std::string urltolocalpath(const std::string& url)
{
    static const char prefix[] = "file://";
    const std::string::size_type prefixlen = sizeof(prefix) - 1;
    if (url.compare(0, prefixlen, prefix) != 0)
        return std::string();
    return url.substr(prefixlen);
}
// Extract subdoc out of multidoc into temporary file. // Extract subdoc out of multidoc into temporary file.
// We do the usual internfile stuff: create a temporary directory, // We do the usual internfile stuff: create a temporary directory,
// then create an interner and call internfile. The target mtype is set to // then create an interner and call internfile. The target mtype is set to
@ -722,11 +871,13 @@ class DirWiper {
// - The output temporary file which is held in a reference-counted // - The output temporary file which is held in a reference-counted
// object and will be deleted when done with. // object and will be deleted when done with.
bool FileInterner::idocToFile(TempFile& otemp, const string& tofile, bool FileInterner::idocToFile(TempFile& otemp, const string& tofile,
RclConfig *cnf, RclConfig *cnf, const Rcl::Doc& idoc)
const string& fn,
const string& ipath,
const string& mtype)
{ {
LOGDEB(("FileInterner::idocToFile\n"));
idoc.dump();
string fn = urltolocalpath(idoc.url);
string ipath = idoc.ipath;
string mtype = idoc.mimetype;
struct stat st; struct stat st;
if (stat(fn.c_str(), &st) < 0) { if (stat(fn.c_str(), &st) < 0) {
LOGERR(("FileInterner::idocToFile: can't stat [%s]\n", fn.c_str())); LOGERR(("FileInterner::idocToFile: can't stat [%s]\n", fn.c_str()));

View File

@ -50,7 +50,8 @@ class FileInterner {
* Get immediate parent for document. * Get immediate parent for document.
* *
* This is not in general the same as the "parent" document used * This is not in general the same as the "parent" document used
* with Rcl::Db::addOrUpdate(). The latter is generally the enclosing file. * with Rcl::Db::addOrUpdate(). The latter is generally the enclosing file,
* this would be for example the email containing the attachment.
*/ */
static bool getEnclosing(const string &url, const string &ipath, static bool getEnclosing(const string &url, const string &ipath,
string &eurl, string &eipath, string& udi); string &eurl, string &eipath, string& udi);
@ -58,9 +59,9 @@ class FileInterner {
enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype}; enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype};
/** /**
* Identify and possibly decompress file, create adequate * Identify and possibly decompress file, and create the top filter
* handler. The mtype parameter is only set when the object is * The mtype parameter is not always set (it is when the object is
* created for previewing a file. Filter output may be * created for previewing a file). Filter output may be
* different for previewing and indexing. * different for previewing and indexing.
* *
* @param fn file name * @param fn file name
@ -69,13 +70,28 @@ class FileInterner {
* @param td temporary directory to use as working space if * @param td temporary directory to use as working space if
* decompression needed. Must be private and will be wiped clean. * decompression needed. Must be private and will be wiped clean.
* @param mtype mime type if known. For a compressed file this is the * @param mtype mime type if known. For a compressed file this is the
* mime type for the uncompressed version. This currently doubles up * mime type for the uncompressed version.
* to indicate that this object is for previewing (not indexing).
*/ */
FileInterner(const string &fn, const struct stat *stp, FileInterner(const string &fn, const struct stat *stp,
RclConfig *cnf, const string& td, int flags, RclConfig *cnf, const string& td, int flags,
const string *mtype = 0); const string *mtype = 0);
/**
* Alternate constructor for the case where the data is in memory.
* This is mainly for data extracted from the web cache. The mime type
* must be set, input must be uncompressed.
*/
FileInterner(const string &data, RclConfig *cnf, const string& td,
int flags, const string& mtype);
/**
* Alternate constructor for the case where it is not known where
* the data will come from. We'll use the doc fields and try our
* best...
*/
FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, const string& td,
int flags);
~FileInterner(); ~FileInterner();
/// Return values for internfile() /// Return values for internfile()
@ -121,8 +137,7 @@ class FileInterner {
* @param mtype The target mime type (we don't want to decode to text!) * @param mtype The target mime type (we don't want to decode to text!)
*/ */
static bool idocToFile(TempFile& temp, const string& tofile, static bool idocToFile(TempFile& temp, const string& tofile,
RclConfig *cnf, const string& fn, RclConfig *cnf, const Rcl::Doc& doc);
const string& ipath, const string& mtype);
const string& getReason() const {return m_reason;} const string& getReason() const {return m_reason;}
static void getMissingExternal(string& missing); static void getMissingExternal(string& missing);
@ -160,6 +175,14 @@ class FileInterner {
static set<string> o_missingExternal; static set<string> o_missingExternal;
static map<string, set<string> > o_typesForMissing; static map<string, set<string> > o_typesForMissing;
// Pseudo-constructors
void init(const string &fn, const struct stat *stp,
RclConfig *cnf, const string& td, int flags,
const string *mtype = 0);
void init(const string &data, RclConfig *cnf, const string& td,
int flags, const string& mtype);
void initcommon(RclConfig *cnf, int flags);
void tmpcleanup(); void tmpcleanup();
bool dijontorcl(Rcl::Doc&); bool dijontorcl(Rcl::Doc&);
void collectIpathAndMT(Rcl::Doc&, string& ipath) const; void collectIpathAndMT(Rcl::Doc&, string& ipath) const;

View File

@ -156,10 +156,10 @@ ConfBeaglePanelW::ConfBeaglePanelW(QWidget *parent, ConfNull *config)
ConfLink lnk1(new ConfLinkRclRep(config, "processbeaglequeue")); ConfLink lnk1(new ConfLinkRclRep(config, "processbeaglequeue"));
ConfParamBoolW* cp1 = ConfParamBoolW* cp1 =
new ConfParamBoolW(gb1, lnk1, tr("Process Beagle queue"), new ConfParamBoolW(gb1, lnk1, tr("Steal Beagle indexing queue"),
tr("Beagle MUST NOT be running. Enables processing " tr("Beagle MUST NOT be running. Enables processing "
"the beagle queue to index Firefox web history.<br>" "the beagle queue to index Firefox web history.<br>"
"(you must also install the Firefox Beagle Plugin)" "(you should also install the Firefox Beagle plugin)"
)); ));
ConfLink lnk2(new ConfLinkRclRep(config, "webcachedir")); ConfLink lnk2(new ConfLinkRclRep(config, "webcachedir"));

View File

@ -538,14 +538,16 @@ PreviewTextEdit *Preview::addEditorTab()
return editor; return editor;
} }
void Preview::setCurTabProps(const string &fn, const Rcl::Doc &doc, void Preview::setCurTabProps(const Rcl::Doc &doc, int docnum)
int docnum)
{ {
QString title; QString title;
map<string,string>::const_iterator meta_it; map<string,string>::const_iterator meta_it;
if ((meta_it = doc.meta.find("title")) != doc.meta.end()) { if ((meta_it = doc.meta.find(Rcl::Doc::keytt)) != doc.meta.end()
&& !meta_it->second.empty()) {
title = QString::fromUtf8(meta_it->second.c_str(), title = QString::fromUtf8(meta_it->second.c_str(),
meta_it->second.length()); meta_it->second.length());
} else {
title = QString::fromLocal8Bit(path_getsimple(doc.url).c_str());
} }
if (title.length() > 20) { if (title.length() > 20) {
title = title.left(10) + "..." + title.right(10); title = title.left(10) + "..." + title.right(10);
@ -572,16 +574,15 @@ void Preview::setCurTabProps(const string &fn, const Rcl::Doc &doc,
PreviewTextEdit *e = currentEditor(); PreviewTextEdit *e = currentEditor();
if (e) { if (e) {
e->m_data.fn = fn; e->m_data.url = doc.url;
e->m_data.ipath = doc.ipath; e->m_data.ipath = doc.ipath;
e->m_data.docnum = docnum; e->m_data.docnum = docnum;
} }
} }
bool Preview::makeDocCurrent(const string &fn, size_t sz, bool Preview::makeDocCurrent(const Rcl::Doc& doc, int docnum, bool sametab)
const Rcl::Doc& doc, int docnum, bool sametab)
{ {
LOGDEB(("Preview::makeDocCurrent: %s\n", fn.c_str())); LOGDEB(("Preview::makeDocCurrent: %s\n", doc.url.c_str()));
/* Check if we already have this page */ /* Check if we already have this page */
for (int i = 0; i < pvTab->count(); i++) { for (int i = 0; i < pvTab->count(); i++) {
@ -593,7 +594,7 @@ bool Preview::makeDocCurrent(const string &fn, size_t sz,
if (tw) { if (tw) {
PreviewTextEdit *edit = PreviewTextEdit *edit =
dynamic_cast<PreviewTextEdit*>(tw->child("pvEdit")); dynamic_cast<PreviewTextEdit*>(tw->child("pvEdit"));
if (edit && !edit->m_data.fn.compare(fn) && if (edit && !edit->m_data.url.compare(doc.url) &&
!edit->m_data.ipath.compare(doc.ipath)) { !edit->m_data.ipath.compare(doc.ipath)) {
pvTab->showPage(tw); pvTab->showPage(tw);
return true; return true;
@ -606,7 +607,7 @@ bool Preview::makeDocCurrent(const string &fn, size_t sz,
return false; return false;
} }
m_justCreated = false; m_justCreated = false;
if (!loadFileInCurrentTab(fn, sz, doc, docnum)) { if (!loadDocInCurrentTab(doc, docnum)) {
closeCurrentTab(); closeCurrentTab();
return false; return false;
} }
@ -637,16 +638,15 @@ bool Preview::makeDocCurrent(const string &fn, size_t sz,
/* A thread to to the file reading / format conversion */ /* A thread to to the file reading / format conversion */
class LoadThread : public QThread { class LoadThread : public QThread {
int *statusp; int *statusp;
Rcl::Doc *out; Rcl::Doc& out;
const Rcl::Doc& idoc;
string filename; string filename;
string ipath;
string *mtype;
string tmpdir; string tmpdir;
int loglevel; int loglevel;
public: public:
string missing; string missing;
LoadThread(int *stp, Rcl::Doc *odoc, string fn, string ip, string *mt) LoadThread(int *stp, Rcl::Doc& odoc, const Rcl::Doc& idc)
: statusp(stp), out(odoc), filename(fn), ipath(ip), mtype(mt) : statusp(stp), out(odoc), idoc(idc)
{ {
loglevel = DebugLog::getdbl()->getlevel(); loglevel = DebugLog::getdbl()->getlevel();
} }
@ -666,40 +666,35 @@ class LoadThread : public QThread {
*statusp = -1; *statusp = -1;
return; return;
} }
struct stat st;
if (stat(filename.c_str(), &st) < 0) {
LOGERR(("Preview: can't stat [%s]\n", filename.c_str()));
QMessageBox::critical(0, "Recoll",
Preview::tr("File does not exist"));
*statusp = -1;
return;
}
FileInterner interner(filename, &st, rclconfig, tmpdir, // QMessageBox::critical(0, "Recoll", Preview::tr("File does not exist"));
FileInterner::FIF_forPreview,
mtype); FileInterner interner(idoc, rclconfig, tmpdir,
FileInterner::FIF_forPreview);
// We don't set the interner's target mtype to html because we // We don't set the interner's target mtype to html because we
// do want the html filter to do its work: we won't use the // do want the html filter to do its work: we won't use the
// text, but we need the conversion to utf-8 // text, but we need the conversion to utf-8
// interner.setTargetMType("text/html"); // interner.setTargetMType("text/html");
try { try {
FileInterner::Status ret = interner.internfile(*out, ipath); string ipath = idoc.ipath;
FileInterner::Status ret = interner.internfile(out, ipath);
if (ret == FileInterner::FIDone || ret == FileInterner::FIAgain) { if (ret == FileInterner::FIDone || ret == FileInterner::FIAgain) {
// FIAgain is actually not nice here. It means that the record // FIAgain is actually not nice here. It means that the record
// for the *file* of a multidoc was selected. Actually this // for the *file* of a multidoc was selected. Actually this
// shouldn't have had a preview link at all, but we don't know // shouldn't have had a preview link at all, but we don't know
// how to handle it now. Better to show the first doc than // how to handle it now. Better to show the first doc than
// a mysterious error. Happens when the file name matches a // a mysterious error. Happens when the file name matches a
// a search term of course. // a search term.
*statusp = 0; *statusp = 0;
// If we prefer html and it is available, replace the // If we prefer html and it is available, replace the
// text/plain document text // text/plain document text
if (prefs.previewHtml && !interner.get_html().empty()) { if (prefs.previewHtml && !interner.get_html().empty()) {
out->text = interner.get_html(); out.text = interner.get_html();
out->mimetype = "text/html"; out.mimetype = "text/html";
} }
} else { } else {
out->mimetype = interner.getMimetype(); out.mimetype = interner.getMimetype();
interner.getMissingExternal(missing); interner.getMissingExternal(missing);
*statusp = -1; *statusp = -1;
} }
@ -754,8 +749,7 @@ public:
~LoadGuard() {*m_bp = false; CancelCheck::instance().setCancel(false);} ~LoadGuard() {*m_bp = false; CancelCheck::instance().setCancel(false);}
}; };
bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
int docnum)
{ {
if (m_loading) { if (m_loading) {
LOGERR(("ALready loading\n")); LOGERR(("ALready loading\n"));
@ -767,18 +761,11 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
m_haveAnchors = false; m_haveAnchors = false;
Rcl::Doc doc = idoc; setCurTabProps(idoc, docnum);
if (doc.meta[Rcl::Doc::keytt].empty())
doc.meta[Rcl::Doc::keytt] = path_getsimple(doc.url);
setCurTabProps(fn, doc, docnum);
char csz[20];
sprintf(csz, "%lu", (unsigned long)sz);
QString msg = QString("Loading: %1 (size %2 bytes)") QString msg = QString("Loading: %1 (size %2 bytes)")
.arg(QString::fromLocal8Bit(fn.c_str())) .arg(QString::fromLocal8Bit(idoc.url.c_str()))
.arg(csz); .arg(QString::fromAscii(idoc.fbytes.c_str()));
// Create progress dialog and aux objects // Create progress dialog and aux objects
const int nsteps = 20; const int nsteps = 20;
@ -786,12 +773,12 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
progress.setMinimumDuration(2000); progress.setMinimumDuration(2000);
WaiterThread waiter(100); WaiterThread waiter(100);
// Load and convert file // Load and convert document
// idoc came out of the index data (main text and other fields missing).
// fdoc is the complete one that we are going to extract from storage.
Rcl::Doc fdoc; Rcl::Doc fdoc;
// Need to setup config to retrieve possibly local parameters
rclconfig->setKeyDir(path_getfather(fn));
int status = 1; int status = 1;
LoadThread lthr(&status, &fdoc, fn, doc.ipath, &doc.mimetype); LoadThread lthr(&status, fdoc, idoc);
lthr.start(); lthr.start();
int prog; int prog;
for (prog = 1;;prog++) { for (prog = 1;;prog++) {
@ -963,7 +950,7 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
} }
// Enter document in document history // Enter document in document history
g_dynconf->enterDoc(fn, doc.ipath); g_dynconf->enterDoc(idoc.url, idoc.ipath);
editor->setFocus(); editor->setFocus();
emit(previewExposed(this, m_searchId, docnum)); emit(previewExposed(this, m_searchId, docnum));
@ -998,7 +985,7 @@ void PreviewTextEdit::toggleFields()
// Else display fields // Else display fields
m_dspflds = true; m_dspflds = true;
QString txt = "<html><head></head><body>\n"; QString txt = "<html><head></head><body>\n";
txt += "<b>" + QString::fromLocal8Bit(m_data.fn.c_str()); txt += "<b>" + QString::fromLocal8Bit(m_data.url.c_str());
if (!m_data.ipath.empty()) if (!m_data.ipath.empty())
txt += "|" + QString::fromUtf8(m_data.ipath.c_str()); txt += "|" + QString::fromUtf8(m_data.ipath.c_str());
txt += "</b><br><br>"; txt += "</b><br><br>";

View File

@ -51,7 +51,7 @@ class Q3PopupMenu;
// We keep a list of data associated to each tab // We keep a list of data associated to each tab
class TabData { class TabData {
public: public:
string fn; // filename for this tab string url; // filename for this tab
string ipath; // Internal doc path inside file string ipath; // Internal doc path inside file
int docnum; // Index of doc in db search results. int docnum; // Index of doc in db search results.
// doc out of internfile (previous fields come from the index) with // doc out of internfile (previous fields come from the index) with
@ -133,8 +133,13 @@ public:
virtual void closeEvent(QCloseEvent *e ); virtual void closeEvent(QCloseEvent *e );
virtual bool eventFilter(QObject *target, QEvent *event ); virtual bool eventFilter(QObject *target, QEvent *event );
virtual bool makeDocCurrent(const string &fn, size_t sz, /**
const Rcl::Doc& idoc, int docnum, * Arrange for the document to be displayed either by exposing the tab
* if already loaded, or by creating a new tab and loading it.
* @param docnum is used to link back to the result list (to highlight
* the paragraph when the tab is exposed, etc.)
*/
virtual bool makeDocCurrent(const Rcl::Doc& idoc, int docnum,
bool sametab = false); bool sametab = false);
friend class PreviewTextEdit; friend class PreviewTextEdit;
public slots: public slots:
@ -182,12 +187,10 @@ private:
QCheckBox* matchCheck; QCheckBox* matchCheck;
void init(); void init();
virtual void setCurTabProps(const string& fn, const Rcl::Doc& doc, virtual void setCurTabProps(const Rcl::Doc& doc, int docnum);
int docnum);
virtual PreviewTextEdit *currentEditor(); virtual PreviewTextEdit *currentEditor();
virtual PreviewTextEdit *addEditorTab(); virtual PreviewTextEdit *addEditorTab();
virtual bool loadFileInCurrentTab(string fn, size_t sz, virtual bool loadDocInCurrentTab(const Rcl::Doc& idoc, int dnm);
const Rcl::Doc& idoc, int dnm);
}; };
#endif /* _PREVIEW_W_H_INCLUDED_ */ #endif /* _PREVIEW_W_H_INCLUDED_ */

View File

@ -498,12 +498,6 @@ void RclMain::toggleIndexing()
fileToggleIndexingAction->setEnabled(FALSE); fileToggleIndexingAction->setEnabled(FALSE);
} }
// Convert a "file://" url to a local file system path.
// Returns the part of the url which follows the "file://" prefix, or an
// empty string if the url does not begin with that prefix. The prefix is
// checked explicitly because an unconditional substr(7) throws
// std::out_of_range for inputs shorter than the prefix and returns
// garbage for urls using another scheme.
static string urltolocalpath(const string& url)
{
    static const char prefix[] = "file://";
    const string::size_type plen = sizeof(prefix) - 1;
    // Anchored comparison: only the first plen characters are examined
    if (url.compare(0, plen, prefix) != 0)
        return string();
    return url.substr(plen, string::npos);
}
// Start a db query and set the reslist docsource // Start a db query and set the reslist docsource
void RclMain::startSearch(RefCntr<Rcl::SearchData> sdata) void RclMain::startSearch(RefCntr<Rcl::SearchData> sdata)
{ {
@ -688,14 +682,6 @@ void RclMain::startPreview(int docnum, int mod)
return; return;
} }
// Check file exists in file system
string fn = urltolocalpath(doc.url);
struct stat st;
if (stat(fn.c_str(), &st) < 0) {
QMessageBox::warning(0, "Recoll", tr("Cannot access document file: ") +
fn.c_str());
return;
}
if (mod & Qt::ShiftButton) { if (mod & Qt::ShiftButton) {
// User wants new preview window // User wants new preview window
curPreview = 0; curPreview = 0;
@ -724,7 +710,7 @@ void RclMain::startPreview(int docnum, int mod)
curPreview->setCaption(resList->getDescription()); curPreview->setCaption(resList->getDescription());
curPreview->show(); curPreview->show();
} }
curPreview->makeDocCurrent(fn, st.st_size, doc, docnum); curPreview->makeDocCurrent(doc, docnum);
} }
/** /**
@ -736,14 +722,6 @@ void RclMain::startPreview(int docnum, int mod)
*/ */
void RclMain::startPreview(Rcl::Doc doc) void RclMain::startPreview(Rcl::Doc doc)
{ {
// Check file exists in file system
string fn = urltolocalpath(doc.url);
struct stat st;
if (stat(fn.c_str(), &st) < 0) {
QMessageBox::warning(0, "Recoll", tr("Cannot access document file: ") +
fn.c_str());
return;
}
Preview *preview = new Preview(0, HiliteData()); Preview *preview = new Preview(0, HiliteData());
if (preview == 0) { if (preview == 0) {
QMessageBox::warning(0, tr("Warning"), QMessageBox::warning(0, tr("Warning"),
@ -755,7 +733,7 @@ void RclMain::startPreview(Rcl::Doc doc)
connect(preview, SIGNAL(wordSelect(QString)), connect(preview, SIGNAL(wordSelect(QString)),
this, SLOT(ssearchAddTerm(QString))); this, SLOT(ssearchAddTerm(QString)));
preview->show(); preview->show();
preview->makeDocCurrent(fn, st.st_size, doc, 0); preview->makeDocCurrent(doc, 0);
} }
// Show next document from result list in current preview tab // Show next document from result list in current preview tab
@ -802,15 +780,7 @@ void RclMain::previewPrevOrNextInTab(Preview * w, int sid, int docnum, bool nxt)
} }
// Check that file exists in file system // Check that file exists in file system
string fn = urltolocalpath(doc.url); w->makeDocCurrent(doc, docnum, true);
struct stat st;
if (stat(fn.c_str(), &st) < 0) {
QMessageBox::warning(0, "Recoll", tr("Cannot access document file: ") +
fn.c_str());
return;
}
w->makeDocCurrent(fn, st.st_size, doc, docnum, true);
} }
// Preview tab exposed: if the preview comes from the currently // Preview tab exposed: if the preview comes from the currently
@ -862,7 +832,6 @@ void RclMain::saveDocToFile(int docnum)
" from database")); " from database"));
return; return;
} }
string fn = urltolocalpath(doc.url);
QString s = QString s =
QFileDialog::getSaveFileName(path_home().c_str(), QFileDialog::getSaveFileName(path_home().c_str(),
"", this, "", this,
@ -870,8 +839,7 @@ void RclMain::saveDocToFile(int docnum)
tr("Choose a file name to save under")); tr("Choose a file name to save under"));
string tofile((const char *)s.local8Bit()); string tofile((const char *)s.local8Bit());
TempFile temp; // not used TempFile temp; // not used
if (!FileInterner::idocToFile(temp, tofile, rclconfig, fn, if (!FileInterner::idocToFile(temp, tofile, rclconfig, doc)) {
doc.ipath, doc.mimetype)) {
QMessageBox::warning(0, "Recoll", QMessageBox::warning(0, "Recoll",
tr("Cannot extract document or create " tr("Cannot extract document or create "
"temporary file")); "temporary file"));
@ -913,6 +881,14 @@ void RclMain::startNativeViewer(int docnum)
startNativeViewer(doc); startNativeViewer(doc);
} }
// Convert to file path if url is like file://
// Returns the part of the url following the "file://" prefix, or an
// empty string if the url does not start with that prefix (callers can
// test for empty() to detect a url which does not designate a local file).
static string fileurltolocalpath(const string& url)
{
    static const char prefix[] = "file://";
    const string::size_type plen = sizeof(prefix) - 1;
    // Anchored comparison: unlike find(), this never scans past the
    // first plen characters when the prefix is absent.
    if (url.compare(0, plen, prefix) == 0)
        return url.substr(plen, string::npos);
    return string();
}
void RclMain::startNativeViewer(Rcl::Doc doc) void RclMain::startNativeViewer(Rcl::Doc doc)
{ {
// Look for appropriate viewer // Look for appropriate viewer
@ -993,22 +969,20 @@ void RclMain::startNativeViewer(Rcl::Doc doc)
return; return;
} }
// For files with an ipath, we do things differently depending if // We may need a temp file, or not depending on the command arguments
// the configured command seems to be able to grok it or not: if // and the fact that this is a subdoc or not.
// not, create a temporary file
bool wantsipath = cmd.find("%i") != string::npos; bool wantsipath = cmd.find("%i") != string::npos;
bool wantsfile = cmd.find("%f") != string::npos;
bool istempfile = false; bool istempfile = false;
string fn = urltolocalpath(doc.url); string fn = fileurltolocalpath(doc.url);
string url; string url = doc.url;
// If the command wants a file but this is not a file url, or
// there is an ipath that it won't understand, we need a temp file:
rclconfig->setKeyDir(path_getfather(fn)); rclconfig->setKeyDir(path_getfather(fn));
if (doc.ipath.empty() || wantsipath) { if ((wantsfile && fn.empty()) || (!wantsipath && !doc.ipath.empty())) {
url = doc.url;
} else {
// There is an ipath and the command does not know about
// them. We need a temp file.
TempFile temp; TempFile temp;
if (!FileInterner::idocToFile(temp, string(), rclconfig, fn, if (!FileInterner::idocToFile(temp, string(), rclconfig, doc)) {
doc.ipath, doc.mimetype)) {
QMessageBox::warning(0, "Recoll", QMessageBox::warning(0, "Recoll",
tr("Cannot extract document or create " tr("Cannot extract document or create "
"temporary file")); "temporary file"));

View File

@ -40,6 +40,8 @@ namespace Rcl {
const string Doc::keytt("title"); const string Doc::keytt("title");
const string Doc::keykw("keywords"); const string Doc::keykw("keywords");
const string Doc::keymd5("md5"); const string Doc::keymd5("md5");
const string Doc::keybcknd("rclbes");
const string Doc::keyudi("rcludi");
void Doc::dump(bool dotext) const void Doc::dump(bool dotext) const
{ {

View File

@ -44,9 +44,9 @@ class Doc {
// can be accessed after a query without fetching the actual document). // can be accessed after a query without fetching the actual document).
// We indicate the routine that sets them up during indexing // We indicate the routine that sets them up during indexing
// This is just "file://" + binary or url-encoded filename. No // Binary or url-encoded url. No transcoding: this is used to access files
// transcoding: this is used to access files Index: computed from // Index: computed by Db::add caller.
// fn by Db::add caller. Query: from doc data. // Query: from doc data.
string url; string url;
// Transcoded version of the simple file name for SFN-prefixed // Transcoded version of the simple file name for SFN-prefixed
@ -160,6 +160,9 @@ class Doc {
static const string keytt; // title static const string keytt; // title
static const string keykw; // keywords static const string keykw; // keywords
static const string keymd5; // file md5 checksum static const string keymd5; // file md5 checksum
static const string keybcknd; // backend type for data not from the filesys
// udi back from index. Only set by Rcl::Query::getDoc().
static const string keyudi;
}; };

View File

@ -369,6 +369,7 @@ bool Query::getDoc(int exti, Doc &doc)
Xapian::docid docid = 0; Xapian::docid docid = 0;
int pc = 0; int pc = 0;
string data; string data;
string udi;
m_reason.erase(); m_reason.erase();
for (int xaptries=0; xaptries < 2; xaptries++) { for (int xaptries=0; xaptries < 2; xaptries++) {
try { try {
@ -377,6 +378,16 @@ bool Query::getDoc(int exti, Doc &doc)
pc = m_nq->xmset.convert_to_percent(m_nq->xmset[xapi-first]); pc = m_nq->xmset.convert_to_percent(m_nq->xmset[xapi-first]);
data = xdoc.get_data(); data = xdoc.get_data();
m_reason.erase(); m_reason.erase();
Chrono chron;
Xapian::TermIterator it = xdoc.termlist_begin();
it.skip_to("Q");
if (it != xdoc.termlist_end()) {
udi = *it;
if (!udi.empty())
udi = udi.substr(1);
}
LOGDEB2(("Query::getDoc: %d ms to get udi [%s]\n", chron.millis(),
udi.c_str()));
break; break;
} catch (Xapian::DatabaseModifiedError &error) { } catch (Xapian::DatabaseModifiedError &error) {
// retry or end of loop // retry or end of loop
@ -390,6 +401,7 @@ bool Query::getDoc(int exti, Doc &doc)
LOGERR(("Query::getDoc: %s\n", m_reason.c_str())); LOGERR(("Query::getDoc: %s\n", m_reason.c_str()));
return false; return false;
} }
doc.meta[Rcl::Doc::keyudi] = udi;
// Parse xapian document's data and populate doc fields // Parse xapian document's data and populate doc fields
return m_db->m_ndb->dbDataToRclDoc(docid, data, doc, pc); return m_db->m_ndb->dbDataToRclDoc(docid, data, doc, pc);
} }

View File

@ -43,7 +43,7 @@ recipient = XTO
# "author" used to be stored by default, now set here as optional # "author" used to be stored by default, now set here as optional
# "apptag" is used for viewer specialization (depending on local config) # "apptag" is used for viewer specialization (depending on local config)
[stored] [stored]
stored = author apptag stored = author apptag rclbes
########################## ##########################
# This section defines field names aliases or synonyms. Any right hand side # This section defines field names aliases or synonyms. Any right hand side