1st beagle version with index/preview working

This commit is contained in:
dockes 2009-11-13 13:29:34 +00:00
parent efa501f06d
commit bbba826c06
12 changed files with 341 additions and 152 deletions

View File

@ -175,7 +175,7 @@ BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
if (!m_config->getConfParam("beaglequeuedir", m_queuedir))
m_queuedir = path_tildexpand("~/.beagle/ToIndex");
if (m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0) {
if (m_db && m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0) {
string reason;
if (!maketmpdir(m_tmpdir, reason)) {
LOGERR(("DbIndexer: cannot create temporary directory: %s\n",
@ -212,26 +212,23 @@ BeagleQueueIndexer::~BeagleQueueIndexer()
deleteZ(m_cache);
}
bool BeagleQueueIndexer::indexFromCache(const string& udi)
bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc,
string& data, string *htt)
{
string dict, data;
string dict;
// This is horribly inefficient and needs fixing either by saving
// the offsets during the forward scan, or using an auxiliary isam
// map
// This is horribly inefficient, especially while reindexing from
// cache, and needs fixing either by saving the offsets during the
// forward scan, or using an auxiliary isam map
if (!m_cache->get(udi, dict, data))
return false;
ConfSimple cf(dict, 1);
string hittype;
if (!cf.get(keybght, hittype, "")) {
LOGERR(("BeagleIndexer::index: cc entry has no hit type\n"));
return false;
}
if (htt)
cf.get(keybght, *htt, "");
// Build a doc from saved metadata
Rcl::Doc dotdoc;
cf.get("url", dotdoc.url, "");
cf.get("mimetype", dotdoc.mimetype, "");
cf.get("fmtime", dotdoc.fmtime, "");
@ -242,9 +239,29 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
it != names.end(); it++) {
cf.get(*it, dotdoc.meta[*it], "");
}
return true;
}
bool BeagleQueueIndexer::indexFromCache(const string& udi)
{
if (!m_db)
return false;
Rcl::Doc dotdoc;
string data;
string hittype;
if (!getFromCache(udi, dotdoc, data, &hittype))
return false;
if (hittype.empty()) {
LOGERR(("BeagleIndexer::index: cc entry has no hit type\n"));
return false;
}
if (!stringlowercmp("bookmark", hittype)) {
// Just index the dotdoc
dotdoc.meta[Rcl::Doc::keybcknd] = "BGL";
return m_db->addOrUpdate(udi, "", dotdoc);
} else if (stringlowercmp("webhistory", dotdoc.meta[keybght]) ||
(dotdoc.mimetype.compare("text/html") &&
@ -269,13 +286,15 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
doc.url = dotdoc.url;
doc.fbytes = dotdoc.fbytes;
doc.sig = "";
doc.meta[Rcl::Doc::keybcknd] = "BGL";
return m_db->addOrUpdate(udi, "", doc);
}
}
bool BeagleQueueIndexer::index()
{
if (!m_db)
return false;
LOGDEB(("BeagleQueueIndexer::processqueue: dir: [%s]\n",
m_queuedir.c_str()));
m_config->setKeyDir(m_queuedir);
@ -322,6 +341,9 @@ BeagleQueueIndexer::processone(const string &path,
const struct stat *stp,
FsTreeWalker::CbFlag flg)
{
if (!m_db) //??
return FsTreeWalker::FtwError;
bool dounlink = false;
if (flg != FsTreeWalker::FtwRegular)
@ -365,6 +387,7 @@ BeagleQueueIndexer::processone(const string &path,
dotfile.m_fields.set("fmtime", dotdoc.fmtime, "");
dotfile.m_fields.set("fbytes", dotdoc.fbytes, "");
dotdoc.meta[Rcl::Doc::keybcknd] = "BGL";
if (!m_db->addOrUpdate(udi, "", dotdoc))
return FsTreeWalker::FtwError;
@ -402,6 +425,7 @@ BeagleQueueIndexer::processone(const string &path,
dotfile.m_fields.set("fmtime", dotdoc.fmtime, "");
dotfile.m_fields.set("fbytes", dotdoc.fbytes, "");
doc.meta[Rcl::Doc::keybcknd] = "BGL";
if (!m_db->addOrUpdate(udi, "", doc))
return FsTreeWalker::FtwError;

View File

@ -19,21 +19,30 @@
/* @(#$Id: $ (C) 2009 J.F.Dockes */
/**
* Code to process the Beagle indexing queue. Beagle MUST NOT be
* running, else mayhem will ensue. Interesting to reuse the beagle
* firefox visited page indexing plugin for example.
* Process the Beagle indexing queue.
*
* Beagle MUST NOT be running, else mayhem will ensue.
*
* This is mainly written to reuse the Beagle Firefox plug-in (which
* copies visited pages and bookmarks to the queue).
*/
#include "rclconfig.h"
#include "fstreewalk.h"
#include "rcldb.h"
#include "rcldoc.h"
class DbIxStatusUpdater;
class CirCache;
class RclConfig;
namespace Rcl {
class Db;
}
class BeagleQueueIndexer : public FsTreeWalkerCB {
public:
BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
/**
* @param db can be null when the object is only used read-only, to call getFromCache()
*/
BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db = 0,
DbIxStatusUpdater *updfunc = 0);
~BeagleQueueIndexer();
@ -42,6 +51,8 @@ public:
FsTreeWalker::Status
processone(const string &, const struct stat *, FsTreeWalker::CbFlag);
bool getFromCache(const string& udi, Rcl::Doc &doc, string& data,
string *hittype = 0);
private:
RclConfig *m_config;
Rcl::Db *m_db;
@ -51,7 +62,6 @@ private:
DbIxStatusUpdater *m_updater;
bool indexFromCache(const string& udi);
};
#endif /* _beaglequeue_h_included_ */

View File

@ -45,6 +45,7 @@ using namespace std;
#include "rclconfig.h"
#include "mh_html.h"
#include "fileudi.h"
#include "beaglequeue.h"
#ifdef RCL_USE_XATTR
#include "pxattr.h"
@ -166,12 +167,23 @@ void FileInterner::tmpcleanup()
//
// Empty handler on return says that we're in error, this will be
// processed by the first call to internfile().
// Split into "constructor calls init()" to allow use from other constructor
FileInterner::FileInterner(const string &f, const struct stat *stp,
RclConfig *cnf,
const string& td, int flags, const string *imime)
: m_cfg(cnf), m_fn(f), m_forPreview(flags & FIF_forPreview),
m_tdir(td)
: m_tdir(td)
{
initcommon(cnf, flags);
init(f, stp, cnf, td, flags, imime);
}
void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
const string& td, int flags, const string *imime)
{
m_fn = f;
cnf->setKeyDir(path_getfather(m_fn));
string l_mime;
bool usfci = false;
cnf->getConfParam("usesystemfilecommand", &usfci);
@ -237,8 +249,8 @@ FileInterner::FileInterner(const string &f, const struct stat *stp,
if (!df) {
// No handler for this type, for now :( if indexallfilenames
// is set in the config, this normally wont happen (we get mh_unknown)
LOGERR(("FileInterner:: ignored: [%s] mime [%s]\n",
f.c_str(), l_mime.c_str()));
LOGINFO(("FileInterner:: ignored: [%s] mime [%s]\n",
f.c_str(), l_mime.c_str()));
return;
}
df->set_property(Dijon::Filter::OPERATING_MODE,
@ -258,15 +270,143 @@ FileInterner::FileInterner(const string &f, const struct stat *stp,
LOGERR(("FileInterner:: error parsing %s\n", m_fn.c_str()));
return;
}
m_handlers.reserve(MAXHANDLERS);
for (unsigned int i = 0; i < MAXHANDLERS; i++)
m_tmpflgs[i] = false;
m_handlers.push_back(df);
LOGDEB(("FileInterner::FileInterner: %s [%s]\n", l_mime.c_str(),
m_fn.c_str()));
}
// Setup from memory data (ie: out of the web cache). imime needs to be set.
// The work is split between initcommon() (member setup shared by all
// constructors) and the in-memory init() overload, which logs an error and
// returns without installing a handler when imime is empty.
// @param data the document contents
// @param cnf configuration, stored by initcommon()
// @param td temporary directory name, kept in m_tdir
// @param flags FIF_xxx flags, FIF_forPreview is tested by initcommon()
// @param imime mime type for the data
FileInterner::FileInterner(const string &data, RclConfig *cnf,
const string& td, int flags, const string& imime)
: m_tdir(td)
{
// Common member initialization must come first: init() reads m_cfg and
// m_forPreview.
initcommon(cnf, flags);
init(data, cnf, td, flags, imime);
}
void FileInterner::init(const string &data, RclConfig *cnf,
const string& td, int flags, const string& imime)
{
if (imime.empty()) {
LOGERR(("FileInterner: inmemory constructor needs input mime type\n"));
return;
}
m_mimetype = imime;
// Look for appropriate handler (might still return empty)
Dijon::Filter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview);
if (!df) {
// No handler for this type, for now :( if indexallfilenames
// is set in the config, this normally wont happen (we get mh_unknown)
LOGINFO(("FileInterner:: ignored: mime [%s]\n", m_mimetype.c_str()));
return;
}
df->set_property(Dijon::Filter::OPERATING_MODE,
m_forPreview ? "view" : "index");
string charset = m_cfg->getDefCharset();
df->set_property(Dijon::Filter::DEFAULT_CHARSET, charset);
bool setres = false;
if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
setres = df->set_document_string(data);
} else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) {
setres = df->set_document_data(data.c_str(), data.length());
} else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
string filename;
if (dataToTempFile(data, m_mimetype, filename)) {
if (!(setres=df->set_document_file(filename))) {
m_tmpflgs[0] = false;
m_tempfiles.pop_back();
}
}
}
if (!setres) {
LOGINFO(("FileInterner:: set_doc failed inside for mtype %s\n",
m_mimetype.c_str()));
delete df;
return;
}
m_handlers.push_back(df);
}
// Member setup shared by all the constructors.
// @param cnf configuration object, kept in m_cfg
// @param flags FIF_xxx flags, only FIF_forPreview is looked at here
void FileInterner::initcommon(RclConfig *cnf, int flags)
{
    m_cfg = cnf;
    m_forPreview = (flags & FIF_forPreview) ? true : false;
    // Default conversion target
    m_targetMType = stxtplain;

    // Initialize handler stack: reserve the space and clear the
    // per-level temporary file flags.
    m_handlers.reserve(MAXHANDLERS);
    for (unsigned int level = 0; level != MAXHANDLERS; ++level) {
        m_tmpflgs[level] = false;
    }
}
// Constructor for the case where it is not known in advance where the data
// will come from: the backend recorded in the document metadata
// (Rcl::Doc::keybcknd) decides.
//  - empty or "FS": the url must be a file:// one, the file is stat'ed and
//    the file-based init() is used.
//  - "BGL": the data is fetched from the Beagle web cache, using the udi
//    found in the metadata (Rcl::Doc::keyudi), and the in-memory init()
//    is used.
// On any error, a message is logged and we return before calling init().
// @param idoc document as retrieved from the index
// @param cnf configuration
// @param td temporary directory name
// @param flags FIF_xxx flags
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf,
                           const string& td, int flags)
    : m_tdir(td)
{
    initcommon(cnf, flags);

    // We do insist on having an url...
    if (idoc.url.empty()) {
        LOGERR(("FileInterner::FileInterner:: no url!\n"));
        return;
    }

    // This stuff will be moved to some kind of generic function:
    //   get(idoc, ofn, odata, ometa)
    // and use some kind of backstore object factory next time we add a
    // backend (if ever).
    string backend;
    map<string, string>::const_iterator it;
    if ((it = idoc.meta.find(Rcl::Doc::keybcknd)) != idoc.meta.end())
        backend = it->second;

    if (backend.empty() || !backend.compare("FS")) {
        // The url has to be like file://
        if (idoc.url.find("file://") != 0) {
            LOGERR(("FileInterner: FS backend and non fs url: [%s]\n",
                    idoc.url.c_str()));
            return;
        }
        // Safe: we just checked that the 7 characters prefix is there
        string fn = idoc.url.substr(7, string::npos);
        struct stat st;
        if (stat(fn.c_str(), &st) < 0) {
            LOGERR(("InternFile: cannot access document file: [%s]\n",
                    fn.c_str()));
            return;
        }
        init(fn, &st, cnf, td, flags, &idoc.mimetype);
    } else if (!backend.compare("BGL")) {
        // Retrieve from our webcache (beagle data). No db is passed to the
        // indexer object: it is only used for calling getFromCache().
        BeagleQueueIndexer beagler(cnf);
        string data;
        Rcl::Doc dotdoc;
        // Reuse the iterator declared above instead of shadowing it
        it = idoc.meta.find(Rcl::Doc::keyudi);
        if (it == idoc.meta.end() || it->second.empty()) {
            LOGERR(("Internfile: no udi in idoc\n"));
            return;
        }
        string udi = it->second;
        if (!beagler.getFromCache(udi, dotdoc, data)) {
            LOGINFO(("Internfile: failed fetch from Beagle cache for [%s]\n",
                     udi.c_str()));
            return;
        }
        if (dotdoc.mimetype.compare(idoc.mimetype)) {
            // Not fatal: we go on with the type recorded in the cache.
            // The format has 3 conversions: the udi must be passed too.
            LOGINFO(("Internfile: udi [%s], mimetype mismatch: in: [%s], bgl "
                     "[%s]\n", udi.c_str(), idoc.mimetype.c_str(),
                     dotdoc.mimetype.c_str()));
        }
        init(data, cnf, td, flags, dotdoc.mimetype);
    } else {
        LOGERR(("InternFile: unknown backend: [%s]\n", backend.c_str()));
        return;
    }
}
FileInterner::~FileInterner()
{
tmpcleanup();
@ -286,7 +426,10 @@ bool FileInterner::dataToTempFile(const string& dt, const string& mt,
// Find appropriate suffix for mime type
TempFile temp(new TempFileInternal(m_cfg->getSuffixFromMimeType(mt)));
if (temp->ok()) {
m_tmpflgs[m_handlers.size()-1] = true;
// We are called before the handler is actually on the stack, so the
// index is m_handlers.size(). m_tmpflgs is a static array, so this is
// no problem
m_tmpflgs[m_handlers.size()] = true;
m_tempfiles.push_back(temp);
} else {
LOGERR(("FileInterner::dataToTempFile: cant create tempfile: %s\n",
@ -550,7 +693,7 @@ int FileInterner::addHandler()
string filename;
if (dataToTempFile(*txt, mimetype, filename)) {
if (!(setres = newflt->set_document_file(filename))) {
m_tmpflgs[m_handlers.size()-1] = false;
m_tmpflgs[m_handlers.size()] = false;
m_tempfiles.pop_back();
}
}
@ -711,6 +854,12 @@ class DirWiper {
}
};
// Temporary while we fix backend things.
// Turn a url into a local path by dropping its first 7 characters (the
// length of "file://"). The prefix itself is not checked.
// @param url the url
// @return the url without its first 7 characters, or an empty string if
//   the url is shorter than that (substr() would throw std::out_of_range).
static std::string urltolocalpath(std::string url)
{
    // strlen("file://")
    const std::string::size_type prefixlen = 7;
    if (url.length() < prefixlen)
        return std::string();
    return url.substr(prefixlen, std::string::npos);
}
// Extract subdoc out of multidoc into temporary file.
// We do the usual internfile stuff: create a temporary directory,
// then create an interner and call internfile. The target mtype is set to
@ -722,11 +871,13 @@ class DirWiper {
// - The output temporary file which is held in a reference-counted
// object and will be deleted when done with.
bool FileInterner::idocToFile(TempFile& otemp, const string& tofile,
RclConfig *cnf,
const string& fn,
const string& ipath,
const string& mtype)
RclConfig *cnf, const Rcl::Doc& idoc)
{
LOGDEB(("FileInterner::idocToFile\n"));
idoc.dump();
string fn = urltolocalpath(idoc.url);
string ipath = idoc.ipath;
string mtype = idoc.mimetype;
struct stat st;
if (stat(fn.c_str(), &st) < 0) {
LOGERR(("FileInterner::idocToFile: can't stat [%s]\n", fn.c_str()));

View File

@ -50,7 +50,8 @@ class FileInterner {
* Get immediate parent for document.
*
* This is not in general the same as the "parent" document used
* with Rcl::Db::addOrUpdate(). The latter is generally the enclosing file.
* with Rcl::Db::addOrUpdate(). The latter is generally the enclosing file,
* while this one would be, for example, the email containing the attachment.
*/
static bool getEnclosing(const string &url, const string &ipath,
string &eurl, string &eipath, string& udi);
@ -58,9 +59,9 @@ class FileInterner {
enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype};
/**
* Identify and possibly decompress file, create adequate
* handler. The mtype parameter is only set when the object is
* created for previewing a file. Filter output may be
* Identify and possibly decompress file, and create the top filter
* The mtype parameter is not always set (it is when the object is
* created for previewing a file). Filter output may be
* different for previewing and indexing.
*
* @param fn file name
@ -69,12 +70,27 @@ class FileInterner {
* @param td temporary directory to use as working space if
* decompression needed. Must be private and will be wiped clean.
* @param mtype mime type if known. For a compressed file this is the
* mime type for the uncompressed version. This currently doubles up
* to indicate that this object is for previewing (not indexing).
* mime type for the uncompressed version.
*/
FileInterner(const string &fn, const struct stat *stp,
RclConfig *cnf, const string& td, int flags,
const string *mtype = 0);
/**
* Alternate constructor for the case where the data is in memory.
* This is mainly for data extracted from the web cache. The mime type
* must be set, input must be uncompressed.
*/
FileInterner(const string &data, RclConfig *cnf, const string& td,
int flags, const string& mtype);
/**
* Alternate constructor for the case where it is not known where
* the data will come from. We'll use the doc fields and try our
* best...
*/
FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, const string& td,
int flags);
~FileInterner();
@ -121,8 +137,7 @@ class FileInterner {
* @param mtype The target mime type (we don't want to decode to text!)
*/
static bool idocToFile(TempFile& temp, const string& tofile,
RclConfig *cnf, const string& fn,
const string& ipath, const string& mtype);
RclConfig *cnf, const Rcl::Doc& doc);
const string& getReason() const {return m_reason;}
static void getMissingExternal(string& missing);
@ -160,6 +175,14 @@ class FileInterner {
static set<string> o_missingExternal;
static map<string, set<string> > o_typesForMissing;
// Pseudo-constructors
void init(const string &fn, const struct stat *stp,
RclConfig *cnf, const string& td, int flags,
const string *mtype = 0);
void init(const string &data, RclConfig *cnf, const string& td,
int flags, const string& mtype);
void initcommon(RclConfig *cnf, int flags);
void tmpcleanup();
bool dijontorcl(Rcl::Doc&);
void collectIpathAndMT(Rcl::Doc&, string& ipath) const;

View File

@ -156,10 +156,10 @@ ConfBeaglePanelW::ConfBeaglePanelW(QWidget *parent, ConfNull *config)
ConfLink lnk1(new ConfLinkRclRep(config, "processbeaglequeue"));
ConfParamBoolW* cp1 =
new ConfParamBoolW(gb1, lnk1, tr("Process Beagle queue"),
new ConfParamBoolW(gb1, lnk1, tr("Steal Beagle indexing queue"),
tr("Beagle MUST NOT be running. Enables processing "
"the beagle queue to index Firefox web history.<br>"
"(you must also install the Firefox Beagle Plugin)"
"the beagle queue to index Firefox web history.<br>"
"(you should also install the Firefox Beagle plugin)"
));
ConfLink lnk2(new ConfLinkRclRep(config, "webcachedir"));

View File

@ -538,14 +538,16 @@ PreviewTextEdit *Preview::addEditorTab()
return editor;
}
void Preview::setCurTabProps(const string &fn, const Rcl::Doc &doc,
int docnum)
void Preview::setCurTabProps(const Rcl::Doc &doc, int docnum)
{
QString title;
map<string,string>::const_iterator meta_it;
if ((meta_it = doc.meta.find("title")) != doc.meta.end()) {
if ((meta_it = doc.meta.find(Rcl::Doc::keytt)) != doc.meta.end()
&& !meta_it->second.empty()) {
title = QString::fromUtf8(meta_it->second.c_str(),
meta_it->second.length());
} else {
title = QString::fromLocal8Bit(path_getsimple(doc.url).c_str());
}
if (title.length() > 20) {
title = title.left(10) + "..." + title.right(10);
@ -572,16 +574,15 @@ void Preview::setCurTabProps(const string &fn, const Rcl::Doc &doc,
PreviewTextEdit *e = currentEditor();
if (e) {
e->m_data.fn = fn;
e->m_data.url = doc.url;
e->m_data.ipath = doc.ipath;
e->m_data.docnum = docnum;
}
}
bool Preview::makeDocCurrent(const string &fn, size_t sz,
const Rcl::Doc& doc, int docnum, bool sametab)
bool Preview::makeDocCurrent(const Rcl::Doc& doc, int docnum, bool sametab)
{
LOGDEB(("Preview::makeDocCurrent: %s\n", fn.c_str()));
LOGDEB(("Preview::makeDocCurrent: %s\n", doc.url.c_str()));
/* Check if we already have this page */
for (int i = 0; i < pvTab->count(); i++) {
@ -593,7 +594,7 @@ bool Preview::makeDocCurrent(const string &fn, size_t sz,
if (tw) {
PreviewTextEdit *edit =
dynamic_cast<PreviewTextEdit*>(tw->child("pvEdit"));
if (edit && !edit->m_data.fn.compare(fn) &&
if (edit && !edit->m_data.url.compare(doc.url) &&
!edit->m_data.ipath.compare(doc.ipath)) {
pvTab->showPage(tw);
return true;
@ -606,7 +607,7 @@ bool Preview::makeDocCurrent(const string &fn, size_t sz,
return false;
}
m_justCreated = false;
if (!loadFileInCurrentTab(fn, sz, doc, docnum)) {
if (!loadDocInCurrentTab(doc, docnum)) {
closeCurrentTab();
return false;
}
@ -637,16 +638,15 @@ bool Preview::makeDocCurrent(const string &fn, size_t sz,
/* A thread to do the file reading / format conversion */
class LoadThread : public QThread {
int *statusp;
Rcl::Doc *out;
Rcl::Doc& out;
const Rcl::Doc& idoc;
string filename;
string ipath;
string *mtype;
string tmpdir;
int loglevel;
public:
string missing;
LoadThread(int *stp, Rcl::Doc *odoc, string fn, string ip, string *mt)
: statusp(stp), out(odoc), filename(fn), ipath(ip), mtype(mt)
LoadThread(int *stp, Rcl::Doc& odoc, const Rcl::Doc& idc)
: statusp(stp), out(odoc), idoc(idc)
{
loglevel = DebugLog::getdbl()->getlevel();
}
@ -666,40 +666,35 @@ class LoadThread : public QThread {
*statusp = -1;
return;
}
struct stat st;
if (stat(filename.c_str(), &st) < 0) {
LOGERR(("Preview: can't stat [%s]\n", filename.c_str()));
QMessageBox::critical(0, "Recoll",
Preview::tr("File does not exist"));
*statusp = -1;
return;
}
// QMessageBox::critical(0, "Recoll", Preview::tr("File does not exist"));
FileInterner interner(filename, &st, rclconfig, tmpdir,
FileInterner::FIF_forPreview,
mtype);
FileInterner interner(idoc, rclconfig, tmpdir,
FileInterner::FIF_forPreview);
// We don't set the interner's target mtype to html because we
// do want the html filter to do its work: we won't use the
// text, but we need the conversion to utf-8
// interner.setTargetMType("text/html");
try {
FileInterner::Status ret = interner.internfile(*out, ipath);
string ipath = idoc.ipath;
FileInterner::Status ret = interner.internfile(out, ipath);
if (ret == FileInterner::FIDone || ret == FileInterner::FIAgain) {
// FIAgain is actually not nice here. It means that the record
// for the *file* of a multidoc was selected. Actually this
// shouldn't have had a preview link at all, but we don't know
// how to handle it now. Better to show the first doc than
// a mysterious error. Happens when the file name matches a
// a search term of course.
// a search term.
*statusp = 0;
// If we prefer html and it is available, replace the
// text/plain document text
if (prefs.previewHtml && !interner.get_html().empty()) {
out->text = interner.get_html();
out->mimetype = "text/html";
out.text = interner.get_html();
out.mimetype = "text/html";
}
} else {
out->mimetype = interner.getMimetype();
out.mimetype = interner.getMimetype();
interner.getMissingExternal(missing);
*statusp = -1;
}
@ -754,8 +749,7 @@ public:
~LoadGuard() {*m_bp = false; CancelCheck::instance().setCancel(false);}
};
bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
int docnum)
bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
{
if (m_loading) {
LOGERR(("ALready loading\n"));
@ -767,18 +761,11 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
m_haveAnchors = false;
Rcl::Doc doc = idoc;
setCurTabProps(idoc, docnum);
if (doc.meta[Rcl::Doc::keytt].empty())
doc.meta[Rcl::Doc::keytt] = path_getsimple(doc.url);
setCurTabProps(fn, doc, docnum);
char csz[20];
sprintf(csz, "%lu", (unsigned long)sz);
QString msg = QString("Loading: %1 (size %2 bytes)")
.arg(QString::fromLocal8Bit(fn.c_str()))
.arg(csz);
.arg(QString::fromLocal8Bit(idoc.url.c_str()))
.arg(QString::fromAscii(idoc.fbytes.c_str()));
// Create progress dialog and aux objects
const int nsteps = 20;
@ -786,12 +773,12 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
progress.setMinimumDuration(2000);
WaiterThread waiter(100);
// Load and convert file
// Load and convert document
// idoc came out of the index data (main text and other fields missing).
// fdoc is the complete one that we are going to extract from storage.
Rcl::Doc fdoc;
// Need to setup config to retrieve possibly local parameters
rclconfig->setKeyDir(path_getfather(fn));
int status = 1;
LoadThread lthr(&status, &fdoc, fn, doc.ipath, &doc.mimetype);
LoadThread lthr(&status, fdoc, idoc);
lthr.start();
int prog;
for (prog = 1;;prog++) {
@ -963,7 +950,7 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
}
// Enter document in document history
g_dynconf->enterDoc(fn, doc.ipath);
g_dynconf->enterDoc(idoc.url, idoc.ipath);
editor->setFocus();
emit(previewExposed(this, m_searchId, docnum));
@ -998,7 +985,7 @@ void PreviewTextEdit::toggleFields()
// Else display fields
m_dspflds = true;
QString txt = "<html><head></head><body>\n";
txt += "<b>" + QString::fromLocal8Bit(m_data.fn.c_str());
txt += "<b>" + QString::fromLocal8Bit(m_data.url.c_str());
if (!m_data.ipath.empty())
txt += "|" + QString::fromUtf8(m_data.ipath.c_str());
txt += "</b><br><br>";

View File

@ -51,7 +51,7 @@ class Q3PopupMenu;
// We keep a list of data associated to each tab
class TabData {
public:
string fn; // filename for this tab
string url; // filename for this tab
string ipath; // Internal doc path inside file
int docnum; // Index of doc in db search results.
// doc out of internfile (previous fields come from the index) with
@ -133,8 +133,13 @@ public:
virtual void closeEvent(QCloseEvent *e );
virtual bool eventFilter(QObject *target, QEvent *event );
virtual bool makeDocCurrent(const string &fn, size_t sz,
const Rcl::Doc& idoc, int docnum,
/**
* Arrange for the document to be displayed either by exposing the tab
* if already loaded, or by creating a new tab and loading it.
* @param docnum is used to link back to the result list (to highlight
* the paragraph when the tab is exposed, etc.)
*/
virtual bool makeDocCurrent(const Rcl::Doc& idoc, int docnum,
bool sametab = false);
friend class PreviewTextEdit;
public slots:
@ -182,12 +187,10 @@ private:
QCheckBox* matchCheck;
void init();
virtual void setCurTabProps(const string& fn, const Rcl::Doc& doc,
int docnum);
virtual void setCurTabProps(const Rcl::Doc& doc, int docnum);
virtual PreviewTextEdit *currentEditor();
virtual PreviewTextEdit *addEditorTab();
virtual bool loadFileInCurrentTab(string fn, size_t sz,
const Rcl::Doc& idoc, int dnm);
virtual bool loadDocInCurrentTab(const Rcl::Doc& idoc, int dnm);
};
#endif /* _PREVIEW_W_H_INCLUDED_ */

View File

@ -498,12 +498,6 @@ void RclMain::toggleIndexing()
fileToggleIndexingAction->setEnabled(FALSE);
}
// Note that all our 'urls' are like : file://...
// Turn a url into a local path by dropping its first 7 characters (the
// length of "file://"). The prefix itself is not checked.
// @param url the url
// @return the url without its first 7 characters, or an empty string if
//   the url is shorter than that (substr() would throw std::out_of_range).
static std::string urltolocalpath(std::string url)
{
    // strlen("file://")
    const std::string::size_type prefixlen = 7;
    if (url.length() < prefixlen)
        return std::string();
    return url.substr(prefixlen, std::string::npos);
}
// Start a db query and set the reslist docsource
void RclMain::startSearch(RefCntr<Rcl::SearchData> sdata)
{
@ -688,14 +682,6 @@ void RclMain::startPreview(int docnum, int mod)
return;
}
// Check file exists in file system
string fn = urltolocalpath(doc.url);
struct stat st;
if (stat(fn.c_str(), &st) < 0) {
QMessageBox::warning(0, "Recoll", tr("Cannot access document file: ") +
fn.c_str());
return;
}
if (mod & Qt::ShiftButton) {
// User wants new preview window
curPreview = 0;
@ -724,7 +710,7 @@ void RclMain::startPreview(int docnum, int mod)
curPreview->setCaption(resList->getDescription());
curPreview->show();
}
curPreview->makeDocCurrent(fn, st.st_size, doc, docnum);
curPreview->makeDocCurrent(doc, docnum);
}
/**
@ -736,14 +722,6 @@ void RclMain::startPreview(int docnum, int mod)
*/
void RclMain::startPreview(Rcl::Doc doc)
{
// Check file exists in file system
string fn = urltolocalpath(doc.url);
struct stat st;
if (stat(fn.c_str(), &st) < 0) {
QMessageBox::warning(0, "Recoll", tr("Cannot access document file: ") +
fn.c_str());
return;
}
Preview *preview = new Preview(0, HiliteData());
if (preview == 0) {
QMessageBox::warning(0, tr("Warning"),
@ -755,7 +733,7 @@ void RclMain::startPreview(Rcl::Doc doc)
connect(preview, SIGNAL(wordSelect(QString)),
this, SLOT(ssearchAddTerm(QString)));
preview->show();
preview->makeDocCurrent(fn, st.st_size, doc, 0);
preview->makeDocCurrent(doc, 0);
}
// Show next document from result list in current preview tab
@ -802,15 +780,7 @@ void RclMain::previewPrevOrNextInTab(Preview * w, int sid, int docnum, bool nxt)
}
// Check that file exists in file system
string fn = urltolocalpath(doc.url);
struct stat st;
if (stat(fn.c_str(), &st) < 0) {
QMessageBox::warning(0, "Recoll", tr("Cannot access document file: ") +
fn.c_str());
return;
}
w->makeDocCurrent(fn, st.st_size, doc, docnum, true);
w->makeDocCurrent(doc, docnum, true);
}
// Preview tab exposed: if the preview comes from the currently
@ -862,7 +832,6 @@ void RclMain::saveDocToFile(int docnum)
" from database"));
return;
}
string fn = urltolocalpath(doc.url);
QString s =
QFileDialog::getSaveFileName(path_home().c_str(),
"", this,
@ -870,8 +839,7 @@ void RclMain::saveDocToFile(int docnum)
tr("Choose a file name to save under"));
string tofile((const char *)s.local8Bit());
TempFile temp; // not used
if (!FileInterner::idocToFile(temp, tofile, rclconfig, fn,
doc.ipath, doc.mimetype)) {
if (!FileInterner::idocToFile(temp, tofile, rclconfig, doc)) {
QMessageBox::warning(0, "Recoll",
tr("Cannot extract document or create "
"temporary file"));
@ -913,6 +881,14 @@ void RclMain::startNativeViewer(int docnum)
startNativeViewer(doc);
}
// Map a file:// url to the local path which it designates.
// @param url the url
// @return what follows the "file://" prefix, or an empty string if the
//   url does not begin with it.
static string fileurltolocalpath(string url)
{
    const string scheme("file://");
    // compare() accepts a subject shorter than the prefix, it then just
    // reports a difference.
    if (url.compare(0, scheme.length(), scheme) != 0)
        return string();
    return url.substr(scheme.length(), string::npos);
}
void RclMain::startNativeViewer(Rcl::Doc doc)
{
// Look for appropriate viewer
@ -993,22 +969,20 @@ void RclMain::startNativeViewer(Rcl::Doc doc)
return;
}
// For files with an ipath, we do things differently depending if
// the configured command seems to be able to grok it or not: if
// not, create a temporary file
// We may need a temp file, or not depending on the command arguments
// and the fact that this is a subdoc or not.
bool wantsipath = cmd.find("%i") != string::npos;
bool wantsfile = cmd.find("%f") != string::npos;
bool istempfile = false;
string fn = urltolocalpath(doc.url);
string url;
string fn = fileurltolocalpath(doc.url);
string url = doc.url;
// If the command wants a file but this is not a file url, or
// there is an ipath that it won't understand, we need a temp file:
rclconfig->setKeyDir(path_getfather(fn));
if (doc.ipath.empty() || wantsipath) {
url = doc.url;
} else {
// There is an ipath and the command does not know about
// them. We need a temp file.
if ((wantsfile && fn.empty()) || (!wantsipath && !doc.ipath.empty())) {
TempFile temp;
if (!FileInterner::idocToFile(temp, string(), rclconfig, fn,
doc.ipath, doc.mimetype)) {
if (!FileInterner::idocToFile(temp, string(), rclconfig, doc)) {
QMessageBox::warning(0, "Recoll",
tr("Cannot extract document or create "
"temporary file"));

View File

@ -40,6 +40,8 @@ namespace Rcl {
const string Doc::keytt("title");
const string Doc::keykw("keywords");
const string Doc::keymd5("md5");
const string Doc::keybcknd("rclbes");
const string Doc::keyudi("rcludi");
void Doc::dump(bool dotext) const
{

View File

@ -44,9 +44,9 @@ class Doc {
// can be accessed after a query without fetching the actual document).
// We indicate the routine that sets them up during indexing
// This is just "file://" + binary or url-encoded filename. No
// transcoding: this is used to access files Index: computed from
// fn by Db::add caller. Query: from doc data.
// Binary or url-encoded url. No transcoding: this is used to access files
// Index: computed by Db::add caller.
// Query: from doc data.
string url;
// Transcoded version of the simple file name for SFN-prefixed
@ -160,6 +160,9 @@ class Doc {
static const string keytt; // title
static const string keykw; // keywords
static const string keymd5; // file md5 checksum
static const string keybcknd; // backend type for data not from the filesys
// udi back from index. Only set by Rcl::Query::getdoc().
static const string keyudi;
};

View File

@ -369,6 +369,7 @@ bool Query::getDoc(int exti, Doc &doc)
Xapian::docid docid = 0;
int pc = 0;
string data;
string udi;
m_reason.erase();
for (int xaptries=0; xaptries < 2; xaptries++) {
try {
@ -377,6 +378,16 @@ bool Query::getDoc(int exti, Doc &doc)
pc = m_nq->xmset.convert_to_percent(m_nq->xmset[xapi-first]);
data = xdoc.get_data();
m_reason.erase();
Chrono chron;
Xapian::TermIterator it = xdoc.termlist_begin();
it.skip_to("Q");
if (it != xdoc.termlist_end()) {
udi = *it;
if (!udi.empty())
udi = udi.substr(1);
}
LOGDEB2(("Query::getDoc: %d ms to get udi [%s]\n", chron.millis(),
udi.c_str()));
break;
} catch (Xapian::DatabaseModifiedError &error) {
// retry or end of loop
@ -390,6 +401,7 @@ bool Query::getDoc(int exti, Doc &doc)
LOGERR(("Query::getDoc: %s\n", m_reason.c_str()));
return false;
}
doc.meta[Rcl::Doc::keyudi] = udi;
// Parse xapian document's data and populate doc fields
return m_db->m_ndb->dbDataToRclDoc(docid, data, doc, pc);
}

View File

@ -43,7 +43,7 @@ recipient = XTO
# "author" used to be stored by default, now set here as optional
# "apptag" is used for viewer specialization (depending on local config)
[stored]
stored = author apptag
stored = author apptag rclbes
##########################
# This section defines field names aliases or synonyms. Any right hand side