1st beagle version with index/preview working
This commit is contained in:
parent
efa501f06d
commit
bbba826c06
@ -175,7 +175,7 @@ BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
|
||||
if (!m_config->getConfParam("beaglequeuedir", m_queuedir))
|
||||
m_queuedir = path_tildexpand("~/.beagle/ToIndex");
|
||||
|
||||
if (m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0) {
|
||||
if (m_db && m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0) {
|
||||
string reason;
|
||||
if (!maketmpdir(m_tmpdir, reason)) {
|
||||
LOGERR(("DbIndexer: cannot create temporary directory: %s\n",
|
||||
@ -212,26 +212,23 @@ BeagleQueueIndexer::~BeagleQueueIndexer()
|
||||
deleteZ(m_cache);
|
||||
}
|
||||
|
||||
bool BeagleQueueIndexer::indexFromCache(const string& udi)
|
||||
bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc,
|
||||
string& data, string *htt)
|
||||
{
|
||||
string dict, data;
|
||||
string dict;
|
||||
|
||||
// This is horribly inefficient and needs fixing either by saving
|
||||
// the offsets during the forward scan, or using an auxiliary isam
|
||||
// map
|
||||
// This is horribly inefficient, especially while reindexing from
|
||||
// cache, and needs fixing either by saving the offsets during the
|
||||
// forward scan, or using an auxiliary isam map
|
||||
if (!m_cache->get(udi, dict, data))
|
||||
return false;
|
||||
|
||||
ConfSimple cf(dict, 1);
|
||||
|
||||
string hittype;
|
||||
if (!cf.get(keybght, hittype, "")) {
|
||||
LOGERR(("BeagleIndexer::index: cc entry has no hit type\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (htt)
|
||||
cf.get(keybght, *htt, "");
|
||||
|
||||
// Build a doc from saved metadata
|
||||
Rcl::Doc dotdoc;
|
||||
cf.get("url", dotdoc.url, "");
|
||||
cf.get("mimetype", dotdoc.mimetype, "");
|
||||
cf.get("fmtime", dotdoc.fmtime, "");
|
||||
@ -242,9 +239,29 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
|
||||
it != names.end(); it++) {
|
||||
cf.get(*it, dotdoc.meta[*it], "");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BeagleQueueIndexer::indexFromCache(const string& udi)
|
||||
{
|
||||
if (!m_db)
|
||||
return false;
|
||||
|
||||
Rcl::Doc dotdoc;
|
||||
string data;
|
||||
string hittype;
|
||||
|
||||
if (!getFromCache(udi, dotdoc, data, &hittype))
|
||||
return false;
|
||||
|
||||
if (hittype.empty()) {
|
||||
LOGERR(("BeagleIndexer::index: cc entry has no hit type\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!stringlowercmp("bookmark", hittype)) {
|
||||
// Just index the dotdoc
|
||||
dotdoc.meta[Rcl::Doc::keybcknd] = "BGL";
|
||||
return m_db->addOrUpdate(udi, "", dotdoc);
|
||||
} else if (stringlowercmp("webhistory", dotdoc.meta[keybght]) ||
|
||||
(dotdoc.mimetype.compare("text/html") &&
|
||||
@ -269,13 +286,15 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
|
||||
doc.url = dotdoc.url;
|
||||
doc.fbytes = dotdoc.fbytes;
|
||||
doc.sig = "";
|
||||
|
||||
doc.meta[Rcl::Doc::keybcknd] = "BGL";
|
||||
return m_db->addOrUpdate(udi, "", doc);
|
||||
}
|
||||
}
|
||||
|
||||
bool BeagleQueueIndexer::index()
|
||||
{
|
||||
if (!m_db)
|
||||
return false;
|
||||
LOGDEB(("BeagleQueueIndexer::processqueue: dir: [%s]\n",
|
||||
m_queuedir.c_str()));
|
||||
m_config->setKeyDir(m_queuedir);
|
||||
@ -322,6 +341,9 @@ BeagleQueueIndexer::processone(const string &path,
|
||||
const struct stat *stp,
|
||||
FsTreeWalker::CbFlag flg)
|
||||
{
|
||||
if (!m_db) //??
|
||||
return FsTreeWalker::FtwError;
|
||||
|
||||
bool dounlink = false;
|
||||
|
||||
if (flg != FsTreeWalker::FtwRegular)
|
||||
@ -365,6 +387,7 @@ BeagleQueueIndexer::processone(const string &path,
|
||||
dotfile.m_fields.set("fmtime", dotdoc.fmtime, "");
|
||||
dotfile.m_fields.set("fbytes", dotdoc.fbytes, "");
|
||||
|
||||
dotdoc.meta[Rcl::Doc::keybcknd] = "BGL";
|
||||
if (!m_db->addOrUpdate(udi, "", dotdoc))
|
||||
return FsTreeWalker::FtwError;
|
||||
|
||||
@ -402,6 +425,7 @@ BeagleQueueIndexer::processone(const string &path,
|
||||
dotfile.m_fields.set("fmtime", dotdoc.fmtime, "");
|
||||
dotfile.m_fields.set("fbytes", dotdoc.fbytes, "");
|
||||
|
||||
doc.meta[Rcl::Doc::keybcknd] = "BGL";
|
||||
if (!m_db->addOrUpdate(udi, "", doc))
|
||||
return FsTreeWalker::FtwError;
|
||||
|
||||
|
||||
@ -19,21 +19,30 @@
|
||||
/* @(#$Id: $ (C) 2009 J.F.Dockes */
|
||||
|
||||
/**
|
||||
* Code to process the Beagle indexing queue. Beagle MUST NOT be
|
||||
* running, else mayhem will ensue. Interesting to reuse the beagle
|
||||
* firefox visited page indexing plugin for example.
|
||||
* Process the Beagle indexing queue.
|
||||
*
|
||||
* Beagle MUST NOT be running, else mayhem will ensue.
|
||||
*
|
||||
* This is mainly written to reuse the Beagle Firefox plug-in (which
|
||||
* copies visited pages and bookmarks to the queue).
|
||||
*/
|
||||
|
||||
#include "rclconfig.h"
|
||||
#include "fstreewalk.h"
|
||||
#include "rcldb.h"
|
||||
#include "rcldoc.h"
|
||||
|
||||
class DbIxStatusUpdater;
|
||||
class CirCache;
|
||||
class RclConfig;
|
||||
namespace Rcl {
|
||||
class Db;
|
||||
}
|
||||
|
||||
class BeagleQueueIndexer : public FsTreeWalkerCB {
|
||||
public:
|
||||
BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
|
||||
/**
|
||||
* @param db can be null when the object is only used read-only, for calling getFromCache()
|
||||
*/
|
||||
BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db = 0,
|
||||
DbIxStatusUpdater *updfunc = 0);
|
||||
~BeagleQueueIndexer();
|
||||
|
||||
@ -42,6 +51,8 @@ public:
|
||||
FsTreeWalker::Status
|
||||
processone(const string &, const struct stat *, FsTreeWalker::CbFlag);
|
||||
|
||||
bool getFromCache(const string& udi, Rcl::Doc &doc, string& data,
|
||||
string *hittype = 0);
|
||||
private:
|
||||
RclConfig *m_config;
|
||||
Rcl::Db *m_db;
|
||||
@ -51,7 +62,6 @@ private:
|
||||
DbIxStatusUpdater *m_updater;
|
||||
|
||||
bool indexFromCache(const string& udi);
|
||||
|
||||
};
|
||||
|
||||
#endif /* _beaglequeue_h_included_ */
|
||||
|
||||
@ -45,6 +45,7 @@ using namespace std;
|
||||
#include "rclconfig.h"
|
||||
#include "mh_html.h"
|
||||
#include "fileudi.h"
|
||||
#include "beaglequeue.h"
|
||||
|
||||
#ifdef RCL_USE_XATTR
|
||||
#include "pxattr.h"
|
||||
@ -166,12 +167,23 @@ void FileInterner::tmpcleanup()
|
||||
//
|
||||
// Empty handler on return says that we're in error, this will be
|
||||
// processed by the first call to internfile().
|
||||
// Split into "constructor calls init()" to allow use from other constructor
|
||||
FileInterner::FileInterner(const string &f, const struct stat *stp,
|
||||
RclConfig *cnf,
|
||||
const string& td, int flags, const string *imime)
|
||||
: m_cfg(cnf), m_fn(f), m_forPreview(flags & FIF_forPreview),
|
||||
m_tdir(td)
|
||||
: m_tdir(td)
|
||||
{
|
||||
initcommon(cnf, flags);
|
||||
init(f, stp, cnf, td, flags, imime);
|
||||
}
|
||||
|
||||
void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
|
||||
const string& td, int flags, const string *imime)
|
||||
{
|
||||
m_fn = f;
|
||||
|
||||
cnf->setKeyDir(path_getfather(m_fn));
|
||||
|
||||
string l_mime;
|
||||
bool usfci = false;
|
||||
cnf->getConfParam("usesystemfilecommand", &usfci);
|
||||
@ -237,8 +249,8 @@ FileInterner::FileInterner(const string &f, const struct stat *stp,
|
||||
if (!df) {
|
||||
// No handler for this type, for now :( if indexallfilenames
|
||||
// is set in the config, this normally wont happen (we get mh_unknown)
|
||||
LOGERR(("FileInterner:: ignored: [%s] mime [%s]\n",
|
||||
f.c_str(), l_mime.c_str()));
|
||||
LOGINFO(("FileInterner:: ignored: [%s] mime [%s]\n",
|
||||
f.c_str(), l_mime.c_str()));
|
||||
return;
|
||||
}
|
||||
df->set_property(Dijon::Filter::OPERATING_MODE,
|
||||
@ -258,15 +270,143 @@ FileInterner::FileInterner(const string &f, const struct stat *stp,
|
||||
LOGERR(("FileInterner:: error parsing %s\n", m_fn.c_str()));
|
||||
return;
|
||||
}
|
||||
m_handlers.reserve(MAXHANDLERS);
|
||||
for (unsigned int i = 0; i < MAXHANDLERS; i++)
|
||||
m_tmpflgs[i] = false;
|
||||
|
||||
m_handlers.push_back(df);
|
||||
LOGDEB(("FileInterner::FileInterner: %s [%s]\n", l_mime.c_str(),
|
||||
m_fn.c_str()));
|
||||
}
|
||||
|
||||
// Setup from memory data (ie: out of the web cache). imime needs to be set.
|
||||
FileInterner::FileInterner(const string &data, RclConfig *cnf,
|
||||
const string& td, int flags, const string& imime)
|
||||
: m_tdir(td)
|
||||
{
|
||||
initcommon(cnf, flags);
|
||||
init(data, cnf, td, flags, imime);
|
||||
}
|
||||
|
||||
void FileInterner::init(const string &data, RclConfig *cnf,
|
||||
const string& td, int flags, const string& imime)
|
||||
{
|
||||
if (imime.empty()) {
|
||||
LOGERR(("FileInterner: inmemory constructor needs input mime type\n"));
|
||||
return;
|
||||
}
|
||||
m_mimetype = imime;
|
||||
|
||||
// Look for appropriate handler (might still return empty)
|
||||
Dijon::Filter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview);
|
||||
|
||||
if (!df) {
|
||||
// No handler for this type, for now :( if indexallfilenames
|
||||
// is set in the config, this normally wont happen (we get mh_unknown)
|
||||
LOGINFO(("FileInterner:: ignored: mime [%s]\n", m_mimetype.c_str()));
|
||||
return;
|
||||
}
|
||||
df->set_property(Dijon::Filter::OPERATING_MODE,
|
||||
m_forPreview ? "view" : "index");
|
||||
|
||||
string charset = m_cfg->getDefCharset();
|
||||
df->set_property(Dijon::Filter::DEFAULT_CHARSET, charset);
|
||||
|
||||
bool setres = false;
|
||||
if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
|
||||
setres = df->set_document_string(data);
|
||||
} else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) {
|
||||
setres = df->set_document_data(data.c_str(), data.length());
|
||||
} else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
|
||||
string filename;
|
||||
if (dataToTempFile(data, m_mimetype, filename)) {
|
||||
if (!(setres=df->set_document_file(filename))) {
|
||||
m_tmpflgs[0] = false;
|
||||
m_tempfiles.pop_back();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!setres) {
|
||||
LOGINFO(("FileInterner:: set_doc failed inside for mtype %s\n",
|
||||
m_mimetype.c_str()));
|
||||
delete df;
|
||||
return;
|
||||
}
|
||||
m_handlers.push_back(df);
|
||||
}
|
||||
|
||||
// Member initialisation shared by the constructors: remember the
// configuration, decode the preview flag, prepare the handler stack and
// its temp-file flags, and set the default target mime type.
void FileInterner::initcommon(RclConfig *cnf, int flags)
{
    m_cfg = cnf;
    m_forPreview = (flags & FIF_forPreview) ? true : false;

    // Handler stack: reserve the room and clear all temp-file markers.
    m_handlers.reserve(MAXHANDLERS);
    unsigned int slot = 0;
    while (slot < MAXHANDLERS) {
        m_tmpflgs[slot] = false;
        slot++;
    }

    m_targetMType = stxtplain;
}
|
||||
|
||||
// Constructor for the case where the caller does not know where the data
// is stored. The backend name found in idoc.meta[Rcl::Doc::keybcknd]
// selects the access method:
//  - empty or "FS": idoc.url must begin with file://. The file is
//    stat()ed and passed to the file-based init().
//  - "BGL": data and mime type are fetched from the Beagle queue cache
//    through BeagleQueueIndexer::getFromCache(), using the udi stored in
//    idoc.meta[Rcl::Doc::keyudi], then passed to the in-memory init().
// On error (no url, non file:// url for FS, inaccessible file, missing udi,
// cache miss, unknown backend) a message is logged and the constructor
// returns without calling init().
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf,
                           const string& td, int flags)
    : m_tdir(td)
{
    initcommon(cnf, flags);

    // We do insist on having an url...
    if (idoc.url.empty()) {
        LOGERR(("FileInterner::FileInterner:: no url!\n"));
        return;
    }

    // This stuff will be moved to some kind of generic function:
    //   get(idoc, ofn, odata, ometa)
    // and use some kind of backstore object factory next time we add a
    // backend (if ever).
    string backend;
    map<string, string>::const_iterator it;
    if ((it = idoc.meta.find(Rcl::Doc::keybcknd)) != idoc.meta.end())
        backend = it->second;

    if (backend.empty() || !backend.compare("FS")) {
        // The url has to be like file://
        if (idoc.url.find("file://") != 0) {
            LOGERR(("FileInterner: FS backend and non fs url: [%s]\n",
                    idoc.url.c_str()));
            return;
        }
        string fn = idoc.url.substr(7, string::npos);
        struct stat st;
        if (stat(fn.c_str(), &st) < 0) {
            LOGERR(("InternFile: cannot access document file: [%s]\n",
                    fn.c_str()));
            return;
        }
        init(fn, &st, cnf, td, flags, &idoc.mimetype);
    } else if (!backend.compare("BGL")) {
        // Retrieve from our webcache (beagle data). No db is given to the
        // indexer object: it is only used for reading the cache.
        BeagleQueueIndexer beagler(cnf);
        string data;
        Rcl::Doc dotdoc;
        // Reuse the iterator declared above instead of shadowing it.
        it = idoc.meta.find(Rcl::Doc::keyudi);
        if (it == idoc.meta.end() || it->second.empty()) {
            LOGERR(("Internfile: no udi in idoc\n"));
            return;
        }
        string udi = it->second;
        if (!beagler.getFromCache(udi, dotdoc, data)) {
            LOGINFO(("Internfile: failed fetch from Beagle cache for [%s]\n",
                     udi.c_str()));
            return;
        }
        if (dotdoc.mimetype.compare(idoc.mimetype)) {
            // The format has three %s conversions: the udi must be passed
            // along with the two mime types. A mismatch is only reported,
            // the mime type recorded in the cache is the one we use.
            LOGINFO(("Internfile: udi [%s], mimetype mismatch: in: [%s], bgl "
                     "[%s]\n", udi.c_str(), idoc.mimetype.c_str(),
                     dotdoc.mimetype.c_str()));
        }
        init(data, cnf, td, flags, dotdoc.mimetype);
    } else {
        LOGERR(("InternFile: unknown backend: [%s]\n", backend.c_str()));
        return;
    }
}
|
||||
|
||||
FileInterner::~FileInterner()
|
||||
{
|
||||
tmpcleanup();
|
||||
@ -286,7 +426,10 @@ bool FileInterner::dataToTempFile(const string& dt, const string& mt,
|
||||
// Find appropriate suffix for mime type
|
||||
TempFile temp(new TempFileInternal(m_cfg->getSuffixFromMimeType(mt)));
|
||||
if (temp->ok()) {
|
||||
m_tmpflgs[m_handlers.size()-1] = true;
|
||||
// We are called before the handler is actually on the stack, so the
|
||||
// index is m_handlers.size(). m_tmpflgs is a static array, so this is
|
||||
// no problem
|
||||
m_tmpflgs[m_handlers.size()] = true;
|
||||
m_tempfiles.push_back(temp);
|
||||
} else {
|
||||
LOGERR(("FileInterner::dataToTempFile: cant create tempfile: %s\n",
|
||||
@ -550,7 +693,7 @@ int FileInterner::addHandler()
|
||||
string filename;
|
||||
if (dataToTempFile(*txt, mimetype, filename)) {
|
||||
if (!(setres = newflt->set_document_file(filename))) {
|
||||
m_tmpflgs[m_handlers.size()-1] = false;
|
||||
m_tmpflgs[m_handlers.size()] = false;
|
||||
m_tempfiles.pop_back();
|
||||
}
|
||||
}
|
||||
@ -711,6 +854,12 @@ class DirWiper {
|
||||
}
|
||||
};
|
||||
|
||||
// Temporary while we fix backend things.
// Translate a file:// url into a local file system path.
// @param url the url to convert.
// @return the part of the url after the "file://" prefix, or an empty
//   string if the url does not begin with this prefix. The prefix is
//   checked before stripping, so that a short input can't make substr()
//   throw std::out_of_range and a non-file url is never truncated into a
//   bogus path.
static string urltolocalpath(string url)
{
    static const char prefix[] = "file://";
    const string::size_type plen = sizeof(prefix) - 1;
    // compare() at position 0 is safe for any length, including empty.
    if (url.compare(0, plen, prefix) != 0)
        return string();
    return url.substr(plen, string::npos);
}
|
||||
|
||||
// Extract subdoc out of multidoc into temporary file.
|
||||
// We do the usual internfile stuff: create a temporary directory,
|
||||
// then create an interner and call internfile. The target mtype is set to
|
||||
@ -722,11 +871,13 @@ class DirWiper {
|
||||
// - The output temporary file which is held in a reference-counted
|
||||
// object and will be deleted when done with.
|
||||
bool FileInterner::idocToFile(TempFile& otemp, const string& tofile,
|
||||
RclConfig *cnf,
|
||||
const string& fn,
|
||||
const string& ipath,
|
||||
const string& mtype)
|
||||
RclConfig *cnf, const Rcl::Doc& idoc)
|
||||
{
|
||||
LOGDEB(("FileInterner::idocToFile\n"));
|
||||
idoc.dump();
|
||||
string fn = urltolocalpath(idoc.url);
|
||||
string ipath = idoc.ipath;
|
||||
string mtype = idoc.mimetype;
|
||||
struct stat st;
|
||||
if (stat(fn.c_str(), &st) < 0) {
|
||||
LOGERR(("FileInterner::idocToFile: can't stat [%s]\n", fn.c_str()));
|
||||
|
||||
@ -50,7 +50,8 @@ class FileInterner {
|
||||
* Get immediate parent for document.
|
||||
*
|
||||
* This is not in general the same as the "parent" document used
|
||||
* with Rcl::Db::addOrUpdate(). The latter is generally the enclosing file.
|
||||
* with Rcl::Db::addOrUpdate(). The latter is generally the enclosing file,
|
||||
* this would be for example the email containing the attachment.
|
||||
*/
|
||||
static bool getEnclosing(const string &url, const string &ipath,
|
||||
string &eurl, string &eipath, string& udi);
|
||||
@ -58,9 +59,9 @@ class FileInterner {
|
||||
enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype};
|
||||
|
||||
/**
|
||||
* Identify and possibly decompress file, create adequate
|
||||
* handler. The mtype parameter is only set when the object is
|
||||
* created for previewing a file. Filter output may be
|
||||
* Identify and possibly decompress file, and create the top filter.
|
||||
* The mtype parameter is not always set (it is when the object is
|
||||
* created for previewing a file). Filter output may be
|
||||
* different for previewing and indexing.
|
||||
*
|
||||
* @param fn file name
|
||||
@ -69,12 +70,27 @@ class FileInterner {
|
||||
* @param td temporary directory to use as working space if
|
||||
* decompression needed. Must be private and will be wiped clean.
|
||||
* @param mtype mime type if known. For a compressed file this is the
|
||||
* mime type for the uncompressed version. This currently doubles up
|
||||
* to indicate that this object is for previewing (not indexing).
|
||||
* mime type for the uncompressed version.
|
||||
*/
|
||||
FileInterner(const string &fn, const struct stat *stp,
|
||||
RclConfig *cnf, const string& td, int flags,
|
||||
const string *mtype = 0);
|
||||
|
||||
/**
|
||||
* Alternate constructor for the case where the data is in memory.
|
||||
* This is mainly for data extracted from the web cache. The mime type
|
||||
* must be set, input must be uncompressed.
|
||||
*/
|
||||
FileInterner(const string &data, RclConfig *cnf, const string& td,
|
||||
int flags, const string& mtype);
|
||||
|
||||
/**
|
||||
* Alternate constructor for the case where it is not known where
|
||||
* the data will come from. We'll use the doc fields and try our
|
||||
* best...
|
||||
*/
|
||||
FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, const string& td,
|
||||
int flags);
|
||||
|
||||
~FileInterner();
|
||||
|
||||
@ -121,8 +137,7 @@ class FileInterner {
|
||||
* @param mtype The target mime type (we don't want to decode to text!)
|
||||
*/
|
||||
static bool idocToFile(TempFile& temp, const string& tofile,
|
||||
RclConfig *cnf, const string& fn,
|
||||
const string& ipath, const string& mtype);
|
||||
RclConfig *cnf, const Rcl::Doc& doc);
|
||||
|
||||
const string& getReason() const {return m_reason;}
|
||||
static void getMissingExternal(string& missing);
|
||||
@ -160,6 +175,14 @@ class FileInterner {
|
||||
static set<string> o_missingExternal;
|
||||
static map<string, set<string> > o_typesForMissing;
|
||||
|
||||
// Pseudo-constructors
|
||||
void init(const string &fn, const struct stat *stp,
|
||||
RclConfig *cnf, const string& td, int flags,
|
||||
const string *mtype = 0);
|
||||
void init(const string &data, RclConfig *cnf, const string& td,
|
||||
int flags, const string& mtype);
|
||||
void initcommon(RclConfig *cnf, int flags);
|
||||
|
||||
void tmpcleanup();
|
||||
bool dijontorcl(Rcl::Doc&);
|
||||
void collectIpathAndMT(Rcl::Doc&, string& ipath) const;
|
||||
|
||||
@ -156,10 +156,10 @@ ConfBeaglePanelW::ConfBeaglePanelW(QWidget *parent, ConfNull *config)
|
||||
|
||||
ConfLink lnk1(new ConfLinkRclRep(config, "processbeaglequeue"));
|
||||
ConfParamBoolW* cp1 =
|
||||
new ConfParamBoolW(gb1, lnk1, tr("Process Beagle queue"),
|
||||
new ConfParamBoolW(gb1, lnk1, tr("Steal Beagle indexing queue"),
|
||||
tr("Beagle MUST NOT be running. Enables processing "
|
||||
"the beagle queue to index Firefox web history.<br>"
|
||||
"(you must also install the Firefox Beagle Plugin)"
|
||||
"the beagle queue to index Firefox web history.<br>"
|
||||
"(you should also install the Firefox Beagle plugin)"
|
||||
));
|
||||
|
||||
ConfLink lnk2(new ConfLinkRclRep(config, "webcachedir"));
|
||||
|
||||
@ -538,14 +538,16 @@ PreviewTextEdit *Preview::addEditorTab()
|
||||
return editor;
|
||||
}
|
||||
|
||||
void Preview::setCurTabProps(const string &fn, const Rcl::Doc &doc,
|
||||
int docnum)
|
||||
void Preview::setCurTabProps(const Rcl::Doc &doc, int docnum)
|
||||
{
|
||||
QString title;
|
||||
map<string,string>::const_iterator meta_it;
|
||||
if ((meta_it = doc.meta.find("title")) != doc.meta.end()) {
|
||||
if ((meta_it = doc.meta.find(Rcl::Doc::keytt)) != doc.meta.end()
|
||||
&& !meta_it->second.empty()) {
|
||||
title = QString::fromUtf8(meta_it->second.c_str(),
|
||||
meta_it->second.length());
|
||||
} else {
|
||||
title = QString::fromLocal8Bit(path_getsimple(doc.url).c_str());
|
||||
}
|
||||
if (title.length() > 20) {
|
||||
title = title.left(10) + "..." + title.right(10);
|
||||
@ -572,16 +574,15 @@ void Preview::setCurTabProps(const string &fn, const Rcl::Doc &doc,
|
||||
|
||||
PreviewTextEdit *e = currentEditor();
|
||||
if (e) {
|
||||
e->m_data.fn = fn;
|
||||
e->m_data.url = doc.url;
|
||||
e->m_data.ipath = doc.ipath;
|
||||
e->m_data.docnum = docnum;
|
||||
}
|
||||
}
|
||||
|
||||
bool Preview::makeDocCurrent(const string &fn, size_t sz,
|
||||
const Rcl::Doc& doc, int docnum, bool sametab)
|
||||
bool Preview::makeDocCurrent(const Rcl::Doc& doc, int docnum, bool sametab)
|
||||
{
|
||||
LOGDEB(("Preview::makeDocCurrent: %s\n", fn.c_str()));
|
||||
LOGDEB(("Preview::makeDocCurrent: %s\n", doc.url.c_str()));
|
||||
|
||||
/* Check if we already have this page */
|
||||
for (int i = 0; i < pvTab->count(); i++) {
|
||||
@ -593,7 +594,7 @@ bool Preview::makeDocCurrent(const string &fn, size_t sz,
|
||||
if (tw) {
|
||||
PreviewTextEdit *edit =
|
||||
dynamic_cast<PreviewTextEdit*>(tw->child("pvEdit"));
|
||||
if (edit && !edit->m_data.fn.compare(fn) &&
|
||||
if (edit && !edit->m_data.url.compare(doc.url) &&
|
||||
!edit->m_data.ipath.compare(doc.ipath)) {
|
||||
pvTab->showPage(tw);
|
||||
return true;
|
||||
@ -606,7 +607,7 @@ bool Preview::makeDocCurrent(const string &fn, size_t sz,
|
||||
return false;
|
||||
}
|
||||
m_justCreated = false;
|
||||
if (!loadFileInCurrentTab(fn, sz, doc, docnum)) {
|
||||
if (!loadDocInCurrentTab(doc, docnum)) {
|
||||
closeCurrentTab();
|
||||
return false;
|
||||
}
|
||||
@ -637,16 +638,15 @@ bool Preview::makeDocCurrent(const string &fn, size_t sz,
|
||||
/* A thread to do the file reading / format conversion */
|
||||
class LoadThread : public QThread {
|
||||
int *statusp;
|
||||
Rcl::Doc *out;
|
||||
Rcl::Doc& out;
|
||||
const Rcl::Doc& idoc;
|
||||
string filename;
|
||||
string ipath;
|
||||
string *mtype;
|
||||
string tmpdir;
|
||||
int loglevel;
|
||||
public:
|
||||
string missing;
|
||||
LoadThread(int *stp, Rcl::Doc *odoc, string fn, string ip, string *mt)
|
||||
: statusp(stp), out(odoc), filename(fn), ipath(ip), mtype(mt)
|
||||
LoadThread(int *stp, Rcl::Doc& odoc, const Rcl::Doc& idc)
|
||||
: statusp(stp), out(odoc), idoc(idc)
|
||||
{
|
||||
loglevel = DebugLog::getdbl()->getlevel();
|
||||
}
|
||||
@ -666,40 +666,35 @@ class LoadThread : public QThread {
|
||||
*statusp = -1;
|
||||
return;
|
||||
}
|
||||
struct stat st;
|
||||
if (stat(filename.c_str(), &st) < 0) {
|
||||
LOGERR(("Preview: can't stat [%s]\n", filename.c_str()));
|
||||
QMessageBox::critical(0, "Recoll",
|
||||
Preview::tr("File does not exist"));
|
||||
*statusp = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
// QMessageBox::critical(0, "Recoll", Preview::tr("File does not exist"));
|
||||
|
||||
FileInterner interner(filename, &st, rclconfig, tmpdir,
|
||||
FileInterner::FIF_forPreview,
|
||||
mtype);
|
||||
FileInterner interner(idoc, rclconfig, tmpdir,
|
||||
FileInterner::FIF_forPreview);
|
||||
|
||||
// We don't set the interner's target mtype to html because we
|
||||
// do want the html filter to do its work: we won't use the
|
||||
// text, but we need the conversion to utf-8
|
||||
// interner.setTargetMType("text/html");
|
||||
try {
|
||||
FileInterner::Status ret = interner.internfile(*out, ipath);
|
||||
string ipath = idoc.ipath;
|
||||
FileInterner::Status ret = interner.internfile(out, ipath);
|
||||
if (ret == FileInterner::FIDone || ret == FileInterner::FIAgain) {
|
||||
// FIAgain is actually not nice here. It means that the record
|
||||
// for the *file* of a multidoc was selected. Actually this
|
||||
// shouldn't have had a preview link at all, but we don't know
|
||||
// how to handle it now. Better to show the first doc than
|
||||
// a mysterious error. Happens when the file name matches a
|
||||
// a search term of course.
|
||||
// a search term.
|
||||
*statusp = 0;
|
||||
// If we prefer html and it is available, replace the
|
||||
// text/plain document text
|
||||
if (prefs.previewHtml && !interner.get_html().empty()) {
|
||||
out->text = interner.get_html();
|
||||
out->mimetype = "text/html";
|
||||
out.text = interner.get_html();
|
||||
out.mimetype = "text/html";
|
||||
}
|
||||
} else {
|
||||
out->mimetype = interner.getMimetype();
|
||||
out.mimetype = interner.getMimetype();
|
||||
interner.getMissingExternal(missing);
|
||||
*statusp = -1;
|
||||
}
|
||||
@ -754,8 +749,7 @@ public:
|
||||
~LoadGuard() {*m_bp = false; CancelCheck::instance().setCancel(false);}
|
||||
};
|
||||
|
||||
bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
|
||||
int docnum)
|
||||
bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
|
||||
{
|
||||
if (m_loading) {
|
||||
LOGERR(("ALready loading\n"));
|
||||
@ -767,18 +761,11 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
|
||||
|
||||
m_haveAnchors = false;
|
||||
|
||||
Rcl::Doc doc = idoc;
|
||||
setCurTabProps(idoc, docnum);
|
||||
|
||||
if (doc.meta[Rcl::Doc::keytt].empty())
|
||||
doc.meta[Rcl::Doc::keytt] = path_getsimple(doc.url);
|
||||
|
||||
setCurTabProps(fn, doc, docnum);
|
||||
|
||||
char csz[20];
|
||||
sprintf(csz, "%lu", (unsigned long)sz);
|
||||
QString msg = QString("Loading: %1 (size %2 bytes)")
|
||||
.arg(QString::fromLocal8Bit(fn.c_str()))
|
||||
.arg(csz);
|
||||
.arg(QString::fromLocal8Bit(idoc.url.c_str()))
|
||||
.arg(QString::fromAscii(idoc.fbytes.c_str()));
|
||||
|
||||
// Create progress dialog and aux objects
|
||||
const int nsteps = 20;
|
||||
@ -786,12 +773,12 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
|
||||
progress.setMinimumDuration(2000);
|
||||
WaiterThread waiter(100);
|
||||
|
||||
// Load and convert file
|
||||
// Load and convert document
|
||||
// idoc came out of the index data (main text and other fields missing).
|
||||
// fdoc is the complete one that we are going to extract from storage.
|
||||
Rcl::Doc fdoc;
|
||||
// Need to setup config to retrieve possibly local parameters
|
||||
rclconfig->setKeyDir(path_getfather(fn));
|
||||
int status = 1;
|
||||
LoadThread lthr(&status, &fdoc, fn, doc.ipath, &doc.mimetype);
|
||||
LoadThread lthr(&status, fdoc, idoc);
|
||||
lthr.start();
|
||||
int prog;
|
||||
for (prog = 1;;prog++) {
|
||||
@ -963,7 +950,7 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
|
||||
}
|
||||
|
||||
// Enter document in document history
|
||||
g_dynconf->enterDoc(fn, doc.ipath);
|
||||
g_dynconf->enterDoc(idoc.url, idoc.ipath);
|
||||
|
||||
editor->setFocus();
|
||||
emit(previewExposed(this, m_searchId, docnum));
|
||||
@ -998,7 +985,7 @@ void PreviewTextEdit::toggleFields()
|
||||
// Else display fields
|
||||
m_dspflds = true;
|
||||
QString txt = "<html><head></head><body>\n";
|
||||
txt += "<b>" + QString::fromLocal8Bit(m_data.fn.c_str());
|
||||
txt += "<b>" + QString::fromLocal8Bit(m_data.url.c_str());
|
||||
if (!m_data.ipath.empty())
|
||||
txt += "|" + QString::fromUtf8(m_data.ipath.c_str());
|
||||
txt += "</b><br><br>";
|
||||
|
||||
@ -51,7 +51,7 @@ class Q3PopupMenu;
|
||||
// We keep a list of data associated to each tab
|
||||
class TabData {
|
||||
public:
|
||||
string fn; // filename for this tab
|
||||
string url; // filename for this tab
|
||||
string ipath; // Internal doc path inside file
|
||||
int docnum; // Index of doc in db search results.
|
||||
// doc out of internfile (previous fields come from the index) with
|
||||
@ -133,8 +133,13 @@ public:
|
||||
|
||||
virtual void closeEvent(QCloseEvent *e );
|
||||
virtual bool eventFilter(QObject *target, QEvent *event );
|
||||
virtual bool makeDocCurrent(const string &fn, size_t sz,
|
||||
const Rcl::Doc& idoc, int docnum,
|
||||
/**
|
||||
* Arrange for the document to be displayed either by exposing the tab
|
||||
* if already loaded, or by creating a new tab and loading it.
|
||||
* @param docnum is used to link back to the result list (to highlight
|
||||
* paragraph when tab exposed etc.)
|
||||
*/
|
||||
virtual bool makeDocCurrent(const Rcl::Doc& idoc, int docnum,
|
||||
bool sametab = false);
|
||||
friend class PreviewTextEdit;
|
||||
public slots:
|
||||
@ -182,12 +187,10 @@ private:
|
||||
QCheckBox* matchCheck;
|
||||
|
||||
void init();
|
||||
virtual void setCurTabProps(const string& fn, const Rcl::Doc& doc,
|
||||
int docnum);
|
||||
virtual void setCurTabProps(const Rcl::Doc& doc, int docnum);
|
||||
virtual PreviewTextEdit *currentEditor();
|
||||
virtual PreviewTextEdit *addEditorTab();
|
||||
virtual bool loadFileInCurrentTab(string fn, size_t sz,
|
||||
const Rcl::Doc& idoc, int dnm);
|
||||
virtual bool loadDocInCurrentTab(const Rcl::Doc& idoc, int dnm);
|
||||
};
|
||||
|
||||
#endif /* _PREVIEW_W_H_INCLUDED_ */
|
||||
|
||||
@ -498,12 +498,6 @@ void RclMain::toggleIndexing()
|
||||
fileToggleIndexingAction->setEnabled(FALSE);
|
||||
}
|
||||
|
||||
// Note that all our 'urls' are like : file://...
|
||||
static string urltolocalpath(string url)
|
||||
{
|
||||
return url.substr(7, string::npos);
|
||||
}
|
||||
|
||||
// Start a db query and set the reslist docsource
|
||||
void RclMain::startSearch(RefCntr<Rcl::SearchData> sdata)
|
||||
{
|
||||
@ -688,14 +682,6 @@ void RclMain::startPreview(int docnum, int mod)
|
||||
return;
|
||||
}
|
||||
|
||||
// Check file exists in file system
|
||||
string fn = urltolocalpath(doc.url);
|
||||
struct stat st;
|
||||
if (stat(fn.c_str(), &st) < 0) {
|
||||
QMessageBox::warning(0, "Recoll", tr("Cannot access document file: ") +
|
||||
fn.c_str());
|
||||
return;
|
||||
}
|
||||
if (mod & Qt::ShiftButton) {
|
||||
// User wants new preview window
|
||||
curPreview = 0;
|
||||
@ -724,7 +710,7 @@ void RclMain::startPreview(int docnum, int mod)
|
||||
curPreview->setCaption(resList->getDescription());
|
||||
curPreview->show();
|
||||
}
|
||||
curPreview->makeDocCurrent(fn, st.st_size, doc, docnum);
|
||||
curPreview->makeDocCurrent(doc, docnum);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -736,14 +722,6 @@ void RclMain::startPreview(int docnum, int mod)
|
||||
*/
|
||||
void RclMain::startPreview(Rcl::Doc doc)
|
||||
{
|
||||
// Check file exists in file system
|
||||
string fn = urltolocalpath(doc.url);
|
||||
struct stat st;
|
||||
if (stat(fn.c_str(), &st) < 0) {
|
||||
QMessageBox::warning(0, "Recoll", tr("Cannot access document file: ") +
|
||||
fn.c_str());
|
||||
return;
|
||||
}
|
||||
Preview *preview = new Preview(0, HiliteData());
|
||||
if (preview == 0) {
|
||||
QMessageBox::warning(0, tr("Warning"),
|
||||
@ -755,7 +733,7 @@ void RclMain::startPreview(Rcl::Doc doc)
|
||||
connect(preview, SIGNAL(wordSelect(QString)),
|
||||
this, SLOT(ssearchAddTerm(QString)));
|
||||
preview->show();
|
||||
preview->makeDocCurrent(fn, st.st_size, doc, 0);
|
||||
preview->makeDocCurrent(doc, 0);
|
||||
}
|
||||
|
||||
// Show next document from result list in current preview tab
|
||||
@ -802,15 +780,7 @@ void RclMain::previewPrevOrNextInTab(Preview * w, int sid, int docnum, bool nxt)
|
||||
}
|
||||
|
||||
// Check that file exists in file system
|
||||
string fn = urltolocalpath(doc.url);
|
||||
struct stat st;
|
||||
if (stat(fn.c_str(), &st) < 0) {
|
||||
QMessageBox::warning(0, "Recoll", tr("Cannot access document file: ") +
|
||||
fn.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
w->makeDocCurrent(fn, st.st_size, doc, docnum, true);
|
||||
w->makeDocCurrent(doc, docnum, true);
|
||||
}
|
||||
|
||||
// Preview tab exposed: if the preview comes from the currently
|
||||
@ -862,7 +832,6 @@ void RclMain::saveDocToFile(int docnum)
|
||||
" from database"));
|
||||
return;
|
||||
}
|
||||
string fn = urltolocalpath(doc.url);
|
||||
QString s =
|
||||
QFileDialog::getSaveFileName(path_home().c_str(),
|
||||
"", this,
|
||||
@ -870,8 +839,7 @@ void RclMain::saveDocToFile(int docnum)
|
||||
tr("Choose a file name to save under"));
|
||||
string tofile((const char *)s.local8Bit());
|
||||
TempFile temp; // not used
|
||||
if (!FileInterner::idocToFile(temp, tofile, rclconfig, fn,
|
||||
doc.ipath, doc.mimetype)) {
|
||||
if (!FileInterner::idocToFile(temp, tofile, rclconfig, doc)) {
|
||||
QMessageBox::warning(0, "Recoll",
|
||||
tr("Cannot extract document or create "
|
||||
"temporary file"));
|
||||
@ -913,6 +881,14 @@ void RclMain::startNativeViewer(int docnum)
|
||||
startNativeViewer(doc);
|
||||
}
|
||||
|
||||
// Convert a "file://" url to a local file system path.
//
// @param url  the document url, as stored in the index.
// @return the part of the url following the "file://" scheme prefix, or
//         an empty string if the url does not begin with "file://"
//         (callers use the empty result to detect non-file urls).
static string fileurltolocalpath(const string& url)
{
    static const char fileprefix[] = "file://";
    static const string::size_type fileprefixlen = sizeof(fileprefix) - 1;

    // Anchored comparison: only the first characters of the url are
    // examined, instead of searching the whole string for the prefix.
    if (url.compare(0, fileprefixlen, fileprefix) == 0)
        return url.substr(fileprefixlen);
    return string();
}
|
||||
|
||||
void RclMain::startNativeViewer(Rcl::Doc doc)
|
||||
{
|
||||
// Look for appropriate viewer
|
||||
@ -993,22 +969,20 @@ void RclMain::startNativeViewer(Rcl::Doc doc)
|
||||
return;
|
||||
}
|
||||
|
||||
// For files with an ipath, we do things differently depending if
|
||||
// the configured command seems to be able to grok it or not: if
|
||||
// not, create a temporary file
|
||||
// We may need a temp file, or not depending on the command arguments
|
||||
// and the fact that this is a subdoc or not.
|
||||
bool wantsipath = cmd.find("%i") != string::npos;
|
||||
bool wantsfile = cmd.find("%f") != string::npos;
|
||||
bool istempfile = false;
|
||||
string fn = urltolocalpath(doc.url);
|
||||
string url;
|
||||
string fn = fileurltolocalpath(doc.url);
|
||||
string url = doc.url;
|
||||
|
||||
// If the command wants a file but this is not a file url, or
|
||||
// there is an ipath that it won't understand, we need a temp file:
|
||||
rclconfig->setKeyDir(path_getfather(fn));
|
||||
if (doc.ipath.empty() || wantsipath) {
|
||||
url = doc.url;
|
||||
} else {
|
||||
// There is an ipath and the command does not know about
|
||||
// them. We need a temp file.
|
||||
if ((wantsfile && fn.empty()) || (!wantsipath && !doc.ipath.empty())) {
|
||||
TempFile temp;
|
||||
if (!FileInterner::idocToFile(temp, string(), rclconfig, fn,
|
||||
doc.ipath, doc.mimetype)) {
|
||||
if (!FileInterner::idocToFile(temp, string(), rclconfig, doc)) {
|
||||
QMessageBox::warning(0, "Recoll",
|
||||
tr("Cannot extract document or create "
|
||||
"temporary file"));
|
||||
|
||||
@ -40,6 +40,8 @@ namespace Rcl {
|
||||
const string Doc::keytt("title");
|
||||
const string Doc::keykw("keywords");
|
||||
const string Doc::keymd5("md5");
|
||||
const string Doc::keybcknd("rclbes");
|
||||
const string Doc::keyudi("rcludi");
|
||||
|
||||
void Doc::dump(bool dotext) const
|
||||
{
|
||||
|
||||
@ -44,9 +44,9 @@ class Doc {
|
||||
// can be accessed after a query without fetching the actual document).
|
||||
// We indicate the routine that sets them up during indexing
|
||||
|
||||
// This is just "file://" + binary or url-encoded filename. No
|
||||
// transcoding: this is used to access files Index: computed from
|
||||
// fn by Db::add caller. Query: from doc data.
|
||||
// Binary or url-encoded url. No transcoding: this is used to access files
|
||||
// Index: computed by Db::add caller.
|
||||
// Query: from doc data.
|
||||
string url;
|
||||
|
||||
// Transcoded version of the simple file name for SFN-prefixed
|
||||
@ -160,6 +160,9 @@ class Doc {
|
||||
static const string keytt; // title
|
||||
static const string keykw; // keywords
|
||||
static const string keymd5; // file md5 checksum
|
||||
static const string keybcknd; // backend type for data not from the filesys
|
||||
// udi back from index. Only set by Rcl::Query::getDoc().
|
||||
static const string keyudi;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -369,6 +369,7 @@ bool Query::getDoc(int exti, Doc &doc)
|
||||
Xapian::docid docid = 0;
|
||||
int pc = 0;
|
||||
string data;
|
||||
string udi;
|
||||
m_reason.erase();
|
||||
for (int xaptries=0; xaptries < 2; xaptries++) {
|
||||
try {
|
||||
@ -377,6 +378,16 @@ bool Query::getDoc(int exti, Doc &doc)
|
||||
pc = m_nq->xmset.convert_to_percent(m_nq->xmset[xapi-first]);
|
||||
data = xdoc.get_data();
|
||||
m_reason.erase();
|
||||
Chrono chron;
|
||||
Xapian::TermIterator it = xdoc.termlist_begin();
|
||||
it.skip_to("Q");
|
||||
if (it != xdoc.termlist_end()) {
|
||||
udi = *it;
|
||||
if (!udi.empty())
|
||||
udi = udi.substr(1);
|
||||
}
|
||||
LOGDEB2(("Query::getDoc: %d ms to get udi [%s]\n", chron.millis(),
|
||||
udi.c_str()));
|
||||
break;
|
||||
} catch (Xapian::DatabaseModifiedError &error) {
|
||||
// retry or end of loop
|
||||
@ -390,6 +401,7 @@ bool Query::getDoc(int exti, Doc &doc)
|
||||
LOGERR(("Query::getDoc: %s\n", m_reason.c_str()));
|
||||
return false;
|
||||
}
|
||||
doc.meta[Rcl::Doc::keyudi] = udi;
|
||||
// Parse xapian document's data and populate doc fields
|
||||
return m_db->m_ndb->dbDataToRclDoc(docid, data, doc, pc);
|
||||
}
|
||||
|
||||
@ -43,7 +43,7 @@ recipient = XTO
|
||||
# "author" used to be stored by default, now set here as optional
|
||||
# "apptag" is used for viewer specialization (depending on local config)
|
||||
[stored]
|
||||
stored = author apptag
|
||||
stored = author apptag rclbes
|
||||
|
||||
##########################
|
||||
# This section defines field names aliases or synonyms. Any right hand side
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user