diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index c1cd0f2f..c3473723 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -1,6 +1,7 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.5 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.6 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes"; #endif +#include #include @@ -67,3 +68,37 @@ RclConfig::RclConfig() m_ok = true; return; }
+
+// Look up an executable filter.
+// An empty or absolute command name is returned unchanged. Otherwise the
+// directory named by the RECOLL_BINDIR environment variable (when set) is
+// tried first, then the configuration directory. If the command is not
+// executable in either place the bare name is returned, so that the
+// system can use the PATH.
+string find_filter(RclConfig *conf, const string &icmd)
+{
+    // Nothing to look up, or the path is absolute: this is it
+    if (icmd.empty() || icmd[0] == '/')
+        return icmd;
+
+    string cmd;
+
+    // First choice: RECOLL_BINDIR. A miss here must fall through to the
+    // configuration directory instead of going straight to the PATH.
+    const char *cp = getenv("RECOLL_BINDIR");
+    if (cp != 0) {
+        cmd = cp;
+        path_cat(cmd, icmd);
+        if (access(cmd.c_str(), X_OK) == 0)
+            return cmd;
+    }
+
+    // Second choice: the configuration directory
+    cmd = conf->getConfDir();
+    path_cat(cmd, icmd);
+    if (access(cmd.c_str(), X_OK) == 0)
+        return cmd;
+
+    // Not found: let the system use the PATH
+    return icmd;
+}
diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index 0dcadfa3..f59a06d2 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -1,29 +1,41 @@ #ifndef _RCLCONFIG_H_INCLUDED_ #define _RCLCONFIG_H_INCLUDED_ -/* @(#$Id: rclconfig.h,v 1.3 2004-12-17 13:01:01 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rclconfig.h,v 1.4 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes */ #include "conftree.h" class RclConfig { int m_ok; - string confdir; // Directory where the files are stored - ConfTree *conf; // Parsed main configuration - string keydir; // Current directory used for parameter fetches. - // Note: this will have to change if/when we support per directory maps - ConfTree *mimemap; - ConfTree *mimeconf; + string confdir; // Directory where the files are stored + ConfTree *conf; // Parsed main configuration + string keydir; // Current directory used for parameter fetches. 
+ + ConfTree *mimemap; // These are independant of current keydir. We might + ConfTree *mimeconf; // want to change it one day. + + // Parameters auto-fetched on setkeydir + string defcharset; // These are stored locally to avoid + string deflang; // a config lookup each time. + bool guesscharset; // They are fetched initially or on setKeydir() + public: - // Let some parameters be accessed directly - string defcharset; // These are stored locally to avoid a config lookup - string deflang; // each time. - bool guesscharset; RclConfig(); ~RclConfig() {delete conf;delete mimemap;delete mimeconf;} + bool ok() {return m_ok;} + + string getConfDir() {return confdir;} ConfTree *getConfig() {return m_ok ? conf : 0;} - ConfTree *getMimeMap() {return m_ok ? mimemap : 0;} - ConfTree *getMimeConf() {return m_ok ? mimeconf : 0;} + + /// Get generic configuration parameter according to current keydir + bool getConfParam(const string &name, string &value) + { + if (conf == 0) + return false; + return conf->get(name, value, keydir); + } + /// Set current directory reference, and fetch automatic parameters. void setKeyDir(const string &dir) { keydir = dir; @@ -33,19 +45,13 @@ class RclConfig { conf->get("guesscharset", str, keydir); guesscharset = ConfTree::stringToBool(str); } - bool getConfParam(const string &name, string &value) - { - if (conf == 0) - return false; - return conf->get(name, value, keydir); - } - const string &getDefCharset() { - return defcharset; - } - const string &getDefLang() { - return deflang; - } + ConfTree *getMimeMap() {return m_ok ? mimemap : 0;} + ConfTree *getMimeConf() {return m_ok ? 
mimeconf : 0;} + const string &getDefCharset() {return defcharset;} + const string &getDefLang() {return deflang;} + bool getGuessCharset() {return guesscharset;} }; +std::string find_filter(RclConfig *conf, const string& cmd); #endif /* _RCLCONFIG_H_INCLUDED_ */ diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index 08016a8d..fadc3a82 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: indexer.cpp,v 1.2 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: indexer.cpp,v 1.3 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -19,8 +19,8 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.2 2005-02-01 17:20:05 dockes Exp #include "indexer.h" #include "csguess.h" #include "transcode.h" -#include "mimehandler.h" #include "debuglog.h" +#include "internfile.h" using namespace std; @@ -76,6 +76,7 @@ bool DbIndexer::index() return true; } + /** * This function gets called for every file and directory found by the * tree walker. It checks with the db if the file has changed and needs to @@ -97,40 +98,17 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp, return FsTreeWalker::FtwOk; } - string mime = mimetype(fn, me->config->getMimeMap()); - if (mime.empty()) { - // No mime type ?? pass on. - LOGDEB(("indexfile: (no mime) [%s]\n", fn.c_str())); - return FsTreeWalker::FtwOk; - } - - // Look for appropriate handler - MimeHandler *handler = getMimeHandler(mime, me->config->getMimeConf()); - if (!handler) { - // No handler for this type, for now :( - LOGDEB(("indexfile: %s : no handler\n", mime.c_str())); - return FsTreeWalker::FtwOk; - } - - LOGDEB(("indexfile: %s [%s]\n", mime.c_str(), fn.c_str())); - // Check db up to date ? if (!me->db.needUpdate(fn, stp)) { - delete handler; + LOGDEB(("indexfile: up to date: %s\n", fn.c_str())); return FsTreeWalker::FtwOk; } - // Turn file into a document. 
The document has fields for title, body - // etc., all text converted to utf8 Rcl::Doc doc; - if (!handler->worker(me->config, fn, mime, doc)) { - delete handler; + if (!internfile(fn, me->config, doc)) return FsTreeWalker::FtwOk; - } - delete handler; // Set up common fields: - doc.mimetype = mime; char ascdate[20]; sprintf(ascdate, "%ld", long(stp->st_mtime)); doc.mtime = ascdate; @@ -161,13 +139,13 @@ bool ConfIndexer::index() // Group the directories by database: it is important that all // directories for a database be indexed at once so that deleted // file cleanup works - vector tdl; // List of directories to be indexed + list tdl; // List of directories to be indexed if (!ConfTree::stringToStrings(topdirs, tdl)) { LOGERR(("ConfIndexer::index: parse error for directory list\n")); return false; } - vector::iterator dirit; + list::iterator dirit; map > dbmap; map >::iterator dbit; for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) { diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp index 993c5748..7c3c51d0 100644 --- a/src/internfile/mh_html.cpp +++ b/src/internfile/mh_html.cpp @@ -50,7 +50,7 @@ bool MimeHandlerHtml::worker(RclConfig *conf, const string &fn, return worker1(conf, fn, otext, mtype, docout); } -bool MimeHandlerHtml::worker1(RclConfig *conf, const string &fn, +bool MimeHandlerHtml::worker1(RclConfig *conf, const string &, const string& htext, const string &mtype, Rcl::Doc &docout) { @@ -63,10 +63,10 @@ bool MimeHandlerHtml::worker1(RclConfig *conf, const string &fn, // what we started with, we abort and restart with the parameter value // instead of the configuration one. 
string charset; - if (conf->guesscharset) { - charset = csguess(htext, conf->defcharset); + if (conf->getGuessCharset()) { + charset = csguess(htext, conf->getDefCharset()); } else - charset = conf->defcharset; + charset = conf->getDefCharset(); LOGDEB(("textHtmlToDoc: charset before parsing: %s\n", charset.c_str())); diff --git a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp index 19e4f173..e5fd1071 100644 --- a/src/internfile/mimehandler.cpp +++ b/src/internfile/mimehandler.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.6 2005-02-01 17:52:06 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.7 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -14,6 +14,7 @@ using namespace std; #include "smallut.h" #include "html.h" #include "execmd.h" +#include "pathut.h" class MimeHandlerText : public MimeHandler { public: @@ -34,10 +35,10 @@ bool MimeHandlerText::worker(RclConfig *conf, const string &fn, // fields The charset guesser really doesnt work well in general // and should be avoided (especially for short documents) string charset; - if (conf->guesscharset) { - charset = csguess(otext, conf->defcharset); + if (conf->getGuessCharset()) { + charset = csguess(otext, conf->getDefCharset()); } else - charset = conf->defcharset; + charset = conf->getDefCharset(); string utf8; LOGDEB1(("textPlainToDoc: transcod from %s to %s\n", charset, "UTF-8")); @@ -70,11 +71,15 @@ class MimeHandlerExec : public MimeHandler { bool MimeHandlerExec::worker(RclConfig *conf, const string &fn, const string &mtype, Rcl::Doc &docout) { - string cmd = params.front(); + // Command name + string cmd = find_filter(conf, params.front()); + + // Build parameter list: delete cmd name and add the file name list::iterator it = params.begin(); listmyparams(++it, params.end()); myparams.push_back(fn); + // Execute command and store the result text, which is supposedly html string html; 
ExecCmd exec; int status = exec.doexec(cmd, myparams, 0, &html); @@ -83,6 +88,8 @@ bool MimeHandlerExec::worker(RclConfig *conf, const string &fn, status, cmd.c_str())); return false; } + + // Process/index the html MimeHandlerHtml hh; return hh.worker1(conf, fn, html, mtype, docout); } @@ -109,26 +116,26 @@ MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers) } // Break definition into type and name - vector toks; + list toks; ConfTree::stringToStrings(hs, toks); - if (toks.size() < 1) { + if (toks.empty()) { LOGERR(("getMimeHandler: bad mimeconf line for %s\n", mtype.c_str())); return 0; } // Retrieve handler function according to type - if (!stringlowercmp("internal", toks[0])) { + if (!stringlowercmp("internal", toks.front())) { return mhfact(mtype); - } else if (!stringlowercmp("dll", toks[0])) { + } else if (!stringlowercmp("dll", toks.front())) { return 0; - } else if (!stringlowercmp("exec", toks[0])) { + } else if (!stringlowercmp("exec", toks.front())) { if (toks.size() < 2) { LOGERR(("getMimeHandler: bad line for %s: %s\n", mtype.c_str(), hs.c_str())); return 0; } MimeHandlerExec *h = new MimeHandlerExec; - vector::const_iterator it1 = toks.begin(); + list::const_iterator it1 = toks.begin(); it1++; for (;it1 != toks.end();it1++) h->params.push_back(*it1); @@ -146,3 +153,37 @@ string getMimeViewer(const std::string &mtype, ConfTree *mhandlers) mhandlers->get(mtype, hs, "view"); return hs; }
+
+/**
+ * Return decompression command line for given mime type.
+ *
+ * The value stored for mtype in mhandlers is split into tokens. When the
+ * first token matches "uncompress" (as decided by stringlowercmp()), the
+ * remaining tokens are copied to cmd.
+ *
+ * @param mtype mime type used as the lookup key
+ * @param mhandlers configuration to look the type up in (must not be null)
+ * @param cmd output: tokens following "uncompress"; untouched on failure
+ * @return true if an uncompress entry was found, false if there is no
+ *         usable entry or its first token is something else
+ */
+bool getUncompressor(const std::string &mtype, ConfTree *mhandlers,
+                     list<string>& cmd)
+{
+    string hs;
+    mhandlers->get(mtype, hs, "");
+
+    // A missing, empty or unparsable value leaves no first token to test,
+    // and front() on an empty list is undefined behaviour.
+    list<string> tokens;
+    if (!ConfTree::stringToStrings(hs, tokens) || tokens.empty())
+        return false;
+
+    if (stringlowercmp("uncompress", tokens.front()))
+        return false;
+
+    // Drop the "uncompress" keyword, keep the rest
+    list<string>::iterator it = tokens.begin();
+    cmd.assign(++it, tokens.end());
+    return true;
+}
diff --git a/src/internfile/mimehandler.h b/src/internfile/mimehandler.h index 48597003..1b4b82ae 100644 --- 
a/src/internfile/mimehandler.h +++ b/src/internfile/mimehandler.h @@ -1,8 +1,9 @@ #ifndef _MIMEHANDLER_H_INCLUDED_ #define _MIMEHANDLER_H_INCLUDED_ -/* @(#$Id: mimehandler.h,v 1.4 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: mimehandler.h,v 1.5 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes */ #include +#include #include "rclconfig.h" #include "rcldb.h" @@ -22,13 +23,14 @@ class MimeHandler { * Return indexing handler class for given mime type * returned pointer should be deleted by caller */ -extern MimeHandler *getMimeHandler(const std::string &mtype, - ConfTree *mhandlers); +extern MimeHandler *getMimeHandler(const std::string &mtyp, ConfTree *mhdlers); /** * Return external viewer exec string for given mime type */ -extern string getMimeViewer(const std::string &mtype, - ConfTree *mhandlers); +extern std::string getMimeViewer(const std::string &mtyp, ConfTree *mhandlers); + +bool getUncompressor(const std::string &mtype, ConfTree *mhandlers, + std::list& cmd); #endif /* _MIMEHANDLER_H_INCLUDED_ */ diff --git a/src/lib/Makefile b/src/lib/Makefile index db95483d..4400c3aa 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -8,7 +8,7 @@ all: $(LIBS) OBJS = conftree.o csguess.o debuglog.o \ execmd.o \ - fstreewalk.o html.o htmlparse.o indexer.o \ + fstreewalk.o html.o htmlparse.o indexer.o internfile.o \ mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \ rclconfig.o rcldb.o readfile.o smallut.o \ textsplit.o transcode.o \ @@ -16,7 +16,7 @@ OBJS = conftree.o csguess.o debuglog.o \ SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \ ../utils/execmd.cpp \ ../utils/fstreewalk.cpp ../common/html.cpp ../common/htmlparse.cpp \ - ../index/indexer.cpp \ + ../index/indexer.cpp ../common/internfile.cpp \ ../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \ ../common/myhtmlparse.cpp ../utils/pathut.cpp \ ../common/rclconfig.cpp ../common/rcldb.cpp ../utils/readfile.cpp \ @@ -47,6 +47,8 @@ 
htmlparse.o : ../common/htmlparse.cpp $(CXX) $(CXXFLAGS) -c $< indexer.o : ../index/indexer.cpp $(CXX) $(CXXFLAGS) -c $< +internfile.o : ../common/internfile.cpp + $(CXX) $(CXXFLAGS) -c $< mimehandler.o : ../common/mimehandler.cpp $(CXX) $(CXXFLAGS) -c $< mimeparse.o : ../utils/mimeparse.cpp diff --git a/src/qtgui/recollmain.ui.h b/src/qtgui/recollmain.ui.h index cc4e1322..bf23033c 100644 --- a/src/qtgui/recollmain.ui.h +++ b/src/qtgui/recollmain.ui.h @@ -24,6 +24,7 @@ #include "mimehandler.h" #include "pathut.h" #include "recoll.h" +#include "internfile.h" void RecollMain::fileExit() { @@ -145,28 +146,14 @@ void RecollMain::reslistTE_clicked(int par, int car) // Go to the file system to retrieve / convert the document text // for preview: - - // Look for appropriate handler - MimeHandler *handler = - getMimeHandler(doc.mimetype, rclconfig->getMimeConf()); - if (!handler) { - QMessageBox::warning(0, "Recoll", - QString("No mime handler for mime type ") + - doc.mimetype.c_str()); - return; - } - string fn = urltolocalpath(doc.url); Rcl::Doc fdoc; - if (!handler->worker(rclconfig, fn, doc.mimetype, fdoc)) { + if (!internfile(fn, rclconfig, fdoc)) { QMessageBox::warning(0, "Recoll", - QString("Failed to convert document for preview!\n") + - fn.c_str() + " mimetype " + + QString("Can't turn doc into internal rep ") + doc.mimetype.c_str()); - delete handler; return; } - delete handler; string rich = plaintorich(fdoc.text); diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp index 23bfa636..cfad866b 100644 --- a/src/utils/smallut.cpp +++ b/src/utils/smallut.cpp @@ -1,13 +1,12 @@ #ifndef lint -static char rcsid[] = "@(#$Id: smallut.cpp,v 1.1 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: smallut.cpp,v 1.2 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes"; #endif - #ifndef TEST_SMALLUT #include -#include "smallut.h" - #include +#include "smallut.h" + #define MIN(A,B) ((A)<(B)?(A):(B)) int stringicmp(const string & s1, 
const string& s2) diff --git a/src/utils/smallut.h b/src/utils/smallut.h index 6bc7aa3f..9129f37c 100644 --- a/src/utils/smallut.h +++ b/src/utils/smallut.h @@ -1,6 +1,6 @@ #ifndef _SMALLUT_H_INCLUDED_ #define _SMALLUT_H_INCLUDED_ -/* @(#$Id: smallut.h,v 1.1 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: smallut.h,v 1.2 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes */ #include using std::string; @@ -9,5 +9,4 @@ extern int stringicmp(const string& s1, const string& s2); extern int stringlowercmp(const string& alreadylower, const string& s2); extern int stringuppercmp(const string& alreadyupper, const string& s2); - #endif /* _SMALLUT_H_INCLUDED_ */