*** empty log message ***

This commit is contained in:
dockes 2005-02-04 09:39:44 +00:00
parent 038205ce02
commit 1f8fbc0d39
10 changed files with 123 additions and 99 deletions

View File

@ -1,6 +1,7 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.5 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.6 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
#include <unistd.h>
#include <iostream> #include <iostream>
@ -67,3 +68,28 @@ RclConfig::RclConfig()
m_ok = true; m_ok = true;
return; return;
} }
// Look up an executable filter.
// Search order: the directory named by the RECOLL_BINDIR environment
// variable (when it is set), then the configuration directory returned by
// conf->getConfDir(). If neither holds an executable file of that name, the
// input name is returned unchanged so that the system can use the PATH.
//
// @param conf  configuration object, only used for getConfDir()
// @param icmd  filter command name, absolute or relative
// @return the path to execute: icmd itself if it is empty or absolute, the
//         first candidate for which access(X_OK) succeeds, else icmd.
string find_filter(RclConfig *conf, const string &icmd)
{
    // Nothing to look up. Without this guard an empty name would
    // presumably make path_cat() yield the bare directory, which
    // access(X_OK) accepts, and a directory would be returned as the command.
    if (icmd.empty())
        return icmd;

    // If the path is absolute, this is it
    if (icmd[0] == '/')
        return icmd;

    string cmd;

    // First choice: RECOLL_BINDIR from the environment.
    // NOTE(review): path_cat() is assumed to append icmd to cmd in place.
    const char *cp = getenv("RECOLL_BINDIR");
    if (cp != 0) {
        cmd = cp;
        path_cat(cmd, icmd);
        if (access(cmd.c_str(), X_OK) == 0)
            return cmd;
    }

    // Second choice: the configuration directory. It is tried even when
    // RECOLL_BINDIR is set but does not contain the filter.
    cmd = conf->getConfDir();
    path_cat(cmd, icmd);
    if (access(cmd.c_str(), X_OK) == 0)
        return cmd;

    // Not found: hand the bare name back and let the PATH decide
    return icmd;
}

View File

@ -1,29 +1,41 @@
#ifndef _RCLCONFIG_H_INCLUDED_ #ifndef _RCLCONFIG_H_INCLUDED_
#define _RCLCONFIG_H_INCLUDED_ #define _RCLCONFIG_H_INCLUDED_
/* @(#$Id: rclconfig.h,v 1.3 2004-12-17 13:01:01 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: rclconfig.h,v 1.4 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes */
#include "conftree.h" #include "conftree.h"
class RclConfig { class RclConfig {
int m_ok; int m_ok;
string confdir; // Directory where the files are stored string confdir; // Directory where the files are stored
ConfTree *conf; // Parsed main configuration ConfTree *conf; // Parsed main configuration
string keydir; // Current directory used for parameter fetches. string keydir; // Current directory used for parameter fetches.
// Note: this will have to change if/when we support per directory maps
ConfTree *mimemap; ConfTree *mimemap; // These are independent of current keydir. We might
ConfTree *mimeconf; ConfTree *mimeconf; // want to change it one day.
// Parameters auto-fetched on setkeydir
string defcharset; // These are stored locally to avoid
string deflang; // a config lookup each time.
bool guesscharset; // They are fetched initially or on setKeydir()
public: public:
// Let some parameters be accessed directly
string defcharset; // These are stored locally to avoid a config lookup
string deflang; // each time.
bool guesscharset;
RclConfig(); RclConfig();
~RclConfig() {delete conf;delete mimemap;delete mimeconf;} ~RclConfig() {delete conf;delete mimemap;delete mimeconf;}
bool ok() {return m_ok;} bool ok() {return m_ok;}
string getConfDir() {return confdir;}
ConfTree *getConfig() {return m_ok ? conf : 0;} ConfTree *getConfig() {return m_ok ? conf : 0;}
ConfTree *getMimeMap() {return m_ok ? mimemap : 0;}
ConfTree *getMimeConf() {return m_ok ? mimeconf : 0;} /// Get generic configuration parameter according to current keydir
bool getConfParam(const string &name, string &value)
{
if (conf == 0)
return false;
return conf->get(name, value, keydir);
}
/// Set current directory reference, and fetch automatic parameters.
void setKeyDir(const string &dir) void setKeyDir(const string &dir)
{ {
keydir = dir; keydir = dir;
@ -33,19 +45,13 @@ class RclConfig {
conf->get("guesscharset", str, keydir); conf->get("guesscharset", str, keydir);
guesscharset = ConfTree::stringToBool(str); guesscharset = ConfTree::stringToBool(str);
} }
bool getConfParam(const string &name, string &value) ConfTree *getMimeMap() {return m_ok ? mimemap : 0;}
{ ConfTree *getMimeConf() {return m_ok ? mimeconf : 0;}
if (conf == 0) const string &getDefCharset() {return defcharset;}
return false; const string &getDefLang() {return deflang;}
return conf->get(name, value, keydir); bool getGuessCharset() {return guesscharset;}
}
const string &getDefCharset() {
return defcharset;
}
const string &getDefLang() {
return deflang;
}
}; };
std::string find_filter(RclConfig *conf, const string& cmd);
#endif /* _RCLCONFIG_H_INCLUDED_ */ #endif /* _RCLCONFIG_H_INCLUDED_ */

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.2 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: indexer.cpp,v 1.3 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
#include <sys/stat.h> #include <sys/stat.h>
@ -19,8 +19,8 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.2 2005-02-01 17:20:05 dockes Exp
#include "indexer.h" #include "indexer.h"
#include "csguess.h" #include "csguess.h"
#include "transcode.h" #include "transcode.h"
#include "mimehandler.h"
#include "debuglog.h" #include "debuglog.h"
#include "internfile.h"
using namespace std; using namespace std;
@ -76,6 +76,7 @@ bool DbIndexer::index()
return true; return true;
} }
/** /**
* This function gets called for every file and directory found by the * This function gets called for every file and directory found by the
* tree walker. It checks with the db if the file has changed and needs to * tree walker. It checks with the db if the file has changed and needs to
@ -97,40 +98,17 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp,
return FsTreeWalker::FtwOk; return FsTreeWalker::FtwOk;
} }
string mime = mimetype(fn, me->config->getMimeMap());
if (mime.empty()) {
// No mime type ?? pass on.
LOGDEB(("indexfile: (no mime) [%s]\n", fn.c_str()));
return FsTreeWalker::FtwOk;
}
// Look for appropriate handler
MimeHandler *handler = getMimeHandler(mime, me->config->getMimeConf());
if (!handler) {
// No handler for this type, for now :(
LOGDEB(("indexfile: %s : no handler\n", mime.c_str()));
return FsTreeWalker::FtwOk;
}
LOGDEB(("indexfile: %s [%s]\n", mime.c_str(), fn.c_str()));
// Check db up to date ? // Check db up to date ?
if (!me->db.needUpdate(fn, stp)) { if (!me->db.needUpdate(fn, stp)) {
delete handler; LOGDEB(("indexfile: up to date: %s\n", fn.c_str()));
return FsTreeWalker::FtwOk; return FsTreeWalker::FtwOk;
} }
// Turn file into a document. The document has fields for title, body
// etc., all text converted to utf8
Rcl::Doc doc; Rcl::Doc doc;
if (!handler->worker(me->config, fn, mime, doc)) { if (!internfile(fn, me->config, doc))
delete handler;
return FsTreeWalker::FtwOk; return FsTreeWalker::FtwOk;
}
delete handler;
// Set up common fields: // Set up common fields:
doc.mimetype = mime;
char ascdate[20]; char ascdate[20];
sprintf(ascdate, "%ld", long(stp->st_mtime)); sprintf(ascdate, "%ld", long(stp->st_mtime));
doc.mtime = ascdate; doc.mtime = ascdate;
@ -161,13 +139,13 @@ bool ConfIndexer::index()
// Group the directories by database: it is important that all // Group the directories by database: it is important that all
// directories for a database be indexed at once so that deleted // directories for a database be indexed at once so that deleted
// file cleanup works // file cleanup works
vector<string> tdl; // List of directories to be indexed list<string> tdl; // List of directories to be indexed
if (!ConfTree::stringToStrings(topdirs, tdl)) { if (!ConfTree::stringToStrings(topdirs, tdl)) {
LOGERR(("ConfIndexer::index: parse error for directory list\n")); LOGERR(("ConfIndexer::index: parse error for directory list\n"));
return false; return false;
} }
vector<string>::iterator dirit; list<string>::iterator dirit;
map<string, list<string> > dbmap; map<string, list<string> > dbmap;
map<string, list<string> >::iterator dbit; map<string, list<string> >::iterator dbit;
for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) { for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) {

View File

@ -50,7 +50,7 @@ bool MimeHandlerHtml::worker(RclConfig *conf, const string &fn,
return worker1(conf, fn, otext, mtype, docout); return worker1(conf, fn, otext, mtype, docout);
} }
bool MimeHandlerHtml::worker1(RclConfig *conf, const string &fn, bool MimeHandlerHtml::worker1(RclConfig *conf, const string &,
const string& htext, const string& htext,
const string &mtype, Rcl::Doc &docout) const string &mtype, Rcl::Doc &docout)
{ {
@ -63,10 +63,10 @@ bool MimeHandlerHtml::worker1(RclConfig *conf, const string &fn,
// what we started with, we abort and restart with the parameter value // what we started with, we abort and restart with the parameter value
// instead of the configuration one. // instead of the configuration one.
string charset; string charset;
if (conf->guesscharset) { if (conf->getGuessCharset()) {
charset = csguess(htext, conf->defcharset); charset = csguess(htext, conf->getDefCharset());
} else } else
charset = conf->defcharset; charset = conf->getDefCharset();
LOGDEB(("textHtmlToDoc: charset before parsing: %s\n", charset.c_str())); LOGDEB(("textHtmlToDoc: charset before parsing: %s\n", charset.c_str()));

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.6 2005-02-01 17:52:06 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.7 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
#include <iostream> #include <iostream>
@ -14,6 +14,7 @@ using namespace std;
#include "smallut.h" #include "smallut.h"
#include "html.h" #include "html.h"
#include "execmd.h" #include "execmd.h"
#include "pathut.h"
class MimeHandlerText : public MimeHandler { class MimeHandlerText : public MimeHandler {
public: public:
@ -34,10 +35,10 @@ bool MimeHandlerText::worker(RclConfig *conf, const string &fn,
// fields The charset guesser really doesnt work well in general // fields The charset guesser really doesnt work well in general
// and should be avoided (especially for short documents) // and should be avoided (especially for short documents)
string charset; string charset;
if (conf->guesscharset) { if (conf->getGuessCharset()) {
charset = csguess(otext, conf->defcharset); charset = csguess(otext, conf->getDefCharset());
} else } else
charset = conf->defcharset; charset = conf->getDefCharset();
string utf8; string utf8;
LOGDEB1(("textPlainToDoc: transcod from %s to %s\n", charset, "UTF-8")); LOGDEB1(("textPlainToDoc: transcod from %s to %s\n", charset, "UTF-8"));
@ -70,11 +71,15 @@ class MimeHandlerExec : public MimeHandler {
bool MimeHandlerExec::worker(RclConfig *conf, const string &fn, bool MimeHandlerExec::worker(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout) const string &mtype, Rcl::Doc &docout)
{ {
string cmd = params.front(); // Command name
string cmd = find_filter(conf, params.front());
// Build parameter list: delete cmd name and add the file name
list<string>::iterator it = params.begin(); list<string>::iterator it = params.begin();
list<string>myparams(++it, params.end()); list<string>myparams(++it, params.end());
myparams.push_back(fn); myparams.push_back(fn);
// Execute command and store the result text, which is supposedly html
string html; string html;
ExecCmd exec; ExecCmd exec;
int status = exec.doexec(cmd, myparams, 0, &html); int status = exec.doexec(cmd, myparams, 0, &html);
@ -83,6 +88,8 @@ bool MimeHandlerExec::worker(RclConfig *conf, const string &fn,
status, cmd.c_str())); status, cmd.c_str()));
return false; return false;
} }
// Process/index the html
MimeHandlerHtml hh; MimeHandlerHtml hh;
return hh.worker1(conf, fn, html, mtype, docout); return hh.worker1(conf, fn, html, mtype, docout);
} }
@ -109,26 +116,26 @@ MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
} }
// Break definition into type and name // Break definition into type and name
vector<string> toks; list<string> toks;
ConfTree::stringToStrings(hs, toks); ConfTree::stringToStrings(hs, toks);
if (toks.size() < 1) { if (toks.empty()) {
LOGERR(("getMimeHandler: bad mimeconf line for %s\n", mtype.c_str())); LOGERR(("getMimeHandler: bad mimeconf line for %s\n", mtype.c_str()));
return 0; return 0;
} }
// Retrieve handler function according to type // Retrieve handler function according to type
if (!stringlowercmp("internal", toks[0])) { if (!stringlowercmp("internal", toks.front())) {
return mhfact(mtype); return mhfact(mtype);
} else if (!stringlowercmp("dll", toks[0])) { } else if (!stringlowercmp("dll", toks.front())) {
return 0; return 0;
} else if (!stringlowercmp("exec", toks[0])) { } else if (!stringlowercmp("exec", toks.front())) {
if (toks.size() < 2) { if (toks.size() < 2) {
LOGERR(("getMimeHandler: bad line for %s: %s\n", mtype.c_str(), LOGERR(("getMimeHandler: bad line for %s: %s\n", mtype.c_str(),
hs.c_str())); hs.c_str()));
return 0; return 0;
} }
MimeHandlerExec *h = new MimeHandlerExec; MimeHandlerExec *h = new MimeHandlerExec;
vector<string>::const_iterator it1 = toks.begin(); list<string>::const_iterator it1 = toks.begin();
it1++; it1++;
for (;it1 != toks.end();it1++) for (;it1 != toks.end();it1++)
h->params.push_back(*it1); h->params.push_back(*it1);
@ -146,3 +153,21 @@ string getMimeViewer(const std::string &mtype, ConfTree *mhandlers)
mhandlers->get(mtype, hs, "view"); mhandlers->get(mtype, hs, "view");
return hs; return hs;
} }
/**
 * Return decompression command line for given mime type
 *
 * The value stored for mtype in mhandlers (fetched with an empty third
 * argument to get()) is split into tokens by ConfTree::stringToStrings().
 * The first token is compared to "uncompress" with stringlowercmp(); on a
 * match the remaining tokens are copied to cmd.
 *
 * @param mtype     mime type to look up
 * @param mhandlers configuration tree holding the handler definitions
 * @param cmd       output: tokens following "uncompress"; left untouched
 *                  when false is returned
 * @return true if the definition starts with "uncompress", false if there
 *         is no usable definition or it is of another kind
 */
bool getUncompressor(const std::string &mtype, ConfTree *mhandlers,
                     list<string>& cmd)
{
    string hs;
    mhandlers->get(mtype, hs, "");
    list<string> tokens;
    ConfTree::stringToStrings(hs, tokens);
    // A missing or empty definition leaves the token list empty, and
    // calling front() on an empty list is undefined behaviour.
    if (tokens.empty())
        return false;
    if (stringlowercmp("uncompress", tokens.front()))
        return false;
    // Skip the "uncompress" keyword, keep the command and its arguments
    list<string>::iterator it = tokens.begin();
    cmd.assign(++it, tokens.end());
    return true;
}

View File

@ -1,8 +1,9 @@
#ifndef _MIMEHANDLER_H_INCLUDED_ #ifndef _MIMEHANDLER_H_INCLUDED_
#define _MIMEHANDLER_H_INCLUDED_ #define _MIMEHANDLER_H_INCLUDED_
/* @(#$Id: mimehandler.h,v 1.4 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: mimehandler.h,v 1.5 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list>
#include "rclconfig.h" #include "rclconfig.h"
#include "rcldb.h" #include "rcldb.h"
@ -22,13 +23,14 @@ class MimeHandler {
* Return indexing handler class for given mime type * Return indexing handler class for given mime type
* returned pointer should be deleted by caller * returned pointer should be deleted by caller
*/ */
extern MimeHandler *getMimeHandler(const std::string &mtype, extern MimeHandler *getMimeHandler(const std::string &mtyp, ConfTree *mhdlers);
ConfTree *mhandlers);
/** /**
* Return external viewer exec string for given mime type * Return external viewer exec string for given mime type
*/ */
extern string getMimeViewer(const std::string &mtype, extern std::string getMimeViewer(const std::string &mtyp, ConfTree *mhandlers);
ConfTree *mhandlers);
bool getUncompressor(const std::string &mtype, ConfTree *mhandlers,
std::list<std::string>& cmd);
#endif /* _MIMEHANDLER_H_INCLUDED_ */ #endif /* _MIMEHANDLER_H_INCLUDED_ */

View File

@ -8,7 +8,7 @@ all: $(LIBS)
OBJS = conftree.o csguess.o debuglog.o \ OBJS = conftree.o csguess.o debuglog.o \
execmd.o \ execmd.o \
fstreewalk.o html.o htmlparse.o indexer.o \ fstreewalk.o html.o htmlparse.o indexer.o internfile.o \
mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \ mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
rclconfig.o rcldb.o readfile.o smallut.o \ rclconfig.o rcldb.o readfile.o smallut.o \
textsplit.o transcode.o \ textsplit.o transcode.o \
@ -16,7 +16,7 @@ OBJS = conftree.o csguess.o debuglog.o \
SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \ SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
../utils/execmd.cpp \ ../utils/execmd.cpp \
../utils/fstreewalk.cpp ../common/html.cpp ../common/htmlparse.cpp \ ../utils/fstreewalk.cpp ../common/html.cpp ../common/htmlparse.cpp \
../index/indexer.cpp \ ../index/indexer.cpp ../common/internfile.cpp \
../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \ ../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \
../common/myhtmlparse.cpp ../utils/pathut.cpp \ ../common/myhtmlparse.cpp ../utils/pathut.cpp \
../common/rclconfig.cpp ../common/rcldb.cpp ../utils/readfile.cpp \ ../common/rclconfig.cpp ../common/rcldb.cpp ../utils/readfile.cpp \
@ -47,6 +47,8 @@ htmlparse.o : ../common/htmlparse.cpp
$(CXX) $(CXXFLAGS) -c $< $(CXX) $(CXXFLAGS) -c $<
indexer.o : ../index/indexer.cpp indexer.o : ../index/indexer.cpp
$(CXX) $(CXXFLAGS) -c $< $(CXX) $(CXXFLAGS) -c $<
internfile.o : ../common/internfile.cpp
$(CXX) $(CXXFLAGS) -c $<
mimehandler.o : ../common/mimehandler.cpp mimehandler.o : ../common/mimehandler.cpp
$(CXX) $(CXXFLAGS) -c $< $(CXX) $(CXXFLAGS) -c $<
mimeparse.o : ../utils/mimeparse.cpp mimeparse.o : ../utils/mimeparse.cpp

View File

@ -24,6 +24,7 @@
#include "mimehandler.h" #include "mimehandler.h"
#include "pathut.h" #include "pathut.h"
#include "recoll.h" #include "recoll.h"
#include "internfile.h"
void RecollMain::fileExit() void RecollMain::fileExit()
{ {
@ -145,28 +146,14 @@ void RecollMain::reslistTE_clicked(int par, int car)
// Go to the file system to retrieve / convert the document text // Go to the file system to retrieve / convert the document text
// for preview: // for preview:
// Look for appropriate handler
MimeHandler *handler =
getMimeHandler(doc.mimetype, rclconfig->getMimeConf());
if (!handler) {
QMessageBox::warning(0, "Recoll",
QString("No mime handler for mime type ") +
doc.mimetype.c_str());
return;
}
string fn = urltolocalpath(doc.url); string fn = urltolocalpath(doc.url);
Rcl::Doc fdoc; Rcl::Doc fdoc;
if (!handler->worker(rclconfig, fn, doc.mimetype, fdoc)) { if (!internfile(fn, rclconfig, fdoc)) {
QMessageBox::warning(0, "Recoll", QMessageBox::warning(0, "Recoll",
QString("Failed to convert document for preview!\n") + QString("Can't turn doc into internal rep ") +
fn.c_str() + " mimetype " +
doc.mimetype.c_str()); doc.mimetype.c_str());
delete handler;
return; return;
} }
delete handler;
string rich = plaintorich(fdoc.text); string rich = plaintorich(fdoc.text);

View File

@ -1,13 +1,12 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.1 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: smallut.cpp,v 1.2 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
#ifndef TEST_SMALLUT #ifndef TEST_SMALLUT
#include <string> #include <string>
#include "smallut.h"
#include <ctype.h> #include <ctype.h>
#include "smallut.h"
#define MIN(A,B) ((A)<(B)?(A):(B)) #define MIN(A,B) ((A)<(B)?(A):(B))
int stringicmp(const string & s1, const string& s2) int stringicmp(const string & s1, const string& s2)

View File

@ -1,6 +1,6 @@
#ifndef _SMALLUT_H_INCLUDED_ #ifndef _SMALLUT_H_INCLUDED_
#define _SMALLUT_H_INCLUDED_ #define _SMALLUT_H_INCLUDED_
/* @(#$Id: smallut.h,v 1.1 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: smallut.h,v 1.2 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
using std::string; using std::string;
@ -9,5 +9,4 @@ extern int stringicmp(const string& s1, const string& s2);
extern int stringlowercmp(const string& alreadylower, const string& s2); extern int stringlowercmp(const string& alreadylower, const string& s2);
extern int stringuppercmp(const string& alreadyupper, const string& s2); extern int stringuppercmp(const string& alreadyupper, const string& s2);
#endif /* _SMALLUT_H_INCLUDED_ */ #endif /* _SMALLUT_H_INCLUDED_ */