*** empty log message ***

This commit is contained in:
dockes 2005-02-04 09:39:44 +00:00
parent 038205ce02
commit 1f8fbc0d39
10 changed files with 123 additions and 99 deletions

View File

@ -1,6 +1,7 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.5 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.6 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <unistd.h>
#include <iostream>
@ -67,3 +68,28 @@ RclConfig::RclConfig()
m_ok = true;
return;
}
// Look up an executable filter.
//
// Search order:
//   1. An absolute path (leading '/') is returned unchanged.
//   2. The directory named by the RECOLL_BINDIR environment variable,
//      if that variable is set.
//   3. The configuration directory (conf->getConfDir()).
//   4. Otherwise the name is returned unchanged, so that the system can
//      use the PATH.
//
// @param conf  configuration object supplying the configuration directory
// @param icmd  filter command name, as found in the configuration
// @return      path of an executable file found in one of the directories
//              above, or icmd itself if none was found
string find_filter(RclConfig *conf, const string &icmd)
{
    // Nothing to look up for an empty name (probing "<dir>/" would
    // succeed on the directory itself). If the path is absolute, this is it.
    if (icmd.empty() || icmd[0] == '/')
        return icmd;

    string cmd;

    // First choice: the directory from the environment
    const char *cp = getenv("RECOLL_BINDIR");
    if (cp != 0) {
        cmd = cp;
        path_cat(cmd, icmd);
        if (access(cmd.c_str(), X_OK) == 0)
            return cmd;
    }

    // Second choice: the configuration directory. This is also tried when
    // RECOLL_BINDIR is set but does not hold the filter.
    if (conf != 0) {
        cmd = conf->getConfDir();
        path_cat(cmd, icmd);
        if (access(cmd.c_str(), X_OK) == 0)
            return cmd;
    }

    // Not found: return the bare name and let the system use the PATH
    return icmd;
}

View File

@ -1,29 +1,41 @@
#ifndef _RCLCONFIG_H_INCLUDED_
#define _RCLCONFIG_H_INCLUDED_
/* @(#$Id: rclconfig.h,v 1.3 2004-12-17 13:01:01 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: rclconfig.h,v 1.4 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes */
#include "conftree.h"
class RclConfig {
int m_ok;
string confdir; // Directory where the files are stored
ConfTree *conf; // Parsed main configuration
string keydir; // Current directory used for parameter fetches.
// Note: this will have to change if/when we support per directory maps
ConfTree *mimemap;
ConfTree *mimeconf;
string confdir; // Directory where the files are stored
ConfTree *conf; // Parsed main configuration
string keydir; // Current directory used for parameter fetches.
ConfTree *mimemap; // These are independent of current keydir. We might
ConfTree *mimeconf; // want to change it one day.
// Parameters auto-fetched on setkeydir
string defcharset; // These are stored locally to avoid
string deflang; // a config lookup each time.
bool guesscharset; // They are fetched initially or on setKeydir()
public:
// Let some parameters be accessed directly
string defcharset; // These are stored locally to avoid a config lookup
string deflang; // each time.
bool guesscharset;
RclConfig();
~RclConfig() {delete conf;delete mimemap;delete mimeconf;}
bool ok() {return m_ok;}
string getConfDir() {return confdir;}
ConfTree *getConfig() {return m_ok ? conf : 0;}
ConfTree *getMimeMap() {return m_ok ? mimemap : 0;}
ConfTree *getMimeConf() {return m_ok ? mimeconf : 0;}
/// Get generic configuration parameter according to current keydir
bool getConfParam(const string &name, string &value)
{
if (conf == 0)
return false;
return conf->get(name, value, keydir);
}
/// Set current directory reference, and fetch automatic parameters.
void setKeyDir(const string &dir)
{
keydir = dir;
@ -33,19 +45,13 @@ class RclConfig {
conf->get("guesscharset", str, keydir);
guesscharset = ConfTree::stringToBool(str);
}
bool getConfParam(const string &name, string &value)
{
if (conf == 0)
return false;
return conf->get(name, value, keydir);
}
const string &getDefCharset() {
return defcharset;
}
const string &getDefLang() {
return deflang;
}
ConfTree *getMimeMap() {return m_ok ? mimemap : 0;}
ConfTree *getMimeConf() {return m_ok ? mimeconf : 0;}
const string &getDefCharset() {return defcharset;}
const string &getDefLang() {return deflang;}
bool getGuessCharset() {return guesscharset;}
};
std::string find_filter(RclConfig *conf, const string& cmd);
#endif /* _RCLCONFIG_H_INCLUDED_ */

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.2 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.3 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <sys/stat.h>
@ -19,8 +19,8 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.2 2005-02-01 17:20:05 dockes Exp
#include "indexer.h"
#include "csguess.h"
#include "transcode.h"
#include "mimehandler.h"
#include "debuglog.h"
#include "internfile.h"
using namespace std;
@ -76,6 +76,7 @@ bool DbIndexer::index()
return true;
}
/**
* This function gets called for every file and directory found by the
* tree walker. It checks with the db if the file has changed and needs to
@ -97,40 +98,17 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp,
return FsTreeWalker::FtwOk;
}
string mime = mimetype(fn, me->config->getMimeMap());
if (mime.empty()) {
// No mime type ?? pass on.
LOGDEB(("indexfile: (no mime) [%s]\n", fn.c_str()));
return FsTreeWalker::FtwOk;
}
// Look for appropriate handler
MimeHandler *handler = getMimeHandler(mime, me->config->getMimeConf());
if (!handler) {
// No handler for this type, for now :(
LOGDEB(("indexfile: %s : no handler\n", mime.c_str()));
return FsTreeWalker::FtwOk;
}
LOGDEB(("indexfile: %s [%s]\n", mime.c_str(), fn.c_str()));
// Check db up to date ?
if (!me->db.needUpdate(fn, stp)) {
delete handler;
LOGDEB(("indexfile: up to date: %s\n", fn.c_str()));
return FsTreeWalker::FtwOk;
}
// Turn file into a document. The document has fields for title, body
// etc., all text converted to utf8
Rcl::Doc doc;
if (!handler->worker(me->config, fn, mime, doc)) {
delete handler;
if (!internfile(fn, me->config, doc))
return FsTreeWalker::FtwOk;
}
delete handler;
// Set up common fields:
doc.mimetype = mime;
char ascdate[20];
sprintf(ascdate, "%ld", long(stp->st_mtime));
doc.mtime = ascdate;
@ -161,13 +139,13 @@ bool ConfIndexer::index()
// Group the directories by database: it is important that all
// directories for a database be indexed at once so that deleted
// file cleanup works
vector<string> tdl; // List of directories to be indexed
list<string> tdl; // List of directories to be indexed
if (!ConfTree::stringToStrings(topdirs, tdl)) {
LOGERR(("ConfIndexer::index: parse error for directory list\n"));
return false;
}
vector<string>::iterator dirit;
list<string>::iterator dirit;
map<string, list<string> > dbmap;
map<string, list<string> >::iterator dbit;
for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) {

View File

@ -50,7 +50,7 @@ bool MimeHandlerHtml::worker(RclConfig *conf, const string &fn,
return worker1(conf, fn, otext, mtype, docout);
}
bool MimeHandlerHtml::worker1(RclConfig *conf, const string &fn,
bool MimeHandlerHtml::worker1(RclConfig *conf, const string &,
const string& htext,
const string &mtype, Rcl::Doc &docout)
{
@ -63,10 +63,10 @@ bool MimeHandlerHtml::worker1(RclConfig *conf, const string &fn,
// what we started with, we abort and restart with the parameter value
// instead of the configuration one.
string charset;
if (conf->guesscharset) {
charset = csguess(htext, conf->defcharset);
if (conf->getGuessCharset()) {
charset = csguess(htext, conf->getDefCharset());
} else
charset = conf->defcharset;
charset = conf->getDefCharset();
LOGDEB(("textHtmlToDoc: charset before parsing: %s\n", charset.c_str()));

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.6 2005-02-01 17:52:06 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.7 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <iostream>
@ -14,6 +14,7 @@ using namespace std;
#include "smallut.h"
#include "html.h"
#include "execmd.h"
#include "pathut.h"
class MimeHandlerText : public MimeHandler {
public:
@ -34,10 +35,10 @@ bool MimeHandlerText::worker(RclConfig *conf, const string &fn,
// fields The charset guesser really doesnt work well in general
// and should be avoided (especially for short documents)
string charset;
if (conf->guesscharset) {
charset = csguess(otext, conf->defcharset);
if (conf->getGuessCharset()) {
charset = csguess(otext, conf->getDefCharset());
} else
charset = conf->defcharset;
charset = conf->getDefCharset();
string utf8;
LOGDEB1(("textPlainToDoc: transcod from %s to %s\n", charset, "UTF-8"));
@ -70,11 +71,15 @@ class MimeHandlerExec : public MimeHandler {
bool MimeHandlerExec::worker(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout)
{
string cmd = params.front();
// Command name
string cmd = find_filter(conf, params.front());
// Build parameter list: delete cmd name and add the file name
list<string>::iterator it = params.begin();
list<string>myparams(++it, params.end());
myparams.push_back(fn);
// Execute command and store the result text, which is supposedly html
string html;
ExecCmd exec;
int status = exec.doexec(cmd, myparams, 0, &html);
@ -83,6 +88,8 @@ bool MimeHandlerExec::worker(RclConfig *conf, const string &fn,
status, cmd.c_str()));
return false;
}
// Process/index the html
MimeHandlerHtml hh;
return hh.worker1(conf, fn, html, mtype, docout);
}
@ -109,26 +116,26 @@ MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
}
// Break definition into type and name
vector<string> toks;
list<string> toks;
ConfTree::stringToStrings(hs, toks);
if (toks.size() < 1) {
if (toks.empty()) {
LOGERR(("getMimeHandler: bad mimeconf line for %s\n", mtype.c_str()));
return 0;
}
// Retrieve handler function according to type
if (!stringlowercmp("internal", toks[0])) {
if (!stringlowercmp("internal", toks.front())) {
return mhfact(mtype);
} else if (!stringlowercmp("dll", toks[0])) {
} else if (!stringlowercmp("dll", toks.front())) {
return 0;
} else if (!stringlowercmp("exec", toks[0])) {
} else if (!stringlowercmp("exec", toks.front())) {
if (toks.size() < 2) {
LOGERR(("getMimeHandler: bad line for %s: %s\n", mtype.c_str(),
hs.c_str()));
return 0;
}
MimeHandlerExec *h = new MimeHandlerExec;
vector<string>::const_iterator it1 = toks.begin();
list<string>::const_iterator it1 = toks.begin();
it1++;
for (;it1 != toks.end();it1++)
h->params.push_back(*it1);
@ -146,3 +153,21 @@ string getMimeViewer(const std::string &mtype, ConfTree *mhandlers)
mhandlers->get(mtype, hs, "view");
return hs;
}
/**
 * Return decompression command line for given mime type.
 *
 * The value stored for mtype is fetched from mhandlers (with "" as the
 * third argument of get()) and split into tokens. The entry is accepted
 * only if stringlowercmp("uncompress", first token) returns 0; the
 * remaining tokens are then stored in cmd.
 *
 * @param mtype     mime type to look up
 * @param mhandlers configuration to search; a null pointer yields false
 * @param cmd       output: the tokens following the first one. Left
 *                  untouched when the function returns false.
 * @return true if an "uncompress" entry was found for mtype, else false
 */
bool getUncompressor(const std::string &mtype, ConfTree *mhandlers,
                     list<string>& cmd)
{
    // The RclConfig accessors return 0 when the configuration is not ok
    if (mhandlers == 0)
        return false;

    string hs;
    mhandlers->get(mtype, hs, "");

    list<string> tokens;
    ConfTree::stringToStrings(hs, tokens);

    // No entry (or an empty one) for this type: front() on an empty
    // list would be undefined behaviour
    if (tokens.empty())
        return false;

    if (stringlowercmp("uncompress", tokens.front()))
        return false;

    // Skip the "uncompress" keyword, keep the rest as the command line
    list<string>::iterator it = tokens.begin();
    cmd.assign(++it, tokens.end());
    return true;
}

View File

@ -1,8 +1,9 @@
#ifndef _MIMEHANDLER_H_INCLUDED_
#define _MIMEHANDLER_H_INCLUDED_
/* @(#$Id: mimehandler.h,v 1.4 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: mimehandler.h,v 1.5 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
#include "rclconfig.h"
#include "rcldb.h"
@ -22,13 +23,14 @@ class MimeHandler {
* Return indexing handler class for given mime type
* returned pointer should be deleted by caller
*/
extern MimeHandler *getMimeHandler(const std::string &mtype,
ConfTree *mhandlers);
extern MimeHandler *getMimeHandler(const std::string &mtyp, ConfTree *mhdlers);
/**
* Return external viewer exec string for given mime type
*/
extern string getMimeViewer(const std::string &mtype,
ConfTree *mhandlers);
extern std::string getMimeViewer(const std::string &mtyp, ConfTree *mhandlers);
bool getUncompressor(const std::string &mtype, ConfTree *mhandlers,
std::list<std::string>& cmd);
#endif /* _MIMEHANDLER_H_INCLUDED_ */

View File

@ -8,7 +8,7 @@ all: $(LIBS)
OBJS = conftree.o csguess.o debuglog.o \
execmd.o \
fstreewalk.o html.o htmlparse.o indexer.o \
fstreewalk.o html.o htmlparse.o indexer.o internfile.o \
mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
rclconfig.o rcldb.o readfile.o smallut.o \
textsplit.o transcode.o \
@ -16,7 +16,7 @@ OBJS = conftree.o csguess.o debuglog.o \
SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
../utils/execmd.cpp \
../utils/fstreewalk.cpp ../common/html.cpp ../common/htmlparse.cpp \
../index/indexer.cpp \
../index/indexer.cpp ../common/internfile.cpp \
../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \
../common/myhtmlparse.cpp ../utils/pathut.cpp \
../common/rclconfig.cpp ../common/rcldb.cpp ../utils/readfile.cpp \
@ -47,6 +47,8 @@ htmlparse.o : ../common/htmlparse.cpp
$(CXX) $(CXXFLAGS) -c $<
indexer.o : ../index/indexer.cpp
$(CXX) $(CXXFLAGS) -c $<
internfile.o : ../common/internfile.cpp
$(CXX) $(CXXFLAGS) -c $<
mimehandler.o : ../common/mimehandler.cpp
$(CXX) $(CXXFLAGS) -c $<
mimeparse.o : ../utils/mimeparse.cpp

View File

@ -24,6 +24,7 @@
#include "mimehandler.h"
#include "pathut.h"
#include "recoll.h"
#include "internfile.h"
void RecollMain::fileExit()
{
@ -145,28 +146,14 @@ void RecollMain::reslistTE_clicked(int par, int car)
// Go to the file system to retrieve / convert the document text
// for preview:
// Look for appropriate handler
MimeHandler *handler =
getMimeHandler(doc.mimetype, rclconfig->getMimeConf());
if (!handler) {
QMessageBox::warning(0, "Recoll",
QString("No mime handler for mime type ") +
doc.mimetype.c_str());
return;
}
string fn = urltolocalpath(doc.url);
Rcl::Doc fdoc;
if (!handler->worker(rclconfig, fn, doc.mimetype, fdoc)) {
if (!internfile(fn, rclconfig, fdoc)) {
QMessageBox::warning(0, "Recoll",
QString("Failed to convert document for preview!\n") +
fn.c_str() + " mimetype " +
QString("Can't turn doc into internal rep ") +
doc.mimetype.c_str());
delete handler;
return;
}
delete handler;
string rich = plaintorich(fdoc.text);

View File

@ -1,13 +1,12 @@
#ifndef lint
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.1 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.2 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#ifndef TEST_SMALLUT
#include <string>
#include "smallut.h"
#include <ctype.h>
#include "smallut.h"
#define MIN(A,B) ((A)<(B)?(A):(B))
int stringicmp(const string & s1, const string& s2)

View File

@ -1,6 +1,6 @@
#ifndef _SMALLUT_H_INCLUDED_
#define _SMALLUT_H_INCLUDED_
/* @(#$Id: smallut.h,v 1.1 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: smallut.h,v 1.2 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
using std::string;
@ -9,5 +9,4 @@ extern int stringicmp(const string& s1, const string& s2);
extern int stringlowercmp(const string& alreadylower, const string& s2);
extern int stringuppercmp(const string& alreadyupper, const string& s2);
#endif /* _SMALLUT_H_INCLUDED_ */