mimemap processing recentered in rclconfig. Handle directory-local suffix to mime-type definitions. Implement gaim log handling

This commit is contained in:
dockes 2005-11-21 14:31:24 +00:00
parent 64377c44d3
commit ad67a6cbb7
16 changed files with 192 additions and 152 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.11 2005-11-17 12:47:03 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.12 2005-11-21 14:31:24 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <unistd.h>
#include <errno.h>
@ -10,11 +10,13 @@ static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.11 2005-11-17 12:47:03 dockes E
#include "pathut.h"
#include "conftree.h"
#include "debuglog.h"
#include "smallut.h"
using namespace std;
RclConfig::RclConfig()
: m_ok(false), conf(0), mimemap(0), mimeconf(0)
: m_ok(false), conf(0), mimemap(0), mimeconf(0), stopsuffixes(0)
{
static int loginit = 0;
if (!loginit) {
@ -94,13 +96,21 @@ bool RclConfig::getConfParam(const std::string &name, int *ivp)
*ivp = int(lval);
return true;
}
bool RclConfig::getConfParam(const std::string &name, bool *value)
bool RclConfig::getConfParam(const std::string &name, bool *bvp)
{
int ival;
if (!getConfParam(name, &ival))
*bvp = false;
string s;
if (!getConfParam(name, s))
return false;
if (*value)
*value = ival ? true : false;
if (s.empty())
return true;
if (isdigit(s[0])) {
int val = atoi(s.c_str());
*bvp = val ? true : false;
} else if (strchr("yYoOtT", s[0])) {
*bvp = true;
}
return true;
}
@ -127,6 +137,55 @@ std::list<string> RclConfig::getAllMimeTypes()
return lst;
}
/**
 * Copy the list of "stop" suffixes into sufflist.
 *
 * The list comes from the "recoll_noindex" entry of the mimemap, fetched
 * with the current keydir and split with ConfTree::stringToStrings(). It is
 * built on the first successful call and cached in stopsuffixes; later
 * calls return a copy of the cached list.
 *
 * NOTE(review): the cache is filled using the keydir in effect on the first
 * call and is not refreshed here if keydir changes afterwards -- confirm
 * this is intended.
 *
 * @param sufflist receives a copy of the suffix list (possibly empty).
 * @return true if sufflist was set, false if no mimemap is available.
 */
bool RclConfig::getStopSuffixes(list<string>& sufflist)
{
    if (stopsuffixes == 0) {
        // mimemap is initialised to 0 by the constructor; do not
        // dereference it if it was never loaded.
        if (mimemap == 0)
            return false;
        // Plain new reports failure by throwing, so no null test is needed.
        stopsuffixes = new list<string>;
        string stp;
        if (mimemap->get("recoll_noindex", stp, keydir)) {
            ConfTree::stringToStrings(stp, *stopsuffixes);
        }
    }
    sufflist = *stopsuffixes;
    return true;
}
/**
 * Look up the mime type associated with a file name suffix.
 *
 * The lookup is done in the mimemap using the current keydir.
 *
 * @param suff suffix used as the lookup key, passed to the mimemap as is.
 * @return the value found, or an empty string if the lookup set nothing or
 *         no mimemap is available.
 */
string RclConfig::getMimeTypeFromSuffix(const string &suff)
{
    string mtype;
    // mimemap is initialised to 0 by the constructor; treat a missing map
    // as "no type known" instead of dereferencing a null pointer.
    if (mimemap != 0)
        mimemap->get(suff, mtype, keydir);
    return mtype;
}
/**
 * Fetch the indexing handler definition for a mime type.
 *
 * The value is read from mimeconf with "index" as the third argument of
 * get(). A debug message is logged when get() reports failure.
 *
 * @param mtype mime type used as the lookup key.
 * @return the definition string, left empty if get() did not set it.
 */
string RclConfig::getMimeHandlerDef(const std::string &mtype)
{
    string handlerdef;
    if (mimeconf->get(mtype, handlerdef, "index"))
        return handlerdef;

    LOGDEB(("getMimeHandler: no handler for '%s'\n", mtype.c_str()));
    return handlerdef;
}
/**
 * Fetch the external viewer definition for a mime type.
 *
 * The value is read from mimeconf with "view" as the third argument of
 * get(); the result of get() itself is ignored.
 *
 * @param mtype mime type used as the lookup key.
 * @return the viewer string, left empty if get() did not set it.
 */
string RclConfig::getMimeViewerDef(const string &mtype)
{
    string viewerdef;
    mimeconf->get(mtype, viewerdef, "view");
    return viewerdef;
}
/**
 * Fetch the icon name configured for a mime type.
 *
 * The value is read from mimeconf with "icons" as the third argument of
 * get(); the result of get() itself is ignored.
 *
 * @param mtype mime type used as the lookup key.
 * @return the icon name, left empty if get() did not set it.
 */
string RclConfig::getMimeIconName(const string &mtype)
{
    string iconname;
    mimeconf->get(mtype, iconname, "icons");
    return iconname;
}
// Look up an executable filter.
// We look in RECOLL_BINDIR, RECOLL_CONFDIR, then let the system use
// the PATH
@ -152,3 +211,25 @@ string find_filter(RclConfig *conf, const string &icmd)
return icmd;
}
/**
* Return decompression command line for given mime type
*/
bool RclConfig::getUncompressor(const string &mtype, list<string>& cmd)
{
string hs;
mimeconf->get(mtype, hs, "");
if (hs.empty())
return false;
list<string> tokens;
ConfTree::stringToStrings(hs, tokens);
if (tokens.empty()) {
LOGERR(("getUncompressor: empty spec for mtype %s\n", mtype.c_str()));
return false;
}
if (stringlowercmp("uncompress", tokens.front()))
return false;
list<string>::iterator it = tokens.begin();
cmd.assign(++it, tokens.end());
return true;
}

View File

@ -1,26 +1,12 @@
#ifndef _RCLCONFIG_H_INCLUDED_
#define _RCLCONFIG_H_INCLUDED_
/* @(#$Id: rclconfig.h,v 1.7 2005-11-17 12:47:03 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: rclconfig.h,v 1.8 2005-11-21 14:31:24 dockes Exp $ (C) 2004 J.F.Dockes */
#include <list>
#include "conftree.h"
class RclConfig {
int m_ok;
string reason; // Explanation for bad state
string confdir; // Directory where the files are stored
ConfTree *conf; // Parsed main configuration
string keydir; // Current directory used for parameter fetches.
ConfTree *mimemap; // These are independant of current keydir. We might
ConfTree *mimeconf; // want to change it one day.
// Parameters auto-fetched on setkeydir
string defcharset; // These are stored locally to avoid
string deflang; // a config lookup each time.
bool guesscharset; // They are fetched initially or on setKeydir()
public:
RclConfig();
@ -29,7 +15,7 @@ class RclConfig {
bool ok() {return m_ok;}
const string &getReason() {return reason;}
string getConfDir() {return confdir;}
ConfTree *getConfig() {return m_ok ? conf : 0;}
//ConfTree *getConfig() {return m_ok ? conf : 0;}
/// Get generic configuration parameter according to current keydir
bool getConfParam(const string &name, string &value)
@ -55,12 +41,48 @@ class RclConfig {
conf->get("guesscharset", str, keydir);
guesscharset = ConfTree::stringToBool(str);
}
ConfTree *getMimeMap() {return m_ok ? mimemap : 0;}
ConfTree *getMimeConf() {return m_ok ? mimeconf : 0;}
/**
* Check whether the input mime type is a compressed one and, if it is,
* return the command to uncompress it.
* The returned command has substitutable places for the input file name
* and the temp dir name, and will return the output name.
*/
bool getUncompressor(const std::string &mtpe, std::list<std::string>& cmd);
bool getStopSuffixes(std::list<std::string>& sufflist);
std::string getMimeTypeFromSuffix(const std::string &suffix);
std::string getMimeHandlerDef(const std::string &mtype);
/**
* Return external viewer exec string for given mime type
*/
std::string getMimeViewerDef(const std::string &mtype);
/**
* Return icon name for mime type
*/
string getMimeIconName(const string &mtype);
const string &getDefCharset() {return defcharset;}
const string &getDefLang() {return deflang;}
bool getGuessCharset() {return guesscharset;}
std::list<string> getAllMimeTypes();
private:
int m_ok;
string reason; // Explanation for bad state
string confdir; // Directory where the files are stored
ConfTree *conf; // Parsed main configuration
string keydir; // Current directory used for parameter fetches.
ConfTree *mimemap; // These are independant of current keydir.
ConfTree *mimeconf;
ConfTree *mimemap_local; //
std::list<std::string> *stopsuffixes;
// Parameters auto-fetched on setkeydir
string defcharset; // These are stored locally to avoid
string deflang; // a config lookup each time.
bool guesscharset; // They are fetched initially or on setKeydir()
};
std::string find_filter(RclConfig *conf, const string& cmd);

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.15 2005-11-14 09:57:11 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.16 2005-11-21 14:31:24 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <stdio.h>
#include <sys/stat.h>
@ -215,11 +215,9 @@ ConfIndexer::~ConfIndexer()
bool ConfIndexer::index()
{
ConfTree *conf = config->getConfig();
// Retrieve the list of directories to be indexed.
string topdirs;
if (conf->get("topdirs", topdirs, "") == 0) {
if (!config->getConfParam("topdirs", topdirs)) {
LOGERR(("ConfIndexer::index: no top directories in configuration\n"));
return false;
}
@ -239,7 +237,8 @@ bool ConfIndexer::index()
for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) {
string db;
string dir = path_tildexpand(*dirit);
if (conf->get("dbdir", db, dir) == 0) {
config->setKeyDir(dir);
if (!config->getConfParam("dbdir", db)) {
LOGERR(("ConfIndexer::index: no database directory in "
"configuration for %s\n", dir.c_str()));
return false;
@ -254,6 +253,7 @@ bool ConfIndexer::index()
dbit->second.push_back(dir);
}
}
config->setKeyDir("");
// Index each directory group in turn
for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.10 2005-11-10 08:47:49 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.11 2005-11-21 14:31:24 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#ifndef TEST_MIMETYPE
@ -13,7 +13,7 @@ using std::list;
#include "mimetype.h"
#include "debuglog.h"
#include "execmd.h"
#include "conftree.h"
#include "rclconfig.h"
#include "smallut.h"
#include "idfile.h"
@ -82,26 +82,20 @@ static string mimetypefromdata(const string &fn, bool usfc)
/// Guess mime type, first from suffix, then from file data. We also
/// have a list of suffixes that we don't touch at all (ie: .jpg,
/// etc...)
string mimetype(const string &fn, ConfTree *mtypes, bool usfc)
string mimetype(const string &fn, RclConfig *cfg, bool usfc)
{
if (mtypes == 0)
if (cfg == 0)
return "";
static list<string> stoplist;
if (stoplist.empty()) {
string stp;
if (mtypes->get(string("recoll_noindex"), stp, "")) {
ConfTree::stringToStrings(stp, stoplist);
}
}
list<string> stoplist;
cfg->getStopSuffixes(stoplist);
if (!stoplist.empty()) {
for (list<string>::const_iterator it = stoplist.begin();
it != stoplist.end(); it++) {
if (it->length() > fn.length())
continue;
if (!stringicmp(fn.substr(fn.length() - it->length(),string::npos),
*it)) {
if (!stringicmp(fn.substr(fn.length() - it->length(),
string::npos), *it)) {
LOGDEB(("mimetype: fn %s in stoplist (%s)\n", fn.c_str(),
it->c_str()));
return "";
@ -109,7 +103,7 @@ string mimetype(const string &fn, ConfTree *mtypes, bool usfc)
}
}
// If the file name has a suffix and we find it in the map, we're done
// Look for suffix in mimetype map
string::size_type dot = fn.find_last_of(".");
string suff;
if (dot != string::npos) {
@ -117,18 +111,12 @@ string mimetype(const string &fn, ConfTree *mtypes, bool usfc)
for (unsigned int i = 0; i < suff.length(); i++)
suff[i] = tolower(suff[i]);
string mtype;
if (mtypes->get(suff, mtype, ""))
string mtype = cfg->getMimeTypeFromSuffix(suff);
if (!mtype.empty())
return mtype;
}
// Look at file data ? Only when no suffix or always ?
#if 0
// Don't do this only for empty suffixes: would cause problems
// with shifted files, like messages.1, messages.2 etc... And others too
if (suff.empty())
#endif
return mimetypefromdata(fn, usfc);
return mimetypefromdata(fn, usfc);
}
@ -158,7 +146,7 @@ int main(int argc, const char **argv)
while (--argc > 0) {
string filename = *++argv;
cout << filename << " -> " <<
mimetype(filename, config->getMimeMap(), true) << endl;
mimetype(filename, config, true) << endl;
}
return 0;

View File

@ -1,17 +1,17 @@
#ifndef _MIMETYPE_H_INCLUDED_
#define _MIMETYPE_H_INCLUDED_
/* @(#$Id: mimetype.h,v 1.3 2005-11-10 08:47:49 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: mimetype.h,v 1.4 2005-11-21 14:31:24 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include "conftree.h"
class RclConfig;
/**
* Try to determine a mime type for filename.
* This may imply more than matching the suffix, the name must be usable
* to actually access file data.
*/
string mimetype(const std::string &filename, ConfTree *mtypes, bool usfc);
string mimetype(const std::string &filename, RclConfig *cfg, bool usfc);
#endif /* _MIMETYPE_H_INCLUDED_ */

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.9 2005-11-18 15:19:14 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.10 2005-11-21 14:31:24 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <unistd.h>
#include <sys/types.h>
@ -102,7 +102,7 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
// We need to run mime type identification in any case to check
// for a compressed file.
m_mime = mimetype(m_fn, m_cfg->getMimeMap(), usfci);
m_mime = mimetype(m_fn, m_cfg, usfci);
// If identification fails, try to use the input parameter. Note that this
// is normally not a compressed type (it's the mime type from the db)
@ -118,14 +118,14 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
// uncompressed file, and rerun the mime type identification, then do the
// rest with the temp file.
list<string>ucmd;
if (getUncompressor(m_mime, m_cfg->getMimeConf(), ucmd)) {
if (m_cfg->getUncompressor(m_mime, ucmd)) {
if (!uncompressfile(m_cfg, m_fn, ucmd, m_tdir, m_tfile)) {
return;
}
LOGDEB(("internfile: after ucomp: m_tdir %s, tfile %s\n",
m_tdir.c_str(), m_tfile.c_str()));
m_fn = m_tfile;
m_mime = mimetype(m_fn, m_cfg->getMimeMap(), usfci);
m_mime = mimetype(m_fn, m_cfg, usfci);
if (m_mime.empty() && imime)
m_mime = *imime;
if (m_mime.empty()) {
@ -136,7 +136,7 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
}
// Look for appropriate handler
m_handler = getMimeHandler(m_mime, m_cfg->getMimeConf());
m_handler = getMimeHandler(m_mime, m_cfg);
if (!m_handler) {
// No handler for this type, for now :(
LOGDEB(("FileInterner::FileInterner: %s: no handler\n",

View File

@ -110,13 +110,12 @@ MimeHandlerHtml::mkDoc(RclConfig *conf, const string &,
}
}
Rcl::Doc out;
out.origcharset = charset;
out.text = pres.dump;
docout.origcharset = charset;
docout.text = pres.dump;
// LOGDEB(("textHtmlToDoc: dump : %s\n", pres.dump.c_str()));
out.title = pres.title;
out.keywords = pres.keywords;
out.abstract = pres.sample;
docout = out;
docout.title = pres.title;
docout.keywords = pres.keywords;
docout.abstract = pres.sample;
docout.dmtime = pres.dmtime;
return MimeHandler::MHDone;
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.12 2005-11-18 13:23:46 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.13 2005-11-21 14:31:24 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <iostream>
@ -31,14 +31,12 @@ static MimeHandler *mhFactory(const string &mime)
/**
* Return handler object for given mime type:
*/
MimeHandler *getMimeHandler(const string &mtype, ConfTree *mhandlers)
MimeHandler *getMimeHandler(const string &mtype, RclConfig *cfg)
{
// Return handler definition for mime type
string hs;
if (!mhandlers->get(mtype, hs, "index")) {
LOGDEB(("getMimeHandler: no handler for '%s'\n", mtype.c_str()));
// Get handler definition for mime type
string hs = cfg->getMimeHandlerDef(mtype);
if (hs.empty())
return 0;
}
// Break definition into type and name
list<string> toks;
@ -68,47 +66,3 @@ MimeHandler *getMimeHandler(const string &mtype, ConfTree *mhandlers)
}
return 0;
}
/**
 * Fetch the external viewer exec string for a mime type.
 *
 * The value is read from mhandlers with "view" as the third argument of
 * get(); the result of get() itself is ignored.
 *
 * @param mtype     mime type used as the lookup key.
 * @param mhandlers configuration tree to query; dereferenced unconditionally.
 * @return the viewer string, left empty if get() did not set it.
 */
string getMimeViewer(const string &mtype, ConfTree *mhandlers)
{
    string viewerdef;
    mhandlers->get(mtype, viewerdef, "view");
    return viewerdef;
}
/**
 * Fetch the icon name configured for a mime type.
 *
 * The value is read from mhandlers with "icons" as the third argument of
 * get(); the result of get() itself is ignored.
 *
 * @param mtype     mime type used as the lookup key.
 * @param mhandlers configuration tree to query; dereferenced unconditionally.
 * @return the icon name, left empty if get() did not set it.
 */
string getMimeIconName(const string &mtype, ConfTree *mhandlers)
{
    string iconname;
    mhandlers->get(mtype, iconname, "icons");
    return iconname;
}
/**
* Return decompression command line for given mime type
*/
bool getUncompressor(const string &mtype, ConfTree *mhandlers,
list<string>& cmd)
{
string hs;
mhandlers->get(mtype, hs, "");
if (hs.empty())
return false;
list<string> tokens;
ConfTree::stringToStrings(hs, tokens);
if (tokens.empty()) {
LOGERR(("getUncompressor: empty spec for mtype %s\n", mtype.c_str()));
return false;
}
if (stringlowercmp("uncompress", tokens.front()))
return false;
list<string>::iterator it = tokens.begin();
cmd.assign(++it, tokens.end());
return true;
}

View File

@ -1,6 +1,6 @@
#ifndef _MIMEHANDLER_H_INCLUDED_
#define _MIMEHANDLER_H_INCLUDED_
/* @(#$Id: mimehandler.h,v 1.9 2005-11-18 15:19:14 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: mimehandler.h,v 1.10 2005-11-21 14:31:24 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
@ -53,25 +53,6 @@ class MimeHandler {
* Return indexing handler object for the given mime type
* returned pointer should be deleted by caller
*/
extern MimeHandler *getMimeHandler(const std::string &mtyp, ConfTree *mhdlers);
/**
* Return external viewer exec string for given mime type
*/
extern std::string getMimeViewer(const std::string &mtyp, ConfTree *mhandlers);
/**
* Return icon name
*/
extern std::string getMimeIconName(const std::string &mtyp, ConfTree *mhandlers);
/**
* Return command to uncompress the given type. The returned command has
* substitutable places for input file name and temp dir name, and will
* return output name
*/
bool getUncompressor(const std::string &mtype, ConfTree *mhandlers,
std::list<std::string>& cmd);
extern MimeHandler *getMimeHandler(const std::string &mtyp, RclConfig *cfg);
#endif /* _MIMEHANDLER_H_INCLUDED_ */

View File

@ -20,6 +20,7 @@
* USA
* -----END-LICENCE-----
*/
#include <time.h>
#include "myhtmlparse.h"
@ -135,6 +136,19 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
string tmp = i->second;
decode_entities(tmp);
keywords += tmp;
} else if (name == "date") {
// Yes, this doesn't exist. It's output by filters,
// and the format isn't even standard http/html.
// FIXME
string tmp = i->second;
decode_entities(tmp);
struct tm tm;
if (strptime(tmp.c_str(),
" %Y-%m-%d %H:%M:%S ", &tm)) {
char ascuxtime[100];
sprintf(ascuxtime, "%ld", (long)mktime(&tm));
dmtime = ascuxtime;
}
} else if (name == "robots") {
string val = i->second;
decode_entities(val);

View File

@ -33,7 +33,7 @@ class MyHtmlParser : public HtmlParser {
bool in_script_tag;
bool in_style_tag;
bool pending_space;
string title, sample, keywords, dump;
string title, sample, keywords, dump, dmtime;
string ocharset; // This is the charset our user thinks the doc was
string charset; // This is the charset it was supposedly converted to
string doccharset; // Set this to value of charset parameter in header

View File

@ -52,8 +52,6 @@ mh_exec.o : ../common/mh_exec.cpp
$(CXX) $(CXXFLAGS) -c $<
mh_text.o : ../common/mh_text.cpp
$(CXX) $(CXXFLAGS) -c $<
mh_html.o : ../common/mh_html.cpp
$(CXX) $(CXXFLAGS) -c $<
htmlparse.o : ../common/htmlparse.cpp
$(CXX) $(CXXFLAGS) -c $<
idfile.o : ../utils/idfile.cpp

View File

@ -248,7 +248,7 @@ void RecollMain::reslistTE_doubleClicked(int par, int)
return;
// Look for appropriate viewer
string cmd = getMimeViewer(doc.mimetype, rclconfig->getMimeConf());
string cmd = rclconfig->getMimeViewerDef(doc.mimetype);
if (cmd.length() == 0) {
QMessageBox::warning(0, "Recoll",
tr("No external viewer configured for mime type ")
@ -409,8 +409,7 @@ void RecollMain::listNextPB_clicked()
string img_name;
if (showicons) {
string iconname = getMimeIconName(doc.mimetype,
rclconfig->getMimeConf());
string iconname = rclconfig->getMimeIconName(doc.mimetype);
if (iconname.empty())
iconname = "document";
string imgfile = iconsdir + "/" + iconname + ".png";

View File

@ -1,4 +1,4 @@
# @(#$Id: mimeconf,v 1.8 2005-11-16 15:07:20 dockes Exp $ (C) 2004 J.F.Dockes
# @(#$Id: mimeconf,v 1.9 2005-11-21 14:31:24 dockes Exp $ (C) 2004 J.F.Dockes
# Recoll : associations of mime types to processing filters.
# There are different sections for decompression, 'interning' for indexing
@ -45,6 +45,8 @@ application/vnd.sun.xml.writer = exec rclsoff
application/vnd.sun.xml.writer.global = exec rclsoff
application/vnd.sun.xml.writer.template = exec rclsoff
text/x-gaim-log = exec rclgaim
##
# External viewers, launched when you double-click a result entry
[view]

View File

@ -1,4 +1,4 @@
# @(#$Id: mimemap,v 1.8 2005-11-12 11:26:10 dockes Exp $ (C) 2004 J.F.Dockes
# @(#$Id: mimemap,v 1.9 2005-11-21 14:31:24 dockes Exp $ (C) 2004 J.F.Dockes
# Recoll: associations of file name extensions to mime types
.txt = text/plain
@ -55,3 +55,5 @@ recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz .log.gz .md5 .map \
.jpg .gif .bmp .xpm .png \
,v
[~/.gaim]
.txt = text/x-gaim-log

View File

@ -1,4 +1,4 @@
# @(#$Id: recoll.conf,v 1.8 2005-11-17 12:47:03 dockes Exp $ (C) 2004 J.F.Dockes
# @(#$Id: recoll.conf,v 1.9 2005-11-21 14:31:24 dockes Exp $ (C) 2004 J.F.Dockes
# Recoll default configuration file. This should be copied to
# ~/.recoll/recoll.conf
@ -8,7 +8,7 @@ topdirs = ~
# Wildcard expressions for names of files and directories that we should
# ignore:
skippedNames = *~ #* .* bin CVS Cache caughtspam
skippedNames = *~ #* .* bin CVS Cache caughtspam tmp
# Debug messages
loglevel = 4