allow specifying format and charset for ext filters. Cache and reuse filters
This commit is contained in:
parent
d05694fb82
commit
9082f3bf65
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.68 2008-07-29 06:25:29 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.69 2008-10-04 14:26:59 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -455,20 +455,17 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
// The not so nice point was that the file name was not
|
// The not so nice point was that the file name was not
|
||||||
// indexed.
|
// indexed.
|
||||||
//
|
//
|
||||||
// We now index at least the file name. We use a dirty
|
// We now index at least the file name and the mod time.
|
||||||
// hack to ensure that the indexing will be retried each
|
// We change the signature to ensure that the indexing will
|
||||||
// time: the stored number as decimal ascii mtime is
|
// be retried every time. This can make indexing passes much
|
||||||
// prefixed with a '+', which doesnt change its value for
|
// slower if there are many files of types with no helper
|
||||||
// atoll() but is tested by rcldb::needUpdate()
|
|
||||||
// Reset the date as set by the handler if any
|
|
||||||
doc.fmtime.erase();
|
doc.fmtime.erase();
|
||||||
// Go through:
|
// Go through:
|
||||||
}
|
}
|
||||||
|
|
||||||
if (doc.fmtime.empty()) {
|
if (doc.fmtime.empty()) {
|
||||||
// Set the date if this was not done in the document handler
|
// Set the date if this was not done in the document handler
|
||||||
doc.fmtime = (fis == FileInterner::FIError) ? plus + ascdate :
|
doc.fmtime = ascdate;
|
||||||
ascdate;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Internal access path for multi-document files
|
// Internal access path for multi-document files
|
||||||
@ -492,6 +489,14 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
// need for reversible formatting
|
// need for reversible formatting
|
||||||
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->st_mtime);
|
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->st_mtime);
|
||||||
doc.sig = cbuf;
|
doc.sig = cbuf;
|
||||||
|
// If there was an error, ensure indexing will be
|
||||||
|
// retried. This is for the once missing, later installed
|
||||||
|
// filter case. It can make indexing much slower (if there are
|
||||||
|
// myriads of such files, the ext script is executed for them
|
||||||
|
// and fails every time)
|
||||||
|
if (fis == FileInterner::FIError) {
|
||||||
|
doc.sig += plus;
|
||||||
|
}
|
||||||
|
|
||||||
// Add document to database. If there is an ipath, add it as a child
|
// Add document to database. If there is an ipath, add it as a child
|
||||||
// of the file document.
|
// of the file document.
|
||||||
|
|||||||
@ -157,11 +157,13 @@ namespace Dijon
|
|||||||
* that the client application can pass the nested document's content
|
* that the client application can pass the nested document's content
|
||||||
* to another filter that supports this particular type.
|
* to another filter that supports this particular type.
|
||||||
*/
|
*/
|
||||||
const std::map<std::string, std::string> &get_meta_data(void) const
|
virtual const std::map<std::string, std::string> &get_meta_data(void) const
|
||||||
{
|
{
|
||||||
return m_metaData;
|
return m_metaData;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual void clear() {m_metaData.clear();}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
/// The MIME type handled by the filter.
|
/// The MIME type handled by the filter.
|
||||||
std::string m_mimeType;
|
std::string m_mimeType;
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.43 2008-10-03 06:23:23 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.44 2008-10-04 14:26:59 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -203,8 +203,9 @@ FileInterner::~FileInterner()
|
|||||||
{
|
{
|
||||||
tmpcleanup();
|
tmpcleanup();
|
||||||
for (vector<Dijon::Filter*>::iterator it = m_handlers.begin();
|
for (vector<Dijon::Filter*>::iterator it = m_handlers.begin();
|
||||||
it != m_handlers.end(); it++)
|
it != m_handlers.end(); it++) {
|
||||||
delete *it;
|
returnMimeHandler(*it);
|
||||||
|
}
|
||||||
// m_tempfiles will take care of itself
|
// m_tempfiles will take care of itself
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -283,8 +284,10 @@ static inline bool getKeyValue(const map<string, string>& docdata,
|
|||||||
it = docdata.find(key);
|
it = docdata.find(key);
|
||||||
if (it != docdata.end()) {
|
if (it != docdata.end()) {
|
||||||
value = it->second;
|
value = it->second;
|
||||||
|
LOGDEB2(("getKeyValue: [%s]->[%s]\n", key.c_str(), value.c_str()));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
LOGDEB2(("getKeyValue: no value for [%s]\n", key.c_str()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -314,7 +317,7 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
|||||||
} else if (it->first == Rcl::Doc::keyoc) {
|
} else if (it->first == Rcl::Doc::keyoc) {
|
||||||
doc.origcharset = it->second;
|
doc.origcharset = it->second;
|
||||||
} else if (it->first == keymt || it->first == keycs) {
|
} else if (it->first == keymt || it->first == keycs) {
|
||||||
// don't need these.
|
// don't need/want these.
|
||||||
} else {
|
} else {
|
||||||
doc.meta[it->first] = it->second;
|
doc.meta[it->first] = it->second;
|
||||||
}
|
}
|
||||||
@ -338,7 +341,6 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath) const
|
|||||||
|
|
||||||
// If there is no ipath stack, the mimetype is the one from the file
|
// If there is no ipath stack, the mimetype is the one from the file
|
||||||
doc.mimetype = m_mimetype;
|
doc.mimetype = m_mimetype;
|
||||||
LOGDEB2(("INITIAL mimetype: %s\n", doc.mimetype.c_str()));
|
|
||||||
|
|
||||||
string ipathel;
|
string ipathel;
|
||||||
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
|
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
|
||||||
@ -382,7 +384,7 @@ void FileInterner::popHandler()
|
|||||||
m_tempfiles.pop_back();
|
m_tempfiles.pop_back();
|
||||||
m_tmpflgs[i] = false;
|
m_tmpflgs[i] = false;
|
||||||
}
|
}
|
||||||
delete m_handlers.back();
|
returnMimeHandler(m_handlers.back());
|
||||||
m_handlers.pop_back();
|
m_handlers.pop_back();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -430,8 +432,8 @@ int FileInterner::addHandler()
|
|||||||
m_forPreview ? "view" : "index");
|
m_forPreview ? "view" : "index");
|
||||||
newflt->set_property(Dijon::Filter::DEFAULT_CHARSET, charset);
|
newflt->set_property(Dijon::Filter::DEFAULT_CHARSET, charset);
|
||||||
|
|
||||||
// Get content: we don't use getkeyvalue() here to avoid copying
|
// Get current content: we don't use getkeyvalue() here to avoid
|
||||||
// the text, which may be big.
|
// copying the text, which may be big.
|
||||||
string ns;
|
string ns;
|
||||||
const string *txt = &ns;
|
const string *txt = &ns;
|
||||||
{
|
{
|
||||||
@ -469,9 +471,8 @@ int FileInterner::addHandler()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Information and debug after a next_document error
|
// Information and debug after a next_document error
|
||||||
void FileInterner::processNextDocError()
|
void FileInterner::processNextDocError(Rcl::Doc &doc, string& ipath)
|
||||||
{
|
{
|
||||||
Rcl::Doc doc; string ipath;
|
|
||||||
collectIpathAndMT(doc, ipath);
|
collectIpathAndMT(doc, ipath);
|
||||||
m_reason = m_handlers.back()->get_error();
|
m_reason = m_handlers.back()->get_error();
|
||||||
checkExternalMissing(m_reason);
|
checkExternalMissing(m_reason);
|
||||||
@ -530,7 +531,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
// might be ie an error while decoding an attachment, but we
|
// might be ie an error while decoding an attachment, but we
|
||||||
// still want to process the rest of the mbox! For preview: fatal.
|
// still want to process the rest of the mbox! For preview: fatal.
|
||||||
if (!m_handlers.back()->next_document()) {
|
if (!m_handlers.back()->next_document()) {
|
||||||
processNextDocError(); // Debug etc.
|
processNextDocError(doc, ipath);
|
||||||
if (m_forPreview)
|
if (m_forPreview)
|
||||||
return FIError;
|
return FIError;
|
||||||
popHandler();
|
popHandler();
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _INTERNFILE_H_INCLUDED_
|
#ifndef _INTERNFILE_H_INCLUDED_
|
||||||
#define _INTERNFILE_H_INCLUDED_
|
#define _INTERNFILE_H_INCLUDED_
|
||||||
/* @(#$Id: internfile.h,v 1.19 2008-10-03 06:23:23 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: internfile.h,v 1.20 2008-10-04 14:26:59 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -144,7 +144,7 @@ class FileInterner {
|
|||||||
void popHandler();
|
void popHandler();
|
||||||
int addHandler();
|
int addHandler();
|
||||||
void checkExternalMissing(const string& msg);
|
void checkExternalMissing(const string& msg);
|
||||||
void processNextDocError();
|
void processNextDocError(Rcl::Doc &doc, string& ipath);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mh_exec.cpp,v 1.11 2008-10-02 13:30:32 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mh_exec.cpp,v 1.12 2008-10-04 14:26:59 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -35,8 +35,8 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Execute an external program to translate a file from its native format
|
// Execute an external program to translate a file from its native
|
||||||
// to html. Then call the html parser to do the actual indexing
|
// format to text or html.
|
||||||
bool MimeHandlerExec::next_document()
|
bool MimeHandlerExec::next_document()
|
||||||
{
|
{
|
||||||
if (m_havedoc == false)
|
if (m_havedoc == false)
|
||||||
@ -59,29 +59,28 @@ bool MimeHandlerExec::next_document()
|
|||||||
if (!m_ipath.empty())
|
if (!m_ipath.empty())
|
||||||
myparams.push_back(m_ipath);
|
myparams.push_back(m_ipath);
|
||||||
|
|
||||||
// Execute command and store the result text, which is supposedly html
|
// Execute command and store the result text
|
||||||
string& html = m_metaData["content"];
|
string& output = m_metaData["content"];
|
||||||
html.erase();
|
output.erase();
|
||||||
ExecCmd mexec;
|
ExecCmd mexec;
|
||||||
MEAdv adv;
|
MEAdv adv;
|
||||||
mexec.setAdvise(&adv);
|
mexec.setAdvise(&adv);
|
||||||
mexec.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
mexec.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
||||||
"RECOLL_FILTER_FORPREVIEW=no");
|
"RECOLL_FILTER_FORPREVIEW=no");
|
||||||
int status = mexec.doexec(cmd, myparams, 0, &html);
|
int status = mexec.doexec(cmd, myparams, 0, &output);
|
||||||
if (status) {
|
if (status) {
|
||||||
LOGERR(("MimeHandlerExec: command status 0x%x: %s\n",
|
LOGERR(("MimeHandlerExec: command status 0x%x: %s\n",
|
||||||
status, cmd.c_str()));
|
status, cmd.c_str()));
|
||||||
// If the output string begins with RECFILTERROR, then it's
|
// If the output string begins with RECFILTERROR, then it's
|
||||||
// interpretable error information
|
// interpretable error information
|
||||||
if (html.find("RECFILTERROR") == 0)
|
if (output.find("RECFILTERROR") == 0)
|
||||||
m_reason = html;
|
m_reason = output;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_metaData["origcharset"] = m_defcharset;
|
m_metaData["origcharset"] = m_defcharset;
|
||||||
// Default charset: all recoll filters output utf-8, but this
|
// Default charset: all recoll filters output utf-8, but this
|
||||||
// could still be overridden by the content-type meta tag.
|
// could still be overridden by the content-type meta tag.
|
||||||
m_metaData["charset"] = "utf-8";
|
m_metaData["charset"] = cfgCharset.empty() ? "utf-8" : cfgCharset;
|
||||||
m_metaData["mimetype"] = "text/html";
|
m_metaData["mimetype"] = cfgMtype.empty() ? "text/html" : cfgMtype;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _MH_EXEC_H_INCLUDED_
|
#ifndef _MH_EXEC_H_INCLUDED_
|
||||||
#define _MH_EXEC_H_INCLUDED_
|
#define _MH_EXEC_H_INCLUDED_
|
||||||
/* @(#$Id: mh_exec.h,v 1.6 2008-10-02 13:30:32 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mh_exec.h,v 1.7 2008-10-04 14:26:59 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
@ -33,7 +33,16 @@ using std::string;
|
|||||||
*/
|
*/
|
||||||
class MimeHandlerExec : public RecollFilter {
|
class MimeHandlerExec : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
|
// params, cfgMtype and cfgCharset do not get reset by
|
||||||
|
// clear(). They define what I am
|
||||||
list<string> params;
|
list<string> params;
|
||||||
|
// The default for external filters is to output html except if defined
|
||||||
|
// otherwise in the config.
|
||||||
|
string cfgMtype;
|
||||||
|
// For ext programs which don't output html, the output charset
|
||||||
|
// has to be known: ie they have a --charset utf-8 like option.
|
||||||
|
string cfgCharset;
|
||||||
|
|
||||||
MimeHandlerExec(const string& mt) : RecollFilter(mt) {}
|
MimeHandlerExec(const string& mt) : RecollFilter(mt) {}
|
||||||
virtual ~MimeHandlerExec() {}
|
virtual ~MimeHandlerExec() {}
|
||||||
virtual bool set_document_file(const string &file_path) {
|
virtual bool set_document_file(const string &file_path) {
|
||||||
@ -46,6 +55,12 @@ class MimeHandlerExec : public RecollFilter {
|
|||||||
m_ipath = ipath;
|
m_ipath = ipath;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
virtual void clear() {
|
||||||
|
m_fn.erase();
|
||||||
|
m_ipath.erase();
|
||||||
|
RecollFilter::clear();
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
string m_fn;
|
string m_fn;
|
||||||
string m_ipath;
|
string m_ipath;
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _HTML_H_INCLUDED_
|
#ifndef _HTML_H_INCLUDED_
|
||||||
#define _HTML_H_INCLUDED_
|
#define _HTML_H_INCLUDED_
|
||||||
/* @(#$Id: mh_html.h,v 1.11 2008-10-03 06:17:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mh_html.h,v 1.12 2008-10-04 14:26:59 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
@ -41,7 +41,11 @@ class MimeHandlerHtml : public RecollFilter {
|
|||||||
{
|
{
|
||||||
return m_html;
|
return m_html;
|
||||||
}
|
}
|
||||||
|
virtual void clear() {
|
||||||
|
m_filename.erase();
|
||||||
|
m_html.erase();
|
||||||
|
RecollFilter::clear();
|
||||||
|
}
|
||||||
private:
|
private:
|
||||||
string m_filename;
|
string m_filename;
|
||||||
string m_html;
|
string m_html;
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.34 2008-09-16 08:13:45 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.35 2008-10-04 14:26:59 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -54,14 +54,24 @@ static const string cstr_title = "title";
|
|||||||
|
|
||||||
MimeHandlerMail::~MimeHandlerMail()
|
MimeHandlerMail::~MimeHandlerMail()
|
||||||
{
|
{
|
||||||
delete m_bincdoc;
|
clear();
|
||||||
if (m_fd >= 0)
|
}
|
||||||
|
void MimeHandlerMail::clear()
|
||||||
|
{
|
||||||
|
delete m_bincdoc; m_bincdoc = 0;
|
||||||
|
if (m_fd >= 0) {
|
||||||
close(m_fd);
|
close(m_fd);
|
||||||
delete m_stream;
|
m_fd = -1;
|
||||||
|
}
|
||||||
|
delete m_stream; m_stream = 0;
|
||||||
|
m_idx = -1;
|
||||||
|
m_subject.erase();
|
||||||
for (vector<MHMailAttach*>::iterator it = m_attachments.begin();
|
for (vector<MHMailAttach*>::iterator it = m_attachments.begin();
|
||||||
it != m_attachments.end(); it++) {
|
it != m_attachments.end(); it++) {
|
||||||
delete *it;
|
delete *it;
|
||||||
}
|
}
|
||||||
|
m_attachments.clear();
|
||||||
|
RecollFilter::clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MimeHandlerMail::set_document_file(const string &fn)
|
bool MimeHandlerMail::set_document_file(const string &fn)
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _MAIL_H_INCLUDED_
|
#ifndef _MAIL_H_INCLUDED_
|
||||||
#define _MAIL_H_INCLUDED_
|
#define _MAIL_H_INCLUDED_
|
||||||
/* @(#$Id: mh_mail.h,v 1.12 2007-10-17 11:40:35 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mh_mail.h,v 1.13 2008-10-04 14:26:59 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -51,6 +51,7 @@ class MimeHandlerMail : public RecollFilter {
|
|||||||
}
|
}
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
virtual bool skip_to_document(const string& ipath);
|
virtual bool skip_to_document(const string& ipath);
|
||||||
|
virtual void clear();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool processMsg(Binc::MimePart *doc, int depth);
|
bool processMsg(Binc::MimePart *doc, int depth);
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mh_mbox.cpp,v 1.4 2008-08-29 13:05:12 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mh_mbox.cpp,v 1.5 2008-10-04 14:26:59 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -39,10 +39,19 @@ using namespace std;
|
|||||||
|
|
||||||
MimeHandlerMbox::~MimeHandlerMbox()
|
MimeHandlerMbox::~MimeHandlerMbox()
|
||||||
{
|
{
|
||||||
|
clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void MimeHandlerMbox::clear()
|
||||||
|
{
|
||||||
|
m_fn.erase();
|
||||||
if (m_vfp) {
|
if (m_vfp) {
|
||||||
fclose((FILE *)m_vfp);
|
fclose((FILE *)m_vfp);
|
||||||
m_vfp = 0;
|
m_vfp = 0;
|
||||||
}
|
}
|
||||||
|
m_msgnum = m_lineno = 0;
|
||||||
|
m_ipath.erase();
|
||||||
|
RecollFilter::clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MimeHandlerMbox::set_document_file(const string &fn)
|
bool MimeHandlerMbox::set_document_file(const string &fn)
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _MBOX_H_INCLUDED_
|
#ifndef _MBOX_H_INCLUDED_
|
||||||
#define _MBOX_H_INCLUDED_
|
#define _MBOX_H_INCLUDED_
|
||||||
/* @(#$Id: mh_mbox.h,v 1.2 2007-10-03 14:53:37 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mh_mbox.h,v 1.3 2008-10-04 14:26:59 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using std::string;
|
using std::string;
|
||||||
@ -40,7 +40,7 @@ class MimeHandlerMbox : public RecollFilter {
|
|||||||
m_ipath = ipath;
|
m_ipath = ipath;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
virtual void clear();
|
||||||
private:
|
private:
|
||||||
string m_fn; // File name
|
string m_fn; // File name
|
||||||
void *m_vfp; // File pointer for folder
|
void *m_vfp; // File pointer for folder
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _MH_TEXT_H_INCLUDED_
|
#ifndef _MH_TEXT_H_INCLUDED_
|
||||||
#define _MH_TEXT_H_INCLUDED_
|
#define _MH_TEXT_H_INCLUDED_
|
||||||
/* @(#$Id: mh_text.h,v 1.4 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mh_text.h,v 1.5 2008-10-04 14:26:59 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using std::string;
|
using std::string;
|
||||||
@ -40,6 +40,11 @@ class MimeHandlerText : public RecollFilter {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
|
virtual void clear()
|
||||||
|
{
|
||||||
|
m_text.erase();
|
||||||
|
RecollFilter::clear();
|
||||||
|
}
|
||||||
private:
|
private:
|
||||||
string m_text;
|
string m_text;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _MH_UNKNOWN_H_INCLUDED_
|
#ifndef _MH_UNKNOWN_H_INCLUDED_
|
||||||
#define _MH_UNKNOWN_H_INCLUDED_
|
#define _MH_UNKNOWN_H_INCLUDED_
|
||||||
/* @(#$Id: mh_unknown.h,v 1.2 2006-12-15 12:40:02 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mh_unknown.h,v 1.3 2008-10-04 14:26:59 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
@ -44,6 +44,9 @@ class MimeHandlerUnknown : public RecollFilter {
|
|||||||
m_metaData["mimetype"] = "text/plain";
|
m_metaData["mimetype"] = "text/plain";
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
virtual void clear() {
|
||||||
|
RecollFilter::clear();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _MH_UNKNOWN_H_INCLUDED_ */
|
#endif /* _MH_UNKNOWN_H_INCLUDED_ */
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.22 2007-11-16 14:28:52 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.23 2008-10-04 14:26:59 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -35,6 +35,9 @@ using namespace std;
|
|||||||
#include "mh_text.h"
|
#include "mh_text.h"
|
||||||
#include "mh_unknown.h"
|
#include "mh_unknown.h"
|
||||||
|
|
||||||
|
// Pool of already known and created handlers
|
||||||
|
static map<string, Dijon::Filter*> o_handlers;
|
||||||
|
|
||||||
/** Create internal handler object appropriate for given mime type */
|
/** Create internal handler object appropriate for given mime type */
|
||||||
static Dijon::Filter *mhFactory(const string &mime)
|
static Dijon::Filter *mhFactory(const string &mime)
|
||||||
{
|
{
|
||||||
@ -52,16 +55,103 @@ static Dijon::Filter *mhFactory(const string &mime)
|
|||||||
return new MimeHandlerUnknown(lmime);
|
return new MimeHandlerUnknown(lmime);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/**
|
||||||
* Return handler object for given mime type:
|
* Create a filter that executes an external program or script
|
||||||
|
* A filter def can look like.
|
||||||
|
* exec someprog -v -t " h i j";charset= xx; mimetype=yy
|
||||||
|
* We don't support ';' inside a quoted string for now. Can't see a use
|
||||||
|
* for it
|
||||||
*/
|
*/
|
||||||
|
// Build a MimeHandlerExec from a handler definition string.
//
// cfg:   configuration object, used to resolve the filter program through
//        findFilter().
// mtype: mime type the new handler is created for.
// hs:    full definition, for example
//            exec someprog -v -t " h i j";charset= xx; mimetype=yy
//        The part before the first ';' is the command line. The following
//        ';'-separated name=value pairs may set "charset" and "mimetype";
//        other names and entries without '=' are ignored.
//
// Returns a newly allocated handler, or 0 if the definition is malformed.
MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs)
{
    list<string> semicolist;
    stringToTokens(hs, semicolist, ";");
    // Test the token list rather than the input string: begin() is
    // dereferenced just below, which is only valid if at least one token
    // was produced. NOTE(review): presumably a non-empty hs holding only
    // separators yields no token - confirm against stringToTokens().
    if (semicolist.empty()) {
        LOGERR(("mhExecFactory: bad filter def: [%s]\n", hs.c_str()));
        return 0;
    }
    string& cmd = semicolist.front();

    list<string> toks;
    stringToStrings(cmd, toks);
    if (toks.size() < 2) {
        LOGERR(("mhExecFactory: bad config line for [%s]: [%s]\n",
                mtype.c_str(), hs.c_str()));
        return 0;
    }

    MimeHandlerExec *h = new MimeHandlerExec(mtype.c_str());

    // toks holds at least 2 elements (checked just above). The first token
    // ("exec" in the example) is skipped, the second one is the program
    // name, resolved through findFilter(), and the remaining ones are
    // passed through unchanged as parameters.
    list<string>::iterator it = toks.begin();
    it++;
    h->params.push_back(cfg->findFilter(*it++));
    h->params.insert(h->params.end(), it, toks.end());

    // Handle additional parameters: everything after the first ';'
    it = semicolist.begin();
    it++;
    for (; it != semicolist.end(); it++) {
        string &line = *it;
        string::size_type eqpos = line.find("=");
        if (eqpos == string::npos)
            continue;
        // Compute name and value, trim white space
        string nm = line.substr(0, eqpos);
        trimstring(nm);
        string val = line.substr(eqpos + 1, string::npos);
        trimstring(val);
        if (nm == "charset") {
            h->cfgCharset = val;
        } else if (nm == "mimetype") {
            h->cfgMtype = val;
        }
    }

#if 0
    string sparams;
    for (it = h->params.begin(); it != h->params.end(); it++) {
        sparams += string("[") + *it + "] ";
    }
    LOGDEB(("mhExecFactory:mt [%s] cfgmt [%s] cfgcs [%s] params: [%s]\n",
            mtype.c_str(), h->cfgMtype.c_str(), h->cfgCharset.c_str(),
            sparams.c_str()));
#endif

    return h;
}
|
||||||
|
|
||||||
|
/* Return mime handler to pool */
|
||||||
|
void returnMimeHandler(Dijon::Filter *handler)
|
||||||
|
{
|
||||||
|
if (handler) {
|
||||||
|
handler->clear();
|
||||||
|
o_handlers[handler->get_mime_type()] = handler;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get handler/filter object for given mime type: */
|
||||||
Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||||
bool filtertypes)
|
bool filtertypes)
|
||||||
{
|
{
|
||||||
|
// No mime type: there is nothing to look up. The function returns a
// pointer, so report failure with a null pointer, not the boolean 'false'.
if (mtype.empty())
    return 0;
|
||||||
|
|
||||||
|
// Do we already have one ?
|
||||||
|
map<string, Dijon::Filter *>::iterator it = o_handlers.find(mtype);
|
||||||
|
if (it != o_handlers.end()) {
|
||||||
|
Dijon::Filter *h = it->second;
|
||||||
|
o_handlers.erase(it);
|
||||||
|
LOGDEB2(("getMimeHandler: found in cache\n"));
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
// Get handler definition for mime type
|
// Get handler definition for mime type
|
||||||
string hs;
|
string hs;
|
||||||
if (!mtype.empty())
|
hs = cfg->getMimeHandlerDef(mtype, filtertypes);
|
||||||
hs = cfg->getMimeHandlerDef(mtype, filtertypes);
|
|
||||||
|
|
||||||
if (!hs.empty()) {
|
if (!hs.empty()) {
|
||||||
// Break definition into type and name
|
// Break definition into type and name
|
||||||
@ -84,11 +174,7 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
|||||||
mtype.c_str(), hs.c_str()));
|
mtype.c_str(), hs.c_str()));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
MimeHandlerExec *h = new MimeHandlerExec(mtype.c_str());
|
return mhExecFactory(cfg, mtype, hs);
|
||||||
it++;
|
|
||||||
h->params.push_back(cfg->findFilter(*it++));
|
|
||||||
h->params.insert(h->params.end(), it, toks.end());
|
|
||||||
return h;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _MIMEHANDLER_H_INCLUDED_
|
#ifndef _MIMEHANDLER_H_INCLUDED_
|
||||||
#define _MIMEHANDLER_H_INCLUDED_
|
#define _MIMEHANDLER_H_INCLUDED_
|
||||||
/* @(#$Id: mimehandler.h,v 1.15 2007-11-16 14:28:52 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mimehandler.h,v 1.16 2008-10-04 14:26:59 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
@ -76,6 +76,11 @@ public:
|
|||||||
return m_reason;
|
return m_reason;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual void clear() {
|
||||||
|
m_forPreview = m_havedoc = false;
|
||||||
|
Dijon::Filter::clear();
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool m_forPreview;
|
bool m_forPreview;
|
||||||
string m_defcharset;
|
string m_defcharset;
|
||||||
@ -92,9 +97,11 @@ protected:
|
|||||||
* indexedmimetypes (if this is set at all).
|
* indexedmimetypes (if this is set at all).
|
||||||
*/
|
*/
|
||||||
extern Dijon::Filter *getMimeHandler(const std::string &mtyp, RclConfig *cfg,
|
extern Dijon::Filter *getMimeHandler(const std::string &mtyp, RclConfig *cfg,
|
||||||
|
|
||||||
bool filtertypes=false);
|
bool filtertypes=false);
|
||||||
|
/// Free up filter for reuse (you can also delete it)
|
||||||
|
extern void returnMimeHandler(Dijon::Filter *);
|
||||||
|
|
||||||
/// Can this mime type be interned ?
|
/// Can this mime type be interned ?
|
||||||
extern bool canIntern(const std::string mimetype, RclConfig *cfg);
|
extern bool canIntern(const std::string mimetype, RclConfig *cfg);
|
||||||
|
|
||||||
#endif /* _MIMEHANDLER_H_INCLUDED_ */
|
#endif /* _MIMEHANDLER_H_INCLUDED_ */
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user