Changed the mime handler cache key (it was previously the mime type) to avoid having multiple copies of the same filter when it is applied to different mime types. This greatly reduces the number of processes during indexing, with no impact on performance.
This commit is contained in:
parent
62ca9549a3
commit
a7728ceb91
@ -52,9 +52,10 @@ namespace Dijon
|
|||||||
class Filter
|
class Filter
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
/// Builds an empty filter.
|
|
||||||
Filter(const std::string &mime_type) : m_mimeType(mime_type) {}
|
|
||||||
/// Destroys the filter.
|
/// Destroys the filter.
|
||||||
|
Filter()
|
||||||
|
{
|
||||||
|
}
|
||||||
virtual ~Filter() {}
|
virtual ~Filter() {}
|
||||||
virtual void setConfig(RclConfig *) = 0;
|
virtual void setConfig(RclConfig *) = 0;
|
||||||
|
|
||||||
@ -63,7 +64,8 @@ namespace Dijon
|
|||||||
/** What data a filter supports as input.
|
/** What data a filter supports as input.
|
||||||
* It can be either the whole document data, its file name, or its URI.
|
* It can be either the whole document data, its file name, or its URI.
|
||||||
*/
|
*/
|
||||||
typedef enum { DOCUMENT_DATA=0, DOCUMENT_STRING, DOCUMENT_FILE_NAME, DOCUMENT_URI } DataInput;
|
typedef enum { DOCUMENT_DATA=0, DOCUMENT_STRING, DOCUMENT_FILE_NAME,
|
||||||
|
DOCUMENT_URI } DataInput;
|
||||||
|
|
||||||
/** Input properties supported by the filter.
|
/** Input properties supported by the filter.
|
||||||
*
|
*
|
||||||
@ -94,7 +96,8 @@ namespace Dijon
|
|||||||
/** Sets a property, prior to calling set_document_XXX().
|
/** Sets a property, prior to calling set_document_XXX().
|
||||||
* Returns false if the property is not supported.
|
* Returns false if the property is not supported.
|
||||||
*/
|
*/
|
||||||
virtual bool set_property(Properties prop_name, const std::string &prop_value) = 0;
|
virtual bool set_property(Properties prop_name,
|
||||||
|
const std::string &prop_value) = 0;
|
||||||
|
|
||||||
/** (Re)initializes the filter with the given data.
|
/** (Re)initializes the filter with the given data.
|
||||||
* Caller should ensure the given pointer is valid until the
|
* Caller should ensure the given pointer is valid until the
|
||||||
@ -103,25 +106,30 @@ namespace Dijon
|
|||||||
* Call next_document() to position the filter onto the first document.
|
* Call next_document() to position the filter onto the first document.
|
||||||
* Returns false if this input is not supported or an error occurred.
|
* Returns false if this input is not supported or an error occurred.
|
||||||
*/
|
*/
|
||||||
virtual bool set_document_data(const char *data_ptr, unsigned int data_length) = 0;
|
virtual bool set_document_data(const std::string& mtype,
|
||||||
|
const char *data_ptr,
|
||||||
|
unsigned int data_length) = 0;
|
||||||
|
|
||||||
/** (Re)initializes the filter with the given data.
|
/** (Re)initializes the filter with the given data.
|
||||||
* Call next_document() to position the filter onto the first document.
|
* Call next_document() to position the filter onto the first document.
|
||||||
* Returns false if this input is not supported or an error occurred.
|
* Returns false if this input is not supported or an error occurred.
|
||||||
*/
|
*/
|
||||||
virtual bool set_document_string(const std::string &data_str) = 0;
|
virtual bool set_document_string(const std::string& mtype,
|
||||||
|
const std::string &data_str) = 0;
|
||||||
|
|
||||||
/** (Re)initializes the filter with the given file.
|
/** (Re)initializes the filter with the given file.
|
||||||
* Call next_document() to position the filter onto the first document.
|
* Call next_document() to position the filter onto the first document.
|
||||||
* Returns false if this input is not supported or an error occurred.
|
* Returns false if this input is not supported or an error occurred.
|
||||||
*/
|
*/
|
||||||
virtual bool set_document_file(const std::string &file_path) = 0;
|
virtual bool set_document_file(const std::string& mtype,
|
||||||
|
const std::string &file_path) = 0;
|
||||||
|
|
||||||
/** (Re)initializes the filter with the given URI.
|
/** (Re)initializes the filter with the given URI.
|
||||||
* Call next_document() to position the filter onto the first document.
|
* Call next_document() to position the filter onto the first document.
|
||||||
* Returns false if this input is not supported or an error occurred.
|
* Returns false if this input is not supported or an error occurred.
|
||||||
*/
|
*/
|
||||||
virtual bool set_document_uri(const std::string &uri) = 0;
|
virtual bool set_document_uri(const std::string& mtype,
|
||||||
|
const std::string &uri) = 0;
|
||||||
|
|
||||||
/** Set the document size meta_data element. This is the size
|
/** Set the document size meta_data element. This is the size
|
||||||
of the immediate containing file (ie, a .doc, a .odt), not
|
of the immediate containing file (ie, a .doc, a .odt), not
|
||||||
|
|||||||
@ -263,7 +263,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
|
|||||||
|
|
||||||
// Look for appropriate handler (might still return empty)
|
// Look for appropriate handler (might still return empty)
|
||||||
m_mimetype = l_mime;
|
m_mimetype = l_mime;
|
||||||
Dijon::Filter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview);
|
RecollFilter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview);
|
||||||
|
|
||||||
if (!df or df->is_unknown()) {
|
if (!df or df->is_unknown()) {
|
||||||
// No real handler for this type, for now :(
|
// No real handler for this type, for now :(
|
||||||
@ -284,7 +284,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
|
|||||||
#endif //RCL_USE_XATTR
|
#endif //RCL_USE_XATTR
|
||||||
|
|
||||||
df->set_docsize(docsize);
|
df->set_docsize(docsize);
|
||||||
if (!df->set_document_file(m_fn)) {
|
if (!df->set_document_file(l_mime, m_fn)) {
|
||||||
delete df;
|
delete df;
|
||||||
LOGERR(("FileInterner:: error converting %s\n", m_fn.c_str()));
|
LOGERR(("FileInterner:: error converting %s\n", m_fn.c_str()));
|
||||||
return;
|
return;
|
||||||
@ -315,7 +315,7 @@ void FileInterner::init(const string &data, RclConfig *cnf,
|
|||||||
m_mimetype = imime;
|
m_mimetype = imime;
|
||||||
|
|
||||||
// Look for appropriate handler (might still return empty)
|
// Look for appropriate handler (might still return empty)
|
||||||
Dijon::Filter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview);
|
RecollFilter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview);
|
||||||
|
|
||||||
if (!df) {
|
if (!df) {
|
||||||
// No handler for this type, for now :( if indexallfilenames
|
// No handler for this type, for now :( if indexallfilenames
|
||||||
@ -329,13 +329,13 @@ void FileInterner::init(const string &data, RclConfig *cnf,
|
|||||||
bool result = false;
|
bool result = false;
|
||||||
df->set_docsize(data.length());
|
df->set_docsize(data.length());
|
||||||
if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
|
if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
|
||||||
result = df->set_document_string(data);
|
result = df->set_document_string(m_mimetype, data);
|
||||||
} else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) {
|
} else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) {
|
||||||
result = df->set_document_data(data.c_str(), data.length());
|
result = df->set_document_data(m_mimetype, data.c_str(), data.length());
|
||||||
} else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
|
} else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
|
||||||
TempFile temp = dataToTempFile(data, m_mimetype);
|
TempFile temp = dataToTempFile(data, m_mimetype);
|
||||||
if (temp.isNotNull() &&
|
if (temp.isNotNull() &&
|
||||||
(result = df->set_document_file(temp->filename()))) {
|
(result = df->set_document_file(m_mimetype, temp->filename()))) {
|
||||||
m_tmpflgs[m_handlers.size()] = true;
|
m_tmpflgs[m_handlers.size()] = true;
|
||||||
m_tempfiles.push_back(temp);
|
m_tempfiles.push_back(temp);
|
||||||
}
|
}
|
||||||
@ -406,7 +406,7 @@ bool FileInterner::makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig)
|
|||||||
|
|
||||||
FileInterner::~FileInterner()
|
FileInterner::~FileInterner()
|
||||||
{
|
{
|
||||||
for (vector<Dijon::Filter*>::iterator it = m_handlers.begin();
|
for (vector<RecollFilter*>::iterator it = m_handlers.begin();
|
||||||
it != m_handlers.end(); it++) {
|
it != m_handlers.end(); it++) {
|
||||||
returnMimeHandler(*it);
|
returnMimeHandler(*it);
|
||||||
}
|
}
|
||||||
@ -548,7 +548,7 @@ static inline bool getKeyValue(const map<string, string>& docdata,
|
|||||||
|
|
||||||
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
||||||
{
|
{
|
||||||
Dijon::Filter *df = m_handlers.back();
|
RecollFilter *df = m_handlers.back();
|
||||||
if (df == 0) {
|
if (df == 0) {
|
||||||
//??
|
//??
|
||||||
LOGERR(("FileInterner::dijontorcl: null top handler ??\n"));
|
LOGERR(("FileInterner::dijontorcl: null top handler ??\n"));
|
||||||
@ -632,7 +632,7 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const
|
|||||||
doc.mimetype = m_mimetype;
|
doc.mimetype = m_mimetype;
|
||||||
|
|
||||||
string ipathel;
|
string ipathel;
|
||||||
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
|
for (vector<RecollFilter*>::const_iterator hit = m_handlers.begin();
|
||||||
hit != m_handlers.end(); hit++) {
|
hit != m_handlers.end(); hit++) {
|
||||||
const map<string, string>& docdata = (*hit)->get_meta_data();
|
const map<string, string>& docdata = (*hit)->get_meta_data();
|
||||||
if (getKeyValue(docdata, cstr_dj_keyipath, ipathel)) {
|
if (getKeyValue(docdata, cstr_dj_keyipath, ipathel)) {
|
||||||
@ -714,7 +714,7 @@ int FileInterner::addHandler()
|
|||||||
return ADD_CONTINUE;
|
return ADD_CONTINUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
Dijon::Filter *newflt = getMimeHandler(mimetype, m_cfg);
|
RecollFilter *newflt = getMimeHandler(mimetype, m_cfg);
|
||||||
if (!newflt) {
|
if (!newflt) {
|
||||||
// If we can't find a handler, this doc can't be handled
|
// If we can't find a handler, this doc can't be handled
|
||||||
// but there can be other ones so we go on
|
// but there can be other ones so we go on
|
||||||
@ -740,13 +740,13 @@ int FileInterner::addHandler()
|
|||||||
bool setres = false;
|
bool setres = false;
|
||||||
newflt->set_docsize(txt->length());
|
newflt->set_docsize(txt->length());
|
||||||
if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
|
if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
|
||||||
setres = newflt->set_document_string(*txt);
|
setres = newflt->set_document_string(mimetype, *txt);
|
||||||
} else if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) {
|
} else if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) {
|
||||||
setres = newflt->set_document_data(txt->c_str(), txt->length());
|
setres = newflt->set_document_data(mimetype,txt->c_str(),txt->length());
|
||||||
} else if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
|
} else if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
|
||||||
TempFile temp = dataToTempFile(*txt, mimetype);
|
TempFile temp = dataToTempFile(*txt, mimetype);
|
||||||
if (temp.isNotNull() &&
|
if (temp.isNotNull() &&
|
||||||
(setres = newflt->set_document_file(temp->filename()))) {
|
(setres = newflt->set_document_file(mimetype, temp->filename()))) {
|
||||||
m_tmpflgs[m_handlers.size()] = true;
|
m_tmpflgs[m_handlers.size()] = true;
|
||||||
m_tempfiles.push_back(temp);
|
m_tempfiles.push_back(temp);
|
||||||
// Hack here, but really helps perfs: if we happen to
|
// Hack here, but really helps perfs: if we happen to
|
||||||
|
|||||||
@ -28,7 +28,7 @@ using std::vector;
|
|||||||
using std::map;
|
using std::map;
|
||||||
using std::set;
|
using std::set;
|
||||||
|
|
||||||
#include "Filter.h"
|
#include "mimehandler.h"
|
||||||
#include "uncomp.h"
|
#include "uncomp.h"
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
|
|
||||||
@ -262,7 +262,7 @@ class FileInterner {
|
|||||||
|
|
||||||
// Filter stack, path to the current document from which we're
|
// Filter stack, path to the current document from which we're
|
||||||
// fetching subdocs
|
// fetching subdocs
|
||||||
vector<Dijon::Filter*> m_handlers;
|
vector<RecollFilter*> m_handlers;
|
||||||
// Temporary files used for decoding the current stack
|
// Temporary files used for decoding the current stack
|
||||||
bool m_tmpflgs[MAXHANDLERS];
|
bool m_tmpflgs[MAXHANDLERS];
|
||||||
vector<TempFile> m_tempfiles;
|
vector<TempFile> m_tempfiles;
|
||||||
|
|||||||
@ -14,6 +14,14 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
#include "autoconfig.h"
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
|
||||||
|
#include <list>
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
#include "cstr.h"
|
#include "cstr.h"
|
||||||
#include "execmd.h"
|
#include "execmd.h"
|
||||||
#include "mh_exec.h"
|
#include "mh_exec.h"
|
||||||
@ -24,13 +32,6 @@
|
|||||||
#include "md5.h"
|
#include "md5.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/wait.h>
|
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
|
||||||
using namespace std;
|
|
||||||
#endif /* NO_NAMESPACES */
|
|
||||||
|
|
||||||
// This is called periodically by ExeCmd when it is waiting for data,
|
// This is called periodically by ExeCmd when it is waiting for data,
|
||||||
// or when it does receive some. We may choose to interrupt the
|
// or when it does receive some. We may choose to interrupt the
|
||||||
// command.
|
// command.
|
||||||
|
|||||||
@ -56,11 +56,11 @@ class MimeHandlerExec : public RecollFilter {
|
|||||||
bool missingHelper;
|
bool missingHelper;
|
||||||
////////////////
|
////////////////
|
||||||
|
|
||||||
MimeHandlerExec(RclConfig *cnf, const string& mt)
|
MimeHandlerExec(RclConfig *cnf, const string& id)
|
||||||
: RecollFilter(cnf, mt), missingHelper(false)
|
: RecollFilter(cnf, id), missingHelper(false)
|
||||||
{}
|
{}
|
||||||
virtual bool set_document_file(const string &file_path) {
|
virtual bool set_document_file(const string& mt, const string &file_path) {
|
||||||
RecollFilter::set_document_file(file_path);
|
RecollFilter::set_document_file(mt, file_path);
|
||||||
m_fn = file_path;
|
m_fn = file_path;
|
||||||
m_havedoc = true;
|
m_havedoc = true;
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@ -102,14 +102,14 @@ class MimeHandlerExecMultiple : public MimeHandlerExec {
|
|||||||
/////// End un-cleared stuff.
|
/////// End un-cleared stuff.
|
||||||
|
|
||||||
public:
|
public:
|
||||||
MimeHandlerExecMultiple(RclConfig *cnf, const string& mt)
|
MimeHandlerExecMultiple(RclConfig *cnf, const string& id)
|
||||||
: MimeHandlerExec(cnf, mt)
|
: MimeHandlerExec(cnf, id)
|
||||||
{}
|
{}
|
||||||
// No resources to clean up, the ExecCmd destructor does it.
|
// No resources to clean up, the ExecCmd destructor does it.
|
||||||
virtual ~MimeHandlerExecMultiple() {}
|
virtual ~MimeHandlerExecMultiple() {}
|
||||||
virtual bool set_document_file(const string &file_path) {
|
virtual bool set_document_file(const string& mt, const string &file_path) {
|
||||||
m_filefirst = true;
|
m_filefirst = true;
|
||||||
return MimeHandlerExec::set_document_file(file_path);
|
return MimeHandlerExec::set_document_file(mt, file_path);
|
||||||
}
|
}
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
|
|
||||||
|
|||||||
@ -34,21 +34,23 @@ using namespace std;
|
|||||||
#endif /* NO_NAMESPACES */
|
#endif /* NO_NAMESPACES */
|
||||||
|
|
||||||
|
|
||||||
bool MimeHandlerHtml::set_document_file(const string &fn)
|
bool MimeHandlerHtml::set_document_file(const string& mt, const string &fn)
|
||||||
{
|
{
|
||||||
LOGDEB0(("textHtmlToDoc: %s\n", fn.c_str()));
|
LOGDEB0(("textHtmlToDoc: %s\n", fn.c_str()));
|
||||||
RecollFilter::set_document_file(fn);
|
RecollFilter::set_document_file(mt, fn);
|
||||||
string otext;
|
string otext;
|
||||||
if (!file_to_string(fn, otext)) {
|
if (!file_to_string(fn, otext)) {
|
||||||
LOGINFO(("textHtmlToDoc: cant read: %s\n", fn.c_str()));
|
LOGINFO(("textHtmlToDoc: cant read: %s\n", fn.c_str()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
m_filename = fn;
|
m_filename = fn;
|
||||||
return set_document_string(otext);
|
return set_document_string(mt, otext);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MimeHandlerHtml::set_document_string(const string& htext)
|
bool MimeHandlerHtml::set_document_string(const string& mt,
|
||||||
|
const string& htext)
|
||||||
{
|
{
|
||||||
|
RecollFilter::set_document_string(mt, htext);
|
||||||
m_html = htext;
|
m_html = htext;
|
||||||
m_havedoc = true;
|
m_havedoc = true;
|
||||||
|
|
||||||
|
|||||||
@ -26,11 +26,15 @@
|
|||||||
*/
|
*/
|
||||||
class MimeHandlerHtml : public RecollFilter {
|
class MimeHandlerHtml : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerHtml(RclConfig *cnf, const string& mt)
|
MimeHandlerHtml(RclConfig *cnf, const string& id)
|
||||||
: RecollFilter(cnf, mt) {}
|
: RecollFilter(cnf, id)
|
||||||
virtual ~MimeHandlerHtml() {}
|
{
|
||||||
virtual bool set_document_file(const string &file_path);
|
}
|
||||||
virtual bool set_document_string(const string &data);
|
virtual ~MimeHandlerHtml()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual bool set_document_file(const string& mt, const string &file_path);
|
||||||
|
virtual bool set_document_string(const string& mt, const string &data);
|
||||||
virtual bool is_data_input_ok(DataInput input) const {
|
virtual bool is_data_input_ok(DataInput input) const {
|
||||||
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@ -46,8 +46,8 @@ using namespace std;
|
|||||||
static const int maxdepth = 20;
|
static const int maxdepth = 20;
|
||||||
static const string cstr_mail_charset("charset");
|
static const string cstr_mail_charset("charset");
|
||||||
|
|
||||||
MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt)
|
MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &id)
|
||||||
: RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
|
: RecollFilter(cnf, id), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
|
||||||
{
|
{
|
||||||
|
|
||||||
// Look for additional headers to be processed as per config:
|
// Look for additional headers to be processed as per config:
|
||||||
@ -85,10 +85,10 @@ void MimeHandlerMail::clear()
|
|||||||
RecollFilter::clear();
|
RecollFilter::clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MimeHandlerMail::set_document_file(const string &fn)
|
bool MimeHandlerMail::set_document_file(const string& mt, const string &fn)
|
||||||
{
|
{
|
||||||
LOGDEB(("MimeHandlerMail::set_document_file(%s)\n", fn.c_str()));
|
LOGDEB(("MimeHandlerMail::set_document_file(%s)\n", fn.c_str()));
|
||||||
RecollFilter::set_document_file(fn);
|
RecollFilter::set_document_file(mt, fn);
|
||||||
if (m_fd >= 0) {
|
if (m_fd >= 0) {
|
||||||
close(m_fd);
|
close(m_fd);
|
||||||
m_fd = -1;
|
m_fd = -1;
|
||||||
@ -123,10 +123,12 @@ bool MimeHandlerMail::set_document_file(const string &fn)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MimeHandlerMail::set_document_string(const string &msgtxt)
|
bool MimeHandlerMail::set_document_string(const string& mt,
|
||||||
|
const string &msgtxt)
|
||||||
{
|
{
|
||||||
LOGDEB1(("MimeHandlerMail::set_document_string\n"));
|
LOGDEB1(("MimeHandlerMail::set_document_string\n"));
|
||||||
LOGDEB2(("Message text: [%s]\n", msgtxt.c_str()));
|
LOGDEB2(("Message text: [%s]\n", msgtxt.c_str()));
|
||||||
|
RecollFilter::set_document_string(mt, msgtxt);
|
||||||
delete m_stream;
|
delete m_stream;
|
||||||
|
|
||||||
if (!m_forPreview) {
|
if (!m_forPreview) {
|
||||||
@ -614,11 +616,11 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
|
|
||||||
// Handle html stripping and transcoding to utf8
|
// Handle html stripping and transcoding to utf8
|
||||||
if (!stringlowercmp("text/html", content_type.value)) {
|
if (!stringlowercmp("text/html", content_type.value)) {
|
||||||
MimeHandlerHtml mh(m_config, "text/html");
|
MimeHandlerHtml mh(m_config, "1234");
|
||||||
mh.set_property(Dijon::Filter::OPERATING_MODE,
|
mh.set_property(Dijon::Filter::OPERATING_MODE,
|
||||||
m_forPreview ? "view" : "index");
|
m_forPreview ? "view" : "index");
|
||||||
mh.set_property(Dijon::Filter::DEFAULT_CHARSET, charset);
|
mh.set_property(Dijon::Filter::DEFAULT_CHARSET, charset);
|
||||||
mh.set_document_string(body);
|
mh.set_document_string("text/html", body);
|
||||||
mh.next_document();
|
mh.next_document();
|
||||||
map<string, string>::const_iterator it =
|
map<string, string>::const_iterator it =
|
||||||
mh.get_meta_data().find(cstr_dj_keycontent);
|
mh.get_meta_data().find(cstr_dj_keycontent);
|
||||||
|
|||||||
@ -39,10 +39,10 @@ class MHMailAttach;
|
|||||||
*/
|
*/
|
||||||
class MimeHandlerMail : public RecollFilter {
|
class MimeHandlerMail : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerMail(RclConfig *cnf, const string &mt);
|
MimeHandlerMail(RclConfig *cnf, const string &id);
|
||||||
virtual ~MimeHandlerMail();
|
virtual ~MimeHandlerMail();
|
||||||
virtual bool set_document_file(const string& file_path);
|
virtual bool set_document_file(const string& mt, const string& file_path);
|
||||||
virtual bool set_document_string(const string& data);
|
virtual bool set_document_string(const string& mt, const string& data);
|
||||||
virtual bool is_data_input_ok(DataInput input) const {
|
virtual bool is_data_input_ok(DataInput input) const {
|
||||||
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@ -251,10 +251,10 @@ void MimeHandlerMbox::clear()
|
|||||||
RecollFilter::clear();
|
RecollFilter::clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MimeHandlerMbox::set_document_file(const string &fn)
|
bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
|
||||||
{
|
{
|
||||||
LOGDEB(("MimeHandlerMbox::set_document_file(%s)\n", fn.c_str()));
|
LOGDEB(("MimeHandlerMbox::set_document_file(%s)\n", fn.c_str()));
|
||||||
RecollFilter::set_document_file(fn);
|
RecollFilter::set_document_file(mt, fn);
|
||||||
m_fn = fn;
|
m_fn = fn;
|
||||||
if (m_vfp) {
|
if (m_vfp) {
|
||||||
fclose((FILE *)m_vfp);
|
fclose((FILE *)m_vfp);
|
||||||
@ -598,8 +598,8 @@ int main(int argc, char **argv)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
config->setKeyDir(path_getfather(filename));
|
config->setKeyDir(path_getfather(filename));
|
||||||
MimeHandlerMbox mh(config, "text/x-mail");
|
MimeHandlerMbox mh(config, "some_id");
|
||||||
if (!mh.set_document_file(filename)) {
|
if (!mh.set_document_file("text/x-mail", filename)) {
|
||||||
cerr << "set_document_file failed" << endl;
|
cerr << "set_document_file failed" << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -31,12 +31,12 @@ using std::vector;
|
|||||||
*/
|
*/
|
||||||
class MimeHandlerMbox : public RecollFilter {
|
class MimeHandlerMbox : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerMbox(RclConfig *cnf, const string& mime)
|
MimeHandlerMbox(RclConfig *cnf, const string& id)
|
||||||
: RecollFilter(cnf, mime), m_vfp(0), m_msgnum(0),
|
: RecollFilter(cnf, id), m_vfp(0), m_msgnum(0),
|
||||||
m_lineno(0), m_fsize(0)
|
m_lineno(0), m_fsize(0)
|
||||||
{}
|
{}
|
||||||
virtual ~MimeHandlerMbox();
|
virtual ~MimeHandlerMbox();
|
||||||
virtual bool set_document_file(const string &file_path);
|
virtual bool set_document_file(const string& mt, const string &file_path);
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
virtual bool skip_to_document(const string& ipath) {
|
virtual bool skip_to_document(const string& ipath) {
|
||||||
m_ipath = ipath;
|
m_ipath = ipath;
|
||||||
|
|||||||
@ -35,12 +35,16 @@
|
|||||||
*/
|
*/
|
||||||
class MimeHandlerSymlink : public RecollFilter {
|
class MimeHandlerSymlink : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerSymlink(RclConfig *cnf, const std::string& mt)
|
MimeHandlerSymlink(RclConfig *cnf, const std::string& id)
|
||||||
: RecollFilter(cnf, mt) {}
|
: RecollFilter(cnf, id)
|
||||||
virtual ~MimeHandlerSymlink() {}
|
|
||||||
virtual bool set_document_file(const string& fn)
|
|
||||||
{
|
{
|
||||||
RecollFilter::set_document_file(fn);
|
}
|
||||||
|
virtual ~MimeHandlerSymlink()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual bool set_document_file(const string& mt, const string& fn)
|
||||||
|
{
|
||||||
|
RecollFilter::set_document_file(mt, fn);
|
||||||
m_fn = fn;
|
m_fn = fn;
|
||||||
return m_havedoc = true;
|
return m_havedoc = true;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -39,11 +39,11 @@ const int MB = 1024*1024;
|
|||||||
const int KB = 1024;
|
const int KB = 1024;
|
||||||
|
|
||||||
// Process a plain text file
|
// Process a plain text file
|
||||||
bool MimeHandlerText::set_document_file(const string &fn)
|
bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
|
||||||
{
|
{
|
||||||
LOGDEB(("MimeHandlerText::set_document_file: [%s]\n", fn.c_str()));
|
LOGDEB(("MimeHandlerText::set_document_file: [%s]\n", fn.c_str()));
|
||||||
|
|
||||||
RecollFilter::set_document_file(fn);
|
RecollFilter::set_document_file(mt, fn);
|
||||||
m_fn = fn;
|
m_fn = fn;
|
||||||
|
|
||||||
// file size for oversize check
|
// file size for oversize check
|
||||||
@ -91,8 +91,9 @@ bool MimeHandlerText::set_document_file(const string &fn)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MimeHandlerText::set_document_string(const string& otext)
|
bool MimeHandlerText::set_document_string(const string& mt, const string& otext)
|
||||||
{
|
{
|
||||||
|
RecollFilter::set_document_string(mt, otext);
|
||||||
m_text = otext;
|
m_text = otext;
|
||||||
if (!m_forPreview) {
|
if (!m_forPreview) {
|
||||||
string md5, xmd5;
|
string md5, xmd5;
|
||||||
|
|||||||
@ -30,11 +30,15 @@ using std::string;
|
|||||||
*/
|
*/
|
||||||
class MimeHandlerText : public RecollFilter {
|
class MimeHandlerText : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerText(RclConfig *cnf, const string& mt)
|
MimeHandlerText(RclConfig *cnf, const string& id)
|
||||||
: RecollFilter(cnf, mt), m_paging(false), m_offs(0) {}
|
: RecollFilter(cnf, id), m_paging(false), m_offs(0)
|
||||||
virtual ~MimeHandlerText() {}
|
{
|
||||||
virtual bool set_document_file(const string &file_path);
|
}
|
||||||
virtual bool set_document_string(const string&);
|
virtual ~MimeHandlerText()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual bool set_document_file(const string& mt, const string &file_path);
|
||||||
|
virtual bool set_document_string(const string&, const string&);
|
||||||
virtual bool is_data_input_ok(DataInput input) const {
|
virtual bool is_data_input_ok(DataInput input) const {
|
||||||
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@ -28,14 +28,20 @@
|
|||||||
*/
|
*/
|
||||||
class MimeHandlerUnknown : public RecollFilter {
|
class MimeHandlerUnknown : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerUnknown(RclConfig *cnf, const string& mt)
|
MimeHandlerUnknown(RclConfig *cnf, const string& id)
|
||||||
: RecollFilter(cnf, mt) {}
|
: RecollFilter(cnf, id)
|
||||||
virtual ~MimeHandlerUnknown() {}
|
{
|
||||||
virtual bool set_document_file(const string& fn) {
|
}
|
||||||
RecollFilter::set_document_file(fn);
|
virtual ~MimeHandlerUnknown()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual bool set_document_file(const string& mt, const string& fn)
|
||||||
|
{
|
||||||
|
RecollFilter::set_document_file(mt, fn);
|
||||||
return m_havedoc = true;
|
return m_havedoc = true;
|
||||||
}
|
}
|
||||||
virtual bool set_document_string(const string&) {
|
virtual bool set_document_string(const string& mt, const string& s) {
|
||||||
|
RecollFilter::set_document_string(mt, s);
|
||||||
return m_havedoc = true;
|
return m_havedoc = true;
|
||||||
}
|
}
|
||||||
virtual bool next_document() {
|
virtual bool next_document() {
|
||||||
|
|||||||
@ -30,6 +30,7 @@ using namespace std;
|
|||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
|
#include "md5.h"
|
||||||
|
|
||||||
#include "mh_exec.h"
|
#include "mh_exec.h"
|
||||||
#include "mh_execm.h"
|
#include "mh_execm.h"
|
||||||
@ -45,24 +46,26 @@ using namespace std;
|
|||||||
// handlers. There can be several instances for a given mime type
|
// handlers. There can be several instances for a given mime type
|
||||||
// (think email attachment in email message: 2 rfc822 handlers are
|
// (think email attachment in email message: 2 rfc822 handlers are
|
||||||
// needed simultaneously)
|
// needed simultaneously)
|
||||||
static multimap<string, Dijon::Filter*> o_handlers;
|
static multimap<string, RecollFilter*> o_handlers;
|
||||||
static list<multimap<string, Dijon::Filter*>::iterator> o_hlru;
|
static list<multimap<string, RecollFilter*>::iterator> o_hlru;
|
||||||
typedef list<multimap<string, Dijon::Filter*>::iterator>::iterator hlruit_tp;
|
typedef list<multimap<string, RecollFilter*>::iterator>::iterator hlruit_tp;
|
||||||
|
|
||||||
static PTMutexInit o_handlers_mutex;
|
static PTMutexInit o_handlers_mutex;
|
||||||
|
|
||||||
static const unsigned int max_handlers_cache_size = 100;
|
static const unsigned int max_handlers_cache_size = 100;
|
||||||
|
|
||||||
/* Look for mime handler in pool */
|
/* Look for mime handler in pool */
|
||||||
static Dijon::Filter *getMimeHandlerFromCache(const string& key)
|
static RecollFilter *getMimeHandlerFromCache(const string& key)
|
||||||
{
|
{
|
||||||
PTMutexLocker locker(o_handlers_mutex);
|
PTMutexLocker locker(o_handlers_mutex);
|
||||||
|
string xdigest;
|
||||||
|
MD5HexPrint(key, xdigest);
|
||||||
LOGDEB(("getMimeHandlerFromCache: %s cache size %u\n",
|
LOGDEB(("getMimeHandlerFromCache: %s cache size %u\n",
|
||||||
key.c_str(), o_handlers.size()));
|
xdigest.c_str(), o_handlers.size()));
|
||||||
|
|
||||||
multimap<string, Dijon::Filter *>::iterator it = o_handlers.find(key);
|
multimap<string, RecollFilter *>::iterator it = o_handlers.find(key);
|
||||||
if (it != o_handlers.end()) {
|
if (it != o_handlers.end()) {
|
||||||
Dijon::Filter *h = it->second;
|
RecollFilter *h = it->second;
|
||||||
hlruit_tp it1 = find(o_hlru.begin(), o_hlru.end(), it);
|
hlruit_tp it1 = find(o_hlru.begin(), o_hlru.end(), it);
|
||||||
if (it1 != o_hlru.end()) {
|
if (it1 != o_hlru.end()) {
|
||||||
o_hlru.erase(it1);
|
o_hlru.erase(it1);
|
||||||
@ -71,20 +74,22 @@ static Dijon::Filter *getMimeHandlerFromCache(const string& key)
|
|||||||
}
|
}
|
||||||
o_handlers.erase(it);
|
o_handlers.erase(it);
|
||||||
LOGDEB(("getMimeHandlerFromCache: %s found size %u\n",
|
LOGDEB(("getMimeHandlerFromCache: %s found size %u\n",
|
||||||
key.c_str(), o_handlers.size()));
|
xdigest.c_str(), o_handlers.size()));
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
LOGDEB(("getMimeHandlerFromCache: %s not found\n", key.c_str()));
|
LOGDEB(("getMimeHandlerFromCache: %s not found\n", xdigest.c_str()));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return mime handler to pool */
|
/* Return mime handler to pool */
|
||||||
void returnMimeHandler(Dijon::Filter *handler)
|
void returnMimeHandler(RecollFilter *handler)
|
||||||
{
|
{
|
||||||
typedef multimap<string, Dijon::Filter*>::value_type value_type;
|
typedef multimap<string, RecollFilter*>::value_type value_type;
|
||||||
|
|
||||||
if (handler==0)
|
if (handler == 0) {
|
||||||
|
LOGERR(("returnMimeHandler: bad parameter\n"));
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
handler->clear();
|
handler->clear();
|
||||||
|
|
||||||
PTMutexLocker locker(o_handlers_mutex);
|
PTMutexLocker locker(o_handlers_mutex);
|
||||||
@ -97,7 +102,7 @@ void returnMimeHandler(Dijon::Filter *handler)
|
|||||||
// at the same time either because it occurs several times in a
|
// at the same time either because it occurs several times in a
|
||||||
// stack (ie mail attachment to mail), or because several threads
|
// stack (ie mail attachment to mail), or because several threads
|
||||||
// are processing the same mime type at the same time.
|
// are processing the same mime type at the same time.
|
||||||
multimap<string, Dijon::Filter *>::iterator it;
|
multimap<string, RecollFilter *>::iterator it;
|
||||||
if (o_handlers.size() >= max_handlers_cache_size) {
|
if (o_handlers.size() >= max_handlers_cache_size) {
|
||||||
static int once = 1;
|
static int once = 1;
|
||||||
if (once) {
|
if (once) {
|
||||||
@ -114,15 +119,15 @@ void returnMimeHandler(Dijon::Filter *handler)
|
|||||||
o_handlers.erase(it);
|
o_handlers.erase(it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
it = o_handlers.insert(value_type(handler->get_mime_type(), handler));
|
it = o_handlers.insert(value_type(handler->get_id(), handler));
|
||||||
o_hlru.push_front(it);
|
o_hlru.push_front(it);
|
||||||
}
|
}
|
||||||
|
|
||||||
void clearMimeHandlerCache()
|
void clearMimeHandlerCache()
|
||||||
{
|
{
|
||||||
LOGDEB(("clearMimeHandlerCache()\n"));
|
LOGDEB(("clearMimeHandlerCache()\n"));
|
||||||
typedef multimap<string, Dijon::Filter*>::value_type value_type;
|
typedef multimap<string, RecollFilter*>::value_type value_type;
|
||||||
map<string, Dijon::Filter *>::iterator it;
|
map<string, RecollFilter *>::iterator it;
|
||||||
PTMutexLocker locker(o_handlers_mutex);
|
PTMutexLocker locker(o_handlers_mutex);
|
||||||
for (it = o_handlers.begin(); it != o_handlers.end(); it++) {
|
for (it = o_handlers.begin(); it != o_handlers.end(); it++) {
|
||||||
delete it->second;
|
delete it->second;
|
||||||
@ -132,26 +137,32 @@ void clearMimeHandlerCache()
|
|||||||
|
|
||||||
/** For mime types set as "internal" in mimeconf:
|
/** For mime types set as "internal" in mimeconf:
|
||||||
* create appropriate handler object. */
|
* create appropriate handler object. */
|
||||||
static Dijon::Filter *mhFactory(RclConfig *config, const string &mime)
|
static RecollFilter *mhFactory(RclConfig *config, const string &mime,
|
||||||
|
bool nobuild, string& id)
|
||||||
{
|
{
|
||||||
LOGDEB2(("mhFactory(%s)\n", mime.c_str()));
|
LOGDEB2(("mhFactory(%s)\n", mime.c_str()));
|
||||||
string lmime(mime);
|
string lmime(mime);
|
||||||
stringtolower(lmime);
|
stringtolower(lmime);
|
||||||
if (cstr_textplain == lmime) {
|
if (cstr_textplain == lmime) {
|
||||||
LOGDEB2(("mhFactory(%s): returning MimeHandlerText\n", mime.c_str()));
|
LOGDEB2(("mhFactory(%s): returning MimeHandlerText\n", mime.c_str()));
|
||||||
return new MimeHandlerText(config, lmime);
|
MD5String("MimeHandlerText", id);
|
||||||
|
return nobuild ? 0 : new MimeHandlerText(config, id);
|
||||||
} else if ("text/html" == lmime) {
|
} else if ("text/html" == lmime) {
|
||||||
LOGDEB2(("mhFactory(%s): returning MimeHandlerHtml\n", mime.c_str()));
|
LOGDEB2(("mhFactory(%s): returning MimeHandlerHtml\n", mime.c_str()));
|
||||||
return new MimeHandlerHtml(config, lmime);
|
MD5String("MimeHandlerHtml", id);
|
||||||
|
return nobuild ? 0 : new MimeHandlerHtml(config, id);
|
||||||
} else if ("text/x-mail" == lmime) {
|
} else if ("text/x-mail" == lmime) {
|
||||||
LOGDEB2(("mhFactory(%s): returning MimeHandlerMbox\n", mime.c_str()));
|
LOGDEB2(("mhFactory(%s): returning MimeHandlerMbox\n", mime.c_str()));
|
||||||
return new MimeHandlerMbox(config, lmime);
|
MD5String("MimeHandlerMbox", id);
|
||||||
|
return nobuild ? 0 : new MimeHandlerMbox(config, id);
|
||||||
} else if ("message/rfc822" == lmime) {
|
} else if ("message/rfc822" == lmime) {
|
||||||
LOGDEB2(("mhFactory(%s): returning MimeHandlerMail\n", mime.c_str()));
|
LOGDEB2(("mhFactory(%s): returning MimeHandlerMail\n", mime.c_str()));
|
||||||
return new MimeHandlerMail(config, lmime);
|
MD5String("MimeHandlerMail", id);
|
||||||
|
return nobuild ? 0 : new MimeHandlerMail(config, id);
|
||||||
} else if ("inode/symlink" == lmime) {
|
} else if ("inode/symlink" == lmime) {
|
||||||
LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str()));
|
LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str()));
|
||||||
return new MimeHandlerSymlink(config, lmime);
|
MD5String("MimeHandlerSymlink", id);
|
||||||
|
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
|
||||||
} else if (lmime.find("text/") == 0) {
|
} else if (lmime.find("text/") == 0) {
|
||||||
// Try to handle unknown text/xx as text/plain. This
|
// Try to handle unknown text/xx as text/plain. This
|
||||||
// only happen if the text/xx was defined as "internal" in
|
// only happen if the text/xx was defined as "internal" in
|
||||||
@ -159,14 +170,16 @@ static Dijon::Filter *mhFactory(RclConfig *config, const string &mime)
|
|||||||
// allows indexing and previewing as text/plain (no filter
|
// allows indexing and previewing as text/plain (no filter
|
||||||
// exec) but still opening with a specific editor.
|
// exec) but still opening with a specific editor.
|
||||||
LOGDEB2(("mhFactory(%s): returning MimeHandlerText(x)\n",mime.c_str()));
|
LOGDEB2(("mhFactory(%s): returning MimeHandlerText(x)\n",mime.c_str()));
|
||||||
return new MimeHandlerText(config, lmime);
|
MD5String("MimeHandlerText", id);
|
||||||
|
return nobuild ? 0 : new MimeHandlerText(config, id);
|
||||||
} else {
|
} else {
|
||||||
// We should not get there. It means that "internal" was set
|
// We should not get there. It means that "internal" was set
|
||||||
// as a handler in mimeconf for a mime type we actually can't
|
// as a handler in mimeconf for a mime type we actually can't
|
||||||
// handle.
|
// handle.
|
||||||
LOGERR(("mhFactory: mime type [%s] set as internal but unknown\n",
|
LOGERR(("mhFactory: mime type [%s] set as internal but unknown\n",
|
||||||
lmime.c_str()));
|
lmime.c_str()));
|
||||||
return new MimeHandlerUnknown(config, lmime);
|
MD5String("MimeHandlerUnknown", id);
|
||||||
|
return nobuild ? 0 : new MimeHandlerUnknown(config, id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -181,10 +194,11 @@ static const string cstr_mh_charset("charset");
|
|||||||
* a ';' inside a quoted string for now. Can't see a use for it.
|
* a ';' inside a quoted string for now. Can't see a use for it.
|
||||||
*/
|
*/
|
||||||
MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
||||||
bool multiple)
|
bool multiple, const string& id)
|
||||||
{
|
{
|
||||||
ConfSimple attrs;
|
ConfSimple attrs;
|
||||||
string cmdstr;
|
string cmdstr;
|
||||||
|
|
||||||
if (!cfg->valueSplitAttributes(hs, cmdstr, attrs)) {
|
if (!cfg->valueSplitAttributes(hs, cmdstr, attrs)) {
|
||||||
LOGERR(("mhExecFactory: bad config line for [%s]: [%s]\n",
|
LOGERR(("mhExecFactory: bad config line for [%s]: [%s]\n",
|
||||||
mtype.c_str(), hs.c_str()));
|
mtype.c_str(), hs.c_str()));
|
||||||
@ -200,8 +214,8 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
MimeHandlerExec *h = multiple ?
|
MimeHandlerExec *h = multiple ?
|
||||||
new MimeHandlerExecMultiple(cfg, mtype.c_str()) :
|
new MimeHandlerExecMultiple(cfg, id) :
|
||||||
new MimeHandlerExec(cfg, mtype.c_str());
|
new MimeHandlerExec(cfg, id);
|
||||||
list<string>::iterator it = cmdtoks.begin();
|
list<string>::iterator it = cmdtoks.begin();
|
||||||
h->params.push_back(cfg->findFilter(*it++));
|
h->params.push_back(cfg->findFilter(*it++));
|
||||||
h->params.insert(h->params.end(), it, cmdtoks.end());
|
h->params.insert(h->params.end(), it, cmdtoks.end());
|
||||||
@ -228,32 +242,27 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Get handler/filter object for given mime type: */
|
/* Get handler/filter object for given mime type: */
|
||||||
Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||||
bool filtertypes)
|
bool filtertypes)
|
||||||
{
|
{
|
||||||
LOGDEB(("getMimeHandler: mtype [%s] filtertypes %d\n",
|
LOGDEB(("getMimeHandler: mtype [%s] filtertypes %d\n",
|
||||||
mtype.c_str(), filtertypes));
|
mtype.c_str(), filtertypes));
|
||||||
Dijon::Filter *h = 0;
|
RecollFilter *h = 0;
|
||||||
|
|
||||||
// Get handler definition for mime type. We do this even if an
|
// Get handler definition for mime type. We do this even if an
|
||||||
// appropriate handler object may be in the cache (indexed by mime
|
// appropriate handler object may be in the cache.
|
||||||
// type). This is fast, and necessary to conform to the
|
// This is fast, and necessary to conform to the
|
||||||
// configuration, (ie: text/html might be filtered out by
|
// configuration, (ie: text/html might be filtered out by
|
||||||
// indexedmimetypes but an html handler could still be in the
|
// indexedmimetypes but an html handler could still be in the
|
||||||
// cache because it was needed by some other interning stack).
|
// cache because it was needed by some other interning stack).
|
||||||
string hs;
|
string hs;
|
||||||
hs = cfg->getMimeHandlerDef(mtype, filtertypes);
|
hs = cfg->getMimeHandlerDef(mtype, filtertypes);
|
||||||
|
string id;
|
||||||
|
|
||||||
if (!hs.empty()) { // Got a handler definition line
|
if (!hs.empty()) {
|
||||||
|
// Got a handler definition line
|
||||||
// Do we already have a handler object in the cache ?
|
// Break definition into type (internal/exec/execm)
|
||||||
h = getMimeHandlerFromCache(mtype);
|
// and name/command string
|
||||||
if (h != 0)
|
|
||||||
goto out;
|
|
||||||
LOGDEB2(("getMimeHandler: %s not in cache\n", mtype.c_str()));
|
|
||||||
|
|
||||||
// Not in cache. Break definition into type and name/command
|
|
||||||
// string and instanciate handler object
|
|
||||||
string::size_type b1 = hs.find_first_of(" \t");
|
string::size_type b1 = hs.find_first_of(" \t");
|
||||||
string handlertype = hs.substr(0, b1);
|
string handlertype = hs.substr(0, b1);
|
||||||
string cmdstr;
|
string cmdstr;
|
||||||
@ -261,7 +270,30 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
|||||||
cmdstr = hs.substr(b1);
|
cmdstr = hs.substr(b1);
|
||||||
trimstring(cmdstr);
|
trimstring(cmdstr);
|
||||||
}
|
}
|
||||||
if (!stringlowercmp("internal", handlertype)) {
|
bool internal = !stringlowercmp("internal", handlertype);
|
||||||
|
if (internal) {
|
||||||
|
// For internal types let the factory compute the id
|
||||||
|
mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, true, id);
|
||||||
|
} else {
|
||||||
|
// exec/execm: use the md5 of the def line
|
||||||
|
MD5String(hs, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
{ // string xdigest; LOGDEB2(("getMimeHandler: [%s] hs [%s] id [%s]\n",
|
||||||
|
//mtype.c_str(), hs.c_str(), MD5HexPrint(id, xdigest).c_str()));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Do we already have a handler object in the cache ?
|
||||||
|
h = getMimeHandlerFromCache(id);
|
||||||
|
if (h != 0)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
LOGDEB2(("getMimeHandler: %s not in cache\n", mtype.c_str()));
|
||||||
|
|
||||||
|
// Not in cache.
|
||||||
|
if (internal) {
|
||||||
// If there is a parameter after "internal" it's the mime
|
// If there is a parameter after "internal" it's the mime
|
||||||
// type to use. This is so that we can have bogus mime
|
// type to use. This is so that we can have bogus mime
|
||||||
// types like text/x-purple-html-log (for ie: specific
|
// types like text/x-purple-html-log (for ie: specific
|
||||||
@ -270,14 +302,7 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
|||||||
// better and the latter will probably go away at some
|
// better and the latter will probably go away at some
|
||||||
// point in the future.
|
// point in the future.
|
||||||
LOGDEB2(("handlertype internal, cmdstr [%s]\n", cmdstr.c_str()));
|
LOGDEB2(("handlertype internal, cmdstr [%s]\n", cmdstr.c_str()));
|
||||||
if (!cmdstr.empty()) {
|
h = mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, false, id);
|
||||||
// Have to redo the cache thing. Maybe we should
|
|
||||||
// rather just recurse instead ?
|
|
||||||
if ((h = getMimeHandlerFromCache(cmdstr)) == 0)
|
|
||||||
h = mhFactory(cfg, cmdstr);
|
|
||||||
} else {
|
|
||||||
h = mhFactory(cfg, mtype);
|
|
||||||
}
|
|
||||||
goto out;
|
goto out;
|
||||||
} else if (!stringlowercmp("dll", handlertype)) {
|
} else if (!stringlowercmp("dll", handlertype)) {
|
||||||
} else {
|
} else {
|
||||||
@ -287,10 +312,10 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
if (!stringlowercmp("exec", handlertype)) {
|
if (!stringlowercmp("exec", handlertype)) {
|
||||||
h = mhExecFactory(cfg, mtype, cmdstr, false);
|
h = mhExecFactory(cfg, mtype, cmdstr, false, id);
|
||||||
goto out;
|
goto out;
|
||||||
} else if (!stringlowercmp("execm", handlertype)) {
|
} else if (!stringlowercmp("execm", handlertype)) {
|
||||||
h = mhExecFactory(cfg, mtype, cmdstr, true);
|
h = mhExecFactory(cfg, mtype, cmdstr, true, id);
|
||||||
goto out;
|
goto out;
|
||||||
} else {
|
} else {
|
||||||
LOGERR(("getMimeHandler: bad line for %s: %s\n",
|
LOGERR(("getMimeHandler: bad line for %s: %s\n",
|
||||||
@ -305,20 +330,20 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
|||||||
|
|
||||||
// Finally, unhandled files are either ignored or their name and
|
// Finally, unhandled files are either ignored or their name and
|
||||||
// generic metadata is indexed, depending on configuration
|
// generic metadata is indexed, depending on configuration
|
||||||
{bool indexunknown = false;
|
{
|
||||||
|
bool indexunknown = false;
|
||||||
cfg->getConfParam("indexallfilenames", &indexunknown);
|
cfg->getConfParam("indexallfilenames", &indexunknown);
|
||||||
if (indexunknown) {
|
if (indexunknown) {
|
||||||
if ((h = getMimeHandlerFromCache("application/octet-stream")) == 0)
|
MD5String("MimeHandlerUnknown", id);
|
||||||
h = new MimeHandlerUnknown(cfg, "application/octet-stream");
|
if ((h = getMimeHandlerFromCache(id)) == 0)
|
||||||
goto out;
|
h = new MimeHandlerUnknown(cfg, id);
|
||||||
} else {
|
|
||||||
goto out;
|
|
||||||
}
|
}
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
if (h) {
|
if (h) {
|
||||||
h->set_property(Dijon::Filter::DEFAULT_CHARSET, cfg->getDefCharset());
|
h->set_property(RecollFilter::DEFAULT_CHARSET, cfg->getDefCharset());
|
||||||
// In multithread context, and in case this handler is out
|
// In multithread context, and in case this handler is out
|
||||||
// from the cache, it may have a config pointer belonging to
|
// from the cache, it may have a config pointer belonging to
|
||||||
// another thread. Fix it.
|
// another thread. Fix it.
|
||||||
|
|||||||
@ -21,26 +21,23 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
|
||||||
using std::string;
|
|
||||||
using std::list;
|
|
||||||
|
|
||||||
#include <Filter.h>
|
#include "Filter.h"
|
||||||
|
#include "cstr.h"
|
||||||
|
|
||||||
class RclConfig;
|
class RclConfig;
|
||||||
|
|
||||||
class RecollFilter : public Dijon::Filter {
|
class RecollFilter : public Dijon::Filter {
|
||||||
public:
|
public:
|
||||||
RecollFilter(RclConfig *config, const string& mtype)
|
RecollFilter(RclConfig *config, const std::string& id)
|
||||||
: Dijon::Filter(mtype), m_config(config),
|
: m_config(config), m_forPreview(false), m_havedoc(false), m_id(id)
|
||||||
m_forPreview(false), m_havedoc(false)
|
|
||||||
{}
|
{}
|
||||||
virtual ~RecollFilter() {}
|
virtual ~RecollFilter() {}
|
||||||
virtual void setConfig(RclConfig *config)
|
virtual void setConfig(RclConfig *config)
|
||||||
{
|
{
|
||||||
m_config = config;
|
m_config = config;
|
||||||
}
|
}
|
||||||
virtual bool set_property(Properties p, const string &v) {
|
virtual bool set_property(Properties p, const std::string &v) {
|
||||||
switch (p) {
|
switch (p) {
|
||||||
case DJF_UDI:
|
case DJF_UDI:
|
||||||
m_udi = v;
|
m_udi = v;
|
||||||
@ -59,7 +56,12 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We don't use this for now
|
// We don't use this for now
|
||||||
virtual bool set_document_uri(const std::string &) {return false;}
|
virtual bool set_document_uri(const std::string& mtype,
|
||||||
|
const std::string &)
|
||||||
|
{
|
||||||
|
m_mimeType = mtype;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// This does nothing right now but should be called from the
|
// This does nothing right now but should be called from the
|
||||||
// subclass method in case we need some common processing one day
|
// subclass method in case we need some common processing one day
|
||||||
@ -69,12 +71,24 @@ public:
|
|||||||
// having a pure virtual called from here and implemented in the
|
// having a pure virtual called from here and implemented in the
|
||||||
// subclass) would have to be repeated in each derived class. It's
|
// subclass) would have to be repeated in each derived class. It's
|
||||||
// just simpler this way.
|
// just simpler this way.
|
||||||
virtual bool set_document_file(const string & /*file_path*/) {return true;}
|
virtual bool set_document_file(const std::string& mtype,
|
||||||
|
const std::string & /*file_path*/)
|
||||||
|
{
|
||||||
|
m_mimeType = mtype;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// Default implementations
|
// Default implementations
|
||||||
virtual bool set_document_string(const std::string &) {return false;}
|
virtual bool set_document_string(const std::string& mtype,
|
||||||
virtual bool set_document_data(const char *cp, unsigned int sz) {
|
const std::string &)
|
||||||
return set_document_string(string(cp, sz));
|
{
|
||||||
|
m_mimeType = mtype;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
virtual bool set_document_data(const std::string& mtype,
|
||||||
|
const char *cp, unsigned int sz)
|
||||||
|
{
|
||||||
|
return set_document_string(mtype, std::string(cp, sz));
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void set_docsize(size_t size)
|
virtual void set_docsize(size_t size)
|
||||||
@ -87,7 +101,7 @@ public:
|
|||||||
virtual bool has_documents() const {return m_havedoc;}
|
virtual bool has_documents() const {return m_havedoc;}
|
||||||
|
|
||||||
// Most doc types are single-doc
|
// Most doc types are single-doc
|
||||||
virtual bool skip_to_document(const string& s) {
|
virtual bool skip_to_document(const std::string& s) {
|
||||||
if (s.empty())
|
if (s.empty())
|
||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
@ -99,10 +113,15 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual string get_error() const {
|
virtual std::string get_error() const {
|
||||||
return m_reason;
|
return m_reason;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual const std::string& get_id() const
|
||||||
|
{
|
||||||
|
return m_id;
|
||||||
|
}
|
||||||
|
|
||||||
// "Call super" anti-pattern again. Must be called from derived
|
// "Call super" anti-pattern again. Must be called from derived
|
||||||
// classes which reimplement clear()
|
// classes which reimplement clear()
|
||||||
virtual void clear() {
|
virtual void clear() {
|
||||||
@ -114,17 +133,20 @@ public:
|
|||||||
|
|
||||||
// This only makes sense if the contents are currently txt/plain
|
// This only makes sense if the contents are currently txt/plain
|
||||||
// It converts from keyorigcharset to UTF-8 and sets keycharset.
|
// It converts from keyorigcharset to UTF-8 and sets keycharset.
|
||||||
bool txtdcode(const string& who);
|
bool txtdcode(const std::string& who);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool preview() {return m_forPreview;}
|
bool preview() {return m_forPreview;}
|
||||||
|
|
||||||
RclConfig *m_config;
|
RclConfig *m_config;
|
||||||
bool m_forPreview;
|
bool m_forPreview;
|
||||||
string m_dfltInputCharset;
|
std::string m_dfltInputCharset;
|
||||||
string m_reason;
|
std::string m_reason;
|
||||||
bool m_havedoc;
|
bool m_havedoc;
|
||||||
string m_udi; // May be set by creator as a hint
|
std::string m_udi; // May be set by creator as a hint
|
||||||
|
// m_id is an md5 of the filter definition line (from mimeconf) and
|
||||||
|
// is used when fetching/returning filters to / from the cache.
|
||||||
|
std::string m_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -135,11 +157,11 @@ protected:
|
|||||||
* @param filtertypes decide if we should restrict to types in
|
* @param filtertypes decide if we should restrict to types in
|
||||||
* indexedmimetypes (if this is set at all).
|
* indexedmimetypes (if this is set at all).
|
||||||
*/
|
*/
|
||||||
extern Dijon::Filter *getMimeHandler(const std::string &mtyp, RclConfig *cfg,
|
extern RecollFilter *getMimeHandler(const std::string &mtyp, RclConfig *cfg,
|
||||||
bool filtertypes=false);
|
bool filtertypes=false);
|
||||||
|
|
||||||
/// Free up filter for reuse (you can also delete it)
|
/// Free up filter for reuse (you can also delete it)
|
||||||
extern void returnMimeHandler(Dijon::Filter *);
|
extern void returnMimeHandler(RecollFilter *);
|
||||||
|
|
||||||
/// Clean up cache at the end of an indexing pass. For people who use
|
/// Clean up cache at the end of an indexing pass. For people who use
|
||||||
/// the GUI to index: avoid all those filter processes forever hanging
|
/// the GUI to index: avoid all those filter processes forever hanging
|
||||||
|
|||||||
@ -62,10 +62,6 @@ using std::pair;
|
|||||||
#include "docseqhist.h"
|
#include "docseqhist.h"
|
||||||
#include "rclhelp.h"
|
#include "rclhelp.h"
|
||||||
|
|
||||||
#ifndef MIN
|
|
||||||
#define MIN(A,B) ((A)<(B)?(A):(B))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Subclass plainToRich to add <termtag>s and anchors to the preview text
|
// Subclass plainToRich to add <termtag>s and anchors to the preview text
|
||||||
class PlainToRichQtPreview : public PlainToRich {
|
class PlainToRichQtPreview : public PlainToRich {
|
||||||
public:
|
public:
|
||||||
|
|||||||
@ -24,8 +24,10 @@
|
|||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <list>
|
||||||
using std::ostringstream;
|
using std::ostringstream;
|
||||||
using std::endl;
|
using std::endl;
|
||||||
|
using std::list;
|
||||||
|
|
||||||
#include "cstr.h"
|
#include "cstr.h"
|
||||||
#include "reslistpager.h"
|
#include "reslistpager.h"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user