diff --git a/src/internfile/Filter.h b/src/internfile/Filter.h index 328a1d29..c0b309f3 100644 --- a/src/internfile/Filter.h +++ b/src/internfile/Filter.h @@ -52,9 +52,10 @@ namespace Dijon class Filter { public: - /// Builds an empty filter. - Filter(const std::string &mime_type) : m_mimeType(mime_type) {} /// Destroys the filter. + Filter() + { + } virtual ~Filter() {} virtual void setConfig(RclConfig *) = 0; @@ -63,7 +64,8 @@ namespace Dijon /** What data a filter supports as input. * It can be either the whole document data, its file name, or its URI. */ - typedef enum { DOCUMENT_DATA=0, DOCUMENT_STRING, DOCUMENT_FILE_NAME, DOCUMENT_URI } DataInput; + typedef enum { DOCUMENT_DATA=0, DOCUMENT_STRING, DOCUMENT_FILE_NAME, + DOCUMENT_URI } DataInput; /** Input properties supported by the filter. * @@ -94,7 +96,8 @@ namespace Dijon /** Sets a property, prior to calling set_document_XXX(). * Returns false if the property is not supported. */ - virtual bool set_property(Properties prop_name, const std::string &prop_value) = 0; + virtual bool set_property(Properties prop_name, + const std::string &prop_value) = 0; /** (Re)initializes the filter with the given data. * Caller should ensure the given pointer is valid until the @@ -103,25 +106,30 @@ namespace Dijon * Call next_document() to position the filter onto the first document. * Returns false if this input is not supported or an error occured. */ - virtual bool set_document_data(const char *data_ptr, unsigned int data_length) = 0; + virtual bool set_document_data(const std::string& mtype, + const char *data_ptr, + unsigned int data_length) = 0; /** (Re)initializes the filter with the given data. * Call next_document() to position the filter onto the first document. * Returns false if this input is not supported or an error occured. 
*/ - virtual bool set_document_string(const std::string &data_str) = 0; + virtual bool set_document_string(const std::string& mtype, + const std::string &data_str) = 0; /** (Re)initializes the filter with the given file. * Call next_document() to position the filter onto the first document. * Returns false if this input is not supported or an error occured. */ - virtual bool set_document_file(const std::string &file_path) = 0; + virtual bool set_document_file(const std::string& mtype, + const std::string &file_path) = 0; /** (Re)initializes the filter with the given URI. * Call next_document() to position the filter onto the first document. * Returns false if this input is not supported or an error occured. */ - virtual bool set_document_uri(const std::string &uri) = 0; + virtual bool set_document_uri(const std::string& mtype, + const std::string &uri) = 0; /** Set the document size meta_data element. This is the size of the immediate containing file (ie, a .doc, a .odt), not diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index a48d206c..177c22d6 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -263,7 +263,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf, // Look for appropriate handler (might still return empty) m_mimetype = l_mime; - Dijon::Filter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview); + RecollFilter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview); if (!df or df->is_unknown()) { // No real handler for this type, for now :( @@ -284,7 +284,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf, #endif //RCL_USE_XATTR df->set_docsize(docsize); - if (!df->set_document_file(m_fn)) { + if (!df->set_document_file(l_mime, m_fn)) { delete df; LOGERR(("FileInterner:: error converting %s\n", m_fn.c_str())); return; @@ -315,7 +315,7 @@ void FileInterner::init(const string &data, RclConfig *cnf, m_mimetype = imime; // Look for 
appropriate handler (might still return empty) - Dijon::Filter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview); + RecollFilter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview); if (!df) { // No handler for this type, for now :( if indexallfilenames @@ -329,13 +329,13 @@ void FileInterner::init(const string &data, RclConfig *cnf, bool result = false; df->set_docsize(data.length()); if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) { - result = df->set_document_string(data); + result = df->set_document_string(m_mimetype, data); } else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) { - result = df->set_document_data(data.c_str(), data.length()); + result = df->set_document_data(m_mimetype, data.c_str(), data.length()); } else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) { TempFile temp = dataToTempFile(data, m_mimetype); if (temp.isNotNull() && - (result = df->set_document_file(temp->filename()))) { + (result = df->set_document_file(m_mimetype, temp->filename()))) { m_tmpflgs[m_handlers.size()] = true; m_tempfiles.push_back(temp); } @@ -406,7 +406,7 @@ bool FileInterner::makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig) FileInterner::~FileInterner() { - for (vector::iterator it = m_handlers.begin(); + for (vector::iterator it = m_handlers.begin(); it != m_handlers.end(); it++) { returnMimeHandler(*it); } @@ -548,7 +548,7 @@ static inline bool getKeyValue(const map& docdata, bool FileInterner::dijontorcl(Rcl::Doc& doc) { - Dijon::Filter *df = m_handlers.back(); + RecollFilter *df = m_handlers.back(); if (df == 0) { //?? 
LOGERR(("FileInterner::dijontorcl: null top handler ??\n")); @@ -632,7 +632,7 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const doc.mimetype = m_mimetype; string ipathel; - for (vector::const_iterator hit = m_handlers.begin(); + for (vector::const_iterator hit = m_handlers.begin(); hit != m_handlers.end(); hit++) { const map& docdata = (*hit)->get_meta_data(); if (getKeyValue(docdata, cstr_dj_keyipath, ipathel)) { @@ -714,7 +714,7 @@ int FileInterner::addHandler() return ADD_CONTINUE; } - Dijon::Filter *newflt = getMimeHandler(mimetype, m_cfg); + RecollFilter *newflt = getMimeHandler(mimetype, m_cfg); if (!newflt) { // If we can't find a handler, this doc can't be handled // but there can be other ones so we go on @@ -740,13 +740,13 @@ int FileInterner::addHandler() bool setres = false; newflt->set_docsize(txt->length()); if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) { - setres = newflt->set_document_string(*txt); + setres = newflt->set_document_string(mimetype, *txt); } else if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) { - setres = newflt->set_document_data(txt->c_str(), txt->length()); + setres = newflt->set_document_data(mimetype,txt->c_str(),txt->length()); } else if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) { TempFile temp = dataToTempFile(*txt, mimetype); if (temp.isNotNull() && - (setres = newflt->set_document_file(temp->filename()))) { + (setres = newflt->set_document_file(mimetype, temp->filename()))) { m_tmpflgs[m_handlers.size()] = true; m_tempfiles.push_back(temp); // Hack here, but really helps perfs: if we happen to diff --git a/src/internfile/internfile.h b/src/internfile/internfile.h index 5f43b0d9..d11ead80 100644 --- a/src/internfile/internfile.h +++ b/src/internfile/internfile.h @@ -28,7 +28,7 @@ using std::vector; using std::map; using std::set; -#include "Filter.h" +#include "mimehandler.h" #include "uncomp.h" #include "pathut.h" @@ -262,7 +262,7 @@ class FileInterner { // Filter 
stack, path to the current document from which we're // fetching subdocs - vector m_handlers; + vector m_handlers; // Temporary files used for decoding the current stack bool m_tmpflgs[MAXHANDLERS]; vector m_tempfiles; diff --git a/src/internfile/mh_exec.cpp b/src/internfile/mh_exec.cpp index fd271fa4..77f3f0b3 100644 --- a/src/internfile/mh_exec.cpp +++ b/src/internfile/mh_exec.cpp @@ -14,6 +14,14 @@ * Free Software Foundation, Inc., * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include "autoconfig.h" + +#include +#include + +#include +using namespace std; + #include "cstr.h" #include "execmd.h" #include "mh_exec.h" @@ -24,13 +32,6 @@ #include "md5.h" #include "rclconfig.h" -#include -#include - -#ifndef NO_NAMESPACES -using namespace std; -#endif /* NO_NAMESPACES */ - // This is called periodically by ExeCmd when it is waiting for data, // or when it does receive some. We may choose to interrupt the // command. diff --git a/src/internfile/mh_exec.h b/src/internfile/mh_exec.h index 4c5a2654..0f9be27c 100644 --- a/src/internfile/mh_exec.h +++ b/src/internfile/mh_exec.h @@ -56,11 +56,11 @@ class MimeHandlerExec : public RecollFilter { bool missingHelper; //////////////// - MimeHandlerExec(RclConfig *cnf, const string& mt) - : RecollFilter(cnf, mt), missingHelper(false) + MimeHandlerExec(RclConfig *cnf, const string& id) + : RecollFilter(cnf, id), missingHelper(false) {} - virtual bool set_document_file(const string &file_path) { - RecollFilter::set_document_file(file_path); + virtual bool set_document_file(const string& mt, const string &file_path) { + RecollFilter::set_document_file(mt, file_path); m_fn = file_path; m_havedoc = true; return true; diff --git a/src/internfile/mh_execm.h b/src/internfile/mh_execm.h index 56be0615..c2b29242 100644 --- a/src/internfile/mh_execm.h +++ b/src/internfile/mh_execm.h @@ -102,14 +102,14 @@ class MimeHandlerExecMultiple : public MimeHandlerExec { /////// End un-cleared stuff. 
public: - MimeHandlerExecMultiple(RclConfig *cnf, const string& mt) - : MimeHandlerExec(cnf, mt) + MimeHandlerExecMultiple(RclConfig *cnf, const string& id) + : MimeHandlerExec(cnf, id) {} // No resources to clean up, the ExecCmd destructor does it. virtual ~MimeHandlerExecMultiple() {} - virtual bool set_document_file(const string &file_path) { + virtual bool set_document_file(const string& mt, const string &file_path) { m_filefirst = true; - return MimeHandlerExec::set_document_file(file_path); + return MimeHandlerExec::set_document_file(mt, file_path); } virtual bool next_document(); diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp index 3ee209d6..c16a8009 100644 --- a/src/internfile/mh_html.cpp +++ b/src/internfile/mh_html.cpp @@ -34,21 +34,23 @@ using namespace std; #endif /* NO_NAMESPACES */ -bool MimeHandlerHtml::set_document_file(const string &fn) +bool MimeHandlerHtml::set_document_file(const string& mt, const string &fn) { LOGDEB0(("textHtmlToDoc: %s\n", fn.c_str())); - RecollFilter::set_document_file(fn); + RecollFilter::set_document_file(mt, fn); string otext; if (!file_to_string(fn, otext)) { LOGINFO(("textHtmlToDoc: cant read: %s\n", fn.c_str())); return false; } m_filename = fn; - return set_document_string(otext); + return set_document_string(mt, otext); } -bool MimeHandlerHtml::set_document_string(const string& htext) +bool MimeHandlerHtml::set_document_string(const string& mt, + const string& htext) { + RecollFilter::set_document_string(mt, htext); m_html = htext; m_havedoc = true; diff --git a/src/internfile/mh_html.h b/src/internfile/mh_html.h index f6f1ff90..d6335413 100644 --- a/src/internfile/mh_html.h +++ b/src/internfile/mh_html.h @@ -26,11 +26,15 @@ */ class MimeHandlerHtml : public RecollFilter { public: - MimeHandlerHtml(RclConfig *cnf, const string& mt) - : RecollFilter(cnf, mt) {} - virtual ~MimeHandlerHtml() {} - virtual bool set_document_file(const string &file_path); - virtual bool set_document_string(const 
string &data); + MimeHandlerHtml(RclConfig *cnf, const string& id) + : RecollFilter(cnf, id) + { + } + virtual ~MimeHandlerHtml() + { + } + virtual bool set_document_file(const string& mt, const string &file_path); + virtual bool set_document_string(const string& mt, const string &data); virtual bool is_data_input_ok(DataInput input) const { if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING) return true; diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp index ce327cd0..84268536 100644 --- a/src/internfile/mh_mail.cpp +++ b/src/internfile/mh_mail.cpp @@ -46,8 +46,8 @@ using namespace std; static const int maxdepth = 20; static const string cstr_mail_charset("charset"); -MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt) - : RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1) +MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &id) + : RecollFilter(cnf, id), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1) { // Look for additional headers to be processed as per config: @@ -85,10 +85,10 @@ void MimeHandlerMail::clear() RecollFilter::clear(); } -bool MimeHandlerMail::set_document_file(const string &fn) +bool MimeHandlerMail::set_document_file(const string& mt, const string &fn) { LOGDEB(("MimeHandlerMail::set_document_file(%s)\n", fn.c_str())); - RecollFilter::set_document_file(fn); + RecollFilter::set_document_file(mt, fn); if (m_fd >= 0) { close(m_fd); m_fd = -1; @@ -123,10 +123,12 @@ bool MimeHandlerMail::set_document_file(const string &fn) return true; } -bool MimeHandlerMail::set_document_string(const string &msgtxt) +bool MimeHandlerMail::set_document_string(const string& mt, + const string &msgtxt) { LOGDEB1(("MimeHandlerMail::set_document_string\n")); LOGDEB2(("Message text: [%s]\n", msgtxt.c_str())); + RecollFilter::set_document_string(mt, msgtxt); delete m_stream; if (!m_forPreview) { @@ -614,11 +616,11 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) // Handle html 
stripping and transcoding to utf8 if (!stringlowercmp("text/html", content_type.value)) { - MimeHandlerHtml mh(m_config, "text/html"); + MimeHandlerHtml mh(m_config, "1234"); mh.set_property(Dijon::Filter::OPERATING_MODE, m_forPreview ? "view" : "index"); mh.set_property(Dijon::Filter::DEFAULT_CHARSET, charset); - mh.set_document_string(body); + mh.set_document_string("text/html", body); mh.next_document(); map::const_iterator it = mh.get_meta_data().find(cstr_dj_keycontent); diff --git a/src/internfile/mh_mail.h b/src/internfile/mh_mail.h index 1808af66..0a2e93d5 100644 --- a/src/internfile/mh_mail.h +++ b/src/internfile/mh_mail.h @@ -39,10 +39,10 @@ class MHMailAttach; */ class MimeHandlerMail : public RecollFilter { public: - MimeHandlerMail(RclConfig *cnf, const string &mt); + MimeHandlerMail(RclConfig *cnf, const string &id); virtual ~MimeHandlerMail(); - virtual bool set_document_file(const string& file_path); - virtual bool set_document_string(const string& data); + virtual bool set_document_file(const string& mt, const string& file_path); + virtual bool set_document_string(const string& mt, const string& data); virtual bool is_data_input_ok(DataInput input) const { if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING) return true; diff --git a/src/internfile/mh_mbox.cpp b/src/internfile/mh_mbox.cpp index ff1c05a9..fbdb0b52 100644 --- a/src/internfile/mh_mbox.cpp +++ b/src/internfile/mh_mbox.cpp @@ -251,10 +251,10 @@ void MimeHandlerMbox::clear() RecollFilter::clear(); } -bool MimeHandlerMbox::set_document_file(const string &fn) +bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn) { LOGDEB(("MimeHandlerMbox::set_document_file(%s)\n", fn.c_str())); - RecollFilter::set_document_file(fn); + RecollFilter::set_document_file(mt, fn); m_fn = fn; if (m_vfp) { fclose((FILE *)m_vfp); @@ -598,8 +598,8 @@ int main(int argc, char **argv) exit(1); } config->setKeyDir(path_getfather(filename)); - MimeHandlerMbox mh(config, "text/x-mail"); - 
if (!mh.set_document_file(filename)) { + MimeHandlerMbox mh(config, "some_id"); + if (!mh.set_document_file("text/x-mail", filename)) { cerr << "set_document_file failed" << endl; exit(1); } diff --git a/src/internfile/mh_mbox.h b/src/internfile/mh_mbox.h index 9ad4a03a..7238fee2 100644 --- a/src/internfile/mh_mbox.h +++ b/src/internfile/mh_mbox.h @@ -31,12 +31,12 @@ using std::vector; */ class MimeHandlerMbox : public RecollFilter { public: - MimeHandlerMbox(RclConfig *cnf, const string& mime) - : RecollFilter(cnf, mime), m_vfp(0), m_msgnum(0), + MimeHandlerMbox(RclConfig *cnf, const string& id) + : RecollFilter(cnf, id), m_vfp(0), m_msgnum(0), m_lineno(0), m_fsize(0) {} virtual ~MimeHandlerMbox(); - virtual bool set_document_file(const string &file_path); + virtual bool set_document_file(const string& mt, const string &file_path); virtual bool next_document(); virtual bool skip_to_document(const string& ipath) { m_ipath = ipath; diff --git a/src/internfile/mh_symlink.h b/src/internfile/mh_symlink.h index 8ce2dbea..f144ecf6 100644 --- a/src/internfile/mh_symlink.h +++ b/src/internfile/mh_symlink.h @@ -35,12 +35,16 @@ */ class MimeHandlerSymlink : public RecollFilter { public: - MimeHandlerSymlink(RclConfig *cnf, const std::string& mt) - : RecollFilter(cnf, mt) {} - virtual ~MimeHandlerSymlink() {} - virtual bool set_document_file(const string& fn) + MimeHandlerSymlink(RclConfig *cnf, const std::string& id) + : RecollFilter(cnf, id) { - RecollFilter::set_document_file(fn); + } + virtual ~MimeHandlerSymlink() + { + } + virtual bool set_document_file(const string& mt, const string& fn) + { + RecollFilter::set_document_file(mt, fn); m_fn = fn; return m_havedoc = true; } diff --git a/src/internfile/mh_text.cpp b/src/internfile/mh_text.cpp index 04e7a832..6f2fa7fa 100644 --- a/src/internfile/mh_text.cpp +++ b/src/internfile/mh_text.cpp @@ -39,11 +39,11 @@ const int MB = 1024*1024; const int KB = 1024; // Process a plain text file -bool 
MimeHandlerText::set_document_file(const string &fn) +bool MimeHandlerText::set_document_file(const string& mt, const string &fn) { LOGDEB(("MimeHandlerText::set_document_file: [%s]\n", fn.c_str())); - RecollFilter::set_document_file(fn); + RecollFilter::set_document_file(mt, fn); m_fn = fn; // file size for oversize check @@ -91,8 +91,9 @@ bool MimeHandlerText::set_document_file(const string &fn) return true; } -bool MimeHandlerText::set_document_string(const string& otext) +bool MimeHandlerText::set_document_string(const string& mt, const string& otext) { + RecollFilter::set_document_string(mt, otext); m_text = otext; if (!m_forPreview) { string md5, xmd5; diff --git a/src/internfile/mh_text.h b/src/internfile/mh_text.h index 800822fe..1861ee76 100644 --- a/src/internfile/mh_text.h +++ b/src/internfile/mh_text.h @@ -30,11 +30,15 @@ using std::string; */ class MimeHandlerText : public RecollFilter { public: - MimeHandlerText(RclConfig *cnf, const string& mt) - : RecollFilter(cnf, mt), m_paging(false), m_offs(0) {} - virtual ~MimeHandlerText() {} - virtual bool set_document_file(const string &file_path); - virtual bool set_document_string(const string&); + MimeHandlerText(RclConfig *cnf, const string& id) + : RecollFilter(cnf, id), m_paging(false), m_offs(0) + { + } + virtual ~MimeHandlerText() + { + } + virtual bool set_document_file(const string& mt, const string &file_path); + virtual bool set_document_string(const string&, const string&); virtual bool is_data_input_ok(DataInput input) const { if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING) return true; diff --git a/src/internfile/mh_unknown.h b/src/internfile/mh_unknown.h index 206b98ae..b84ee4f0 100644 --- a/src/internfile/mh_unknown.h +++ b/src/internfile/mh_unknown.h @@ -28,14 +28,20 @@ */ class MimeHandlerUnknown : public RecollFilter { public: - MimeHandlerUnknown(RclConfig *cnf, const string& mt) - : RecollFilter(cnf, mt) {} - virtual ~MimeHandlerUnknown() {} - virtual bool 
set_document_file(const string& fn) { - RecollFilter::set_document_file(fn); + MimeHandlerUnknown(RclConfig *cnf, const string& id) + : RecollFilter(cnf, id) + { + } + virtual ~MimeHandlerUnknown() + { + } + virtual bool set_document_file(const string& mt, const string& fn) + { + RecollFilter::set_document_file(mt, fn); return m_havedoc = true; } - virtual bool set_document_string(const string&) { + virtual bool set_document_string(const string& mt, const string& s) { + RecollFilter::set_document_string(mt, s); return m_havedoc = true; } virtual bool next_document() { diff --git a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp index 5f33e759..3b2b2681 100644 --- a/src/internfile/mimehandler.cpp +++ b/src/internfile/mimehandler.cpp @@ -30,6 +30,7 @@ using namespace std; #include "debuglog.h" #include "rclconfig.h" #include "smallut.h" +#include "md5.h" #include "mh_exec.h" #include "mh_execm.h" @@ -45,24 +46,26 @@ using namespace std; // handlers. There can be several instances for a given mime type // (think email attachment in email message: 2 rfc822 handlers are // needed simulteanously) -static multimap o_handlers; -static list::iterator> o_hlru; -typedef list::iterator>::iterator hlruit_tp; +static multimap o_handlers; +static list::iterator> o_hlru; +typedef list::iterator>::iterator hlruit_tp; static PTMutexInit o_handlers_mutex; static const unsigned int max_handlers_cache_size = 100; /* Look for mime handler in pool */ -static Dijon::Filter *getMimeHandlerFromCache(const string& key) +static RecollFilter *getMimeHandlerFromCache(const string& key) { PTMutexLocker locker(o_handlers_mutex); + string xdigest; + MD5HexPrint(key, xdigest); LOGDEB(("getMimeHandlerFromCache: %s cache size %u\n", - key.c_str(), o_handlers.size())); + xdigest.c_str(), o_handlers.size())); - multimap::iterator it = o_handlers.find(key); + multimap::iterator it = o_handlers.find(key); if (it != o_handlers.end()) { - Dijon::Filter *h = it->second; + RecollFilter *h = 
it->second; hlruit_tp it1 = find(o_hlru.begin(), o_hlru.end(), it); if (it1 != o_hlru.end()) { o_hlru.erase(it1); @@ -71,20 +74,22 @@ static Dijon::Filter *getMimeHandlerFromCache(const string& key) } o_handlers.erase(it); LOGDEB(("getMimeHandlerFromCache: %s found size %u\n", - key.c_str(), o_handlers.size())); + xdigest.c_str(), o_handlers.size())); return h; } - LOGDEB(("getMimeHandlerFromCache: %s not found\n", key.c_str())); + LOGDEB(("getMimeHandlerFromCache: %s not found\n", xdigest.c_str())); return 0; } /* Return mime handler to pool */ -void returnMimeHandler(Dijon::Filter *handler) +void returnMimeHandler(RecollFilter *handler) { - typedef multimap::value_type value_type; + typedef multimap::value_type value_type; - if (handler==0) + if (handler == 0) { + LOGERR(("returnMimeHandler: bad parameter\n")); return; + } handler->clear(); PTMutexLocker locker(o_handlers_mutex); @@ -97,7 +102,7 @@ void returnMimeHandler(Dijon::Filter *handler) // at the same time either because it occurs several times in a // stack (ie mail attachment to mail), or because several threads // are processing the same mime type at the same time. - multimap::iterator it; + multimap::iterator it; if (o_handlers.size() >= max_handlers_cache_size) { static int once = 1; if (once) { @@ -114,15 +119,15 @@ void returnMimeHandler(Dijon::Filter *handler) o_handlers.erase(it); } } - it = o_handlers.insert(value_type(handler->get_mime_type(), handler)); + it = o_handlers.insert(value_type(handler->get_id(), handler)); o_hlru.push_front(it); } void clearMimeHandlerCache() { LOGDEB(("clearMimeHandlerCache()\n")); - typedef multimap::value_type value_type; - map::iterator it; + typedef multimap::value_type value_type; + map::iterator it; PTMutexLocker locker(o_handlers_mutex); for (it = o_handlers.begin(); it != o_handlers.end(); it++) { delete it->second; @@ -132,26 +137,32 @@ void clearMimeHandlerCache() /** For mime types set as "internal" in mimeconf: * create appropriate handler object. 
*/ -static Dijon::Filter *mhFactory(RclConfig *config, const string &mime) +static RecollFilter *mhFactory(RclConfig *config, const string &mime, + bool nobuild, string& id) { LOGDEB2(("mhFactory(%s)\n", mime.c_str())); string lmime(mime); stringtolower(lmime); if (cstr_textplain == lmime) { LOGDEB2(("mhFactory(%s): returning MimeHandlerText\n", mime.c_str())); - return new MimeHandlerText(config, lmime); + MD5String("MimeHandlerText", id); + return nobuild ? 0 : new MimeHandlerText(config, id); } else if ("text/html" == lmime) { LOGDEB2(("mhFactory(%s): returning MimeHandlerHtml\n", mime.c_str())); - return new MimeHandlerHtml(config, lmime); + MD5String("MimeHandlerHtml", id); + return nobuild ? 0 : new MimeHandlerHtml(config, id); } else if ("text/x-mail" == lmime) { LOGDEB2(("mhFactory(%s): returning MimeHandlerMbox\n", mime.c_str())); - return new MimeHandlerMbox(config, lmime); + MD5String("MimeHandlerMbox", id); + return nobuild ? 0 : new MimeHandlerMbox(config, id); } else if ("message/rfc822" == lmime) { LOGDEB2(("mhFactory(%s): returning MimeHandlerMail\n", mime.c_str())); - return new MimeHandlerMail(config, lmime); + MD5String("MimeHandlerMail", id); + return nobuild ? 0 : new MimeHandlerMail(config, id); } else if ("inode/symlink" == lmime) { LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str())); - return new MimeHandlerSymlink(config, lmime); + MD5String("MimeHandlerSymlink", id); + return nobuild ? 0 : new MimeHandlerSymlink(config, id); } else if (lmime.find("text/") == 0) { // Try to handle unknown text/xx as text/plain. This // only happen if the text/xx was defined as "internal" in @@ -159,14 +170,16 @@ static Dijon::Filter *mhFactory(RclConfig *config, const string &mime) // allows indexing and previewing as text/plain (no filter // exec) but still opening with a specific editor. 
LOGDEB2(("mhFactory(%s): returning MimeHandlerText(x)\n",mime.c_str())); - return new MimeHandlerText(config, lmime); + MD5String("MimeHandlerText", id); + return nobuild ? 0 : new MimeHandlerText(config, id); } else { // We should not get there. It means that "internal" was set // as a handler in mimeconf for a mime type we actually can't // handle. LOGERR(("mhFactory: mime type [%s] set as internal but unknown\n", lmime.c_str())); - return new MimeHandlerUnknown(config, lmime); + MD5String("MimeHandlerUnknown", id); + return nobuild ? 0 : new MimeHandlerUnknown(config, id); } } @@ -181,10 +194,11 @@ static const string cstr_mh_charset("charset"); * a ';' inside a quoted string for now. Can't see a use for it. */ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs, - bool multiple) + bool multiple, const string& id) { ConfSimple attrs; string cmdstr; + if (!cfg->valueSplitAttributes(hs, cmdstr, attrs)) { LOGERR(("mhExecFactory: bad config line for [%s]: [%s]\n", mtype.c_str(), hs.c_str())); @@ -200,8 +214,8 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs, return 0; } MimeHandlerExec *h = multiple ? - new MimeHandlerExecMultiple(cfg, mtype.c_str()) : - new MimeHandlerExec(cfg, mtype.c_str()); + new MimeHandlerExecMultiple(cfg, id) : + new MimeHandlerExec(cfg, id); list::iterator it = cmdtoks.begin(); h->params.push_back(cfg->findFilter(*it++)); h->params.insert(h->params.end(), it, cmdtoks.end()); @@ -228,32 +242,27 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs, } /* Get handler/filter object for given mime type: */ -Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg, +RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg, bool filtertypes) { LOGDEB(("getMimeHandler: mtype [%s] filtertypes %d\n", mtype.c_str(), filtertypes)); - Dijon::Filter *h = 0; + RecollFilter *h = 0; // Get handler definition for mime type. 
We do this even if an - // appropriate handler object may be in the cache (indexed by mime - // type). This is fast, and necessary to conform to the + // appropriate handler object may be in the cache. + // This is fast, and necessary to conform to the // configuration, (ie: text/html might be filtered out by // indexedmimetypes but an html handler could still be in the // cache because it was needed by some other interning stack). string hs; hs = cfg->getMimeHandlerDef(mtype, filtertypes); + string id; - if (!hs.empty()) { // Got a handler definition line - - // Do we already have a handler object in the cache ? - h = getMimeHandlerFromCache(mtype); - if (h != 0) - goto out; - LOGDEB2(("getMimeHandler: %s not in cache\n", mtype.c_str())); - - // Not in cache. Break definition into type and name/command - // string and instanciate handler object + if (!hs.empty()) { + // Got a handler definition line + // Break definition into type (internal/exec/execm) + // and name/command string string::size_type b1 = hs.find_first_of(" \t"); string handlertype = hs.substr(0, b1); string cmdstr; @@ -261,7 +270,30 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg, cmdstr = hs.substr(b1); trimstring(cmdstr); } - if (!stringlowercmp("internal", handlertype)) { + bool internal = !stringlowercmp("internal", handlertype); + if (internal) { + // For internal types let the factory compute the id + mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, true, id); + } else { + // exec/execm: use the md5 of the def line + MD5String(hs, id); + } + +#if 0 + { // string xdigest; LOGDEB2(("getMimeHandler: [%s] hs [%s] id [%s]\n", + //mtype.c_str(), hs.c_str(), MD5HexPrint(id, xdigest).c_str())); + } +#endif + + // Do we already have a handler object in the cache ? + h = getMimeHandlerFromCache(id); + if (h != 0) + goto out; + + LOGDEB2(("getMimeHandler: %s not in cache\n", mtype.c_str())); + + // Not in cache. 
+ if (internal) { // If there is a parameter after "internal" it's the mime // type to use. This is so that we can have bogus mime // types like text/x-purple-html-log (for ie: specific @@ -270,14 +302,7 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg, // better and the latter will probably go away at some // point in the future. LOGDEB2(("handlertype internal, cmdstr [%s]\n", cmdstr.c_str())); - if (!cmdstr.empty()) { - // Have to redo the cache thing. Maybe we should - // rather just recurse instead ? - if ((h = getMimeHandlerFromCache(cmdstr)) == 0) - h = mhFactory(cfg, cmdstr); - } else { - h = mhFactory(cfg, mtype); - } + h = mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, false, id); goto out; } else if (!stringlowercmp("dll", handlertype)) { } else { @@ -287,10 +312,10 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg, goto out; } if (!stringlowercmp("exec", handlertype)) { - h = mhExecFactory(cfg, mtype, cmdstr, false); + h = mhExecFactory(cfg, mtype, cmdstr, false, id); goto out; } else if (!stringlowercmp("execm", handlertype)) { - h = mhExecFactory(cfg, mtype, cmdstr, true); + h = mhExecFactory(cfg, mtype, cmdstr, true, id); goto out; } else { LOGERR(("getMimeHandler: bad line for %s: %s\n", @@ -305,20 +330,20 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg, // Finally, unhandled files are either ignored or their name and // generic metadata is indexed, depending on configuration - {bool indexunknown = false; + { + bool indexunknown = false; cfg->getConfParam("indexallfilenames", &indexunknown); if (indexunknown) { - if ((h = getMimeHandlerFromCache("application/octet-stream")) == 0) - h = new MimeHandlerUnknown(cfg, "application/octet-stream"); - goto out; - } else { - goto out; + MD5String("MimeHandlerUnknown", id); + if ((h = getMimeHandlerFromCache(id)) == 0) + h = new MimeHandlerUnknown(cfg, id); } + goto out; } out: if (h) { - h->set_property(Dijon::Filter::DEFAULT_CHARSET, 
cfg->getDefCharset()); + h->set_property(RecollFilter::DEFAULT_CHARSET, cfg->getDefCharset()); // In multithread context, and in case this handler is out // from the cache, it may have a config pointer belonging to // another thread. Fix it. diff --git a/src/internfile/mimehandler.h b/src/internfile/mimehandler.h index 03ec039e..c85ec0ba 100644 --- a/src/internfile/mimehandler.h +++ b/src/internfile/mimehandler.h @@ -21,26 +21,23 @@ #include #include -#include -using std::string; -using std::list; -#include +#include "Filter.h" +#include "cstr.h" class RclConfig; class RecollFilter : public Dijon::Filter { public: - RecollFilter(RclConfig *config, const string& mtype) - : Dijon::Filter(mtype), m_config(config), - m_forPreview(false), m_havedoc(false) + RecollFilter(RclConfig *config, const std::string& id) + : m_config(config), m_forPreview(false), m_havedoc(false), m_id(id) {} virtual ~RecollFilter() {} virtual void setConfig(RclConfig *config) { m_config = config; } - virtual bool set_property(Properties p, const string &v) { + virtual bool set_property(Properties p, const std::string &v) { switch (p) { case DJF_UDI: m_udi = v; @@ -59,7 +56,12 @@ public: } // We don't use this for now - virtual bool set_document_uri(const std::string &) {return false;} + virtual bool set_document_uri(const std::string& mtype, + const std::string &) + { + m_mimeType = mtype; + return false; + } // This does nothing right now but should be called from the // subclass method in case we need some common processing one day @@ -69,12 +71,24 @@ public: // having a pure virtual called from here and implemented in the // subclass) would have to be repeated in each derived class. It's // just simpler this way. 
- virtual bool set_document_file(const string & /*file_path*/) {return true;} + virtual bool set_document_file(const std::string& mtype, + const std::string & /*file_path*/) + { + m_mimeType = mtype; + return true; + } // Default implementations - virtual bool set_document_string(const std::string &) {return false;} - virtual bool set_document_data(const char *cp, unsigned int sz) { - return set_document_string(string(cp, sz)); + virtual bool set_document_string(const std::string& mtype, + const std::string &) + { + m_mimeType = mtype; + return false; + } + virtual bool set_document_data(const std::string& mtype, + const char *cp, unsigned int sz) + { + return set_document_string(mtype, std::string(cp, sz)); } virtual void set_docsize(size_t size) @@ -87,7 +101,7 @@ public: virtual bool has_documents() const {return m_havedoc;} // Most doc types are single-doc - virtual bool skip_to_document(const string& s) { + virtual bool skip_to_document(const std::string& s) { if (s.empty()) return true; return false; @@ -99,10 +113,15 @@ public: return false; } - virtual string get_error() const { + virtual std::string get_error() const { return m_reason; } + virtual const std::string& get_id() const + { + return m_id; + } + // "Call super" anti-pattern again. Must be called from derived // classes which reimplement clear() virtual void clear() { @@ -114,17 +133,20 @@ public: // This only makes sense if the contents are currently txt/plain // It converts from keyorigcharset to UTF-8 and sets keycharset. 
- bool txtdcode(const string& who); + bool txtdcode(const std::string& who); protected: bool preview() {return m_forPreview;} RclConfig *m_config; bool m_forPreview; - string m_dfltInputCharset; - string m_reason; + std::string m_dfltInputCharset; + std::string m_reason; bool m_havedoc; - string m_udi; // May be set by creator as a hint + std::string m_udi; // May be set by creator as a hint + // m_id is an md5 of the filter definition line (from mimeconf) and + // is used when fetching/returning filters to / from the cache. + std::string m_id; }; /** @@ -135,11 +157,11 @@ protected: * @param filtertypes decide if we should restrict to types in * indexedmimetypes (if this is set at all). */ -extern Dijon::Filter *getMimeHandler(const std::string &mtyp, RclConfig *cfg, +extern RecollFilter *getMimeHandler(const std::string &mtyp, RclConfig *cfg, bool filtertypes=false); /// Free up filter for reuse (you can also delete it) -extern void returnMimeHandler(Dijon::Filter *); +extern void returnMimeHandler(RecollFilter *); /// Clean up cache at the end of an indexing pass. For people who use /// the GUI to index: avoid all those filter processes forever hanging diff --git a/src/qtgui/preview_w.cpp b/src/qtgui/preview_w.cpp index 4257168a..c1b67d79 100644 --- a/src/qtgui/preview_w.cpp +++ b/src/qtgui/preview_w.cpp @@ -62,10 +62,6 @@ using std::pair; #include "docseqhist.h" #include "rclhelp.h" -#ifndef MIN -#define MIN(A,B) ((A)<(B)?(A):(B)) -#endif - // Subclass plainToRich to add s and anchors to the preview text class PlainToRichQtPreview : public PlainToRich { public: diff --git a/src/query/reslistpager.cpp b/src/query/reslistpager.cpp index f27b8511..7d999f92 100644 --- a/src/query/reslistpager.cpp +++ b/src/query/reslistpager.cpp @@ -24,8 +24,10 @@ #include #include +#include using std::ostringstream; using std::endl; +using std::list; #include "cstr.h" #include "reslistpager.h"