Add the nomd5types parameter to set the file types for which dedup is not that useful and MD5 computation is expensive (e.g. audio files). Replace the "call parent" misfeature with a call to a virtual in the MimeHandler constructor. Fix the indentation of log calls.
This commit is contained in:
parent
fea8ff6e41
commit
b55f4b3b0a
@ -34,14 +34,6 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
MimeHandlerExec::MimeHandlerExec(RclConfig *cnf, const std::string& id)
|
||||
: RecollFilter(cnf, id), missingHelper(false), m_filtermaxseconds(900),
|
||||
m_filtermaxmbytes(0)
|
||||
{
|
||||
m_config->getConfParam("filtermaxseconds", &m_filtermaxseconds);
|
||||
m_config->getConfParam("filtermaxmbytes", &m_filtermaxmbytes);
|
||||
}
|
||||
|
||||
MEAdv::MEAdv(int maxsecs)
|
||||
: m_filtermaxseconds(maxsecs)
|
||||
{
|
||||
@ -55,10 +47,11 @@ void MEAdv::reset()
|
||||
|
||||
void MEAdv::newData(int n)
|
||||
{
|
||||
LOGDEB2("MHExec:newData(" << (n) << ")\n" );
|
||||
LOGDEB2("MHExec:newData(" << n << ")\n");
|
||||
if (m_filtermaxseconds > 0 &&
|
||||
time(0L) - m_start > m_filtermaxseconds) {
|
||||
LOGERR("MimeHandlerExec: filter timeout (" << (m_filtermaxseconds) << " S)\n" );
|
||||
LOGERR("MimeHandlerExec: filter timeout (" << m_filtermaxseconds <<
|
||||
" S)\n");
|
||||
throw HandlerTimeout();
|
||||
}
|
||||
// If a cancel request was set by the signal handler (or by us
|
||||
@ -67,9 +60,65 @@ void MEAdv::newData(int n)
|
||||
CancelCheck::instance().checkCancel();
|
||||
}
|
||||
|
||||
|
||||
// Build an external-command handler. The time and memory limits start
// from built-in defaults (900 for "filtermaxseconds", 0 for
// "filtermaxmbytes") and are then passed to getConfParam() so that the
// configuration can supply its own values. The MD5-suppression flags all
// start out false; they are worked out later, in set_document_file_impl().
MimeHandlerExec::MimeHandlerExec(RclConfig *cnf, const std::string& id)
    : RecollFilter(cnf, id),
      missingHelper(false),
      m_filtermaxseconds(900),
      m_filtermaxmbytes(0),
      m_handlernomd5(false),
      m_hnomd5init(false),
      m_nomd5(false)
{
    m_config->getConfParam("filtermaxseconds", &m_filtermaxseconds);
    m_config->getConfParam("filtermaxmbytes", &m_filtermaxmbytes);
}
|
||||
|
||||
bool MimeHandlerExec::set_document_file_impl(const std::string& mt,
|
||||
const std::string &file_path)
|
||||
{
|
||||
// Can't do this in constructor as script name not set yet. Do it
|
||||
// once on first call
|
||||
unordered_set<string> nomd5tps;
|
||||
bool tpsread(false);
|
||||
|
||||
if (false == m_hnomd5init) {
|
||||
m_hnomd5init = true;
|
||||
if (m_config->getConfParam("nomd5types", &nomd5tps)) {
|
||||
tpsread = true;
|
||||
if (!nomd5tps.empty()) {
|
||||
if (params.size() &&
|
||||
nomd5tps.find(path_getsimple(params[0])) !=
|
||||
nomd5tps.end()) {
|
||||
m_handlernomd5 = true;
|
||||
}
|
||||
// On windows the 1st param is often a script interp
|
||||
// name (e.g. "python", and the script name is 2nd
|
||||
if (params.size() > 1 &&
|
||||
nomd5tps.find(path_getsimple(params[1])) !=
|
||||
nomd5tps.end()) {
|
||||
m_handlernomd5 = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_nomd5 = m_handlernomd5;
|
||||
|
||||
if (!m_nomd5) {
|
||||
// Check for MIME type based md5 suppression
|
||||
if (!tpsread) {
|
||||
m_config->getConfParam("nomd5types", &nomd5tps);
|
||||
}
|
||||
if (nomd5tps.find(mt) != nomd5tps.end()) {
|
||||
m_nomd5 = true;
|
||||
}
|
||||
}
|
||||
|
||||
m_fn = file_path;
|
||||
m_havedoc = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MimeHandlerExec::skip_to_document(const string& ipath)
|
||||
{
|
||||
LOGDEB("MimeHandlerExec:skip_to_document: [" << (ipath) << "]\n" );
|
||||
LOGDEB("MimeHandlerExec:skip_to_document: [" << ipath << "]\n");
|
||||
m_ipath = ipath;
|
||||
return true;
|
||||
}
|
||||
@ -82,13 +131,13 @@ bool MimeHandlerExec::next_document()
|
||||
return false;
|
||||
m_havedoc = false;
|
||||
if (missingHelper) {
|
||||
LOGDEB("MimeHandlerExec::next_document(): helper known missing\n" );
|
||||
LOGDEB("MimeHandlerExec::next_document(): helper known missing\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (params.empty()) {
|
||||
// Hu ho
|
||||
LOGERR("MimeHandlerExec::mkDoc: empty params\n" );
|
||||
LOGERR("MimeHandlerExec::next_document: empty params\n");
|
||||
m_reason = "RECFILTERROR BADCONFIG";
|
||||
return false;
|
||||
}
|
||||
@ -110,7 +159,7 @@ bool MimeHandlerExec::next_document()
|
||||
mexec.setAdvise(&adv);
|
||||
mexec.putenv("RECOLL_CONFDIR", m_config->getConfDir());
|
||||
mexec.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
||||
"RECOLL_FILTER_FORPREVIEW=no");
|
||||
"RECOLL_FILTER_FORPREVIEW=no");
|
||||
mexec.setrlimit_as(m_filtermaxmbytes);
|
||||
|
||||
int status;
|
||||
@ -125,7 +174,8 @@ bool MimeHandlerExec::next_document()
|
||||
}
|
||||
|
||||
if (status) {
|
||||
LOGERR("MimeHandlerExec: command status 0x" << (status) << " for " << (cmd) << "\n" );
|
||||
LOGERR("MimeHandlerExec: command status 0x" << status << " for " <<
|
||||
cmd << "\n");
|
||||
if (WIFEXITED(status) && WEXITSTATUS(status) == 127) {
|
||||
// That's how execmd signals a failed exec (most probably
|
||||
// a missing command). Let's hope no filter uses the same value as
|
||||
@ -188,12 +238,13 @@ void MimeHandlerExec::finaldetails()
|
||||
m_metaData[cstr_dj_keymt] = cfgFilterOutputMtype.empty() ? "text/html" :
|
||||
cfgFilterOutputMtype;
|
||||
|
||||
if (!m_forPreview) {
|
||||
if (!m_forPreview && !m_nomd5) {
|
||||
string md5, xmd5, reason;
|
||||
if (MD5File(m_fn, md5, &reason)) {
|
||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||
} else {
|
||||
LOGERR("MimeHandlerExec: cant compute md5 for [" << (m_fn) << "]: " << (reason) << "\n" );
|
||||
LOGERR("MimeHandlerExec: cant compute md5 for [" << m_fn << "]: " <<
|
||||
reason << "\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -62,14 +62,6 @@ class MimeHandlerExec : public RecollFilter {
|
||||
|
||||
MimeHandlerExec(RclConfig *cnf, const std::string& id);
|
||||
|
||||
virtual bool set_document_file(const std::string& mt,
|
||||
const std::string &file_path) {
|
||||
RecollFilter::set_document_file(mt, file_path);
|
||||
m_fn = file_path;
|
||||
m_havedoc = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool next_document();
|
||||
virtual bool skip_to_document(const std::string& ipath);
|
||||
|
||||
@ -80,9 +72,17 @@ class MimeHandlerExec : public RecollFilter {
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual bool set_document_file_impl(const std::string& mt,
|
||||
const std::string& file_path);
|
||||
|
||||
std::string m_fn;
|
||||
std::string m_ipath;
|
||||
|
||||
// md5 computation excluded by handler name: can't change after init
|
||||
bool m_handlernomd5;
|
||||
bool m_hnomd5init;
|
||||
// If md5 not excluded by handler name, allow/forbid depending on mime
|
||||
bool m_nomd5;
|
||||
|
||||
// Set up the character set metadata fields and possibly transcode
|
||||
// text/plain output.
|
||||
// @param charset when called from mh_execm, a possible explicit
|
||||
|
||||
@ -178,7 +178,7 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
ostringstream obuf;
|
||||
string file_md5;
|
||||
if (m_filefirst) {
|
||||
if (!m_forPreview) {
|
||||
if (!m_forPreview && !m_nomd5) {
|
||||
string md5, xmd5, reason;
|
||||
if (MD5File(m_fn, md5, &reason)) {
|
||||
file_md5 = MD5HexPrint(md5, xmd5);
|
||||
|
||||
@ -102,22 +102,27 @@ class MimeHandlerExecMultiple : public MimeHandlerExec {
|
||||
/////// End un-cleared stuff.
|
||||
|
||||
public:
|
||||
MimeHandlerExecMultiple(RclConfig *cnf, const string& id)
|
||||
: MimeHandlerExec(cnf, id)
|
||||
{}
|
||||
MimeHandlerExecMultiple(RclConfig *cnf, const std::string& id)
|
||||
: MimeHandlerExec(cnf, id) {
|
||||
}
|
||||
// No resources to clean up, the ExecCmd destructor does it.
|
||||
virtual ~MimeHandlerExecMultiple() {}
|
||||
virtual bool set_document_file(const string& mt, const string &file_path) {
|
||||
m_filefirst = true;
|
||||
return MimeHandlerExec::set_document_file(mt, file_path);
|
||||
}
|
||||
|
||||
virtual bool next_document();
|
||||
|
||||
// skip_to and clear inherited from MimeHandlerExec
|
||||
|
||||
protected:
|
||||
// This is the only 2nd-level derived handler class. Use call-super.
|
||||
virtual bool set_document_file_impl(const std::string& mt,
|
||||
const std::string &file_path) {
|
||||
m_filefirst = true;
|
||||
return MimeHandlerExec::set_document_file_impl(mt, file_path);
|
||||
}
|
||||
|
||||
private:
|
||||
bool startCmd();
|
||||
bool readDataElement(string& name, string& data);
|
||||
bool readDataElement(std::string& name, std::string& data);
|
||||
bool m_filefirst;
|
||||
int m_maxmemberkb;
|
||||
MEAdv m_adv;
|
||||
|
||||
@ -34,23 +34,21 @@ using namespace std;
|
||||
#endif /* NO_NAMESPACES */
|
||||
|
||||
|
||||
bool MimeHandlerHtml::set_document_file(const string& mt, const string &fn)
|
||||
bool MimeHandlerHtml::set_document_file_impl(const string& mt, const string &fn)
|
||||
{
|
||||
LOGDEB0("textHtmlToDoc: " << (fn) << "\n" );
|
||||
RecollFilter::set_document_file(mt, fn);
|
||||
LOGDEB0("textHtmlToDoc: " << fn << "\n");
|
||||
string otext;
|
||||
if (!file_to_string(fn, otext)) {
|
||||
LOGINFO("textHtmlToDoc: cant read: " << (fn) << "\n" );
|
||||
LOGINFO("textHtmlToDoc: cant read: " << fn << "\n");
|
||||
return false;
|
||||
}
|
||||
m_filename = fn;
|
||||
return set_document_string(mt, otext);
|
||||
}
|
||||
|
||||
bool MimeHandlerHtml::set_document_string(const string& mt,
|
||||
const string& htext)
|
||||
bool MimeHandlerHtml::set_document_string_impl(const string& mt,
|
||||
const string& htext)
|
||||
{
|
||||
RecollFilter::set_document_string(mt, htext);
|
||||
m_html = htext;
|
||||
m_havedoc = true;
|
||||
|
||||
@ -73,12 +71,14 @@ bool MimeHandlerHtml::next_document()
|
||||
m_filename.erase();
|
||||
|
||||
string charset = m_dfltInputCharset;
|
||||
LOGDEB("MHHtml::next_doc.: default supposed input charset: [" << (charset) << "]\n" );
|
||||
LOGDEB("MHHtml::next_doc.: default supposed input charset: [" << charset
|
||||
<< "]\n");
|
||||
// Override default input charset if someone took care to set one:
|
||||
map<string,string>::const_iterator it = m_metaData.find(cstr_dj_keycharset);
|
||||
if (it != m_metaData.end() && !it->second.empty()) {
|
||||
charset = it->second;
|
||||
LOGDEB("MHHtml: next_doc.: input charset from ext. metadata: [" << (charset) << "]\n" );
|
||||
LOGDEB("MHHtml: next_doc.: input charset from ext. metadata: [" <<
|
||||
charset << "]\n");
|
||||
}
|
||||
|
||||
// - We first try to convert from the supposed charset
|
||||
@ -91,13 +91,15 @@ bool MimeHandlerHtml::next_document()
|
||||
MyHtmlParser result;
|
||||
for (int pass = 0; pass < 2; pass++) {
|
||||
string transcoded;
|
||||
LOGDEB("Html::mkDoc: pass " << (pass) << "\n" );
|
||||
LOGDEB("Html::mkDoc: pass " << pass << "\n");
|
||||
MyHtmlParser p;
|
||||
|
||||
// Try transcoding. If it fails, use original text.
|
||||
int ecnt;
|
||||
if (!transcode(m_html, transcoded, charset, "UTF-8", &ecnt)) {
|
||||
LOGDEB("textHtmlToDoc: transcode failed from cs '" << (charset) << "' to UTF-8 for[" << (fn.empty()?"unknown":fn) << "]" );
|
||||
LOGDEB("textHtmlToDoc: transcode failed from cs '" <<
|
||||
charset << "' to UTF-8 for[" << (fn.empty()?"unknown":fn) <<
|
||||
"]");
|
||||
transcoded = m_html;
|
||||
// We don't know the charset, at all
|
||||
p.reset_charsets();
|
||||
@ -105,9 +107,11 @@ bool MimeHandlerHtml::next_document()
|
||||
} else {
|
||||
if (ecnt) {
|
||||
if (pass == 0) {
|
||||
LOGDEB("textHtmlToDoc: init transcode had " << (ecnt) << " errors for [" << (fn.empty()?"unknown":fn) << "]\n" );
|
||||
LOGDEB("textHtmlToDoc: init transcode had " << ecnt <<
|
||||
" errors for ["<<(fn.empty()?"unknown":fn)<< "]\n");
|
||||
} else {
|
||||
LOGERR("textHtmlToDoc: final transcode had " << (ecnt) << " errors for [" << (fn.empty()?"unknown":fn) << "]\n" );
|
||||
LOGERR("textHtmlToDoc: final transcode had " << ecnt <<
|
||||
" errors for ["<< (fn.empty()?"unknown":fn)<< "]\n");
|
||||
}
|
||||
}
|
||||
// charset has the putative source charset, transcoded is now
|
||||
@ -145,15 +149,16 @@ bool MimeHandlerHtml::next_document()
|
||||
break;
|
||||
}
|
||||
|
||||
LOGDEB("textHtmlToDoc: charset [" << (charset) << "] doc charset [" << (result.get_charset()) << "]\n" );
|
||||
LOGDEB("textHtmlToDoc: charset [" << charset << "] doc charset ["<<
|
||||
result.get_charset() << "]\n");
|
||||
if (!result.get_charset().empty() &&
|
||||
!samecharset(result.get_charset(), result.fromcharset)) {
|
||||
LOGDEB("textHtmlToDoc: reparse for charsets\n" );
|
||||
LOGDEB("textHtmlToDoc: reparse for charsets\n");
|
||||
// Set the origin charset as specified in document before
|
||||
// transcoding again
|
||||
charset = result.get_charset();
|
||||
} else {
|
||||
LOGERR("textHtmlToDoc:: error: non charset exception\n" );
|
||||
LOGERR("textHtmlToDoc:: error: non charset exception\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -26,22 +26,18 @@
|
||||
*/
|
||||
class MimeHandlerHtml : public RecollFilter {
|
||||
public:
|
||||
MimeHandlerHtml(RclConfig *cnf, const string& id)
|
||||
: RecollFilter(cnf, id)
|
||||
{
|
||||
MimeHandlerHtml(RclConfig *cnf, const std::string& id)
|
||||
: RecollFilter(cnf, id) {
|
||||
}
|
||||
virtual ~MimeHandlerHtml()
|
||||
{
|
||||
}
|
||||
virtual bool set_document_file(const string& mt, const string &file_path);
|
||||
virtual bool set_document_string(const string& mt, const string &data);
|
||||
virtual ~MimeHandlerHtml() {}
|
||||
|
||||
virtual bool is_data_input_ok(DataInput input) const {
|
||||
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
virtual bool next_document();
|
||||
const string& get_html()
|
||||
const std::string& get_html()
|
||||
{
|
||||
return m_html;
|
||||
}
|
||||
@ -50,9 +46,15 @@ class MimeHandlerHtml : public RecollFilter {
|
||||
m_html.erase();
|
||||
RecollFilter::clear();
|
||||
}
|
||||
protected:
|
||||
virtual bool set_document_file_impl(const std::string& mt,
|
||||
const std::string &file_path);
|
||||
virtual bool set_document_string_impl(const std::string& mt,
|
||||
const std::string &data);
|
||||
|
||||
private:
|
||||
string m_filename;
|
||||
string m_html;
|
||||
std::string m_filename;
|
||||
std::string m_html;
|
||||
};
|
||||
|
||||
#endif /* _HTML_H_INCLUDED_ */
|
||||
|
||||
@ -87,10 +87,9 @@ void MimeHandlerMail::clear()
|
||||
RecollFilter::clear();
|
||||
}
|
||||
|
||||
bool MimeHandlerMail::set_document_file(const string& mt, const string &fn)
|
||||
bool MimeHandlerMail::set_document_file_impl(const string& mt, const string &fn)
|
||||
{
|
||||
LOGDEB("MimeHandlerMail::set_document_file(" << (fn) << ")\n" );
|
||||
RecollFilter::set_document_file(mt, fn);
|
||||
LOGDEB("MimeHandlerMail::set_document_file(" << fn << ")\n");
|
||||
if (m_fd >= 0) {
|
||||
close(m_fd);
|
||||
m_fd = -1;
|
||||
@ -103,12 +102,13 @@ bool MimeHandlerMail::set_document_file(const string& mt, const string &fn)
|
||||
if (MD5File(fn, md5, &reason)) {
|
||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||
} else {
|
||||
LOGERR("MimeHandlerMail: cant md5 [" << (fn) << "]: " << (reason) << "\n" );
|
||||
LOGERR("MimeHandlerMail: md5 [" << fn << "]: " << reason << "\n");
|
||||
}
|
||||
}
|
||||
m_fd = open(fn.c_str(), 0);
|
||||
if (m_fd < 0) {
|
||||
LOGERR("MimeHandlerMail::set_document_file: open(" << (fn) << ") errno " << (errno) << "\n" );
|
||||
LOGERR("MimeHandlerMail::set_document_file: open(" << fn <<
|
||||
") errno " << errno << "\n");
|
||||
return false;
|
||||
}
|
||||
#if defined O_NOATIME && O_NOATIME != 0
|
||||
@ -120,19 +120,18 @@ bool MimeHandlerMail::set_document_file(const string& mt, const string &fn)
|
||||
m_bincdoc = new Binc::MimeDocument;
|
||||
m_bincdoc->parseFull(m_fd);
|
||||
if (!m_bincdoc->isHeaderParsed() && !m_bincdoc->isAllParsed()) {
|
||||
LOGERR("MimeHandlerMail::mkDoc: mime parse error for " << (fn) << "\n" );
|
||||
LOGERR("MimeHandlerMail::mkDoc: mime parse error for " << fn << "\n");
|
||||
return false;
|
||||
}
|
||||
m_havedoc = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MimeHandlerMail::set_document_string(const string& mt,
|
||||
const string &msgtxt)
|
||||
bool MimeHandlerMail::set_document_string_impl(const string& mt,
|
||||
const string& msgtxt)
|
||||
{
|
||||
LOGDEB1("MimeHandlerMail::set_document_string\n" );
|
||||
LOGDEB2("Message text: [" << (msgtxt) << "]\n" );
|
||||
RecollFilter::set_document_string(mt, msgtxt);
|
||||
LOGDEB1("MimeHandlerMail::set_document_string\n");
|
||||
LOGDEB2("Message text: [" << msgtxt << "]\n");
|
||||
delete m_stream;
|
||||
|
||||
if (!m_forPreview) {
|
||||
@ -142,17 +141,19 @@ bool MimeHandlerMail::set_document_string(const string& mt,
|
||||
}
|
||||
|
||||
if ((m_stream = new stringstream(msgtxt)) == 0 || !m_stream->good()) {
|
||||
LOGERR("MimeHandlerMail::set_document_string: stream create error.msgtxt.size() " << (int(msgtxt.size())) << "\n" );
|
||||
LOGERR("MimeHandlerMail::set_document_string: stream create error."
|
||||
"msgtxt.size() " << msgtxt.size() << "\n");
|
||||
return false;
|
||||
}
|
||||
delete m_bincdoc;
|
||||
if ((m_bincdoc = new Binc::MimeDocument) == 0) {
|
||||
LOGERR("MimeHandlerMail::set_doc._string: new Binc:Document failed. Out of memory?" );
|
||||
LOGERR("MimeHandlerMail::set_doc._string: new Binc:Document failed. "
|
||||
"Out of memory?");
|
||||
return false;
|
||||
}
|
||||
m_bincdoc->parseFull(*m_stream);
|
||||
if (!m_bincdoc->isHeaderParsed() && !m_bincdoc->isAllParsed()) {
|
||||
LOGERR("MimeHandlerMail::set_document_string: mime parse error\n" );
|
||||
LOGERR("MimeHandlerMail::set_document_string: mime parse error\n");
|
||||
return false;
|
||||
}
|
||||
m_havedoc = true;
|
||||
@ -161,14 +162,14 @@ bool MimeHandlerMail::set_document_string(const string& mt,
|
||||
|
||||
bool MimeHandlerMail::skip_to_document(const string& ipath)
|
||||
{
|
||||
LOGDEB("MimeHandlerMail::skip_to_document(" << (ipath) << ")\n" );
|
||||
LOGDEB("MimeHandlerMail::skip_to_document(" << ipath << ")\n");
|
||||
if (m_idx == -1) {
|
||||
// No decoding done yet. If ipath is null need do nothing
|
||||
if (ipath.empty() || ipath == "-1")
|
||||
return true;
|
||||
// ipath points to attachment: need to decode message
|
||||
if (!next_document()) {
|
||||
LOGERR("MimeHandlerMail::skip_to_doc: next_document failed\n" );
|
||||
LOGERR("MimeHandlerMail::skip_to_doc: next_document failed\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -178,7 +179,8 @@ bool MimeHandlerMail::skip_to_document(const string& ipath)
|
||||
|
||||
bool MimeHandlerMail::next_document()
|
||||
{
|
||||
LOGDEB("MimeHandlerMail::next_document m_idx " << (m_idx) << " m_havedoc " << (m_havedoc) << "\n" );
|
||||
LOGDEB("MimeHandlerMail::next_document m_idx " << m_idx << " m_havedoc " <<
|
||||
m_havedoc << "\n");
|
||||
if (!m_havedoc)
|
||||
return false;
|
||||
bool res = false;
|
||||
@ -186,7 +188,9 @@ bool MimeHandlerMail::next_document()
|
||||
if (m_idx == -1) {
|
||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||
res = processMsg(m_bincdoc, 0);
|
||||
LOGDEB1("MimeHandlerMail::next_document: mt " << (m_metaData[cstr_dj_keymt]) << ", att cnt " << (m_attachments.size()) << "\n" );
|
||||
LOGDEB1("MimeHandlerMail::next_document: mt " <<
|
||||
m_metaData[cstr_dj_keymt] << ", att cnt " <<
|
||||
m_attachments.size() << "\n");
|
||||
const string& txt = m_metaData[cstr_dj_keycontent];
|
||||
if (m_startoftext < txt.size())
|
||||
m_metaData[cstr_dj_keyabstract] =
|
||||
@ -221,16 +225,16 @@ static bool decodeBody(const string& cte, // Content transfer encoding
|
||||
|
||||
if (!stringlowercmp("quoted-printable", cte)) {
|
||||
if (!qp_decode(body, decoded)) {
|
||||
LOGERR("decodeBody: quoted-printable decoding failed !\n" );
|
||||
LOGDEB(" Body: \n" << (body) << "\n" );
|
||||
LOGERR("decodeBody: quoted-printable decoding failed !\n");
|
||||
LOGDEB(" Body: \n" << body << "\n");
|
||||
return false;
|
||||
}
|
||||
*respp = &decoded;
|
||||
} else if (!stringlowercmp("base64", cte)) {
|
||||
if (!base64_decode(body, decoded)) {
|
||||
// base64 encoding errors are actually relatively common
|
||||
LOGERR("decodeBody: base64 decoding failed !\n" );
|
||||
LOGDEB(" Body: \n" << (body) << "\n" );
|
||||
LOGERR("decodeBody: base64 decoding failed !\n");
|
||||
LOGDEB(" Body: \n" << body << "\n");
|
||||
return false;
|
||||
}
|
||||
*respp = &decoded;
|
||||
@ -240,7 +244,7 @@ static bool decodeBody(const string& cte, // Content transfer encoding
|
||||
|
||||
bool MimeHandlerMail::processAttach()
|
||||
{
|
||||
LOGDEB("MimeHandlerMail::processAttach() m_idx " << (m_idx) << "\n" );
|
||||
LOGDEB("MimeHandlerMail::processAttach() m_idx " << m_idx << "\n");
|
||||
if (!m_havedoc)
|
||||
return false;
|
||||
if (m_idx >= (int)m_attachments.size()) {
|
||||
@ -254,7 +258,8 @@ bool MimeHandlerMail::processAttach()
|
||||
m_metaData[cstr_dj_keycharset] = att->m_charset;
|
||||
m_metaData[cstr_dj_keyfn] = att->m_filename;
|
||||
m_metaData[cstr_dj_keytitle] = att->m_filename + " (" + m_subject + ")";
|
||||
LOGDEB1(" processAttach:ct [" << (att->m_contentType) << "] cs [" << (att->m_charset) << "] fn [" << (att->m_filename) << "]\n" );
|
||||
LOGDEB1(" processAttach:ct [" << att->m_contentType << "] cs [" <<
|
||||
att->m_charset << "] fn [" << att->m_filename << "]\n");
|
||||
|
||||
// Erase current content and replace
|
||||
m_metaData[cstr_dj_keycontent] = string();
|
||||
@ -305,10 +310,11 @@ bool MimeHandlerMail::processAttach()
|
||||
// text
|
||||
bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
||||
{
|
||||
LOGDEB2("MimeHandlerMail::processMsg: depth " << (depth) << "\n" );
|
||||
LOGDEB2("MimeHandlerMail::processMsg: depth " << depth << "\n");
|
||||
if (depth++ >= maxdepth) {
|
||||
// Have to stop somewhere
|
||||
LOGINFO("MimeHandlerMail::processMsg: maxdepth " << (maxdepth) << " exceeded\n" );
|
||||
LOGINFO("MimeHandlerMail::processMsg: maxdepth " << maxdepth <<
|
||||
" exceeded\n");
|
||||
// Return true anyway, better to index partially than not at all
|
||||
return true;
|
||||
}
|
||||
@ -360,7 +366,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
||||
m_metaData[cstr_dj_keymd] = ascuxtime;
|
||||
} else {
|
||||
// Leave mtime field alone, ftime will be used instead.
|
||||
LOGDEB("rfc2822Date...: failed: [" << (decoded) << "]\n" );
|
||||
LOGDEB("rfc2822Date...: failed: [" << decoded << "]\n");
|
||||
}
|
||||
}
|
||||
if (preview())
|
||||
@ -394,10 +400,12 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
||||
|
||||
text += '\n';
|
||||
m_startoftext = text.size();
|
||||
LOGDEB2("MimeHandlerMail::processMsg:ismultipart " << (doc->isMultipart()) << " mime subtype '" << (doc->getSubType()) << "'\n" );
|
||||
LOGDEB2("MimeHandlerMail::processMsg:ismultipart " <<
|
||||
doc->isMultipart() << " mime subtype '"<<doc->getSubType()<< "'\n");
|
||||
walkmime(doc, depth);
|
||||
|
||||
LOGDEB2("MimeHandlerMail::processMsg:text:[" << (m_metaData[cstr_dj_keycontent]) << "]\n" );
|
||||
LOGDEB2("MimeHandlerMail::processMsg:text:[" <<
|
||||
m_metaData[cstr_dj_keycontent] << "]\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -413,16 +421,17 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
||||
// message/rfc822 may also be of interest.
|
||||
void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
{
|
||||
LOGDEB2("MimeHandlerMail::walkmime: depth " << (depth) << "\n" );
|
||||
LOGDEB2("MimeHandlerMail::walkmime: depth " << depth << "\n");
|
||||
if (depth++ >= maxdepth) {
|
||||
LOGINFO("walkmime: max depth (" << (maxdepth) << ") exceeded\n" );
|
||||
LOGINFO("walkmime: max depth (" << maxdepth << ") exceeded\n");
|
||||
return;
|
||||
}
|
||||
|
||||
string& out = m_metaData[cstr_dj_keycontent];
|
||||
|
||||
if (doc->isMultipart()) {
|
||||
LOGDEB2("walkmime: ismultipart " << (doc->isMultipart()) << " subtype '" << (doc->getSubType()) << "'\n" );
|
||||
LOGDEB2("walkmime: ismultipart " << doc->isMultipart() <<
|
||||
" subtype '" << doc->getSubType() << "'\n");
|
||||
// We only handle alternative, related and mixed (no digests).
|
||||
std::vector<Binc::MimePart>::iterator it;
|
||||
|
||||
@ -445,22 +454,22 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
// Get and parse content-type header
|
||||
Binc::HeaderItem hi;
|
||||
if (!it->h.getFirstHeader("Content-Type", hi)) {
|
||||
LOGDEB("walkmime:no ctent-type header for part " << (i) << "\n" );
|
||||
LOGDEB("walkmime:no ctent-type header for part "<<i<< "\n");
|
||||
continue;
|
||||
}
|
||||
MimeHeaderValue content_type;
|
||||
parseMimeHeaderValue(hi.getValue(), content_type);
|
||||
LOGDEB2("walkmime: C-type: " << (content_type.value) << "\n" );
|
||||
LOGDEB2("walkmime: C-type: " << content_type.value << "\n");
|
||||
if (!stringlowercmp(cstr_textplain, content_type.value))
|
||||
ittxt = it;
|
||||
else if (!stringlowercmp("text/html", content_type.value))
|
||||
ithtml = it;
|
||||
}
|
||||
if (ittxt != doc->members.end()) {
|
||||
LOGDEB2("walkmime: alternative: chose text/plain part\n" );
|
||||
LOGDEB2("walkmime: alternative: chose text/plain part\n");
|
||||
walkmime(&(*ittxt), depth);
|
||||
} else if (ithtml != doc->members.end()) {
|
||||
LOGDEB2("walkmime: alternative: chose text/html part\n" );
|
||||
LOGDEB2("walkmime: alternative: chose text/html part\n");
|
||||
walkmime(&(*ithtml), depth);
|
||||
}
|
||||
}
|
||||
@ -476,7 +485,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
if (doc->h.getFirstHeader("Content-Type", hi)) {
|
||||
ctt = hi.getValue();
|
||||
}
|
||||
LOGDEB2("walkmime:content-type: " << (ctt) << "\n" );
|
||||
LOGDEB2("walkmime:content-type: " << ctt << "\n");
|
||||
MimeHeaderValue content_type;
|
||||
parseMimeHeaderValue(ctt, content_type);
|
||||
|
||||
@ -487,7 +496,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
}
|
||||
MimeHeaderValue content_disposition;
|
||||
parseMimeHeaderValue(ctd, content_disposition);
|
||||
LOGDEB2("Content_disposition:[" << (content_disposition.value) << "]\n" );
|
||||
LOGDEB2("Content_disposition:[" << content_disposition.value << "]\n");
|
||||
string dispindic;
|
||||
if (stringlowercmp("inline", content_disposition.value))
|
||||
dispindic = "Attachment";
|
||||
@ -507,7 +516,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
}
|
||||
|
||||
if (doc->isMessageRFC822()) {
|
||||
LOGDEB2("walkmime: message/RFC822 part\n" );
|
||||
LOGDEB2("walkmime: message/RFC822 part\n");
|
||||
|
||||
// The first part is the already parsed message. Call
|
||||
// processMsg instead of walkmime so that mail headers get
|
||||
@ -528,7 +537,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
}
|
||||
|
||||
// "Simple" part.
|
||||
LOGDEB2("walkmime: simple part\n" );
|
||||
LOGDEB2("walkmime: simple part\n");
|
||||
// Normally the default charset is us-ascii. But it happens that 8
|
||||
// bit chars exist in a message that is stated as us-ascii. Ie the
|
||||
// mailer used by yahoo support ('KANA') does this. We could
|
||||
@ -575,7 +584,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
}
|
||||
MHMailAttach *att = new MHMailAttach;
|
||||
if (att == 0) {
|
||||
LOGERR("Out of memory\n" );
|
||||
LOGERR("Out of memory\n");
|
||||
return;
|
||||
}
|
||||
att->m_contentType = content_type.value;
|
||||
@ -584,7 +593,9 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
att->m_charset = charset;
|
||||
att->m_contentTransferEncoding = cte;
|
||||
att->m_part = doc;
|
||||
LOGDEB("walkmime: attachmnt: ct [" << (att->m_contentType) << "] cte [" << (att->m_contentTransferEncoding) << "] cs [" << (att->m_charset) << "] fn [" << (filename) << "]\n" );
|
||||
LOGDEB("walkmime: attachmnt: ct [" << att->m_contentType <<
|
||||
"] cte [" << att->m_contentTransferEncoding << "] cs [" <<
|
||||
att->m_charset << "] fn [" << filename << "]\n");
|
||||
m_attachments.push_back(att);
|
||||
return;
|
||||
}
|
||||
@ -594,14 +605,15 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
// filter stack work: this would create another subdocument, but
|
||||
// we want instead to decode a body part of this message document.
|
||||
|
||||
LOGDEB2("walkmime: final: body start offset " << (doc->getBodyStartOffset()) << ", length " << (doc->getBodyLength()) << "\n" );
|
||||
LOGDEB2("walkmime: final: body start offset " <<
|
||||
doc->getBodyStartOffset()<<", length "<<doc->getBodyLength()<<"\n");
|
||||
string body;
|
||||
doc->getBody(body, 0, doc->bodylength);
|
||||
{
|
||||
string decoded;
|
||||
const string *bdp;
|
||||
if (!decodeBody(cte, body, decoded, &bdp)) {
|
||||
LOGERR("MimeHandlerMail::walkmime: failed decoding body\n" );
|
||||
LOGERR("MimeHandlerMail::walkmime: failed decoding body\n");
|
||||
}
|
||||
if (bdp != &body)
|
||||
body.swap(decoded);
|
||||
@ -622,9 +634,10 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
} else {
|
||||
string utf8;
|
||||
// Transcode to utf-8
|
||||
LOGDEB1("walkmime: transcoding from " << (charset) << " to UTF-8\n" );
|
||||
LOGDEB1("walkmime: transcoding from " << charset << " to UTF-8\n");
|
||||
if (!transcode(body, utf8, charset, cstr_utf8)) {
|
||||
LOGERR("walkmime: transcode failed from cs '" << (charset) << "' to UTF-8\n" );
|
||||
LOGERR("walkmime: transcode failed from cs '" << charset <<
|
||||
"' to UTF-8\n");
|
||||
out += body;
|
||||
} else {
|
||||
out += utf8;
|
||||
@ -634,6 +647,6 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
if (out.length() && out[out.length()-1] != '\n')
|
||||
out += '\n';
|
||||
|
||||
LOGDEB2("walkmime: out now: [" << (out) << "]\n" );
|
||||
LOGDEB2("walkmime: out now: [" << out << "]\n");
|
||||
}
|
||||
|
||||
|
||||
@ -20,8 +20,6 @@
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
using std::vector;
|
||||
using std::map;
|
||||
|
||||
#include "mimehandler.h"
|
||||
|
||||
@ -39,19 +37,23 @@ class MHMailAttach;
|
||||
*/
|
||||
class MimeHandlerMail : public RecollFilter {
|
||||
public:
|
||||
MimeHandlerMail(RclConfig *cnf, const string &id);
|
||||
MimeHandlerMail(RclConfig *cnf, const std::string &id);
|
||||
virtual ~MimeHandlerMail();
|
||||
virtual bool set_document_file(const string& mt, const string& file_path);
|
||||
virtual bool set_document_string(const string& mt, const string& data);
|
||||
virtual bool is_data_input_ok(DataInput input) const {
|
||||
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
virtual bool next_document();
|
||||
virtual bool skip_to_document(const string& ipath);
|
||||
virtual bool skip_to_document(const std::string& ipath);
|
||||
virtual void clear();
|
||||
|
||||
protected:
|
||||
virtual bool set_document_file_impl(const std::string& mt,
|
||||
const std::string& file_path);
|
||||
virtual bool set_document_string_impl(const std::string& mt,
|
||||
const std::string& data);
|
||||
|
||||
private:
|
||||
bool processMsg(Binc::MimePart *doc, int depth);
|
||||
void walkmime(Binc::MimePart* doc, int depth);
|
||||
@ -65,19 +67,19 @@ private:
|
||||
int m_idx;
|
||||
// Start of actual text (after the reprinted headers. This is for
|
||||
// generating a semi-meaningful "abstract")
|
||||
string::size_type m_startoftext;
|
||||
string m_subject;
|
||||
vector<MHMailAttach *> m_attachments;
|
||||
std::string::size_type m_startoftext;
|
||||
std::string m_subject;
|
||||
std::vector<MHMailAttach *> m_attachments;
|
||||
// Additional headers to be processed as per config + field name translation
|
||||
map<string,string> m_addProcdHdrs;
|
||||
std::map<std::string, std::string> m_addProcdHdrs;
|
||||
};
|
||||
|
||||
class MHMailAttach {
|
||||
public:
|
||||
string m_contentType;
|
||||
string m_filename;
|
||||
string m_charset;
|
||||
string m_contentTransferEncoding;
|
||||
std::string m_contentType;
|
||||
std::string m_filename;
|
||||
std::string m_charset;
|
||||
std::string m_contentTransferEncoding;
|
||||
Binc::MimePart *m_part;
|
||||
};
|
||||
|
||||
|
||||
@ -100,43 +100,43 @@ public:
|
||||
~MboxCache() {}
|
||||
mbhoff_type get_offset(RclConfig *config, const string& udi, int msgnum)
|
||||
{
|
||||
LOGDEB0("MboxCache::get_offsets: udi [" << (udi) << "] msgnum " << (msgnum) << "\n" );
|
||||
LOGDEB0("MboxCache::get_offsets: udi [" << (udi) << "] msgnum " << (msgnum) << "\n");
|
||||
if (!ok(config)) {
|
||||
LOGDEB0("MboxCache::get_offsets: init failed\n" );
|
||||
LOGDEB0("MboxCache::get_offsets: init failed\n");
|
||||
return -1;
|
||||
}
|
||||
std::unique_lock<std::mutex> locker(o_mcache_mutex);
|
||||
string fn = makefilename(udi);
|
||||
FILE *fp = 0;
|
||||
if ((fp = fopen(fn.c_str(), "r")) == 0) {
|
||||
LOGDEB("MboxCache::get_offsets: open failed, errno " << (errno) << "\n" );
|
||||
LOGDEB("MboxCache::get_offsets: open failed, errno " << (errno) << "\n");
|
||||
return -1;
|
||||
}
|
||||
FpKeeper keeper(&fp);
|
||||
|
||||
char blk1[M_o_b1size];
|
||||
if (fread(blk1, 1, o_b1size, fp) != o_b1size) {
|
||||
LOGDEB0("MboxCache::get_offsets: read blk1 errno " << (errno) << "\n" );
|
||||
LOGDEB0("MboxCache::get_offsets: read blk1 errno " << (errno) << "\n");
|
||||
return -1;
|
||||
}
|
||||
ConfSimple cf(string(blk1, o_b1size));
|
||||
string fudi;
|
||||
if (!cf.get("udi", fudi) || fudi.compare(udi)) {
|
||||
LOGINFO("MboxCache::get_offset:badudi fn " << (fn) << " udi [" << (udi) << "], fudi [" << (fudi) << "]\n" );
|
||||
LOGINFO("MboxCache::get_offset:badudi fn " << (fn) << " udi [" << (udi) << "], fudi [" << (fudi) << "]\n");
|
||||
return -1;
|
||||
}
|
||||
if (fseeko(fp, cacheoffset(msgnum), SEEK_SET) != 0) {
|
||||
LOGDEB0("MboxCache::get_offsets: seek " << (lltodecstr(cacheoffset(msgnum))) << " errno " << (errno) << "\n" );
|
||||
LOGDEB0("MboxCache::get_offsets: seek " << (lltodecstr(cacheoffset(msgnum))) << " errno " << (errno) << "\n");
|
||||
return -1;
|
||||
}
|
||||
mbhoff_type offset = -1;
|
||||
size_t ret;
|
||||
if ((ret = fread(&offset, 1, sizeof(mbhoff_type), fp))
|
||||
!= sizeof(mbhoff_type)) {
|
||||
LOGDEB0("MboxCache::get_offsets: read ret " << (ret) << " errno " << (errno) << "\n" );
|
||||
LOGDEB0("MboxCache::get_offsets: read ret " << (ret) << " errno " << (errno) << "\n");
|
||||
return -1;
|
||||
}
|
||||
LOGDEB0("MboxCache::get_offsets: ret " << (lltodecstr(offset)) << "\n" );
|
||||
LOGDEB0("MboxCache::get_offsets: ret " << (lltodecstr(offset)) << "\n");
|
||||
return offset;
|
||||
}
|
||||
|
||||
@ -144,7 +144,7 @@ public:
|
||||
void put_offsets(RclConfig *config, const string& udi, mbhoff_type fsize,
|
||||
vector<mbhoff_type>& offs)
|
||||
{
|
||||
LOGDEB0("MboxCache::put_offsets: " << (offs.size()) << " offsets\n" );
|
||||
LOGDEB0("MboxCache::put_offsets: " << (offs.size()) << " offsets\n");
|
||||
if (!ok(config) || !maybemakedir())
|
||||
return;
|
||||
if (fsize < m_minfsize)
|
||||
@ -153,7 +153,7 @@ public:
|
||||
string fn = makefilename(udi);
|
||||
FILE *fp;
|
||||
if ((fp = fopen(fn.c_str(), "w")) == 0) {
|
||||
LOGDEB("MboxCache::put_offsets: fopen errno " << (errno) << "\n" );
|
||||
LOGDEB("MboxCache::put_offsets: fopen errno " << (errno) << "\n");
|
||||
return;
|
||||
}
|
||||
FpKeeper keeper(&fp);
|
||||
@ -163,7 +163,7 @@ public:
|
||||
blk1.append(cstr_newline);
|
||||
blk1.resize(o_b1size, 0);
|
||||
if (fwrite(blk1.c_str(), 1, o_b1size, fp) != o_b1size) {
|
||||
LOGDEB("MboxCache::put_offsets: fwrite errno " << (errno) << "\n" );
|
||||
LOGDEB("MboxCache::put_offsets: fwrite errno " << (errno) << "\n");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -255,10 +255,9 @@ void MimeHandlerMbox::clear()
|
||||
RecollFilter::clear();
|
||||
}
|
||||
|
||||
bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
|
||||
bool MimeHandlerMbox::set_document_file_impl(const string& mt, const string &fn)
|
||||
{
|
||||
LOGDEB("MimeHandlerMbox::set_document_file(" << (fn) << ")\n" );
|
||||
RecollFilter::set_document_file(mt, fn);
|
||||
LOGDEB("MimeHandlerMbox::set_document_file(" << fn << ")\n");
|
||||
m_fn = fn;
|
||||
if (m_vfp) {
|
||||
fclose((FILE *)m_vfp);
|
||||
@ -267,7 +266,8 @@ bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
|
||||
|
||||
m_vfp = fopen(fn.c_str(), "r");
|
||||
if (m_vfp == 0) {
|
||||
LOGERR("MimeHandlerMail::set_document_file: error opening " << (fn) << "\n" );
|
||||
LOGERR("MimeHandlerMail::set_document_file: error opening " << fn <<
|
||||
"\n");
|
||||
return false;
|
||||
}
|
||||
#if defined O_NOATIME && O_NOATIME != 0
|
||||
@ -278,7 +278,8 @@ bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
|
||||
// Used to use ftell() here: no good beyond 2GB
|
||||
{struct stat st;
|
||||
if (fstat(fileno((FILE*)m_vfp), &st) < 0) {
|
||||
LOGERR("MimeHandlerMbox:setdocfile: fstat(" << (fn) << ") failed errno " << (errno) << "\n" );
|
||||
LOGERR("MimeHandlerMbox:setdocfile: fstat(" << fn <<
|
||||
") failed errno " << errno << "\n");
|
||||
return false;
|
||||
}
|
||||
m_fsize = st.st_size;
|
||||
@ -291,7 +292,7 @@ bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
|
||||
string quirks;
|
||||
if (m_config && m_config->getConfParam(cstr_keyquirks, quirks)) {
|
||||
if (quirks == "tbird") {
|
||||
LOGDEB("MimeHandlerMbox: setting quirks TBIRD\n" );
|
||||
LOGDEB("MimeHandlerMbox: setting quirks TBIRD\n");
|
||||
m_quirks |= MBOXQUIRK_TBIRD;
|
||||
}
|
||||
}
|
||||
@ -299,7 +300,7 @@ bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
|
||||
// And double check for thunderbird
|
||||
string tbirdmsf = fn + ".msf";
|
||||
if ((m_quirks&MBOXQUIRK_TBIRD) == 0 && path_exists(tbirdmsf)) {
|
||||
LOGDEB("MimeHandlerMbox: detected unconfigured tbird mbox in " << (fn) << "\n" );
|
||||
LOGDEB("MimeHandlerMbox: detected unconfigured tbird mbox in " << (fn) << "\n");
|
||||
m_quirks |= MBOXQUIRK_TBIRD;
|
||||
}
|
||||
|
||||
@ -416,7 +417,7 @@ static void compileregexes()
|
||||
bool MimeHandlerMbox::next_document()
|
||||
{
|
||||
if (m_vfp == 0) {
|
||||
LOGERR("MimeHandlerMbox::next_document: not open\n" );
|
||||
LOGERR("MimeHandlerMbox::next_document: not open\n");
|
||||
return false;
|
||||
}
|
||||
if (!m_havedoc) {
|
||||
@ -428,10 +429,10 @@ bool MimeHandlerMbox::next_document()
|
||||
sscanf(m_ipath.c_str(), "%d", &mtarg);
|
||||
} else if (m_forPreview) {
|
||||
// Can't preview an mbox.
|
||||
LOGDEB("MimeHandlerMbox::next_document: can't preview folders!\n" );
|
||||
LOGDEB("MimeHandlerMbox::next_document: can't preview folders!\n");
|
||||
return false;
|
||||
}
|
||||
LOGDEB0("MimeHandlerMbox::next_document: fn " << (m_fn) << ", msgnum " << (m_msgnum) << " mtarg " << (mtarg) << " \n" );
|
||||
LOGDEB0("MimeHandlerMbox::next_document: fn " << (m_fn) << ", msgnum " << (m_msgnum) << " mtarg " << (mtarg) << " \n");
|
||||
if (mtarg == 0)
|
||||
mtarg = -1;
|
||||
|
||||
@ -451,7 +452,7 @@ bool MimeHandlerMbox::next_document()
|
||||
if (mtarg > 0) {
|
||||
mbhoff_type off;
|
||||
line_type line;
|
||||
LOGDEB0("MimeHandlerMbox::next_doc: mtarg " << (mtarg) << " m_udi[" << (m_udi) << "]\n" );
|
||||
LOGDEB0("MimeHandlerMbox::next_doc: mtarg " << (mtarg) << " m_udi[" << (m_udi) << "]\n");
|
||||
if (!m_udi.empty() &&
|
||||
(off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 &&
|
||||
fseeko(fp, (off_t)off, SEEK_SET) >= 0 &&
|
||||
@ -459,7 +460,7 @@ bool MimeHandlerMbox::next_document()
|
||||
(!M_regexec(fromregex, line, 0, 0, 0) ||
|
||||
((m_quirks & MBOXQUIRK_TBIRD) &&
|
||||
!M_regexec(minifromregex, line, 0, 0, 0))) ) {
|
||||
LOGDEB0("MimeHandlerMbox: Cache: From_ Ok\n" );
|
||||
LOGDEB0("MimeHandlerMbox: Cache: From_ Ok\n");
|
||||
fseeko(fp, (off_t)off, SEEK_SET);
|
||||
m_msgnum = mtarg -1;
|
||||
storeoffsets = false;
|
||||
@ -478,7 +479,7 @@ bool MimeHandlerMbox::next_document()
|
||||
for (;;) {
|
||||
message_end = ftello(fp);
|
||||
if (!fgets(line, LL, fp)) {
|
||||
LOGDEB2("MimeHandlerMbox:next: eof\n" );
|
||||
LOGDEB2("MimeHandlerMbox:next: eof\n");
|
||||
iseof = true;
|
||||
m_msgnum++;
|
||||
break;
|
||||
@ -486,7 +487,7 @@ bool MimeHandlerMbox::next_document()
|
||||
m_lineno++;
|
||||
int ll;
|
||||
stripendnl(line, ll);
|
||||
LOGDEB2("mhmbox:next: hadempty " << (hademptyline) << " lineno " << (m_lineno) << " ll " << (ll) << " Line: [" << (line) << "]\n" );
|
||||
LOGDEB2("mhmbox:next: hadempty " << (hademptyline) << " lineno " << (m_lineno) << " ll " << (ll) << " Line: [" << (line) << "]\n");
|
||||
if (hademptyline) {
|
||||
if (ll > 0) {
|
||||
// Non-empty line with empty line flag set, reset flag
|
||||
@ -504,7 +505,7 @@ bool MimeHandlerMbox::next_document()
|
||||
((m_quirks & MBOXQUIRK_TBIRD) &&
|
||||
!M_regexec(minifromregex, line, 0, 0, 0)))
|
||||
) {
|
||||
LOGDEB0("MimeHandlerMbox: msgnum " << (m_msgnum) << ", From_ at line " << (m_lineno) << ": [" << (line) << "]\n" );
|
||||
LOGDEB0("MimeHandlerMbox: msgnum " << (m_msgnum) << ", From_ at line " << (m_lineno) << ": [" << (line) << "]\n");
|
||||
if (storeoffsets)
|
||||
m_offsets.push_back(message_end);
|
||||
m_msgnum++;
|
||||
@ -527,13 +528,13 @@ bool MimeHandlerMbox::next_document()
|
||||
line[ll+1] = 0;
|
||||
msgtxt += line;
|
||||
if (msgtxt.size() > max_mbox_member_size) {
|
||||
LOGERR("mh_mbox: huge message (more than " << (max_mbox_member_size/(1024*1024)) << " MB) inside " << (m_fn) << ", giving up\n" );
|
||||
LOGERR("mh_mbox: huge message (more than " << (max_mbox_member_size/(1024*1024)) << " MB) inside " << (m_fn) << ", giving up\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
LOGDEB2("Message text length " << (msgtxt.size()) << "\n" );
|
||||
LOGDEB2("Message text: [" << (msgtxt) << "]\n" );
|
||||
LOGDEB2("Message text length " << (msgtxt.size()) << "\n");
|
||||
LOGDEB2("Message text: [" << (msgtxt) << "]\n");
|
||||
char buf[20];
|
||||
// m_msgnum was incremented when hitting the next From_ or eof, so the data
|
||||
// is for m_msgnum - 1
|
||||
@ -541,7 +542,7 @@ bool MimeHandlerMbox::next_document()
|
||||
m_metaData[cstr_dj_keyipath] = buf;
|
||||
m_metaData[cstr_dj_keymt] = "message/rfc822";
|
||||
if (iseof) {
|
||||
LOGDEB2("MimeHandlerMbox::next: eof hit\n" );
|
||||
LOGDEB2("MimeHandlerMbox::next: eof hit\n");
|
||||
m_havedoc = false;
|
||||
if (!m_udi.empty() && storeoffsets) {
|
||||
o_mcache.put_offsets(m_config, m_udi, m_fsize, m_offsets);
|
||||
@ -658,7 +659,7 @@ int main(int argc, char **argv)
|
||||
} else {
|
||||
size = it->second.length();
|
||||
}
|
||||
cout << "Doc " << docnt << " size " << size << endl;
|
||||
cout << "Doc " << docnt << " size " << size << endl;
|
||||
}
|
||||
cout << docnt << " documents found in " << filename << endl;
|
||||
exit(0);
|
||||
|
||||
@ -19,8 +19,6 @@
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
#include "mimehandler.h"
|
||||
|
||||
@ -30,28 +28,32 @@ using std::vector;
|
||||
* file.
|
||||
*/
|
||||
class MimeHandlerMbox : public RecollFilter {
|
||||
public:
|
||||
MimeHandlerMbox(RclConfig *cnf, const string& id)
|
||||
public:
|
||||
MimeHandlerMbox(RclConfig *cnf, const std::string& id)
|
||||
: RecollFilter(cnf, id), m_vfp(0), m_msgnum(0),
|
||||
m_lineno(0), m_fsize(0)
|
||||
{}
|
||||
m_lineno(0), m_fsize(0) {
|
||||
}
|
||||
virtual ~MimeHandlerMbox();
|
||||
virtual bool set_document_file(const string& mt, const string &file_path);
|
||||
virtual bool next_document();
|
||||
virtual bool skip_to_document(const string& ipath) {
|
||||
virtual bool skip_to_document(const std::string& ipath) {
|
||||
m_ipath = ipath;
|
||||
return true;
|
||||
}
|
||||
virtual void clear();
|
||||
typedef long long mbhoff_type;
|
||||
private:
|
||||
string m_fn; // File name
|
||||
|
||||
protected:
|
||||
virtual bool set_document_file_impl(const std::string&,
|
||||
const std::string&);
|
||||
|
||||
private:
|
||||
std::string m_fn; // File name
|
||||
void *m_vfp; // File pointer for folder
|
||||
int m_msgnum; // Current message number in folder. Starts at 1
|
||||
string m_ipath;
|
||||
std::string m_ipath;
|
||||
int m_lineno; // debug
|
||||
mbhoff_type m_fsize;
|
||||
vector<mbhoff_type> m_offsets;
|
||||
std::vector<mbhoff_type> m_offsets;
|
||||
enum Quirks {MBOXQUIRK_TBIRD=1};
|
||||
int m_quirks;
|
||||
};
|
||||
|
||||
@ -17,34 +17,28 @@
|
||||
#ifndef _MH_NULL_H_INCLUDED_
|
||||
#define _MH_NULL_H_INCLUDED_
|
||||
|
||||
// It may make sense in some cases to set this null filter (no output)
|
||||
// instead of using recoll_noindex or leaving the default filter in
|
||||
// case one doesn't want to install it: this will avoid endless retries
|
||||
// to reindex the affected files, as recoll will think it has succeeded
|
||||
// indexing them. Downside: the files won't be indexed when one
|
||||
// actually installs the real filter, will need a -z
|
||||
// Actually used for empty files
|
||||
// Associated to application/x-zerosize, so use
|
||||
// <mimetype> = internal application/x-zerosize
|
||||
// in mimeconf
|
||||
#include <string>
|
||||
#include "cstr.h"
|
||||
#include "mimehandler.h"
|
||||
|
||||
/// Null input handler always returning empty data.
|
||||
///
|
||||
/// It may make sense in some cases to set this null filter (no output)
|
||||
/// instead of using recoll_noindex or leaving the default filter in
|
||||
/// case one doesn't want to install it: this will avoid endless retries
|
||||
/// to reindex the affected files, as recoll will think it has succeeded
|
||||
/// indexing them. Downside: the files won't be indexed when one
|
||||
/// actually installs the real filter, will need a -z
|
||||
/// Actually used for empty files.
|
||||
/// Associated to application/x-zerosize, so use the following in mimeconf:
|
||||
/// <mimetype> = internal application/x-zerosize
|
||||
class MimeHandlerNull : public RecollFilter {
|
||||
public:
|
||||
MimeHandlerNull(RclConfig *cnf, const std::string& id)
|
||||
: RecollFilter(cnf, id)
|
||||
{
|
||||
}
|
||||
virtual ~MimeHandlerNull()
|
||||
{
|
||||
}
|
||||
virtual bool set_document_file(const string& mt, const string& fn)
|
||||
{
|
||||
RecollFilter::set_document_file(mt, fn);
|
||||
return m_havedoc = true;
|
||||
: RecollFilter(cnf, id) {
|
||||
}
|
||||
virtual ~MimeHandlerNull() {}
|
||||
|
||||
virtual bool next_document()
|
||||
{
|
||||
if (m_havedoc == false)
|
||||
|
||||
@ -36,18 +36,10 @@
|
||||
class MimeHandlerSymlink : public RecollFilter {
|
||||
public:
|
||||
MimeHandlerSymlink(RclConfig *cnf, const std::string& id)
|
||||
: RecollFilter(cnf, id)
|
||||
{
|
||||
}
|
||||
virtual ~MimeHandlerSymlink()
|
||||
{
|
||||
}
|
||||
virtual bool set_document_file(const string& mt, const string& fn)
|
||||
{
|
||||
RecollFilter::set_document_file(mt, fn);
|
||||
m_fn = fn;
|
||||
return m_havedoc = true;
|
||||
: RecollFilter(cnf, id) {
|
||||
}
|
||||
virtual ~MimeHandlerSymlink() {}
|
||||
|
||||
virtual bool next_document()
|
||||
{
|
||||
if (m_havedoc == false)
|
||||
@ -61,11 +53,18 @@ class MimeHandlerSymlink : public RecollFilter {
|
||||
transcode(path_getsimple(slc), m_metaData[cstr_dj_keycontent],
|
||||
m_config->getDefCharset(true), "UTF-8");
|
||||
} else {
|
||||
LOGDEB("Symlink: readlink [" << (m_fn) << "] failed, errno " << (errno) << "\n" );
|
||||
LOGDEB("Symlink: readlink [" << m_fn << "] failed, errno " <<
|
||||
errno << "\n");
|
||||
}
|
||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||
return true;
|
||||
}
|
||||
protected:
|
||||
virtual bool set_document_file_impl(const string& mt, const string& fn) {
|
||||
m_fn = fn;
|
||||
return m_havedoc = true;
|
||||
}
|
||||
|
||||
private:
|
||||
std::string m_fn;
|
||||
};
|
||||
|
||||
@ -40,13 +40,11 @@ const int MB = 1024*1024;
|
||||
const int KB = 1024;
|
||||
|
||||
// Process a plain text file
|
||||
bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
|
||||
bool MimeHandlerText::set_document_file_impl(const string& mt, const string &fn)
|
||||
{
|
||||
LOGDEB("MimeHandlerText::set_document_file: [" << fn << "] offs " <<
|
||||
LOGDEB("MimeHandlerText::set_document_file: [" << fn << "] offs " <<
|
||||
m_offs << "\n");
|
||||
|
||||
RecollFilter::set_document_file(mt, fn);
|
||||
|
||||
m_fn = fn;
|
||||
// This should not be necessary, but it happens on msw that offset is large
|
||||
// negative at this point, could not find the reason (still trying).
|
||||
@ -93,9 +91,9 @@ bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MimeHandlerText::set_document_string(const string& mt, const string& otext)
|
||||
bool MimeHandlerText::set_document_string_impl(const string& mt,
|
||||
const string& otext)
|
||||
{
|
||||
RecollFilter::set_document_string(mt, otext);
|
||||
m_text = otext;
|
||||
if (!m_forPreview) {
|
||||
string md5, xmd5;
|
||||
@ -175,7 +173,7 @@ bool MimeHandlerText::readnext()
|
||||
string reason;
|
||||
m_text.clear();
|
||||
if (!file_to_string(m_fn, m_text, m_offs, m_pagesz, &reason)) {
|
||||
LOGERR("MimeHandlerText: can't read file: " << (reason) << "\n" );
|
||||
LOGERR("MimeHandlerText: can't read file: " << reason << "\n" );
|
||||
m_havedoc = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -30,14 +30,10 @@
|
||||
class MimeHandlerText : public RecollFilter {
|
||||
public:
|
||||
MimeHandlerText(RclConfig *cnf, const std::string& id)
|
||||
: RecollFilter(cnf, id), m_paging(false), m_offs(0), m_pagesz(0)
|
||||
{
|
||||
: RecollFilter(cnf, id), m_paging(false), m_offs(0), m_pagesz(0) {
|
||||
}
|
||||
virtual ~MimeHandlerText()
|
||||
{
|
||||
}
|
||||
virtual bool set_document_file(const std::string& mt, const std::string &file_path);
|
||||
virtual bool set_document_string(const std::string&, const std::string&);
|
||||
virtual ~MimeHandlerText() {}
|
||||
|
||||
virtual bool is_data_input_ok(DataInput input) const {
|
||||
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||
return true;
|
||||
@ -45,14 +41,20 @@ class MimeHandlerText : public RecollFilter {
|
||||
}
|
||||
virtual bool next_document();
|
||||
virtual bool skip_to_document(const std::string& s);
|
||||
virtual void clear()
|
||||
{
|
||||
virtual void clear() {
|
||||
m_paging = false;
|
||||
m_text.erase();
|
||||
m_fn.erase();
|
||||
m_offs = 0;
|
||||
RecollFilter::clear();
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual bool set_document_file_impl(const std::string& mt,
|
||||
const std::string &file_path);
|
||||
virtual bool set_document_string_impl(const std::string&,
|
||||
const std::string&);
|
||||
|
||||
private:
|
||||
bool m_paging;
|
||||
std::string m_text;
|
||||
|
||||
@ -29,21 +29,9 @@
|
||||
class MimeHandlerUnknown : public RecollFilter {
|
||||
public:
|
||||
MimeHandlerUnknown(RclConfig *cnf, const string& id)
|
||||
: RecollFilter(cnf, id)
|
||||
{
|
||||
}
|
||||
virtual ~MimeHandlerUnknown()
|
||||
{
|
||||
}
|
||||
virtual bool set_document_file(const string& mt, const string& fn)
|
||||
{
|
||||
RecollFilter::set_document_file(mt, fn);
|
||||
return m_havedoc = true;
|
||||
}
|
||||
virtual bool set_document_string(const string& mt, const string& s) {
|
||||
RecollFilter::set_document_string(mt, s);
|
||||
return m_havedoc = true;
|
||||
: RecollFilter(cnf, id) {
|
||||
}
|
||||
virtual ~MimeHandlerUnknown() {}
|
||||
virtual bool next_document() {
|
||||
if (m_havedoc == false)
|
||||
return false;
|
||||
|
||||
@ -60,7 +60,8 @@ static RecollFilter *getMimeHandlerFromCache(const string& key)
|
||||
std::unique_lock<std::mutex> locker(o_handlers_mutex);
|
||||
string xdigest;
|
||||
MD5HexPrint(key, xdigest);
|
||||
LOGDEB("getMimeHandlerFromCache: " << (xdigest) << " cache size " << (o_handlers.size()) << "\n" );
|
||||
LOGDEB("getMimeHandlerFromCache: " << xdigest << " cache size " <<
|
||||
o_handlers.size() << "\n");
|
||||
|
||||
multimap<string, RecollFilter *>::iterator it = o_handlers.find(key);
|
||||
if (it != o_handlers.end()) {
|
||||
@ -69,13 +70,14 @@ static RecollFilter *getMimeHandlerFromCache(const string& key)
|
||||
if (it1 != o_hlru.end()) {
|
||||
o_hlru.erase(it1);
|
||||
} else {
|
||||
LOGERR("getMimeHandlerFromCache: lru position not found\n" );
|
||||
LOGERR("getMimeHandlerFromCache: lru position not found\n");
|
||||
}
|
||||
o_handlers.erase(it);
|
||||
LOGDEB("getMimeHandlerFromCache: " << (xdigest) << " found size " << (o_handlers.size()) << "\n" );
|
||||
LOGDEB("getMimeHandlerFromCache: " << xdigest << " found size " <<
|
||||
o_handlers.size() << "\n");
|
||||
return h;
|
||||
}
|
||||
LOGDEB("getMimeHandlerFromCache: " << (xdigest) << " not found\n" );
|
||||
LOGDEB("getMimeHandlerFromCache: " << xdigest << " not found\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -85,14 +87,16 @@ void returnMimeHandler(RecollFilter *handler)
|
||||
typedef multimap<string, RecollFilter*>::value_type value_type;
|
||||
|
||||
if (handler == 0) {
|
||||
LOGERR("returnMimeHandler: bad parameter\n" );
|
||||
LOGERR("returnMimeHandler: bad parameter\n");
|
||||
return;
|
||||
}
|
||||
handler->clear();
|
||||
|
||||
std::unique_lock<std::mutex> locker(o_handlers_mutex);
|
||||
|
||||
LOGDEB("returnMimeHandler: returning filter for " << (handler->get_mime_type()) << " cache size " << (o_handlers.size()) << "\n" );
|
||||
LOGDEB("returnMimeHandler: returning filter for " <<
|
||||
handler->get_mime_type() << " cache size " << o_handlers.size() <<
|
||||
"\n");
|
||||
|
||||
// Limit pool size. The pool can grow quite big because there are
|
||||
// many filter types, each of which can be used in several copies
|
||||
@ -105,9 +109,9 @@ void returnMimeHandler(RecollFilter *handler)
|
||||
if (once) {
|
||||
once = 0;
|
||||
for (it = o_handlers.begin(); it != o_handlers.end(); it++) {
|
||||
LOGDEB1("Cache full. key: " << (it->first) << "\n" );
|
||||
LOGDEB1("Cache full. key: " << it->first << "\n");
|
||||
}
|
||||
LOGDEB1("Cache LRU size: " << (o_hlru.size()) << "\n" );
|
||||
LOGDEB1("Cache LRU size: " << o_hlru.size() << "\n");
|
||||
}
|
||||
if (o_hlru.size() > 0) {
|
||||
it = o_hlru.back();
|
||||
@ -122,7 +126,7 @@ void returnMimeHandler(RecollFilter *handler)
|
||||
|
||||
void clearMimeHandlerCache()
|
||||
{
|
||||
LOGDEB("clearMimeHandlerCache()\n" );
|
||||
LOGDEB("clearMimeHandlerCache()\n");
|
||||
multimap<string, RecollFilter *>::iterator it;
|
||||
std::unique_lock<std::mutex> locker(o_handlers_mutex);
|
||||
for (it = o_handlers.begin(); it != o_handlers.end(); it++) {
|
||||
@ -136,31 +140,31 @@ void clearMimeHandlerCache()
|
||||
static RecollFilter *mhFactory(RclConfig *config, const string &mime,
|
||||
bool nobuild, string& id)
|
||||
{
|
||||
LOGDEB2("mhFactory(" << (mime) << ")\n" );
|
||||
LOGDEB2("mhFactory(" << mime << ")\n");
|
||||
string lmime(mime);
|
||||
stringtolower(lmime);
|
||||
if (cstr_textplain == lmime) {
|
||||
LOGDEB2("mhFactory(" << (mime) << "): returning MimeHandlerText\n" );
|
||||
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText\n");
|
||||
MD5String("MimeHandlerText", id);
|
||||
return nobuild ? 0 : new MimeHandlerText(config, id);
|
||||
} else if ("text/html" == lmime) {
|
||||
LOGDEB2("mhFactory(" << (mime) << "): returning MimeHandlerHtml\n" );
|
||||
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerHtml\n");
|
||||
MD5String("MimeHandlerHtml", id);
|
||||
return nobuild ? 0 : new MimeHandlerHtml(config, id);
|
||||
} else if ("text/x-mail" == lmime) {
|
||||
LOGDEB2("mhFactory(" << (mime) << "): returning MimeHandlerMbox\n" );
|
||||
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerMbox\n");
|
||||
MD5String("MimeHandlerMbox", id);
|
||||
return nobuild ? 0 : new MimeHandlerMbox(config, id);
|
||||
} else if ("message/rfc822" == lmime) {
|
||||
LOGDEB2("mhFactory(" << (mime) << "): returning MimeHandlerMail\n" );
|
||||
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerMail\n");
|
||||
MD5String("MimeHandlerMail", id);
|
||||
return nobuild ? 0 : new MimeHandlerMail(config, id);
|
||||
} else if ("inode/symlink" == lmime) {
|
||||
LOGDEB2("mhFactory(" << (mime) << "): ret MimeHandlerSymlink\n" );
|
||||
LOGDEB2("mhFactory(" << mime << "): ret MimeHandlerSymlink\n");
|
||||
MD5String("MimeHandlerSymlink", id);
|
||||
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
|
||||
} else if ("application/x-zerosize" == lmime) {
|
||||
LOGDEB("mhFactory(" << (mime) << "): ret MimeHandlerNull\n" );
|
||||
LOGDEB("mhFactory(" << mime << "): ret MimeHandlerNull\n");
|
||||
MD5String("MimeHandlerNull", id);
|
||||
return nobuild ? 0 : new MimeHandlerNull(config, id);
|
||||
} else if (lmime.find("text/") == 0) {
|
||||
@ -169,14 +173,15 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
|
||||
// mimeconf, not at random. For programs, for example this
|
||||
// allows indexing and previewing as text/plain (no filter
|
||||
// exec) but still opening with a specific editor.
|
||||
LOGDEB2("mhFactory(" << (mime) << "): returning MimeHandlerText(x)\n" );
|
||||
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText(x)\n");
|
||||
MD5String("MimeHandlerText", id);
|
||||
return nobuild ? 0 : new MimeHandlerText(config, id);
|
||||
} else {
|
||||
// We should not get there. It means that "internal" was set
|
||||
// as a handler in mimeconf for a mime type we actually can't
|
||||
// handle.
|
||||
LOGERR("mhFactory: mime type [" << (lmime) << "] set as internal but unknown\n" );
|
||||
LOGERR("mhFactory: mime type [" << lmime <<
|
||||
"] set as internal but unknown\n");
|
||||
MD5String("MimeHandlerUnknown", id);
|
||||
return nobuild ? 0 : new MimeHandlerUnknown(config, id);
|
||||
}
|
||||
@ -199,7 +204,8 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
||||
string cmdstr;
|
||||
|
||||
if (!cfg->valueSplitAttributes(hs, cmdstr, attrs)) {
|
||||
LOGERR("mhExecFactory: bad config line for [" << (mtype) << "]: [" << (hs) << "]\n" );
|
||||
LOGERR("mhExecFactory: bad config line for [" <<
|
||||
mtype << "]: [" << hs << "]\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -207,7 +213,8 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
||||
vector<string> cmdtoks;
|
||||
stringToStrings(cmdstr, cmdtoks);
|
||||
if (cmdtoks.empty()) {
|
||||
LOGERR("mhExecFactory: bad config line for [" << (mtype) << "]: [" << (hs) << "]\n" );
|
||||
LOGERR("mhExecFactory: bad config line for [" << mtype <<
|
||||
"]: [" << hs << "]\n");
|
||||
return 0;
|
||||
}
|
||||
MimeHandlerExec *h = multiple ?
|
||||
@ -221,7 +228,8 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
||||
// the same change if we ever want to use the same cmdline as windows
|
||||
if (!stringlowercmp("python", *it) || !stringlowercmp("perl", *it)) {
|
||||
if (cmdtoks.size() < 2) {
|
||||
LOGERR("mhExecFactory: python/perl cmd: no script?. [" << (mtype) << "]: [" << (hs) << "]\n" );
|
||||
LOGERR("mhExecFactory: python/perl cmd: no script?. [" <<
|
||||
mtype << "]: [" << hs << "]\n");
|
||||
}
|
||||
vector<string>::iterator it1(it);
|
||||
it1++;
|
||||
@ -244,7 +252,9 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
||||
for (it = h->params.begin(); it != h->params.end(); it++) {
|
||||
scmd += string("[") + *it + "] ";
|
||||
}
|
||||
LOGDEB("mhExecFactory:mt [" << (mtype) << "] cfgmt [" << (h->cfgFilterOutputMtype) << "] cfgcs [" << (h->cfgFilterOutputCharset) << "] cmd: [" << (scmd) << "]\n" );
|
||||
LOGDEB("mhExecFactory:mt [" << mtype << "] cfgmt [" <<
|
||||
h->cfgFilterOutputMtype << "] cfgcs [" <<
|
||||
h->cfgFilterOutputCharset << "] cmd: [" << scmd << "]\n");
|
||||
#endif
|
||||
|
||||
return h;
|
||||
@ -254,7 +264,8 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
||||
RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||
bool filtertypes)
|
||||
{
|
||||
LOGDEB("getMimeHandler: mtype [" << (mtype) << "] filtertypes " << (filtertypes) << "\n" );
|
||||
LOGDEB("getMimeHandler: mtype [" << mtype << "] filtertypes " <<
|
||||
filtertypes << "\n");
|
||||
RecollFilter *h = 0;
|
||||
|
||||
// Get handler definition for mime type. We do this even if an
|
||||
@ -292,7 +303,7 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||
if (h != 0)
|
||||
goto out;
|
||||
|
||||
LOGDEB2("getMimeHandler: " << (mtype) << " not in cache\n" );
|
||||
LOGDEB2("getMimeHandler: " << mtype << " not in cache\n");
|
||||
|
||||
// Not in cache.
|
||||
if (internal) {
|
||||
@ -303,13 +314,14 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||
// partly redundant with the localfields/rclaptg, but
|
||||
// better and the latter will probably go away at some
|
||||
// point in the future.
|
||||
LOGDEB2("handlertype internal, cmdstr [" << (cmdstr) << "]\n" );
|
||||
LOGDEB2("handlertype internal, cmdstr [" << cmdstr << "]\n");
|
||||
h = mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, false, id);
|
||||
goto out;
|
||||
} else if (!stringlowercmp("dll", handlertype)) {
|
||||
} else {
|
||||
if (cmdstr.empty()) {
|
||||
LOGERR("getMimeHandler: bad line for " << (mtype) << ": " << (hs) << "\n" );
|
||||
LOGERR("getMimeHandler: bad line for " << mtype << ": " <<
|
||||
hs << "\n");
|
||||
goto out;
|
||||
}
|
||||
if (!stringlowercmp("exec", handlertype)) {
|
||||
@ -319,7 +331,8 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||
h = mhExecFactory(cfg, mtype, cmdstr, true, id);
|
||||
goto out;
|
||||
} else {
|
||||
LOGERR("getMimeHandler: bad line for " << (mtype) << ": " << (hs) << "\n" );
|
||||
LOGERR("getMimeHandler: bad line for " << mtype << ": " <<
|
||||
hs << "\n");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
@ -31,13 +31,14 @@ class RclConfig;
|
||||
class RecollFilter : public Dijon::Filter {
|
||||
public:
|
||||
RecollFilter(RclConfig *config, const std::string& id)
|
||||
: m_config(config), m_forPreview(false), m_havedoc(false), m_id(id)
|
||||
{}
|
||||
: m_config(config), m_forPreview(false), m_havedoc(false), m_id(id) {
|
||||
}
|
||||
virtual ~RecollFilter() {}
|
||||
virtual void setConfig(RclConfig *config)
|
||||
{
|
||||
|
||||
virtual void setConfig(RclConfig *config) {
|
||||
m_config = config;
|
||||
}
|
||||
|
||||
virtual bool set_property(Properties p, const std::string &v) {
|
||||
switch (p) {
|
||||
case DJF_UDI:
|
||||
@ -58,34 +59,23 @@ public:
|
||||
|
||||
// We don't use this for now
|
||||
virtual bool set_document_uri(const std::string& mtype,
|
||||
const std::string &)
|
||||
{
|
||||
const std::string &) {
|
||||
m_mimeType = mtype;
|
||||
return false;
|
||||
}
|
||||
|
||||
// This does nothing right now but should be called from the
|
||||
// subclass method in case we need some common processing one day
|
||||
// (was used for xattrs at some point). Yes this is the "call
|
||||
// super" anti-pattern, bad, but we have several layers of derived
|
||||
// classes, so that implementing the template method approach (by
|
||||
// having a pure virtual called from here and implemented in the
|
||||
// subclass) would have to be repeated in each derived class. It's
|
||||
// just simpler this way.
|
||||
virtual bool set_document_file(const std::string& mtype,
|
||||
const std::string & /*file_path*/)
|
||||
{
|
||||
const std::string &file_path) {
|
||||
m_mimeType = mtype;
|
||||
return true;
|
||||
return set_document_file_impl(mtype, file_path);
|
||||
}
|
||||
|
||||
// Default implementations
|
||||
virtual bool set_document_string(const std::string& mtype,
|
||||
const std::string &)
|
||||
{
|
||||
const std::string &contents) {
|
||||
m_mimeType = mtype;
|
||||
return false;
|
||||
return set_document_string_impl(mtype, contents);
|
||||
}
|
||||
|
||||
virtual bool set_document_data(const std::string& mtype,
|
||||
const char *cp, size_t sz)
|
||||
{
|
||||
@ -95,11 +85,14 @@ public:
|
||||
// Record the document size passed by the caller in m_docsize.
virtual void set_docsize(off_t size) {
|
||||
m_docsize = size;
|
||||
}
|
||||
|
||||
// Return the size last stored in m_docsize.
virtual off_t get_docsize() const {
|
||||
return m_docsize;
|
||||
}
|
||||
|
||||
virtual bool has_documents() const {return m_havedoc;}
|
||||
// Report the current value of the m_havedoc flag.
virtual bool has_documents() const {
|
||||
return m_havedoc;
|
||||
}
|
||||
|
||||
// Most doc types are single-doc
|
||||
virtual bool skip_to_document(const std::string& s) {
|
||||
@ -118,8 +111,7 @@ public:
|
||||
return m_reason;
|
||||
}
|
||||
|
||||
virtual const std::string& get_id() const
|
||||
{
|
||||
virtual const std::string& get_id() const {
|
||||
return m_id;
|
||||
}
|
||||
|
||||
@ -137,7 +129,21 @@ public:
|
||||
bool txtdcode(const std::string& who);
|
||||
|
||||
protected:
|
||||
bool preview() {return m_forPreview;}
|
||||
|
||||
// We provide default implementation as not all handlers need both methods
|
||||
// Default file hook, invoked by set_document_file() after it has stored
// the MIME type. Both arguments (MIME type, file path) are ignored here.
virtual bool set_document_file_impl(const std::string&,
|
||||
const std::string&) {
|
||||
// Deliberate assignment: sets m_havedoc and returns the assigned value
// (true).
return m_havedoc = true;
|
||||
}
|
||||
|
||||
// Default in-memory hook, invoked by set_document_string() after it has
// stored the MIME type. Both arguments (MIME type, contents) are ignored
// here.
virtual bool set_document_string_impl(const std::string&,
|
||||
const std::string&) {
|
||||
// Deliberate assignment: sets m_havedoc and returns the assigned value
// (true).
return m_havedoc = true;
|
||||
}
|
||||
|
||||
// Accessor for the m_forPreview flag (set to false by the constructor).
bool preview() {
|
||||
return m_forPreview;
|
||||
}
|
||||
|
||||
RclConfig *m_config;
|
||||
bool m_forPreview;
|
||||
|
||||
@ -122,6 +122,16 @@ skippedPaths = /media
|
||||
# redefined for subtrees.</descr></var>
|
||||
#excludedmimetypes =
|
||||
|
||||
# <var name="nomd5types" type="string"><brief>Don't compute md5 for
|
||||
# these types.</brief><descr>md5 checksums are used only for deduplicating
|
||||
# results, and can be very expensive to compute on multimedia or other big
|
||||
# files. This list lets you turn off md5 computation for selected types. It
|
||||
# is global (no redefinition for subtrees). At the moment, it only has an
|
||||
# effect for external handlers (exec and execm). The file types can be
|
||||
# specified by listing either MIME types (e.g. audio/mpeg) or handler names
|
||||
# (e.g. rclaudio).</descr></var>
|
||||
nomd5types = rclaudio
|
||||
|
||||
# <var name="compressedfilemaxkbs" type="int"><brief>Size limit for compressed
|
||||
# files.</brief><descr>We need to decompress these in a
|
||||
# temporary directory for identification, which can be wasteful in some
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user