Add a nomd5types parameter to list the file types for which deduplication is not that useful and the MD5 computation is expensive (e.g. audio files). Replace the "call parent" misfeature with a call to a virtual method in the MimeHandler constructor. Fix the indentation of log calls.
This commit is contained in:
parent
fea8ff6e41
commit
b55f4b3b0a
@ -34,14 +34,6 @@
|
|||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
MimeHandlerExec::MimeHandlerExec(RclConfig *cnf, const std::string& id)
|
|
||||||
: RecollFilter(cnf, id), missingHelper(false), m_filtermaxseconds(900),
|
|
||||||
m_filtermaxmbytes(0)
|
|
||||||
{
|
|
||||||
m_config->getConfParam("filtermaxseconds", &m_filtermaxseconds);
|
|
||||||
m_config->getConfParam("filtermaxmbytes", &m_filtermaxmbytes);
|
|
||||||
}
|
|
||||||
|
|
||||||
MEAdv::MEAdv(int maxsecs)
|
MEAdv::MEAdv(int maxsecs)
|
||||||
: m_filtermaxseconds(maxsecs)
|
: m_filtermaxseconds(maxsecs)
|
||||||
{
|
{
|
||||||
@ -55,10 +47,11 @@ void MEAdv::reset()
|
|||||||
|
|
||||||
void MEAdv::newData(int n)
|
void MEAdv::newData(int n)
|
||||||
{
|
{
|
||||||
LOGDEB2("MHExec:newData(" << (n) << ")\n" );
|
LOGDEB2("MHExec:newData(" << n << ")\n");
|
||||||
if (m_filtermaxseconds > 0 &&
|
if (m_filtermaxseconds > 0 &&
|
||||||
time(0L) - m_start > m_filtermaxseconds) {
|
time(0L) - m_start > m_filtermaxseconds) {
|
||||||
LOGERR("MimeHandlerExec: filter timeout (" << (m_filtermaxseconds) << " S)\n" );
|
LOGERR("MimeHandlerExec: filter timeout (" << m_filtermaxseconds <<
|
||||||
|
" S)\n");
|
||||||
throw HandlerTimeout();
|
throw HandlerTimeout();
|
||||||
}
|
}
|
||||||
// If a cancel request was set by the signal handler (or by us
|
// If a cancel request was set by the signal handler (or by us
|
||||||
@ -67,9 +60,65 @@ void MEAdv::newData(int n)
|
|||||||
CancelCheck::instance().checkCancel();
|
CancelCheck::instance().checkCancel();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
MimeHandlerExec::MimeHandlerExec(RclConfig *cnf, const std::string& id)
|
||||||
|
: RecollFilter(cnf, id), missingHelper(false), m_filtermaxseconds(900),
|
||||||
|
m_filtermaxmbytes(0), m_handlernomd5(false), m_hnomd5init(false),
|
||||||
|
m_nomd5(false)
|
||||||
|
{
|
||||||
|
m_config->getConfParam("filtermaxseconds", &m_filtermaxseconds);
|
||||||
|
m_config->getConfParam("filtermaxmbytes", &m_filtermaxmbytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MimeHandlerExec::set_document_file_impl(const std::string& mt,
|
||||||
|
const std::string &file_path)
|
||||||
|
{
|
||||||
|
// Can't do this in constructor as script name not set yet. Do it
|
||||||
|
// once on first call
|
||||||
|
unordered_set<string> nomd5tps;
|
||||||
|
bool tpsread(false);
|
||||||
|
|
||||||
|
if (false == m_hnomd5init) {
|
||||||
|
m_hnomd5init = true;
|
||||||
|
if (m_config->getConfParam("nomd5types", &nomd5tps)) {
|
||||||
|
tpsread = true;
|
||||||
|
if (!nomd5tps.empty()) {
|
||||||
|
if (params.size() &&
|
||||||
|
nomd5tps.find(path_getsimple(params[0])) !=
|
||||||
|
nomd5tps.end()) {
|
||||||
|
m_handlernomd5 = true;
|
||||||
|
}
|
||||||
|
// On windows the 1st param is often a script interp
|
||||||
|
// name (e.g. "python", and the script name is 2nd
|
||||||
|
if (params.size() > 1 &&
|
||||||
|
nomd5tps.find(path_getsimple(params[1])) !=
|
||||||
|
nomd5tps.end()) {
|
||||||
|
m_handlernomd5 = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m_nomd5 = m_handlernomd5;
|
||||||
|
|
||||||
|
if (!m_nomd5) {
|
||||||
|
// Check for MIME type based md5 suppression
|
||||||
|
if (!tpsread) {
|
||||||
|
m_config->getConfParam("nomd5types", &nomd5tps);
|
||||||
|
}
|
||||||
|
if (nomd5tps.find(mt) != nomd5tps.end()) {
|
||||||
|
m_nomd5 = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m_fn = file_path;
|
||||||
|
m_havedoc = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool MimeHandlerExec::skip_to_document(const string& ipath)
|
bool MimeHandlerExec::skip_to_document(const string& ipath)
|
||||||
{
|
{
|
||||||
LOGDEB("MimeHandlerExec:skip_to_document: [" << (ipath) << "]\n" );
|
LOGDEB("MimeHandlerExec:skip_to_document: [" << ipath << "]\n");
|
||||||
m_ipath = ipath;
|
m_ipath = ipath;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -82,13 +131,13 @@ bool MimeHandlerExec::next_document()
|
|||||||
return false;
|
return false;
|
||||||
m_havedoc = false;
|
m_havedoc = false;
|
||||||
if (missingHelper) {
|
if (missingHelper) {
|
||||||
LOGDEB("MimeHandlerExec::next_document(): helper known missing\n" );
|
LOGDEB("MimeHandlerExec::next_document(): helper known missing\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (params.empty()) {
|
if (params.empty()) {
|
||||||
// Hu ho
|
// Hu ho
|
||||||
LOGERR("MimeHandlerExec::mkDoc: empty params\n" );
|
LOGERR("MimeHandlerExec::next_document: empty params\n");
|
||||||
m_reason = "RECFILTERROR BADCONFIG";
|
m_reason = "RECFILTERROR BADCONFIG";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -110,7 +159,7 @@ bool MimeHandlerExec::next_document()
|
|||||||
mexec.setAdvise(&adv);
|
mexec.setAdvise(&adv);
|
||||||
mexec.putenv("RECOLL_CONFDIR", m_config->getConfDir());
|
mexec.putenv("RECOLL_CONFDIR", m_config->getConfDir());
|
||||||
mexec.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
mexec.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
||||||
"RECOLL_FILTER_FORPREVIEW=no");
|
"RECOLL_FILTER_FORPREVIEW=no");
|
||||||
mexec.setrlimit_as(m_filtermaxmbytes);
|
mexec.setrlimit_as(m_filtermaxmbytes);
|
||||||
|
|
||||||
int status;
|
int status;
|
||||||
@ -125,7 +174,8 @@ bool MimeHandlerExec::next_document()
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (status) {
|
if (status) {
|
||||||
LOGERR("MimeHandlerExec: command status 0x" << (status) << " for " << (cmd) << "\n" );
|
LOGERR("MimeHandlerExec: command status 0x" << status << " for " <<
|
||||||
|
cmd << "\n");
|
||||||
if (WIFEXITED(status) && WEXITSTATUS(status) == 127) {
|
if (WIFEXITED(status) && WEXITSTATUS(status) == 127) {
|
||||||
// That's how execmd signals a failed exec (most probably
|
// That's how execmd signals a failed exec (most probably
|
||||||
// a missing command). Let'hope no filter uses the same value as
|
// a missing command). Let'hope no filter uses the same value as
|
||||||
@ -188,12 +238,13 @@ void MimeHandlerExec::finaldetails()
|
|||||||
m_metaData[cstr_dj_keymt] = cfgFilterOutputMtype.empty() ? "text/html" :
|
m_metaData[cstr_dj_keymt] = cfgFilterOutputMtype.empty() ? "text/html" :
|
||||||
cfgFilterOutputMtype;
|
cfgFilterOutputMtype;
|
||||||
|
|
||||||
if (!m_forPreview) {
|
if (!m_forPreview && !m_nomd5) {
|
||||||
string md5, xmd5, reason;
|
string md5, xmd5, reason;
|
||||||
if (MD5File(m_fn, md5, &reason)) {
|
if (MD5File(m_fn, md5, &reason)) {
|
||||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
} else {
|
} else {
|
||||||
LOGERR("MimeHandlerExec: cant compute md5 for [" << (m_fn) << "]: " << (reason) << "\n" );
|
LOGERR("MimeHandlerExec: cant compute md5 for [" << m_fn << "]: " <<
|
||||||
|
reason << "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -62,14 +62,6 @@ class MimeHandlerExec : public RecollFilter {
|
|||||||
|
|
||||||
MimeHandlerExec(RclConfig *cnf, const std::string& id);
|
MimeHandlerExec(RclConfig *cnf, const std::string& id);
|
||||||
|
|
||||||
virtual bool set_document_file(const std::string& mt,
|
|
||||||
const std::string &file_path) {
|
|
||||||
RecollFilter::set_document_file(mt, file_path);
|
|
||||||
m_fn = file_path;
|
|
||||||
m_havedoc = true;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
virtual bool skip_to_document(const std::string& ipath);
|
virtual bool skip_to_document(const std::string& ipath);
|
||||||
|
|
||||||
@ -80,8 +72,16 @@ class MimeHandlerExec : public RecollFilter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
virtual bool set_document_file_impl(const std::string& mt,
|
||||||
|
const std::string& file_path);
|
||||||
|
|
||||||
std::string m_fn;
|
std::string m_fn;
|
||||||
std::string m_ipath;
|
std::string m_ipath;
|
||||||
|
// md5 computation excluded by handler name: can't change after init
|
||||||
|
bool m_handlernomd5;
|
||||||
|
bool m_hnomd5init;
|
||||||
|
// If md5 not excluded by handler name, allow/forbid depending on mime
|
||||||
|
bool m_nomd5;
|
||||||
|
|
||||||
// Set up the character set metadata fields and possibly transcode
|
// Set up the character set metadata fields and possibly transcode
|
||||||
// text/plain output.
|
// text/plain output.
|
||||||
|
|||||||
@ -178,7 +178,7 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
ostringstream obuf;
|
ostringstream obuf;
|
||||||
string file_md5;
|
string file_md5;
|
||||||
if (m_filefirst) {
|
if (m_filefirst) {
|
||||||
if (!m_forPreview) {
|
if (!m_forPreview && !m_nomd5) {
|
||||||
string md5, xmd5, reason;
|
string md5, xmd5, reason;
|
||||||
if (MD5File(m_fn, md5, &reason)) {
|
if (MD5File(m_fn, md5, &reason)) {
|
||||||
file_md5 = MD5HexPrint(md5, xmd5);
|
file_md5 = MD5HexPrint(md5, xmd5);
|
||||||
|
|||||||
@ -102,22 +102,27 @@ class MimeHandlerExecMultiple : public MimeHandlerExec {
|
|||||||
/////// End un-cleared stuff.
|
/////// End un-cleared stuff.
|
||||||
|
|
||||||
public:
|
public:
|
||||||
MimeHandlerExecMultiple(RclConfig *cnf, const string& id)
|
MimeHandlerExecMultiple(RclConfig *cnf, const std::string& id)
|
||||||
: MimeHandlerExec(cnf, id)
|
: MimeHandlerExec(cnf, id) {
|
||||||
{}
|
}
|
||||||
// No resources to clean up, the ExecCmd destructor does it.
|
// No resources to clean up, the ExecCmd destructor does it.
|
||||||
virtual ~MimeHandlerExecMultiple() {}
|
virtual ~MimeHandlerExecMultiple() {}
|
||||||
virtual bool set_document_file(const string& mt, const string &file_path) {
|
|
||||||
m_filefirst = true;
|
|
||||||
return MimeHandlerExec::set_document_file(mt, file_path);
|
|
||||||
}
|
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
|
|
||||||
// skip_to and clear inherited from MimeHandlerExec
|
// skip_to and clear inherited from MimeHandlerExec
|
||||||
|
|
||||||
|
protected:
|
||||||
|
// This is the only 2nd-level derived handler class. Use call-super.
|
||||||
|
virtual bool set_document_file_impl(const std::string& mt,
|
||||||
|
const std::string &file_path) {
|
||||||
|
m_filefirst = true;
|
||||||
|
return MimeHandlerExec::set_document_file_impl(mt, file_path);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool startCmd();
|
bool startCmd();
|
||||||
bool readDataElement(string& name, string& data);
|
bool readDataElement(std::string& name, std::string& data);
|
||||||
bool m_filefirst;
|
bool m_filefirst;
|
||||||
int m_maxmemberkb;
|
int m_maxmemberkb;
|
||||||
MEAdv m_adv;
|
MEAdv m_adv;
|
||||||
|
|||||||
@ -34,23 +34,21 @@ using namespace std;
|
|||||||
#endif /* NO_NAMESPACES */
|
#endif /* NO_NAMESPACES */
|
||||||
|
|
||||||
|
|
||||||
bool MimeHandlerHtml::set_document_file(const string& mt, const string &fn)
|
bool MimeHandlerHtml::set_document_file_impl(const string& mt, const string &fn)
|
||||||
{
|
{
|
||||||
LOGDEB0("textHtmlToDoc: " << (fn) << "\n" );
|
LOGDEB0("textHtmlToDoc: " << fn << "\n");
|
||||||
RecollFilter::set_document_file(mt, fn);
|
|
||||||
string otext;
|
string otext;
|
||||||
if (!file_to_string(fn, otext)) {
|
if (!file_to_string(fn, otext)) {
|
||||||
LOGINFO("textHtmlToDoc: cant read: " << (fn) << "\n" );
|
LOGINFO("textHtmlToDoc: cant read: " << fn << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
m_filename = fn;
|
m_filename = fn;
|
||||||
return set_document_string(mt, otext);
|
return set_document_string(mt, otext);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MimeHandlerHtml::set_document_string(const string& mt,
|
bool MimeHandlerHtml::set_document_string_impl(const string& mt,
|
||||||
const string& htext)
|
const string& htext)
|
||||||
{
|
{
|
||||||
RecollFilter::set_document_string(mt, htext);
|
|
||||||
m_html = htext;
|
m_html = htext;
|
||||||
m_havedoc = true;
|
m_havedoc = true;
|
||||||
|
|
||||||
@ -73,12 +71,14 @@ bool MimeHandlerHtml::next_document()
|
|||||||
m_filename.erase();
|
m_filename.erase();
|
||||||
|
|
||||||
string charset = m_dfltInputCharset;
|
string charset = m_dfltInputCharset;
|
||||||
LOGDEB("MHHtml::next_doc.: default supposed input charset: [" << (charset) << "]\n" );
|
LOGDEB("MHHtml::next_doc.: default supposed input charset: [" << charset
|
||||||
|
<< "]\n");
|
||||||
// Override default input charset if someone took care to set one:
|
// Override default input charset if someone took care to set one:
|
||||||
map<string,string>::const_iterator it = m_metaData.find(cstr_dj_keycharset);
|
map<string,string>::const_iterator it = m_metaData.find(cstr_dj_keycharset);
|
||||||
if (it != m_metaData.end() && !it->second.empty()) {
|
if (it != m_metaData.end() && !it->second.empty()) {
|
||||||
charset = it->second;
|
charset = it->second;
|
||||||
LOGDEB("MHHtml: next_doc.: input charset from ext. metadata: [" << (charset) << "]\n" );
|
LOGDEB("MHHtml: next_doc.: input charset from ext. metadata: [" <<
|
||||||
|
charset << "]\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// - We first try to convert from the supposed charset
|
// - We first try to convert from the supposed charset
|
||||||
@ -91,13 +91,15 @@ bool MimeHandlerHtml::next_document()
|
|||||||
MyHtmlParser result;
|
MyHtmlParser result;
|
||||||
for (int pass = 0; pass < 2; pass++) {
|
for (int pass = 0; pass < 2; pass++) {
|
||||||
string transcoded;
|
string transcoded;
|
||||||
LOGDEB("Html::mkDoc: pass " << (pass) << "\n" );
|
LOGDEB("Html::mkDoc: pass " << pass << "\n");
|
||||||
MyHtmlParser p;
|
MyHtmlParser p;
|
||||||
|
|
||||||
// Try transcoding. If it fails, use original text.
|
// Try transcoding. If it fails, use original text.
|
||||||
int ecnt;
|
int ecnt;
|
||||||
if (!transcode(m_html, transcoded, charset, "UTF-8", &ecnt)) {
|
if (!transcode(m_html, transcoded, charset, "UTF-8", &ecnt)) {
|
||||||
LOGDEB("textHtmlToDoc: transcode failed from cs '" << (charset) << "' to UTF-8 for[" << (fn.empty()?"unknown":fn) << "]" );
|
LOGDEB("textHtmlToDoc: transcode failed from cs '" <<
|
||||||
|
charset << "' to UTF-8 for[" << (fn.empty()?"unknown":fn) <<
|
||||||
|
"]");
|
||||||
transcoded = m_html;
|
transcoded = m_html;
|
||||||
// We don't know the charset, at all
|
// We don't know the charset, at all
|
||||||
p.reset_charsets();
|
p.reset_charsets();
|
||||||
@ -105,9 +107,11 @@ bool MimeHandlerHtml::next_document()
|
|||||||
} else {
|
} else {
|
||||||
if (ecnt) {
|
if (ecnt) {
|
||||||
if (pass == 0) {
|
if (pass == 0) {
|
||||||
LOGDEB("textHtmlToDoc: init transcode had " << (ecnt) << " errors for [" << (fn.empty()?"unknown":fn) << "]\n" );
|
LOGDEB("textHtmlToDoc: init transcode had " << ecnt <<
|
||||||
|
" errors for ["<<(fn.empty()?"unknown":fn)<< "]\n");
|
||||||
} else {
|
} else {
|
||||||
LOGERR("textHtmlToDoc: final transcode had " << (ecnt) << " errors for [" << (fn.empty()?"unknown":fn) << "]\n" );
|
LOGERR("textHtmlToDoc: final transcode had " << ecnt <<
|
||||||
|
" errors for ["<< (fn.empty()?"unknown":fn)<< "]\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// charset has the putative source charset, transcoded is now
|
// charset has the putative source charset, transcoded is now
|
||||||
@ -145,15 +149,16 @@ bool MimeHandlerHtml::next_document()
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOGDEB("textHtmlToDoc: charset [" << (charset) << "] doc charset [" << (result.get_charset()) << "]\n" );
|
LOGDEB("textHtmlToDoc: charset [" << charset << "] doc charset ["<<
|
||||||
|
result.get_charset() << "]\n");
|
||||||
if (!result.get_charset().empty() &&
|
if (!result.get_charset().empty() &&
|
||||||
!samecharset(result.get_charset(), result.fromcharset)) {
|
!samecharset(result.get_charset(), result.fromcharset)) {
|
||||||
LOGDEB("textHtmlToDoc: reparse for charsets\n" );
|
LOGDEB("textHtmlToDoc: reparse for charsets\n");
|
||||||
// Set the origin charset as specified in document before
|
// Set the origin charset as specified in document before
|
||||||
// transcoding again
|
// transcoding again
|
||||||
charset = result.get_charset();
|
charset = result.get_charset();
|
||||||
} else {
|
} else {
|
||||||
LOGERR("textHtmlToDoc:: error: non charset exception\n" );
|
LOGERR("textHtmlToDoc:: error: non charset exception\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -26,22 +26,18 @@
|
|||||||
*/
|
*/
|
||||||
class MimeHandlerHtml : public RecollFilter {
|
class MimeHandlerHtml : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerHtml(RclConfig *cnf, const string& id)
|
MimeHandlerHtml(RclConfig *cnf, const std::string& id)
|
||||||
: RecollFilter(cnf, id)
|
: RecollFilter(cnf, id) {
|
||||||
{
|
|
||||||
}
|
}
|
||||||
virtual ~MimeHandlerHtml()
|
virtual ~MimeHandlerHtml() {}
|
||||||
{
|
|
||||||
}
|
|
||||||
virtual bool set_document_file(const string& mt, const string &file_path);
|
|
||||||
virtual bool set_document_string(const string& mt, const string &data);
|
|
||||||
virtual bool is_data_input_ok(DataInput input) const {
|
virtual bool is_data_input_ok(DataInput input) const {
|
||||||
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
const string& get_html()
|
const std::string& get_html()
|
||||||
{
|
{
|
||||||
return m_html;
|
return m_html;
|
||||||
}
|
}
|
||||||
@ -50,9 +46,15 @@ class MimeHandlerHtml : public RecollFilter {
|
|||||||
m_html.erase();
|
m_html.erase();
|
||||||
RecollFilter::clear();
|
RecollFilter::clear();
|
||||||
}
|
}
|
||||||
|
protected:
|
||||||
|
virtual bool set_document_file_impl(const std::string& mt,
|
||||||
|
const std::string &file_path);
|
||||||
|
virtual bool set_document_string_impl(const std::string& mt,
|
||||||
|
const std::string &data);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
string m_filename;
|
std::string m_filename;
|
||||||
string m_html;
|
std::string m_html;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _HTML_H_INCLUDED_ */
|
#endif /* _HTML_H_INCLUDED_ */
|
||||||
|
|||||||
@ -87,10 +87,9 @@ void MimeHandlerMail::clear()
|
|||||||
RecollFilter::clear();
|
RecollFilter::clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MimeHandlerMail::set_document_file(const string& mt, const string &fn)
|
bool MimeHandlerMail::set_document_file_impl(const string& mt, const string &fn)
|
||||||
{
|
{
|
||||||
LOGDEB("MimeHandlerMail::set_document_file(" << (fn) << ")\n" );
|
LOGDEB("MimeHandlerMail::set_document_file(" << fn << ")\n");
|
||||||
RecollFilter::set_document_file(mt, fn);
|
|
||||||
if (m_fd >= 0) {
|
if (m_fd >= 0) {
|
||||||
close(m_fd);
|
close(m_fd);
|
||||||
m_fd = -1;
|
m_fd = -1;
|
||||||
@ -103,12 +102,13 @@ bool MimeHandlerMail::set_document_file(const string& mt, const string &fn)
|
|||||||
if (MD5File(fn, md5, &reason)) {
|
if (MD5File(fn, md5, &reason)) {
|
||||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
} else {
|
} else {
|
||||||
LOGERR("MimeHandlerMail: cant md5 [" << (fn) << "]: " << (reason) << "\n" );
|
LOGERR("MimeHandlerMail: md5 [" << fn << "]: " << reason << "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m_fd = open(fn.c_str(), 0);
|
m_fd = open(fn.c_str(), 0);
|
||||||
if (m_fd < 0) {
|
if (m_fd < 0) {
|
||||||
LOGERR("MimeHandlerMail::set_document_file: open(" << (fn) << ") errno " << (errno) << "\n" );
|
LOGERR("MimeHandlerMail::set_document_file: open(" << fn <<
|
||||||
|
") errno " << errno << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
#if defined O_NOATIME && O_NOATIME != 0
|
#if defined O_NOATIME && O_NOATIME != 0
|
||||||
@ -120,19 +120,18 @@ bool MimeHandlerMail::set_document_file(const string& mt, const string &fn)
|
|||||||
m_bincdoc = new Binc::MimeDocument;
|
m_bincdoc = new Binc::MimeDocument;
|
||||||
m_bincdoc->parseFull(m_fd);
|
m_bincdoc->parseFull(m_fd);
|
||||||
if (!m_bincdoc->isHeaderParsed() && !m_bincdoc->isAllParsed()) {
|
if (!m_bincdoc->isHeaderParsed() && !m_bincdoc->isAllParsed()) {
|
||||||
LOGERR("MimeHandlerMail::mkDoc: mime parse error for " << (fn) << "\n" );
|
LOGERR("MimeHandlerMail::mkDoc: mime parse error for " << fn << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
m_havedoc = true;
|
m_havedoc = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MimeHandlerMail::set_document_string(const string& mt,
|
bool MimeHandlerMail::set_document_string_impl(const string& mt,
|
||||||
const string &msgtxt)
|
const string& msgtxt)
|
||||||
{
|
{
|
||||||
LOGDEB1("MimeHandlerMail::set_document_string\n" );
|
LOGDEB1("MimeHandlerMail::set_document_string\n");
|
||||||
LOGDEB2("Message text: [" << (msgtxt) << "]\n" );
|
LOGDEB2("Message text: [" << msgtxt << "]\n");
|
||||||
RecollFilter::set_document_string(mt, msgtxt);
|
|
||||||
delete m_stream;
|
delete m_stream;
|
||||||
|
|
||||||
if (!m_forPreview) {
|
if (!m_forPreview) {
|
||||||
@ -142,17 +141,19 @@ bool MimeHandlerMail::set_document_string(const string& mt,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ((m_stream = new stringstream(msgtxt)) == 0 || !m_stream->good()) {
|
if ((m_stream = new stringstream(msgtxt)) == 0 || !m_stream->good()) {
|
||||||
LOGERR("MimeHandlerMail::set_document_string: stream create error.msgtxt.size() " << (int(msgtxt.size())) << "\n" );
|
LOGERR("MimeHandlerMail::set_document_string: stream create error."
|
||||||
|
"msgtxt.size() " << msgtxt.size() << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
delete m_bincdoc;
|
delete m_bincdoc;
|
||||||
if ((m_bincdoc = new Binc::MimeDocument) == 0) {
|
if ((m_bincdoc = new Binc::MimeDocument) == 0) {
|
||||||
LOGERR("MimeHandlerMail::set_doc._string: new Binc:Document failed. Out of memory?" );
|
LOGERR("MimeHandlerMail::set_doc._string: new Binc:Document failed. "
|
||||||
|
"Out of memory?");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
m_bincdoc->parseFull(*m_stream);
|
m_bincdoc->parseFull(*m_stream);
|
||||||
if (!m_bincdoc->isHeaderParsed() && !m_bincdoc->isAllParsed()) {
|
if (!m_bincdoc->isHeaderParsed() && !m_bincdoc->isAllParsed()) {
|
||||||
LOGERR("MimeHandlerMail::set_document_string: mime parse error\n" );
|
LOGERR("MimeHandlerMail::set_document_string: mime parse error\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
m_havedoc = true;
|
m_havedoc = true;
|
||||||
@ -161,14 +162,14 @@ bool MimeHandlerMail::set_document_string(const string& mt,
|
|||||||
|
|
||||||
bool MimeHandlerMail::skip_to_document(const string& ipath)
|
bool MimeHandlerMail::skip_to_document(const string& ipath)
|
||||||
{
|
{
|
||||||
LOGDEB("MimeHandlerMail::skip_to_document(" << (ipath) << ")\n" );
|
LOGDEB("MimeHandlerMail::skip_to_document(" << ipath << ")\n");
|
||||||
if (m_idx == -1) {
|
if (m_idx == -1) {
|
||||||
// No decoding done yet. If ipath is null need do nothing
|
// No decoding done yet. If ipath is null need do nothing
|
||||||
if (ipath.empty() || ipath == "-1")
|
if (ipath.empty() || ipath == "-1")
|
||||||
return true;
|
return true;
|
||||||
// ipath points to attachment: need to decode message
|
// ipath points to attachment: need to decode message
|
||||||
if (!next_document()) {
|
if (!next_document()) {
|
||||||
LOGERR("MimeHandlerMail::skip_to_doc: next_document failed\n" );
|
LOGERR("MimeHandlerMail::skip_to_doc: next_document failed\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -178,7 +179,8 @@ bool MimeHandlerMail::skip_to_document(const string& ipath)
|
|||||||
|
|
||||||
bool MimeHandlerMail::next_document()
|
bool MimeHandlerMail::next_document()
|
||||||
{
|
{
|
||||||
LOGDEB("MimeHandlerMail::next_document m_idx " << (m_idx) << " m_havedoc " << (m_havedoc) << "\n" );
|
LOGDEB("MimeHandlerMail::next_document m_idx " << m_idx << " m_havedoc " <<
|
||||||
|
m_havedoc << "\n");
|
||||||
if (!m_havedoc)
|
if (!m_havedoc)
|
||||||
return false;
|
return false;
|
||||||
bool res = false;
|
bool res = false;
|
||||||
@ -186,7 +188,9 @@ bool MimeHandlerMail::next_document()
|
|||||||
if (m_idx == -1) {
|
if (m_idx == -1) {
|
||||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||||
res = processMsg(m_bincdoc, 0);
|
res = processMsg(m_bincdoc, 0);
|
||||||
LOGDEB1("MimeHandlerMail::next_document: mt " << (m_metaData[cstr_dj_keymt]) << ", att cnt " << (m_attachments.size()) << "\n" );
|
LOGDEB1("MimeHandlerMail::next_document: mt " <<
|
||||||
|
m_metaData[cstr_dj_keymt] << ", att cnt " <<
|
||||||
|
m_attachments.size() << "\n");
|
||||||
const string& txt = m_metaData[cstr_dj_keycontent];
|
const string& txt = m_metaData[cstr_dj_keycontent];
|
||||||
if (m_startoftext < txt.size())
|
if (m_startoftext < txt.size())
|
||||||
m_metaData[cstr_dj_keyabstract] =
|
m_metaData[cstr_dj_keyabstract] =
|
||||||
@ -221,16 +225,16 @@ static bool decodeBody(const string& cte, // Content transfer encoding
|
|||||||
|
|
||||||
if (!stringlowercmp("quoted-printable", cte)) {
|
if (!stringlowercmp("quoted-printable", cte)) {
|
||||||
if (!qp_decode(body, decoded)) {
|
if (!qp_decode(body, decoded)) {
|
||||||
LOGERR("decodeBody: quoted-printable decoding failed !\n" );
|
LOGERR("decodeBody: quoted-printable decoding failed !\n");
|
||||||
LOGDEB(" Body: \n" << (body) << "\n" );
|
LOGDEB(" Body: \n" << body << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
*respp = &decoded;
|
*respp = &decoded;
|
||||||
} else if (!stringlowercmp("base64", cte)) {
|
} else if (!stringlowercmp("base64", cte)) {
|
||||||
if (!base64_decode(body, decoded)) {
|
if (!base64_decode(body, decoded)) {
|
||||||
// base64 encoding errors are actually relatively common
|
// base64 encoding errors are actually relatively common
|
||||||
LOGERR("decodeBody: base64 decoding failed !\n" );
|
LOGERR("decodeBody: base64 decoding failed !\n");
|
||||||
LOGDEB(" Body: \n" << (body) << "\n" );
|
LOGDEB(" Body: \n" << body << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
*respp = &decoded;
|
*respp = &decoded;
|
||||||
@ -240,7 +244,7 @@ static bool decodeBody(const string& cte, // Content transfer encoding
|
|||||||
|
|
||||||
bool MimeHandlerMail::processAttach()
|
bool MimeHandlerMail::processAttach()
|
||||||
{
|
{
|
||||||
LOGDEB("MimeHandlerMail::processAttach() m_idx " << (m_idx) << "\n" );
|
LOGDEB("MimeHandlerMail::processAttach() m_idx " << m_idx << "\n");
|
||||||
if (!m_havedoc)
|
if (!m_havedoc)
|
||||||
return false;
|
return false;
|
||||||
if (m_idx >= (int)m_attachments.size()) {
|
if (m_idx >= (int)m_attachments.size()) {
|
||||||
@ -254,7 +258,8 @@ bool MimeHandlerMail::processAttach()
|
|||||||
m_metaData[cstr_dj_keycharset] = att->m_charset;
|
m_metaData[cstr_dj_keycharset] = att->m_charset;
|
||||||
m_metaData[cstr_dj_keyfn] = att->m_filename;
|
m_metaData[cstr_dj_keyfn] = att->m_filename;
|
||||||
m_metaData[cstr_dj_keytitle] = att->m_filename + " (" + m_subject + ")";
|
m_metaData[cstr_dj_keytitle] = att->m_filename + " (" + m_subject + ")";
|
||||||
LOGDEB1(" processAttach:ct [" << (att->m_contentType) << "] cs [" << (att->m_charset) << "] fn [" << (att->m_filename) << "]\n" );
|
LOGDEB1(" processAttach:ct [" << att->m_contentType << "] cs [" <<
|
||||||
|
att->m_charset << "] fn [" << att->m_filename << "]\n");
|
||||||
|
|
||||||
// Erase current content and replace
|
// Erase current content and replace
|
||||||
m_metaData[cstr_dj_keycontent] = string();
|
m_metaData[cstr_dj_keycontent] = string();
|
||||||
@ -305,10 +310,11 @@ bool MimeHandlerMail::processAttach()
|
|||||||
// text
|
// text
|
||||||
bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
||||||
{
|
{
|
||||||
LOGDEB2("MimeHandlerMail::processMsg: depth " << (depth) << "\n" );
|
LOGDEB2("MimeHandlerMail::processMsg: depth " << depth << "\n");
|
||||||
if (depth++ >= maxdepth) {
|
if (depth++ >= maxdepth) {
|
||||||
// Have to stop somewhere
|
// Have to stop somewhere
|
||||||
LOGINFO("MimeHandlerMail::processMsg: maxdepth " << (maxdepth) << " exceeded\n" );
|
LOGINFO("MimeHandlerMail::processMsg: maxdepth " << maxdepth <<
|
||||||
|
" exceeded\n");
|
||||||
// Return true anyway, better to index partially than not at all
|
// Return true anyway, better to index partially than not at all
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -360,7 +366,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
|||||||
m_metaData[cstr_dj_keymd] = ascuxtime;
|
m_metaData[cstr_dj_keymd] = ascuxtime;
|
||||||
} else {
|
} else {
|
||||||
// Leave mtime field alone, ftime will be used instead.
|
// Leave mtime field alone, ftime will be used instead.
|
||||||
LOGDEB("rfc2822Date...: failed: [" << (decoded) << "]\n" );
|
LOGDEB("rfc2822Date...: failed: [" << decoded << "]\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (preview())
|
if (preview())
|
||||||
@ -394,10 +400,12 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
|||||||
|
|
||||||
text += '\n';
|
text += '\n';
|
||||||
m_startoftext = text.size();
|
m_startoftext = text.size();
|
||||||
LOGDEB2("MimeHandlerMail::processMsg:ismultipart " << (doc->isMultipart()) << " mime subtype '" << (doc->getSubType()) << "'\n" );
|
LOGDEB2("MimeHandlerMail::processMsg:ismultipart " <<
|
||||||
|
doc->isMultipart() << " mime subtype '"<<doc->getSubType()<< "'\n");
|
||||||
walkmime(doc, depth);
|
walkmime(doc, depth);
|
||||||
|
|
||||||
LOGDEB2("MimeHandlerMail::processMsg:text:[" << (m_metaData[cstr_dj_keycontent]) << "]\n" );
|
LOGDEB2("MimeHandlerMail::processMsg:text:[" <<
|
||||||
|
m_metaData[cstr_dj_keycontent] << "]\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -413,16 +421,17 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
|||||||
// message/rfc822 may also be of interest.
|
// message/rfc822 may also be of interest.
|
||||||
void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||||
{
|
{
|
||||||
LOGDEB2("MimeHandlerMail::walkmime: depth " << (depth) << "\n" );
|
LOGDEB2("MimeHandlerMail::walkmime: depth " << depth << "\n");
|
||||||
if (depth++ >= maxdepth) {
|
if (depth++ >= maxdepth) {
|
||||||
LOGINFO("walkmime: max depth (" << (maxdepth) << ") exceeded\n" );
|
LOGINFO("walkmime: max depth (" << maxdepth << ") exceeded\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
string& out = m_metaData[cstr_dj_keycontent];
|
string& out = m_metaData[cstr_dj_keycontent];
|
||||||
|
|
||||||
if (doc->isMultipart()) {
|
if (doc->isMultipart()) {
|
||||||
LOGDEB2("walkmime: ismultipart " << (doc->isMultipart()) << " subtype '" << (doc->getSubType()) << "'\n" );
|
LOGDEB2("walkmime: ismultipart " << doc->isMultipart() <<
|
||||||
|
" subtype '" << doc->getSubType() << "'\n");
|
||||||
// We only handle alternative, related and mixed (no digests).
|
// We only handle alternative, related and mixed (no digests).
|
||||||
std::vector<Binc::MimePart>::iterator it;
|
std::vector<Binc::MimePart>::iterator it;
|
||||||
|
|
||||||
@ -445,22 +454,22 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
// Get and parse content-type header
|
// Get and parse content-type header
|
||||||
Binc::HeaderItem hi;
|
Binc::HeaderItem hi;
|
||||||
if (!it->h.getFirstHeader("Content-Type", hi)) {
|
if (!it->h.getFirstHeader("Content-Type", hi)) {
|
||||||
LOGDEB("walkmime:no ctent-type header for part " << (i) << "\n" );
|
LOGDEB("walkmime:no ctent-type header for part "<<i<< "\n");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
MimeHeaderValue content_type;
|
MimeHeaderValue content_type;
|
||||||
parseMimeHeaderValue(hi.getValue(), content_type);
|
parseMimeHeaderValue(hi.getValue(), content_type);
|
||||||
LOGDEB2("walkmime: C-type: " << (content_type.value) << "\n" );
|
LOGDEB2("walkmime: C-type: " << content_type.value << "\n");
|
||||||
if (!stringlowercmp(cstr_textplain, content_type.value))
|
if (!stringlowercmp(cstr_textplain, content_type.value))
|
||||||
ittxt = it;
|
ittxt = it;
|
||||||
else if (!stringlowercmp("text/html", content_type.value))
|
else if (!stringlowercmp("text/html", content_type.value))
|
||||||
ithtml = it;
|
ithtml = it;
|
||||||
}
|
}
|
||||||
if (ittxt != doc->members.end()) {
|
if (ittxt != doc->members.end()) {
|
||||||
LOGDEB2("walkmime: alternative: chose text/plain part\n" );
|
LOGDEB2("walkmime: alternative: chose text/plain part\n");
|
||||||
walkmime(&(*ittxt), depth);
|
walkmime(&(*ittxt), depth);
|
||||||
} else if (ithtml != doc->members.end()) {
|
} else if (ithtml != doc->members.end()) {
|
||||||
LOGDEB2("walkmime: alternative: chose text/html part\n" );
|
LOGDEB2("walkmime: alternative: chose text/html part\n");
|
||||||
walkmime(&(*ithtml), depth);
|
walkmime(&(*ithtml), depth);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -476,7 +485,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
if (doc->h.getFirstHeader("Content-Type", hi)) {
|
if (doc->h.getFirstHeader("Content-Type", hi)) {
|
||||||
ctt = hi.getValue();
|
ctt = hi.getValue();
|
||||||
}
|
}
|
||||||
LOGDEB2("walkmime:content-type: " << (ctt) << "\n" );
|
LOGDEB2("walkmime:content-type: " << ctt << "\n");
|
||||||
MimeHeaderValue content_type;
|
MimeHeaderValue content_type;
|
||||||
parseMimeHeaderValue(ctt, content_type);
|
parseMimeHeaderValue(ctt, content_type);
|
||||||
|
|
||||||
@ -487,7 +496,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
}
|
}
|
||||||
MimeHeaderValue content_disposition;
|
MimeHeaderValue content_disposition;
|
||||||
parseMimeHeaderValue(ctd, content_disposition);
|
parseMimeHeaderValue(ctd, content_disposition);
|
||||||
LOGDEB2("Content_disposition:[" << (content_disposition.value) << "]\n" );
|
LOGDEB2("Content_disposition:[" << content_disposition.value << "]\n");
|
||||||
string dispindic;
|
string dispindic;
|
||||||
if (stringlowercmp("inline", content_disposition.value))
|
if (stringlowercmp("inline", content_disposition.value))
|
||||||
dispindic = "Attachment";
|
dispindic = "Attachment";
|
||||||
@ -507,7 +516,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (doc->isMessageRFC822()) {
|
if (doc->isMessageRFC822()) {
|
||||||
LOGDEB2("walkmime: message/RFC822 part\n" );
|
LOGDEB2("walkmime: message/RFC822 part\n");
|
||||||
|
|
||||||
// The first part is the already parsed message. Call
|
// The first part is the already parsed message. Call
|
||||||
// processMsg instead of walkmime so that mail headers get
|
// processMsg instead of walkmime so that mail headers get
|
||||||
@ -528,7 +537,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// "Simple" part.
|
// "Simple" part.
|
||||||
LOGDEB2("walkmime: simple part\n" );
|
LOGDEB2("walkmime: simple part\n");
|
||||||
// Normally the default charset is us-ascii. But it happens that 8
|
// Normally the default charset is us-ascii. But it happens that 8
|
||||||
// bit chars exist in a message that is stated as us-ascii. Ie the
|
// bit chars exist in a message that is stated as us-ascii. Ie the
|
||||||
// mailer used by yahoo support ('KANA') does this. We could
|
// mailer used by yahoo support ('KANA') does this. We could
|
||||||
@ -575,7 +584,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
}
|
}
|
||||||
MHMailAttach *att = new MHMailAttach;
|
MHMailAttach *att = new MHMailAttach;
|
||||||
if (att == 0) {
|
if (att == 0) {
|
||||||
LOGERR("Out of memory\n" );
|
LOGERR("Out of memory\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
att->m_contentType = content_type.value;
|
att->m_contentType = content_type.value;
|
||||||
@ -584,7 +593,9 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
att->m_charset = charset;
|
att->m_charset = charset;
|
||||||
att->m_contentTransferEncoding = cte;
|
att->m_contentTransferEncoding = cte;
|
||||||
att->m_part = doc;
|
att->m_part = doc;
|
||||||
LOGDEB("walkmime: attachmnt: ct [" << (att->m_contentType) << "] cte [" << (att->m_contentTransferEncoding) << "] cs [" << (att->m_charset) << "] fn [" << (filename) << "]\n" );
|
LOGDEB("walkmime: attachmnt: ct [" << att->m_contentType <<
|
||||||
|
"] cte [" << att->m_contentTransferEncoding << "] cs [" <<
|
||||||
|
att->m_charset << "] fn [" << filename << "]\n");
|
||||||
m_attachments.push_back(att);
|
m_attachments.push_back(att);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -594,14 +605,15 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
// filter stack work: this would create another subdocument, but
|
// filter stack work: this would create another subdocument, but
|
||||||
// we want instead to decode a body part of this message document.
|
// we want instead to decode a body part of this message document.
|
||||||
|
|
||||||
LOGDEB2("walkmime: final: body start offset " << (doc->getBodyStartOffset()) << ", length " << (doc->getBodyLength()) << "\n" );
|
LOGDEB2("walkmime: final: body start offset " <<
|
||||||
|
doc->getBodyStartOffset()<<", length "<<doc->getBodyLength()<<"\n");
|
||||||
string body;
|
string body;
|
||||||
doc->getBody(body, 0, doc->bodylength);
|
doc->getBody(body, 0, doc->bodylength);
|
||||||
{
|
{
|
||||||
string decoded;
|
string decoded;
|
||||||
const string *bdp;
|
const string *bdp;
|
||||||
if (!decodeBody(cte, body, decoded, &bdp)) {
|
if (!decodeBody(cte, body, decoded, &bdp)) {
|
||||||
LOGERR("MimeHandlerMail::walkmime: failed decoding body\n" );
|
LOGERR("MimeHandlerMail::walkmime: failed decoding body\n");
|
||||||
}
|
}
|
||||||
if (bdp != &body)
|
if (bdp != &body)
|
||||||
body.swap(decoded);
|
body.swap(decoded);
|
||||||
@ -622,9 +634,10 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
} else {
|
} else {
|
||||||
string utf8;
|
string utf8;
|
||||||
// Transcode to utf-8
|
// Transcode to utf-8
|
||||||
LOGDEB1("walkmime: transcoding from " << (charset) << " to UTF-8\n" );
|
LOGDEB1("walkmime: transcoding from " << charset << " to UTF-8\n");
|
||||||
if (!transcode(body, utf8, charset, cstr_utf8)) {
|
if (!transcode(body, utf8, charset, cstr_utf8)) {
|
||||||
LOGERR("walkmime: transcode failed from cs '" << (charset) << "' to UTF-8\n" );
|
LOGERR("walkmime: transcode failed from cs '" << charset <<
|
||||||
|
"' to UTF-8\n");
|
||||||
out += body;
|
out += body;
|
||||||
} else {
|
} else {
|
||||||
out += utf8;
|
out += utf8;
|
||||||
@ -634,6 +647,6 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
if (out.length() && out[out.length()-1] != '\n')
|
if (out.length() && out[out.length()-1] != '\n')
|
||||||
out += '\n';
|
out += '\n';
|
||||||
|
|
||||||
LOGDEB2("walkmime: out now: [" << (out) << "]\n" );
|
LOGDEB2("walkmime: out now: [" << out << "]\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -20,8 +20,6 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <map>
|
#include <map>
|
||||||
using std::vector;
|
|
||||||
using std::map;
|
|
||||||
|
|
||||||
#include "mimehandler.h"
|
#include "mimehandler.h"
|
||||||
|
|
||||||
@ -39,19 +37,23 @@ class MHMailAttach;
|
|||||||
*/
|
*/
|
||||||
class MimeHandlerMail : public RecollFilter {
|
class MimeHandlerMail : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerMail(RclConfig *cnf, const string &id);
|
MimeHandlerMail(RclConfig *cnf, const std::string &id);
|
||||||
virtual ~MimeHandlerMail();
|
virtual ~MimeHandlerMail();
|
||||||
virtual bool set_document_file(const string& mt, const string& file_path);
|
|
||||||
virtual bool set_document_string(const string& mt, const string& data);
|
|
||||||
virtual bool is_data_input_ok(DataInput input) const {
|
virtual bool is_data_input_ok(DataInput input) const {
|
||||||
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
virtual bool skip_to_document(const string& ipath);
|
virtual bool skip_to_document(const std::string& ipath);
|
||||||
virtual void clear();
|
virtual void clear();
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual bool set_document_file_impl(const std::string& mt,
|
||||||
|
const std::string& file_path);
|
||||||
|
virtual bool set_document_string_impl(const std::string& mt,
|
||||||
|
const std::string& data);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool processMsg(Binc::MimePart *doc, int depth);
|
bool processMsg(Binc::MimePart *doc, int depth);
|
||||||
void walkmime(Binc::MimePart* doc, int depth);
|
void walkmime(Binc::MimePart* doc, int depth);
|
||||||
@ -65,19 +67,19 @@ private:
|
|||||||
int m_idx;
|
int m_idx;
|
||||||
// Start of actual text (after the reprinted headers. This is for
|
// Start of actual text (after the reprinted headers. This is for
|
||||||
// generating a semi-meaningful "abstract")
|
// generating a semi-meaningful "abstract")
|
||||||
string::size_type m_startoftext;
|
std::string::size_type m_startoftext;
|
||||||
string m_subject;
|
std::string m_subject;
|
||||||
vector<MHMailAttach *> m_attachments;
|
std::vector<MHMailAttach *> m_attachments;
|
||||||
// Additional headers to be process as per config + field name translation
|
// Additional headers to be process as per config + field name translation
|
||||||
map<string,string> m_addProcdHdrs;
|
std::map<std::string, std::string> m_addProcdHdrs;
|
||||||
};
|
};
|
||||||
|
|
||||||
class MHMailAttach {
|
class MHMailAttach {
|
||||||
public:
|
public:
|
||||||
string m_contentType;
|
std::string m_contentType;
|
||||||
string m_filename;
|
std::string m_filename;
|
||||||
string m_charset;
|
std::string m_charset;
|
||||||
string m_contentTransferEncoding;
|
std::string m_contentTransferEncoding;
|
||||||
Binc::MimePart *m_part;
|
Binc::MimePart *m_part;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -100,43 +100,43 @@ public:
|
|||||||
~MboxCache() {}
|
~MboxCache() {}
|
||||||
mbhoff_type get_offset(RclConfig *config, const string& udi, int msgnum)
|
mbhoff_type get_offset(RclConfig *config, const string& udi, int msgnum)
|
||||||
{
|
{
|
||||||
LOGDEB0("MboxCache::get_offsets: udi [" << (udi) << "] msgnum " << (msgnum) << "\n" );
|
LOGDEB0("MboxCache::get_offsets: udi [" << (udi) << "] msgnum " << (msgnum) << "\n");
|
||||||
if (!ok(config)) {
|
if (!ok(config)) {
|
||||||
LOGDEB0("MboxCache::get_offsets: init failed\n" );
|
LOGDEB0("MboxCache::get_offsets: init failed\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
std::unique_lock<std::mutex> locker(o_mcache_mutex);
|
std::unique_lock<std::mutex> locker(o_mcache_mutex);
|
||||||
string fn = makefilename(udi);
|
string fn = makefilename(udi);
|
||||||
FILE *fp = 0;
|
FILE *fp = 0;
|
||||||
if ((fp = fopen(fn.c_str(), "r")) == 0) {
|
if ((fp = fopen(fn.c_str(), "r")) == 0) {
|
||||||
LOGDEB("MboxCache::get_offsets: open failed, errno " << (errno) << "\n" );
|
LOGDEB("MboxCache::get_offsets: open failed, errno " << (errno) << "\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
FpKeeper keeper(&fp);
|
FpKeeper keeper(&fp);
|
||||||
|
|
||||||
char blk1[M_o_b1size];
|
char blk1[M_o_b1size];
|
||||||
if (fread(blk1, 1, o_b1size, fp) != o_b1size) {
|
if (fread(blk1, 1, o_b1size, fp) != o_b1size) {
|
||||||
LOGDEB0("MboxCache::get_offsets: read blk1 errno " << (errno) << "\n" );
|
LOGDEB0("MboxCache::get_offsets: read blk1 errno " << (errno) << "\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
ConfSimple cf(string(blk1, o_b1size));
|
ConfSimple cf(string(blk1, o_b1size));
|
||||||
string fudi;
|
string fudi;
|
||||||
if (!cf.get("udi", fudi) || fudi.compare(udi)) {
|
if (!cf.get("udi", fudi) || fudi.compare(udi)) {
|
||||||
LOGINFO("MboxCache::get_offset:badudi fn " << (fn) << " udi [" << (udi) << "], fudi [" << (fudi) << "]\n" );
|
LOGINFO("MboxCache::get_offset:badudi fn " << (fn) << " udi [" << (udi) << "], fudi [" << (fudi) << "]\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (fseeko(fp, cacheoffset(msgnum), SEEK_SET) != 0) {
|
if (fseeko(fp, cacheoffset(msgnum), SEEK_SET) != 0) {
|
||||||
LOGDEB0("MboxCache::get_offsets: seek " << (lltodecstr(cacheoffset(msgnum))) << " errno " << (errno) << "\n" );
|
LOGDEB0("MboxCache::get_offsets: seek " << (lltodecstr(cacheoffset(msgnum))) << " errno " << (errno) << "\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
mbhoff_type offset = -1;
|
mbhoff_type offset = -1;
|
||||||
size_t ret;
|
size_t ret;
|
||||||
if ((ret = fread(&offset, 1, sizeof(mbhoff_type), fp))
|
if ((ret = fread(&offset, 1, sizeof(mbhoff_type), fp))
|
||||||
!= sizeof(mbhoff_type)) {
|
!= sizeof(mbhoff_type)) {
|
||||||
LOGDEB0("MboxCache::get_offsets: read ret " << (ret) << " errno " << (errno) << "\n" );
|
LOGDEB0("MboxCache::get_offsets: read ret " << (ret) << " errno " << (errno) << "\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
LOGDEB0("MboxCache::get_offsets: ret " << (lltodecstr(offset)) << "\n" );
|
LOGDEB0("MboxCache::get_offsets: ret " << (lltodecstr(offset)) << "\n");
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -144,7 +144,7 @@ public:
|
|||||||
void put_offsets(RclConfig *config, const string& udi, mbhoff_type fsize,
|
void put_offsets(RclConfig *config, const string& udi, mbhoff_type fsize,
|
||||||
vector<mbhoff_type>& offs)
|
vector<mbhoff_type>& offs)
|
||||||
{
|
{
|
||||||
LOGDEB0("MboxCache::put_offsets: " << (offs.size()) << " offsets\n" );
|
LOGDEB0("MboxCache::put_offsets: " << (offs.size()) << " offsets\n");
|
||||||
if (!ok(config) || !maybemakedir())
|
if (!ok(config) || !maybemakedir())
|
||||||
return;
|
return;
|
||||||
if (fsize < m_minfsize)
|
if (fsize < m_minfsize)
|
||||||
@ -153,7 +153,7 @@ public:
|
|||||||
string fn = makefilename(udi);
|
string fn = makefilename(udi);
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
if ((fp = fopen(fn.c_str(), "w")) == 0) {
|
if ((fp = fopen(fn.c_str(), "w")) == 0) {
|
||||||
LOGDEB("MboxCache::put_offsets: fopen errno " << (errno) << "\n" );
|
LOGDEB("MboxCache::put_offsets: fopen errno " << (errno) << "\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
FpKeeper keeper(&fp);
|
FpKeeper keeper(&fp);
|
||||||
@ -163,7 +163,7 @@ public:
|
|||||||
blk1.append(cstr_newline);
|
blk1.append(cstr_newline);
|
||||||
blk1.resize(o_b1size, 0);
|
blk1.resize(o_b1size, 0);
|
||||||
if (fwrite(blk1.c_str(), 1, o_b1size, fp) != o_b1size) {
|
if (fwrite(blk1.c_str(), 1, o_b1size, fp) != o_b1size) {
|
||||||
LOGDEB("MboxCache::put_offsets: fwrite errno " << (errno) << "\n" );
|
LOGDEB("MboxCache::put_offsets: fwrite errno " << (errno) << "\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -255,10 +255,9 @@ void MimeHandlerMbox::clear()
|
|||||||
RecollFilter::clear();
|
RecollFilter::clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
|
bool MimeHandlerMbox::set_document_file_impl(const string& mt, const string &fn)
|
||||||
{
|
{
|
||||||
LOGDEB("MimeHandlerMbox::set_document_file(" << (fn) << ")\n" );
|
LOGDEB("MimeHandlerMbox::set_document_file(" << fn << ")\n");
|
||||||
RecollFilter::set_document_file(mt, fn);
|
|
||||||
m_fn = fn;
|
m_fn = fn;
|
||||||
if (m_vfp) {
|
if (m_vfp) {
|
||||||
fclose((FILE *)m_vfp);
|
fclose((FILE *)m_vfp);
|
||||||
@ -267,7 +266,8 @@ bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
|
|||||||
|
|
||||||
m_vfp = fopen(fn.c_str(), "r");
|
m_vfp = fopen(fn.c_str(), "r");
|
||||||
if (m_vfp == 0) {
|
if (m_vfp == 0) {
|
||||||
LOGERR("MimeHandlerMail::set_document_file: error opening " << (fn) << "\n" );
|
LOGERR("MimeHandlerMail::set_document_file: error opening " << fn <<
|
||||||
|
"\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
#if defined O_NOATIME && O_NOATIME != 0
|
#if defined O_NOATIME && O_NOATIME != 0
|
||||||
@ -278,7 +278,8 @@ bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
|
|||||||
// Used to use ftell() here: no good beyond 2GB
|
// Used to use ftell() here: no good beyond 2GB
|
||||||
{struct stat st;
|
{struct stat st;
|
||||||
if (fstat(fileno((FILE*)m_vfp), &st) < 0) {
|
if (fstat(fileno((FILE*)m_vfp), &st) < 0) {
|
||||||
LOGERR("MimeHandlerMbox:setdocfile: fstat(" << (fn) << ") failed errno " << (errno) << "\n" );
|
LOGERR("MimeHandlerMbox:setdocfile: fstat(" << fn <<
|
||||||
|
") failed errno " << errno << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
m_fsize = st.st_size;
|
m_fsize = st.st_size;
|
||||||
@ -291,7 +292,7 @@ bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
|
|||||||
string quirks;
|
string quirks;
|
||||||
if (m_config && m_config->getConfParam(cstr_keyquirks, quirks)) {
|
if (m_config && m_config->getConfParam(cstr_keyquirks, quirks)) {
|
||||||
if (quirks == "tbird") {
|
if (quirks == "tbird") {
|
||||||
LOGDEB("MimeHandlerMbox: setting quirks TBIRD\n" );
|
LOGDEB("MimeHandlerMbox: setting quirks TBIRD\n");
|
||||||
m_quirks |= MBOXQUIRK_TBIRD;
|
m_quirks |= MBOXQUIRK_TBIRD;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -299,7 +300,7 @@ bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
|
|||||||
// And double check for thunderbird
|
// And double check for thunderbird
|
||||||
string tbirdmsf = fn + ".msf";
|
string tbirdmsf = fn + ".msf";
|
||||||
if ((m_quirks&MBOXQUIRK_TBIRD) == 0 && path_exists(tbirdmsf)) {
|
if ((m_quirks&MBOXQUIRK_TBIRD) == 0 && path_exists(tbirdmsf)) {
|
||||||
LOGDEB("MimeHandlerMbox: detected unconfigured tbird mbox in " << (fn) << "\n" );
|
LOGDEB("MimeHandlerMbox: detected unconfigured tbird mbox in " << (fn) << "\n");
|
||||||
m_quirks |= MBOXQUIRK_TBIRD;
|
m_quirks |= MBOXQUIRK_TBIRD;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -416,7 +417,7 @@ static void compileregexes()
|
|||||||
bool MimeHandlerMbox::next_document()
|
bool MimeHandlerMbox::next_document()
|
||||||
{
|
{
|
||||||
if (m_vfp == 0) {
|
if (m_vfp == 0) {
|
||||||
LOGERR("MimeHandlerMbox::next_document: not open\n" );
|
LOGERR("MimeHandlerMbox::next_document: not open\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!m_havedoc) {
|
if (!m_havedoc) {
|
||||||
@ -428,10 +429,10 @@ bool MimeHandlerMbox::next_document()
|
|||||||
sscanf(m_ipath.c_str(), "%d", &mtarg);
|
sscanf(m_ipath.c_str(), "%d", &mtarg);
|
||||||
} else if (m_forPreview) {
|
} else if (m_forPreview) {
|
||||||
// Can't preview an mbox.
|
// Can't preview an mbox.
|
||||||
LOGDEB("MimeHandlerMbox::next_document: can't preview folders!\n" );
|
LOGDEB("MimeHandlerMbox::next_document: can't preview folders!\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
LOGDEB0("MimeHandlerMbox::next_document: fn " << (m_fn) << ", msgnum " << (m_msgnum) << " mtarg " << (mtarg) << " \n" );
|
LOGDEB0("MimeHandlerMbox::next_document: fn " << (m_fn) << ", msgnum " << (m_msgnum) << " mtarg " << (mtarg) << " \n");
|
||||||
if (mtarg == 0)
|
if (mtarg == 0)
|
||||||
mtarg = -1;
|
mtarg = -1;
|
||||||
|
|
||||||
@ -451,7 +452,7 @@ bool MimeHandlerMbox::next_document()
|
|||||||
if (mtarg > 0) {
|
if (mtarg > 0) {
|
||||||
mbhoff_type off;
|
mbhoff_type off;
|
||||||
line_type line;
|
line_type line;
|
||||||
LOGDEB0("MimeHandlerMbox::next_doc: mtarg " << (mtarg) << " m_udi[" << (m_udi) << "]\n" );
|
LOGDEB0("MimeHandlerMbox::next_doc: mtarg " << (mtarg) << " m_udi[" << (m_udi) << "]\n");
|
||||||
if (!m_udi.empty() &&
|
if (!m_udi.empty() &&
|
||||||
(off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 &&
|
(off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 &&
|
||||||
fseeko(fp, (off_t)off, SEEK_SET) >= 0 &&
|
fseeko(fp, (off_t)off, SEEK_SET) >= 0 &&
|
||||||
@ -459,7 +460,7 @@ bool MimeHandlerMbox::next_document()
|
|||||||
(!M_regexec(fromregex, line, 0, 0, 0) ||
|
(!M_regexec(fromregex, line, 0, 0, 0) ||
|
||||||
((m_quirks & MBOXQUIRK_TBIRD) &&
|
((m_quirks & MBOXQUIRK_TBIRD) &&
|
||||||
!M_regexec(minifromregex, line, 0, 0, 0))) ) {
|
!M_regexec(minifromregex, line, 0, 0, 0))) ) {
|
||||||
LOGDEB0("MimeHandlerMbox: Cache: From_ Ok\n" );
|
LOGDEB0("MimeHandlerMbox: Cache: From_ Ok\n");
|
||||||
fseeko(fp, (off_t)off, SEEK_SET);
|
fseeko(fp, (off_t)off, SEEK_SET);
|
||||||
m_msgnum = mtarg -1;
|
m_msgnum = mtarg -1;
|
||||||
storeoffsets = false;
|
storeoffsets = false;
|
||||||
@ -478,7 +479,7 @@ bool MimeHandlerMbox::next_document()
|
|||||||
for (;;) {
|
for (;;) {
|
||||||
message_end = ftello(fp);
|
message_end = ftello(fp);
|
||||||
if (!fgets(line, LL, fp)) {
|
if (!fgets(line, LL, fp)) {
|
||||||
LOGDEB2("MimeHandlerMbox:next: eof\n" );
|
LOGDEB2("MimeHandlerMbox:next: eof\n");
|
||||||
iseof = true;
|
iseof = true;
|
||||||
m_msgnum++;
|
m_msgnum++;
|
||||||
break;
|
break;
|
||||||
@ -486,7 +487,7 @@ bool MimeHandlerMbox::next_document()
|
|||||||
m_lineno++;
|
m_lineno++;
|
||||||
int ll;
|
int ll;
|
||||||
stripendnl(line, ll);
|
stripendnl(line, ll);
|
||||||
LOGDEB2("mhmbox:next: hadempty " << (hademptyline) << " lineno " << (m_lineno) << " ll " << (ll) << " Line: [" << (line) << "]\n" );
|
LOGDEB2("mhmbox:next: hadempty " << (hademptyline) << " lineno " << (m_lineno) << " ll " << (ll) << " Line: [" << (line) << "]\n");
|
||||||
if (hademptyline) {
|
if (hademptyline) {
|
||||||
if (ll > 0) {
|
if (ll > 0) {
|
||||||
// Non-empty line with empty line flag set, reset flag
|
// Non-empty line with empty line flag set, reset flag
|
||||||
@ -504,7 +505,7 @@ bool MimeHandlerMbox::next_document()
|
|||||||
((m_quirks & MBOXQUIRK_TBIRD) &&
|
((m_quirks & MBOXQUIRK_TBIRD) &&
|
||||||
!M_regexec(minifromregex, line, 0, 0, 0)))
|
!M_regexec(minifromregex, line, 0, 0, 0)))
|
||||||
) {
|
) {
|
||||||
LOGDEB0("MimeHandlerMbox: msgnum " << (m_msgnum) << ", From_ at line " << (m_lineno) << ": [" << (line) << "]\n" );
|
LOGDEB0("MimeHandlerMbox: msgnum " << (m_msgnum) << ", From_ at line " << (m_lineno) << ": [" << (line) << "]\n");
|
||||||
if (storeoffsets)
|
if (storeoffsets)
|
||||||
m_offsets.push_back(message_end);
|
m_offsets.push_back(message_end);
|
||||||
m_msgnum++;
|
m_msgnum++;
|
||||||
@ -527,13 +528,13 @@ bool MimeHandlerMbox::next_document()
|
|||||||
line[ll+1] = 0;
|
line[ll+1] = 0;
|
||||||
msgtxt += line;
|
msgtxt += line;
|
||||||
if (msgtxt.size() > max_mbox_member_size) {
|
if (msgtxt.size() > max_mbox_member_size) {
|
||||||
LOGERR("mh_mbox: huge message (more than " << (max_mbox_member_size/(1024*1024)) << " MB) inside " << (m_fn) << ", giving up\n" );
|
LOGERR("mh_mbox: huge message (more than " << (max_mbox_member_size/(1024*1024)) << " MB) inside " << (m_fn) << ", giving up\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
LOGDEB2("Message text length " << (msgtxt.size()) << "\n" );
|
LOGDEB2("Message text length " << (msgtxt.size()) << "\n");
|
||||||
LOGDEB2("Message text: [" << (msgtxt) << "]\n" );
|
LOGDEB2("Message text: [" << (msgtxt) << "]\n");
|
||||||
char buf[20];
|
char buf[20];
|
||||||
// m_msgnum was incremented when hitting the next From_ or eof, so the data
|
// m_msgnum was incremented when hitting the next From_ or eof, so the data
|
||||||
// is for m_msgnum - 1
|
// is for m_msgnum - 1
|
||||||
@ -541,7 +542,7 @@ bool MimeHandlerMbox::next_document()
|
|||||||
m_metaData[cstr_dj_keyipath] = buf;
|
m_metaData[cstr_dj_keyipath] = buf;
|
||||||
m_metaData[cstr_dj_keymt] = "message/rfc822";
|
m_metaData[cstr_dj_keymt] = "message/rfc822";
|
||||||
if (iseof) {
|
if (iseof) {
|
||||||
LOGDEB2("MimeHandlerMbox::next: eof hit\n" );
|
LOGDEB2("MimeHandlerMbox::next: eof hit\n");
|
||||||
m_havedoc = false;
|
m_havedoc = false;
|
||||||
if (!m_udi.empty() && storeoffsets) {
|
if (!m_udi.empty() && storeoffsets) {
|
||||||
o_mcache.put_offsets(m_config, m_udi, m_fsize, m_offsets);
|
o_mcache.put_offsets(m_config, m_udi, m_fsize, m_offsets);
|
||||||
@ -658,7 +659,7 @@ int main(int argc, char **argv)
|
|||||||
} else {
|
} else {
|
||||||
size = it->second.length();
|
size = it->second.length();
|
||||||
}
|
}
|
||||||
cout << "Doc " << docnt << " size " << size << endl;
|
cout << "Doc " << docnt << " size " << size << endl;
|
||||||
}
|
}
|
||||||
cout << docnt << " documents found in " << filename << endl;
|
cout << docnt << " documents found in " << filename << endl;
|
||||||
exit(0);
|
exit(0);
|
||||||
|
|||||||
@ -19,8 +19,6 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
using std::string;
|
|
||||||
using std::vector;
|
|
||||||
|
|
||||||
#include "mimehandler.h"
|
#include "mimehandler.h"
|
||||||
|
|
||||||
@ -30,28 +28,32 @@ using std::vector;
|
|||||||
* file.
|
* file.
|
||||||
*/
|
*/
|
||||||
class MimeHandlerMbox : public RecollFilter {
|
class MimeHandlerMbox : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerMbox(RclConfig *cnf, const string& id)
|
MimeHandlerMbox(RclConfig *cnf, const std::string& id)
|
||||||
: RecollFilter(cnf, id), m_vfp(0), m_msgnum(0),
|
: RecollFilter(cnf, id), m_vfp(0), m_msgnum(0),
|
||||||
m_lineno(0), m_fsize(0)
|
m_lineno(0), m_fsize(0) {
|
||||||
{}
|
}
|
||||||
virtual ~MimeHandlerMbox();
|
virtual ~MimeHandlerMbox();
|
||||||
virtual bool set_document_file(const string& mt, const string &file_path);
|
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
virtual bool skip_to_document(const string& ipath) {
|
virtual bool skip_to_document(const std::string& ipath) {
|
||||||
m_ipath = ipath;
|
m_ipath = ipath;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
virtual void clear();
|
virtual void clear();
|
||||||
typedef long long mbhoff_type;
|
typedef long long mbhoff_type;
|
||||||
private:
|
|
||||||
string m_fn; // File name
|
protected:
|
||||||
|
virtual bool set_document_file_impl(const std::string&,
|
||||||
|
const std::string&);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::string m_fn; // File name
|
||||||
void *m_vfp; // File pointer for folder
|
void *m_vfp; // File pointer for folder
|
||||||
int m_msgnum; // Current message number in folder. Starts at 1
|
int m_msgnum; // Current message number in folder. Starts at 1
|
||||||
string m_ipath;
|
std::string m_ipath;
|
||||||
int m_lineno; // debug
|
int m_lineno; // debug
|
||||||
mbhoff_type m_fsize;
|
mbhoff_type m_fsize;
|
||||||
vector<mbhoff_type> m_offsets;
|
std::vector<mbhoff_type> m_offsets;
|
||||||
enum Quirks {MBOXQUIRK_TBIRD=1};
|
enum Quirks {MBOXQUIRK_TBIRD=1};
|
||||||
int m_quirks;
|
int m_quirks;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -17,34 +17,28 @@
|
|||||||
#ifndef _MH_NULL_H_INCLUDED_
|
#ifndef _MH_NULL_H_INCLUDED_
|
||||||
#define _MH_NULL_H_INCLUDED_
|
#define _MH_NULL_H_INCLUDED_
|
||||||
|
|
||||||
// It may make sense in some cases to set this null filter (no output)
|
|
||||||
// instead of using recoll_noindex or leaving the default filter in
|
|
||||||
// case one doesn't want to install it: this will avoid endless retries
|
|
||||||
// to reindex the affected files, as recoll will think it has succeeded
|
|
||||||
// indexing them. Downside: the files won't be indexed when one
|
|
||||||
// actually installs the real filter, will need a -z
|
|
||||||
// Actually used for empty files
|
|
||||||
// Associated to application/x-zerosize, so use
|
|
||||||
// <mimetype> = internal application/x-zerosize
|
|
||||||
// in mimeconf
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "cstr.h"
|
#include "cstr.h"
|
||||||
#include "mimehandler.h"
|
#include "mimehandler.h"
|
||||||
|
|
||||||
|
/// Null input handler always returning empty data.
|
||||||
|
///
|
||||||
|
/// It may make sense in some cases to set this null filter (no output)
|
||||||
|
/// instead of using recoll_noindex or leaving the default filter in
|
||||||
|
/// case one doesn't want to install it: this will avoid endless retries
|
||||||
|
/// to reindex the affected files, as recoll will think it has succeeded
|
||||||
|
/// indexing them. Downside: the files won't be indexed when one
|
||||||
|
/// actually installs the real filter, will need a -z
|
||||||
|
/// Actually used for empty files.
|
||||||
|
/// Associated to application/x-zerosize, so use the following in mimeconf:
|
||||||
|
/// <mimetype> = internal application/x-zerosize
|
||||||
class MimeHandlerNull : public RecollFilter {
|
class MimeHandlerNull : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerNull(RclConfig *cnf, const std::string& id)
|
MimeHandlerNull(RclConfig *cnf, const std::string& id)
|
||||||
: RecollFilter(cnf, id)
|
: RecollFilter(cnf, id) {
|
||||||
{
|
|
||||||
}
|
|
||||||
virtual ~MimeHandlerNull()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
virtual bool set_document_file(const string& mt, const string& fn)
|
|
||||||
{
|
|
||||||
RecollFilter::set_document_file(mt, fn);
|
|
||||||
return m_havedoc = true;
|
|
||||||
}
|
}
|
||||||
|
virtual ~MimeHandlerNull() {}
|
||||||
|
|
||||||
virtual bool next_document()
|
virtual bool next_document()
|
||||||
{
|
{
|
||||||
if (m_havedoc == false)
|
if (m_havedoc == false)
|
||||||
|
|||||||
@ -36,18 +36,10 @@
|
|||||||
class MimeHandlerSymlink : public RecollFilter {
|
class MimeHandlerSymlink : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerSymlink(RclConfig *cnf, const std::string& id)
|
MimeHandlerSymlink(RclConfig *cnf, const std::string& id)
|
||||||
: RecollFilter(cnf, id)
|
: RecollFilter(cnf, id) {
|
||||||
{
|
|
||||||
}
|
|
||||||
virtual ~MimeHandlerSymlink()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
virtual bool set_document_file(const string& mt, const string& fn)
|
|
||||||
{
|
|
||||||
RecollFilter::set_document_file(mt, fn);
|
|
||||||
m_fn = fn;
|
|
||||||
return m_havedoc = true;
|
|
||||||
}
|
}
|
||||||
|
virtual ~MimeHandlerSymlink() {}
|
||||||
|
|
||||||
virtual bool next_document()
|
virtual bool next_document()
|
||||||
{
|
{
|
||||||
if (m_havedoc == false)
|
if (m_havedoc == false)
|
||||||
@ -61,11 +53,18 @@ class MimeHandlerSymlink : public RecollFilter {
|
|||||||
transcode(path_getsimple(slc), m_metaData[cstr_dj_keycontent],
|
transcode(path_getsimple(slc), m_metaData[cstr_dj_keycontent],
|
||||||
m_config->getDefCharset(true), "UTF-8");
|
m_config->getDefCharset(true), "UTF-8");
|
||||||
} else {
|
} else {
|
||||||
LOGDEB("Symlink: readlink [" << (m_fn) << "] failed, errno " << (errno) << "\n" );
|
LOGDEB("Symlink: readlink [" << m_fn << "] failed, errno " <<
|
||||||
|
errno << "\n");
|
||||||
}
|
}
|
||||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
protected:
|
||||||
|
virtual bool set_document_file_impl(const string& mt, const string& fn) {
|
||||||
|
m_fn = fn;
|
||||||
|
return m_havedoc = true;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::string m_fn;
|
std::string m_fn;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -40,13 +40,11 @@ const int MB = 1024*1024;
|
|||||||
const int KB = 1024;
|
const int KB = 1024;
|
||||||
|
|
||||||
// Process a plain text file
|
// Process a plain text file
|
||||||
bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
|
bool MimeHandlerText::set_document_file_impl(const string& mt, const string &fn)
|
||||||
{
|
{
|
||||||
LOGDEB("MimeHandlerText::set_document_file: [" << fn << "] offs " <<
|
LOGDEB("MimeHandlerText::set_document_file: [" << fn << "] offs " <<
|
||||||
m_offs << "\n");
|
m_offs << "\n");
|
||||||
|
|
||||||
RecollFilter::set_document_file(mt, fn);
|
|
||||||
|
|
||||||
m_fn = fn;
|
m_fn = fn;
|
||||||
// This should not be necessary, but it happens on msw that offset is large
|
// This should not be necessary, but it happens on msw that offset is large
|
||||||
// negative at this point, could not find the reason (still trying).
|
// negative at this point, could not find the reason (still trying).
|
||||||
@ -93,9 +91,9 @@ bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MimeHandlerText::set_document_string(const string& mt, const string& otext)
|
bool MimeHandlerText::set_document_string_impl(const string& mt,
|
||||||
|
const string& otext)
|
||||||
{
|
{
|
||||||
RecollFilter::set_document_string(mt, otext);
|
|
||||||
m_text = otext;
|
m_text = otext;
|
||||||
if (!m_forPreview) {
|
if (!m_forPreview) {
|
||||||
string md5, xmd5;
|
string md5, xmd5;
|
||||||
@ -175,7 +173,7 @@ bool MimeHandlerText::readnext()
|
|||||||
string reason;
|
string reason;
|
||||||
m_text.clear();
|
m_text.clear();
|
||||||
if (!file_to_string(m_fn, m_text, m_offs, m_pagesz, &reason)) {
|
if (!file_to_string(m_fn, m_text, m_offs, m_pagesz, &reason)) {
|
||||||
LOGERR("MimeHandlerText: can't read file: " << (reason) << "\n" );
|
LOGERR("MimeHandlerText: can't read file: " << reason << "\n" );
|
||||||
m_havedoc = false;
|
m_havedoc = false;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -30,14 +30,10 @@
|
|||||||
class MimeHandlerText : public RecollFilter {
|
class MimeHandlerText : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerText(RclConfig *cnf, const std::string& id)
|
MimeHandlerText(RclConfig *cnf, const std::string& id)
|
||||||
: RecollFilter(cnf, id), m_paging(false), m_offs(0), m_pagesz(0)
|
: RecollFilter(cnf, id), m_paging(false), m_offs(0), m_pagesz(0) {
|
||||||
{
|
|
||||||
}
|
}
|
||||||
virtual ~MimeHandlerText()
|
virtual ~MimeHandlerText() {}
|
||||||
{
|
|
||||||
}
|
|
||||||
virtual bool set_document_file(const std::string& mt, const std::string &file_path);
|
|
||||||
virtual bool set_document_string(const std::string&, const std::string&);
|
|
||||||
virtual bool is_data_input_ok(DataInput input) const {
|
virtual bool is_data_input_ok(DataInput input) const {
|
||||||
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||||
return true;
|
return true;
|
||||||
@ -45,14 +41,20 @@ class MimeHandlerText : public RecollFilter {
|
|||||||
}
|
}
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
virtual bool skip_to_document(const std::string& s);
|
virtual bool skip_to_document(const std::string& s);
|
||||||
virtual void clear()
|
virtual void clear() {
|
||||||
{
|
|
||||||
m_paging = false;
|
m_paging = false;
|
||||||
m_text.erase();
|
m_text.erase();
|
||||||
m_fn.erase();
|
m_fn.erase();
|
||||||
m_offs = 0;
|
m_offs = 0;
|
||||||
RecollFilter::clear();
|
RecollFilter::clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual bool set_document_file_impl(const std::string& mt,
|
||||||
|
const std::string &file_path);
|
||||||
|
virtual bool set_document_string_impl(const std::string&,
|
||||||
|
const std::string&);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool m_paging;
|
bool m_paging;
|
||||||
std::string m_text;
|
std::string m_text;
|
||||||
|
|||||||
@ -29,21 +29,9 @@
|
|||||||
class MimeHandlerUnknown : public RecollFilter {
|
class MimeHandlerUnknown : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerUnknown(RclConfig *cnf, const string& id)
|
MimeHandlerUnknown(RclConfig *cnf, const string& id)
|
||||||
: RecollFilter(cnf, id)
|
: RecollFilter(cnf, id) {
|
||||||
{
|
|
||||||
}
|
|
||||||
virtual ~MimeHandlerUnknown()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
virtual bool set_document_file(const string& mt, const string& fn)
|
|
||||||
{
|
|
||||||
RecollFilter::set_document_file(mt, fn);
|
|
||||||
return m_havedoc = true;
|
|
||||||
}
|
|
||||||
virtual bool set_document_string(const string& mt, const string& s) {
|
|
||||||
RecollFilter::set_document_string(mt, s);
|
|
||||||
return m_havedoc = true;
|
|
||||||
}
|
}
|
||||||
|
virtual ~MimeHandlerUnknown() {}
|
||||||
virtual bool next_document() {
|
virtual bool next_document() {
|
||||||
if (m_havedoc == false)
|
if (m_havedoc == false)
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@ -60,7 +60,8 @@ static RecollFilter *getMimeHandlerFromCache(const string& key)
|
|||||||
std::unique_lock<std::mutex> locker(o_handlers_mutex);
|
std::unique_lock<std::mutex> locker(o_handlers_mutex);
|
||||||
string xdigest;
|
string xdigest;
|
||||||
MD5HexPrint(key, xdigest);
|
MD5HexPrint(key, xdigest);
|
||||||
LOGDEB("getMimeHandlerFromCache: " << (xdigest) << " cache size " << (o_handlers.size()) << "\n" );
|
LOGDEB("getMimeHandlerFromCache: " << xdigest << " cache size " <<
|
||||||
|
o_handlers.size() << "\n");
|
||||||
|
|
||||||
multimap<string, RecollFilter *>::iterator it = o_handlers.find(key);
|
multimap<string, RecollFilter *>::iterator it = o_handlers.find(key);
|
||||||
if (it != o_handlers.end()) {
|
if (it != o_handlers.end()) {
|
||||||
@ -69,13 +70,14 @@ static RecollFilter *getMimeHandlerFromCache(const string& key)
|
|||||||
if (it1 != o_hlru.end()) {
|
if (it1 != o_hlru.end()) {
|
||||||
o_hlru.erase(it1);
|
o_hlru.erase(it1);
|
||||||
} else {
|
} else {
|
||||||
LOGERR("getMimeHandlerFromCache: lru position not found\n" );
|
LOGERR("getMimeHandlerFromCache: lru position not found\n");
|
||||||
}
|
}
|
||||||
o_handlers.erase(it);
|
o_handlers.erase(it);
|
||||||
LOGDEB("getMimeHandlerFromCache: " << (xdigest) << " found size " << (o_handlers.size()) << "\n" );
|
LOGDEB("getMimeHandlerFromCache: " << xdigest << " found size " <<
|
||||||
|
o_handlers.size() << "\n");
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
LOGDEB("getMimeHandlerFromCache: " << (xdigest) << " not found\n" );
|
LOGDEB("getMimeHandlerFromCache: " << xdigest << " not found\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -85,14 +87,16 @@ void returnMimeHandler(RecollFilter *handler)
|
|||||||
typedef multimap<string, RecollFilter*>::value_type value_type;
|
typedef multimap<string, RecollFilter*>::value_type value_type;
|
||||||
|
|
||||||
if (handler == 0) {
|
if (handler == 0) {
|
||||||
LOGERR("returnMimeHandler: bad parameter\n" );
|
LOGERR("returnMimeHandler: bad parameter\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
handler->clear();
|
handler->clear();
|
||||||
|
|
||||||
std::unique_lock<std::mutex> locker(o_handlers_mutex);
|
std::unique_lock<std::mutex> locker(o_handlers_mutex);
|
||||||
|
|
||||||
LOGDEB("returnMimeHandler: returning filter for " << (handler->get_mime_type()) << " cache size " << (o_handlers.size()) << "\n" );
|
LOGDEB("returnMimeHandler: returning filter for " <<
|
||||||
|
handler->get_mime_type() << " cache size " << o_handlers.size() <<
|
||||||
|
"\n");
|
||||||
|
|
||||||
// Limit pool size. The pool can grow quite big because there are
|
// Limit pool size. The pool can grow quite big because there are
|
||||||
// many filter types, each of which can be used in several copies
|
// many filter types, each of which can be used in several copies
|
||||||
@ -105,9 +109,9 @@ void returnMimeHandler(RecollFilter *handler)
|
|||||||
if (once) {
|
if (once) {
|
||||||
once = 0;
|
once = 0;
|
||||||
for (it = o_handlers.begin(); it != o_handlers.end(); it++) {
|
for (it = o_handlers.begin(); it != o_handlers.end(); it++) {
|
||||||
LOGDEB1("Cache full. key: " << (it->first) << "\n" );
|
LOGDEB1("Cache full. key: " << it->first << "\n");
|
||||||
}
|
}
|
||||||
LOGDEB1("Cache LRU size: " << (o_hlru.size()) << "\n" );
|
LOGDEB1("Cache LRU size: " << o_hlru.size() << "\n");
|
||||||
}
|
}
|
||||||
if (o_hlru.size() > 0) {
|
if (o_hlru.size() > 0) {
|
||||||
it = o_hlru.back();
|
it = o_hlru.back();
|
||||||
@ -122,7 +126,7 @@ void returnMimeHandler(RecollFilter *handler)
|
|||||||
|
|
||||||
void clearMimeHandlerCache()
|
void clearMimeHandlerCache()
|
||||||
{
|
{
|
||||||
LOGDEB("clearMimeHandlerCache()\n" );
|
LOGDEB("clearMimeHandlerCache()\n");
|
||||||
multimap<string, RecollFilter *>::iterator it;
|
multimap<string, RecollFilter *>::iterator it;
|
||||||
std::unique_lock<std::mutex> locker(o_handlers_mutex);
|
std::unique_lock<std::mutex> locker(o_handlers_mutex);
|
||||||
for (it = o_handlers.begin(); it != o_handlers.end(); it++) {
|
for (it = o_handlers.begin(); it != o_handlers.end(); it++) {
|
||||||
@ -136,31 +140,31 @@ void clearMimeHandlerCache()
|
|||||||
static RecollFilter *mhFactory(RclConfig *config, const string &mime,
|
static RecollFilter *mhFactory(RclConfig *config, const string &mime,
|
||||||
bool nobuild, string& id)
|
bool nobuild, string& id)
|
||||||
{
|
{
|
||||||
LOGDEB2("mhFactory(" << (mime) << ")\n" );
|
LOGDEB2("mhFactory(" << mime << ")\n");
|
||||||
string lmime(mime);
|
string lmime(mime);
|
||||||
stringtolower(lmime);
|
stringtolower(lmime);
|
||||||
if (cstr_textplain == lmime) {
|
if (cstr_textplain == lmime) {
|
||||||
LOGDEB2("mhFactory(" << (mime) << "): returning MimeHandlerText\n" );
|
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText\n");
|
||||||
MD5String("MimeHandlerText", id);
|
MD5String("MimeHandlerText", id);
|
||||||
return nobuild ? 0 : new MimeHandlerText(config, id);
|
return nobuild ? 0 : new MimeHandlerText(config, id);
|
||||||
} else if ("text/html" == lmime) {
|
} else if ("text/html" == lmime) {
|
||||||
LOGDEB2("mhFactory(" << (mime) << "): returning MimeHandlerHtml\n" );
|
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerHtml\n");
|
||||||
MD5String("MimeHandlerHtml", id);
|
MD5String("MimeHandlerHtml", id);
|
||||||
return nobuild ? 0 : new MimeHandlerHtml(config, id);
|
return nobuild ? 0 : new MimeHandlerHtml(config, id);
|
||||||
} else if ("text/x-mail" == lmime) {
|
} else if ("text/x-mail" == lmime) {
|
||||||
LOGDEB2("mhFactory(" << (mime) << "): returning MimeHandlerMbox\n" );
|
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerMbox\n");
|
||||||
MD5String("MimeHandlerMbox", id);
|
MD5String("MimeHandlerMbox", id);
|
||||||
return nobuild ? 0 : new MimeHandlerMbox(config, id);
|
return nobuild ? 0 : new MimeHandlerMbox(config, id);
|
||||||
} else if ("message/rfc822" == lmime) {
|
} else if ("message/rfc822" == lmime) {
|
||||||
LOGDEB2("mhFactory(" << (mime) << "): returning MimeHandlerMail\n" );
|
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerMail\n");
|
||||||
MD5String("MimeHandlerMail", id);
|
MD5String("MimeHandlerMail", id);
|
||||||
return nobuild ? 0 : new MimeHandlerMail(config, id);
|
return nobuild ? 0 : new MimeHandlerMail(config, id);
|
||||||
} else if ("inode/symlink" == lmime) {
|
} else if ("inode/symlink" == lmime) {
|
||||||
LOGDEB2("mhFactory(" << (mime) << "): ret MimeHandlerSymlink\n" );
|
LOGDEB2("mhFactory(" << mime << "): ret MimeHandlerSymlink\n");
|
||||||
MD5String("MimeHandlerSymlink", id);
|
MD5String("MimeHandlerSymlink", id);
|
||||||
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
|
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
|
||||||
} else if ("application/x-zerosize" == lmime) {
|
} else if ("application/x-zerosize" == lmime) {
|
||||||
LOGDEB("mhFactory(" << (mime) << "): ret MimeHandlerNull\n" );
|
LOGDEB("mhFactory(" << mime << "): ret MimeHandlerNull\n");
|
||||||
MD5String("MimeHandlerNull", id);
|
MD5String("MimeHandlerNull", id);
|
||||||
return nobuild ? 0 : new MimeHandlerNull(config, id);
|
return nobuild ? 0 : new MimeHandlerNull(config, id);
|
||||||
} else if (lmime.find("text/") == 0) {
|
} else if (lmime.find("text/") == 0) {
|
||||||
@ -169,14 +173,15 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
|
|||||||
// mimeconf, not at random. For programs, for example this
|
// mimeconf, not at random. For programs, for example this
|
||||||
// allows indexing and previewing as text/plain (no filter
|
// allows indexing and previewing as text/plain (no filter
|
||||||
// exec) but still opening with a specific editor.
|
// exec) but still opening with a specific editor.
|
||||||
LOGDEB2("mhFactory(" << (mime) << "): returning MimeHandlerText(x)\n" );
|
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText(x)\n");
|
||||||
MD5String("MimeHandlerText", id);
|
MD5String("MimeHandlerText", id);
|
||||||
return nobuild ? 0 : new MimeHandlerText(config, id);
|
return nobuild ? 0 : new MimeHandlerText(config, id);
|
||||||
} else {
|
} else {
|
||||||
// We should not get there. It means that "internal" was set
|
// We should not get there. It means that "internal" was set
|
||||||
// as a handler in mimeconf for a mime type we actually can't
|
// as a handler in mimeconf for a mime type we actually can't
|
||||||
// handle.
|
// handle.
|
||||||
LOGERR("mhFactory: mime type [" << (lmime) << "] set as internal but unknown\n" );
|
LOGERR("mhFactory: mime type [" << lmime <<
|
||||||
|
"] set as internal but unknown\n");
|
||||||
MD5String("MimeHandlerUnknown", id);
|
MD5String("MimeHandlerUnknown", id);
|
||||||
return nobuild ? 0 : new MimeHandlerUnknown(config, id);
|
return nobuild ? 0 : new MimeHandlerUnknown(config, id);
|
||||||
}
|
}
|
||||||
@ -199,7 +204,8 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
|||||||
string cmdstr;
|
string cmdstr;
|
||||||
|
|
||||||
if (!cfg->valueSplitAttributes(hs, cmdstr, attrs)) {
|
if (!cfg->valueSplitAttributes(hs, cmdstr, attrs)) {
|
||||||
LOGERR("mhExecFactory: bad config line for [" << (mtype) << "]: [" << (hs) << "]\n" );
|
LOGERR("mhExecFactory: bad config line for [" <<
|
||||||
|
mtype << "]: [" << hs << "]\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -207,7 +213,8 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
|||||||
vector<string> cmdtoks;
|
vector<string> cmdtoks;
|
||||||
stringToStrings(cmdstr, cmdtoks);
|
stringToStrings(cmdstr, cmdtoks);
|
||||||
if (cmdtoks.empty()) {
|
if (cmdtoks.empty()) {
|
||||||
LOGERR("mhExecFactory: bad config line for [" << (mtype) << "]: [" << (hs) << "]\n" );
|
LOGERR("mhExecFactory: bad config line for [" << mtype <<
|
||||||
|
"]: [" << hs << "]\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
MimeHandlerExec *h = multiple ?
|
MimeHandlerExec *h = multiple ?
|
||||||
@ -221,7 +228,8 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
|||||||
// the same change if we ever want to use the same cmdline as windows
|
// the same change if we ever want to use the same cmdline as windows
|
||||||
if (!stringlowercmp("python", *it) || !stringlowercmp("perl", *it)) {
|
if (!stringlowercmp("python", *it) || !stringlowercmp("perl", *it)) {
|
||||||
if (cmdtoks.size() < 2) {
|
if (cmdtoks.size() < 2) {
|
||||||
LOGERR("mhExecFactory: python/perl cmd: no script?. [" << (mtype) << "]: [" << (hs) << "]\n" );
|
LOGERR("mhExecFactory: python/perl cmd: no script?. [" <<
|
||||||
|
mtype << "]: [" << hs << "]\n");
|
||||||
}
|
}
|
||||||
vector<string>::iterator it1(it);
|
vector<string>::iterator it1(it);
|
||||||
it1++;
|
it1++;
|
||||||
@ -244,7 +252,9 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
|||||||
for (it = h->params.begin(); it != h->params.end(); it++) {
|
for (it = h->params.begin(); it != h->params.end(); it++) {
|
||||||
scmd += string("[") + *it + "] ";
|
scmd += string("[") + *it + "] ";
|
||||||
}
|
}
|
||||||
LOGDEB("mhExecFactory:mt [" << (mtype) << "] cfgmt [" << (h->cfgFilterOutputMtype) << "] cfgcs [" << (h->cfgFilterOutputCharset) << "] cmd: [" << (scmd) << "]\n" );
|
LOGDEB("mhExecFactory:mt [" << mtype << "] cfgmt [" <<
|
||||||
|
h->cfgFilterOutputMtype << "] cfgcs [" <<
|
||||||
|
h->cfgFilterOutputCharset << "] cmd: [" << scmd << "]\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return h;
|
return h;
|
||||||
@ -254,7 +264,8 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
|||||||
RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||||
bool filtertypes)
|
bool filtertypes)
|
||||||
{
|
{
|
||||||
LOGDEB("getMimeHandler: mtype [" << (mtype) << "] filtertypes " << (filtertypes) << "\n" );
|
LOGDEB("getMimeHandler: mtype [" << mtype << "] filtertypes " <<
|
||||||
|
filtertypes << "\n");
|
||||||
RecollFilter *h = 0;
|
RecollFilter *h = 0;
|
||||||
|
|
||||||
// Get handler definition for mime type. We do this even if an
|
// Get handler definition for mime type. We do this even if an
|
||||||
@ -292,7 +303,7 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
|||||||
if (h != 0)
|
if (h != 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
LOGDEB2("getMimeHandler: " << (mtype) << " not in cache\n" );
|
LOGDEB2("getMimeHandler: " << mtype << " not in cache\n");
|
||||||
|
|
||||||
// Not in cache.
|
// Not in cache.
|
||||||
if (internal) {
|
if (internal) {
|
||||||
@ -303,13 +314,14 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
|||||||
// partly redundant with the localfields/rclaptg, but
|
// partly redundant with the localfields/rclaptg, but
|
||||||
// better and the latter will probably go away at some
|
// better and the latter will probably go away at some
|
||||||
// point in the future.
|
// point in the future.
|
||||||
LOGDEB2("handlertype internal, cmdstr [" << (cmdstr) << "]\n" );
|
LOGDEB2("handlertype internal, cmdstr [" << cmdstr << "]\n");
|
||||||
h = mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, false, id);
|
h = mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, false, id);
|
||||||
goto out;
|
goto out;
|
||||||
} else if (!stringlowercmp("dll", handlertype)) {
|
} else if (!stringlowercmp("dll", handlertype)) {
|
||||||
} else {
|
} else {
|
||||||
if (cmdstr.empty()) {
|
if (cmdstr.empty()) {
|
||||||
LOGERR("getMimeHandler: bad line for " << (mtype) << ": " << (hs) << "\n" );
|
LOGERR("getMimeHandler: bad line for " << mtype << ": " <<
|
||||||
|
hs << "\n");
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
if (!stringlowercmp("exec", handlertype)) {
|
if (!stringlowercmp("exec", handlertype)) {
|
||||||
@ -319,7 +331,8 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
|||||||
h = mhExecFactory(cfg, mtype, cmdstr, true, id);
|
h = mhExecFactory(cfg, mtype, cmdstr, true, id);
|
||||||
goto out;
|
goto out;
|
||||||
} else {
|
} else {
|
||||||
LOGERR("getMimeHandler: bad line for " << (mtype) << ": " << (hs) << "\n" );
|
LOGERR("getMimeHandler: bad line for " << mtype << ": " <<
|
||||||
|
hs << "\n");
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -31,13 +31,14 @@ class RclConfig;
|
|||||||
class RecollFilter : public Dijon::Filter {
|
class RecollFilter : public Dijon::Filter {
|
||||||
public:
|
public:
|
||||||
RecollFilter(RclConfig *config, const std::string& id)
|
RecollFilter(RclConfig *config, const std::string& id)
|
||||||
: m_config(config), m_forPreview(false), m_havedoc(false), m_id(id)
|
: m_config(config), m_forPreview(false), m_havedoc(false), m_id(id) {
|
||||||
{}
|
}
|
||||||
virtual ~RecollFilter() {}
|
virtual ~RecollFilter() {}
|
||||||
virtual void setConfig(RclConfig *config)
|
|
||||||
{
|
virtual void setConfig(RclConfig *config) {
|
||||||
m_config = config;
|
m_config = config;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool set_property(Properties p, const std::string &v) {
|
virtual bool set_property(Properties p, const std::string &v) {
|
||||||
switch (p) {
|
switch (p) {
|
||||||
case DJF_UDI:
|
case DJF_UDI:
|
||||||
@ -58,34 +59,23 @@ public:
|
|||||||
|
|
||||||
// We don't use this for now
|
// We don't use this for now
|
||||||
virtual bool set_document_uri(const std::string& mtype,
|
virtual bool set_document_uri(const std::string& mtype,
|
||||||
const std::string &)
|
const std::string &) {
|
||||||
{
|
|
||||||
m_mimeType = mtype;
|
m_mimeType = mtype;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This does nothing right now but should be called from the
|
|
||||||
// subclass method in case we need some common processing one day
|
|
||||||
// (was used for xattrs at some point). Yes this is the "call
|
|
||||||
// super" anti-pattern, bad, but we have several layers of derived
|
|
||||||
// classes, so that implementing the template method approach (by
|
|
||||||
// having a pure virtual called from here and implemented in the
|
|
||||||
// subclass) would have to be repeated in each derived class. It's
|
|
||||||
// just simpler this way.
|
|
||||||
virtual bool set_document_file(const std::string& mtype,
|
virtual bool set_document_file(const std::string& mtype,
|
||||||
const std::string & /*file_path*/)
|
const std::string &file_path) {
|
||||||
{
|
|
||||||
m_mimeType = mtype;
|
m_mimeType = mtype;
|
||||||
return true;
|
return set_document_file_impl(mtype, file_path);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Default implementations
|
|
||||||
virtual bool set_document_string(const std::string& mtype,
|
virtual bool set_document_string(const std::string& mtype,
|
||||||
const std::string &)
|
const std::string &contents) {
|
||||||
{
|
|
||||||
m_mimeType = mtype;
|
m_mimeType = mtype;
|
||||||
return false;
|
return set_document_string_impl(mtype, contents);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool set_document_data(const std::string& mtype,
|
virtual bool set_document_data(const std::string& mtype,
|
||||||
const char *cp, size_t sz)
|
const char *cp, size_t sz)
|
||||||
{
|
{
|
||||||
@ -95,11 +85,14 @@ public:
|
|||||||
virtual void set_docsize(off_t size) {
|
virtual void set_docsize(off_t size) {
|
||||||
m_docsize = size;
|
m_docsize = size;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual off_t get_docsize() const {
|
virtual off_t get_docsize() const {
|
||||||
return m_docsize;
|
return m_docsize;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool has_documents() const {return m_havedoc;}
|
virtual bool has_documents() const {
|
||||||
|
return m_havedoc;
|
||||||
|
}
|
||||||
|
|
||||||
// Most doc types are single-doc
|
// Most doc types are single-doc
|
||||||
virtual bool skip_to_document(const std::string& s) {
|
virtual bool skip_to_document(const std::string& s) {
|
||||||
@ -118,8 +111,7 @@ public:
|
|||||||
return m_reason;
|
return m_reason;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual const std::string& get_id() const
|
virtual const std::string& get_id() const {
|
||||||
{
|
|
||||||
return m_id;
|
return m_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,7 +129,21 @@ public:
|
|||||||
bool txtdcode(const std::string& who);
|
bool txtdcode(const std::string& who);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool preview() {return m_forPreview;}
|
|
||||||
|
// We provide default implementations, as not all handlers need both methods
|
||||||
|
virtual bool set_document_file_impl(const std::string&,
|
||||||
|
const std::string&) {
|
||||||
|
return m_havedoc = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool set_document_string_impl(const std::string&,
|
||||||
|
const std::string&) {
|
||||||
|
return m_havedoc = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool preview() {
|
||||||
|
return m_forPreview;
|
||||||
|
}
|
||||||
|
|
||||||
RclConfig *m_config;
|
RclConfig *m_config;
|
||||||
bool m_forPreview;
|
bool m_forPreview;
|
||||||
|
|||||||
@ -122,6 +122,16 @@ skippedPaths = /media
|
|||||||
# redefined for subtrees.</descr></var>
|
# redefined for subtrees.</descr></var>
|
||||||
#excludedmimetypes =
|
#excludedmimetypes =
|
||||||
|
|
||||||
|
# <var name="nomd5types" type="string"><brief>Don't compute md5 for
|
||||||
|
# these types.</brief><descr>md5 checksums are used only for deduplicating
|
||||||
|
# results, and can be very expensive to compute on multimedia or other big
|
||||||
|
# files. This list lets you turn off md5 computation for selected types. It
|
||||||
|
# is global (no redefinition for subtrees). At the moment, it only has an
|
||||||
|
# effect for external handlers (exec and execm). The file types can be
|
||||||
|
# specified by listing either MIME types (e.g. audio/mpeg) or handler names
|
||||||
|
# (e.g. rclaudio).</descr></var>
|
||||||
|
nomd5types = rclaudio
|
||||||
|
|
||||||
# <var name="compressedfilemaxkbs" type="int"><brief>Size limit for compressed
|
# <var name="compressedfilemaxkbs" type="int"><brief>Size limit for compressed
|
||||||
# files.</brief><descr>We need to decompress these in a
|
# files.</brief><descr>We need to decompress these in a
|
||||||
# temporary directory for identification, which can be wasteful in some
|
# temporary directory for identification, which can be wasteful in some
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user