clarified the use of string keys inside the Filter metaData array
This commit is contained in:
parent
1e28525e5a
commit
638d468796
@ -25,6 +25,8 @@
|
||||
#include "pathut.h"
|
||||
#include "rcldoc.h"
|
||||
|
||||
// Key under which the document MIME type is read from / written to the
// cache metadata (used with cf.get() and m_fields.set() for doc.mimetype).
const string cstr_bgc_mimetype = "mimetype";
|
||||
|
||||
BeagleQueueCache::BeagleQueueCache(RclConfig *cnf)
|
||||
{
|
||||
string ccdir;
|
||||
@ -64,7 +66,7 @@ bool BeagleQueueCache::getFromCache(const string& udi, Rcl::Doc &dotdoc,
|
||||
|
||||
// Build a doc from saved metadata
|
||||
cf.get(cstr_url, dotdoc.url, cstr_null);
|
||||
cf.get(cstr_mimetype, dotdoc.mimetype, cstr_null);
|
||||
cf.get(cstr_bgc_mimetype, dotdoc.mimetype, cstr_null);
|
||||
cf.get(cstr_fmtime, dotdoc.fmtime, cstr_null);
|
||||
cf.get(cstr_fbytes, dotdoc.fbytes, cstr_null);
|
||||
dotdoc.sig.clear();
|
||||
|
||||
@ -45,5 +45,6 @@ public:
|
||||
private:
|
||||
CirCache *m_cache;
|
||||
};
|
||||
extern const string cstr_bgc_mimetype;
|
||||
|
||||
#endif /* _beaglequeuecache_h_included_ */
|
||||
|
||||
@ -40,25 +40,39 @@ using std::string;
|
||||
#define DEF_CSTR(NM, STR) extern const string cstr_##NM
|
||||
#endif
|
||||
|
||||
DEF_CSTR(author, "author");
|
||||
DEF_CSTR(caption, "caption");
|
||||
DEF_CSTR(charset, "charset");
|
||||
DEF_CSTR(content, "content");
|
||||
DEF_CSTR(dmtime, "dmtime");
|
||||
DEF_CSTR(dquote, "\"");
|
||||
DEF_CSTR(fbytes, "fbytes");
|
||||
DEF_CSTR(fileu, "file://");
|
||||
DEF_CSTR(fmtime, "fmtime");
|
||||
DEF_CSTR(ipath, "ipath");
|
||||
DEF_CSTR(iso_8859_1, "ISO-8859-1");
|
||||
DEF_CSTR(md5, "md5");
|
||||
DEF_CSTR(mimetype, "mimetype");
|
||||
DEF_CSTR(minwilds, "*?[");
|
||||
DEF_CSTR(newline, "\n");
|
||||
DEF_CSTR(origcharset, "origcharset");
|
||||
DEF_CSTR(null, "");
|
||||
DEF_CSTR(plus, "+");
|
||||
DEF_CSTR(textplain, "text/plain");
|
||||
DEF_CSTR(url, "url");
|
||||
|
||||
|
||||
// Values used as keys inside Dijon::Filter::metaData[]. This structure is
|
||||
// used to store all data generated by format-translating filters. It is
|
||||
// different from Rcl::Doc for mostly historical reasons. The translation
|
||||
// from Filter to Doc occurs inside internfile.cpp
|
||||
DEF_CSTR(dj_keyds, "description");
|
||||
DEF_CSTR(dj_keyfn, "filename");
|
||||
DEF_CSTR(dj_keymd, "modificationdate");
|
||||
DEF_CSTR(dj_keyorigcharset, "origcharset");
|
||||
DEF_CSTR(dj_keytitle, "title");
|
||||
DEF_CSTR(dj_keyrecipient, "recipient");
|
||||
DEF_CSTR(dj_keymsgid, "msgid");
|
||||
DEF_CSTR(dj_keyabstract, "abstract");
|
||||
DEF_CSTR(dj_keyauthor, "author");
|
||||
DEF_CSTR(dj_keycharset, "charset");
|
||||
DEF_CSTR(dj_keycontent, "content");
|
||||
DEF_CSTR(dj_keyipath, "ipath");
|
||||
DEF_CSTR(dj_keymd5, "md5");
|
||||
DEF_CSTR(dj_keymt, "mimetype");
|
||||
DEF_CSTR(dj_keydocsize, "docsize");
|
||||
|
||||
#endif /* _CSTR_H_INCLUDED_ */
|
||||
|
||||
@ -161,7 +161,7 @@ public:
|
||||
m_fields.set((*it).first, (*it).second, cstr_null);
|
||||
}
|
||||
m_fields.set(cstr_url, doc.url, cstr_null);
|
||||
m_fields.set(cstr_mimetype, doc.mimetype, cstr_null);
|
||||
m_fields.set(cstr_bgc_mimetype, doc.mimetype, cstr_null);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -638,14 +638,6 @@ static inline bool getKeyValue(const map<string, string>& docdata,
|
||||
return false;
|
||||
}
|
||||
|
||||
// These defs are for the Dijon meta array. Rcl::Doc predefined field
|
||||
// names are used where appropriate. In some cases, Rcl::Doc names are
|
||||
// used inside the Dijon metadata (ex: origcharset)
|
||||
static const string cstr_keyds("description");
|
||||
static const string cstr_keyfn("filename");
|
||||
static const string cstr_keymd("modificationdate");
|
||||
static const string cstr_keytt("title");
|
||||
|
||||
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
||||
{
|
||||
Dijon::Filter *df = m_handlers.back();
|
||||
@ -658,21 +650,21 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
||||
|
||||
for (map<string,string>::const_iterator it = docdata.begin();
|
||||
it != docdata.end(); it++) {
|
||||
if (it->first == cstr_content) {
|
||||
if (it->first == cstr_dj_keycontent) {
|
||||
doc.text = it->second;
|
||||
} else if (it->first == cstr_keymd) {
|
||||
} else if (it->first == cstr_dj_keymd) {
|
||||
doc.dmtime = it->second;
|
||||
} else if (it->first == Rcl::Doc::keyoc) {
|
||||
} else if (it->first == cstr_dj_keyorigcharset) {
|
||||
doc.origcharset = it->second;
|
||||
} else if (it->first == cstr_mimetype || it->first == cstr_charset) {
|
||||
} else if (it->first == cstr_dj_keymt || it->first == cstr_dj_keycharset) {
|
||||
// don't need/want these.
|
||||
} else {
|
||||
doc.meta[it->first] = it->second;
|
||||
}
|
||||
}
|
||||
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[cstr_keyds].empty()) {
|
||||
doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_keyds];
|
||||
doc.meta.erase(cstr_keyds);
|
||||
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[cstr_dj_keyds].empty()) {
|
||||
doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_dj_keyds];
|
||||
doc.meta.erase(cstr_dj_keyds);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -704,19 +696,19 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const
|
||||
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
|
||||
hit != m_handlers.end(); hit++) {
|
||||
const map<string, string>& docdata = (*hit)->get_meta_data();
|
||||
if (getKeyValue(docdata, cstr_ipath, ipathel)) {
|
||||
if (getKeyValue(docdata, cstr_dj_keyipath, ipathel)) {
|
||||
if (!ipathel.empty()) {
|
||||
// We have a non-empty ipath
|
||||
hasipath = true;
|
||||
getKeyValue(docdata, cstr_mimetype, doc.mimetype);
|
||||
getKeyValue(docdata, cstr_keyfn, doc.utf8fn);
|
||||
getKeyValue(docdata, cstr_dj_keymt, doc.mimetype);
|
||||
getKeyValue(docdata, cstr_dj_keyfn, doc.utf8fn);
|
||||
}
|
||||
doc.ipath += colon_hide(ipathel) + cstr_isep;
|
||||
} else {
|
||||
doc.ipath += cstr_isep;
|
||||
}
|
||||
getKeyValue(docdata, cstr_author, doc.meta[Rcl::Doc::keyau]);
|
||||
getKeyValue(docdata, cstr_keymd, doc.dmtime);
|
||||
getKeyValue(docdata, cstr_dj_keyauthor, doc.meta[Rcl::Doc::keyau]);
|
||||
getKeyValue(docdata, cstr_dj_keymd, doc.dmtime);
|
||||
}
|
||||
|
||||
// Trim empty tail elements in ipath.
|
||||
@ -754,8 +746,8 @@ int FileInterner::addHandler()
|
||||
{
|
||||
const map<string, string>& docdata = m_handlers.back()->get_meta_data();
|
||||
string charset, mimetype;
|
||||
getKeyValue(docdata, cstr_charset, charset);
|
||||
getKeyValue(docdata, cstr_mimetype, mimetype);
|
||||
getKeyValue(docdata, cstr_dj_keycharset, charset);
|
||||
getKeyValue(docdata, cstr_dj_keymt, mimetype);
|
||||
|
||||
LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
|
||||
|
||||
@ -796,7 +788,7 @@ int FileInterner::addHandler()
|
||||
const string *txt = &ns;
|
||||
{
|
||||
map<string,string>::const_iterator it;
|
||||
it = docdata.find(cstr_content);
|
||||
it = docdata.find(cstr_dj_keycontent);
|
||||
if (it != docdata.end())
|
||||
txt = &it->second;
|
||||
}
|
||||
|
||||
@ -94,7 +94,7 @@ bool MimeHandlerExec::next_document()
|
||||
myparams.push_back(m_ipath);
|
||||
|
||||
// Execute command, store the output
|
||||
string& output = m_metaData[cstr_content];
|
||||
string& output = m_metaData[cstr_dj_keycontent];
|
||||
output.erase();
|
||||
ExecCmd mexec;
|
||||
MEAdv adv(filtermaxseconds);
|
||||
@ -145,16 +145,16 @@ bool MimeHandlerExec::next_document()
|
||||
|
||||
void MimeHandlerExec::finaldetails()
|
||||
{
|
||||
m_metaData[cstr_origcharset] = m_dfltInputCharset;
|
||||
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
||||
|
||||
// cfgFilterOutputCharset comes from the mimeconf filter definition line
|
||||
string& charset = m_metaData[cstr_charset];
|
||||
string& charset = m_metaData[cstr_dj_keycharset];
|
||||
charset = cfgFilterOutputCharset.empty() ? "UTF-8" : cfgFilterOutputCharset;
|
||||
if (!stringlowercmp("default", charset)) {
|
||||
charset = m_dfltInputCharset;
|
||||
}
|
||||
|
||||
string& mt = m_metaData[cstr_mimetype];
|
||||
string& mt = m_metaData[cstr_dj_keymt];
|
||||
mt = cfgFilterOutputMtype.empty() ? "text/html" :
|
||||
cfgFilterOutputMtype;
|
||||
|
||||
@ -165,7 +165,7 @@ void MimeHandlerExec::finaldetails()
|
||||
|
||||
string md5, xmd5, reason;
|
||||
if (MD5File(m_fn, md5, &reason)) {
|
||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||
} else {
|
||||
LOGERR(("MimeHandlerExec: cant compute md5 for [%s]: %s\n",
|
||||
m_fn.c_str(), reason.c_str()));
|
||||
|
||||
@ -66,7 +66,7 @@ bool MimeHandlerExecMultiple::startCmd()
|
||||
}
|
||||
|
||||
// Note: data is not used if this is the "document:" field: it goes
|
||||
// directly to m_metaData["content"] to avoid an extra copy
|
||||
// directly to m_metaData[cstr_dj_keycontent] to avoid an extra copy
|
||||
//
|
||||
// Messages are made of data elements. Each element is like:
|
||||
// name: len\ndata
|
||||
@ -118,11 +118,11 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
|
||||
LOGDEB1(("MHExecMultiple: got name [%s] len: %d\n", name.c_str(), len));
|
||||
|
||||
// Hack: check for 'Document:' and read directly the document data
|
||||
// to m_metaData["content"] to avoid an extra copy of the bulky
|
||||
// to m_metaData[cstr_dj_keycontent] to avoid an extra copy of the bulky
|
||||
// piece
|
||||
string *datap = &data;
|
||||
if (!stringlowercmp("document:", name)) {
|
||||
datap = &m_metaData[cstr_content];
|
||||
datap = &m_metaData[cstr_dj_keycontent];
|
||||
} else {
|
||||
datap = &data;
|
||||
}
|
||||
@ -238,7 +238,7 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
// It used to be that eof could be signalled just by an empty document, but
|
||||
// this was wrong. Empty documents can be found e.g. in zip files and should
|
||||
// not be interpreted as eof.
|
||||
if (m_metaData[cstr_content].empty()) {
|
||||
if (m_metaData[cstr_dj_keycontent].empty()) {
|
||||
LOGDEB0(("MHExecMultiple: got empty document inside [%s]: [%s]\n",
|
||||
m_fn.c_str(), ipath.c_str()));
|
||||
}
|
||||
@ -248,14 +248,14 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
// mimetype, or the ipath MUST be a filename-like string which we can use
|
||||
// to compute a mime type
|
||||
if (!ipath.empty()) {
|
||||
m_metaData[cstr_ipath] = ipath;
|
||||
m_metaData[cstr_dj_keyipath] = ipath;
|
||||
if (mtype.empty()) {
|
||||
LOGDEB0(("MHExecMultiple: no mime type from filter, "
|
||||
"using ipath for a guess\n"));
|
||||
mtype = mimetype(ipath, 0, m_config, false);
|
||||
if (mtype.empty()) {
|
||||
// mimetype() won't call idFile when there is no file. Do it
|
||||
mtype = idFileMem(m_metaData[cstr_content]);
|
||||
mtype = idFileMem(m_metaData[cstr_dj_keycontent]);
|
||||
if (mtype.empty()) {
|
||||
// Note this happens for example for directory zip members
|
||||
// We could recognize them by the end /, but wouldn't know
|
||||
@ -265,16 +265,16 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
}
|
||||
}
|
||||
}
|
||||
m_metaData[cstr_mimetype] = mtype;
|
||||
m_metaData[cstr_dj_keymt] = mtype;
|
||||
string md5, xmd5;
|
||||
MD5String(m_metaData[cstr_content], md5);
|
||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
||||
MD5String(m_metaData[cstr_dj_keycontent], md5);
|
||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||
} else {
|
||||
m_metaData[cstr_mimetype] = mtype.empty() ? "text/html" : mtype;
|
||||
m_metaData.erase(cstr_ipath);
|
||||
m_metaData[cstr_dj_keymt] = mtype.empty() ? "text/html" : mtype;
|
||||
m_metaData.erase(cstr_dj_keyipath);
|
||||
string md5, xmd5, reason;
|
||||
if (MD5File(m_fn, md5, &reason)) {
|
||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||
} else {
|
||||
LOGERR(("MimeHandlerExecM: cant compute md5 for [%s]: %s\n",
|
||||
m_fn.c_str(), reason.c_str()));
|
||||
@ -290,10 +290,10 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
charset = m_dfltInputCharset;
|
||||
}
|
||||
}
|
||||
m_metaData[cstr_origcharset] = charset;
|
||||
m_metaData[cstr_charset] = charset;
|
||||
m_metaData[cstr_dj_keyorigcharset] = charset;
|
||||
m_metaData[cstr_dj_keycharset] = charset;
|
||||
|
||||
if (!m_metaData[cstr_mimetype].compare(cstr_textplain)) {
|
||||
if (!m_metaData[cstr_dj_keymt].compare(cstr_textplain)) {
|
||||
(void)txtdcode("mh_execm");
|
||||
}
|
||||
|
||||
@ -302,7 +302,7 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
m_havedoc = false;
|
||||
|
||||
LOGDEB0(("MHExecMultiple: returning %d bytes of content,"
|
||||
" mtype [%s] charset [%s]\n", m_metaData[cstr_content].size(),
|
||||
m_metaData[cstr_mimetype].c_str(), m_metaData[cstr_charset].c_str()));
|
||||
" mtype [%s] charset [%s]\n", m_metaData[cstr_dj_keycontent].size(),
|
||||
m_metaData[cstr_dj_keymt].c_str(), m_metaData[cstr_dj_keycharset].c_str()));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -56,7 +56,7 @@ bool MimeHandlerHtml::set_document_string(const string& htext)
|
||||
// We want to compute the md5 now because we may modify m_html later
|
||||
string md5, xmd5;
|
||||
MD5String(htext, md5);
|
||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -74,7 +74,7 @@ bool MimeHandlerHtml::next_document()
|
||||
LOGDEB(("MHHtml::next_doc.: default supposed input charset: [%s]\n",
|
||||
charset.c_str()));
|
||||
// Override default input charset if someone took care to set one:
|
||||
map<string,string>::const_iterator it = m_metaData.find(cstr_charset);
|
||||
map<string,string>::const_iterator it = m_metaData.find(cstr_dj_keycharset);
|
||||
if (it != m_metaData.end() && !it->second.empty()) {
|
||||
charset = it->second;
|
||||
LOGDEB(("MHHtml: next_doc.: input charset from ext. metadata: [%s]\n",
|
||||
@ -163,14 +163,14 @@ bool MimeHandlerHtml::next_document()
|
||||
}
|
||||
}
|
||||
|
||||
m_metaData[cstr_origcharset] = result.get_charset();
|
||||
m_metaData[cstr_content] = result.dump;
|
||||
m_metaData[cstr_charset] = "utf-8";
|
||||
m_metaData[cstr_dj_keyorigcharset] = result.get_charset();
|
||||
m_metaData[cstr_dj_keycontent] = result.dump;
|
||||
m_metaData[cstr_dj_keycharset] = "utf-8";
|
||||
// Avoid setting empty values which would crush ones possibly inherited
|
||||
// from parent (if we're an attachment)
|
||||
if (!result.dmtime.empty())
|
||||
m_metaData["modificationdate"] = result.dmtime;
|
||||
m_metaData[cstr_mimetype] = cstr_textplain;
|
||||
m_metaData[cstr_dj_keymd] = result.dmtime;
|
||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||
|
||||
for (map<string,string>::const_iterator it = result.meta.begin();
|
||||
it != result.meta.end(); it++) {
|
||||
|
||||
@ -44,11 +44,7 @@
|
||||
using namespace std;
|
||||
|
||||
// NOTE(review): presumably the maximum nesting depth accepted while walking
// MIME parts (processMsg()/walkmime() take a depth argument) -- confirm.
static const int maxdepth(20);
|
||||
static const string cstr_recipient = "recipient";
|
||||
static const string cstr_modificationdate = "modificationdate";
|
||||
static const string cstr_title = "title";
|
||||
static const string cstr_msgid = "msgid";
|
||||
static const string cstr_abstract = "abstract";
|
||||
// Name of the "charset" parameter looked up in a parsed Content-Type
// header value (content_type.params). File-local.
static const string cstr_mail_charset = "charset";
|
||||
|
||||
MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt)
|
||||
: RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
|
||||
@ -100,7 +96,7 @@ bool MimeHandlerMail::set_document_file(const string &fn)
|
||||
// the md5 computation to the mime analysis, but ...
|
||||
string md5, xmd5, reason;
|
||||
if (MD5File(fn, md5, &reason)) {
|
||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||
} else {
|
||||
LOGERR(("MimeHandlerMail: cant compute md5 for [%s]: %s\n", fn.c_str(),
|
||||
reason.c_str()));
|
||||
@ -132,7 +128,7 @@ bool MimeHandlerMail::set_document_string(const string &msgtxt)
|
||||
|
||||
string md5, xmd5;
|
||||
MD5String(msgtxt, md5);
|
||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||
|
||||
m_stream = new stringstream(msgtxt);
|
||||
delete m_bincdoc;
|
||||
@ -172,16 +168,16 @@ bool MimeHandlerMail::next_document()
|
||||
bool res = false;
|
||||
|
||||
if (m_idx == -1) {
|
||||
m_metaData[cstr_mimetype] = cstr_textplain;
|
||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||
res = processMsg(m_bincdoc, 0);
|
||||
LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
|
||||
m_metaData[cstr_mimetype].c_str()));
|
||||
const string& txt = m_metaData[cstr_content];
|
||||
m_metaData[cstr_dj_keymt].c_str()));
|
||||
const string& txt = m_metaData[cstr_dj_keycontent];
|
||||
if (m_startoftext < txt.size())
|
||||
m_metaData[cstr_abstract] =
|
||||
m_metaData[cstr_dj_keyabstract] =
|
||||
truncate_to_word(txt.substr(m_startoftext), 250);
|
||||
} else {
|
||||
m_metaData[cstr_abstract].clear();
|
||||
m_metaData[cstr_dj_keyabstract].clear();
|
||||
res = processAttach();
|
||||
}
|
||||
m_idx++;
|
||||
@ -235,18 +231,18 @@ bool MimeHandlerMail::processAttach()
|
||||
}
|
||||
MHMailAttach *att = m_attachments[m_idx];
|
||||
|
||||
m_metaData[cstr_mimetype] = att->m_contentType;
|
||||
m_metaData[cstr_charset] = att->m_charset;
|
||||
m_metaData["filename"] = att->m_filename;
|
||||
m_metaData[cstr_dj_keymt] = att->m_contentType;
|
||||
m_metaData[cstr_dj_keycharset] = att->m_charset;
|
||||
m_metaData[cstr_dj_keyfn] = att->m_filename;
|
||||
// Change the title to something helpful
|
||||
m_metaData[cstr_title] = att->m_filename + " (" + m_subject + ")";
|
||||
m_metaData[cstr_dj_keytitle] = att->m_filename + " (" + m_subject + ")";
|
||||
LOGDEB1((" processAttach:ct [%s] cs [%s] fn [%s]\n",
|
||||
att->m_contentType.c_str(),
|
||||
att->m_charset.c_str(),
|
||||
att->m_filename.c_str()));
|
||||
|
||||
m_metaData[cstr_content] = string();
|
||||
string& body = m_metaData[cstr_content];
|
||||
m_metaData[cstr_dj_keycontent] = string();
|
||||
string& body = m_metaData[cstr_dj_keycontent];
|
||||
att->m_part->getBody(body, 0, att->m_part->bodylength);
|
||||
string decoded;
|
||||
const string *bdp;
|
||||
@ -259,11 +255,11 @@ bool MimeHandlerMail::processAttach()
|
||||
// Special case for text/plain content. Internfile should deal
|
||||
// with this but it expects text/plain to be utf-8 already, so we
|
||||
// handle the transcoding if needed
|
||||
if (m_metaData[cstr_mimetype] == cstr_textplain) {
|
||||
if (m_metaData[cstr_dj_keymt] == cstr_textplain) {
|
||||
string utf8;
|
||||
if (!transcode(body, utf8, m_metaData[cstr_charset], "UTF-8")) {
|
||||
if (!transcode(body, utf8, m_metaData[cstr_dj_keycharset], "UTF-8")) {
|
||||
LOGERR((" processAttach: transcode to utf-8 failed "
|
||||
"for charset [%s]\n", m_metaData[cstr_charset].c_str()));
|
||||
"for charset [%s]\n", m_metaData[cstr_dj_keycharset].c_str()));
|
||||
// can't transcode at all -> data is garbage just erase it
|
||||
body.clear();
|
||||
} else {
|
||||
@ -273,18 +269,18 @@ bool MimeHandlerMail::processAttach()
|
||||
|
||||
// Special case for application/octet-stream: try to better
|
||||
// identify content, using file name if set
|
||||
if (m_metaData[cstr_mimetype] == "application/octet-stream" &&
|
||||
!m_metaData["filename"].empty()) {
|
||||
string mt = mimetype(m_metaData["filename"], 0,
|
||||
if (m_metaData[cstr_dj_keymt] == "application/octet-stream" &&
|
||||
!m_metaData[cstr_dj_keyfn].empty()) {
|
||||
string mt = mimetype(m_metaData[cstr_dj_keyfn], 0,
|
||||
m_config, false);
|
||||
if (!mt.empty())
|
||||
m_metaData[cstr_mimetype] = mt;
|
||||
m_metaData[cstr_dj_keymt] = mt;
|
||||
}
|
||||
|
||||
// Ipath
|
||||
char nbuf[20];
|
||||
sprintf(nbuf, "%d", m_idx);
|
||||
m_metaData[cstr_ipath] = nbuf;
|
||||
m_metaData[cstr_dj_keyipath] = nbuf;
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -308,7 +304,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
||||
}
|
||||
|
||||
// Handle some headers.
|
||||
string& text = m_metaData[cstr_content];
|
||||
string& text = m_metaData[cstr_dj_keycontent];
|
||||
Binc::HeaderItem hi;
|
||||
string transcoded;
|
||||
if (doc->h.getFirstHeader("From", hi)) {
|
||||
@ -317,7 +313,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
||||
text += string("From: ");
|
||||
text += transcoded + cstr_newline;
|
||||
if (depth == 1) {
|
||||
m_metaData[cstr_author] = transcoded;
|
||||
m_metaData[cstr_dj_keyauthor] = transcoded;
|
||||
}
|
||||
}
|
||||
if (doc->h.getFirstHeader("To", hi)) {
|
||||
@ -326,7 +322,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
||||
text += string("To: ");
|
||||
text += transcoded + cstr_newline;
|
||||
if (depth == 1) {
|
||||
m_metaData[cstr_recipient] = transcoded;
|
||||
m_metaData[cstr_dj_keyrecipient] = transcoded;
|
||||
}
|
||||
}
|
||||
if (doc->h.getFirstHeader("Cc", hi)) {
|
||||
@ -335,13 +331,13 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
||||
text += string("Cc: ");
|
||||
text += transcoded + cstr_newline;
|
||||
if (depth == 1) {
|
||||
m_metaData[cstr_recipient] += " " + transcoded;
|
||||
m_metaData[cstr_dj_keyrecipient] += " " + transcoded;
|
||||
}
|
||||
}
|
||||
if (doc->h.getFirstHeader("Message-Id", hi)) {
|
||||
if (depth == 1) {
|
||||
m_metaData[cstr_msgid] = hi.getValue();
|
||||
trimstring(m_metaData[cstr_msgid], "<>");
|
||||
m_metaData[cstr_dj_keymsgid] = hi.getValue();
|
||||
trimstring(m_metaData[cstr_dj_keymsgid], "<>");
|
||||
}
|
||||
}
|
||||
if (doc->h.getFirstHeader("Date", hi)) {
|
||||
@ -351,7 +347,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
||||
if (t != (time_t)-1) {
|
||||
char ascuxtime[100];
|
||||
sprintf(ascuxtime, "%ld", (long)t);
|
||||
m_metaData[cstr_modificationdate] = ascuxtime;
|
||||
m_metaData[cstr_dj_keymd] = ascuxtime;
|
||||
} else {
|
||||
// Leave mtime field alone, ftime will be used instead.
|
||||
LOGDEB(("rfc2822Date...: failed: [%s]\n", transcoded.c_str()));
|
||||
@ -364,7 +360,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
||||
if (doc->h.getFirstHeader("Subject", hi)) {
|
||||
rfc2047_decode(hi.getValue(), transcoded);
|
||||
if (depth == 1) {
|
||||
m_metaData[cstr_title] = transcoded;
|
||||
m_metaData[cstr_dj_keytitle] = transcoded;
|
||||
m_subject = transcoded;
|
||||
}
|
||||
if (preview())
|
||||
@ -393,7 +389,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
||||
walkmime(doc, depth);
|
||||
|
||||
LOGDEB2(("MimeHandlerMail::processMsg:text:[%s]\n",
|
||||
m_metaData[cstr_content].c_str()));
|
||||
m_metaData[cstr_dj_keycontent].c_str()));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -415,7 +411,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
return;
|
||||
}
|
||||
|
||||
string& out = m_metaData[cstr_content];
|
||||
string& out = m_metaData[cstr_dj_keycontent];
|
||||
|
||||
if (doc->isMultipart()) {
|
||||
LOGDEB2(("walkmime: ismultipart %d subtype '%s'\n",
|
||||
@ -527,7 +523,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
// to iso-8859 only if the transfer-encoding is 8 bit, or test for
|
||||
// actual 8 bit chars, but what the heck, let's use 8859-1 as default
|
||||
string charset;
|
||||
it = content_type.params.find(string(cstr_charset));
|
||||
it = content_type.params.find(cstr_mail_charset);
|
||||
if (it != content_type.params.end())
|
||||
charset = it->second;
|
||||
if (charset.empty() ||
|
||||
@ -609,7 +605,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
mh.set_document_string(body);
|
||||
mh.next_document();
|
||||
map<string, string>::const_iterator it =
|
||||
mh.get_meta_data().find(cstr_content);
|
||||
mh.get_meta_data().find(cstr_dj_keycontent);
|
||||
if (it != mh.get_meta_data().end())
|
||||
out += it->second;
|
||||
} else {
|
||||
|
||||
@ -435,7 +435,7 @@ bool MimeHandlerMbox::next_document()
|
||||
off_t message_end = 0;
|
||||
bool iseof = false;
|
||||
bool hademptyline = true;
|
||||
string& msgtxt = m_metaData[cstr_content];
|
||||
string& msgtxt = m_metaData[cstr_dj_keycontent];
|
||||
msgtxt.erase();
|
||||
line_type line;
|
||||
for (;;) {
|
||||
@ -499,8 +499,8 @@ bool MimeHandlerMbox::next_document()
|
||||
// m_msgnum was incremented when hitting the next From_ or eof, so the data
|
||||
// is for m_msgnum - 1
|
||||
sprintf(buf, "%d", m_msgnum - 1);
|
||||
m_metaData[cstr_ipath] = buf;
|
||||
m_metaData[cstr_mimetype] = "message/rfc822";
|
||||
m_metaData[cstr_dj_keyipath] = buf;
|
||||
m_metaData[cstr_dj_keymt] = "message/rfc822";
|
||||
if (iseof) {
|
||||
LOGDEB2(("MimeHandlerMbox::next: eof hit\n"));
|
||||
m_havedoc = false;
|
||||
@ -591,7 +591,7 @@ int main(int argc, char **argv)
|
||||
exit(1);
|
||||
}
|
||||
map<string, string>::const_iterator it =
|
||||
mh.get_meta_data().find(cstr_content);
|
||||
mh.get_meta_data().find(cstr_dj_keycontent);
|
||||
int size;
|
||||
if (it == mh.get_meta_data().end()) {
|
||||
size = -1;
|
||||
@ -611,7 +611,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
docnt++;
|
||||
map<string, string>::const_iterator it =
|
||||
mh.get_meta_data().find(cstr_content);
|
||||
mh.get_meta_data().find(cstr_dj_keycontent);
|
||||
int size;
|
||||
if (it == mh.get_meta_data().end()) {
|
||||
size = -1;
|
||||
|
||||
@ -81,7 +81,7 @@ bool MimeHandlerText::set_document_file(const string &fn)
|
||||
|
||||
string md5, xmd5;
|
||||
MD5String(m_text, md5);
|
||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||
m_havedoc = true;
|
||||
return true;
|
||||
}
|
||||
@ -91,7 +91,7 @@ bool MimeHandlerText::set_document_string(const string& otext)
|
||||
m_text = otext;
|
||||
string md5, xmd5;
|
||||
MD5String(m_text, md5);
|
||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||
m_havedoc = true;
|
||||
return true;
|
||||
}
|
||||
@ -118,11 +118,11 @@ bool MimeHandlerText::next_document()
|
||||
|
||||
// We transcode even if defcharset is supposedly already utf-8:
|
||||
// this validates the encoding.
|
||||
m_metaData[cstr_origcharset] = m_dfltInputCharset;
|
||||
m_metaData[cstr_mimetype] = cstr_textplain;
|
||||
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||
|
||||
size_t srclen = m_text.length();
|
||||
m_metaData[cstr_content].swap(m_text);
|
||||
m_metaData[cstr_dj_keycontent].swap(m_text);
|
||||
|
||||
// txtdcode() truncates the text if transcoding fails
|
||||
(void)txtdcode("mh_text");
|
||||
@ -144,7 +144,7 @@ bool MimeHandlerText::next_document()
|
||||
char buf[30];
|
||||
sprintf(buf, "%lld", (long long)(m_offs - srclen));
|
||||
if (m_offs - srclen != 0)
|
||||
m_metaData[cstr_ipath] = buf;
|
||||
m_metaData[cstr_dj_keyipath] = buf;
|
||||
readnext();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -42,8 +42,8 @@ class MimeHandlerUnknown : public RecollFilter {
|
||||
if (m_havedoc == false)
|
||||
return false;
|
||||
m_havedoc = false;
|
||||
m_metaData[cstr_content] = cstr_null;
|
||||
m_metaData[cstr_mimetype] = cstr_textplain;
|
||||
m_metaData[cstr_dj_keycontent] = cstr_null;
|
||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||
return true;
|
||||
}
|
||||
// Always answers true: this filter identifies itself as the "unknown" handler.
virtual bool is_unknown()
{
    return true;
}
|
||||
|
||||
@ -141,6 +141,7 @@ static Dijon::Filter *mhFactory(RclConfig *config, const string &mime)
|
||||
}
|
||||
}
|
||||
|
||||
// Attribute name looked up in a filter definition's additional attributes
// to obtain the filter output charset (see mhExecFactory). File-local.
static const string cstr_mh_charset = "charset";
|
||||
/**
|
||||
* Create a filter that executes an external program or script
|
||||
* A filter def can look like:
|
||||
@ -179,9 +180,9 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
||||
// Handle additional attributes. We substitute the semi-colons
|
||||
// with newlines and use a ConfSimple
|
||||
string value;
|
||||
if (attrs.get(cstr_charset, value))
|
||||
if (attrs.get(cstr_mh_charset, value))
|
||||
h->cfgFilterOutputCharset = stringtolower((const string&)value);
|
||||
if (attrs.get(cstr_mimetype, value))
|
||||
if (attrs.get(cstr_dj_keymt, value))
|
||||
h->cfgFilterOutputMtype = stringtolower((const string&)value);
|
||||
|
||||
#if 0
|
||||
|
||||
@ -36,6 +36,9 @@
|
||||
#include "debuglog.h"
|
||||
#include "transcode.h"
|
||||
|
||||
// Name of the "charset" attribute/parameter queried while parsing <meta>
// tags (both the HTML5 charset attribute and the content-type parameter).
static const string cstr_html_charset = "charset";
|
||||
// Name of the "content" attribute fetched from <meta> tags by the parser.
static const string cstr_html_content = "content";
|
||||
|
||||
inline static bool
|
||||
p_notdigit(char c)
|
||||
{
|
||||
@ -353,7 +356,7 @@ MyHtmlParser::opening_tag(const string &tag)
|
||||
case 'm':
|
||||
if (tag == "meta") {
|
||||
string content;
|
||||
if (get_parameter(cstr_content, content)) {
|
||||
if (get_parameter(cstr_html_content, content)) {
|
||||
string name;
|
||||
if (get_parameter("name", name)) {
|
||||
lowercase_term(name);
|
||||
@ -387,7 +390,7 @@ MyHtmlParser::opening_tag(const string &tag)
|
||||
MimeHeaderValue p;
|
||||
parseMimeHeaderValue(content, p);
|
||||
map<string, string>::const_iterator k;
|
||||
if ((k = p.params.find(cstr_charset)) !=
|
||||
if ((k = p.params.find(cstr_html_charset)) !=
|
||||
p.params.end()) {
|
||||
charset = k->second;
|
||||
if (!samecharset(charset, fromcharset)) {
|
||||
@ -402,7 +405,7 @@ MyHtmlParser::opening_tag(const string &tag)
|
||||
}
|
||||
}
|
||||
string newcharset;
|
||||
if (get_parameter(cstr_charset, newcharset)) {
|
||||
if (get_parameter(cstr_html_charset, newcharset)) {
|
||||
// HTML5 added: <meta charset="...">
|
||||
lowercase_term(newcharset);
|
||||
charset = newcharset;
|
||||
|
||||
@ -22,14 +22,14 @@
|
||||
|
||||
bool RecollFilter::txtdcode(const string& who)
|
||||
{
|
||||
if (m_metaData[cstr_mimetype].compare(cstr_textplain)) {
|
||||
if (m_metaData[cstr_dj_keymt].compare(cstr_textplain)) {
|
||||
LOGERR(("%s::txtdcode: called on non txt/plain: %s\n", who.c_str(),
|
||||
m_metaData[cstr_mimetype].c_str()));
|
||||
m_metaData[cstr_dj_keymt].c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
string& ocs = m_metaData[cstr_origcharset];
|
||||
string& itext = m_metaData[cstr_content];
|
||||
string& ocs = m_metaData[cstr_dj_keyorigcharset];
|
||||
string& itext = m_metaData[cstr_dj_keycontent];
|
||||
LOGDEB0(("%s::txtdcode: %d bytes from [%s] to UTF-8\n",
|
||||
who.c_str(), itext.size(), ocs.c_str()));
|
||||
int ecnt;
|
||||
@ -44,6 +44,6 @@ bool RecollFilter::txtdcode(const string& who)
|
||||
return false;
|
||||
}
|
||||
itext.swap(otext);
|
||||
m_metaData[cstr_charset] = "UTF-8";
|
||||
m_metaData[cstr_dj_keycharset] = "UTF-8";
|
||||
return true;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user