clarified the use of string keys inside the Filter metaData array

This commit is contained in:
Jean-Francois Dockes 2012-03-07 10:13:46 +01:00
parent 1e28525e5a
commit 638d468796
15 changed files with 131 additions and 122 deletions

View File

@ -25,6 +25,8 @@
#include "pathut.h"
#include "rcldoc.h"
const string cstr_bgc_mimetype("mimetype");
BeagleQueueCache::BeagleQueueCache(RclConfig *cnf)
{
string ccdir;
@ -64,7 +66,7 @@ bool BeagleQueueCache::getFromCache(const string& udi, Rcl::Doc &dotdoc,
// Build a doc from saved metadata
cf.get(cstr_url, dotdoc.url, cstr_null);
cf.get(cstr_mimetype, dotdoc.mimetype, cstr_null);
cf.get(cstr_bgc_mimetype, dotdoc.mimetype, cstr_null);
cf.get(cstr_fmtime, dotdoc.fmtime, cstr_null);
cf.get(cstr_fbytes, dotdoc.fbytes, cstr_null);
dotdoc.sig.clear();

View File

@ -45,5 +45,6 @@ public:
private:
CirCache *m_cache;
};
extern const string cstr_bgc_mimetype;
#endif /* _beaglequeuecache_h_included_ */

View File

@ -40,25 +40,39 @@ using std::string;
#define DEF_CSTR(NM, STR) extern const string cstr_##NM
#endif
DEF_CSTR(author, "author");
DEF_CSTR(caption, "caption");
DEF_CSTR(charset, "charset");
DEF_CSTR(content, "content");
DEF_CSTR(dmtime, "dmtime");
DEF_CSTR(dquote, "\"");
DEF_CSTR(fbytes, "fbytes");
DEF_CSTR(fileu, "file://");
DEF_CSTR(fmtime, "fmtime");
DEF_CSTR(ipath, "ipath");
DEF_CSTR(iso_8859_1, "ISO-8859-1");
DEF_CSTR(md5, "md5");
DEF_CSTR(mimetype, "mimetype");
DEF_CSTR(minwilds, "*?[");
DEF_CSTR(newline, "\n");
DEF_CSTR(origcharset, "origcharset");
DEF_CSTR(null, "");
DEF_CSTR(plus, "+");
DEF_CSTR(textplain, "text/plain");
DEF_CSTR(url, "url");
// Values used as keys inside Dijon::Filter::metaData[]. This structure is
// used to store all data generated by format-translating filters. It is
// different from Rcl::Doc for mostly historical reasons. The translation
// from Filter to Doc occurs inside internfile.cpp
DEF_CSTR(dj_keyds, "description");
DEF_CSTR(dj_keyfn, "filename");
DEF_CSTR(dj_keymd, "modificationdate");
DEF_CSTR(dj_keyorigcharset, "origcharset");
DEF_CSTR(dj_keytitle, "title");
DEF_CSTR(dj_keyrecipient, "recipient");
DEF_CSTR(dj_keymsgid, "msgid");
DEF_CSTR(dj_keyabstract, "abstract");
DEF_CSTR(dj_keyauthor, "author");
DEF_CSTR(dj_keycharset, "charset");
DEF_CSTR(dj_keycontent, "content");
DEF_CSTR(dj_keyipath, "ipath");
DEF_CSTR(dj_keymd5, "md5");
DEF_CSTR(dj_keymt, "mimetype");
DEF_CSTR(dj_keydocsize, "docsize");
#endif /* _CSTR_H_INCLUDED_ */

View File

@ -161,7 +161,7 @@ public:
m_fields.set((*it).first, (*it).second, cstr_null);
}
m_fields.set(cstr_url, doc.url, cstr_null);
m_fields.set(cstr_mimetype, doc.mimetype, cstr_null);
m_fields.set(cstr_bgc_mimetype, doc.mimetype, cstr_null);
return true;
}

View File

@ -638,14 +638,6 @@ static inline bool getKeyValue(const map<string, string>& docdata,
return false;
}
// These defs are for the Dijon meta array. Rcl::Doc predefined field
// names are used where appropriate. In some cases, Rcl::Doc names are
// used inside the Dijon metadata (ex: origcharset)
static const string cstr_keyds("description");
static const string cstr_keyfn("filename");
static const string cstr_keymd("modificationdate");
static const string cstr_keytt("title");
bool FileInterner::dijontorcl(Rcl::Doc& doc)
{
Dijon::Filter *df = m_handlers.back();
@ -658,21 +650,21 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
for (map<string,string>::const_iterator it = docdata.begin();
it != docdata.end(); it++) {
if (it->first == cstr_content) {
if (it->first == cstr_dj_keycontent) {
doc.text = it->second;
} else if (it->first == cstr_keymd) {
} else if (it->first == cstr_dj_keymd) {
doc.dmtime = it->second;
} else if (it->first == Rcl::Doc::keyoc) {
} else if (it->first == cstr_dj_keyorigcharset) {
doc.origcharset = it->second;
} else if (it->first == cstr_mimetype || it->first == cstr_charset) {
} else if (it->first == cstr_dj_keymt || it->first == cstr_dj_keycharset) {
// don't need/want these.
} else {
doc.meta[it->first] = it->second;
}
}
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[cstr_keyds].empty()) {
doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_keyds];
doc.meta.erase(cstr_keyds);
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[cstr_dj_keyds].empty()) {
doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_dj_keyds];
doc.meta.erase(cstr_dj_keyds);
}
return true;
}
@ -704,19 +696,19 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
hit != m_handlers.end(); hit++) {
const map<string, string>& docdata = (*hit)->get_meta_data();
if (getKeyValue(docdata, cstr_ipath, ipathel)) {
if (getKeyValue(docdata, cstr_dj_keyipath, ipathel)) {
if (!ipathel.empty()) {
// We have a non-empty ipath
hasipath = true;
getKeyValue(docdata, cstr_mimetype, doc.mimetype);
getKeyValue(docdata, cstr_keyfn, doc.utf8fn);
getKeyValue(docdata, cstr_dj_keymt, doc.mimetype);
getKeyValue(docdata, cstr_dj_keyfn, doc.utf8fn);
}
doc.ipath += colon_hide(ipathel) + cstr_isep;
} else {
doc.ipath += cstr_isep;
}
getKeyValue(docdata, cstr_author, doc.meta[Rcl::Doc::keyau]);
getKeyValue(docdata, cstr_keymd, doc.dmtime);
getKeyValue(docdata, cstr_dj_keyauthor, doc.meta[Rcl::Doc::keyau]);
getKeyValue(docdata, cstr_dj_keymd, doc.dmtime);
}
// Trim empty tail elements in ipath.
@ -754,8 +746,8 @@ int FileInterner::addHandler()
{
const map<string, string>& docdata = m_handlers.back()->get_meta_data();
string charset, mimetype;
getKeyValue(docdata, cstr_charset, charset);
getKeyValue(docdata, cstr_mimetype, mimetype);
getKeyValue(docdata, cstr_dj_keycharset, charset);
getKeyValue(docdata, cstr_dj_keymt, mimetype);
LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
@ -796,7 +788,7 @@ int FileInterner::addHandler()
const string *txt = &ns;
{
map<string,string>::const_iterator it;
it = docdata.find(cstr_content);
it = docdata.find(cstr_dj_keycontent);
if (it != docdata.end())
txt = &it->second;
}

View File

@ -94,7 +94,7 @@ bool MimeHandlerExec::next_document()
myparams.push_back(m_ipath);
// Execute command, store the output
string& output = m_metaData[cstr_content];
string& output = m_metaData[cstr_dj_keycontent];
output.erase();
ExecCmd mexec;
MEAdv adv(filtermaxseconds);
@ -145,16 +145,16 @@ bool MimeHandlerExec::next_document()
void MimeHandlerExec::finaldetails()
{
m_metaData[cstr_origcharset] = m_dfltInputCharset;
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
// cfgFilterOutputCharset comes from the mimeconf filter definition line
string& charset = m_metaData[cstr_charset];
string& charset = m_metaData[cstr_dj_keycharset];
charset = cfgFilterOutputCharset.empty() ? "UTF-8" : cfgFilterOutputCharset;
if (!stringlowercmp("default", charset)) {
charset = m_dfltInputCharset;
}
string& mt = m_metaData[cstr_mimetype];
string& mt = m_metaData[cstr_dj_keymt];
mt = cfgFilterOutputMtype.empty() ? "text/html" :
cfgFilterOutputMtype;
@ -165,7 +165,7 @@ void MimeHandlerExec::finaldetails()
string md5, xmd5, reason;
if (MD5File(m_fn, md5, &reason)) {
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
} else {
LOGERR(("MimeHandlerExec: cant compute md5 for [%s]: %s\n",
m_fn.c_str(), reason.c_str()));

View File

@ -66,7 +66,7 @@ bool MimeHandlerExecMultiple::startCmd()
}
// Note: data is not used if this is the "document:" field: it goes
// directly to m_metaData["content"] to avoid an extra copy
// directly to m_metaData[cstr_dj_keycontent] to avoid an extra copy
//
// Messages are made of data elements. Each element is like:
// name: len\ndata
@ -118,11 +118,11 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
LOGDEB1(("MHExecMultiple: got name [%s] len: %d\n", name.c_str(), len));
// Hack: check for 'Document:' and read directly the document data
// to m_metaData["content"] to avoid an extra copy of the bulky
// to m_metaData[cstr_dj_keycontent] to avoid an extra copy of the bulky
// piece
string *datap = &data;
if (!stringlowercmp("document:", name)) {
datap = &m_metaData[cstr_content];
datap = &m_metaData[cstr_dj_keycontent];
} else {
datap = &data;
}
@ -238,7 +238,7 @@ bool MimeHandlerExecMultiple::next_document()
// It used to be that eof could be signalled just by an empty document, but
// this was wrong. Empty documents can be found, e.g. in zip files, and should
// not be interpreted as eof.
if (m_metaData[cstr_content].empty()) {
if (m_metaData[cstr_dj_keycontent].empty()) {
LOGDEB0(("MHExecMultiple: got empty document inside [%s]: [%s]\n",
m_fn.c_str(), ipath.c_str()));
}
@ -248,14 +248,14 @@ bool MimeHandlerExecMultiple::next_document()
// mimetype, or the ipath MUST be a filename-like string which we can use
// to compute a mime type
if (!ipath.empty()) {
m_metaData[cstr_ipath] = ipath;
m_metaData[cstr_dj_keyipath] = ipath;
if (mtype.empty()) {
LOGDEB0(("MHExecMultiple: no mime type from filter, "
"using ipath for a guess\n"));
mtype = mimetype(ipath, 0, m_config, false);
if (mtype.empty()) {
// mimetype() won't call idFile when there is no file. Do it
mtype = idFileMem(m_metaData[cstr_content]);
mtype = idFileMem(m_metaData[cstr_dj_keycontent]);
if (mtype.empty()) {
// Note this happens for example for directory zip members
// We could recognize them by the end /, but wouldn't know
@ -265,16 +265,16 @@ bool MimeHandlerExecMultiple::next_document()
}
}
}
m_metaData[cstr_mimetype] = mtype;
m_metaData[cstr_dj_keymt] = mtype;
string md5, xmd5;
MD5String(m_metaData[cstr_content], md5);
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
MD5String(m_metaData[cstr_dj_keycontent], md5);
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
} else {
m_metaData[cstr_mimetype] = mtype.empty() ? "text/html" : mtype;
m_metaData.erase(cstr_ipath);
m_metaData[cstr_dj_keymt] = mtype.empty() ? "text/html" : mtype;
m_metaData.erase(cstr_dj_keyipath);
string md5, xmd5, reason;
if (MD5File(m_fn, md5, &reason)) {
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
} else {
LOGERR(("MimeHandlerExecM: cant compute md5 for [%s]: %s\n",
m_fn.c_str(), reason.c_str()));
@ -290,10 +290,10 @@ bool MimeHandlerExecMultiple::next_document()
charset = m_dfltInputCharset;
}
}
m_metaData[cstr_origcharset] = charset;
m_metaData[cstr_charset] = charset;
m_metaData[cstr_dj_keyorigcharset] = charset;
m_metaData[cstr_dj_keycharset] = charset;
if (!m_metaData[cstr_mimetype].compare(cstr_textplain)) {
if (!m_metaData[cstr_dj_keymt].compare(cstr_textplain)) {
(void)txtdcode("mh_execm");
}
@ -302,7 +302,7 @@ bool MimeHandlerExecMultiple::next_document()
m_havedoc = false;
LOGDEB0(("MHExecMultiple: returning %d bytes of content,"
" mtype [%s] charset [%s]\n", m_metaData[cstr_content].size(),
m_metaData[cstr_mimetype].c_str(), m_metaData[cstr_charset].c_str()));
" mtype [%s] charset [%s]\n", m_metaData[cstr_dj_keycontent].size(),
m_metaData[cstr_dj_keymt].c_str(), m_metaData[cstr_dj_keycharset].c_str()));
return true;
}

View File

@ -56,7 +56,7 @@ bool MimeHandlerHtml::set_document_string(const string& htext)
// We want to compute the md5 now because we may modify m_html later
string md5, xmd5;
MD5String(htext, md5);
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
return true;
}
@ -74,7 +74,7 @@ bool MimeHandlerHtml::next_document()
LOGDEB(("MHHtml::next_doc.: default supposed input charset: [%s]\n",
charset.c_str()));
// Override default input charset if someone took care to set one:
map<string,string>::const_iterator it = m_metaData.find(cstr_charset);
map<string,string>::const_iterator it = m_metaData.find(cstr_dj_keycharset);
if (it != m_metaData.end() && !it->second.empty()) {
charset = it->second;
LOGDEB(("MHHtml: next_doc.: input charset from ext. metadata: [%s]\n",
@ -163,14 +163,14 @@ bool MimeHandlerHtml::next_document()
}
}
m_metaData[cstr_origcharset] = result.get_charset();
m_metaData[cstr_content] = result.dump;
m_metaData[cstr_charset] = "utf-8";
m_metaData[cstr_dj_keyorigcharset] = result.get_charset();
m_metaData[cstr_dj_keycontent] = result.dump;
m_metaData[cstr_dj_keycharset] = "utf-8";
// Avoid setting empty values which would crush ones possibly inherited
// from parent (if we're an attachment)
if (!result.dmtime.empty())
m_metaData["modificationdate"] = result.dmtime;
m_metaData[cstr_mimetype] = cstr_textplain;
m_metaData[cstr_dj_keymd] = result.dmtime;
m_metaData[cstr_dj_keymt] = cstr_textplain;
for (map<string,string>::const_iterator it = result.meta.begin();
it != result.meta.end(); it++) {

View File

@ -44,11 +44,7 @@
using namespace std;
static const int maxdepth = 20;
static const string cstr_recipient = "recipient";
static const string cstr_modificationdate = "modificationdate";
static const string cstr_title = "title";
static const string cstr_msgid = "msgid";
static const string cstr_abstract = "abstract";
static const string cstr_mail_charset("charset");
MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt)
: RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
@ -100,7 +96,7 @@ bool MimeHandlerMail::set_document_file(const string &fn)
// the md5 computation to the mime analysis, but ...
string md5, xmd5, reason;
if (MD5File(fn, md5, &reason)) {
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
} else {
LOGERR(("MimeHandlerMail: cant compute md5 for [%s]: %s\n", fn.c_str(),
reason.c_str()));
@ -132,7 +128,7 @@ bool MimeHandlerMail::set_document_string(const string &msgtxt)
string md5, xmd5;
MD5String(msgtxt, md5);
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
m_stream = new stringstream(msgtxt);
delete m_bincdoc;
@ -172,16 +168,16 @@ bool MimeHandlerMail::next_document()
bool res = false;
if (m_idx == -1) {
m_metaData[cstr_mimetype] = cstr_textplain;
m_metaData[cstr_dj_keymt] = cstr_textplain;
res = processMsg(m_bincdoc, 0);
LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
m_metaData[cstr_mimetype].c_str()));
const string& txt = m_metaData[cstr_content];
m_metaData[cstr_dj_keymt].c_str()));
const string& txt = m_metaData[cstr_dj_keycontent];
if (m_startoftext < txt.size())
m_metaData[cstr_abstract] =
m_metaData[cstr_dj_keyabstract] =
truncate_to_word(txt.substr(m_startoftext), 250);
} else {
m_metaData[cstr_abstract].clear();
m_metaData[cstr_dj_keyabstract].clear();
res = processAttach();
}
m_idx++;
@ -235,18 +231,18 @@ bool MimeHandlerMail::processAttach()
}
MHMailAttach *att = m_attachments[m_idx];
m_metaData[cstr_mimetype] = att->m_contentType;
m_metaData[cstr_charset] = att->m_charset;
m_metaData["filename"] = att->m_filename;
m_metaData[cstr_dj_keymt] = att->m_contentType;
m_metaData[cstr_dj_keycharset] = att->m_charset;
m_metaData[cstr_dj_keyfn] = att->m_filename;
// Change the title to something helpful
m_metaData[cstr_title] = att->m_filename + " (" + m_subject + ")";
m_metaData[cstr_dj_keytitle] = att->m_filename + " (" + m_subject + ")";
LOGDEB1((" processAttach:ct [%s] cs [%s] fn [%s]\n",
att->m_contentType.c_str(),
att->m_charset.c_str(),
att->m_filename.c_str()));
m_metaData[cstr_content] = string();
string& body = m_metaData[cstr_content];
m_metaData[cstr_dj_keycontent] = string();
string& body = m_metaData[cstr_dj_keycontent];
att->m_part->getBody(body, 0, att->m_part->bodylength);
string decoded;
const string *bdp;
@ -259,11 +255,11 @@ bool MimeHandlerMail::processAttach()
// Special case for text/plain content. Internfile should deal
// with this but it expects text/plain to be utf-8 already, so we
// handle the transcoding if needed
if (m_metaData[cstr_mimetype] == cstr_textplain) {
if (m_metaData[cstr_dj_keymt] == cstr_textplain) {
string utf8;
if (!transcode(body, utf8, m_metaData[cstr_charset], "UTF-8")) {
if (!transcode(body, utf8, m_metaData[cstr_dj_keycharset], "UTF-8")) {
LOGERR((" processAttach: transcode to utf-8 failed "
"for charset [%s]\n", m_metaData[cstr_charset].c_str()));
"for charset [%s]\n", m_metaData[cstr_dj_keycharset].c_str()));
// can't transcode at all -> data is garbage just erase it
body.clear();
} else {
@ -273,18 +269,18 @@ bool MimeHandlerMail::processAttach()
// Special case for application/octet-stream: try to better
// identify content, using file name if set
if (m_metaData[cstr_mimetype] == "application/octet-stream" &&
!m_metaData["filename"].empty()) {
string mt = mimetype(m_metaData["filename"], 0,
if (m_metaData[cstr_dj_keymt] == "application/octet-stream" &&
!m_metaData[cstr_dj_keyfn].empty()) {
string mt = mimetype(m_metaData[cstr_dj_keyfn], 0,
m_config, false);
if (!mt.empty())
m_metaData[cstr_mimetype] = mt;
m_metaData[cstr_dj_keymt] = mt;
}
// Ipath
char nbuf[20];
sprintf(nbuf, "%d", m_idx);
m_metaData[cstr_ipath] = nbuf;
m_metaData[cstr_dj_keyipath] = nbuf;
return true;
}
@ -308,7 +304,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
}
// Handle some headers.
string& text = m_metaData[cstr_content];
string& text = m_metaData[cstr_dj_keycontent];
Binc::HeaderItem hi;
string transcoded;
if (doc->h.getFirstHeader("From", hi)) {
@ -317,7 +313,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
text += string("From: ");
text += transcoded + cstr_newline;
if (depth == 1) {
m_metaData[cstr_author] = transcoded;
m_metaData[cstr_dj_keyauthor] = transcoded;
}
}
if (doc->h.getFirstHeader("To", hi)) {
@ -326,7 +322,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
text += string("To: ");
text += transcoded + cstr_newline;
if (depth == 1) {
m_metaData[cstr_recipient] = transcoded;
m_metaData[cstr_dj_keyrecipient] = transcoded;
}
}
if (doc->h.getFirstHeader("Cc", hi)) {
@ -335,13 +331,13 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
text += string("Cc: ");
text += transcoded + cstr_newline;
if (depth == 1) {
m_metaData[cstr_recipient] += " " + transcoded;
m_metaData[cstr_dj_keyrecipient] += " " + transcoded;
}
}
if (doc->h.getFirstHeader("Message-Id", hi)) {
if (depth == 1) {
m_metaData[cstr_msgid] = hi.getValue();
trimstring(m_metaData[cstr_msgid], "<>");
m_metaData[cstr_dj_keymsgid] = hi.getValue();
trimstring(m_metaData[cstr_dj_keymsgid], "<>");
}
}
if (doc->h.getFirstHeader("Date", hi)) {
@ -351,7 +347,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
if (t != (time_t)-1) {
char ascuxtime[100];
sprintf(ascuxtime, "%ld", (long)t);
m_metaData[cstr_modificationdate] = ascuxtime;
m_metaData[cstr_dj_keymd] = ascuxtime;
} else {
// Leave mtime field alone, ftime will be used instead.
LOGDEB(("rfc2822Date...: failed: [%s]\n", transcoded.c_str()));
@ -364,7 +360,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
if (doc->h.getFirstHeader("Subject", hi)) {
rfc2047_decode(hi.getValue(), transcoded);
if (depth == 1) {
m_metaData[cstr_title] = transcoded;
m_metaData[cstr_dj_keytitle] = transcoded;
m_subject = transcoded;
}
if (preview())
@ -393,7 +389,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
walkmime(doc, depth);
LOGDEB2(("MimeHandlerMail::processMsg:text:[%s]\n",
m_metaData[cstr_content].c_str()));
m_metaData[cstr_dj_keycontent].c_str()));
return true;
}
@ -415,7 +411,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
return;
}
string& out = m_metaData[cstr_content];
string& out = m_metaData[cstr_dj_keycontent];
if (doc->isMultipart()) {
LOGDEB2(("walkmime: ismultipart %d subtype '%s'\n",
@ -527,7 +523,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
// to iso-8859 only if the transfer-encoding is 8 bit, or test for
// actual 8 bit chars, but what the heck, let's use 8859-1 as default
string charset;
it = content_type.params.find(string(cstr_charset));
it = content_type.params.find(cstr_mail_charset);
if (it != content_type.params.end())
charset = it->second;
if (charset.empty() ||
@ -609,7 +605,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
mh.set_document_string(body);
mh.next_document();
map<string, string>::const_iterator it =
mh.get_meta_data().find(cstr_content);
mh.get_meta_data().find(cstr_dj_keycontent);
if (it != mh.get_meta_data().end())
out += it->second;
} else {

View File

@ -435,7 +435,7 @@ bool MimeHandlerMbox::next_document()
off_t message_end = 0;
bool iseof = false;
bool hademptyline = true;
string& msgtxt = m_metaData[cstr_content];
string& msgtxt = m_metaData[cstr_dj_keycontent];
msgtxt.erase();
line_type line;
for (;;) {
@ -499,8 +499,8 @@ bool MimeHandlerMbox::next_document()
// m_msgnum was incremented when hitting the next From_ or eof, so the data
// is for m_msgnum - 1
sprintf(buf, "%d", m_msgnum - 1);
m_metaData[cstr_ipath] = buf;
m_metaData[cstr_mimetype] = "message/rfc822";
m_metaData[cstr_dj_keyipath] = buf;
m_metaData[cstr_dj_keymt] = "message/rfc822";
if (iseof) {
LOGDEB2(("MimeHandlerMbox::next: eof hit\n"));
m_havedoc = false;
@ -591,7 +591,7 @@ int main(int argc, char **argv)
exit(1);
}
map<string, string>::const_iterator it =
mh.get_meta_data().find(cstr_content);
mh.get_meta_data().find(cstr_dj_keycontent);
int size;
if (it == mh.get_meta_data().end()) {
size = -1;
@ -611,7 +611,7 @@ int main(int argc, char **argv)
}
docnt++;
map<string, string>::const_iterator it =
mh.get_meta_data().find(cstr_content);
mh.get_meta_data().find(cstr_dj_keycontent);
int size;
if (it == mh.get_meta_data().end()) {
size = -1;

View File

@ -81,7 +81,7 @@ bool MimeHandlerText::set_document_file(const string &fn)
string md5, xmd5;
MD5String(m_text, md5);
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
m_havedoc = true;
return true;
}
@ -91,7 +91,7 @@ bool MimeHandlerText::set_document_string(const string& otext)
m_text = otext;
string md5, xmd5;
MD5String(m_text, md5);
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
m_havedoc = true;
return true;
}
@ -118,11 +118,11 @@ bool MimeHandlerText::next_document()
// We transcode even if defcharset is supposedly already utf-8:
// this validates the encoding.
m_metaData[cstr_origcharset] = m_dfltInputCharset;
m_metaData[cstr_mimetype] = cstr_textplain;
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
m_metaData[cstr_dj_keymt] = cstr_textplain;
size_t srclen = m_text.length();
m_metaData[cstr_content].swap(m_text);
m_metaData[cstr_dj_keycontent].swap(m_text);
// txtdcode() truncates the text if transcoding fails
(void)txtdcode("mh_text");
@ -144,7 +144,7 @@ bool MimeHandlerText::next_document()
char buf[30];
sprintf(buf, "%lld", (long long)(m_offs - srclen));
if (m_offs - srclen != 0)
m_metaData[cstr_ipath] = buf;
m_metaData[cstr_dj_keyipath] = buf;
readnext();
return true;
}

View File

@ -42,8 +42,8 @@ class MimeHandlerUnknown : public RecollFilter {
if (m_havedoc == false)
return false;
m_havedoc = false;
m_metaData[cstr_content] = cstr_null;
m_metaData[cstr_mimetype] = cstr_textplain;
m_metaData[cstr_dj_keycontent] = cstr_null;
m_metaData[cstr_dj_keymt] = cstr_textplain;
return true;
}
virtual bool is_unknown() {return true;}

View File

@ -141,6 +141,7 @@ static Dijon::Filter *mhFactory(RclConfig *config, const string &mime)
}
}
static const string cstr_mh_charset("charset");
/**
* Create a filter that executes an external program or script
* A filter def can look like:
@ -179,9 +180,9 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
// Handle additional attributes. We substitute the semi-colons
// with newlines and use a ConfSimple
string value;
if (attrs.get(cstr_charset, value))
if (attrs.get(cstr_mh_charset, value))
h->cfgFilterOutputCharset = stringtolower((const string&)value);
if (attrs.get(cstr_mimetype, value))
if (attrs.get(cstr_dj_keymt, value))
h->cfgFilterOutputMtype = stringtolower((const string&)value);
#if 0

View File

@ -36,6 +36,9 @@
#include "debuglog.h"
#include "transcode.h"
static const string cstr_html_charset("charset");
static const string cstr_html_content("content");
inline static bool
p_notdigit(char c)
{
@ -353,7 +356,7 @@ MyHtmlParser::opening_tag(const string &tag)
case 'm':
if (tag == "meta") {
string content;
if (get_parameter(cstr_content, content)) {
if (get_parameter(cstr_html_content, content)) {
string name;
if (get_parameter("name", name)) {
lowercase_term(name);
@ -387,7 +390,7 @@ MyHtmlParser::opening_tag(const string &tag)
MimeHeaderValue p;
parseMimeHeaderValue(content, p);
map<string, string>::const_iterator k;
if ((k = p.params.find(cstr_charset)) !=
if ((k = p.params.find(cstr_html_charset)) !=
p.params.end()) {
charset = k->second;
if (!samecharset(charset, fromcharset)) {
@ -402,7 +405,7 @@ MyHtmlParser::opening_tag(const string &tag)
}
}
string newcharset;
if (get_parameter(cstr_charset, newcharset)) {
if (get_parameter(cstr_html_charset, newcharset)) {
// HTML5 added: <meta charset="...">
lowercase_term(newcharset);
charset = newcharset;

View File

@ -22,14 +22,14 @@
bool RecollFilter::txtdcode(const string& who)
{
if (m_metaData[cstr_mimetype].compare(cstr_textplain)) {
if (m_metaData[cstr_dj_keymt].compare(cstr_textplain)) {
LOGERR(("%s::txtdcode: called on non txt/plain: %s\n", who.c_str(),
m_metaData[cstr_mimetype].c_str()));
m_metaData[cstr_dj_keymt].c_str()));
return false;
}
string& ocs = m_metaData[cstr_origcharset];
string& itext = m_metaData[cstr_content];
string& ocs = m_metaData[cstr_dj_keyorigcharset];
string& itext = m_metaData[cstr_dj_keycontent];
LOGDEB0(("%s::txtdcode: %d bytes from [%s] to UTF-8\n",
who.c_str(), itext.size(), ocs.c_str()));
int ecnt;
@ -44,6 +44,6 @@ bool RecollFilter::txtdcode(const string& who)
return false;
}
itext.swap(otext);
m_metaData[cstr_charset] = "UTF-8";
m_metaData[cstr_dj_keycharset] = "UTF-8";
return true;
}