From 638d468796b2e062824f5f0f2e609867a769b417 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Wed, 7 Mar 2012 10:13:46 +0100 Subject: [PATCH] clarified the use of string keys inside the Filter metaData array --- src/common/beaglequeuecache.cpp | 4 +- src/common/beaglequeuecache.h | 1 + src/common/cstr.h | 28 +++++++++---- src/index/beaglequeue.cpp | 2 +- src/internfile/internfile.cpp | 38 +++++++---------- src/internfile/mh_exec.cpp | 10 ++--- src/internfile/mh_execm.cpp | 34 ++++++++-------- src/internfile/mh_html.cpp | 14 +++---- src/internfile/mh_mail.cpp | 72 ++++++++++++++++----------------- src/internfile/mh_mbox.cpp | 10 ++--- src/internfile/mh_text.cpp | 12 +++--- src/internfile/mh_unknown.h | 4 +- src/internfile/mimehandler.cpp | 5 ++- src/internfile/myhtmlparse.cpp | 9 +++-- src/internfile/txtdcode.cpp | 10 ++--- 15 files changed, 131 insertions(+), 122 deletions(-) diff --git a/src/common/beaglequeuecache.cpp b/src/common/beaglequeuecache.cpp index 5f3335e0..675e6afe 100644 --- a/src/common/beaglequeuecache.cpp +++ b/src/common/beaglequeuecache.cpp @@ -25,6 +25,8 @@ #include "pathut.h" #include "rcldoc.h" +const string cstr_bgc_mimetype("mimetype"); + BeagleQueueCache::BeagleQueueCache(RclConfig *cnf) { string ccdir; @@ -64,7 +66,7 @@ bool BeagleQueueCache::getFromCache(const string& udi, Rcl::Doc &dotdoc, // Build a doc from saved metadata cf.get(cstr_url, dotdoc.url, cstr_null); - cf.get(cstr_mimetype, dotdoc.mimetype, cstr_null); + cf.get(cstr_bgc_mimetype, dotdoc.mimetype, cstr_null); cf.get(cstr_fmtime, dotdoc.fmtime, cstr_null); cf.get(cstr_fbytes, dotdoc.fbytes, cstr_null); dotdoc.sig.clear(); diff --git a/src/common/beaglequeuecache.h b/src/common/beaglequeuecache.h index 64706802..5297d61c 100644 --- a/src/common/beaglequeuecache.h +++ b/src/common/beaglequeuecache.h @@ -45,5 +45,6 @@ public: private: CirCache *m_cache; }; +extern const string cstr_bgc_mimetype; #endif /* _beaglequeuecache_h_included_ */ diff --git a/src/common/cstr.h b/src/common/cstr.h index 6d481974..4d451127 100644 --- a/src/common/cstr.h +++ b/src/common/cstr.h @@ -40,25 +40,39 @@ using std::string; #define DEF_CSTR(NM, STR) extern const string cstr_##NM #endif -DEF_CSTR(author, "author"); DEF_CSTR(caption, "caption"); -DEF_CSTR(charset, "charset"); -DEF_CSTR(content, "content"); DEF_CSTR(dmtime, "dmtime"); DEF_CSTR(dquote, "\""); DEF_CSTR(fbytes, "fbytes"); DEF_CSTR(fileu, "file://"); DEF_CSTR(fmtime, "fmtime"); -DEF_CSTR(ipath, "ipath"); DEF_CSTR(iso_8859_1, "ISO-8859-1"); -DEF_CSTR(md5, "md5"); -DEF_CSTR(mimetype, "mimetype"); DEF_CSTR(minwilds, "*?["); DEF_CSTR(newline, "\n"); -DEF_CSTR(origcharset, "origcharset"); DEF_CSTR(null, ""); DEF_CSTR(plus, "+"); DEF_CSTR(textplain, "text/plain"); DEF_CSTR(url, "url"); + +// Values used as keys inside Dijon::Filter::metaData[]. This structure is +// used to store all data generated by format-translating filters. It is +// different from Rcl::Doc for mostly historical reasons. The translation +// from Filter to Doc occurs inside internfile.cpp +DEF_CSTR(dj_keyds, "description"); +DEF_CSTR(dj_keyfn, "filename"); +DEF_CSTR(dj_keymd, "modificationdate"); +DEF_CSTR(dj_keyorigcharset, "origcharset"); +DEF_CSTR(dj_keytitle, "title"); +DEF_CSTR(dj_keyrecipient, "recipient"); +DEF_CSTR(dj_keymsgid, "msgid"); +DEF_CSTR(dj_keyabstract, "abstract"); +DEF_CSTR(dj_keyauthor, "author"); +DEF_CSTR(dj_keycharset, "charset"); +DEF_CSTR(dj_keycontent, "content"); +DEF_CSTR(dj_keyipath, "ipath"); +DEF_CSTR(dj_keymd5, "md5"); +DEF_CSTR(dj_keymt, "mimetype"); +DEF_CSTR(dj_keydocsize, "docsize"); + #endif /* _CSTR_H_INCLUDED_ */ diff --git a/src/index/beaglequeue.cpp b/src/index/beaglequeue.cpp index 9ee9274d..c7797318 100644 --- a/src/index/beaglequeue.cpp +++ b/src/index/beaglequeue.cpp @@ -161,7 +161,7 @@ public: m_fields.set((*it).first, (*it).second, cstr_null); } m_fields.set(cstr_url, doc.url, cstr_null); - m_fields.set(cstr_mimetype, doc.mimetype, cstr_null); + m_fields.set(cstr_bgc_mimetype, doc.mimetype, cstr_null); return true; } diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index 3b0b60dc..b6bdeb7c 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -638,14 +638,6 @@ static inline bool getKeyValue(const map& docdata, return false; } -// These defs are for the Dijon meta array. Rcl::Doc predefined field -// names are used where appropriate. In some cases, Rcl::Doc names are -// used inside the Dijon metadata (ex: origcharset) -static const string cstr_keyds("description"); -static const string cstr_keyfn("filename"); -static const string cstr_keymd("modificationdate"); -static const string cstr_keytt("title"); - bool FileInterner::dijontorcl(Rcl::Doc& doc) { Dijon::Filter *df = m_handlers.back(); @@ -658,21 +650,21 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc) for (map::const_iterator it = docdata.begin(); it != docdata.end(); it++) { - if (it->first == cstr_content) { + if (it->first == cstr_dj_keycontent) { doc.text = it->second; - } else if (it->first == cstr_keymd) { + } else if (it->first == cstr_dj_keymd) { doc.dmtime = it->second; - } else if (it->first == Rcl::Doc::keyoc) { + } else if (it->first == cstr_dj_keyorigcharset) { doc.origcharset = it->second; - } else if (it->first == cstr_mimetype || it->first == cstr_charset) { + } else if (it->first == cstr_dj_keymt || it->first == cstr_dj_keycharset) { // don't need/want these. } else { doc.meta[it->first] = it->second; } } - if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[cstr_keyds].empty()) { - doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_keyds]; - doc.meta.erase(cstr_keyds); + if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[cstr_dj_keyds].empty()) { + doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_dj_keyds]; + doc.meta.erase(cstr_dj_keyds); } return true; } @@ -704,19 +696,19 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const for (vector::const_iterator hit = m_handlers.begin(); hit != m_handlers.end(); hit++) { const map& docdata = (*hit)->get_meta_data(); - if (getKeyValue(docdata, cstr_ipath, ipathel)) { + if (getKeyValue(docdata, cstr_dj_keyipath, ipathel)) { if (!ipathel.empty()) { // We have a non-empty ipath hasipath = true; - getKeyValue(docdata, cstr_mimetype, doc.mimetype); - getKeyValue(docdata, cstr_keyfn, doc.utf8fn); + getKeyValue(docdata, cstr_dj_keymt, doc.mimetype); + getKeyValue(docdata, cstr_dj_keyfn, doc.utf8fn); } doc.ipath += colon_hide(ipathel) + cstr_isep; } else { doc.ipath += cstr_isep; } - getKeyValue(docdata, cstr_author, doc.meta[Rcl::Doc::keyau]); - getKeyValue(docdata, cstr_keymd, doc.dmtime); + getKeyValue(docdata, cstr_dj_keyauthor, doc.meta[Rcl::Doc::keyau]); + getKeyValue(docdata, cstr_dj_keymd, doc.dmtime); } // Trim empty tail elements in ipath. @@ -754,8 +746,8 @@ int FileInterner::addHandler() { const map& docdata = m_handlers.back()->get_meta_data(); string charset, mimetype; - getKeyValue(docdata, cstr_charset, charset); - getKeyValue(docdata, cstr_mimetype, mimetype); + getKeyValue(docdata, cstr_dj_keycharset, charset); + getKeyValue(docdata, cstr_dj_keymt, mimetype); LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str())); @@ -796,7 +788,7 @@ int FileInterner::addHandler() const string *txt = &ns; { map::const_iterator it; - it = docdata.find(cstr_content); + it = docdata.find(cstr_dj_keycontent); if (it != docdata.end()) txt = &it->second; } diff --git a/src/internfile/mh_exec.cpp b/src/internfile/mh_exec.cpp index 40a8caf4..3948d0f1 100644 --- a/src/internfile/mh_exec.cpp +++ b/src/internfile/mh_exec.cpp @@ -94,7 +94,7 @@ bool MimeHandlerExec::next_document() myparams.push_back(m_ipath); // Execute command, store the output - string& output = m_metaData[cstr_content]; + string& output = m_metaData[cstr_dj_keycontent]; output.erase(); ExecCmd mexec; MEAdv adv(filtermaxseconds); @@ -145,16 +145,16 @@ bool MimeHandlerExec::next_document() void MimeHandlerExec::finaldetails() { - m_metaData[cstr_origcharset] = m_dfltInputCharset; + m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset; // cfgFilterOutputCharset comes from the mimeconf filter definition line - string& charset = m_metaData[cstr_charset]; + string& charset = m_metaData[cstr_dj_keycharset]; charset = cfgFilterOutputCharset.empty() ? "UTF-8" : cfgFilterOutputCharset; if (!stringlowercmp("default", charset)) { charset = m_dfltInputCharset; } - string& mt = m_metaData[cstr_mimetype]; + string& mt = m_metaData[cstr_dj_keymt]; mt = cfgFilterOutputMtype.empty() ? "text/html" : cfgFilterOutputMtype; @@ -165,7 +165,7 @@ void MimeHandlerExec::finaldetails() string md5, xmd5, reason; if (MD5File(m_fn, md5, &reason)) { - m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5); + m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5); } else { LOGERR(("MimeHandlerExec: cant compute md5 for [%s]: %s\n", m_fn.c_str(), reason.c_str())); diff --git a/src/internfile/mh_execm.cpp b/src/internfile/mh_execm.cpp index c40d5494..381d0287 100644 --- a/src/internfile/mh_execm.cpp +++ b/src/internfile/mh_execm.cpp @@ -66,7 +66,7 @@ bool MimeHandlerExecMultiple::startCmd() } // Note: data is not used if this is the "document:" field: it goes -// directly to m_metaData["content"] to avoid an extra copy +// directly to m_metaData[cstr_dj_keycontent] to avoid an extra copy // // Messages are made of data elements. Each element is like: // name: len\ndata @@ -118,11 +118,11 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data) LOGDEB1(("MHExecMultiple: got name [%s] len: %d\n", name.c_str(), len)); // Hack: check for 'Document:' and read directly the document data - // to m_metaData["content"] to avoid an extra copy of the bulky + // to m_metaData[cstr_dj_keycontent] to avoid an extra copy of the bulky // piece string *datap = &data; if (!stringlowercmp("document:", name)) { - datap = &m_metaData[cstr_content]; + datap = &m_metaData[cstr_dj_keycontent]; } else { datap = &data; } @@ -238,7 +238,7 @@ bool MimeHandlerExecMultiple::next_document() // It used to be that eof could be signalled just by an empty document, but // this was wrong. Empty documents can be found ie in zip files and should // not be interpreted as eof. - if (m_metaData[cstr_content].empty()) { + if (m_metaData[cstr_dj_keycontent].empty()) { LOGDEB0(("MHExecMultiple: got empty document inside [%s]: [%s]\n", m_fn.c_str(), ipath.c_str())); } @@ -248,14 +248,14 @@ bool MimeHandlerExecMultiple::next_document() // mimetype, or the ipath MUST be a filename-like string which we can use // to compute a mime type if (!ipath.empty()) { - m_metaData[cstr_ipath] = ipath; + m_metaData[cstr_dj_keyipath] = ipath; if (mtype.empty()) { LOGDEB0(("MHExecMultiple: no mime type from filter, " "using ipath for a guess\n")); mtype = mimetype(ipath, 0, m_config, false); if (mtype.empty()) { // mimetype() won't call idFile when there is no file. Do it - mtype = idFileMem(m_metaData[cstr_content]); + mtype = idFileMem(m_metaData[cstr_dj_keycontent]); if (mtype.empty()) { // Note this happens for example for directory zip members // We could recognize them by the end /, but wouldn't know @@ -265,16 +265,16 @@ bool MimeHandlerExecMultiple::next_document() } } } - m_metaData[cstr_mimetype] = mtype; + m_metaData[cstr_dj_keymt] = mtype; string md5, xmd5; - MD5String(m_metaData[cstr_content], md5); - m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5); + MD5String(m_metaData[cstr_dj_keycontent], md5); + m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5); } else { - m_metaData[cstr_mimetype] = mtype.empty() ? "text/html" : mtype; - m_metaData.erase(cstr_ipath); + m_metaData[cstr_dj_keymt] = mtype.empty() ? "text/html" : mtype; + m_metaData.erase(cstr_dj_keyipath); string md5, xmd5, reason; if (MD5File(m_fn, md5, &reason)) { - m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5); + m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5); } else { LOGERR(("MimeHandlerExecM: cant compute md5 for [%s]: %s\n", m_fn.c_str(), reason.c_str())); @@ -290,10 +290,10 @@ bool MimeHandlerExecMultiple::next_document() charset = m_dfltInputCharset; } } - m_metaData[cstr_origcharset] = charset; - m_metaData[cstr_charset] = charset; + m_metaData[cstr_dj_keyorigcharset] = charset; + m_metaData[cstr_dj_keycharset] = charset; - if (!m_metaData[cstr_mimetype].compare(cstr_textplain)) { + if (!m_metaData[cstr_dj_keymt].compare(cstr_textplain)) { (void)txtdcode("mh_execm"); } @@ -302,7 +302,7 @@ bool MimeHandlerExecMultiple::next_document() m_havedoc = false; LOGDEB0(("MHExecMultiple: returning %d bytes of content," - " mtype [%s] charset [%s]\n", m_metaData[cstr_content].size(), - m_metaData[cstr_mimetype].c_str(), m_metaData[cstr_charset].c_str())); + " mtype [%s] charset [%s]\n", m_metaData[cstr_dj_keycontent].size(), + m_metaData[cstr_dj_keymt].c_str(), m_metaData[cstr_dj_keycharset].c_str())); return true; } diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp index 237caf8a..e8e07d05 100644 --- a/src/internfile/mh_html.cpp +++ b/src/internfile/mh_html.cpp @@ -56,7 +56,7 @@ bool MimeHandlerHtml::set_document_string(const string& htext) // We want to compute the md5 now because we may modify m_html later string md5, xmd5; MD5String(htext, md5); - m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5); + m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5); return true; } @@ -74,7 +74,7 @@ bool MimeHandlerHtml::next_document() LOGDEB(("MHHtml::next_doc.: default supposed input charset: [%s]\n", charset.c_str())); // Override default input charset if someone took care to set one: - map::const_iterator it = m_metaData.find(cstr_charset); + map::const_iterator it = m_metaData.find(cstr_dj_keycharset); if (it != m_metaData.end() && !it->second.empty()) { charset = it->second; LOGDEB(("MHHtml: next_doc.: input charset from ext. metadata: [%s]\n", @@ -163,14 +163,14 @@ bool MimeHandlerHtml::next_document() } } - m_metaData[cstr_origcharset] = result.get_charset(); - m_metaData[cstr_content] = result.dump; - m_metaData[cstr_charset] = "utf-8"; + m_metaData[cstr_dj_keyorigcharset] = result.get_charset(); + m_metaData[cstr_dj_keycontent] = result.dump; + m_metaData[cstr_dj_keycharset] = "utf-8"; // Avoid setting empty values which would crush ones possibly inherited // from parent (if we're an attachment) if (!result.dmtime.empty()) - m_metaData["modificationdate"] = result.dmtime; - m_metaData[cstr_mimetype] = cstr_textplain; + m_metaData[cstr_dj_keymd] = result.dmtime; + m_metaData[cstr_dj_keymt] = cstr_textplain; for (map::const_iterator it = result.meta.begin(); it != result.meta.end(); it++) { diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp index 814f3bcf..0fd3d4ed 100644 --- a/src/internfile/mh_mail.cpp +++ b/src/internfile/mh_mail.cpp @@ -44,11 +44,7 @@ using namespace std; static const int maxdepth = 20; -static const string cstr_recipient = "recipient"; -static const string cstr_modificationdate = "modificationdate"; -static const string cstr_title = "title"; -static const string cstr_msgid = "msgid"; -static const string cstr_abstract = "abstract"; +static const string cstr_mail_charset("charset"); MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt) : RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1) @@ -100,7 +96,7 @@ bool MimeHandlerMail::set_document_file(const string &fn) // the md5 computation to the mime analysis, but ... string md5, xmd5, reason; if (MD5File(fn, md5, &reason)) { - m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5); + m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5); } else { LOGERR(("MimeHandlerMail: cant compute md5 for [%s]: %s\n", fn.c_str(), reason.c_str())); @@ -132,7 +128,7 @@ bool MimeHandlerMail::set_document_string(const string &msgtxt) string md5, xmd5; MD5String(msgtxt, md5); - m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5); + m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5); m_stream = new stringstream(msgtxt); delete m_bincdoc; @@ -172,16 +168,16 @@ bool MimeHandlerMail::next_document() bool res = false; if (m_idx == -1) { - m_metaData[cstr_mimetype] = cstr_textplain; + m_metaData[cstr_dj_keymt] = cstr_textplain; res = processMsg(m_bincdoc, 0); LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n", - m_metaData[cstr_mimetype].c_str())); - const string& txt = m_metaData[cstr_content]; + m_metaData[cstr_dj_keymt].c_str())); + const string& txt = m_metaData[cstr_dj_keycontent]; if (m_startoftext < txt.size()) - m_metaData[cstr_abstract] = + m_metaData[cstr_dj_keyabstract] = truncate_to_word(txt.substr(m_startoftext), 250); } else { - m_metaData[cstr_abstract].clear(); + m_metaData[cstr_dj_keyabstract].clear(); res = processAttach(); } m_idx++; @@ -235,18 +231,18 @@ bool MimeHandlerMail::processAttach() } MHMailAttach *att = m_attachments[m_idx]; - m_metaData[cstr_mimetype] = att->m_contentType; - m_metaData[cstr_charset] = att->m_charset; - m_metaData["filename"] = att->m_filename; + m_metaData[cstr_dj_keymt] = att->m_contentType; + m_metaData[cstr_dj_keycharset] = att->m_charset; + m_metaData[cstr_dj_keyfn] = att->m_filename; // Change the title to something helpul - m_metaData[cstr_title] = att->m_filename + " (" + m_subject + ")"; + m_metaData[cstr_dj_keytitle] = att->m_filename + " (" + m_subject + ")"; LOGDEB1((" processAttach:ct [%s] cs [%s] fn [%s]\n", att->m_contentType.c_str(), att->m_charset.c_str(), att->m_filename.c_str())); - m_metaData[cstr_content] = string(); - string& body = m_metaData[cstr_content]; + m_metaData[cstr_dj_keycontent] = string(); + string& body = m_metaData[cstr_dj_keycontent]; att->m_part->getBody(body, 0, att->m_part->bodylength); string decoded; const string *bdp; @@ -259,11 +255,11 @@ bool MimeHandlerMail::processAttach() // Special case for text/plain content. Internfile should deal // with this but it expects text/plain to be utf-8 already, so we // handle the transcoding if needed - if (m_metaData[cstr_mimetype] == cstr_textplain) { + if (m_metaData[cstr_dj_keymt] == cstr_textplain) { string utf8; - if (!transcode(body, utf8, m_metaData[cstr_charset], "UTF-8")) { + if (!transcode(body, utf8, m_metaData[cstr_dj_keycharset], "UTF-8")) { LOGERR((" processAttach: transcode to utf-8 failed " - "for charset [%s]\n", m_metaData[cstr_charset].c_str())); + "for charset [%s]\n", m_metaData[cstr_dj_keycharset].c_str())); // can't transcode at all -> data is garbage just erase it body.clear(); } else { @@ -273,18 +269,18 @@ bool MimeHandlerMail::processAttach() // Special case for application/octet-stream: try to better // identify content, using file name if set - if (m_metaData[cstr_mimetype] == "application/octet-stream" && - !m_metaData["filename"].empty()) { - string mt = mimetype(m_metaData["filename"], 0, + if (m_metaData[cstr_dj_keymt] == "application/octet-stream" && + !m_metaData[cstr_dj_keyfn].empty()) { + string mt = mimetype(m_metaData[cstr_dj_keyfn], 0, m_config, false); if (!mt.empty()) - m_metaData[cstr_mimetype] = mt; + m_metaData[cstr_dj_keymt] = mt; } // Ipath char nbuf[20]; sprintf(nbuf, "%d", m_idx); - m_metaData[cstr_ipath] = nbuf; + m_metaData[cstr_dj_keyipath] = nbuf; return true; } @@ -308,7 +304,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth) } // Handle some headers. - string& text = m_metaData[cstr_content]; + string& text = m_metaData[cstr_dj_keycontent]; Binc::HeaderItem hi; string transcoded; if (doc->h.getFirstHeader("From", hi)) { @@ -317,7 +313,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth) text += string("From: "); text += transcoded + cstr_newline; if (depth == 1) { - m_metaData[cstr_author] = transcoded; + m_metaData[cstr_dj_keyauthor] = transcoded; } } if (doc->h.getFirstHeader("To", hi)) { @@ -326,7 +322,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth) text += string("To: "); text += transcoded + cstr_newline; if (depth == 1) { - m_metaData[cstr_recipient] = transcoded; + m_metaData[cstr_dj_keyrecipient] = transcoded; } } if (doc->h.getFirstHeader("Cc", hi)) { @@ -335,13 +331,13 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth) text += string("Cc: "); text += transcoded + cstr_newline; if (depth == 1) { - m_metaData[cstr_recipient] += " " + transcoded; + m_metaData[cstr_dj_keyrecipient] += " " + transcoded; } } if (doc->h.getFirstHeader("Message-Id", hi)) { if (depth == 1) { - m_metaData[cstr_msgid] = hi.getValue(); - trimstring(m_metaData[cstr_msgid], "<>"); + m_metaData[cstr_dj_keymsgid] = hi.getValue(); + trimstring(m_metaData[cstr_dj_keymsgid], "<>"); } } if (doc->h.getFirstHeader("Date", hi)) { @@ -351,7 +347,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth) if (t != (time_t)-1) { char ascuxtime[100]; sprintf(ascuxtime, "%ld", (long)t); - m_metaData[cstr_modificationdate] = ascuxtime; + m_metaData[cstr_dj_keymd] = ascuxtime; } else { // Leave mtime field alone, ftime will be used instead. LOGDEB(("rfc2822Date...: failed: [%s]\n", transcoded.c_str())); @@ -364,7 +360,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth) if (doc->h.getFirstHeader("Subject", hi)) { rfc2047_decode(hi.getValue(), transcoded); if (depth == 1) { - m_metaData[cstr_title] = transcoded; + m_metaData[cstr_dj_keytitle] = transcoded; m_subject = transcoded; } if (preview()) @@ -393,7 +389,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth) walkmime(doc, depth); LOGDEB2(("MimeHandlerMail::processMsg:text:[%s]\n", - m_metaData[cstr_content].c_str())); + m_metaData[cstr_dj_keycontent].c_str())); return true; } @@ -415,7 +411,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) return; } - string& out = m_metaData[cstr_content]; + string& out = m_metaData[cstr_dj_keycontent]; if (doc->isMultipart()) { LOGDEB2(("walkmime: ismultipart %d subtype '%s'\n", @@ -527,7 +523,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) // to iso-8859 only if the transfer-encoding is 8 bit, or test for // actual 8 bit chars, but what the heck, le'ts use 8859-1 as default string charset; - it = content_type.params.find(string(cstr_charset)); + it = content_type.params.find(cstr_mail_charset); if (it != content_type.params.end()) charset = it->second; if (charset.empty() || @@ -609,7 +605,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) mh.set_document_string(body); mh.next_document(); map::const_iterator it = - mh.get_meta_data().find(cstr_content); + mh.get_meta_data().find(cstr_dj_keycontent); if (it != mh.get_meta_data().end()) out += it->second; } else { diff --git a/src/internfile/mh_mbox.cpp b/src/internfile/mh_mbox.cpp index a78036b9..3051382f 100644 --- a/src/internfile/mh_mbox.cpp +++ b/src/internfile/mh_mbox.cpp @@ -435,7 +435,7 @@ bool MimeHandlerMbox::next_document() off_t message_end = 0; bool iseof = false; bool hademptyline = true; - string& msgtxt = m_metaData[cstr_content]; + string& msgtxt = m_metaData[cstr_dj_keycontent]; msgtxt.erase(); line_type line; for (;;) { @@ -499,8 +499,8 @@ bool MimeHandlerMbox::next_document() // m_msgnum was incremented when hitting the next From_ or eof, so the data // is for m_msgnum - 1 sprintf(buf, "%d", m_msgnum - 1); - m_metaData[cstr_ipath] = buf; - m_metaData[cstr_mimetype] = "message/rfc822"; + m_metaData[cstr_dj_keyipath] = buf; + m_metaData[cstr_dj_keymt] = "message/rfc822"; if (iseof) { LOGDEB2(("MimeHandlerMbox::next: eof hit\n")); m_havedoc = false; @@ -591,7 +591,7 @@ int main(int argc, char **argv) exit(1); } map::const_iterator it = - mh.get_meta_data().find(cstr_content); + mh.get_meta_data().find(cstr_dj_keycontent); int size; if (it == mh.get_meta_data().end()) { size = -1; @@ -611,7 +611,7 @@ int main(int argc, char **argv) } docnt++; map::const_iterator it = - mh.get_meta_data().find(cstr_content); + mh.get_meta_data().find(cstr_dj_keycontent); int size; if (it == mh.get_meta_data().end()) { size = -1; diff --git a/src/internfile/mh_text.cpp b/src/internfile/mh_text.cpp index 0d6ddf5f..a9b7a203 100644 --- a/src/internfile/mh_text.cpp +++ b/src/internfile/mh_text.cpp @@ -81,7 +81,7 @@ bool MimeHandlerText::set_document_file(const string &fn) string md5, xmd5; MD5String(m_text, md5); - m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5); + m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5); m_havedoc = true; return true; } @@ -91,7 +91,7 @@ bool MimeHandlerText::set_document_string(const string& otext) m_text = otext; string md5, xmd5; MD5String(m_text, md5); - m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5); + m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5); m_havedoc = true; return true; } @@ -118,11 +118,11 @@ bool MimeHandlerText::next_document() // We transcode even if defcharset is supposedly already utf-8: // this validates the encoding. - m_metaData[cstr_origcharset] = m_dfltInputCharset; - m_metaData[cstr_mimetype] = cstr_textplain; + m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset; + m_metaData[cstr_dj_keymt] = cstr_textplain; size_t srclen = m_text.length(); - m_metaData[cstr_content].swap(m_text); + m_metaData[cstr_dj_keycontent].swap(m_text); // txtdcode() truncates the text if transcoding fails (void)txtdcode("mh_text"); @@ -144,7 +144,7 @@ bool MimeHandlerText::next_document() char buf[30]; sprintf(buf, "%lld", (long long)(m_offs - srclen)); if (m_offs - srclen != 0) - m_metaData[cstr_ipath] = buf; + m_metaData[cstr_dj_keyipath] = buf; readnext(); return true; } diff --git a/src/internfile/mh_unknown.h b/src/internfile/mh_unknown.h index c478aab4..206b98ae 100644 --- a/src/internfile/mh_unknown.h +++ b/src/internfile/mh_unknown.h @@ -42,8 +42,8 @@ class MimeHandlerUnknown : public RecollFilter { if (m_havedoc == false) return false; m_havedoc = false; - m_metaData[cstr_content] = cstr_null; - m_metaData[cstr_mimetype] = cstr_textplain; + m_metaData[cstr_dj_keycontent] = cstr_null; + m_metaData[cstr_dj_keymt] = cstr_textplain; return true; } virtual bool is_unknown() {return true;} diff --git a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp index 9b2a3387..a9618478 100644 --- a/src/internfile/mimehandler.cpp +++ b/src/internfile/mimehandler.cpp @@ -141,6 +141,7 @@ static Dijon::Filter *mhFactory(RclConfig *config, const string &mime) } } +static const string cstr_mh_charset("charset"); /** * Create a filter that executes an external program or script * A filter def can look like: @@ -179,9 +180,9 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs, // Handle additional attributes. We substitute the semi-colons // with newlines and use a ConfSimple string value; - if (attrs.get(cstr_charset, value)) + if (attrs.get(cstr_mh_charset, value)) h->cfgFilterOutputCharset = stringtolower((const string&)value); - if (attrs.get(cstr_mimetype, value)) + if (attrs.get(cstr_dj_keymt, value)) h->cfgFilterOutputMtype = stringtolower((const string&)value); #if 0 diff --git a/src/internfile/myhtmlparse.cpp b/src/internfile/myhtmlparse.cpp index 59d01829..a85a7531 100644 --- a/src/internfile/myhtmlparse.cpp +++ b/src/internfile/myhtmlparse.cpp @@ -36,6 +36,9 @@ #include "debuglog.h" #include "transcode.h" +static const string cstr_html_charset("charset"); +static const string cstr_html_content("content"); + inline static bool p_notdigit(char c) { @@ -353,7 +356,7 @@ MyHtmlParser::opening_tag(const string &tag) case 'm': if (tag == "meta") { string content; - if (get_parameter(cstr_content, content)) { + if (get_parameter(cstr_html_content, content)) { string name; if (get_parameter("name", name)) { lowercase_term(name); @@ -387,7 +390,7 @@ MyHtmlParser::opening_tag(const string &tag) MimeHeaderValue p; parseMimeHeaderValue(content, p); map::const_iterator k; - if ((k = p.params.find(cstr_charset)) != + if ((k = p.params.find(cstr_html_charset)) != p.params.end()) { charset = k->second; if (!samecharset(charset, fromcharset)) { @@ -402,7 +405,7 @@ MyHtmlParser::opening_tag(const string &tag) } } string newcharset; - if (get_parameter(cstr_charset, newcharset)) { + if (get_parameter(cstr_html_charset, newcharset)) { // HTML5 added: lowercase_term(newcharset); charset = newcharset; diff --git a/src/internfile/txtdcode.cpp b/src/internfile/txtdcode.cpp index 2bae470a..0d933958 100644 --- a/src/internfile/txtdcode.cpp +++ b/src/internfile/txtdcode.cpp @@ -22,14 +22,14 @@ bool RecollFilter::txtdcode(const string& who) { - if (m_metaData[cstr_mimetype].compare(cstr_textplain)) { + if (m_metaData[cstr_dj_keymt].compare(cstr_textplain)) { LOGERR(("%s::txtdcode: called on non txt/plain: %s\n", who.c_str(), - m_metaData[cstr_mimetype].c_str())); + m_metaData[cstr_dj_keymt].c_str())); return false; } - string& ocs = m_metaData[cstr_origcharset]; - string& itext = m_metaData[cstr_content]; + string& ocs = m_metaData[cstr_dj_keyorigcharset]; + string& itext = m_metaData[cstr_dj_keycontent]; LOGDEB0(("%s::txtdcode: %d bytes from [%s] to UTF-8\n", who.c_str(), itext.size(), ocs.c_str())); int ecnt; @@ -44,6 +44,6 @@ bool RecollFilter::txtdcode(const string& who) return false; } itext.swap(otext); - m_metaData[cstr_charset] = "UTF-8"; + m_metaData[cstr_dj_keycharset] = "UTF-8"; return true; }