clarified the use of string keys inside the Filter metaData array
This commit is contained in:
parent
1e28525e5a
commit
638d468796
@ -25,6 +25,8 @@
|
|||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
#include "rcldoc.h"
|
#include "rcldoc.h"
|
||||||
|
|
||||||
|
const string cstr_bgc_mimetype("mimetype");
|
||||||
|
|
||||||
BeagleQueueCache::BeagleQueueCache(RclConfig *cnf)
|
BeagleQueueCache::BeagleQueueCache(RclConfig *cnf)
|
||||||
{
|
{
|
||||||
string ccdir;
|
string ccdir;
|
||||||
@ -64,7 +66,7 @@ bool BeagleQueueCache::getFromCache(const string& udi, Rcl::Doc &dotdoc,
|
|||||||
|
|
||||||
// Build a doc from saved metadata
|
// Build a doc from saved metadata
|
||||||
cf.get(cstr_url, dotdoc.url, cstr_null);
|
cf.get(cstr_url, dotdoc.url, cstr_null);
|
||||||
cf.get(cstr_mimetype, dotdoc.mimetype, cstr_null);
|
cf.get(cstr_bgc_mimetype, dotdoc.mimetype, cstr_null);
|
||||||
cf.get(cstr_fmtime, dotdoc.fmtime, cstr_null);
|
cf.get(cstr_fmtime, dotdoc.fmtime, cstr_null);
|
||||||
cf.get(cstr_fbytes, dotdoc.fbytes, cstr_null);
|
cf.get(cstr_fbytes, dotdoc.fbytes, cstr_null);
|
||||||
dotdoc.sig.clear();
|
dotdoc.sig.clear();
|
||||||
|
|||||||
@ -45,5 +45,6 @@ public:
|
|||||||
private:
|
private:
|
||||||
CirCache *m_cache;
|
CirCache *m_cache;
|
||||||
};
|
};
|
||||||
|
extern const string cstr_bgc_mimetype;
|
||||||
|
|
||||||
#endif /* _beaglequeuecache_h_included_ */
|
#endif /* _beaglequeuecache_h_included_ */
|
||||||
|
|||||||
@ -40,25 +40,39 @@ using std::string;
|
|||||||
#define DEF_CSTR(NM, STR) extern const string cstr_##NM
|
#define DEF_CSTR(NM, STR) extern const string cstr_##NM
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
DEF_CSTR(author, "author");
|
|
||||||
DEF_CSTR(caption, "caption");
|
DEF_CSTR(caption, "caption");
|
||||||
DEF_CSTR(charset, "charset");
|
|
||||||
DEF_CSTR(content, "content");
|
|
||||||
DEF_CSTR(dmtime, "dmtime");
|
DEF_CSTR(dmtime, "dmtime");
|
||||||
DEF_CSTR(dquote, "\"");
|
DEF_CSTR(dquote, "\"");
|
||||||
DEF_CSTR(fbytes, "fbytes");
|
DEF_CSTR(fbytes, "fbytes");
|
||||||
DEF_CSTR(fileu, "file://");
|
DEF_CSTR(fileu, "file://");
|
||||||
DEF_CSTR(fmtime, "fmtime");
|
DEF_CSTR(fmtime, "fmtime");
|
||||||
DEF_CSTR(ipath, "ipath");
|
|
||||||
DEF_CSTR(iso_8859_1, "ISO-8859-1");
|
DEF_CSTR(iso_8859_1, "ISO-8859-1");
|
||||||
DEF_CSTR(md5, "md5");
|
|
||||||
DEF_CSTR(mimetype, "mimetype");
|
|
||||||
DEF_CSTR(minwilds, "*?[");
|
DEF_CSTR(minwilds, "*?[");
|
||||||
DEF_CSTR(newline, "\n");
|
DEF_CSTR(newline, "\n");
|
||||||
DEF_CSTR(origcharset, "origcharset");
|
|
||||||
DEF_CSTR(null, "");
|
DEF_CSTR(null, "");
|
||||||
DEF_CSTR(plus, "+");
|
DEF_CSTR(plus, "+");
|
||||||
DEF_CSTR(textplain, "text/plain");
|
DEF_CSTR(textplain, "text/plain");
|
||||||
DEF_CSTR(url, "url");
|
DEF_CSTR(url, "url");
|
||||||
|
|
||||||
|
|
||||||
|
// Values used as keys inside Dijon::Filter::metaData[]. This structure is
|
||||||
|
// used to store all data generated by format-translating filters. It is
|
||||||
|
// different from Rcl::Doc for mostly historical reasons. The translation
|
||||||
|
// from Filter to Doc occurs inside internfile.cpp
|
||||||
|
DEF_CSTR(dj_keyds, "description");
|
||||||
|
DEF_CSTR(dj_keyfn, "filename");
|
||||||
|
DEF_CSTR(dj_keymd, "modificationdate");
|
||||||
|
DEF_CSTR(dj_keyorigcharset, "origcharset");
|
||||||
|
DEF_CSTR(dj_keytitle, "title");
|
||||||
|
DEF_CSTR(dj_keyrecipient, "recipient");
|
||||||
|
DEF_CSTR(dj_keymsgid, "msgid");
|
||||||
|
DEF_CSTR(dj_keyabstract, "abstract");
|
||||||
|
DEF_CSTR(dj_keyauthor, "author");
|
||||||
|
DEF_CSTR(dj_keycharset, "charset");
|
||||||
|
DEF_CSTR(dj_keycontent, "content");
|
||||||
|
DEF_CSTR(dj_keyipath, "ipath");
|
||||||
|
DEF_CSTR(dj_keymd5, "md5");
|
||||||
|
DEF_CSTR(dj_keymt, "mimetype");
|
||||||
|
DEF_CSTR(dj_keydocsize, "docsize");
|
||||||
|
|
||||||
#endif /* _CSTR_H_INCLUDED_ */
|
#endif /* _CSTR_H_INCLUDED_ */
|
||||||
|
|||||||
@ -161,7 +161,7 @@ public:
|
|||||||
m_fields.set((*it).first, (*it).second, cstr_null);
|
m_fields.set((*it).first, (*it).second, cstr_null);
|
||||||
}
|
}
|
||||||
m_fields.set(cstr_url, doc.url, cstr_null);
|
m_fields.set(cstr_url, doc.url, cstr_null);
|
||||||
m_fields.set(cstr_mimetype, doc.mimetype, cstr_null);
|
m_fields.set(cstr_bgc_mimetype, doc.mimetype, cstr_null);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -638,14 +638,6 @@ static inline bool getKeyValue(const map<string, string>& docdata,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// These defs are for the Dijon meta array. Rcl::Doc predefined field
|
|
||||||
// names are used where appropriate. In some cases, Rcl::Doc names are
|
|
||||||
// used inside the Dijon metadata (ex: origcharset)
|
|
||||||
static const string cstr_keyds("description");
|
|
||||||
static const string cstr_keyfn("filename");
|
|
||||||
static const string cstr_keymd("modificationdate");
|
|
||||||
static const string cstr_keytt("title");
|
|
||||||
|
|
||||||
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
||||||
{
|
{
|
||||||
Dijon::Filter *df = m_handlers.back();
|
Dijon::Filter *df = m_handlers.back();
|
||||||
@ -658,21 +650,21 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
|||||||
|
|
||||||
for (map<string,string>::const_iterator it = docdata.begin();
|
for (map<string,string>::const_iterator it = docdata.begin();
|
||||||
it != docdata.end(); it++) {
|
it != docdata.end(); it++) {
|
||||||
if (it->first == cstr_content) {
|
if (it->first == cstr_dj_keycontent) {
|
||||||
doc.text = it->second;
|
doc.text = it->second;
|
||||||
} else if (it->first == cstr_keymd) {
|
} else if (it->first == cstr_dj_keymd) {
|
||||||
doc.dmtime = it->second;
|
doc.dmtime = it->second;
|
||||||
} else if (it->first == Rcl::Doc::keyoc) {
|
} else if (it->first == cstr_dj_keyorigcharset) {
|
||||||
doc.origcharset = it->second;
|
doc.origcharset = it->second;
|
||||||
} else if (it->first == cstr_mimetype || it->first == cstr_charset) {
|
} else if (it->first == cstr_dj_keymt || it->first == cstr_dj_keycharset) {
|
||||||
// don't need/want these.
|
// don't need/want these.
|
||||||
} else {
|
} else {
|
||||||
doc.meta[it->first] = it->second;
|
doc.meta[it->first] = it->second;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[cstr_keyds].empty()) {
|
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[cstr_dj_keyds].empty()) {
|
||||||
doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_keyds];
|
doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_dj_keyds];
|
||||||
doc.meta.erase(cstr_keyds);
|
doc.meta.erase(cstr_dj_keyds);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -704,19 +696,19 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const
|
|||||||
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
|
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
|
||||||
hit != m_handlers.end(); hit++) {
|
hit != m_handlers.end(); hit++) {
|
||||||
const map<string, string>& docdata = (*hit)->get_meta_data();
|
const map<string, string>& docdata = (*hit)->get_meta_data();
|
||||||
if (getKeyValue(docdata, cstr_ipath, ipathel)) {
|
if (getKeyValue(docdata, cstr_dj_keyipath, ipathel)) {
|
||||||
if (!ipathel.empty()) {
|
if (!ipathel.empty()) {
|
||||||
// We have a non-empty ipath
|
// We have a non-empty ipath
|
||||||
hasipath = true;
|
hasipath = true;
|
||||||
getKeyValue(docdata, cstr_mimetype, doc.mimetype);
|
getKeyValue(docdata, cstr_dj_keymt, doc.mimetype);
|
||||||
getKeyValue(docdata, cstr_keyfn, doc.utf8fn);
|
getKeyValue(docdata, cstr_dj_keyfn, doc.utf8fn);
|
||||||
}
|
}
|
||||||
doc.ipath += colon_hide(ipathel) + cstr_isep;
|
doc.ipath += colon_hide(ipathel) + cstr_isep;
|
||||||
} else {
|
} else {
|
||||||
doc.ipath += cstr_isep;
|
doc.ipath += cstr_isep;
|
||||||
}
|
}
|
||||||
getKeyValue(docdata, cstr_author, doc.meta[Rcl::Doc::keyau]);
|
getKeyValue(docdata, cstr_dj_keyauthor, doc.meta[Rcl::Doc::keyau]);
|
||||||
getKeyValue(docdata, cstr_keymd, doc.dmtime);
|
getKeyValue(docdata, cstr_dj_keymd, doc.dmtime);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Trim empty tail elements in ipath.
|
// Trim empty tail elements in ipath.
|
||||||
@ -754,8 +746,8 @@ int FileInterner::addHandler()
|
|||||||
{
|
{
|
||||||
const map<string, string>& docdata = m_handlers.back()->get_meta_data();
|
const map<string, string>& docdata = m_handlers.back()->get_meta_data();
|
||||||
string charset, mimetype;
|
string charset, mimetype;
|
||||||
getKeyValue(docdata, cstr_charset, charset);
|
getKeyValue(docdata, cstr_dj_keycharset, charset);
|
||||||
getKeyValue(docdata, cstr_mimetype, mimetype);
|
getKeyValue(docdata, cstr_dj_keymt, mimetype);
|
||||||
|
|
||||||
LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
|
LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
|
||||||
|
|
||||||
@ -796,7 +788,7 @@ int FileInterner::addHandler()
|
|||||||
const string *txt = &ns;
|
const string *txt = &ns;
|
||||||
{
|
{
|
||||||
map<string,string>::const_iterator it;
|
map<string,string>::const_iterator it;
|
||||||
it = docdata.find(cstr_content);
|
it = docdata.find(cstr_dj_keycontent);
|
||||||
if (it != docdata.end())
|
if (it != docdata.end())
|
||||||
txt = &it->second;
|
txt = &it->second;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -94,7 +94,7 @@ bool MimeHandlerExec::next_document()
|
|||||||
myparams.push_back(m_ipath);
|
myparams.push_back(m_ipath);
|
||||||
|
|
||||||
// Execute command, store the output
|
// Execute command, store the output
|
||||||
string& output = m_metaData[cstr_content];
|
string& output = m_metaData[cstr_dj_keycontent];
|
||||||
output.erase();
|
output.erase();
|
||||||
ExecCmd mexec;
|
ExecCmd mexec;
|
||||||
MEAdv adv(filtermaxseconds);
|
MEAdv adv(filtermaxseconds);
|
||||||
@ -145,16 +145,16 @@ bool MimeHandlerExec::next_document()
|
|||||||
|
|
||||||
void MimeHandlerExec::finaldetails()
|
void MimeHandlerExec::finaldetails()
|
||||||
{
|
{
|
||||||
m_metaData[cstr_origcharset] = m_dfltInputCharset;
|
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
||||||
|
|
||||||
// cfgFilterOutputCharset comes from the mimeconf filter definition line
|
// cfgFilterOutputCharset comes from the mimeconf filter definition line
|
||||||
string& charset = m_metaData[cstr_charset];
|
string& charset = m_metaData[cstr_dj_keycharset];
|
||||||
charset = cfgFilterOutputCharset.empty() ? "UTF-8" : cfgFilterOutputCharset;
|
charset = cfgFilterOutputCharset.empty() ? "UTF-8" : cfgFilterOutputCharset;
|
||||||
if (!stringlowercmp("default", charset)) {
|
if (!stringlowercmp("default", charset)) {
|
||||||
charset = m_dfltInputCharset;
|
charset = m_dfltInputCharset;
|
||||||
}
|
}
|
||||||
|
|
||||||
string& mt = m_metaData[cstr_mimetype];
|
string& mt = m_metaData[cstr_dj_keymt];
|
||||||
mt = cfgFilterOutputMtype.empty() ? "text/html" :
|
mt = cfgFilterOutputMtype.empty() ? "text/html" :
|
||||||
cfgFilterOutputMtype;
|
cfgFilterOutputMtype;
|
||||||
|
|
||||||
@ -165,7 +165,7 @@ void MimeHandlerExec::finaldetails()
|
|||||||
|
|
||||||
string md5, xmd5, reason;
|
string md5, xmd5, reason;
|
||||||
if (MD5File(m_fn, md5, &reason)) {
|
if (MD5File(m_fn, md5, &reason)) {
|
||||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
} else {
|
} else {
|
||||||
LOGERR(("MimeHandlerExec: cant compute md5 for [%s]: %s\n",
|
LOGERR(("MimeHandlerExec: cant compute md5 for [%s]: %s\n",
|
||||||
m_fn.c_str(), reason.c_str()));
|
m_fn.c_str(), reason.c_str()));
|
||||||
|
|||||||
@ -66,7 +66,7 @@ bool MimeHandlerExecMultiple::startCmd()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Note: data is not used if this is the "document:" field: it goes
|
// Note: data is not used if this is the "document:" field: it goes
|
||||||
// directly to m_metaData["content"] to avoid an extra copy
|
// directly to m_metaData[cstr_dj_keycontent] to avoid an extra copy
|
||||||
//
|
//
|
||||||
// Messages are made of data elements. Each element is like:
|
// Messages are made of data elements. Each element is like:
|
||||||
// name: len\ndata
|
// name: len\ndata
|
||||||
@ -118,11 +118,11 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
|
|||||||
LOGDEB1(("MHExecMultiple: got name [%s] len: %d\n", name.c_str(), len));
|
LOGDEB1(("MHExecMultiple: got name [%s] len: %d\n", name.c_str(), len));
|
||||||
|
|
||||||
// Hack: check for 'Document:' and read directly the document data
|
// Hack: check for 'Document:' and read directly the document data
|
||||||
// to m_metaData["content"] to avoid an extra copy of the bulky
|
// to m_metaData[cstr_dj_keycontent] to avoid an extra copy of the bulky
|
||||||
// piece
|
// piece
|
||||||
string *datap = &data;
|
string *datap = &data;
|
||||||
if (!stringlowercmp("document:", name)) {
|
if (!stringlowercmp("document:", name)) {
|
||||||
datap = &m_metaData[cstr_content];
|
datap = &m_metaData[cstr_dj_keycontent];
|
||||||
} else {
|
} else {
|
||||||
datap = &data;
|
datap = &data;
|
||||||
}
|
}
|
||||||
@ -238,7 +238,7 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
// It used to be that eof could be signalled just by an empty document, but
|
// It used to be that eof could be signalled just by an empty document, but
|
||||||
// this was wrong. Empty documents can be found, e.g. in zip files, and should
|
// this was wrong. Empty documents can be found, e.g. in zip files, and should
|
||||||
// not be interpreted as eof.
|
// not be interpreted as eof.
|
||||||
if (m_metaData[cstr_content].empty()) {
|
if (m_metaData[cstr_dj_keycontent].empty()) {
|
||||||
LOGDEB0(("MHExecMultiple: got empty document inside [%s]: [%s]\n",
|
LOGDEB0(("MHExecMultiple: got empty document inside [%s]: [%s]\n",
|
||||||
m_fn.c_str(), ipath.c_str()));
|
m_fn.c_str(), ipath.c_str()));
|
||||||
}
|
}
|
||||||
@ -248,14 +248,14 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
// mimetype, or the ipath MUST be a filename-like string which we can use
|
// mimetype, or the ipath MUST be a filename-like string which we can use
|
||||||
// to compute a mime type
|
// to compute a mime type
|
||||||
if (!ipath.empty()) {
|
if (!ipath.empty()) {
|
||||||
m_metaData[cstr_ipath] = ipath;
|
m_metaData[cstr_dj_keyipath] = ipath;
|
||||||
if (mtype.empty()) {
|
if (mtype.empty()) {
|
||||||
LOGDEB0(("MHExecMultiple: no mime type from filter, "
|
LOGDEB0(("MHExecMultiple: no mime type from filter, "
|
||||||
"using ipath for a guess\n"));
|
"using ipath for a guess\n"));
|
||||||
mtype = mimetype(ipath, 0, m_config, false);
|
mtype = mimetype(ipath, 0, m_config, false);
|
||||||
if (mtype.empty()) {
|
if (mtype.empty()) {
|
||||||
// mimetype() won't call idFile when there is no file. Do it
|
// mimetype() won't call idFile when there is no file. Do it
|
||||||
mtype = idFileMem(m_metaData[cstr_content]);
|
mtype = idFileMem(m_metaData[cstr_dj_keycontent]);
|
||||||
if (mtype.empty()) {
|
if (mtype.empty()) {
|
||||||
// Note this happens for example for directory zip members
|
// Note this happens for example for directory zip members
|
||||||
// We could recognize them by the end /, but wouldn't know
|
// We could recognize them by the end /, but wouldn't know
|
||||||
@ -265,16 +265,16 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m_metaData[cstr_mimetype] = mtype;
|
m_metaData[cstr_dj_keymt] = mtype;
|
||||||
string md5, xmd5;
|
string md5, xmd5;
|
||||||
MD5String(m_metaData[cstr_content], md5);
|
MD5String(m_metaData[cstr_dj_keycontent], md5);
|
||||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
} else {
|
} else {
|
||||||
m_metaData[cstr_mimetype] = mtype.empty() ? "text/html" : mtype;
|
m_metaData[cstr_dj_keymt] = mtype.empty() ? "text/html" : mtype;
|
||||||
m_metaData.erase(cstr_ipath);
|
m_metaData.erase(cstr_dj_keyipath);
|
||||||
string md5, xmd5, reason;
|
string md5, xmd5, reason;
|
||||||
if (MD5File(m_fn, md5, &reason)) {
|
if (MD5File(m_fn, md5, &reason)) {
|
||||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
} else {
|
} else {
|
||||||
LOGERR(("MimeHandlerExecM: cant compute md5 for [%s]: %s\n",
|
LOGERR(("MimeHandlerExecM: cant compute md5 for [%s]: %s\n",
|
||||||
m_fn.c_str(), reason.c_str()));
|
m_fn.c_str(), reason.c_str()));
|
||||||
@ -290,10 +290,10 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
charset = m_dfltInputCharset;
|
charset = m_dfltInputCharset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m_metaData[cstr_origcharset] = charset;
|
m_metaData[cstr_dj_keyorigcharset] = charset;
|
||||||
m_metaData[cstr_charset] = charset;
|
m_metaData[cstr_dj_keycharset] = charset;
|
||||||
|
|
||||||
if (!m_metaData[cstr_mimetype].compare(cstr_textplain)) {
|
if (!m_metaData[cstr_dj_keymt].compare(cstr_textplain)) {
|
||||||
(void)txtdcode("mh_execm");
|
(void)txtdcode("mh_execm");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -302,7 +302,7 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
m_havedoc = false;
|
m_havedoc = false;
|
||||||
|
|
||||||
LOGDEB0(("MHExecMultiple: returning %d bytes of content,"
|
LOGDEB0(("MHExecMultiple: returning %d bytes of content,"
|
||||||
" mtype [%s] charset [%s]\n", m_metaData[cstr_content].size(),
|
" mtype [%s] charset [%s]\n", m_metaData[cstr_dj_keycontent].size(),
|
||||||
m_metaData[cstr_mimetype].c_str(), m_metaData[cstr_charset].c_str()));
|
m_metaData[cstr_dj_keymt].c_str(), m_metaData[cstr_dj_keycharset].c_str()));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -56,7 +56,7 @@ bool MimeHandlerHtml::set_document_string(const string& htext)
|
|||||||
// We want to compute the md5 now because we may modify m_html later
|
// We want to compute the md5 now because we may modify m_html later
|
||||||
string md5, xmd5;
|
string md5, xmd5;
|
||||||
MD5String(htext, md5);
|
MD5String(htext, md5);
|
||||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -74,7 +74,7 @@ bool MimeHandlerHtml::next_document()
|
|||||||
LOGDEB(("MHHtml::next_doc.: default supposed input charset: [%s]\n",
|
LOGDEB(("MHHtml::next_doc.: default supposed input charset: [%s]\n",
|
||||||
charset.c_str()));
|
charset.c_str()));
|
||||||
// Override default input charset if someone took care to set one:
|
// Override default input charset if someone took care to set one:
|
||||||
map<string,string>::const_iterator it = m_metaData.find(cstr_charset);
|
map<string,string>::const_iterator it = m_metaData.find(cstr_dj_keycharset);
|
||||||
if (it != m_metaData.end() && !it->second.empty()) {
|
if (it != m_metaData.end() && !it->second.empty()) {
|
||||||
charset = it->second;
|
charset = it->second;
|
||||||
LOGDEB(("MHHtml: next_doc.: input charset from ext. metadata: [%s]\n",
|
LOGDEB(("MHHtml: next_doc.: input charset from ext. metadata: [%s]\n",
|
||||||
@ -163,14 +163,14 @@ bool MimeHandlerHtml::next_document()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
m_metaData[cstr_origcharset] = result.get_charset();
|
m_metaData[cstr_dj_keyorigcharset] = result.get_charset();
|
||||||
m_metaData[cstr_content] = result.dump;
|
m_metaData[cstr_dj_keycontent] = result.dump;
|
||||||
m_metaData[cstr_charset] = "utf-8";
|
m_metaData[cstr_dj_keycharset] = "utf-8";
|
||||||
// Avoid setting empty values which would crush ones possibly inherited
|
// Avoid setting empty values which would crush ones possibly inherited
|
||||||
// from parent (if we're an attachment)
|
// from parent (if we're an attachment)
|
||||||
if (!result.dmtime.empty())
|
if (!result.dmtime.empty())
|
||||||
m_metaData["modificationdate"] = result.dmtime;
|
m_metaData[cstr_dj_keymd] = result.dmtime;
|
||||||
m_metaData[cstr_mimetype] = cstr_textplain;
|
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||||
|
|
||||||
for (map<string,string>::const_iterator it = result.meta.begin();
|
for (map<string,string>::const_iterator it = result.meta.begin();
|
||||||
it != result.meta.end(); it++) {
|
it != result.meta.end(); it++) {
|
||||||
|
|||||||
@ -44,11 +44,7 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
static const int maxdepth = 20;
|
static const int maxdepth = 20;
|
||||||
static const string cstr_recipient = "recipient";
|
static const string cstr_mail_charset("charset");
|
||||||
static const string cstr_modificationdate = "modificationdate";
|
|
||||||
static const string cstr_title = "title";
|
|
||||||
static const string cstr_msgid = "msgid";
|
|
||||||
static const string cstr_abstract = "abstract";
|
|
||||||
|
|
||||||
MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt)
|
MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt)
|
||||||
: RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
|
: RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
|
||||||
@ -100,7 +96,7 @@ bool MimeHandlerMail::set_document_file(const string &fn)
|
|||||||
// the md5 computation to the mime analysis, but ...
|
// the md5 computation to the mime analysis, but ...
|
||||||
string md5, xmd5, reason;
|
string md5, xmd5, reason;
|
||||||
if (MD5File(fn, md5, &reason)) {
|
if (MD5File(fn, md5, &reason)) {
|
||||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
} else {
|
} else {
|
||||||
LOGERR(("MimeHandlerMail: cant compute md5 for [%s]: %s\n", fn.c_str(),
|
LOGERR(("MimeHandlerMail: cant compute md5 for [%s]: %s\n", fn.c_str(),
|
||||||
reason.c_str()));
|
reason.c_str()));
|
||||||
@ -132,7 +128,7 @@ bool MimeHandlerMail::set_document_string(const string &msgtxt)
|
|||||||
|
|
||||||
string md5, xmd5;
|
string md5, xmd5;
|
||||||
MD5String(msgtxt, md5);
|
MD5String(msgtxt, md5);
|
||||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
|
|
||||||
m_stream = new stringstream(msgtxt);
|
m_stream = new stringstream(msgtxt);
|
||||||
delete m_bincdoc;
|
delete m_bincdoc;
|
||||||
@ -172,16 +168,16 @@ bool MimeHandlerMail::next_document()
|
|||||||
bool res = false;
|
bool res = false;
|
||||||
|
|
||||||
if (m_idx == -1) {
|
if (m_idx == -1) {
|
||||||
m_metaData[cstr_mimetype] = cstr_textplain;
|
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||||
res = processMsg(m_bincdoc, 0);
|
res = processMsg(m_bincdoc, 0);
|
||||||
LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
|
LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
|
||||||
m_metaData[cstr_mimetype].c_str()));
|
m_metaData[cstr_dj_keymt].c_str()));
|
||||||
const string& txt = m_metaData[cstr_content];
|
const string& txt = m_metaData[cstr_dj_keycontent];
|
||||||
if (m_startoftext < txt.size())
|
if (m_startoftext < txt.size())
|
||||||
m_metaData[cstr_abstract] =
|
m_metaData[cstr_dj_keyabstract] =
|
||||||
truncate_to_word(txt.substr(m_startoftext), 250);
|
truncate_to_word(txt.substr(m_startoftext), 250);
|
||||||
} else {
|
} else {
|
||||||
m_metaData[cstr_abstract].clear();
|
m_metaData[cstr_dj_keyabstract].clear();
|
||||||
res = processAttach();
|
res = processAttach();
|
||||||
}
|
}
|
||||||
m_idx++;
|
m_idx++;
|
||||||
@ -235,18 +231,18 @@ bool MimeHandlerMail::processAttach()
|
|||||||
}
|
}
|
||||||
MHMailAttach *att = m_attachments[m_idx];
|
MHMailAttach *att = m_attachments[m_idx];
|
||||||
|
|
||||||
m_metaData[cstr_mimetype] = att->m_contentType;
|
m_metaData[cstr_dj_keymt] = att->m_contentType;
|
||||||
m_metaData[cstr_charset] = att->m_charset;
|
m_metaData[cstr_dj_keycharset] = att->m_charset;
|
||||||
m_metaData["filename"] = att->m_filename;
|
m_metaData[cstr_dj_keyfn] = att->m_filename;
|
||||||
// Change the title to something helpful
|
// Change the title to something helpful
|
||||||
m_metaData[cstr_title] = att->m_filename + " (" + m_subject + ")";
|
m_metaData[cstr_dj_keytitle] = att->m_filename + " (" + m_subject + ")";
|
||||||
LOGDEB1((" processAttach:ct [%s] cs [%s] fn [%s]\n",
|
LOGDEB1((" processAttach:ct [%s] cs [%s] fn [%s]\n",
|
||||||
att->m_contentType.c_str(),
|
att->m_contentType.c_str(),
|
||||||
att->m_charset.c_str(),
|
att->m_charset.c_str(),
|
||||||
att->m_filename.c_str()));
|
att->m_filename.c_str()));
|
||||||
|
|
||||||
m_metaData[cstr_content] = string();
|
m_metaData[cstr_dj_keycontent] = string();
|
||||||
string& body = m_metaData[cstr_content];
|
string& body = m_metaData[cstr_dj_keycontent];
|
||||||
att->m_part->getBody(body, 0, att->m_part->bodylength);
|
att->m_part->getBody(body, 0, att->m_part->bodylength);
|
||||||
string decoded;
|
string decoded;
|
||||||
const string *bdp;
|
const string *bdp;
|
||||||
@ -259,11 +255,11 @@ bool MimeHandlerMail::processAttach()
|
|||||||
// Special case for text/plain content. Internfile should deal
|
// Special case for text/plain content. Internfile should deal
|
||||||
// with this but it expects text/plain to be utf-8 already, so we
|
// with this but it expects text/plain to be utf-8 already, so we
|
||||||
// handle the transcoding if needed
|
// handle the transcoding if needed
|
||||||
if (m_metaData[cstr_mimetype] == cstr_textplain) {
|
if (m_metaData[cstr_dj_keymt] == cstr_textplain) {
|
||||||
string utf8;
|
string utf8;
|
||||||
if (!transcode(body, utf8, m_metaData[cstr_charset], "UTF-8")) {
|
if (!transcode(body, utf8, m_metaData[cstr_dj_keycharset], "UTF-8")) {
|
||||||
LOGERR((" processAttach: transcode to utf-8 failed "
|
LOGERR((" processAttach: transcode to utf-8 failed "
|
||||||
"for charset [%s]\n", m_metaData[cstr_charset].c_str()));
|
"for charset [%s]\n", m_metaData[cstr_dj_keycharset].c_str()));
|
||||||
// can't transcode at all -> data is garbage just erase it
|
// can't transcode at all -> data is garbage just erase it
|
||||||
body.clear();
|
body.clear();
|
||||||
} else {
|
} else {
|
||||||
@ -273,18 +269,18 @@ bool MimeHandlerMail::processAttach()
|
|||||||
|
|
||||||
// Special case for application/octet-stream: try to better
|
// Special case for application/octet-stream: try to better
|
||||||
// identify content, using file name if set
|
// identify content, using file name if set
|
||||||
if (m_metaData[cstr_mimetype] == "application/octet-stream" &&
|
if (m_metaData[cstr_dj_keymt] == "application/octet-stream" &&
|
||||||
!m_metaData["filename"].empty()) {
|
!m_metaData[cstr_dj_keyfn].empty()) {
|
||||||
string mt = mimetype(m_metaData["filename"], 0,
|
string mt = mimetype(m_metaData[cstr_dj_keyfn], 0,
|
||||||
m_config, false);
|
m_config, false);
|
||||||
if (!mt.empty())
|
if (!mt.empty())
|
||||||
m_metaData[cstr_mimetype] = mt;
|
m_metaData[cstr_dj_keymt] = mt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ipath
|
// Ipath
|
||||||
char nbuf[20];
|
char nbuf[20];
|
||||||
sprintf(nbuf, "%d", m_idx);
|
sprintf(nbuf, "%d", m_idx);
|
||||||
m_metaData[cstr_ipath] = nbuf;
|
m_metaData[cstr_dj_keyipath] = nbuf;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -308,7 +304,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Handle some headers.
|
// Handle some headers.
|
||||||
string& text = m_metaData[cstr_content];
|
string& text = m_metaData[cstr_dj_keycontent];
|
||||||
Binc::HeaderItem hi;
|
Binc::HeaderItem hi;
|
||||||
string transcoded;
|
string transcoded;
|
||||||
if (doc->h.getFirstHeader("From", hi)) {
|
if (doc->h.getFirstHeader("From", hi)) {
|
||||||
@ -317,7 +313,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
|||||||
text += string("From: ");
|
text += string("From: ");
|
||||||
text += transcoded + cstr_newline;
|
text += transcoded + cstr_newline;
|
||||||
if (depth == 1) {
|
if (depth == 1) {
|
||||||
m_metaData[cstr_author] = transcoded;
|
m_metaData[cstr_dj_keyauthor] = transcoded;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (doc->h.getFirstHeader("To", hi)) {
|
if (doc->h.getFirstHeader("To", hi)) {
|
||||||
@ -326,7 +322,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
|||||||
text += string("To: ");
|
text += string("To: ");
|
||||||
text += transcoded + cstr_newline;
|
text += transcoded + cstr_newline;
|
||||||
if (depth == 1) {
|
if (depth == 1) {
|
||||||
m_metaData[cstr_recipient] = transcoded;
|
m_metaData[cstr_dj_keyrecipient] = transcoded;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (doc->h.getFirstHeader("Cc", hi)) {
|
if (doc->h.getFirstHeader("Cc", hi)) {
|
||||||
@ -335,13 +331,13 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
|||||||
text += string("Cc: ");
|
text += string("Cc: ");
|
||||||
text += transcoded + cstr_newline;
|
text += transcoded + cstr_newline;
|
||||||
if (depth == 1) {
|
if (depth == 1) {
|
||||||
m_metaData[cstr_recipient] += " " + transcoded;
|
m_metaData[cstr_dj_keyrecipient] += " " + transcoded;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (doc->h.getFirstHeader("Message-Id", hi)) {
|
if (doc->h.getFirstHeader("Message-Id", hi)) {
|
||||||
if (depth == 1) {
|
if (depth == 1) {
|
||||||
m_metaData[cstr_msgid] = hi.getValue();
|
m_metaData[cstr_dj_keymsgid] = hi.getValue();
|
||||||
trimstring(m_metaData[cstr_msgid], "<>");
|
trimstring(m_metaData[cstr_dj_keymsgid], "<>");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (doc->h.getFirstHeader("Date", hi)) {
|
if (doc->h.getFirstHeader("Date", hi)) {
|
||||||
@ -351,7 +347,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
|||||||
if (t != (time_t)-1) {
|
if (t != (time_t)-1) {
|
||||||
char ascuxtime[100];
|
char ascuxtime[100];
|
||||||
sprintf(ascuxtime, "%ld", (long)t);
|
sprintf(ascuxtime, "%ld", (long)t);
|
||||||
m_metaData[cstr_modificationdate] = ascuxtime;
|
m_metaData[cstr_dj_keymd] = ascuxtime;
|
||||||
} else {
|
} else {
|
||||||
// Leave mtime field alone, ftime will be used instead.
|
// Leave mtime field alone, ftime will be used instead.
|
||||||
LOGDEB(("rfc2822Date...: failed: [%s]\n", transcoded.c_str()));
|
LOGDEB(("rfc2822Date...: failed: [%s]\n", transcoded.c_str()));
|
||||||
@ -364,7 +360,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
|||||||
if (doc->h.getFirstHeader("Subject", hi)) {
|
if (doc->h.getFirstHeader("Subject", hi)) {
|
||||||
rfc2047_decode(hi.getValue(), transcoded);
|
rfc2047_decode(hi.getValue(), transcoded);
|
||||||
if (depth == 1) {
|
if (depth == 1) {
|
||||||
m_metaData[cstr_title] = transcoded;
|
m_metaData[cstr_dj_keytitle] = transcoded;
|
||||||
m_subject = transcoded;
|
m_subject = transcoded;
|
||||||
}
|
}
|
||||||
if (preview())
|
if (preview())
|
||||||
@ -393,7 +389,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
|||||||
walkmime(doc, depth);
|
walkmime(doc, depth);
|
||||||
|
|
||||||
LOGDEB2(("MimeHandlerMail::processMsg:text:[%s]\n",
|
LOGDEB2(("MimeHandlerMail::processMsg:text:[%s]\n",
|
||||||
m_metaData[cstr_content].c_str()));
|
m_metaData[cstr_dj_keycontent].c_str()));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -415,7 +411,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
string& out = m_metaData[cstr_content];
|
string& out = m_metaData[cstr_dj_keycontent];
|
||||||
|
|
||||||
if (doc->isMultipart()) {
|
if (doc->isMultipart()) {
|
||||||
LOGDEB2(("walkmime: ismultipart %d subtype '%s'\n",
|
LOGDEB2(("walkmime: ismultipart %d subtype '%s'\n",
|
||||||
@ -527,7 +523,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
// to iso-8859 only if the transfer-encoding is 8 bit, or test for
|
// to iso-8859 only if the transfer-encoding is 8 bit, or test for
|
||||||
// actual 8 bit chars, but what the heck, le'ts use 8859-1 as default
|
// actual 8 bit chars, but what the heck, le'ts use 8859-1 as default
|
||||||
string charset;
|
string charset;
|
||||||
it = content_type.params.find(string(cstr_charset));
|
it = content_type.params.find(cstr_mail_charset);
|
||||||
if (it != content_type.params.end())
|
if (it != content_type.params.end())
|
||||||
charset = it->second;
|
charset = it->second;
|
||||||
if (charset.empty() ||
|
if (charset.empty() ||
|
||||||
@ -609,7 +605,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
mh.set_document_string(body);
|
mh.set_document_string(body);
|
||||||
mh.next_document();
|
mh.next_document();
|
||||||
map<string, string>::const_iterator it =
|
map<string, string>::const_iterator it =
|
||||||
mh.get_meta_data().find(cstr_content);
|
mh.get_meta_data().find(cstr_dj_keycontent);
|
||||||
if (it != mh.get_meta_data().end())
|
if (it != mh.get_meta_data().end())
|
||||||
out += it->second;
|
out += it->second;
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@ -435,7 +435,7 @@ bool MimeHandlerMbox::next_document()
|
|||||||
off_t message_end = 0;
|
off_t message_end = 0;
|
||||||
bool iseof = false;
|
bool iseof = false;
|
||||||
bool hademptyline = true;
|
bool hademptyline = true;
|
||||||
string& msgtxt = m_metaData[cstr_content];
|
string& msgtxt = m_metaData[cstr_dj_keycontent];
|
||||||
msgtxt.erase();
|
msgtxt.erase();
|
||||||
line_type line;
|
line_type line;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
@ -499,8 +499,8 @@ bool MimeHandlerMbox::next_document()
|
|||||||
// m_msgnum was incremented when hitting the next From_ or eof, so the data
|
// m_msgnum was incremented when hitting the next From_ or eof, so the data
|
||||||
// is for m_msgnum - 1
|
// is for m_msgnum - 1
|
||||||
sprintf(buf, "%d", m_msgnum - 1);
|
sprintf(buf, "%d", m_msgnum - 1);
|
||||||
m_metaData[cstr_ipath] = buf;
|
m_metaData[cstr_dj_keyipath] = buf;
|
||||||
m_metaData[cstr_mimetype] = "message/rfc822";
|
m_metaData[cstr_dj_keymt] = "message/rfc822";
|
||||||
if (iseof) {
|
if (iseof) {
|
||||||
LOGDEB2(("MimeHandlerMbox::next: eof hit\n"));
|
LOGDEB2(("MimeHandlerMbox::next: eof hit\n"));
|
||||||
m_havedoc = false;
|
m_havedoc = false;
|
||||||
@ -591,7 +591,7 @@ int main(int argc, char **argv)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
map<string, string>::const_iterator it =
|
map<string, string>::const_iterator it =
|
||||||
mh.get_meta_data().find(cstr_content);
|
mh.get_meta_data().find(cstr_dj_keycontent);
|
||||||
int size;
|
int size;
|
||||||
if (it == mh.get_meta_data().end()) {
|
if (it == mh.get_meta_data().end()) {
|
||||||
size = -1;
|
size = -1;
|
||||||
@ -611,7 +611,7 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
docnt++;
|
docnt++;
|
||||||
map<string, string>::const_iterator it =
|
map<string, string>::const_iterator it =
|
||||||
mh.get_meta_data().find(cstr_content);
|
mh.get_meta_data().find(cstr_dj_keycontent);
|
||||||
int size;
|
int size;
|
||||||
if (it == mh.get_meta_data().end()) {
|
if (it == mh.get_meta_data().end()) {
|
||||||
size = -1;
|
size = -1;
|
||||||
|
|||||||
@ -81,7 +81,7 @@ bool MimeHandlerText::set_document_file(const string &fn)
|
|||||||
|
|
||||||
string md5, xmd5;
|
string md5, xmd5;
|
||||||
MD5String(m_text, md5);
|
MD5String(m_text, md5);
|
||||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
m_havedoc = true;
|
m_havedoc = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -91,7 +91,7 @@ bool MimeHandlerText::set_document_string(const string& otext)
|
|||||||
m_text = otext;
|
m_text = otext;
|
||||||
string md5, xmd5;
|
string md5, xmd5;
|
||||||
MD5String(m_text, md5);
|
MD5String(m_text, md5);
|
||||||
m_metaData[cstr_md5] = MD5HexPrint(md5, xmd5);
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
m_havedoc = true;
|
m_havedoc = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -118,11 +118,11 @@ bool MimeHandlerText::next_document()
|
|||||||
|
|
||||||
// We transcode even if defcharset is supposedly already utf-8:
|
// We transcode even if defcharset is supposedly already utf-8:
|
||||||
// this validates the encoding.
|
// this validates the encoding.
|
||||||
m_metaData[cstr_origcharset] = m_dfltInputCharset;
|
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
||||||
m_metaData[cstr_mimetype] = cstr_textplain;
|
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||||
|
|
||||||
size_t srclen = m_text.length();
|
size_t srclen = m_text.length();
|
||||||
m_metaData[cstr_content].swap(m_text);
|
m_metaData[cstr_dj_keycontent].swap(m_text);
|
||||||
|
|
||||||
// txtdcode() truncates the text if transcoding fails
|
// txtdcode() truncates the text if transcoding fails
|
||||||
(void)txtdcode("mh_text");
|
(void)txtdcode("mh_text");
|
||||||
@ -144,7 +144,7 @@ bool MimeHandlerText::next_document()
|
|||||||
char buf[30];
|
char buf[30];
|
||||||
sprintf(buf, "%lld", (long long)(m_offs - srclen));
|
sprintf(buf, "%lld", (long long)(m_offs - srclen));
|
||||||
if (m_offs - srclen != 0)
|
if (m_offs - srclen != 0)
|
||||||
m_metaData[cstr_ipath] = buf;
|
m_metaData[cstr_dj_keyipath] = buf;
|
||||||
readnext();
|
readnext();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -42,8 +42,8 @@ class MimeHandlerUnknown : public RecollFilter {
|
|||||||
if (m_havedoc == false)
|
if (m_havedoc == false)
|
||||||
return false;
|
return false;
|
||||||
m_havedoc = false;
|
m_havedoc = false;
|
||||||
m_metaData[cstr_content] = cstr_null;
|
m_metaData[cstr_dj_keycontent] = cstr_null;
|
||||||
m_metaData[cstr_mimetype] = cstr_textplain;
|
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
virtual bool is_unknown() {return true;}
|
virtual bool is_unknown() {return true;}
|
||||||
|
|||||||
@ -141,6 +141,7 @@ static Dijon::Filter *mhFactory(RclConfig *config, const string &mime)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const string cstr_mh_charset("charset");
|
||||||
/**
|
/**
|
||||||
* Create a filter that executes an external program or script
|
* Create a filter that executes an external program or script
|
||||||
* A filter def can look like:
|
* A filter def can look like:
|
||||||
@ -179,9 +180,9 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
|||||||
// Handle additional attributes. We substitute the semi-colons
|
// Handle additional attributes. We substitute the semi-colons
|
||||||
// with newlines and use a ConfSimple
|
// with newlines and use a ConfSimple
|
||||||
string value;
|
string value;
|
||||||
if (attrs.get(cstr_charset, value))
|
if (attrs.get(cstr_mh_charset, value))
|
||||||
h->cfgFilterOutputCharset = stringtolower((const string&)value);
|
h->cfgFilterOutputCharset = stringtolower((const string&)value);
|
||||||
if (attrs.get(cstr_mimetype, value))
|
if (attrs.get(cstr_dj_keymt, value))
|
||||||
h->cfgFilterOutputMtype = stringtolower((const string&)value);
|
h->cfgFilterOutputMtype = stringtolower((const string&)value);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
|
|||||||
@ -36,6 +36,9 @@
|
|||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "transcode.h"
|
#include "transcode.h"
|
||||||
|
|
||||||
|
static const string cstr_html_charset("charset");
|
||||||
|
static const string cstr_html_content("content");
|
||||||
|
|
||||||
inline static bool
|
inline static bool
|
||||||
p_notdigit(char c)
|
p_notdigit(char c)
|
||||||
{
|
{
|
||||||
@ -353,7 +356,7 @@ MyHtmlParser::opening_tag(const string &tag)
|
|||||||
case 'm':
|
case 'm':
|
||||||
if (tag == "meta") {
|
if (tag == "meta") {
|
||||||
string content;
|
string content;
|
||||||
if (get_parameter(cstr_content, content)) {
|
if (get_parameter(cstr_html_content, content)) {
|
||||||
string name;
|
string name;
|
||||||
if (get_parameter("name", name)) {
|
if (get_parameter("name", name)) {
|
||||||
lowercase_term(name);
|
lowercase_term(name);
|
||||||
@ -387,7 +390,7 @@ MyHtmlParser::opening_tag(const string &tag)
|
|||||||
MimeHeaderValue p;
|
MimeHeaderValue p;
|
||||||
parseMimeHeaderValue(content, p);
|
parseMimeHeaderValue(content, p);
|
||||||
map<string, string>::const_iterator k;
|
map<string, string>::const_iterator k;
|
||||||
if ((k = p.params.find(cstr_charset)) !=
|
if ((k = p.params.find(cstr_html_charset)) !=
|
||||||
p.params.end()) {
|
p.params.end()) {
|
||||||
charset = k->second;
|
charset = k->second;
|
||||||
if (!samecharset(charset, fromcharset)) {
|
if (!samecharset(charset, fromcharset)) {
|
||||||
@ -402,7 +405,7 @@ MyHtmlParser::opening_tag(const string &tag)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
string newcharset;
|
string newcharset;
|
||||||
if (get_parameter(cstr_charset, newcharset)) {
|
if (get_parameter(cstr_html_charset, newcharset)) {
|
||||||
// HTML5 added: <meta charset="...">
|
// HTML5 added: <meta charset="...">
|
||||||
lowercase_term(newcharset);
|
lowercase_term(newcharset);
|
||||||
charset = newcharset;
|
charset = newcharset;
|
||||||
|
|||||||
@ -22,14 +22,14 @@
|
|||||||
|
|
||||||
bool RecollFilter::txtdcode(const string& who)
|
bool RecollFilter::txtdcode(const string& who)
|
||||||
{
|
{
|
||||||
if (m_metaData[cstr_mimetype].compare(cstr_textplain)) {
|
if (m_metaData[cstr_dj_keymt].compare(cstr_textplain)) {
|
||||||
LOGERR(("%s::txtdcode: called on non txt/plain: %s\n", who.c_str(),
|
LOGERR(("%s::txtdcode: called on non txt/plain: %s\n", who.c_str(),
|
||||||
m_metaData[cstr_mimetype].c_str()));
|
m_metaData[cstr_dj_keymt].c_str()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
string& ocs = m_metaData[cstr_origcharset];
|
string& ocs = m_metaData[cstr_dj_keyorigcharset];
|
||||||
string& itext = m_metaData[cstr_content];
|
string& itext = m_metaData[cstr_dj_keycontent];
|
||||||
LOGDEB0(("%s::txtdcode: %d bytes from [%s] to UTF-8\n",
|
LOGDEB0(("%s::txtdcode: %d bytes from [%s] to UTF-8\n",
|
||||||
who.c_str(), itext.size(), ocs.c_str()));
|
who.c_str(), itext.size(), ocs.c_str()));
|
||||||
int ecnt;
|
int ecnt;
|
||||||
@ -44,6 +44,6 @@ bool RecollFilter::txtdcode(const string& who)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
itext.swap(otext);
|
itext.swap(otext);
|
||||||
m_metaData[cstr_charset] = "UTF-8";
|
m_metaData[cstr_dj_keycharset] = "UTF-8";
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user