When indexing arbitrary email headers, sanitize the data to UTF-8 to avoid later splitter errors

This commit is contained in:
Jean-Francois Dockes 2017-10-20 17:49:30 +02:00
parent 381f369878
commit f83490a5ee

View File

@@ -385,13 +385,12 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
// Check for the presence of configured additional headers and possibly
// add them to the metadata (with appropriate field name).
if (!m_addProcdHdrs.empty()) {
for (map<string, string>::const_iterator it = m_addProcdHdrs.begin();
it != m_addProcdHdrs.end(); it++) {
if (!it->second.empty()) {
string hval;
if (doc->h.getFirstHeader(it->first, hi)) {
m_metaData[it->second] = hi.getValue();
}
for (auto& it : m_addProcdHdrs) {
if (!it.second.empty() && doc->h.getFirstHeader(it.first, hi)) {
// Email headers are supposedly ASCII, but we force a transcode
// from CP1252 to UTF-8 anyway, so that at least partial indexing
// can be done if there are 8-bit characters in there.
transcode(hi.getValue(), m_metaData[it.second], "CP1252", "UTF-8");
}
}
}