When indexing arbitrary email headers, sanitize the data to UTF-8 to avoid later splitter errors

This commit is contained in:
Jean-Francois Dockes 2017-10-20 17:49:30 +02:00
parent 381f369878
commit f83490a5ee

View File

@ -385,13 +385,12 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
// Check for the presence of configured additional headers and possibly // Check for the presence of configured additional headers and possibly
// add them to the metadata (with appropriate field name). // add them to the metadata (with appropriate field name).
if (!m_addProcdHdrs.empty()) { if (!m_addProcdHdrs.empty()) {
for (map<string, string>::const_iterator it = m_addProcdHdrs.begin(); for (auto& it : m_addProcdHdrs) {
it != m_addProcdHdrs.end(); it++) { if (!it.second.empty() && doc->h.getFirstHeader(it.first, hi)) {
if (!it->second.empty()) { // Email headers are supposedly ASCII, but we force
string hval; // transcode to UTF-8 anyway so that at least partial
if (doc->h.getFirstHeader(it->first, hi)) { // indexing can be done if there are 8bit chars in there.
m_metaData[it->second] = hi.getValue(); transcode(hi.getValue(), m_metaData[it.second], "CP1252", "UTF-8");
}
} }
} }
} }