From f83490a5eeb858bb9c39d9677d017a9d76012a91 Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes
Date: Fri, 20 Oct 2017 17:49:30 +0200
Subject: [PATCH] When indexing arbitrary email headers: sanitize the data to utf-8 to avoid later splitter errors

---
 src/internfile/mh_mail.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp
index ee17b51f..192c490d 100644
--- a/src/internfile/mh_mail.cpp
+++ b/src/internfile/mh_mail.cpp
@@ -385,13 +385,12 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
     // Check for the presence of configured additional headers and possibly
     // add them to the metadata (with appropriate field name).
     if (!m_addProcdHdrs.empty()) {
-        for (map::const_iterator it = m_addProcdHdrs.begin();
-             it != m_addProcdHdrs.end(); it++) {
-            if (!it->second.empty()) {
-                string hval;
-                if (doc->h.getFirstHeader(it->first, hi)) {
-                    m_metaData[it->second] = hi.getValue();
-                }
+        for (auto& it : m_addProcdHdrs) {
+            if (!it.second.empty() && doc->h.getFirstHeader(it.first, hi)) {
+                // Email headers are supposedly ASCII, but we force
+                // transcode to UTF-8 anyway so that at least partial
+                // indexing can be done if there are 8bit chars in there.
+                transcode(hi.getValue(), m_metaData[it.second], "CP1252", "UTF-8");
             }
         }
     }