diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp index d766d0c4..60b9c089 100644 --- a/src/internfile/mh_mail.cpp +++ b/src/internfile/mh_mail.cpp @@ -70,16 +70,16 @@ void MimeHandlerMail::clear_impl() { delete m_bincdoc; m_bincdoc = 0; if (m_fd >= 0) { - close(m_fd); - m_fd = -1; + close(m_fd); + m_fd = -1; } delete m_stream; m_stream = 0; m_idx = -1; m_startoftext = 0; m_subject.erase(); for (vector::iterator it = m_attachments.begin(); - it != m_attachments.end(); it++) { - delete *it; + it != m_attachments.end(); it++) { + delete *it; } m_attachments.clear(); } @@ -88,25 +88,25 @@ bool MimeHandlerMail::set_document_file_impl(const string& mt, const string &fn) { LOGDEB("MimeHandlerMail::set_document_file(" << fn << ")\n"); if (m_fd >= 0) { - close(m_fd); - m_fd = -1; + close(m_fd); + m_fd = -1; } if (!m_forPreview) { - // Yes, we read the file twice. It would be possible in theory - // to add the md5 computation to the mime analysis, but ... - string md5, xmd5, reason; - if (MD5File(fn, md5, &reason)) { - m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5); - } else { - LOGERR("MimeHandlerMail: md5 [" << fn << "]: " << reason << "\n"); - } + // Yes, we read the file twice. It would be possible in theory + // to add the md5 computation to the mime analysis, but ... + string md5, xmd5, reason; + if (MD5File(fn, md5, &reason)) { + m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5); + } else { + LOGERR("MimeHandlerMail: md5 [" << fn << "]: " << reason << "\n"); + } } m_fd = open(fn.c_str(), 0); if (m_fd < 0) { - LOGERR("MimeHandlerMail::set_document_file: open(" << fn << + LOGERR("MimeHandlerMail::set_document_file: open(" << fn << ") errno " << errno << "\n"); - return false; + return false; } #if defined O_NOATIME && O_NOATIME != 0 if (fcntl(m_fd, F_SETFL, O_NOATIME) < 0) { @@ -117,8 +117,8 @@ bool MimeHandlerMail::set_document_file_impl(const string& mt, const string &fn) m_bincdoc = new Binc::MimeDocument; m_bincdoc->parseFull(m_fd); if (!m_bincdoc->isHeaderParsed() && !m_bincdoc->isAllParsed()) { - LOGERR("MimeHandlerMail::mkDoc: mime parse error for " << fn << "\n"); - return false; + LOGERR("MimeHandlerMail::mkDoc: mime parse error for " << fn << "\n"); + return false; } m_havedoc = true; return true; @@ -132,26 +132,26 @@ bool MimeHandlerMail::set_document_string_impl(const string& mt, delete m_stream; if (!m_forPreview) { - string md5, xmd5; - MD5String(msgtxt, md5); - m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5); + string md5, xmd5; + MD5String(msgtxt, md5); + m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5); } if ((m_stream = new stringstream(msgtxt)) == 0 || !m_stream->good()) { - LOGERR("MimeHandlerMail::set_document_string: stream create error." + LOGERR("MimeHandlerMail::set_document_string: stream create error." "msgtxt.size() " << msgtxt.size() << "\n"); - return false; + return false; } delete m_bincdoc; if ((m_bincdoc = new Binc::MimeDocument) == 0) { - LOGERR("MimeHandlerMail::set_doc._string: new Binc:Document failed. " + LOGERR("MimeHandlerMail::set_doc._string: new Binc:Document failed. " "Out of memory?"); - return false; + return false; } m_bincdoc->parseFull(*m_stream); if (!m_bincdoc->isHeaderParsed() && !m_bincdoc->isAllParsed()) { - LOGERR("MimeHandlerMail::set_document_string: mime parse error\n"); - return false; + LOGERR("MimeHandlerMail::set_document_string: mime parse error\n"); + return false; } m_havedoc = true; return true; @@ -161,14 +161,14 @@ bool MimeHandlerMail::skip_to_document(const string& ipath) { LOGDEB("MimeHandlerMail::skip_to_document(" << ipath << ")\n"); if (m_idx == -1) { - // No decoding done yet. If ipath is null need do nothing - if (ipath.empty() || ipath == "-1") - return true; - // ipath points to attachment: need to decode message - if (!next_document()) { - LOGERR("MimeHandlerMail::skip_to_doc: next_document failed\n"); - return false; - } + // No decoding done yet. If ipath is null need do nothing + if (ipath.empty() || ipath == "-1") + return true; + // ipath points to attachment: need to decode message + if (!next_document()) { + LOGERR("MimeHandlerMail::skip_to_doc: next_document failed\n"); + return false; + } } m_idx = atoi(ipath.c_str()); return true; @@ -179,25 +179,25 @@ bool MimeHandlerMail::next_document() LOGDEB("MimeHandlerMail::next_document m_idx " << m_idx << " m_havedoc " << m_havedoc << "\n"); if (!m_havedoc) - return false; + return false; bool res = false; if (m_idx == -1) { - m_metaData[cstr_dj_keymt] = cstr_textplain; - res = processMsg(m_bincdoc, 0); - LOGDEB1("MimeHandlerMail::next_document: mt " << + m_metaData[cstr_dj_keymt] = cstr_textplain; + res = processMsg(m_bincdoc, 0); + LOGDEB1("MimeHandlerMail::next_document: mt " << m_metaData[cstr_dj_keymt] << ", att cnt " << m_attachments.size() << "\n"); const string& txt = m_metaData[cstr_dj_keycontent]; if (m_startoftext < txt.size()) m_metaData[cstr_dj_keyabstract] = truncate_to_word(txt.substr(m_startoftext), 250); - if (m_attachments.size() > 0) { - m_metaData[cstr_dj_keyanc] = "t"; - } + if (m_attachments.size() > 0) { + m_metaData[cstr_dj_keyanc] = "t"; + } } else { m_metaData[cstr_dj_keyabstract].clear(); - res = processAttach(); + res = processAttach(); } m_idx++; m_havedoc = m_idx < (int)m_attachments.size(); @@ -211,30 +211,30 @@ bool MimeHandlerMail::next_document() // which will be indicated by the *respp argument pointing to the original // text on exit static bool decodeBody(const string& cte, // Content transfer encoding - const string& body, // Source text - string& decoded, // Decoded text if actual decoding - const string** respp // Decoding Indicator - ) + const string& body, // Source text + string& decoded, // Decoded text if actual decoding + const string** respp // Decoding Indicator + ) { // By default, there is no encoding (7bit,8bit,raw). Also in case of // decoding error *respp = &body; if (!stringlowercmp("quoted-printable", cte)) { - if (!qp_decode(body, decoded)) { - LOGERR("decodeBody: quoted-printable decoding failed !\n"); - LOGDEB(" Body: \n" << body << "\n"); - return false; - } - *respp = &decoded; + if (!qp_decode(body, decoded)) { + LOGERR("decodeBody: quoted-printable decoding failed !\n"); + LOGDEB(" Body: \n" << body << "\n"); + return false; + } + *respp = &decoded; } else if (!stringlowercmp("base64", cte)) { - if (!base64_decode(body, decoded)) { - // base64 encoding errors are actually relatively common - LOGERR("decodeBody: base64 decoding failed !\n"); - LOGDEB(" Body: \n" << body << "\n"); - return false; - } - *respp = &decoded; + if (!base64_decode(body, decoded)) { + // base64 encoding errors are actually relatively common + LOGERR("decodeBody: base64 decoding failed !\n"); + LOGDEB(" Body: \n" << body << "\n"); + return false; + } + *respp = &decoded; } return true; } @@ -243,10 +243,10 @@ bool MimeHandlerMail::processAttach() { LOGDEB("MimeHandlerMail::processAttach() m_idx " << m_idx << "\n"); if (!m_havedoc) - return false; + return false; if (m_idx >= (int)m_attachments.size()) { - m_havedoc = false; - return false; + m_havedoc = false; + return false; } MHMailAttach *att = m_attachments[m_idx]; @@ -263,31 +263,31 @@ bool MimeHandlerMail::processAttach() body.clear(); att->m_part->getBody(body, 0, att->m_part->bodylength); { - string decoded; - const string *bdp; - if (!decodeBody(att->m_contentTransferEncoding, body, decoded, &bdp)) { - return false; - } - if (bdp != &body) - body.swap(decoded); + string decoded; + const string *bdp; + if (!decodeBody(att->m_contentTransferEncoding, body, decoded, &bdp)) { + return false; + } + if (bdp != &body) + body.swap(decoded); } // Special case for application/octet-stream: try to better // identify content, using file name if set if (m_metaData[cstr_dj_keymt] == "application/octet-stream" && - !m_metaData[cstr_dj_keyfn].empty()) { - string mt = mimetype(m_metaData[cstr_dj_keyfn], 0, - m_config, false); - if (!mt.empty()) - m_metaData[cstr_dj_keymt] = mt; + !m_metaData[cstr_dj_keyfn].empty()) { + string mt = mimetype(m_metaData[cstr_dj_keyfn], 0, + m_config, false); + if (!mt.empty()) + m_metaData[cstr_dj_keymt] = mt; } // Special case for text/plain content. Internfile should deal // with this but it expects text/plain to be utf-8 already, so we // handle the transcoding if needed. Same kind of issue for the MD5 if (m_metaData[cstr_dj_keymt] == cstr_textplain) { - if (!txtdcode("MimeHandlerMail::processAttach")) { - body.clear(); + if (!txtdcode("MimeHandlerMail::processAttach")) { + body.clear(); } else if (!m_forPreview) { string md5, xmd5; MD5String(body, md5); @@ -314,76 +314,76 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth) { LOGDEB2("MimeHandlerMail::processMsg: depth " << depth << "\n"); if (depth++ >= maxdepth) { - // Have to stop somewhere - LOGINFO("MimeHandlerMail::processMsg: maxdepth " << maxdepth << + // Have to stop somewhere + LOGINFO("MimeHandlerMail::processMsg: maxdepth " << maxdepth << " exceeded\n"); - // Return true anyway, better to index partially than not at all - return true; + // Return true anyway, better to index partially than not at all + return true; } - + // Handle some headers. string& text = m_metaData[cstr_dj_keycontent]; Binc::HeaderItem hi; string decoded; if (doc->h.getFirstHeader("From", hi)) { - rfc2047_decode(hi.getValue(), decoded); - if (preview()) - text += string("From: "); - text += decoded + cstr_newline; - if (depth == 1) { - m_metaData[cstr_dj_keyauthor] = decoded; - } + rfc2047_decode(hi.getValue(), decoded); + if (preview()) + text += string("From: "); + text += decoded + cstr_newline; + if (depth == 1) { + m_metaData[cstr_dj_keyauthor] = decoded; + } } if (doc->h.getFirstHeader("To", hi)) { - rfc2047_decode(hi.getValue(), decoded); - if (preview()) - text += string("To: "); - text += decoded + cstr_newline; - if (depth == 1) { - m_metaData[cstr_dj_keyrecipient] = decoded; - } + rfc2047_decode(hi.getValue(), decoded); + if (preview()) + text += string("To: "); + text += decoded + cstr_newline; + if (depth == 1) { + m_metaData[cstr_dj_keyrecipient] = decoded; + } } if (doc->h.getFirstHeader("Cc", hi)) { - rfc2047_decode(hi.getValue(), decoded); - if (preview()) - text += string("Cc: "); - text += decoded + cstr_newline; - if (depth == 1) { - m_metaData[cstr_dj_keyrecipient] += " " + decoded; - } + rfc2047_decode(hi.getValue(), decoded); + if (preview()) + text += string("Cc: "); + text += decoded + cstr_newline; + if (depth == 1) { + m_metaData[cstr_dj_keyrecipient] += " " + decoded; + } } if (doc->h.getFirstHeader("Message-Id", hi)) { - if (depth == 1) { - m_metaData[cstr_dj_keymsgid] = hi.getValue(); + if (depth == 1) { + m_metaData[cstr_dj_keymsgid] = hi.getValue(); trimstring(m_metaData[cstr_dj_keymsgid], "<>"); - } + } } if (doc->h.getFirstHeader("Date", hi)) { - rfc2047_decode(hi.getValue(), decoded); - if (depth == 1) { - time_t t = rfc2822DateToUxTime(decoded); - if (t != (time_t)-1) { - char ascuxtime[100]; - sprintf(ascuxtime, "%ld", (long)t); - m_metaData[cstr_dj_keymd] = ascuxtime; - } else { - // Leave mtime field alone, ftime will be used instead. - LOGDEB("rfc2822Date...: failed: [" << decoded << "]\n"); - } - } - if (preview()) - text += string("Date: "); - text += decoded + cstr_newline; + rfc2047_decode(hi.getValue(), decoded); + if (depth == 1) { + time_t t = rfc2822DateToUxTime(decoded); + if (t != (time_t)-1) { + char ascuxtime[100]; + sprintf(ascuxtime, "%ld", (long)t); + m_metaData[cstr_dj_keymd] = ascuxtime; + } else { + // Leave mtime field alone, ftime will be used instead. + LOGDEB("rfc2822Date...: failed: [" << decoded << "]\n"); + } + } + if (preview()) + text += string("Date: "); + text += decoded + cstr_newline; } if (doc->h.getFirstHeader("Subject", hi)) { - rfc2047_decode(hi.getValue(), decoded); - if (depth == 1) { - m_metaData[cstr_dj_keytitle] = decoded; - m_subject = decoded; - } - if (preview()) - text += string("Subject: "); - text += decoded + cstr_newline; + rfc2047_decode(hi.getValue(), decoded); + if (depth == 1) { + m_metaData[cstr_dj_keytitle] = decoded; + m_subject = decoded; + } + if (preview()) + text += string("Subject: "); + text += decoded + cstr_newline; } // Check for the presence of configured additional headers and possibly @@ -424,58 +424,58 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) { LOGDEB2("MimeHandlerMail::walkmime: depth " << depth << "\n"); if (depth++ >= maxdepth) { - LOGINFO("walkmime: max depth (" << maxdepth << ") exceeded\n"); - return; + LOGINFO("walkmime: max depth (" << maxdepth << ") exceeded\n"); + return; } string& out = m_metaData[cstr_dj_keycontent]; if (doc->isMultipart()) { - LOGDEB2("walkmime: ismultipart " << doc->isMultipart() << + LOGDEB2("walkmime: ismultipart " << doc->isMultipart() << " subtype '" << doc->getSubType() << "'\n"); - // We only handle alternative, related and mixed (no digests). - std::vector::iterator it; + // We only handle alternative, related and mixed (no digests). + std::vector::iterator it; - if (!stringicmp("mixed", doc->getSubType()) || - !stringicmp("signed", doc->getSubType()) || - !stringicmp("related", doc->getSubType())) { - // Multipart mixed and related: process each part. - for (it = doc->members.begin(); it != doc->members.end();it++) { - walkmime(&(*it), depth); - } + if (!stringicmp("mixed", doc->getSubType()) || + !stringicmp("signed", doc->getSubType()) || + !stringicmp("related", doc->getSubType())) { + // Multipart mixed and related: process each part. + for (it = doc->members.begin(); it != doc->members.end();it++) { + walkmime(&(*it), depth); + } - } else if (!stringicmp("alternative", doc->getSubType())) { - // Multipart/alternative: look for a text/plain part, then html. - // Process if found - std::vector::iterator ittxt, ithtml; - ittxt = ithtml = doc->members.end(); - int i = 1; - for (it = doc->members.begin(); - it != doc->members.end(); it++, i++) { - // Get and parse content-type header - Binc::HeaderItem hi; - if (!it->h.getFirstHeader("Content-Type", hi)) { - LOGDEB("walkmime:no ctent-type header for part "<members.end() && ittxt->bodylength) { - LOGDEB2("walkmime: alternative: choose text/plain. Size: " << + } else if (!stringicmp("alternative", doc->getSubType())) { + // Multipart/alternative: look for a text/plain part, then html. + // Process if found + std::vector::iterator ittxt, ithtml; + ittxt = ithtml = doc->members.end(); + int i = 1; + for (it = doc->members.begin(); + it != doc->members.end(); it++, i++) { + // Get and parse content-type header + Binc::HeaderItem hi; + if (!it->h.getFirstHeader("Content-Type", hi)) { + LOGDEB("walkmime:no ctent-type header for part "<members.end() && ittxt->bodylength) { + LOGDEB2("walkmime: alternative: choose text/plain. Size: " << ittxt->bodylength << endl); walkmime(&(*ittxt), depth); - } else if (ithtml != doc->members.end()) { - LOGDEB2("walkmime: alternative: choose text/html.\n"); + } else if (ithtml != doc->members.end()) { + LOGDEB2("walkmime: alternative: choose text/html.\n"); walkmime(&(*ithtml), depth); - } - } - return; + } + } + return; } // Part is not multipart: it must be either simple or message. Take @@ -485,57 +485,60 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) Binc::HeaderItem hi; string ctt = cstr_textplain; if (doc->h.getFirstHeader("Content-Type", hi)) { - ctt = hi.getValue(); + ctt = hi.getValue(); } LOGDEB2("walkmime:content-type: " << ctt << "\n"); MimeHeaderValue content_type; parseMimeHeaderValue(ctt, content_type); - + // Get and parse Content-Disposition header string ctd = "inline"; if (doc->h.getFirstHeader("Content-Disposition", hi)) { - ctd = hi.getValue(); + ctd = hi.getValue(); } MimeHeaderValue content_disposition; parseMimeHeaderValue(ctd, content_disposition); LOGDEB2("Content_disposition:[" << content_disposition.value << "]\n"); string dispindic; if (stringlowercmp("inline", content_disposition.value)) - dispindic = "Attachment"; + dispindic = "Attachment"; else - dispindic = "Inline"; + dispindic = "Inline"; // See if we have a filename. string filename; map::const_iterator it; it = content_disposition.params.find(string("filename")); if (it != content_disposition.params.end()) - filename = it->second; + filename = it->second; if (filename.empty()) { - it = content_type.params.find(string("name")); - if (it != content_type.params.end()) - filename = it->second; + it = content_type.params.find(string("name")); + if (it != content_type.params.end()) + filename = it->second; } - + + // Note: I have never seen anything useful in the Content-Description field. + // So it's not processed. Use rfc2047 to decode if we ever do it. + if (doc->isMessageRFC822()) { - LOGDEB2("walkmime: message/RFC822 part\n"); - - // The first part is the already parsed message. Call - // processMsg instead of walkmime so that mail headers get - // printed. The depth will tell it what to do - if (doc->members.empty()) { - //?? - return; - } - out += "\n"; - if (m_forPreview) - out += "[" + dispindic + " " + content_type.value + ": "; - out += filename; - if (m_forPreview) - out += "]"; - out += "\n\n"; - processMsg(&doc->members[0], depth); - return; + LOGDEB2("walkmime: message/RFC822 part\n"); + + // The first part is the already parsed message. Call + // processMsg instead of walkmime so that mail headers get + // printed. The depth will tell it what to do + if (doc->members.empty()) { + //?? + return; + } + out += "\n"; + if (m_forPreview) + out += "[" + dispindic + " " + content_type.value + ": "; + out += filename; + if (m_forPreview) + out += "]"; + out += "\n\n"; + processMsg(&doc->members[0], depth); + return; } // "Simple" part. @@ -550,13 +553,13 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) string charset; it = content_type.params.find(cstr_mail_charset); if (it != content_type.params.end()) - charset = it->second; + charset = it->second; if (charset.empty() || - !stringlowercmp("us-ascii", charset) || - !stringlowercmp("default", charset) || - !stringlowercmp("x-user-defined", charset) || - !stringlowercmp("x-unknown", charset) || - !stringlowercmp("unknown", charset) ) { + !stringlowercmp("us-ascii", charset) || + !stringlowercmp("default", charset) || + !stringlowercmp("x-user-defined", charset) || + !stringlowercmp("x-unknown", charset) || + !stringlowercmp("unknown", charset) ) { if (!m_config->getConfParam("maildefcharset", charset)) charset = "CP1252"; } @@ -564,7 +567,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) // Content transfer encoding string cte = "7bit"; if (doc->h.getFirstHeader("Content-Transfer-Encoding", hi)) { - cte = hi.getValue(); + cte = hi.getValue(); } // If the Content-Disposition is not inline, we treat it as @@ -572,34 +575,34 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) // If it is inline but not text or html, same thing. // Some early MIME msgs have "text" instead of "text/plain" as type if (stringlowercmp("inline", content_disposition.value) || - (stringlowercmp(cstr_textplain, content_type.value) && - stringlowercmp("text", content_type.value) && - stringlowercmp("text/html", content_type.value)) ) { - if (!filename.empty()) { - out += "\n"; - if (m_forPreview) - out += "[" + dispindic + " " + content_type.value + ": "; - out += filename; - if (m_forPreview) - out += "]"; - out += "\n\n"; - } - MHMailAttach *att = new MHMailAttach; - if (att == 0) { - LOGERR("Out of memory\n"); - return; - } - att->m_contentType = content_type.value; - stringtolower(att->m_contentType); - att->m_filename = filename; - att->m_charset = charset; - att->m_contentTransferEncoding = cte; - att->m_part = doc; - LOGDEB("walkmime: attachmnt: ct [" << att->m_contentType << + (stringlowercmp(cstr_textplain, content_type.value) && + stringlowercmp("text", content_type.value) && + stringlowercmp("text/html", content_type.value)) ) { + if (!filename.empty()) { + out += "\n"; + if (m_forPreview) + out += "[" + dispindic + " " + content_type.value + ": "; + out += filename; + if (m_forPreview) + out += "]"; + out += "\n\n"; + } + MHMailAttach *att = new MHMailAttach; + if (att == 0) { + LOGERR("Out of memory\n"); + return; + } + att->m_contentType = content_type.value; + stringtolower(att->m_contentType); + att->m_filename = filename; + att->m_charset = charset; + att->m_contentTransferEncoding = cte; + att->m_part = doc; + LOGDEB("walkmime: attachmnt: ct [" << att->m_contentType << "] cte [" << att->m_contentTransferEncoding << "] cs [" << att->m_charset << "] fn [" << filename << "]\n"); - m_attachments.push_back(att); - return; + m_attachments.push_back(att); + return; } // We are dealing with an inline part of text/plain or text/html @@ -612,43 +615,42 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) string body; doc->getBody(body, 0, doc->bodylength); { - string decoded; - const string *bdp; - if (!decodeBody(cte, body, decoded, &bdp)) { - LOGERR("MimeHandlerMail::walkmime: failed decoding body\n"); - } - if (bdp != &body) - body.swap(decoded); + string decoded; + const string *bdp; + if (!decodeBody(cte, body, decoded, &bdp)) { + LOGERR("MimeHandlerMail::walkmime: failed decoding body\n"); + } + if (bdp != &body) + body.swap(decoded); } // Handle html stripping and transcoding to utf8 if (!stringlowercmp("text/html", content_type.value)) { - MimeHandlerHtml mh(m_config, "1234"); - mh.set_property(Dijon::Filter::OPERATING_MODE, - m_forPreview ? "view" : "index"); - mh.set_property(Dijon::Filter::DEFAULT_CHARSET, charset); - mh.set_document_string("text/html", body); - mh.next_document(); - map::const_iterator it = - mh.get_meta_data().find(cstr_dj_keycontent); - if (it != mh.get_meta_data().end()) - out += it->second; + MimeHandlerHtml mh(m_config, "1234"); + mh.set_property(Dijon::Filter::OPERATING_MODE, + m_forPreview ? "view" : "index"); + mh.set_property(Dijon::Filter::DEFAULT_CHARSET, charset); + mh.set_document_string("text/html", body); + mh.next_document(); + map::const_iterator it = + mh.get_meta_data().find(cstr_dj_keycontent); + if (it != mh.get_meta_data().end()) + out += it->second; } else { - string utf8; - // Transcode to utf-8 - LOGDEB1("walkmime: transcoding from " << charset << " to UTF-8\n"); - if (!transcode(body, utf8, charset, cstr_utf8)) { - LOGERR("walkmime: transcode failed from cs '" << charset << + string utf8; + // Transcode to utf-8 + LOGDEB1("walkmime: transcoding from " << charset << " to UTF-8\n"); + if (!transcode(body, utf8, charset, cstr_utf8)) { + LOGERR("walkmime: transcode failed from cs '" << charset << "' to UTF-8\n"); - out += body; - } else { - out += utf8; - } + out += body; + } else { + out += utf8; + } } if (out.length() && out[out.length()-1] != '\n') - out += '\n'; + out += '\n'; LOGDEB2("walkmime: out now: [" << out << "]\n"); } - diff --git a/src/internfile/mh_mail.h b/src/internfile/mh_mail.h index eb2660cc..9b62ffae 100644 --- a/src/internfile/mh_mail.h +++ b/src/internfile/mh_mail.h @@ -24,25 +24,23 @@ #include "mimehandler.h" namespace Binc { - class MimeDocument; - class MimePart; +class MimeDocument; +class MimePart; } class MHMailAttach; /** - * Translate a mail folder file into internal documents (also works - * for maildir files). This has to keep state while parsing a mail folder - * file. + * Process a mail message (rfc822) into internal documents. */ class MimeHandlerMail : public RecollFilter { public: MimeHandlerMail(RclConfig *cnf, const std::string &id); virtual ~MimeHandlerMail(); virtual bool is_data_input_ok(DataInput input) const { - if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING) - return true; - return false; + if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING) + return true; + return false; } virtual bool next_document(); virtual bool skip_to_document(const std::string& ipath); @@ -76,10 +74,10 @@ private: class MHMailAttach { public: - std::string m_contentType; - std::string m_filename; - std::string m_charset; - std::string m_contentTransferEncoding; + std::string m_contentType; + std::string m_filename; + std::string m_charset; + std::string m_contentTransferEncoding; Binc::MimePart *m_part; };