diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp index f8785def..0b110c8f 100644 --- a/src/internfile/mh_mail.cpp +++ b/src/internfile/mh_mail.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.29 2007-01-17 13:53:40 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.30 2007-10-17 11:40:35 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -135,9 +135,14 @@ bool MimeHandlerMail::next_document() return res; } -// Decode according to content transfer encoding -static bool decodeBody(const string& cte, const string& body, string& decoded, - const string** respp) +// Decode according to content transfer encoding. May actually do nothing, +// which will be indicated by the *respp argument pointing to the original +// text on exit +static bool decodeBody(const string& cte, // Content transfer encoding + const string& body, // Source text + string& decoded, // Decoded text if actual decoding + const string** respp // Decoding Indicator + ) { // By default, there is no encoding (7bit,8bit,raw). Also in case of // decoding error @@ -146,13 +151,15 @@ static bool decodeBody(const string& cte, const string& body, string& decoded, if (!stringlowercmp("quoted-printable", cte)) { if (!qp_decode(body, decoded)) { LOGERR(("decodeBody: quoted-printable decoding failed !\n")); + LOGDEB((" Body: \n%s\n", body.c_str())); return false; } *respp = &decoded; } else if (!stringlowercmp("base64", cte)) { if (!base64_decode(body, decoded)) { - LOGERR(("decodeBody: base64 decoding failed !. body [%s]\n", - body.c_str())); + // base64 encoding errors are actually relatively common + LOGERR(("decodeBody: base64 decoding failed !\n")); + LOGDEB((" Body: \n%s\n", body.c_str())); return false; } *respp = &decoded; @@ -171,10 +178,15 @@ bool MimeHandlerMail::processAttach() } MHMailAttach *att = m_attachments[m_idx]; - LOGDEB1(("processAttach:content-type: %s\n", att->m_contentType.c_str())); m_metaData["mimetype"] = att->m_contentType; m_metaData["charset"] = att->m_charset; m_metaData["filename"] = att->m_filename; + // Change the title to something helpul + m_metaData["title"] = att->m_filename + " (" + m_subject + ")"; + LOGDEB1((" processAttach:ct [%s] cs [%s] fn [%s]\n", + att->m_contentType.c_str(), + att->m_charset.c_str(), + att->m_filename.c_str())); m_metaData["content"] = ""; string& body = m_metaData["content"]; @@ -186,9 +198,27 @@ bool MimeHandlerMail::processAttach() } if (bdp != &body) body = decoded; + + // Special case for text/plain content. Internfile should deal + // with this but it expects text/plain to be utf-8 already, so we + // handle the transcoding if needed + if (m_metaData["mimetype"] == "text/plain" && + stringicmp(m_metaData["charset"], "UTF-8")) { + string utf8; + if (!transcode(body, utf8, m_metaData["charset"], "UTF-8")) { + LOGERR((" processAttach: transcode to utf-8 failed " + "for charset [%s]\n", m_metaData["charset"].c_str())); + // Just let it through and hope for the best... + } else { + body = utf8; + } + } + + // Ipath char nbuf[10]; sprintf(nbuf, "%d", m_idx); m_metaData["ipath"] = nbuf; + return true; } @@ -242,8 +272,10 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth) } if (doc->h.getFirstHeader("Subject", hi)) { rfc2047_decode(hi.getValue(), transcoded); - if (depth == 1) + if (depth == 1) { m_metaData["title"] = transcoded; + m_subject = transcoded; + } text += string("Subject: ") + transcoded + string("\n"); } text += '\n'; @@ -406,8 +438,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) } // If the Content-Disposition is not inline, we treat it as - // attachment, as per rfc2183. We don't process attachments - // for now, except for indexing/displaying the file name + // attachment, as per rfc2183. // If it is inline but not text or html, same thing. if (stringlowercmp("inline", content_disposition.value) || (stringlowercmp("text/plain", content_type.value) && @@ -421,7 +452,6 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) out += "]"; out += "\n\n"; } - LOGDEB(("walkmime: pushing attchmnt fn [%s]\n", filename.c_str())); MHMailAttach *att = new MHMailAttach; if (att == 0) { LOGERR(("Out of memory\n")); @@ -433,6 +463,11 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) att->m_charset = charset; att->m_contentTransferEncoding = cte; att->m_part = doc; + LOGDEB(("walkmime: attachmnt: ct [%s] cte [%s] cs [%s] fn [%s]\n", + att->m_contentType.c_str(), + att->m_contentTransferEncoding.c_str(), + att->m_charset.c_str(), + filename.c_str())); m_attachments.push_back(att); return; } diff --git a/src/internfile/mh_mail.h b/src/internfile/mh_mail.h index 2e45166b..311e4949 100644 --- a/src/internfile/mh_mail.h +++ b/src/internfile/mh_mail.h @@ -16,7 +16,7 @@ */ #ifndef _MAIL_H_INCLUDED_ #define _MAIL_H_INCLUDED_ -/* @(#$Id: mh_mail.h,v 1.11 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: mh_mail.h,v 1.12 2007-10-17 11:40:35 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -56,12 +56,13 @@ class MimeHandlerMail : public RecollFilter { bool processMsg(Binc::MimePart *doc, int depth); void walkmime(Binc::MimePart* doc, int depth); bool processAttach(); - Binc::MimeDocument *m_bincdoc; - int m_fd; - std::stringstream *m_stream; - int m_idx; // starts at -1 for self, then index into + Binc::MimeDocument *m_bincdoc; + int m_fd; + std::stringstream *m_stream; + int m_idx; // starts at -1 for self, then index into // attachments; - vector m_attachments; + string m_subject; + vector m_attachments; }; class MHMailAttach { diff --git a/src/utils/mimeparse.cpp b/src/utils/mimeparse.cpp index 581a82a6..f461b904 100644 --- a/src/utils/mimeparse.cpp +++ b/src/utils/mimeparse.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.18 2007-01-18 14:23:42 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.19 2007-10-17 11:40:35 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -221,7 +221,7 @@ find_next_token(const string &in, string::size_type start, lex.quote = oquot; return ++end; } else { - string::size_type end = in.find_first_of(delims + " \t(", start); + string::size_type end = in.find_first_of(delims + "\r\n \t(", start); lex.what = Lexical::token; lex.quote = 0; if (end == string::npos) { @@ -830,6 +830,8 @@ main(int argc, const char **argv) "text/html;charset = UTF-8 ; otherparam=garb; \n" "QUOTEDPARAM=\"quoted value\"", + "text/plain; charset=ASCII\r\n name=\"809D3016_5691DPS_5.2.LIC\"", + "application/x-stuff;" "title*0*=us-ascii'en'This%20is%20even%20more%20;" "title*1*=%2A%2A%2Afun%2A%2A%2A%20;"