text/plain attachments were not transcoded to utf-8

This commit is contained in:
dockes 2007-10-17 11:40:35 +00:00
parent 37f11e47ac
commit 02475fba71
3 changed files with 57 additions and 19 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.29 2007-01-17 13:53:40 dockes Exp $ (C) 2005 J.F.Dockes"; static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.30 2007-10-17 11:40:35 dockes Exp $ (C) 2005 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -135,9 +135,14 @@ bool MimeHandlerMail::next_document()
return res; return res;
} }
// Decode according to content transfer encoding // Decode according to content transfer encoding. May actually do nothing,
static bool decodeBody(const string& cte, const string& body, string& decoded, // which will be indicated by the *respp argument pointing to the original
const string** respp) // text on exit
static bool decodeBody(const string& cte, // Content transfer encoding
const string& body, // Source text
string& decoded, // Decoded text if actual decoding
const string** respp // Decoding Indicator
)
{ {
// By default, there is no encoding (7bit,8bit,raw). Also in case of // By default, there is no encoding (7bit,8bit,raw). Also in case of
// decoding error // decoding error
@ -146,13 +151,15 @@ static bool decodeBody(const string& cte, const string& body, string& decoded,
if (!stringlowercmp("quoted-printable", cte)) { if (!stringlowercmp("quoted-printable", cte)) {
if (!qp_decode(body, decoded)) { if (!qp_decode(body, decoded)) {
LOGERR(("decodeBody: quoted-printable decoding failed !\n")); LOGERR(("decodeBody: quoted-printable decoding failed !\n"));
LOGDEB((" Body: \n%s\n", body.c_str()));
return false; return false;
} }
*respp = &decoded; *respp = &decoded;
} else if (!stringlowercmp("base64", cte)) { } else if (!stringlowercmp("base64", cte)) {
if (!base64_decode(body, decoded)) { if (!base64_decode(body, decoded)) {
LOGERR(("decodeBody: base64 decoding failed !. body [%s]\n", // base64 encoding errors are actually relatively common
body.c_str())); LOGERR(("decodeBody: base64 decoding failed !\n"));
LOGDEB((" Body: \n%s\n", body.c_str()));
return false; return false;
} }
*respp = &decoded; *respp = &decoded;
@ -171,10 +178,15 @@ bool MimeHandlerMail::processAttach()
} }
MHMailAttach *att = m_attachments[m_idx]; MHMailAttach *att = m_attachments[m_idx];
LOGDEB1(("processAttach:content-type: %s\n", att->m_contentType.c_str()));
m_metaData["mimetype"] = att->m_contentType; m_metaData["mimetype"] = att->m_contentType;
m_metaData["charset"] = att->m_charset; m_metaData["charset"] = att->m_charset;
m_metaData["filename"] = att->m_filename; m_metaData["filename"] = att->m_filename;
// Change the title to something helpful
m_metaData["title"] = att->m_filename + " (" + m_subject + ")";
LOGDEB1((" processAttach:ct [%s] cs [%s] fn [%s]\n",
att->m_contentType.c_str(),
att->m_charset.c_str(),
att->m_filename.c_str()));
m_metaData["content"] = ""; m_metaData["content"] = "";
string& body = m_metaData["content"]; string& body = m_metaData["content"];
@ -186,9 +198,27 @@ bool MimeHandlerMail::processAttach()
} }
if (bdp != &body) if (bdp != &body)
body = decoded; body = decoded;
// Special case for text/plain content. Internfile should deal
// with this but it expects text/plain to be utf-8 already, so we
// handle the transcoding if needed
if (m_metaData["mimetype"] == "text/plain" &&
stringicmp(m_metaData["charset"], "UTF-8")) {
string utf8;
if (!transcode(body, utf8, m_metaData["charset"], "UTF-8")) {
LOGERR((" processAttach: transcode to utf-8 failed "
"for charset [%s]\n", m_metaData["charset"].c_str()));
// Just let it through and hope for the best...
} else {
body = utf8;
}
}
// Ipath
char nbuf[10]; char nbuf[10];
sprintf(nbuf, "%d", m_idx); sprintf(nbuf, "%d", m_idx);
m_metaData["ipath"] = nbuf; m_metaData["ipath"] = nbuf;
return true; return true;
} }
@ -242,8 +272,10 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
} }
if (doc->h.getFirstHeader("Subject", hi)) { if (doc->h.getFirstHeader("Subject", hi)) {
rfc2047_decode(hi.getValue(), transcoded); rfc2047_decode(hi.getValue(), transcoded);
if (depth == 1) if (depth == 1) {
m_metaData["title"] = transcoded; m_metaData["title"] = transcoded;
m_subject = transcoded;
}
text += string("Subject: ") + transcoded + string("\n"); text += string("Subject: ") + transcoded + string("\n");
} }
text += '\n'; text += '\n';
@ -406,8 +438,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
} }
// If the Content-Disposition is not inline, we treat it as // If the Content-Disposition is not inline, we treat it as
// attachment, as per rfc2183. We don't process attachments // attachment, as per rfc2183.
// for now, except for indexing/displaying the file name
// If it is inline but not text or html, same thing. // If it is inline but not text or html, same thing.
if (stringlowercmp("inline", content_disposition.value) || if (stringlowercmp("inline", content_disposition.value) ||
(stringlowercmp("text/plain", content_type.value) && (stringlowercmp("text/plain", content_type.value) &&
@ -421,7 +452,6 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
out += "]"; out += "]";
out += "\n\n"; out += "\n\n";
} }
LOGDEB(("walkmime: pushing attchmnt fn [%s]\n", filename.c_str()));
MHMailAttach *att = new MHMailAttach; MHMailAttach *att = new MHMailAttach;
if (att == 0) { if (att == 0) {
LOGERR(("Out of memory\n")); LOGERR(("Out of memory\n"));
@ -433,6 +463,11 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
att->m_charset = charset; att->m_charset = charset;
att->m_contentTransferEncoding = cte; att->m_contentTransferEncoding = cte;
att->m_part = doc; att->m_part = doc;
LOGDEB(("walkmime: attachmnt: ct [%s] cte [%s] cs [%s] fn [%s]\n",
att->m_contentType.c_str(),
att->m_contentTransferEncoding.c_str(),
att->m_charset.c_str(),
filename.c_str()));
m_attachments.push_back(att); m_attachments.push_back(att);
return; return;
} }

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _MAIL_H_INCLUDED_ #ifndef _MAIL_H_INCLUDED_
#define _MAIL_H_INCLUDED_ #define _MAIL_H_INCLUDED_
/* @(#$Id: mh_mail.h,v 1.11 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: mh_mail.h,v 1.12 2007-10-17 11:40:35 dockes Exp $ (C) 2004 J.F.Dockes */
#include <sstream> #include <sstream>
#include <vector> #include <vector>
@ -56,12 +56,13 @@ class MimeHandlerMail : public RecollFilter {
bool processMsg(Binc::MimePart *doc, int depth); bool processMsg(Binc::MimePart *doc, int depth);
void walkmime(Binc::MimePart* doc, int depth); void walkmime(Binc::MimePart* doc, int depth);
bool processAttach(); bool processAttach();
Binc::MimeDocument *m_bincdoc; Binc::MimeDocument *m_bincdoc;
int m_fd; int m_fd;
std::stringstream *m_stream; std::stringstream *m_stream;
int m_idx; // starts at -1 for self, then index into int m_idx; // starts at -1 for self, then index into
// attachments; // attachments;
vector<MHMailAttach *> m_attachments; string m_subject;
vector<MHMailAttach *> m_attachments;
}; };
class MHMailAttach { class MHMailAttach {

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.18 2007-01-18 14:23:42 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.19 2007-10-17 11:40:35 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -221,7 +221,7 @@ find_next_token(const string &in, string::size_type start,
lex.quote = oquot; lex.quote = oquot;
return ++end; return ++end;
} else { } else {
string::size_type end = in.find_first_of(delims + " \t(", start); string::size_type end = in.find_first_of(delims + "\r\n \t(", start);
lex.what = Lexical::token; lex.what = Lexical::token;
lex.quote = 0; lex.quote = 0;
if (end == string::npos) { if (end == string::npos) {
@ -830,6 +830,8 @@ main(int argc, const char **argv)
"text/html;charset = UTF-8 ; otherparam=garb; \n" "text/html;charset = UTF-8 ; otherparam=garb; \n"
"QUOTEDPARAM=\"quoted value\"", "QUOTEDPARAM=\"quoted value\"",
"text/plain; charset=ASCII\r\n name=\"809D3016_5691DPS_5.2.LIC\"",
"application/x-stuff;" "application/x-stuff;"
"title*0*=us-ascii'en'This%20is%20even%20more%20;" "title*0*=us-ascii'en'This%20is%20even%20more%20;"
"title*1*=%2A%2A%2Afun%2A%2A%2A%20;" "title*1*=%2A%2A%2Afun%2A%2A%2A%20;"