walk the full mime tree instead of staying at level 1

This commit is contained in:
dockes 2006-09-19 14:30:39 +00:00
parent 6424efca57
commit 3e2bccd259
2 changed files with 203 additions and 152 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.17 2006-09-15 16:50:44 dockes Exp $ (C) 2005 J.F.Dockes"; static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.18 2006-09-19 14:30:39 dockes Exp $ (C) 2005 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -77,7 +77,12 @@ MimeHandlerMail::mkDoc(RclConfig *cnf, const string &fn,
} }
Binc::MimeDocument doc; Binc::MimeDocument doc;
doc.parseFull(fd); doc.parseFull(fd);
MimeHandler::Status ret = processone(fn, doc, docout); if (!doc.isHeaderParsed() && !doc.isAllParsed()) {
LOGERR(("MimeHandlerMail::mkDoc: mime parse error for %s\n",
fn.c_str()));
return MimeHandler::MHError;
}
MimeHandler::Status ret = processMsg(docout, doc, 0);
close(fd); close(fd);
return ret; return ret;
} else if (!stringlowercmp("text/x-mail", mtype)) { } else if (!stringlowercmp("text/x-mail", mtype)) {
@ -175,7 +180,12 @@ MimeHandlerMail::processmbox(const string &fn, Rcl::Doc &docout, string& ipath)
stringstream s(msgbuf); stringstream s(msgbuf);
Binc::MimeDocument doc; Binc::MimeDocument doc;
doc.parseFull(s); doc.parseFull(s);
MimeHandler::Status ret = processone(fn, doc, docout); if (!doc.isHeaderParsed() && !doc.isAllParsed()) {
LOGERR(("MimeHandlerMail::processMbox: mime parse error for %s\n",
fn.c_str()));
return MimeHandler::MHError;
}
MimeHandler::Status ret = processMsg(docout, doc, 0);
if (ret == MimeHandler::MHError) if (ret == MimeHandler::MHError)
return ret; return ret;
char buf[20]; char buf[20];
@ -189,23 +199,23 @@ MimeHandlerMail::processmbox(const string &fn, Rcl::Doc &docout, string& ipath)
// Transform a single message into a document. The subject becomes the // Transform a single message into a document. The subject becomes the
// title, and any simple body part with a content-type of text or html // title, and any simple body part with a content-type of text or html
// and content-disposition inline gets concatenated as text. // and content-disposition inline gets concatenated as text.
//
// If depth is not zero, we're called recursively for a
// message/rfc822 part and we must not touch the doc fields except the
// text.
MimeHandler::Status MimeHandler::Status
MimeHandlerMail::processone(const string &fn, Binc::MimeDocument& doc, MimeHandlerMail::processMsg(Rcl::Doc &docout, Binc::MimePart& doc,
Rcl::Doc &docout) int depth)
{ {
if (!doc.isHeaderParsed() && !doc.isAllParsed()) { if (depth >= 5) {
LOGERR(("MimeHandlerMail::processone: mime parse error for %s\n", // Have to stop somewhere
fn.c_str())); LOGDEB(("MimeHandlerMail::processMsg: stopping at depth 5\n"));
return MimeHandler::MHError; return MimeHandler::MHDone;
} }
// Handle some headers. // Handle some headers.
Binc::HeaderItem hi; Binc::HeaderItem hi;
string transcoded; string transcoded;
if (doc.h.getFirstHeader("Subject", hi)) {
rfc2047_decode(hi.getValue(), transcoded);
docout.title = transcoded;
}
if (doc.h.getFirstHeader("From", hi)) { if (doc.h.getFirstHeader("From", hi)) {
rfc2047_decode(hi.getValue(), transcoded); rfc2047_decode(hi.getValue(), transcoded);
docout.text += string("From: ") + transcoded + string("\n"); docout.text += string("From: ") + transcoded + string("\n");
@ -216,6 +226,7 @@ MimeHandlerMail::processone(const string &fn, Binc::MimeDocument& doc,
} }
if (doc.h.getFirstHeader("Date", hi)) { if (doc.h.getFirstHeader("Date", hi)) {
rfc2047_decode(hi.getValue(), transcoded); rfc2047_decode(hi.getValue(), transcoded);
if (depth == 0) {
time_t t = rfc2822DateToUxTime(transcoded); time_t t = rfc2822DateToUxTime(transcoded);
if (t != (time_t)-1) { if (t != (time_t)-1) {
char ascuxtime[100]; char ascuxtime[100];
@ -223,30 +234,42 @@ MimeHandlerMail::processone(const string &fn, Binc::MimeDocument& doc,
docout.dmtime = ascuxtime; docout.dmtime = ascuxtime;
} else { } else {
// Leave mtime field alone, ftime will be used instead. // Leave mtime field alone, ftime will be used instead.
LOGDEB(("rfc2822Date...: failed for [%s]\n", transcoded.c_str())); LOGDEB(("rfc2822Date...: failed: [%s]\n", transcoded.c_str()));
}
} }
docout.text += string("Date: ") + transcoded + string("\n"); docout.text += string("Date: ") + transcoded + string("\n");
} }
if (doc.h.getFirstHeader("Subject", hi)) { if (doc.h.getFirstHeader("Subject", hi)) {
rfc2047_decode(hi.getValue(), transcoded); rfc2047_decode(hi.getValue(), transcoded);
if (depth == 0)
docout.title = transcoded;
docout.text += string("Subject: ") + transcoded + string("\n"); docout.text += string("Subject: ") + transcoded + string("\n");
} }
LOGDEB2(("MimeHandlerMail::processone:ismultipart %d mime subtype '%s'\n", LOGDEB2(("MimeHandlerMail::processMsg:ismultipart %d mime subtype '%s'\n",
doc.isMultipart(), doc.getSubType().c_str())); doc.isMultipart(), doc.getSubType().c_str()));
walkmime(docout.text, doc, 0); walkmime(docout, doc, depth);
LOGDEB2(("MimeHandlerMail::processone:text:[%s]\n", docout.text.c_str())); LOGDEB2(("MimeHandlerMail::processMsg:text:[%s]\n", docout.text.c_str()));
return MimeHandler::MHDone; return MimeHandler::MHDone;
} }
// Recursively walk the message mime parts and concatenate all the // Recursively walk the message mime parts and concatenate all the
// inline html or text that we find anywhere. // inline html or text that we find anywhere.
void MimeHandlerMail::walkmime(string &out, Binc::MimePart& doc, int depth) //
// RFC2046 reminder:
// Top level media types:
// Simple: text, image, audio, video, application,
// Composite: multipart, message.
//
// multipart can be mixed, alternative, parallel, digest.
// message/rfc822 may also be of interest.
void MimeHandlerMail::walkmime(Rcl::Doc& docout, Binc::MimePart& doc, int depth)
{ {
string &out = docout.text;
if (depth > 5) { if (depth > 5) {
LOGINFO(("walkmime: max depth exceeded\n")); LOGINFO(("walkmime: max depth (5) exceeded\n"));
return; return;
} }
@ -255,12 +278,12 @@ void MimeHandlerMail::walkmime(string &out, Binc::MimePart& doc, int depth)
doc.isMultipart(), doc.getSubType().c_str())); doc.isMultipart(), doc.getSubType().c_str()));
// We only handle alternative, related and mixed for now. For // We only handle alternative, related and mixed for now. For
// alternative, we look for a text/plain part, else html and // alternative, we look for a text/plain part, else html and
// process it For mixed and related, we process each part. // process it. For mixed and related, we process each part.
std::vector<Binc::MimePart>::iterator it; std::vector<Binc::MimePart>::iterator it;
if (!stringicmp("mixed", doc.getSubType()) || if (!stringicmp("mixed", doc.getSubType()) ||
!stringicmp("related", doc.getSubType())) { !stringicmp("related", doc.getSubType())) {
for (it = doc.members.begin(); it != doc.members.end();it++) { for (it = doc.members.begin(); it != doc.members.end();it++) {
walkmime(out, *it, depth+1); walkmime(docout, *it, depth+1);
} }
} else if (!stringicmp("alternative", doc.getSubType())) { } else if (!stringicmp("alternative", doc.getSubType())) {
std::vector<Binc::MimePart>::iterator ittxt, ithtml; std::vector<Binc::MimePart>::iterator ittxt, ithtml;
@ -283,14 +306,17 @@ void MimeHandlerMail::walkmime(string &out, Binc::MimePart& doc, int depth)
} }
if (ittxt != doc.members.end()) { if (ittxt != doc.members.end()) {
LOGDEB2(("walkmime: alternative: chose text/plain part\n")) LOGDEB2(("walkmime: alternative: chose text/plain part\n"))
walkmime(out, *ittxt, depth+1); walkmime(docout, *ittxt, depth+1);
} else if (ithtml != doc.members.end()) { } else if (ithtml != doc.members.end()) {
LOGDEB2(("walkmime: alternative: chose text/html part\n")) LOGDEB2(("walkmime: alternative: chose text/html part\n"))
walkmime(out, *ithtml, depth+1); walkmime(docout, *ithtml, depth+1);
} }
} }
} else { return;
// "Simple" part. See what it is: }
// Part is not multipart: it must be either simple or message. Take
// a look at interesting headers and a possible filename parameter.
// Get and parse content-type header. // Get and parse content-type header.
Binc::HeaderItem hi; Binc::HeaderItem hi;
@ -309,46 +335,72 @@ void MimeHandlerMail::walkmime(string &out, Binc::MimePart& doc, int depth)
} }
MimeHeaderValue content_disposition; MimeHeaderValue content_disposition;
parseMimeHeaderValue(ctd, content_disposition); parseMimeHeaderValue(ctd, content_disposition);
LOGDEB2(("Content_disposition:[%s]\n", content_disposition.value.c_str()));
string dispindic;
if (stringlowercmp("inline", content_disposition.value))
dispindic = "Attachment";
else
dispindic = "Inline";
LOGDEB2(("Content_disposition:[%s]\n", // See if we have a filename.
content_disposition.value.c_str())); string filename;
// If this is an attachment, we index the file name if any and, when
// previewing, at least show that it was there.
if (!stringlowercmp("attachment", content_disposition.value)) {
string afn;
map<string,string>::const_iterator it; map<string,string>::const_iterator it;
it = content_disposition.params.find(string("filename")); it = content_disposition.params.find(string("filename"));
if (it != content_disposition.params.end()) if (it != content_disposition.params.end())
afn = it->second; filename = it->second;
if (doc.isMessageRFC822()) {
LOGDEB2(("walkmime: message/RFC822 part\n"));
// The first part is the already parsed message.
// Call processMsg instead of walkmime so that mail headers get
// printed. The depth will tell it what to do.
if (doc.members.empty()) {
//??
return;
}
out += "\n"; out += "\n";
if (m_forPreview) if (m_forPreview)
out += "[Attachment: "; out += "[" + dispindic + " " + content_type.value + ": ";
out += afn; out += filename;
if (m_forPreview) if (m_forPreview)
out += "]"; out += "]";
out += "\n\n"; out += "\n\n";
// Attachment: we're done with this part processMsg(docout, doc.members[0], depth+1);
return; return;
} }
// The only other disposition that interests us is "inline", and then // "Simple" part.
// this has to be plain text or html LOGDEB2(("walkmime: simple part\n"));
if (stringlowercmp("inline", content_disposition.value)) {
return; // If the Content-Disposition is not inline, we treat it as
} // attachment, as per rfc2183. We don't process attachments
if (stringlowercmp("text/plain", content_type.value) && // for now, except for indexing/displaying the file name
stringlowercmp("text/html", content_type.value)) { // If it is inline but not text or html, same thing.
if (stringlowercmp("inline", content_disposition.value) ||
(stringlowercmp("text/plain", content_type.value) &&
stringlowercmp("text/html", content_type.value)) ) {
if (!filename.empty()) {
out += "\n";
if (m_forPreview)
out += "[" + dispindic + " " + content_type.value + ": ";
out += filename;
if (m_forPreview)
out += "]";
out += "\n\n";
}
// We're done with this part
return; return;
} }
// We are dealing with an inline part of text/plain or text/html type
// Normally the default charset is us-ascii. But it happens that // Normally the default charset is us-ascii. But it happens that
// 8 bit chars exist in a message that is stated as us-ascii. E.g. the // 8 bit chars exist in a message that is stated as us-ascii. E.g. the
// mailer used by yahoo support ('KANA') does this. We could convert // mailer used by yahoo support ('KANA') does this. We could convert
// to iso-8859 only if the transfer-encoding is 8 bit, or test for // to iso-8859 only if the transfer-encoding is 8 bit, or test for
// actual 8 bit chars, but what the heck, let's use 8859-1 as default // actual 8 bit chars, but what the heck, let's use 8859-1 as default
string charset = "iso-8859-1"; string charset = "iso-8859-1";
map<string,string>::const_iterator it;
it = content_type.params.find(string("charset")); it = content_type.params.find(string("charset"));
if (it != content_type.params.end()) if (it != content_type.params.end())
charset = it->second; charset = it->second;
@ -415,5 +467,4 @@ void MimeHandlerMail::walkmime(string &out, Binc::MimePart& doc, int depth)
out += string("\r\n") + utf8; out += string("\r\n") + utf8;
LOGDEB2(("walkmime: out now: [%s]\n", out.c_str())); LOGDEB2(("walkmime: out now: [%s]\n", out.c_str()));
}
} }

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _MAIL_H_INCLUDED_ #ifndef _MAIL_H_INCLUDED_
#define _MAIL_H_INCLUDED_ #define _MAIL_H_INCLUDED_
/* @(#$Id: mh_mail.h,v 1.7 2006-09-05 08:05:02 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: mh_mail.h,v 1.8 2006-09-19 14:30:39 dockes Exp $ (C) 2004 J.F.Dockes */
#include "mimehandler.h" #include "mimehandler.h"
@ -45,11 +45,11 @@ class MimeHandlerMail : public MimeHandler {
int m_msgnum; // Current message number in folder. Starts at 1 int m_msgnum; // Current message number in folder. Starts at 1
RclConfig *m_conf; // Keep pointer to rclconfig around RclConfig *m_conf; // Keep pointer to rclconfig around
MimeHandler::Status processone(const string &fn, Binc::MimeDocument& doc,
Rcl::Doc &docout);
MimeHandler::Status processmbox(const string &fn, Rcl::Doc &docout, MimeHandler::Status processmbox(const string &fn, Rcl::Doc &docout,
string &ipath); string &ipath);
void walkmime(string &out, Binc::MimePart& doc, int depth); MimeHandler::Status processMsg(Rcl::Doc &docout, Binc::MimePart& doc,
int depth);
void walkmime(Rcl::Doc &docout, Binc::MimePart& doc, int depth);
}; };
#endif /* _MAIL_H_INCLUDED_ */ #endif /* _MAIL_H_INCLUDED_ */