simplified the mbox-reading code

This commit is contained in:
Jean-Francois Dockes 2010-11-30 15:21:44 +01:00
parent 629e62e2b8
commit 084740cd2b

View File

@ -373,6 +373,8 @@ bool MimeHandlerMbox::next_document()
} }
LOGDEB0(("MimeHandlerMbox::next_document: fn %s, msgnum %d mtarg %d \n", LOGDEB0(("MimeHandlerMbox::next_document: fn %s, msgnum %d mtarg %d \n",
m_fn.c_str(), m_msgnum, mtarg)); m_fn.c_str(), m_msgnum, mtarg));
if (mtarg == 0)
mtarg = -1;
if (!regcompiled) { if (!regcompiled) {
regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED); regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED);
@ -406,90 +408,66 @@ bool MimeHandlerMbox::next_document()
} }
} }
off_t start, end; off_t message_end = 0;
bool iseof = false; bool iseof = false;
bool hademptyline = true; bool hademptyline = true;
string& msgtxt = m_metaData["content"]; string& msgtxt = m_metaData["content"];
msgtxt.erase(); msgtxt.erase();
do { line_type line;
// Look for next 'From ' Line, start of message. Set start to for (;;) {
// line after this message_end = ftello(fp);
line_type line; if (!fgets(line, LL, fp)) {
for (;;) { LOGDEB2(("MimeHandlerMbox:next: eof\n"));
mbhoff_type off_From = ftello(fp); iseof = true;
if (!fgets(line, LL, fp)) { m_msgnum++;
// Eof hit while looking for 'From ' -> file done. We'd need break;
// another return code here
LOGDEB2(("MimeHandlerMbox:next: hit eof while looking for "
"start From_ line\n"));
return false;
}
m_lineno++;
int ll;
stripendnl(line, ll);
LOGDEB2(("Start: hadempty %d lineno %d ll %d Line: [%s]\n",
hademptyline, m_lineno, ll, line));
if (ll <= 0) {
hademptyline = true;
continue;
}
// Non empty line. If the previous one was empty, check regex
if (hademptyline) {
// Tbird sometimes omits the empty line, so avoid resetting
// state (initially true) and hope for the best
if (!(m_quirks & MBOXQUIRK_TBIRD))
hademptyline = false;
if (!regexec(&fromregex, line, 0, 0, 0)) {
LOGDEB0(("MimeHandlerMbox: msgnum %d, "
"From_ at line %d: [%s]\n", m_msgnum, m_lineno, line));
start = ftello(fp);
m_offsets.push_back(off_From);
m_msgnum++;
break;
}
}
} }
m_lineno++;
// Look for next 'From ' line or eof, end of message. int ll;
for (;;) { stripendnl(line, ll);
end = ftello(fp); LOGDEB2(("mhmbox:next: hadempty %d lineno %d ll %d Line: [%s]\n",
if (!fgets(line, LL, fp)) { hademptyline, m_lineno, ll, line));
if (ferror(fp) || feof(fp)) if (hademptyline) {
iseof = true; if (ll > 0) {
break; // Non-empty line with empty line flag set, reset flag
} // and check regex.
m_lineno++; if (!(m_quirks & MBOXQUIRK_TBIRD)) {
int ll; // Tbird sometimes omits the empty line, so avoid
stripendnl(line, ll); // resetting state (initially true) and hope for
LOGDEB2(("End: hadempty %d ll %d Line: [%s]\n", // the best
hademptyline, ll, line)); hademptyline = false;
if (hademptyline) { }
if (ll > 0) { if (!regexec(&fromregex, line, 0, 0, 0)) {
if (!(m_quirks & MBOXQUIRK_TBIRD)) LOGDEB1(("MimeHandlerMbox: msgnum %d, "
hademptyline = false; "From_ at line %d: [%s]\n", m_msgnum, m_lineno, line));
if (!regexec(&fromregex, line, 0, 0, 0)) { m_offsets.push_back(message_end);
// Rewind to start of "From " line m_msgnum++;
fseek(fp, end, SEEK_SET); if ((mtarg <= 0 && m_msgnum > 1) ||
m_lineno--; (mtarg > 0 && m_msgnum > mtarg)) {
hademptyline = true; // Got message, go do something with it
break; break;
} }
// From_ lines are not part of messages
continue;
} }
} else if (ll <= 0) {
hademptyline = true;
}
if (mtarg <= 0 || m_msgnum == mtarg) {
line[ll] = '\n';
line[ll+1] = 0;
msgtxt += line;
} }
} else if (ll <= 0) {
hademptyline = true;
} }
} while (mtarg > 0 && m_msgnum < mtarg); if (mtarg <= 0 || m_msgnum == mtarg) {
// Accumulate message lines
LOGDEB1(("Message text: [%s]\n", msgtxt.c_str())); line[ll] = '\n';
line[ll+1] = 0;
msgtxt += line;
}
}
LOGDEB2(("Message text length %d\n", msgtxt.size()));
LOGDEB2(("Message text: [%s]\n", msgtxt.c_str()));
char buf[20]; char buf[20];
sprintf(buf, "%d", m_msgnum); // m_msgnum was incremented when hitting the next From_ or eof, so the data
// is for m_msgnum - 1
sprintf(buf, "%d", m_msgnum - 1);
m_metaData["ipath"] = buf; m_metaData["ipath"] = buf;
m_metaData["mimetype"] = "message/rfc822"; m_metaData["mimetype"] = "message/rfc822";
if (iseof) { if (iseof) {