Add quirks flag to handle bad Thunderbird mbox formats
This commit is contained in:
parent
6c03417195
commit
f10e14658f
@ -225,6 +225,7 @@ private:
|
|||||||
|
|
||||||
const size_t MboxCache::o_b1size = 1024;
|
const size_t MboxCache::o_b1size = 1024;
|
||||||
static class MboxCache mcache;
|
static class MboxCache mcache;
|
||||||
|
static const string keyquirks("mhmboxquirks");
|
||||||
|
|
||||||
MimeHandlerMbox::~MimeHandlerMbox()
|
MimeHandlerMbox::~MimeHandlerMbox()
|
||||||
{
|
{
|
||||||
@ -265,6 +266,18 @@ bool MimeHandlerMbox::set_document_file(const string &fn)
|
|||||||
fseek((FILE*)m_vfp, 0, SEEK_SET);
|
fseek((FILE*)m_vfp, 0, SEEK_SET);
|
||||||
m_havedoc = true;
|
m_havedoc = true;
|
||||||
m_offsets.clear();
|
m_offsets.clear();
|
||||||
|
m_quirks = 0;
|
||||||
|
|
||||||
|
// Check for location-based quirks:
|
||||||
|
RclConfig *config = RclConfig::getMainConfig();
|
||||||
|
string quirks;
|
||||||
|
if (config && config->getConfParam(keyquirks, quirks)) {
|
||||||
|
if (quirks == "tbird") {
|
||||||
|
LOGDEB(("MimeHandlerMbox: setting quirks TBIRD\n"));
|
||||||
|
m_quirks |= MBOXQUIRK_TBIRD;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -325,10 +338,6 @@ static inline void stripendnl(line_type& line, int& ll)
|
|||||||
// emacs-vm, Recoll is not alone
|
// emacs-vm, Recoll is not alone
|
||||||
// Update: 2009-11-27: word after From may be quoted string: From "john bull"
|
// Update: 2009-11-27: word after From may be quoted string: From "john bull"
|
||||||
static const char *frompat =
|
static const char *frompat =
|
||||||
#if 0 //1.9.0
|
|
||||||
"^From .* [1-2][0-9][0-9][0-9]$";
|
|
||||||
#endif
|
|
||||||
#if 1
|
|
||||||
"^From[ ]+([^ ]+|\"[^\"]+\")[ ]+" // 'From (toto@tutu|"john bull") '
|
"^From[ ]+([^ ]+|\"[^\"]+\")[ ]+" // 'From (toto@tutu|"john bull") '
|
||||||
"[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Fri Oct 26
|
"[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Fri Oct 26
|
||||||
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+" // Time, seconds optional
|
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+" // Time, seconds optional
|
||||||
@ -340,8 +349,7 @@ static const char *frompat =
|
|||||||
"[12][0-9][0-9][0-9][ ]+" // Year
|
"[12][0-9][0-9][0-9][ ]+" // Year
|
||||||
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?" // Time, secs optional
|
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?" // Time, secs optional
|
||||||
;
|
;
|
||||||
#endif
|
|
||||||
// "([ ]+[-+][0-9]{4})?$"
|
|
||||||
static regex_t fromregex;
|
static regex_t fromregex;
|
||||||
static bool regcompiled;
|
static bool regcompiled;
|
||||||
|
|
||||||
@ -425,15 +433,21 @@ bool MimeHandlerMbox::next_document()
|
|||||||
hademptyline = true;
|
hademptyline = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (hademptyline && !regexec(&fromregex, line, 0, 0, 0)) {
|
// Non-empty line. If the previous one was empty, check it against the From_ regex
|
||||||
LOGDEB0(("MimeHandlerMbox: msgnum %d, From_ at line %d: [%s]\n",
|
if (hademptyline) {
|
||||||
m_msgnum, m_lineno, line));
|
// Tbird sometimes omits the empty line, so avoid resetting
|
||||||
start = ftello(fp);
|
// the hademptyline state (initially true) and hope for the best
|
||||||
m_offsets.push_back(off_From);
|
if (!(m_quirks & MBOXQUIRK_TBIRD))
|
||||||
m_msgnum++;
|
hademptyline = false;
|
||||||
break;
|
if (!regexec(&fromregex, line, 0, 0, 0)) {
|
||||||
|
LOGDEB0(("MimeHandlerMbox: msgnum %d, "
|
||||||
|
"From_ at line %d: [%s]\n", m_msgnum, m_lineno, line));
|
||||||
|
start = ftello(fp);
|
||||||
|
m_offsets.push_back(off_From);
|
||||||
|
m_msgnum++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
hademptyline = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Look for next 'From ' line or eof, end of message.
|
// Look for next 'From ' line or eof, end of message.
|
||||||
@ -449,22 +463,25 @@ bool MimeHandlerMbox::next_document()
|
|||||||
stripendnl(line, ll);
|
stripendnl(line, ll);
|
||||||
LOGDEB2(("End: hadempty %d ll %d Line: [%s]\n",
|
LOGDEB2(("End: hadempty %d ll %d Line: [%s]\n",
|
||||||
hademptyline, ll, line));
|
hademptyline, ll, line));
|
||||||
if (hademptyline && !regexec(&fromregex, line, 0, 0, 0)) {
|
if (hademptyline) {
|
||||||
// Rewind to start of "From " line
|
if (ll > 0) {
|
||||||
fseek(fp, end, SEEK_SET);
|
if (!(m_quirks & MBOXQUIRK_TBIRD))
|
||||||
m_lineno--;
|
hademptyline = false;
|
||||||
break;
|
if (!regexec(&fromregex, line, 0, 0, 0)) {
|
||||||
|
// Rewind to start of "From " line
|
||||||
|
fseek(fp, end, SEEK_SET);
|
||||||
|
m_lineno--;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (ll <= 0) {
|
||||||
|
hademptyline = true;
|
||||||
}
|
}
|
||||||
if (mtarg <= 0 || m_msgnum == mtarg) {
|
if (mtarg <= 0 || m_msgnum == mtarg) {
|
||||||
line[ll] = '\n';
|
line[ll] = '\n';
|
||||||
line[ll+1] = 0;
|
line[ll+1] = 0;
|
||||||
msgtxt += line;
|
msgtxt += line;
|
||||||
}
|
}
|
||||||
if (ll <= 0) {
|
|
||||||
hademptyline = true;
|
|
||||||
} else {
|
|
||||||
hademptyline = false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} while (mtarg > 0 && m_msgnum < mtarg);
|
} while (mtarg > 0 && m_msgnum < mtarg);
|
||||||
|
|||||||
@ -52,6 +52,8 @@ class MimeHandlerMbox : public RecollFilter {
|
|||||||
int m_lineno; // debug
|
int m_lineno; // debug
|
||||||
mbhoff_type m_fsize;
|
mbhoff_type m_fsize;
|
||||||
vector<mbhoff_type> m_offsets;
|
vector<mbhoff_type> m_offsets;
|
||||||
|
enum Quirks {MBOXQUIRK_TBIRD=1};
|
||||||
|
int m_quirks;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _MBOX_H_INCLUDED_ */
|
#endif /* _MBOX_H_INCLUDED_ */
|
||||||
|
|||||||
@ -17,7 +17,8 @@ topdirs = ~
|
|||||||
# ".*" in there (as was the case with an older sample config)
|
# ".*" in there (as was the case with an older sample config)
|
||||||
# These are simple names, not paths (must contain no / )
|
# These are simple names, not paths (must contain no / )
|
||||||
skippedNames = #* bin CVS Cache cache* caughtspam tmp .thumbnails .svn \
|
skippedNames = #* bin CVS Cache cache* caughtspam tmp .thumbnails .svn \
|
||||||
*~ recollrc .beagle .git .hg .bzr loop.ps
|
*~ .beagle .git .hg .bzr loop.ps .xsession-errors \
|
||||||
|
.recoll* xapiandb recollrc recoll.conf
|
||||||
|
|
||||||
# Wildcard expressions for paths we shouldn't go into. The database and
|
# Wildcard expressions for paths we shouldn't go into. The database and
|
||||||
# configuration directories will be added in there, else the default value
|
# configuration directories will be added in there, else the default value
|
||||||
@ -170,3 +171,7 @@ webcachemaxmbs = 40
|
|||||||
# used for application selection inside mimeview
|
# used for application selection inside mimeview
|
||||||
#[/some/app/directory]
|
#[/some/app/directory]
|
||||||
#localfields = rclaptg = someapp; otherfield = somevalue
|
#localfields = rclaptg = someapp; otherfield = somevalue
|
||||||
|
|
||||||
|
# Enable the Thunderbird mbox format quirks for mailboxes stored under ~/.thunderbird
|
||||||
|
[~/.thunderbird]
|
||||||
|
mhmboxquirks = tbird
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user