Support quoted sender names in mbox "From " lines, e.g. From "bla bla"
This commit is contained in:
parent
ef7c432841
commit
d5c80cdf16
@ -129,22 +129,22 @@ static inline void stripendnl(line_type& line, int& ll)
|
|||||||
// in "Date: " header format, like: From - Mon, 8 May 2006 10:57:32
|
// in "Date: " header format, like: From - Mon, 8 May 2006 10:57:32
|
||||||
// This was added as an alternative format. By the way it also fools "mail" and
|
// This was added as an alternative format. By the way it also fools "mail" and
|
||||||
// emacs-vm, Recoll is not alone
|
// emacs-vm, Recoll is not alone
|
||||||
//
|
// Update 2009-11-27: the word after From may be a quoted string, e.g. From "john bull"
|
||||||
static const char *frompat =
|
static const char *frompat =
|
||||||
#if 0 //1.9.0
|
#if 0 //1.9.0
|
||||||
"^From .* [1-2][0-9][0-9][0-9]$";
|
"^From .* [1-2][0-9][0-9][0-9]$";
|
||||||
#endif
|
#endif
|
||||||
#if 1
|
#if 1
|
||||||
"^From[ ]+[^ ]+[ ]+" // From whatever
|
"^From[ ]+([^ ]+|\"[^\"]+\")[ ]+" // 'From (toto@tutu|"john bull") '
|
||||||
"[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Date
|
"[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Fri Oct 26
|
||||||
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+" // Time, seconds optional
|
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+" // Time, seconds optional
|
||||||
"([^ ]+[ ]+)?" // Optional tz
|
"([^ ]+[ ]+)?" // Optional tz
|
||||||
"[12][0-9][0-9][0-9]" // Year, unanchored, more data may follow
|
"[12][0-9][0-9][0-9]" // Year, unanchored, more data may follow
|
||||||
"|" // Or standard mail Date: header format
|
"|" // Or standard mail Date: header format
|
||||||
"^From[ ]+[^ ]+[ ]+" // From toto@tutu
|
"^From[ ]+[^ ]+[ ]+" // From toto@tutu
|
||||||
"[[:alpha:]]{3},[ ]+[0-3]?[0-9][ ]+[[:alpha:]]{3}[ ]+" // Date Mon, 8 May
|
"[[:alpha:]]{3},[ ]+[0-3]?[0-9][ ]+[[:alpha:]]{3}[ ]+" // Mon, 8 May
|
||||||
"[12][0-9][0-9][0-9][ ]+" // Year
|
"[12][0-9][0-9][0-9][ ]+" // Year
|
||||||
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?" // Time, secs optional: 10:57(:32)?
|
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?" // Time, secs optional
|
||||||
;
|
;
|
||||||
#endif
|
#endif
|
||||||
// "([ ]+[-+][0-9]{4})?$"
|
// "([ ]+[-+][0-9]{4})?$"
|
||||||
@ -335,6 +335,15 @@ int main(int argc, char **argv)
|
|||||||
cerr << "next_document failed" << endl;
|
cerr << "next_document failed" << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
map<string, string>::const_iterator it =
|
||||||
|
mh.get_meta_data().find("content");
|
||||||
|
int size;
|
||||||
|
if (it == mh.get_meta_data().end()) {
|
||||||
|
size = -1;
|
||||||
|
} else {
|
||||||
|
size = it->second.length();
|
||||||
|
}
|
||||||
|
cout << "Doc " << docnt << " size " << size << endl;
|
||||||
docnt++;
|
docnt++;
|
||||||
}
|
}
|
||||||
cout << docnt << " documents found in " << filename << endl;
|
cout << docnt << " documents found in " << filename << endl;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user