mail handler: remove header names when indexing to avoid artificially increasing the frequency of, e.g., the "subject" term

This commit is contained in:
Jean-Francois Dockes 2011-06-27 18:38:44 +02:00
parent 36a97cb8aa
commit 5292a97de3
2 changed files with 19 additions and 5 deletions

View File

@ -52,6 +52,8 @@ static const string cstr_title = "title";
static const string cstr_msgid = "msgid"; static const string cstr_msgid = "msgid";
static const string cstr_abstract = "abstract"; static const string cstr_abstract = "abstract";
static const string cstr_newline = "\n";
MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt) MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt)
: RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1) : RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
{ {
@ -315,21 +317,27 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
string transcoded; string transcoded;
if (doc->h.getFirstHeader("From", hi)) { if (doc->h.getFirstHeader("From", hi)) {
rfc2047_decode(hi.getValue(), transcoded); rfc2047_decode(hi.getValue(), transcoded);
text += string("From: ") + transcoded + string("\n"); if (preview())
text += string("From: ");
text += transcoded + cstr_newline;
if (depth == 1) { if (depth == 1) {
m_metaData[cstr_author] = transcoded; m_metaData[cstr_author] = transcoded;
} }
} }
if (doc->h.getFirstHeader("To", hi)) { if (doc->h.getFirstHeader("To", hi)) {
rfc2047_decode(hi.getValue(), transcoded); rfc2047_decode(hi.getValue(), transcoded);
text += string("To: ") + transcoded + string("\n"); if (preview())
text += string("To: ");
text += transcoded + cstr_newline;
if (depth == 1) { if (depth == 1) {
m_metaData[cstr_recipient] = transcoded; m_metaData[cstr_recipient] = transcoded;
} }
} }
if (doc->h.getFirstHeader("Cc", hi)) { if (doc->h.getFirstHeader("Cc", hi)) {
rfc2047_decode(hi.getValue(), transcoded); rfc2047_decode(hi.getValue(), transcoded);
text += string("Cc: ") + transcoded + string("\n"); if (preview())
text += string("Cc: ");
text += transcoded + cstr_newline;
if (depth == 1) { if (depth == 1) {
m_metaData[cstr_recipient] += " " + transcoded; m_metaData[cstr_recipient] += " " + transcoded;
} }
@ -353,7 +361,9 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
LOGDEB(("rfc2822Date...: failed: [%s]\n", transcoded.c_str())); LOGDEB(("rfc2822Date...: failed: [%s]\n", transcoded.c_str()));
} }
} }
text += string("Date: ") + transcoded + string("\n"); if (preview())
text += string("Date: ");
text += transcoded + cstr_newline;
} }
if (doc->h.getFirstHeader("Subject", hi)) { if (doc->h.getFirstHeader("Subject", hi)) {
rfc2047_decode(hi.getValue(), transcoded); rfc2047_decode(hi.getValue(), transcoded);
@ -361,7 +371,9 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
m_metaData[cstr_title] = transcoded; m_metaData[cstr_title] = transcoded;
m_subject = transcoded; m_subject = transcoded;
} }
text += string("Subject: ") + transcoded + string("\n"); if (preview())
text += string("Subject: ");
text += transcoded + cstr_newline;
} }
// Check for the presence of configured additional headers and possibly // Check for the presence of configured additional headers and possibly

View File

@ -93,6 +93,8 @@ public:
} }
protected: protected:
/// Accessor for the m_forPreview flag: true when this handler is producing
/// output for preview display (presumably as opposed to indexing).
bool preview()
{
    return m_forPreview;
}
RclConfig *m_config; RclConfig *m_config;
bool m_forPreview; bool m_forPreview;
string m_dfltInputCharset; string m_dfltInputCharset;