extract msgid + generate abstract at start of txt, excluding headers

This commit is contained in:
dockes 2009-10-31 09:00:31 +00:00
parent 5db229d492
commit daae416d98
2 changed files with 22 additions and 5 deletions

View File

@ -52,6 +52,8 @@ static const string cstr_author = "author";
static const string cstr_recipient = "recipient";
static const string cstr_modificationdate = "modificationdate";
static const string cstr_title = "title";
static const string cstr_msgid = "msgid";
static const string cstr_abstract = "abstract";
MimeHandlerMail::~MimeHandlerMail()
{
@ -165,7 +167,11 @@ bool MimeHandlerMail::next_document()
res = processMsg(m_bincdoc, 0);
LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
m_metaData[cstr_mimetype].c_str()));
const string& txt = m_metaData[cstr_content];
if (m_startoftext < txt.size())
m_metaData[cstr_abstract] = txt.substr(m_startoftext, 250);
} else {
m_metaData[cstr_abstract] = "";
res = processAttach();
}
m_idx++;
@ -313,6 +319,12 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
m_metaData[cstr_recipient] += " " + transcoded;
}
}
if (doc->h.getFirstHeader("Message-Id", hi)) {
if (depth == 1) {
m_metaData[cstr_msgid] = hi.getValue();
trimstring(m_metaData[cstr_msgid], "<>");
}
}
if (doc->h.getFirstHeader("Date", hi)) {
rfc2047_decode(hi.getValue(), transcoded);
if (depth == 1) {
@ -337,7 +349,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
text += string("Subject: ") + transcoded + string("\n");
}
text += '\n';
m_startoftext = text.size();
LOGDEB2(("MimeHandlerMail::processMsg:ismultipart %d mime subtype '%s'\n",
doc->isMultipart(), doc->getSubType().c_str()));
walkmime(doc, depth);

View File

@ -37,7 +37,7 @@ class MHMailAttach;
* file.
*/
class MimeHandlerMail : public RecollFilter {
public:
public:
MimeHandlerMail(const string &mt)
: RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
{}
@ -53,15 +53,20 @@ class MimeHandlerMail : public RecollFilter {
virtual bool skip_to_document(const string& ipath);
virtual void clear();
private:
private:
bool processMsg(Binc::MimePart *doc, int depth);
void walkmime(Binc::MimePart* doc, int depth);
bool processAttach();
Binc::MimeDocument *m_bincdoc;
int m_fd;
std::stringstream *m_stream;
int m_idx; // starts at -1 for self, then index into
// attachments;
// Current index in parts. starts at -1 for self, then index into
// attachments
int m_idx;
// Start of the actual text (after the reprinted headers). This is used
// for generating a semi-meaningful "abstract".
string::size_type m_startoftext;
string m_subject;
vector<MHMailAttach *> m_attachments;
};