Extract the Message-Id header (msgid) and generate an abstract from the start of the text, excluding the reprinted headers
This commit is contained in:
parent
5db229d492
commit
daae416d98
@ -52,6 +52,8 @@ static const string cstr_author = "author";
|
|||||||
static const string cstr_recipient = "recipient";
|
static const string cstr_recipient = "recipient";
|
||||||
static const string cstr_modificationdate = "modificationdate";
|
static const string cstr_modificationdate = "modificationdate";
|
||||||
static const string cstr_title = "title";
|
static const string cstr_title = "title";
|
||||||
|
static const string cstr_msgid = "msgid";
|
||||||
|
static const string cstr_abstract = "abstract";
|
||||||
|
|
||||||
MimeHandlerMail::~MimeHandlerMail()
|
MimeHandlerMail::~MimeHandlerMail()
|
||||||
{
|
{
|
||||||
@ -165,7 +167,11 @@ bool MimeHandlerMail::next_document()
|
|||||||
res = processMsg(m_bincdoc, 0);
|
res = processMsg(m_bincdoc, 0);
|
||||||
LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
|
LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
|
||||||
m_metaData[cstr_mimetype].c_str()));
|
m_metaData[cstr_mimetype].c_str()));
|
||||||
|
const string& txt = m_metaData[cstr_content];
|
||||||
|
if (m_startoftext < txt.size())
|
||||||
|
m_metaData[cstr_abstract] = txt.substr(m_startoftext, 250);
|
||||||
} else {
|
} else {
|
||||||
|
m_metaData[cstr_abstract] = "";
|
||||||
res = processAttach();
|
res = processAttach();
|
||||||
}
|
}
|
||||||
m_idx++;
|
m_idx++;
|
||||||
@ -313,6 +319,12 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
|||||||
m_metaData[cstr_recipient] += " " + transcoded;
|
m_metaData[cstr_recipient] += " " + transcoded;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (doc->h.getFirstHeader("Message-Id", hi)) {
|
||||||
|
if (depth == 1) {
|
||||||
|
m_metaData[cstr_msgid] = hi.getValue();
|
||||||
|
trimstring(m_metaData[cstr_msgid], "<>");
|
||||||
|
}
|
||||||
|
}
|
||||||
if (doc->h.getFirstHeader("Date", hi)) {
|
if (doc->h.getFirstHeader("Date", hi)) {
|
||||||
rfc2047_decode(hi.getValue(), transcoded);
|
rfc2047_decode(hi.getValue(), transcoded);
|
||||||
if (depth == 1) {
|
if (depth == 1) {
|
||||||
@ -337,7 +349,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
|||||||
text += string("Subject: ") + transcoded + string("\n");
|
text += string("Subject: ") + transcoded + string("\n");
|
||||||
}
|
}
|
||||||
text += '\n';
|
text += '\n';
|
||||||
|
m_startoftext = text.size();
|
||||||
LOGDEB2(("MimeHandlerMail::processMsg:ismultipart %d mime subtype '%s'\n",
|
LOGDEB2(("MimeHandlerMail::processMsg:ismultipart %d mime subtype '%s'\n",
|
||||||
doc->isMultipart(), doc->getSubType().c_str()));
|
doc->isMultipart(), doc->getSubType().c_str()));
|
||||||
walkmime(doc, depth);
|
walkmime(doc, depth);
|
||||||
|
|||||||
@ -37,7 +37,7 @@ class MHMailAttach;
|
|||||||
* file.
|
* file.
|
||||||
*/
|
*/
|
||||||
class MimeHandlerMail : public RecollFilter {
|
class MimeHandlerMail : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerMail(const string &mt)
|
MimeHandlerMail(const string &mt)
|
||||||
: RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
|
: RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
|
||||||
{}
|
{}
|
||||||
@ -53,15 +53,20 @@ class MimeHandlerMail : public RecollFilter {
|
|||||||
virtual bool skip_to_document(const string& ipath);
|
virtual bool skip_to_document(const string& ipath);
|
||||||
virtual void clear();
|
virtual void clear();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool processMsg(Binc::MimePart *doc, int depth);
|
bool processMsg(Binc::MimePart *doc, int depth);
|
||||||
void walkmime(Binc::MimePart* doc, int depth);
|
void walkmime(Binc::MimePart* doc, int depth);
|
||||||
bool processAttach();
|
bool processAttach();
|
||||||
Binc::MimeDocument *m_bincdoc;
|
Binc::MimeDocument *m_bincdoc;
|
||||||
int m_fd;
|
int m_fd;
|
||||||
std::stringstream *m_stream;
|
std::stringstream *m_stream;
|
||||||
int m_idx; // starts at -1 for self, then index into
|
|
||||||
// attachments;
|
// Current index in parts. Starts at -1 for self, then index into
|
||||||
|
// attachments
|
||||||
|
int m_idx;
|
||||||
|
// Start of actual text (after the reprinted headers). This is for
|
||||||
|
// generating a semi-meaningful "abstract".
|
||||||
|
string::size_type m_startoftext;
|
||||||
string m_subject;
|
string m_subject;
|
||||||
vector<MHMailAttach *> m_attachments;
|
vector<MHMailAttach *> m_attachments;
|
||||||
};
|
};
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user