Indexing the test data now yields the same terms as 1.6.3
This commit is contained in:
parent
33c95ef1ba
commit
229eb0de78
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.46 2006-12-14 13:53:43 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.47 2006-12-15 16:33:15 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -426,7 +426,7 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
Rcl::Doc fileDoc;
|
Rcl::Doc fileDoc;
|
||||||
fileDoc.fmtime = doc.fmtime;
|
fileDoc.fmtime = doc.fmtime;
|
||||||
fileDoc.utf8fn = doc.utf8fn;
|
fileDoc.utf8fn = doc.utf8fn;
|
||||||
fileDoc.mimetype = doc.mimetype;
|
fileDoc.mimetype = interner.get_mimetype();
|
||||||
if (!m_db.add(fn, fileDoc, stp))
|
if (!m_db.add(fn, fileDoc, stp))
|
||||||
return FsTreeWalker::FtwError;
|
return FsTreeWalker::FtwError;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -51,7 +51,7 @@ namespace Dijon
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
/// Builds an empty filter.
|
/// Builds an empty filter.
|
||||||
Filter(const std::string &mime_type) {}
|
Filter(const std::string & /*mime_type */) {}
|
||||||
/// Destroys the filter.
|
/// Destroys the filter.
|
||||||
virtual ~Filter() {}
|
virtual ~Filter() {}
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.19 2006-12-15 12:40:02 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.20 2006-12-15 16:33:15 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -149,6 +149,7 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Look for appropriate handler (might still return empty)
|
// Look for appropriate handler (might still return empty)
|
||||||
|
m_mimetype = l_mime;
|
||||||
Dijon::Filter *df = getMimeHandler(l_mime, m_cfg);
|
Dijon::Filter *df = getMimeHandler(l_mime, m_cfg);
|
||||||
|
|
||||||
if (!df) {
|
if (!df) {
|
||||||
@ -172,6 +173,66 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
|
|||||||
m_fn.c_str()));
|
m_fn.c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FileInterner::~FileInterner()
|
||||||
|
{
|
||||||
|
while (!m_handlers.empty()) {
|
||||||
|
delete m_handlers.back();
|
||||||
|
m_handlers.pop_back();
|
||||||
|
}
|
||||||
|
tmpcleanup();
|
||||||
|
}
|
||||||
|
|
||||||
|
static const string string_empty;
|
||||||
|
static const string get_mimetype(Dijon::Filter* df)
|
||||||
|
{
|
||||||
|
const std::map<std::string, std::string> *docdata = &df->get_meta_data();
|
||||||
|
map<string,string>::const_iterator it;
|
||||||
|
it = docdata->find("mimetype");
|
||||||
|
if (it != docdata->end()) {
|
||||||
|
return it->second;
|
||||||
|
} else {
|
||||||
|
return string_empty;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
||||||
|
{
|
||||||
|
Dijon::Filter *df = m_handlers.back();
|
||||||
|
const std::map<std::string, std::string> *docdata = &df->get_meta_data();
|
||||||
|
map<string,string>::const_iterator it;
|
||||||
|
|
||||||
|
it = docdata->find("origcharset");
|
||||||
|
if (it != docdata->end())
|
||||||
|
doc.origcharset = it->second;
|
||||||
|
|
||||||
|
it = docdata->find("content");
|
||||||
|
if (it != docdata->end())
|
||||||
|
doc.text = it->second;
|
||||||
|
|
||||||
|
it = docdata->find("title");
|
||||||
|
if (it != docdata->end())
|
||||||
|
doc.title = it->second;
|
||||||
|
|
||||||
|
it = docdata->find("keywords");
|
||||||
|
if (it != docdata->end())
|
||||||
|
doc.keywords = it->second;
|
||||||
|
|
||||||
|
it = docdata->find("modificationdate");
|
||||||
|
if (it != docdata->end())
|
||||||
|
doc.dmtime = it->second;
|
||||||
|
|
||||||
|
it = docdata->find("abstract");
|
||||||
|
if (it != docdata->end()) {
|
||||||
|
doc.abstract = it->second;
|
||||||
|
} else {
|
||||||
|
it = docdata->find("sample");
|
||||||
|
if (it != docdata->end())
|
||||||
|
doc.abstract = it->second;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static const unsigned int MAXHANDLERS = 20;
|
static const unsigned int MAXHANDLERS = 20;
|
||||||
|
|
||||||
FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
||||||
@ -182,8 +243,11 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
return FIError;
|
return FIError;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ipath vector.
|
||||||
// Note that the vector is big enough for the maximum stack. All values
|
// Note that the vector is big enough for the maximum stack. All values
|
||||||
// over the last significant one are ""
|
// over the last significant one are ""
|
||||||
|
// We set the ipath for the first handler here, others are set
|
||||||
|
// when they're pushed on the stack
|
||||||
vector<string> vipath(MAXHANDLERS);
|
vector<string> vipath(MAXHANDLERS);
|
||||||
int vipathidx = 0;
|
int vipathidx = 0;
|
||||||
if (!ipath.empty()) {
|
if (!ipath.empty()) {
|
||||||
@ -196,12 +260,8 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Try to get doc from the topmost filter */
|
/* Try to get doc from the topmost filter */
|
||||||
while (!m_handlers.empty()) {
|
while (!m_handlers.empty()) {
|
||||||
if (!vipath.empty()) {
|
|
||||||
|
|
||||||
}
|
|
||||||
if (!m_handlers.back()->has_documents()) {
|
if (!m_handlers.back()->has_documents()) {
|
||||||
// No docs at the current top level. Pop and see if there
|
// No docs at the current top level. Pop and see if there
|
||||||
// is something at the previous one
|
// is something at the previous one
|
||||||
@ -277,23 +337,42 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
LOGERR(("FileInterner::internfile: stack empty\n"));
|
LOGERR(("FileInterner::internfile: stack empty\n"));
|
||||||
return FIError;
|
return FIError;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If indexing, we have to collect the ipath stack.
|
||||||
|
|
||||||
|
// While we're at it, we also set the mimetype, which is a special
|
||||||
|
// property: we want to get it from the topmost doc
|
||||||
|
// with an ipath, not the last one which is always text/html
|
||||||
|
// Note that ipath is returned through the parameter not doc.ipath
|
||||||
if (!m_forPreview) {
|
if (!m_forPreview) {
|
||||||
string &ipath = doc.ipath;
|
|
||||||
bool hasipath = false;
|
bool hasipath = false;
|
||||||
for (vector<Dijon::Filter*>::const_iterator it = m_handlers.begin();
|
doc.mimetype = m_mimetype;
|
||||||
it != m_handlers.end(); it++) {
|
LOGDEB2(("INITIAL mimetype: %s\n", doc.mimetype.c_str()));
|
||||||
map<string,string>::const_iterator iti =
|
map<string,string>::const_iterator titi;
|
||||||
(*it)->get_meta_data().find("ipath");
|
|
||||||
if (iti != (*it)->get_meta_data().end()) {
|
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
|
||||||
if (!iti->second.empty())
|
hit != m_handlers.end(); hit++) {
|
||||||
|
|
||||||
|
const map<string, string>& docdata = (*hit)->get_meta_data();
|
||||||
|
map<string, string>::const_iterator iti = docdata.find("ipath");
|
||||||
|
|
||||||
|
if (iti != docdata.end()) {
|
||||||
|
if (!iti->second.empty()) {
|
||||||
|
// We have a non-empty ipath
|
||||||
hasipath = true;
|
hasipath = true;
|
||||||
|
titi = docdata.find("mimetype");
|
||||||
|
if (titi != docdata.end())
|
||||||
|
doc.mimetype = titi->second;
|
||||||
|
}
|
||||||
ipath += iti->second + "|";
|
ipath += iti->second + "|";
|
||||||
} else {
|
} else {
|
||||||
ipath += "|";
|
ipath += "|";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Walk done, transform the list into a string
|
||||||
if (hasipath) {
|
if (hasipath) {
|
||||||
LOGDEB(("IPATH [%s]\n", ipath.c_str()));
|
LOGDEB2(("IPATH [%s]\n", ipath.c_str()));
|
||||||
string::size_type sit = ipath.find_last_not_of("|");
|
string::size_type sit = ipath.find_last_not_of("|");
|
||||||
if (sit == string::npos)
|
if (sit == string::npos)
|
||||||
ipath.erase();
|
ipath.erase();
|
||||||
@ -304,7 +383,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dijontorcl(m_handlers.back(), doc);
|
dijontorcl(doc);
|
||||||
|
|
||||||
// Destack what can be
|
// Destack what can be
|
||||||
while (!m_handlers.empty() && !m_handlers.back()->has_documents()) {
|
while (!m_handlers.empty() && !m_handlers.back()->has_documents()) {
|
||||||
@ -317,56 +396,6 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
return FIAgain;
|
return FIAgain;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool FileInterner::dijontorcl(Dijon::Filter *df, Rcl::Doc& doc)
|
|
||||||
{
|
|
||||||
const std::map<std::string, std::string> *docdata = &df->get_meta_data();
|
|
||||||
map<string,string>::const_iterator it;
|
|
||||||
|
|
||||||
it = docdata->find("mimetype");
|
|
||||||
if (it != docdata->end())
|
|
||||||
doc.mimetype = it->second;
|
|
||||||
|
|
||||||
it = docdata->find("origcharset");
|
|
||||||
if (it != docdata->end())
|
|
||||||
doc.origcharset = it->second;
|
|
||||||
|
|
||||||
it = docdata->find("content");
|
|
||||||
if (it != docdata->end())
|
|
||||||
doc.text = it->second;
|
|
||||||
|
|
||||||
it = docdata->find("title");
|
|
||||||
if (it != docdata->end())
|
|
||||||
doc.title = it->second;
|
|
||||||
|
|
||||||
it = docdata->find("keywords");
|
|
||||||
if (it != docdata->end())
|
|
||||||
doc.keywords = it->second;
|
|
||||||
|
|
||||||
it = docdata->find("modificationdate");
|
|
||||||
if (it != docdata->end())
|
|
||||||
doc.dmtime = it->second;
|
|
||||||
|
|
||||||
it = docdata->find("abstract");
|
|
||||||
if (it != docdata->end()) {
|
|
||||||
doc.abstract = it->second;
|
|
||||||
} else {
|
|
||||||
it = docdata->find("sample");
|
|
||||||
if (it != docdata->end())
|
|
||||||
doc.abstract = it->second;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
FileInterner::~FileInterner()
|
|
||||||
{
|
|
||||||
while (!m_handlers.empty()) {
|
|
||||||
delete m_handlers.back();
|
|
||||||
m_handlers.pop_back();
|
|
||||||
}
|
|
||||||
tmpcleanup();
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _INTERNFILE_H_INCLUDED_
|
#ifndef _INTERNFILE_H_INCLUDED_
|
||||||
#define _INTERNFILE_H_INCLUDED_
|
#define _INTERNFILE_H_INCLUDED_
|
||||||
/* @(#$Id: internfile.h,v 1.7 2006-12-15 12:40:02 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: internfile.h,v 1.8 2006-12-15 16:33:15 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -70,10 +70,12 @@ class FileInterner {
|
|||||||
* should be called again to get the following one(s).
|
* should be called again to get the following one(s).
|
||||||
*/
|
*/
|
||||||
Status internfile(Rcl::Doc& doc, string &ipath);
|
Status internfile(Rcl::Doc& doc, string &ipath);
|
||||||
|
const string& get_mimetype() {return m_mimetype;}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
RclConfig *m_cfg;
|
RclConfig *m_cfg;
|
||||||
string m_fn;
|
string m_fn;
|
||||||
|
string m_mimetype; // Mime type for [uncompressed] file
|
||||||
bool m_forPreview;
|
bool m_forPreview;
|
||||||
// m_tdir and m_tfile are used only for decompressing input file if needed
|
// m_tdir and m_tfile are used only for decompressing input file if needed
|
||||||
const string& m_tdir;
|
const string& m_tdir;
|
||||||
@ -81,7 +83,7 @@ class FileInterner {
|
|||||||
vector<Dijon::Filter*> m_handlers;
|
vector<Dijon::Filter*> m_handlers;
|
||||||
|
|
||||||
void tmpcleanup();
|
void tmpcleanup();
|
||||||
static bool dijontorcl(Dijon::Filter *, Rcl::Doc&);
|
bool dijontorcl(Rcl::Doc&);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _INTERNFILE_H_INCLUDED_ */
|
#endif /* _INTERNFILE_H_INCLUDED_ */
|
||||||
|
|||||||
@ -64,8 +64,8 @@ bool MimeHandlerHtml::next_document()
|
|||||||
if (m_havedoc == false)
|
if (m_havedoc == false)
|
||||||
return false;
|
return false;
|
||||||
m_havedoc = false;
|
m_havedoc = false;
|
||||||
LOGDEB(("textHtmlToDoc: next_document\n"));
|
|
||||||
string charset = m_defcharset;
|
string charset = m_defcharset;
|
||||||
|
LOGDEB(("textHtmlToDoc: next_document. defcharset: %s\n",charset.c_str()));
|
||||||
|
|
||||||
// - We first try to convert from the default configured charset
|
// - We first try to convert from the default configured charset
|
||||||
// (which may depend on the current directory) to utf-8. If this
|
// (which may depend on the current directory) to utf-8. If this
|
||||||
@ -76,10 +76,11 @@ bool MimeHandlerHtml::next_document()
|
|||||||
LOGDEB(("textHtmlToDoc: charset before parsing: [%s]\n", charset.c_str()));
|
LOGDEB(("textHtmlToDoc: charset before parsing: [%s]\n", charset.c_str()));
|
||||||
|
|
||||||
|
|
||||||
MyHtmlParser p(m_metaData["content"]);
|
MyHtmlParser result;
|
||||||
for (int pass = 0; pass < 2; pass++) {
|
for (int pass = 0; pass < 2; pass++) {
|
||||||
string transcoded;
|
string transcoded;
|
||||||
LOGDEB(("Html::mkDoc: pass %d\n", pass));
|
LOGDEB(("Html::mkDoc: pass %d\n", pass));
|
||||||
|
MyHtmlParser p;
|
||||||
// Try transcoding. If it fails, use original text.
|
// Try transcoding. If it fails, use original text.
|
||||||
if (!transcode(m_html, transcoded, charset, "UTF-8")) {
|
if (!transcode(m_html, transcoded, charset, "UTF-8")) {
|
||||||
LOGERR(("textHtmlToDoc: transcode failed from cs '%s' to UTF-8\n",
|
LOGERR(("textHtmlToDoc: transcode failed from cs '%s' to UTF-8\n",
|
||||||
@ -97,16 +98,18 @@ bool MimeHandlerHtml::next_document()
|
|||||||
try {
|
try {
|
||||||
p.parse_html(transcoded);
|
p.parse_html(transcoded);
|
||||||
// No exception: ok?
|
// No exception: ok?
|
||||||
|
result = p;
|
||||||
break;
|
break;
|
||||||
} catch (bool diag) {
|
} catch (bool diag) {
|
||||||
|
result = p;
|
||||||
if (diag == true)
|
if (diag == true)
|
||||||
break;
|
break;
|
||||||
LOGDEB(("textHtmlToDoc: charset [%s] doc charset [%s]\n",
|
LOGDEB(("textHtmlToDoc: charset [%s] doc charset [%s]\n",
|
||||||
charset.c_str(), p.doccharset.c_str()));
|
charset.c_str(),result.doccharset.c_str()));
|
||||||
if (!p.doccharset.empty() &&
|
if (!result.doccharset.empty() &&
|
||||||
!samecharset(p.doccharset, p.ocharset)) {
|
!samecharset(result.doccharset, result.ocharset)) {
|
||||||
LOGDEB(("textHtmlToDoc: reparse for charsets\n"));
|
LOGDEB(("textHtmlToDoc: reparse for charsets\n"));
|
||||||
charset = p.doccharset;
|
charset = result.doccharset;
|
||||||
} else {
|
} else {
|
||||||
LOGERR(("textHtmlToDoc:: error: non charset exception\n"));
|
LOGERR(("textHtmlToDoc:: error: non charset exception\n"));
|
||||||
return false;
|
return false;
|
||||||
@ -115,11 +118,12 @@ bool MimeHandlerHtml::next_document()
|
|||||||
}
|
}
|
||||||
|
|
||||||
m_metaData["origcharset"] = m_defcharset;
|
m_metaData["origcharset"] = m_defcharset;
|
||||||
|
m_metaData["content"] = result.dump;
|
||||||
m_metaData["charset"] = "utf-8";
|
m_metaData["charset"] = "utf-8";
|
||||||
m_metaData["title"] = p.title;
|
m_metaData["title"] = result.title;
|
||||||
m_metaData["keywords"] = p.keywords;
|
m_metaData["keywords"] = result.keywords;
|
||||||
m_metaData["modificationdate"] = p.dmtime;
|
m_metaData["modificationdate"] = result.dmtime;
|
||||||
m_metaData["sample"] = p.sample;
|
m_metaData["sample"] = result.sample;
|
||||||
m_metaData["mimetype"] = "text/plain";
|
m_metaData["mimetype"] = "text/plain";
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.24 2006-12-15 12:40:02 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.25 2006-12-15 16:33:15 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -94,9 +94,22 @@ bool MimeHandlerMail::next_document()
|
|||||||
{
|
{
|
||||||
if (!m_havedoc)
|
if (!m_havedoc)
|
||||||
return false;
|
return false;
|
||||||
m_havedoc = false;
|
bool res = false;
|
||||||
m_metaData["mimetype"] = "text/plain";
|
|
||||||
return processMsg(m_bincdoc, 0);
|
if (m_idx == -1) {
|
||||||
|
m_metaData["mimetype"] = "text/plain";
|
||||||
|
res =processMsg(m_bincdoc, 0);
|
||||||
|
} else {
|
||||||
|
res = processAttach();
|
||||||
|
}
|
||||||
|
m_idx++;
|
||||||
|
m_havedoc = m_idx < (int)m_attachments.size();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MimeHandlerMail::processAttach()
|
||||||
|
{
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transform a single message into a document. The subject becomes the
|
// Transform a single message into a document. The subject becomes the
|
||||||
@ -301,6 +314,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
out += "]";
|
out += "]";
|
||||||
out += "\n\n";
|
out += "\n\n";
|
||||||
}
|
}
|
||||||
|
// m_attachments.push_back(&doc);
|
||||||
// We're done with this part
|
// We're done with this part
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -373,19 +387,18 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
map<string, string>::const_iterator it =
|
map<string, string>::const_iterator it =
|
||||||
mh.get_meta_data().find("content");
|
mh.get_meta_data().find("content");
|
||||||
if (it != mh.get_meta_data().end())
|
if (it != mh.get_meta_data().end())
|
||||||
putf8 = &it->second;
|
out += it->second;
|
||||||
} else {
|
} else {
|
||||||
// Transcode to utf-8
|
// Transcode to utf-8
|
||||||
if (!transcode(body, utf8, charset, "UTF-8")) {
|
if (!transcode(body, utf8, charset, "UTF-8")) {
|
||||||
LOGERR(("walkmime: transcode failed from cs '%s' to UTF-8\n",
|
LOGERR(("walkmime: transcode failed from cs '%s' to UTF-8\n",
|
||||||
charset.c_str()));
|
charset.c_str()));
|
||||||
putf8 = &body;
|
out += body;
|
||||||
} else {
|
} else {
|
||||||
putf8 = &utf8;
|
out += utf8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (putf8)
|
|
||||||
out += *putf8;
|
|
||||||
if (out.length() && out[out.length()-1] != '\n')
|
if (out.length() && out[out.length()-1] != '\n')
|
||||||
out += '\n';
|
out += '\n';
|
||||||
|
|
||||||
|
|||||||
@ -16,9 +16,12 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _MAIL_H_INCLUDED_
|
#ifndef _MAIL_H_INCLUDED_
|
||||||
#define _MAIL_H_INCLUDED_
|
#define _MAIL_H_INCLUDED_
|
||||||
/* @(#$Id: mh_mail.h,v 1.9 2006-12-15 12:40:02 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mh_mail.h,v 1.10 2006-12-15 16:33:15 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <vector>
|
||||||
|
using std::vector;
|
||||||
|
|
||||||
#include "mimehandler.h"
|
#include "mimehandler.h"
|
||||||
|
|
||||||
namespace Binc {
|
namespace Binc {
|
||||||
@ -34,18 +37,23 @@ namespace Binc {
|
|||||||
class MimeHandlerMail : public RecollFilter {
|
class MimeHandlerMail : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerMail(const string &mt)
|
MimeHandlerMail(const string &mt)
|
||||||
: RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0)
|
: RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
|
||||||
{}
|
{}
|
||||||
virtual ~MimeHandlerMail();
|
virtual ~MimeHandlerMail();
|
||||||
virtual bool set_document_file(const string &file_path);
|
virtual bool set_document_file(const string &file_path);
|
||||||
virtual bool set_document_string(const string &data);
|
virtual bool set_document_string(const string &data);
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Binc::MimeDocument *m_bincdoc;
|
|
||||||
bool processMsg(Binc::MimePart *doc, int depth);
|
bool processMsg(Binc::MimePart *doc, int depth);
|
||||||
void walkmime(Binc::MimePart* doc, int depth);
|
void walkmime(Binc::MimePart* doc, int depth);
|
||||||
int m_fd;
|
bool processAttach();
|
||||||
std::stringstream *m_stream;
|
Binc::MimeDocument *m_bincdoc;
|
||||||
|
int m_fd;
|
||||||
|
std::stringstream *m_stream;
|
||||||
|
int m_idx; // starts at -1 for self, then index into
|
||||||
|
// attachments;
|
||||||
|
vector<Binc::MimePart *> m_attachments;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _MAIL_H_INCLUDED_ */
|
#endif /* _MAIL_H_INCLUDED_ */
|
||||||
|
|||||||
@ -37,13 +37,11 @@ class MyHtmlParser : public HtmlParser {
|
|||||||
bool in_body_tag;
|
bool in_body_tag;
|
||||||
bool in_pre_tag;
|
bool in_pre_tag;
|
||||||
bool pending_space;
|
bool pending_space;
|
||||||
bool indexing_allowed;
|
string title, sample, keywords, dump, dmtime;
|
||||||
string title, sample, keywords, dmtime;
|
|
||||||
string localdump;
|
|
||||||
string &dump;
|
|
||||||
string ocharset; // This is the charset our user thinks the doc was
|
string ocharset; // This is the charset our user thinks the doc was
|
||||||
string charset; // This is the charset it was supposedly converted to
|
string charset; // This is the charset it was supposedly converted to
|
||||||
string doccharset; // Set this to value of charset parameter in header
|
string doccharset; // Set this to value of charset parameter in header
|
||||||
|
bool indexing_allowed;
|
||||||
void process_text(const string &text);
|
void process_text(const string &text);
|
||||||
void opening_tag(const string &tag, const map<string,string> &p);
|
void opening_tag(const string &tag, const map<string,string> &p);
|
||||||
void closing_tag(const string &tag);
|
void closing_tag(const string &tag);
|
||||||
@ -54,16 +52,5 @@ class MyHtmlParser : public HtmlParser {
|
|||||||
in_body_tag(false),
|
in_body_tag(false),
|
||||||
in_pre_tag(false),
|
in_pre_tag(false),
|
||||||
pending_space(false),
|
pending_space(false),
|
||||||
indexing_allowed(true),
|
indexing_allowed(true) { }
|
||||||
dump(localdump)
|
|
||||||
{ }
|
|
||||||
MyHtmlParser(string& buf) :
|
|
||||||
in_script_tag(false),
|
|
||||||
in_style_tag(false),
|
|
||||||
in_body_tag(false),
|
|
||||||
in_pre_tag(false),
|
|
||||||
pending_space(false),
|
|
||||||
indexing_allowed(true),
|
|
||||||
dump(buf)
|
|
||||||
{ }
|
|
||||||
};
|
};
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user