diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index 9c405281..409d5b4f 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: internfile.cpp,v 1.5 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: internfile.cpp,v 1.6 2005-11-08 21:02:55 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include #include @@ -130,7 +130,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath) // Turn file into a document. The document has fields for title, body // etc., all text converted to utf8 - MimeHandler::Status mhs = handler->worker(config, fn, mime, doc, ipath); + MimeHandler::Status mhs = handler->mkDoc(config, fn, mime, doc, ipath); FileInterner::Status ret = FIError; switch (mhs) { case MimeHandler::MHError: break; diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp index bdfa588e..119055ea 100644 --- a/src/internfile/mh_html.cpp +++ b/src/internfile/mh_html.cpp @@ -39,7 +39,7 @@ using namespace std; MimeHandler::Status -MimeHandlerHtml::worker(RclConfig *conf, const string &fn, +MimeHandlerHtml::mkDoc(RclConfig *conf, const string &fn, const string &mtype, Rcl::Doc &docout, string&) { LOGDEB(("textHtmlToDoc: %s\n", fn.c_str())); @@ -48,11 +48,11 @@ MimeHandlerHtml::worker(RclConfig *conf, const string &fn, LOGINFO(("textHtmlToDoc: cant read: %s\n", fn.c_str())); return MimeHandler::MHError; } - return worker1(conf, fn, otext, mtype, docout); + return mkDoc(conf, fn, otext, mtype, docout); } MimeHandler::Status -MimeHandlerHtml::worker1(RclConfig *conf, const string &, +MimeHandlerHtml::mkDoc(RclConfig *conf, const string &, const string& htext, const string &mtype, Rcl::Doc &docout) { diff --git a/src/internfile/mh_html.h b/src/internfile/mh_html.h index 4c12f709..704a8d2b 100644 --- a/src/internfile/mh_html.h +++ b/src/internfile/mh_html.h @@ -1,21 +1,24 @@ #ifndef _HTML_H_INCLUDED_ 
#define _HTML_H_INCLUDED_ -/* @(#$Id: mh_html.h,v 1.4 2005-03-31 10:04:07 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: mh_html.h,v 1.5 2005-11-08 21:02:55 dockes Exp $ (C) 2004 J.F.Dockes */ #include "mimehandler.h" #include -// Code to turn an html document into an internal one. There are 2 -// interfaces, depending if we're working on a file, or on a -// string. The string form is with external handlers for foreign -// formats: they return a result in html, which has the advantage to -// be text (easy to use in shell-scripts), and semi-structured (can -// carry titles, abstracts, whatever) +/// Translate an html document into an internal one. +/// +/// There are 2 interfaces, depending on whether we're working on a file or +/// on a string. The string form is applied to the output of external +/// handlers for foreign formats: they return a result in html, which +/// has the advantage of being text (easy to use in shell-scripts), and +/// semi-structured (can carry titles, abstracts, whatever) class MimeHandlerHtml : public MimeHandler { public: std::string charsethint; - virtual MimeHandler::Status worker(RclConfig *conf, const string &fn, + /// Create internal document from html file (standard interface) + virtual MimeHandler::Status mkDoc(RclConfig *conf, const string &fn, const string &mtype, Rcl::Doc &docout, string&); - virtual MimeHandler::Status worker1(RclConfig *conf, const string &fn, + /// Create internal doc from html string (postfilter for external ones) + virtual MimeHandler::Status mkDoc(RclConfig *conf, const string &fn, const string& htext, const string &mtype, Rcl::Doc &docout); }; diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp index 488214ea..6aae9d3e 100644 --- a/src/internfile/mh_mail.cpp +++ b/src/internfile/mh_mail.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.8 2005-11-05 14:40:50 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.9 2005-11-08 21:02:55 dockes Exp 
$ (C) 2005 J.F.Dockes"; #endif #include @@ -44,17 +44,17 @@ MimeHandlerMail::~MimeHandlerMail() // We are called for two different file types: mbox-type folders // holding multiple messages, and maildir-type files with one message MimeHandler::Status -MimeHandlerMail::worker(RclConfig *cnf, const string &fn, +MimeHandlerMail::mkDoc(RclConfig *cnf, const string &fn, const string &mtype, Rcl::Doc &docout, string& ipath) { - LOGDEB2(("MimeHandlerMail::worker: %s [%s]\n", mtype.c_str(), fn.c_str())); + LOGDEB2(("MimeHandlerMail::mkDoc: %s [%s]\n", mtype.c_str(), fn.c_str())); conf = cnf; if (!stringlowercmp("message/rfc822", mtype)) { ipath = ""; int fd; if ((fd = open(fn.c_str(), 0)) < 0) { - LOGERR(("MimeHandlerMail::worker: open(%s) errno %d\n", + LOGERR(("MimeHandlerMail::mkDoc: open(%s) errno %d\n", fn.c_str(), errno)); return MimeHandler::MHError; } @@ -356,7 +356,7 @@ static void walkmime(RclConfig *cnf, string &out, Binc::MimePart& doc, MimeHandlerHtml mh; Rcl::Doc hdoc; mh.charsethint = charset; - mh.worker1(cnf, "", body, content_type.value, hdoc); + mh.mkDoc(cnf, "", body, content_type.value, hdoc); transcoded = hdoc.text; } else { // Transcode to utf-8 diff --git a/src/internfile/mh_mail.h b/src/internfile/mh_mail.h index f7089636..1aaf795c 100644 --- a/src/internfile/mh_mail.h +++ b/src/internfile/mh_mail.h @@ -1,12 +1,13 @@ #ifndef _MAIL_H_INCLUDED_ #define _MAIL_H_INCLUDED_ -/* @(#$Id: mh_mail.h,v 1.2 2005-03-31 10:04:07 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: mh_mail.h,v 1.3 2005-11-08 21:02:55 dockes Exp $ (C) 2004 J.F.Dockes */ #include "mimehandler.h" namespace Binc { class MimeDocument; } -// Code to turn a mail folder file into internal documents +/// Translate a mail folder file into internal documents (also works +/// for maildir files) class MimeHandlerMail : public MimeHandler { void *vfp; int msgnum; @@ -19,7 +20,8 @@ class MimeHandlerMail : public MimeHandler { MimeHandlerMail() : vfp(0), msgnum(0), conf(0) {} virtual 
~MimeHandlerMail(); virtual MimeHandler::Status - worker(RclConfig *conf, const string &fn, + mkDoc(RclConfig *conf, const string &fn, const string &mtype, Rcl::Doc &docout, string& ipath); }; + #endif /* _MAIL_H_INCLUDED_ */ diff --git a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp index bda3d886..831b5943 100644 --- a/src/internfile/mimehandler.cpp +++ b/src/internfile/mimehandler.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.9 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.10 2005-11-08 21:02:55 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -19,13 +19,13 @@ using namespace std; class MimeHandlerText : public MimeHandler { public: - MimeHandler::Status worker(RclConfig *conf, const string &fn, + MimeHandler::Status mkDoc(RclConfig *conf, const string &fn, const string &mtype, Rcl::Doc &docout, string&); }; // Process a plain text file -MimeHandler::Status MimeHandlerText::worker(RclConfig *conf, const string &fn, +MimeHandler::Status MimeHandlerText::mkDoc(RclConfig *conf, const string &fn, const string &mtype, Rcl::Doc &docout, string&) { string otext; @@ -61,7 +61,7 @@ class MimeHandlerExec : public MimeHandler { public: list params; virtual ~MimeHandlerExec() {} - virtual MimeHandler::Status worker(RclConfig *conf, const string &fn, + virtual MimeHandler::Status mkDoc(RclConfig *conf, const string &fn, const string &mtype, Rcl::Doc &docout, string&); @@ -71,12 +71,12 @@ class MimeHandlerExec : public MimeHandler { // Execute an external program to translate a file from its native format // to html. 
Then call the html parser to do the actual indexing MimeHandler::Status -MimeHandlerExec::worker(RclConfig *conf, const string &fn, +MimeHandlerExec::mkDoc(RclConfig *conf, const string &fn, const string &mtype, Rcl::Doc &docout, string&) { if (params.empty()) { // Hu ho - LOGERR(("MimeHandlerExec::worker: empty params for mime %s\n", + LOGERR(("MimeHandlerExec::mkDoc: empty params for mime %s\n", mtype.c_str())); return MimeHandler::MHError; } @@ -100,7 +100,7 @@ MimeHandlerExec::worker(RclConfig *conf, const string &fn, // Process/index the html MimeHandlerHtml hh; - return hh.worker1(conf, fn, html, mtype, docout); + return hh.mkDoc(conf, fn, html, mtype, docout); } static MimeHandler *mhfact(const string &mime) diff --git a/src/internfile/mimehandler.h b/src/internfile/mimehandler.h index 2c9dc9fe..4d28e18f 100644 --- a/src/internfile/mimehandler.h +++ b/src/internfile/mimehandler.h @@ -1,6 +1,6 @@ #ifndef _MIMEHANDLER_H_INCLUDED_ #define _MIMEHANDLER_H_INCLUDED_ -/* @(#$Id: mimehandler.h,v 1.6 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: mimehandler.h,v 1.7 2005-11-08 21:02:55 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -16,6 +16,8 @@ class MimeHandler { public: virtual ~MimeHandler() {} + /// Status from mkDoc method. + enum Status {MHError, MHDone, MHAgain}; /** * Transform external data into internal utf8 document * @@ -31,17 +33,18 @@ class MimeHandler { * If this is empty (during indexation), it will be filled-up * by the function, and all the file's documents will be * returned by successive calls. - * @return the return value indicates if there are more documents to be + * @return The return value indicates if there are more documents to be * fetched from the same file. 
*/ - enum Status {MHError, MHDone, MHAgain}; - virtual Status worker(RclConfig * conf, const std::string &filename, - const std::string &mimetype, Rcl::Doc& outdoc, - string& ipath) = 0; + virtual MimeHandler::Status mkDoc(RclConfig * conf, + const std::string &filename, + const std::string &mimetype, + Rcl::Doc& outdoc, + string& ipath) = 0; }; /** - * Return indexing handler class for given mime type + * Return indexing handler object for the given mime type * returned pointer should be deleted by caller */ extern MimeHandler *getMimeHandler(const std::string &mtyp, ConfTree *mhdlers);