restructuring on mimehandler files
This commit is contained in:
parent
e2053c1d1b
commit
6cba3b65c1
45
src/internfile/mh_exec.cpp
Normal file
45
src/internfile/mh_exec.cpp
Normal file
@ -0,0 +1,45 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mh_exec.cpp,v 1.1 2005-11-18 13:23:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include "execmd.h"
|
||||
#include "mh_exec.h"
|
||||
#include "mh_html.h"
|
||||
#include "debuglog.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Execute an external program to translate a file from its native format
|
||||
// to html. Then call the html parser to do the actual indexing
|
||||
// Convert a file to an internal document by running the external filter
// described by 'params' on it. The filter output is expected to be html
// and is passed to MimeHandlerHtml, which fills in 'docout'.
//
// conf:   configuration, used to locate the filter and by the html handler
// fn:     path of the file to convert; appended as last filter argument
// mtype:  mime type of the file (logged on error, forwarded to html handler)
// docout: output document
// The last (unnamed) string parameter is not used by this handler.
//
// Returns MHError if 'params' is empty or if the command returns a non-zero
// status, else whatever the html handler returns.
MimeHandler::Status
MimeHandlerExec::mkDoc(RclConfig *conf, const string &fn,
                       const string &mtype, Rcl::Doc &docout, string&)
{
    // We need at least a command name to do anything
    if (params.empty()) {
        LOGERR(("MimeHandlerExec::mkDoc: empty params for mime %s\n",
                mtype.c_str()));
        return MimeHandler::MHError;
    }

    // The first element names the filter: resolve it to a command
    list<string>::iterator argp = params.begin();
    string cmd = find_filter(conf, *argp);

    // The remaining elements are the filter arguments. The name of the
    // file to process goes last.
    list<string> args;
    for (++argp; argp != params.end(); ++argp) {
        args.push_back(*argp);
    }
    args.push_back(fn);

    // Run the filter, collecting its output (supposedly html) in a string
    string html;
    ExecCmd runner;
    int rc = runner.doexec(cmd, args, 0, &html);
    if (rc != 0) {
        LOGERR(("MimeHandlerExec: command status 0x%x: %s\n",
                rc, cmd.c_str()));
        return MimeHandler::MHError;
    }

    // Let the html handler turn the filter output into the document
    MimeHandlerHtml htmlhandler;
    return htmlhandler.mkDoc(conf, fn, html, mtype, docout);
}
|
||||
26
src/internfile/mh_exec.h
Normal file
26
src/internfile/mh_exec.h
Normal file
@ -0,0 +1,26 @@
|
||||
#ifndef _MH_EXEC_H_INCLUDED_
|
||||
#define _MH_EXEC_H_INCLUDED_
|
||||
/* @(#$Id: mh_exec.h,v 1.1 2005-11-18 13:23:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
|
||||
#include "rclconfig.h"
|
||||
#include "rcldb.h"
|
||||
#include "mimehandler.h"
|
||||
|
||||
/**
|
||||
Turn external document into internal one by executing an external filter.
|
||||
The command to execute, and its parameters, come from the mimeconf file
|
||||
*/
|
||||
class MimeHandlerExec : public MimeHandler {
|
||||
public:
|
||||
std::list<std::string> params;
|
||||
virtual ~MimeHandlerExec() {}
|
||||
virtual MimeHandler::Status
|
||||
mkDoc(RclConfig *conf, const std::string &fn,
|
||||
const std::string &mtype, Rcl::Doc &docout, std::string&);
|
||||
|
||||
};
|
||||
|
||||
#endif /* _MH_EXEC_H_INCLUDED_ */
|
||||
@ -32,7 +32,7 @@
|
||||
#include "mimeparse.h"
|
||||
#include "myhtmlparse.h"
|
||||
#include "indextext.h"
|
||||
#include "html.h"
|
||||
#include "mh_html.h"
|
||||
|
||||
#include <iostream>
|
||||
using namespace std;
|
||||
|
||||
@ -1,25 +1,33 @@
|
||||
#ifndef _HTML_H_INCLUDED_
|
||||
#define _HTML_H_INCLUDED_
|
||||
/* @(#$Id: mh_html.h,v 1.5 2005-11-08 21:02:55 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
#include "mimehandler.h"
|
||||
/* @(#$Id: mh_html.h,v 1.6 2005-11-18 13:23:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
/// Translate html document to an internal one.
|
||||
///
|
||||
/// There are 2 interfaces, depending if we're working on a file, or
|
||||
/// on a string. The string form is applied to the output of external
|
||||
/// handlers for foreign formats: they return a result in html, which
|
||||
/// has the advantage to be text (easy to use in shell-scripts), and
|
||||
/// semi-structured (can carry titles, abstracts, whatever)
|
||||
#include "mimehandler.h"
|
||||
|
||||
/**
|
||||
Translate html document to internal one.
|
||||
|
||||
There are 2 interfaces, depending if we're working on a file, or
|
||||
on a string. The string form is applied to the output of external
|
||||
handlers for foreign formats: they return a result in html, which
|
||||
has the advantage to be text (easy to use in shell-scripts), and
|
||||
semi-structured (can carry titles, abstracts, whatever)
|
||||
*/
|
||||
class MimeHandlerHtml : public MimeHandler {
|
||||
public:
|
||||
std::string charsethint;
|
||||
/// Create internal document from html file (standard interface)
|
||||
virtual MimeHandler::Status mkDoc(RclConfig *conf, const string &fn,
|
||||
const string &mtype, Rcl::Doc &docout, string&);
|
||||
/// Create internal doc from html string (postfilter for external ones)
|
||||
virtual MimeHandler::Status mkDoc(RclConfig *conf, const string &fn,
|
||||
const string& htext,
|
||||
const string &mtype, Rcl::Doc &docout);
|
||||
|
||||
/** Create internal document from html file (standard interface) */
|
||||
virtual MimeHandler::Status
|
||||
mkDoc(RclConfig *conf, const std::string &fn,
|
||||
const std::string &mtype, Rcl::Doc &docout, std::string&);
|
||||
|
||||
/** Create internal doc from html string (postfilter for external ones) */
|
||||
virtual MimeHandler::Status
|
||||
mkDoc(RclConfig *conf, const std::string &fn, const std::string& htext,
|
||||
const std::string &mtype, Rcl::Doc &docout);
|
||||
};
|
||||
|
||||
#endif /* _HTML_H_INCLUDED_ */
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.9 2005-11-08 21:02:55 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.10 2005-11-18 13:23:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
@ -19,11 +19,11 @@ using std::map;
|
||||
#include "transcode.h"
|
||||
#include "mimeparse.h"
|
||||
#include "indextext.h"
|
||||
#include "mail.h"
|
||||
#include "mh_mail.h"
|
||||
#include "debuglog.h"
|
||||
#include "smallut.h"
|
||||
#include "mimeparse.h"
|
||||
#include "html.h"
|
||||
#include "mh_html.h"
|
||||
|
||||
// binc imap mime definitions
|
||||
#include "mime.h"
|
||||
|
||||
@ -1,14 +1,29 @@
|
||||
#ifndef _MAIL_H_INCLUDED_
|
||||
#define _MAIL_H_INCLUDED_
|
||||
/* @(#$Id: mh_mail.h,v 1.3 2005-11-08 21:02:55 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: mh_mail.h,v 1.4 2005-11-18 13:23:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include "mimehandler.h"
|
||||
|
||||
namespace Binc {
|
||||
class MimeDocument;
|
||||
}
|
||||
|
||||
/// Translate a mail folder file into internal documents (also works
|
||||
/// for maildir files)
|
||||
/**
|
||||
Translate a mail folder file into internal documents (also works
|
||||
for maildir files). This has to keep state while parsing a mail folder
|
||||
file.
|
||||
*/
|
||||
class MimeHandlerMail : public MimeHandler {
|
||||
public:
|
||||
MimeHandlerMail() : vfp(0), msgnum(0), conf(0) {}
|
||||
|
||||
virtual MimeHandler::Status
|
||||
mkDoc(RclConfig *conf, const std::string &fn,
|
||||
const std::string &mtype, Rcl::Doc &docout, std::string& ipath);
|
||||
|
||||
virtual ~MimeHandlerMail();
|
||||
|
||||
private:
|
||||
void *vfp;
|
||||
int msgnum;
|
||||
RclConfig *conf;
|
||||
@ -16,12 +31,6 @@ class MimeHandlerMail : public MimeHandler {
|
||||
Rcl::Doc &docout);
|
||||
MimeHandler::Status processmbox(const string &fn, Rcl::Doc &docout,
|
||||
string &ipath);
|
||||
public:
|
||||
MimeHandlerMail() : vfp(0), msgnum(0), conf(0) {}
|
||||
virtual ~MimeHandlerMail();
|
||||
virtual MimeHandler::Status
|
||||
mkDoc(RclConfig *conf, const string &fn,
|
||||
const string &mtype, Rcl::Doc &docout, string& ipath);
|
||||
};
|
||||
|
||||
#endif /* _MAIL_H_INCLUDED_ */
|
||||
|
||||
46
src/internfile/mh_text.cpp
Normal file
46
src/internfile/mh_text.cpp
Normal file
@ -0,0 +1,46 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mh_text.cpp,v 1.1 2005-11-18 13:23:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
using namespace std;
|
||||
|
||||
#include "mh_text.h"
|
||||
#include "csguess.h"
|
||||
#include "debuglog.h"
|
||||
#include "readfile.h"
|
||||
#include "transcode.h"
|
||||
|
||||
// Process a plain text file
|
||||
// Process a plain text file: read it in, determine its character set,
// transcode the text to UTF-8 and store the result in the output document.
//
// conf:   configuration, supplies the default charset and tells whether
//         charset guessing is enabled
// fn:     path of the text file to read
// mtype:  mime type (not used here)
// docout: output document. It is overwritten with a fresh document holding
//         the original charset name and the UTF-8 text.
// The last (unnamed) string parameter is not used by this handler.
//
// Returns MHError if the file can't be read or the transcoding fails,
// MHDone otherwise.
MimeHandler::Status MimeHandlerText::mkDoc(RclConfig *conf, const string &fn,
                                           const string &mtype,
                                           Rcl::Doc &docout, string&)
{
    string otext;
    if (!file_to_string(fn, otext))
        return MimeHandler::MHError;

    // Try to guess charset, then convert to utf-8, and fill document
    // fields. The charset guesser really doesn't work well in general
    // and should be avoided (especially for short documents)
    string charset;
    if (conf->getGuessCharset()) {
        charset = csguess(otext, conf->getDefCharset());
    } else {
        charset = conf->getDefCharset();
    }

    string utf8;
    // The log macros take printf-style arguments: a std::string can't be
    // passed for %s, we need the C string.
    LOGDEB1(("textPlainToDoc: transcod from %s to %s\n",
             charset.c_str(), "UTF-8"));

    if (!transcode(otext, utf8, charset, "UTF-8")) {
        cerr << "textPlainToDoc: transcode failed: charset '" << charset
             << "' to UTF-8: "<< utf8 << endl;
        return MimeHandler::MHError;
    }

    // Build a clean document and copy it out, so that no previous content
    // of docout survives
    Rcl::Doc out;
    out.origcharset = charset;
    out.text = utf8;
    docout = out;
    return MimeHandler::MHDone;
}
|
||||
24
src/internfile/mh_text.h
Normal file
24
src/internfile/mh_text.h
Normal file
@ -0,0 +1,24 @@
|
||||
#ifndef _MH_TEXT_H_INCLUDED_
|
||||
#define _MH_TEXT_H_INCLUDED_
|
||||
/* @(#$Id: mh_text.h,v 1.1 2005-11-18 13:23:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "rclconfig.h"
|
||||
#include "rcldb.h"
|
||||
#include "mimehandler.h"
|
||||
|
||||
/**
|
||||
* Handler for text/plain files.
|
||||
*
|
||||
* Maybe try to guess charset, or use default, then transcode to utf8
|
||||
*/
|
||||
class MimeHandlerText : public MimeHandler {
|
||||
public:
|
||||
MimeHandler::Status mkDoc(RclConfig *conf, const std::string &fn,
|
||||
const std::string &mtype, Rcl::Doc &docout,
|
||||
std::string&);
|
||||
|
||||
};
|
||||
|
||||
#endif /* _MH_TEXT_H_INCLUDED_ */
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.11 2005-11-16 15:07:20 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.12 2005-11-18 13:23:46 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
@ -7,103 +7,15 @@ static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.11 2005-11-16 15:07:20 dockes
|
||||
using namespace std;
|
||||
|
||||
#include "mimehandler.h"
|
||||
#include "readfile.h"
|
||||
#include "csguess.h"
|
||||
#include "transcode.h"
|
||||
#include "debuglog.h"
|
||||
#include "smallut.h"
|
||||
#include "html.h"
|
||||
#include "mail.h"
|
||||
#include "execmd.h"
|
||||
#include "pathut.h"
|
||||
|
||||
class MimeHandlerText : public MimeHandler {
|
||||
public:
|
||||
MimeHandler::Status mkDoc(RclConfig *conf, const string &fn,
|
||||
const string &mtype, Rcl::Doc &docout, string&);
|
||||
|
||||
};
|
||||
|
||||
// Process a plain text file
|
||||
// Process a plain text file: read it in, determine its character set,
// transcode the text to UTF-8 and store the result in the output document.
//
// conf:   configuration, supplies the default charset and tells whether
//         charset guessing is enabled
// fn:     path of the text file to read
// mtype:  mime type (not used here)
// docout: output document. It is overwritten with a fresh document holding
//         the original charset name and the UTF-8 text.
// The last (unnamed) string parameter is not used by this handler.
//
// Returns MHError if the file can't be read or the transcoding fails,
// MHDone otherwise.
MimeHandler::Status MimeHandlerText::mkDoc(RclConfig *conf, const string &fn,
                                           const string &mtype,
                                           Rcl::Doc &docout, string&)
{
    string otext;
    if (!file_to_string(fn, otext))
        return MimeHandler::MHError;

    // Try to guess charset, then convert to utf-8, and fill document
    // fields. The charset guesser really doesn't work well in general
    // and should be avoided (especially for short documents)
    string charset;
    if (conf->getGuessCharset()) {
        charset = csguess(otext, conf->getDefCharset());
    } else {
        charset = conf->getDefCharset();
    }

    string utf8;
    // The log macros take printf-style arguments: a std::string can't be
    // passed for %s, we need the C string.
    LOGDEB1(("textPlainToDoc: transcod from %s to %s\n",
             charset.c_str(), "UTF-8"));

    if (!transcode(otext, utf8, charset, "UTF-8")) {
        cerr << "textPlainToDoc: transcode failed: charset '" << charset
             << "' to UTF-8: "<< utf8 << endl;
        return MimeHandler::MHError;
    }

    // Build a clean document and copy it out, so that no previous content
    // of docout survives
    Rcl::Doc out;
    out.origcharset = charset;
    out.text = utf8;
    docout = out;
    return MimeHandler::MHDone;
}
|
||||
|
||||
class MimeHandlerExec : public MimeHandler {
|
||||
public:
|
||||
list<string> params;
|
||||
virtual ~MimeHandlerExec() {}
|
||||
virtual MimeHandler::Status mkDoc(RclConfig *conf, const string &fn,
|
||||
const string &mtype, Rcl::Doc &docout,
|
||||
string&);
|
||||
|
||||
};
|
||||
|
||||
|
||||
// Execute an external program to translate a file from its native format
|
||||
// to html. Then call the html parser to do the actual indexing
|
||||
// Convert a file to an internal document by running the external filter
// described by 'params' on it. The filter output is expected to be html
// and is passed to MimeHandlerHtml, which fills in 'docout'.
//
// conf:   configuration, used to locate the filter and by the html handler
// fn:     path of the file to convert; appended as last filter argument
// mtype:  mime type of the file (logged on error, forwarded to html handler)
// docout: output document
// The last (unnamed) string parameter is not used by this handler.
//
// Returns MHError if 'params' is empty or if the command returns a non-zero
// status, else whatever the html handler returns.
MimeHandler::Status
MimeHandlerExec::mkDoc(RclConfig *conf, const string &fn,
                       const string &mtype, Rcl::Doc &docout, string&)
{
    // We need at least a command name to do anything
    if (params.empty()) {
        LOGERR(("MimeHandlerExec::mkDoc: empty params for mime %s\n",
                mtype.c_str()));
        return MimeHandler::MHError;
    }

    // The first element names the filter: resolve it to a command
    list<string>::iterator argp = params.begin();
    string cmd = find_filter(conf, *argp);

    // The remaining elements are the filter arguments. The name of the
    // file to process goes last.
    list<string> args;
    for (++argp; argp != params.end(); ++argp) {
        args.push_back(*argp);
    }
    args.push_back(fn);

    // Run the filter, collecting its output (supposedly html) in a string
    string html;
    ExecCmd runner;
    int rc = runner.doexec(cmd, args, 0, &html);
    if (rc != 0) {
        LOGERR(("MimeHandlerExec: command status 0x%x: %s\n",
                rc, cmd.c_str()));
        return MimeHandler::MHError;
    }

    // Let the html handler turn the filter output into the document
    MimeHandlerHtml htmlhandler;
    return htmlhandler.mkDoc(conf, fn, html, mtype, docout);
}
|
||||
|
||||
static MimeHandler *mhfact(const string &mime)
|
||||
#include "mh_html.h"
|
||||
#include "mh_mail.h"
|
||||
#include "mh_text.h"
|
||||
#include "mh_exec.h"
|
||||
|
||||
/** Create internal handler object appropriate for given mime type */
|
||||
static MimeHandler *mhFactory(const string &mime)
|
||||
{
|
||||
if (!stringlowercmp("text/plain", mime))
|
||||
return new MimeHandlerText;
|
||||
@ -117,9 +29,9 @@ static MimeHandler *mhfact(const string &mime)
|
||||
}
|
||||
|
||||
/**
|
||||
* Return handler function for given mime type
|
||||
* Return handler object for given mime type:
|
||||
*/
|
||||
MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
|
||||
MimeHandler *getMimeHandler(const string &mtype, ConfTree *mhandlers)
|
||||
{
|
||||
// Return handler definition for mime type
|
||||
string hs;
|
||||
@ -138,7 +50,7 @@ MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
|
||||
|
||||
// Retrieve handler function according to type
|
||||
if (!stringlowercmp("internal", toks.front())) {
|
||||
return mhfact(mtype);
|
||||
return mhFactory(mtype);
|
||||
} else if (!stringlowercmp("dll", toks.front())) {
|
||||
return 0;
|
||||
} else if (!stringlowercmp("exec", toks.front())) {
|
||||
@ -160,7 +72,7 @@ MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
|
||||
/**
|
||||
* Return external viewer exec string for given mime type
|
||||
*/
|
||||
string getMimeViewer(const std::string &mtype, ConfTree *mhandlers)
|
||||
string getMimeViewer(const string &mtype, ConfTree *mhandlers)
|
||||
{
|
||||
string hs;
|
||||
mhandlers->get(mtype, hs, "view");
|
||||
@ -170,7 +82,7 @@ string getMimeViewer(const std::string &mtype, ConfTree *mhandlers)
|
||||
/**
|
||||
* Return icon name
|
||||
*/
|
||||
string getMimeIconName(const std::string &mtype, ConfTree *mhandlers)
|
||||
string getMimeIconName(const string &mtype, ConfTree *mhandlers)
|
||||
{
|
||||
string hs;
|
||||
mhandlers->get(mtype, hs, "icons");
|
||||
@ -180,7 +92,7 @@ string getMimeIconName(const std::string &mtype, ConfTree *mhandlers)
|
||||
/**
|
||||
* Return decompression command line for given mime type
|
||||
*/
|
||||
bool getUncompressor(const std::string &mtype, ConfTree *mhandlers,
|
||||
bool getUncompressor(const string &mtype, ConfTree *mhandlers,
|
||||
list<string>& cmd)
|
||||
{
|
||||
string hs;
|
||||
|
||||
@ -7,17 +7,17 @@ all: depend $(LIBS)
|
||||
|
||||
OBJS = base64.o conftree.o csguess.o debuglog.o \
|
||||
execmd.o wipedir.o \
|
||||
fstreewalk.o html.o mail.o htmlparse.o idfile.o indexer.o \
|
||||
internfile.o md5.o \
|
||||
fstreewalk.o mh_html.o mh_mail.o mh_exec.o mh_text.o htmlparse.o \
|
||||
idfile.o indexer.o internfile.o md5.o \
|
||||
mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathhash.o pathut.o \
|
||||
rclconfig.o rcldb.o rclinit.o readfile.o smallut.o \
|
||||
textsplit.o transcode.o \
|
||||
unacpp.o unac.o
|
||||
SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
|
||||
../utils/execmd.cpp ../utils/idfile.cpp ../utils/md5.cpp \
|
||||
../utils/wipedir.cpp \
|
||||
../utils/fstreewalk.cpp ../common/html.cpp ../common/mail.cpp \
|
||||
../common/htmlparse.cpp \
|
||||
../utils/wipedir.cpp ../utils/fstreewalk.cpp \
|
||||
../common/mh_html.cpp ../common/mh_mail.cpp ../common/mh_exec.cpp \
|
||||
../common/mh_text.cpp ../common/htmlparse.cpp \
|
||||
../index/indexer.cpp ../common/internfile.cpp \
|
||||
../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \
|
||||
../common/myhtmlparse.cpp ../common/pathhash.cpp ../utils/pathut.cpp \
|
||||
@ -46,7 +46,13 @@ wipedir.o : ../utils/wipedir.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
fstreewalk.o : ../utils/fstreewalk.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
html.o : ../common/html.cpp
|
||||
mh_html.o : ../common/mh_html.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
mh_exec.o : ../common/mh_exec.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
mh_text.o : ../common/mh_text.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
mh_html.o : ../common/mh_html.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
htmlparse.o : ../common/htmlparse.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
@ -56,7 +62,7 @@ indexer.o : ../index/indexer.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
internfile.o : ../common/internfile.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
mail.o : ../common/mail.cpp
|
||||
mh_mail.o : ../common/mh_mail.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
mimehandler.o : ../common/mimehandler.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user