added external filters and pdf handling

This commit is contained in:
dockes 2005-02-01 17:20:06 +00:00
parent cc512e2ec0
commit d0aaf92220
14 changed files with 373 additions and 82 deletions

84
src/filters/rclpdf Executable file
View File

@ -0,0 +1,84 @@
#!/bin/sh
# @(#$Id: rclpdf,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes
# This is copied almost verbatim from Estraier:
#================================================================
# Estraier: a personal full-text search system
# Copyright (C) 2003-2004 Mikio Hirabayashi
#================================================================
#================================================================
# rclpdf
# Strip a file of PDF and extract its text as HTML.
#
# Usage: rclpdf infile
# The HTML is written to stdout; diagnostics about a missing input file
# go to stderr so that they can never be mistaken for document data.
#================================================================

# set variables
LANG=C ; export LANG
LC_ALL=C ; export LC_ALL
progname="rclpdf"

# show help message
# (two separate tests joined by ||: the -o operator of test is obsolescent
# and becomes ambiguous when "$1" looks like a test operator)
if test $# -ne 1 || test "$1" = "--help"
then
  printf 'Strip a file of PDF and extract its text as HTML.\n'
  printf 'Usage: %s [infile]\n' "$progname"
  exit 1
fi
infile="$1"

# check the input file existence
if test ! -f "$infile"
then
  printf '%s: %s: no such file\n' "$progname" "$infile" >&2
  exit 1
fi

# output the result:
#  - pdftotext produces the text wrapped in a minimal HTML document
#  - iconv drops any byte sequence which is not valid UTF-8
#  - awk escapes the text, turns the <pre> section into paragraphs,
#    joins words hyphenated at end of line and marks page breaks
pdftotext -raw -htmlmeta -enc UTF-8 -eol unix -q "$infile" - |
iconv -f UTF-8 -t UTF-8 -c -s |
awk '
# Replace the characters which are special in HTML by the matching entities
function htmlescape(s) {
  gsub(/&/, "\\&amp;", s)
  gsub(/</, "\\&lt;", s)
  gsub(/>/, "\\&gt;", s)
  return s
}
BEGIN {
  # esc is > 0 while we are inside the <pre> section holding the text
  esc = 0
}
{
  if(esc < 1 && $0 ~ /^<title>/ && $0 ~ /title>$/){
    # Title line: announce the charset, then re-emit the escaped title
    printf("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n")
    gsub(/<[^>]*>/, "", $0)
    printf("<title>%s</title>\n", htmlescape($0))
  } else if($0 == "<pre>"){
    esc++
    printf("<p>")
  } else if($0 == "</pre>"){
    esc--
    printf("</p>\n")
  } else if($0 ~ /-$/){
    # Word hyphenated at end of line: drop the hyphen and do not output
    # a newline, so that the word is joined with the start of next line.
    # The text still has to be escaped when it belongs to the document
    # body, else a stray < or & would corrupt the generated HTML.
    sub(/-$/, "", $0)
    if(esc > 0){
      $0 = htmlescape($0)
    }
    printf("%s", $0)
  } else if($0 == "\f"){
    # Form feed alone on its line: page break
    printf("</p>\n<hr>\n<p>")
  } else {
    if(esc > 0){
      $0 = htmlescape($0)
      gsub(/^ */, "", $0)
      gsub(/ *$/, "", $0)
    }
    print $0
  }
}
'

# Suppressed code 2 lines above (at the last print $0), which seemed to
# deal with multibyte character being cut by a newline ? It caused problems
# (sometimes concatenated last word of a line with first of next), and I
# didn't really understand its use as iconv -c is supposed to fix the
# encoding anyway

# exit normally
exit 0

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.1 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.2 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <sys/stat.h>
@ -105,8 +105,8 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp,
}
// Look for appropriate handler
MimeHandlerFunc fun = getMimeHandler(mime, me->config->getMimeConf());
if (!fun) {
MimeHandler *handler = getMimeHandler(mime, me->config->getMimeConf());
if (!handler) {
// No handler for this type, for now :(
LOGDEB(("indexfile: %s : no handler\n", mime.c_str()));
return FsTreeWalker::FtwOk;
@ -115,14 +115,19 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp,
LOGDEB(("indexfile: %s [%s]\n", mime.c_str(), fn.c_str()));
// Check db up to date ?
if (!me->db.needUpdate(fn, stp))
if (!me->db.needUpdate(fn, stp)) {
delete handler;
return FsTreeWalker::FtwOk;
}
// Turn file into a document. The document has fields for title, body
// etc., all text converted to utf8
Rcl::Doc doc;
if (!fun(me->config, fn, mime, doc))
if (!handler->worker(me->config, fn, mime, doc)) {
delete handler;
return FsTreeWalker::FtwOk;
}
delete handler;
// Set up common fields:
doc.mimetype = mime;
@ -131,7 +136,7 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp,
doc.mtime = ascdate;
// Do database-specific work to update document data
if (!me->db.add(fn, doc))
if (!me->db.add(fn, doc))
return FsTreeWalker::FtwError;
return FsTreeWalker::FtwOk;

View File

@ -32,13 +32,14 @@
#include "mimeparse.h"
#include "myhtmlparse.h"
#include "indextext.h"
#include "html.h"
#include <iostream>
using namespace std;
bool textHtmlToDoc(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout)
bool MimeHandlerHtml::worker(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout)
{
LOGDEB(("textHtmlToDoc: %s\n", fn.c_str()));
string otext;
@ -46,7 +47,13 @@ bool textHtmlToDoc(RclConfig *conf, const string &fn,
LOGINFO(("textHtmlToDoc: cant read: %s\n", fn.c_str()));
return false;
}
return worker1(conf, fn, otext, mtype, docout);
}
bool MimeHandlerHtml::worker1(RclConfig *conf, const string &fn,
const string& htext,
const string &mtype, Rcl::Doc &docout)
{
// Character set handling:
// - We first try to convert from the default configured charset
@ -57,7 +64,7 @@ bool textHtmlToDoc(RclConfig *conf, const string &fn,
// instead of the configuration one.
string charset;
if (conf->guesscharset) {
charset = csguess(otext, conf->defcharset);
charset = csguess(htext, conf->defcharset);
} else
charset = conf->defcharset;
@ -69,10 +76,10 @@ bool textHtmlToDoc(RclConfig *conf, const string &fn,
MyHtmlParser p;
// Try transcoding. If it fails, use original text.
if (!transcode(otext, transcoded, charset, "UTF-8")) {
if (!transcode(htext, transcoded, charset, "UTF-8")) {
LOGERR(("textHtmlToDoc: transcode failed from cs '%s' to UTF-8\n",
charset.c_str()));
transcoded = otext;
transcoded = htext;
// We don't know the charset, at all
p.ocharset = p.charset = charset = "";
} else {

14
src/internfile/mh_html.h Normal file
View File

@ -0,0 +1,14 @@
#ifndef _HTML_H_INCLUDED_
#define _HTML_H_INCLUDED_
/* @(#$Id: mh_html.h,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */
#include "mimehandler.h"
/**
 * Handler for html documents.
 *
 * Names from the standard library are fully qualified here so that this
 * header does not depend on a using-directive in the including file.
 */
class MimeHandlerHtml : public MimeHandler {
 public:
    /**
     * Read the html file named fn and convert it by calling worker1().
     * @return false if the file can't be read, else the result of worker1()
     */
    virtual bool worker(RclConfig *conf, const std::string &fn,
			const std::string &mtype, Rcl::Doc &docout);
    /**
     * Convert html text which is already in memory (htext). Also used for
     * the html output of external filters, fn then names the original file.
     */
    virtual bool worker1(RclConfig *conf, const std::string &fn,
			 const std::string& htext,
			 const std::string &mtype, Rcl::Doc &docout);
};
#endif /* _HTML_H_INCLUDED_ */

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.4 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.5 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <iostream>
@ -12,9 +12,19 @@ using namespace std;
#include "transcode.h"
#include "debuglog.h"
#include "smallut.h"
#include "html.h"
#include "execmd.h"
bool textPlainToDoc(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout)
// Handler for plain text files. The only thing it adds to the base class is
// the implementation of worker().
class MimeHandlerText : public MimeHandler {
 public:
    // Overrides MimeHandler::worker(): fn is the file name, mtype its mime
    // type, docout receives the result.
    virtual bool worker(RclConfig *conf,
			const string &fn,
			const string &mtype,
			Rcl::Doc &docout);
};
// Process a plain text file
bool MimeHandlerText::worker(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout)
{
string otext;
if (!file_to_string(fn, otext))
@ -45,25 +55,51 @@ bool textPlainToDoc(RclConfig *conf, const string &fn,
return true;
}
// Map of mime types to internal interner functions. This could just as well
// be an if else if suite inside getMimeHandler(), but this is prettier ?
static map<string, MimeHandlerFunc> ihandlers;
// Static object to get the map to be initialized at program start.
class IHandler_Init {
class MimeHandlerExec : public MimeHandler {
public:
IHandler_Init() {
ihandlers["text/plain"] = textPlainToDoc;
ihandlers["text/html"] = textHtmlToDoc;
// Add new associations here when needed
}
};
static IHandler_Init ihandleriniter;
list<string> params;
virtual ~MimeHandlerExec() {}
virtual bool worker(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout);
};
// Execute an external program to translate a file from its native format
// to html. Then call the html parser to do the actual indexing.
//
// params holds the command name followed by its fixed arguments. The name
// of the file to convert (fn) is appended as last argument, and the command
// output is collected in memory.
// Returns false if params is empty or if the command exits with a non-zero
// status, else returns the result of MimeHandlerHtml::worker1() on the
// command output.
bool MimeHandlerExec::worker(RclConfig *conf, const string &fn,
			     const string &mtype, Rcl::Doc &docout)
{
    // params is a public member: don't rely on it having been filled,
    // calling front() on an empty list is undefined behaviour.
    if (params.empty()) {
	LOGERR(("MimeHandlerExec: no command defined for %s\n",
		mtype.c_str()));
	return false;
    }

    // First element is the command, the rest are its arguments
    string cmd = params.front();
    list<string>::iterator it = params.begin();
    list<string> myparams(++it, params.end());
    myparams.push_back(fn);

    // No input for the command, output gathered in html
    string html;
    ExecCmd exec;
    int status = exec.doexec(cmd, myparams, 0, &html);
    if (status) {
	LOGDEB(("MimeHandlerExec: command status 0x%x: %s\n",
		status, cmd.c_str()));
	return false;
    }

    // The rest of the work is plain html processing
    MimeHandlerHtml hh;
    return hh.worker1(conf, fn, html, mtype, docout);
}
// Factory for the internal handlers: allocate the handler object matching
// the mime type (compared with stringlowercmp()). Returns 0 when there is no
// internal handler for the type. The object is allocated with new.
static MimeHandler *mhfact(const string &mime)
{
    MimeHandler *handler = 0;
    if (stringlowercmp("text/plain", mime) == 0) {
	handler = new MimeHandlerText;
    } else if (stringlowercmp("text/html", mime) == 0) {
	handler = new MimeHandlerHtml;
    }
    return handler;
}
/**
 * Return handler object for given mime type (to be deleted by the caller)
*/
MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
{
// Return handler definition for mime type
string hs;
@ -82,25 +118,23 @@ MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
// Retrieve handler function according to type
if (!stringlowercmp("internal", toks[0])) {
map<string, MimeHandlerFunc>::const_iterator it =
ihandlers.find(mtype);
if (it == ihandlers.end()) {
LOGERR(("getMimeHandler: internal handler not found for %s\n",
mtype.c_str()));
return 0;
}
return it->second;
return mhfact(mtype);
} else if (!stringlowercmp("dll", toks[0])) {
if (toks.size() != 2)
return 0;
return 0;
} else if (!stringlowercmp("exec", toks[0])) {
if (toks.size() != 2)
if (toks.size() < 2) {
LOGERR(("getMimeHandler: bad line for %s: %s\n", mtype.c_str(),
hs.c_str()));
return 0;
return 0;
} else {
return 0;
}
MimeHandlerExec *h = new MimeHandlerExec;
vector<string>::const_iterator it1 = toks.begin();
it1++;
for (;it1 != toks.end();it1++)
h->params.push_back(*it1);
return h;
}
return 0;
}
/**

View File

@ -1,21 +1,29 @@
#ifndef _MIMEHANDLER_H_INCLUDED_
#define _MIMEHANDLER_H_INCLUDED_
/* @(#$Id: mimehandler.h,v 1.3 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: mimehandler.h,v 1.4 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include "rclconfig.h"
#include "rcldb.h"
/* Definition for document interner functions */
typedef bool (*MimeHandlerFunc)(RclConfig *, const std::string &,
const std::string &, Rcl::Doc&);
/**
* Return indexing handler function for given mime type
* Document interner class. We sometimes have data to pass to an interner
*/
extern MimeHandlerFunc getMimeHandler(const std::string &mtype,
ConfTree *mhandlers);
class MimeHandler {
 public:
    /** Virtual so that handlers can be deleted through a base pointer */
    virtual ~MimeHandler() {}

    /**
     * Conversion method, to be implemented by every handler.
     * Receives the configuration, the file name and its mime type, and
     * a document object to be filled in. The boolean result reports
     * success.
     */
    virtual bool worker(RclConfig *config,
			const std::string &filename,
			const std::string &mimetype,
			Rcl::Doc& outdoc) = 0;
};
/**
* Return indexing handler class for given mime type
* returned pointer should be deleted by caller
*/
extern MimeHandler *getMimeHandler(const std::string &mtype,
ConfTree *mhandlers);
/**
* Return external viewer exec string for given mime type
@ -23,7 +31,4 @@ extern MimeHandlerFunc getMimeHandler(const std::string &mtype,
extern string getMimeViewer(const std::string &mtype,
ConfTree *mhandlers);
extern bool textHtmlToDoc(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout);
#endif /* _MIMEHANDLER_H_INCLUDED_ */

View File

@ -7,12 +7,14 @@ LIBS = librcl.a
all: $(LIBS)
OBJS = conftree.o csguess.o debuglog.o \
execmd.o \
fstreewalk.o html.o htmlparse.o indexer.o \
mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
rclconfig.o rcldb.o readfile.o smallut.o \
textsplit.o transcode.o \
unacpp.o unac.o
SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
../utils/execmd.cpp \
../utils/fstreewalk.cpp ../common/html.cpp ../common/htmlparse.cpp \
../index/indexer.cpp \
../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \
@ -35,6 +37,8 @@ csguess.o : ../index/csguess.cpp
$(CXX) $(CXXFLAGS) -c $<
debuglog.o : ../utils/debuglog.cpp
$(CXX) $(CXXFLAGS) -c $<
execmd.o : ../utils/execmd.cpp
$(CXX) $(CXXFLAGS) -c $<
fstreewalk.o : ../utils/fstreewalk.cpp
$(CXX) $(CXXFLAGS) -c $<
html.o : ../common/html.cpp

2
src/mk/FreeBSD Normal file
View File

@ -0,0 +1,2 @@
CXXFLAGS = -pthread -Wall -g -I. -I../index -I../utils -I../common \
-I../unac -I/usr/local/include

16
src/qtgui/idxthread.h Normal file
View File

@ -0,0 +1,16 @@
#ifndef _IDXTHREAD_H_INCLUDED_
#define _IDXTHREAD_H_INCLUDED_
/* @(#$Id: idxthread.h,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */
class RclConfig;
// These two deal with starting / stopping the thread itself, not indexing
// sessions.
extern void start_idxthread(RclConfig *cnf);
extern void stop_idxthread();
extern int startindexing;
extern int indexingdone;
extern bool indexingstatus;
#endif /* _IDXTHREAD_H_INCLUDED_ */

17
src/qtgui/recoll.h Normal file
View File

@ -0,0 +1,17 @@
#ifndef _RECOLL_H_INCLUDED_
#define _RECOLL_H_INCLUDED_
/* @(#$Id: recoll.h,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */
#include "rclconfig.h"
#include "rcldb.h"
#include "idxthread.h"
extern void recollCleanup();
// Misc declarations in need of sharing between the UI files
extern RclConfig *rclconfig;
extern Rcl::Db *rcldb;
extern int recollNeedsExit;
#endif /* _RECOLL_H_INCLUDED_ */

View File

@ -146,9 +146,9 @@ void RecollMain::reslistTE_clicked(int par, int car)
// for preview:
// Look for appropriate handler
MimeHandlerFunc fun =
MimeHandler *handler =
getMimeHandler(doc.mimetype, rclconfig->getMimeConf());
if (!fun) {
if (!handler) {
QMessageBox::warning(0, "Recoll",
QString("No mime handler for mime type ") +
doc.mimetype.c_str());
@ -157,13 +157,15 @@ void RecollMain::reslistTE_clicked(int par, int car)
string fn = urltolocalpath(doc.url);
Rcl::Doc fdoc;
if (!fun(rclconfig, fn, doc.mimetype, fdoc)) {
if (!handler->worker(rclconfig, fn, doc.mimetype, fdoc)) {
QMessageBox::warning(0, "Recoll",
QString("Failed to convert document for preview!\n") +
fn.c_str() + " mimetype " +
doc.mimetype.c_str());
delete handler;
return;
}
delete handler;
string rich = plaintorich(fdoc.text);
@ -188,7 +190,8 @@ void RecollMain::reslistTE_clicked(int par, int car)
}
// User asked to start query
// User asked to start query. Run it and call listNextPB_clicked to display
// first page of results
void RecollMain::queryText_returnPressed()
{
LOGDEB(("RecollMain::queryText_returnPressed()\n"));
@ -294,6 +297,7 @@ void RecollMain::listNextPB_clicked()
struct tm *tm = localtime(&mtime);
strftime(datebuf, 99, "<i>Modified:</i>&nbsp;%F&nbsp;%T", tm);
}
LOGDEB(("Abstract: %s\n", doc.abstract.c_str()));
string result = "<p>" +
string(perbuf) + " <b>" + doc.title + "</b><br>" +
doc.mimetype + "&nbsp;" +

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.15 2005-02-01 08:42:55 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.16 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <sys/stat.h>
@ -210,8 +210,12 @@ bool dumb_string(const string &in, string &out)
{
string inter;
out.erase();
if (!unac_cpp(in, inter))
if (in.empty())
return true;
if (!unac_cpp(in, inter)) {
LOGERR(("unac_cpp failed for %s\n", in.c_str()));
return false;
}
out.reserve(inter.length());
for (unsigned int i = 0; i < inter.length(); i++) {
if (inter[i] >= 'A' && inter[i] <= 'Z') {
@ -226,13 +230,55 @@ bool dumb_string(const string &in, string &out)
return true;
}
bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc)
/* omindex direct */
/* Truncate a string to a given maxlength, avoiding cutting off midword
* if reasonably possible. */
string
truncate_to_word(string & input, string::size_type maxlen)
{
    // Characters at which we accept to cut the text
    static const char *const separators = " \t\n\r-:.;,/[]{}";

    // Start from at most maxlen bytes of the input: this is the whole
    // input when it is short enough.
    string result(input, 0, maxlen);

    if (input.length() > maxlen) {
	// Something was cut off. We always back up to the last separator
	// (the original omindex code only did so when it was found in the
	// second half): the text is utf-8 and cutting anywhere else could
	// split a multibyte character. When there is no separator at all,
	// nothing is kept, the text probably has no value anyway (except
	// maybe for Asian languages, we may want to fix this one day).
	string::size_type cut = result.find_last_of(separators);
	result.erase(cut == string::npos ? 0 : cut);
	result += " ...";
    }

    // The result must fit on one line: newlines become spaces
    for (string::size_type pos = 0; pos < result.length(); pos++) {
	if (result[pos] == '\n')
	    result[pos] = ' ';
    }
    return result;
}
bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
{
LOGDEB(("Rcl::Db::add: fn %s %s\n", fn.c_str(), idoc.text.c_str()));
if (pdata == 0)
return false;
Native *ndb = (Native *)pdata;
Rcl::Doc doc = idoc;
if (doc.abstract.empty())
doc.abstract = truncate_to_word(doc.text, 100);
else
doc.abstract = truncate_to_word(doc.abstract, 100);
doc.title = truncate_to_word(doc.title, 100);
doc.keywords = truncate_to_word(doc.keywords, 300);
Xapian::Document newdocument;
wsData splitData(newdocument);
@ -248,21 +294,21 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc)
splitData.basepos += splitData.curpos + 100;
if (!dumb_string(doc.text, noacc)) {
LOGERR(("Rcl::Db::add: dum_string failed\n"));
LOGERR(("Rcl::Db::add: dumb_string failed\n"));
return false;
}
splitter.text_to_words(noacc);
splitData.basepos += splitData.curpos + 100;
if (!dumb_string(doc.keywords, noacc)) {
LOGERR(("Rcl::Db::add: dum_string failed\n"));
LOGERR(("Rcl::Db::add: dumb_string failed\n"));
return false;
}
splitter.text_to_words(noacc);
splitData.basepos += splitData.curpos + 100;
if (!dumb_string(doc.abstract, noacc)) {
LOGERR(("Rcl::Db::add: dum_string failed\n"));
LOGERR(("Rcl::Db::add: dumb_string failed\n"));
return false;
}
splitter.text_to_words(noacc);
@ -271,7 +317,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc)
string pathterm = "P" + fn;
newdocument.add_term(pathterm);
const char *fnc = fn.c_str();
// Document data record. omindex has the following nl separated fields:
// - url
// - sample
@ -288,6 +334,20 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc)
LOGDEB(("Newdocument data: %s\n", record.c_str()));
newdocument.set_data(record);
time_t mtime = atol(doc.mtime.c_str());
struct tm *tm = localtime(&mtime);
char buf[9];
sprintf(buf, "%04d%02d%02d",tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday);
newdocument.add_term("D" + string(buf)); // Date (YYYYMMDD)
buf[7] = '\0';
if (buf[6] == '3') buf[6] = '2';
newdocument.add_term("W" + string(buf)); // "Weak" - 10ish day interval
buf[6] = '\0';
newdocument.add_term("M" + string(buf)); // Month (YYYYMM)
buf[4] = '\0';
newdocument.add_term("Y" + string(buf)); // Year (YYYY)
// If this document has already been indexed, update the existing
// entry.
try {

View File

@ -2,7 +2,7 @@ include ../mk/FreeBSD
BIGLIB = ../lib/librcl.a
PROGS = smallut trfstreewalk trpathut execmd transcode trmimeparse
PROGS = smallut trfstreewalk trpathut transcode trmimeparse trexecmd
all: $(PROGS)
FSTREEWALK_OBJS= trfstreewalk.o fstreewalk.o pathut.o
@ -16,11 +16,13 @@ PATHUT_OBJS= trpathut.o pathut.o
trpathut : $(PATHUT_OBJS)
$(CXX) $(CXXFLAGS) -o trpathut $(PATHUT_OBJS)
trpathut.o : pathut.cpp pathut.h
$(CXX) -o trpathut.o -c $(CXXFLAGS) \
-DTEST_PATHUT pathut.cpp
$(CXX) -o trpathut.o -c $(CXXFLAGS) -DTEST_PATHUT pathut.cpp
execmd: pathut.o
$(CXX) -o execmd $(CXXFLAGS) execmd.cpp pathut.o
EXECMD_OBJS= trexecmd.o $(BIGLIB)
trexecmd : $(EXECMD_OBJS)
$(CXX) $(CXXFLAGS) -o trexecmd $(EXECMD_OBJS)
trexecmd.o : execmd.cpp execmd.h
$(CXX) -o trexecmd.o -c $(CXXFLAGS) -DTEST_EXECMD execmd.cpp
TRANSCODE_OBJS= trtranscode.o $(BIGLIB)
transcode : $(TRANSCODE_OBJS)
@ -31,12 +33,13 @@ trtranscode.o : ../utils/transcode.cpp
transcode.cpp
MIMEPARSE_OBJS= trmimeparse.o $(BIGLIB)
mimeparse : $(MIMEPARSE_OBJS)
# Link the mimeparse test driver. The output file must have the same name
# as the target, else make never finds the target up to date and relinks
# every time.
trmimeparse : $(MIMEPARSE_OBJS)
	$(CXX) $(CXXFLAGS) -o trmimeparse $(MIMEPARSE_OBJS) \
	       -L/usr/local/lib -liconv
trmimeparse.o : ../utils/mimeparse.cpp
trmimeparse.o : mimeparse.cpp
$(CXX) $(CXXFLAGS) -DTEST_MIMEPARSE -c -o trmimeparse.o \
mimeparse.cpp
SMALLUT_OBJS= trsmallut.o $(BIGLIB)
smallut : $(SMALLUT_OBJS)
$(CXX) $(CXXFLAGS) -o smallut $(SMALLUT_OBJS) \

View File

@ -1,12 +1,13 @@
#ifndef lint
static char rcsid[] = "@(#$Id: execmd.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: execmd.cpp,v 1.3 2005-02-01 17:20:06 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#ifndef TEST_EXECMD
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/select.h>
#include <fcntl.h>
#include <errno.h>
#include <list>
#include <string>
@ -15,6 +16,7 @@ static char rcsid[] = "@(#$Id: execmd.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $
#include "execmd.h"
#include "pathut.h"
#include "debuglog.h"
using namespace std;
#define MAX(A,B) (A>B?A:B)
@ -23,15 +25,25 @@ int
ExecCmd::doexec(const string &cmd, const list<string> args,
const string *input, string *output)
{
{
string command = cmd + " ";
for (list<string>::const_iterator it = args.begin();it != args.end();
it++) {
command += "{" + *it + "} ";
}
LOGDEB(("ExecCmd::doexec: %s\n", command.c_str()));
}
int pipein[2]; // subproc input
int pipeout[2]; // subproc output
pipein[0] = pipein[1] = pipeout[0] = pipeout[1] = -1;
if (input && pipe(pipein) < 0) {
LOGERR(("ExecCmd::doexec: pipe(2) failed. errno %d\n", errno));
return -1;
}
if (output && pipe(pipeout) < 0) {
LOGERR(("ExecCmd::doexec: pipe(2) failed. errno %d\n", errno));
close(pipein[0]);
close(pipein[1]);
return -1;
@ -39,6 +51,7 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
pid_t pid = fork();
if (pid < 0) {
LOGERR(("ExecCmd::doexec: fork(2) failed. errno %d\n", errno));
return -1;
}
@ -71,17 +84,20 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
//cerr << "pipein[1] "<< pipein[1] << " pipeout[0] " <<
//pipeout[0] << " nfds " << nfds << endl;
if (select(nfds, &readfds, &writefds, 0, 0) <= 0) {
perror("select");
LOGERR(("ExecCmd::doexec: select(2) failed. errno %d\n",
errno));
break;
}
if (pipein[1] >= 0 && FD_ISSET(pipein[1], &writefds)) {
int n = write(pipein[1], input->c_str()+nwritten,
input->length() - nwritten);
if (n < 0) {
LOGERR(("ExecCmd::doexec: write(2) failed. errno %d\n",
errno));
goto out;
}
nwritten += n;
if (nwritten == input->length()) {
if (nwritten == (int)input->length()) {
// cerr << "Closing output" << endl;
close(pipein[1]);
pipein[1] = -1;
@ -93,7 +109,8 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
if (n == 0) {
goto out;
} else if (n < 0) {
perror("read");
LOGERR(("ExecCmd::doexec: read(2) failed. errno %d\n",
errno));
goto out;
} else if (n > 0) {
// cerr << "READ: " << n << endl;
@ -114,6 +131,7 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
close(pipeout[0]);
if (pipeout[1] >= 0)
close(pipeout[1]);
LOGDEB(("ExecCmd::doexec: father got status 0x%x\n", status));
return status;
} else {
if (input) {
@ -130,10 +148,12 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
pipeout[0] = -1;
if (pipeout[1] != 1) {
if (dup2(pipeout[1], 1) < 0) {
perror("dup2");
LOGERR(("ExecCmd::doexec: dup2(2) failed. errno %d\n",
errno));
}
if (close(pipeout[1]) < 0) {
perror("close");
LOGERR(("ExecCmd::doexec: close(2) failed. errno %d\n",
errno));
}
pipeout[1] = -1;
}
@ -148,7 +168,8 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
Ccharp *argv;
argv = (Ccharp *)malloc((i+2) * sizeof(char *));
if (argv == 0) {
cerr << "Malloc error" << endl;
LOGERR(("ExecCmd::doexec: malloc() failed. errno %d\n",
errno));
exit(1);
}
@ -165,17 +186,31 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
while (argv[i]) cerr << argv[i++] << endl;}
#endif
LOGDEB(("ExecCmd::doexec: execvp(%s)\n", cmd.c_str()));
execvp(cmd.c_str(), (char *const*)argv);
// Hu ho
//cerr << "Exec failed" << endl;
exit(1);
LOGERR(("ExecCmd::doexec: execvp(%s) failed. errno %d\n", cmd.c_str(),
errno));
exit(128);
}
}
#else // TEST
#include <stdio.h>
#include <string>
#include <iostream>
#include <list>
#include "debuglog.h"
using namespace std;
#include "execmd.h"
const char *data = "Une ligne de donnees\n";
int main(int argc, const char **argv)
{
DebugLog::getdbl()->setloglevel(DEBDEB1);
DebugLog::setfilename("stderr");
if (argc < 2) {
cerr << "Usage: execmd cmd arg1 arg2 ..." << endl;
exit(1);
@ -191,7 +226,8 @@ int main(int argc, const char **argv)
string *ip = 0;
//ip = &input;
int status = mexec.doexec(cmd, l, ip, &output);
cout << "Status: " << status << endl;
fprintf(stderr, "Status: 0x%x\n", status);
cout << "Output:" << output << endl;
exit (status >> 8);
}
#endif // TEST