diff --git a/src/filters/rclpdf b/src/filters/rclpdf new file mode 100755 index 00000000..e696c14b --- /dev/null +++ b/src/filters/rclpdf @@ -0,0 +1,84 @@ +#!/bin/sh +# @(#$Id: rclpdf,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes +# This is copied almost verbatim from Estraier: +#================================================================ +# Estraier: a personal full-text search system +# Copyright (C) 2003-2004 Mikio Hirabayashi +#================================================================ +#================================================================ +# rclpdf +# Strip a file of PDF and extract its text as HTML. +#================================================================ + + +# set variables +LANG=C ; export LANG +LC_ALL=C ; export LC_ALL +progname="rclpdf" + +# show help message +if test $# -ne 1 -o "$1" = "--help" +then + printf 'Strip a file of PDF and extract its text as HTML.\n' + printf 'Usage: %s [infile]\n' "$progname" + exit 1 +fi + +infile="$1" + +# check the input file existence +if test ! -f "$infile" +then + printf '%s: %s: no such file\n' "$progname" "$infile" + exit 1 +fi + +# output the result +pdftotext -raw -htmlmeta -enc UTF-8 -eol unix -q "$infile" - | +iconv -f UTF-8 -t UTF-8 -c -s | +awk ' +BEGIN { + esc = 0 + mul = 1 + emp = 0 +} +{ + if(esc < 1 && $0 ~ /^/ && $0 ~ /title>$/){ + printf("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n") + gsub(/<[^>]*>/, "", $0) + gsub(/&/, "\\&", $0) + gsub(/</, "\\<", $0) + gsub(/>/, "\\>", $0) + printf("<title>%s\n", $0) + } else if($0 == "
"){
+    esc++
+    printf("

") + mul = 1 + } else if($0 == "

"){ + esc-- + printf("

\n") + } else if($0 ~ /-$/){ + sub(/-$/, "", $0) + printf("%s", $0); + } else if($0 == "\f"){ + printf("

\n
\n

") + } else { + if(esc > 0){ + gsub(/&/, "\\&", $0) + gsub(//, "\\>", $0) + gsub(/^ */, "", $0) + gsub(/ *$/, "", $0) + } + print $0 + } +} +' +# Suppressed code 2 lines above (at the last print $0), which seemed to +# deal with multibyte character being cut by a newline ? It caused problems +# (sometimes concatenated last word of a line with first of next, and I +# didn't really understand its use as iconv -c is supposed to fix the +# encoding anyway + +# exit normally +exit 0 diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index 74f55a69..08016a8d 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: indexer.cpp,v 1.1 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: indexer.cpp,v 1.2 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -105,8 +105,8 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp, } // Look for appropriate handler - MimeHandlerFunc fun = getMimeHandler(mime, me->config->getMimeConf()); - if (!fun) { + MimeHandler *handler = getMimeHandler(mime, me->config->getMimeConf()); + if (!handler) { // No handler for this type, for now :( LOGDEB(("indexfile: %s : no handler\n", mime.c_str())); return FsTreeWalker::FtwOk; @@ -115,14 +115,19 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp, LOGDEB(("indexfile: %s [%s]\n", mime.c_str(), fn.c_str())); // Check db up to date ? - if (!me->db.needUpdate(fn, stp)) + if (!me->db.needUpdate(fn, stp)) { + delete handler; return FsTreeWalker::FtwOk; + } // Turn file into a document. The document has fields for title, body // etc., all text converted to utf8 Rcl::Doc doc; - if (!fun(me->config, fn, mime, doc)) + if (!handler->worker(me->config, fn, mime, doc)) { + delete handler; return FsTreeWalker::FtwOk; + } + delete handler; // Set up common fields: doc.mimetype = mime; @@ -131,7 +136,7 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp, doc.mtime = ascdate; // Do database-specific work to update document data - if (!me->db.add(fn, doc)) + if (!me->db.add(fn, doc)) return FsTreeWalker::FtwError; return FsTreeWalker::FtwOk; diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp index 3a6076b1..993c5748 100644 --- a/src/internfile/mh_html.cpp +++ b/src/internfile/mh_html.cpp @@ -32,13 +32,14 @@ #include "mimeparse.h" #include "myhtmlparse.h" #include "indextext.h" +#include "html.h" #include using namespace std; -bool textHtmlToDoc(RclConfig *conf, const string &fn, - const string &mtype, Rcl::Doc &docout) +bool MimeHandlerHtml::worker(RclConfig *conf, const string &fn, + const string &mtype, Rcl::Doc &docout) { LOGDEB(("textHtmlToDoc: %s\n", fn.c_str())); string otext; @@ -46,7 +47,13 @@ bool textHtmlToDoc(RclConfig *conf, const string &fn, LOGINFO(("textHtmlToDoc: cant read: %s\n", fn.c_str())); return false; } - + return worker1(conf, fn, otext, mtype, docout); +} + +bool MimeHandlerHtml::worker1(RclConfig *conf, const string &fn, + const string& htext, + const string &mtype, Rcl::Doc &docout) +{ // Character set handling: // - We first try to convert from the default configured charset @@ -57,7 +64,7 @@ bool textHtmlToDoc(RclConfig *conf, const string &fn, // instead of the configuration one. string charset; if (conf->guesscharset) { - charset = csguess(otext, conf->defcharset); + charset = csguess(htext, conf->defcharset); } else charset = conf->defcharset; @@ -69,10 +76,10 @@ bool textHtmlToDoc(RclConfig *conf, const string &fn, MyHtmlParser p; // Try transcoding. If it fails, use original text. - if (!transcode(otext, transcoded, charset, "UTF-8")) { + if (!transcode(htext, transcoded, charset, "UTF-8")) { LOGERR(("textHtmlToDoc: transcode failed from cs '%s' to UTF-8\n", charset.c_str())); - transcoded = otext; + transcoded = htext; // We don't know the charset, at all p.ocharset = p.charset = charset = ""; } else { diff --git a/src/internfile/mh_html.h b/src/internfile/mh_html.h new file mode 100644 index 00000000..2c3ad453 --- /dev/null +++ b/src/internfile/mh_html.h @@ -0,0 +1,14 @@ +#ifndef _HTML_H_INCLUDED_ +#define _HTML_H_INCLUDED_ +/* @(#$Id: mh_html.h,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */ +#include "mimehandler.h" + +class MimeHandlerHtml : public MimeHandler { + public: + virtual bool worker(RclConfig *conf, const string &fn, + const string &mtype, Rcl::Doc &docout); + virtual bool worker1(RclConfig *conf, const string &fn, + const string& htext, + const string &mtype, Rcl::Doc &docout); +}; +#endif /* _HTML_H_INCLUDED_ */ diff --git a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp index 96ff9579..c1e1609d 100644 --- a/src/internfile/mimehandler.cpp +++ b/src/internfile/mimehandler.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.4 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.5 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -12,9 +12,19 @@ using namespace std; #include "transcode.h" #include "debuglog.h" #include "smallut.h" +#include "html.h" +#include "execmd.h" -bool textPlainToDoc(RclConfig *conf, const string &fn, - const string &mtype, Rcl::Doc &docout) +class MimeHandlerText : public MimeHandler { + public: + bool worker(RclConfig *conf, const string &fn, + const string &mtype, Rcl::Doc &docout); + +}; + +// Process a plain text file +bool MimeHandlerText::worker(RclConfig *conf, const string &fn, + const string &mtype, Rcl::Doc &docout) { string otext; if (!file_to_string(fn, otext)) @@ -45,25 +55,51 @@ bool textPlainToDoc(RclConfig *conf, const string &fn, return true; } -// Map of mime types to internal interner functions. This could just as well -// be an if else if suite inside getMimeHandler(), but this is prettier ? -static map ihandlers; -// Static object to get the map to be initialized at program start. -class IHandler_Init { +class MimeHandlerExec : public MimeHandler { public: - IHandler_Init() { - ihandlers["text/plain"] = textPlainToDoc; - ihandlers["text/html"] = textHtmlToDoc; - // Add new associations here when needed - } -}; -static IHandler_Init ihandleriniter; + list params; + virtual ~MimeHandlerExec() {} + virtual bool worker(RclConfig *conf, const string &fn, + const string &mtype, Rcl::Doc &docout); +}; + + +// Execute an external program to translate a file from its native format +// to html. Then call the html parser to do the actual indexing +bool MimeHandlerExec::worker(RclConfig *conf, const string &fn, + const string &mtype, Rcl::Doc &docout) +{ + string cmd = params.front(); + list::iterator it = params.begin(); + listmyparams(++it, params.end()); + myparams.push_back(fn); + + string html; + ExecCmd exec; + int status = exec.doexec(cmd, myparams, 0, &html); + if (status) { + LOGDEB(("MimeHandlerExec: command status 0x%x: %s\n", + status, cmd.c_str())); + return false; + } + MimeHandlerHtml hh; + return hh.worker1(conf, fn, html, mtype, docout); +} + +static MimeHandler *mhfact(const string &mime) +{ + if (!stringlowercmp("text/plain", mime)) + return new MimeHandlerText; + else if (!stringlowercmp("text/html", mime)) + return new MimeHandlerHtml; + return 0; +} /** * Return handler function for given mime type */ -MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers) +MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers) { // Return handler definition for mime type string hs; @@ -82,25 +118,23 @@ MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers) // Retrieve handler function according to type if (!stringlowercmp("internal", toks[0])) { - map::const_iterator it = - ihandlers.find(mtype); - if (it == ihandlers.end()) { - LOGERR(("getMimeHandler: internal handler not found for %s\n", - mtype.c_str())); - return 0; - } - return it->second; + return mhfact(mtype); } else if (!stringlowercmp("dll", toks[0])) { - if (toks.size() != 2) - return 0; return 0; } else if (!stringlowercmp("exec", toks[0])) { - if (toks.size() != 2) + if (toks.size() < 2) { + LOGERR(("getMimeHandler: bad line for %s: %s\n", mtype.c_str(), + hs.c_str())); return 0; - return 0; - } else { - return 0; + } + MimeHandlerExec *h = new MimeHandlerExec; + vector::const_iterator it1 = toks.begin(); + it1++; + for (;it1 != toks.end();it1++) + h->params.push_back(*it1); + return h; } + return 0; } /** diff --git a/src/internfile/mimehandler.h b/src/internfile/mimehandler.h index 9542ef95..48597003 100644 --- a/src/internfile/mimehandler.h +++ b/src/internfile/mimehandler.h @@ -1,21 +1,29 @@ #ifndef _MIMEHANDLER_H_INCLUDED_ #define _MIMEHANDLER_H_INCLUDED_ -/* @(#$Id: mimehandler.h,v 1.3 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: mimehandler.h,v 1.4 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include "rclconfig.h" #include "rcldb.h" -/* Definition for document interner functions */ -typedef bool (*MimeHandlerFunc)(RclConfig *, const std::string &, - const std::string &, Rcl::Doc&); /** - * Return indexing handler function for given mime type + * Document interner class. We sometimes have data to pass to an interner */ -extern MimeHandlerFunc getMimeHandler(const std::string &mtype, - ConfTree *mhandlers); +class MimeHandler { + public: + virtual ~MimeHandler() {} + virtual bool worker(RclConfig *, const std::string &filename, + const std::string &mimetype, Rcl::Doc& outdoc) = 0; +}; + +/** + * Return indexing handler class for given mime type + * returned pointer should be deleted by caller + */ +extern MimeHandler *getMimeHandler(const std::string &mtype, + ConfTree *mhandlers); /** * Return external viewer exec string for given mime type @@ -23,7 +31,4 @@ extern MimeHandlerFunc getMimeHandler(const std::string &mtype, extern string getMimeViewer(const std::string &mtype, ConfTree *mhandlers); -extern bool textHtmlToDoc(RclConfig *conf, const string &fn, - const string &mtype, Rcl::Doc &docout); - #endif /* _MIMEHANDLER_H_INCLUDED_ */ diff --git a/src/lib/Makefile b/src/lib/Makefile index 40cd5f64..db95483d 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -7,12 +7,14 @@ LIBS = librcl.a all: $(LIBS) OBJS = conftree.o csguess.o debuglog.o \ + execmd.o \ fstreewalk.o html.o htmlparse.o indexer.o \ mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \ rclconfig.o rcldb.o readfile.o smallut.o \ textsplit.o transcode.o \ unacpp.o unac.o SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \ + ../utils/execmd.cpp \ ../utils/fstreewalk.cpp ../common/html.cpp ../common/htmlparse.cpp \ ../index/indexer.cpp \ ../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \ @@ -35,6 +37,8 @@ csguess.o : ../index/csguess.cpp $(CXX) $(CXXFLAGS) -c $< debuglog.o : ../utils/debuglog.cpp $(CXX) $(CXXFLAGS) -c $< +execmd.o : ../utils/execmd.cpp + $(CXX) $(CXXFLAGS) -c $< fstreewalk.o : ../utils/fstreewalk.cpp $(CXX) $(CXXFLAGS) -c $< html.o : ../common/html.cpp diff --git a/src/mk/FreeBSD b/src/mk/FreeBSD new file mode 100644 index 00000000..f39904bc --- /dev/null +++ b/src/mk/FreeBSD @@ -0,0 +1,2 @@ +CXXFLAGS = -pthread -Wall -g -I. -I../index -I../utils -I../common \ + -I../unac -I/usr/local/include diff --git a/src/qtgui/idxthread.h b/src/qtgui/idxthread.h new file mode 100644 index 00000000..1ccb3d91 --- /dev/null +++ b/src/qtgui/idxthread.h @@ -0,0 +1,16 @@ +#ifndef _IDXTHREAD_H_INCLUDED_ +#define _IDXTHREAD_H_INCLUDED_ +/* @(#$Id: idxthread.h,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */ + +class RclConfig; + +// These two deal with starting / stopping the thread itself, not indexing +// sessions. +extern void start_idxthread(RclConfig *cnf); +extern void stop_idxthread(); + +extern int startindexing; +extern int indexingdone; +extern bool indexingstatus; + +#endif /* _IDXTHREAD_H_INCLUDED_ */ diff --git a/src/qtgui/recoll.h b/src/qtgui/recoll.h new file mode 100644 index 00000000..d7c11b42 --- /dev/null +++ b/src/qtgui/recoll.h @@ -0,0 +1,17 @@ +#ifndef _RECOLL_H_INCLUDED_ +#define _RECOLL_H_INCLUDED_ +/* @(#$Id: recoll.h,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */ + +#include "rclconfig.h" +#include "rcldb.h" +#include "idxthread.h" + +extern void recollCleanup(); + +// Misc declarations in need of sharing between the UI files +extern RclConfig *rclconfig; +extern Rcl::Db *rcldb; + +extern int recollNeedsExit; + +#endif /* _RECOLL_H_INCLUDED_ */ diff --git a/src/qtgui/recollmain.ui.h b/src/qtgui/recollmain.ui.h index af25d4e0..db121e9b 100644 --- a/src/qtgui/recollmain.ui.h +++ b/src/qtgui/recollmain.ui.h @@ -146,9 +146,9 @@ void RecollMain::reslistTE_clicked(int par, int car) // for preview: // Look for appropriate handler - MimeHandlerFunc fun = + MimeHandler *handler = getMimeHandler(doc.mimetype, rclconfig->getMimeConf()); - if (!fun) { + if (!handler) { QMessageBox::warning(0, "Recoll", QString("No mime handler for mime type ") + doc.mimetype.c_str()); @@ -157,13 +157,15 @@ void RecollMain::reslistTE_clicked(int par, int car) string fn = urltolocalpath(doc.url); Rcl::Doc fdoc; - if (!fun(rclconfig, fn, doc.mimetype, fdoc)) { + if (!handler->worker(rclconfig, fn, doc.mimetype, fdoc)) { QMessageBox::warning(0, "Recoll", QString("Failed to convert document for preview!\n") + fn.c_str() + " mimetype " + doc.mimetype.c_str()); + delete handler; return; } + delete handler; string rich = plaintorich(fdoc.text); @@ -188,7 +190,8 @@ void RecollMain::reslistTE_clicked(int par, int car) } -// User asked to start query +// User asked to start query. Run it and call listNextPB_clicked to display +// first page of results void RecollMain::queryText_returnPressed() { LOGDEB(("RecollMain::queryText_returnPressed()\n")); @@ -294,6 +297,7 @@ void RecollMain::listNextPB_clicked() struct tm *tm = localtime(&mtime); strftime(datebuf, 99, "Modified: %F %T", tm); } + LOGDEB(("Abstract: %s\n", doc.abstract.c_str())); string result = "

" + string(perbuf) + " " + doc.title + "
" + doc.mimetype + " " + diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index b76170ba..ba0c4b88 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.15 2005-02-01 08:42:55 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.16 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -210,8 +210,12 @@ bool dumb_string(const string &in, string &out) { string inter; out.erase(); - if (!unac_cpp(in, inter)) + if (in.empty()) + return true; + if (!unac_cpp(in, inter)) { + LOGERR(("unac_cpp failed for %s\n", in.c_str())); return false; + } out.reserve(inter.length()); for (unsigned int i = 0; i < inter.length(); i++) { if (inter[i] >= 'A' && inter[i] <= 'Z') { @@ -226,13 +230,55 @@ bool dumb_string(const string &in, string &out) return true; } -bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc) +/* omindex direct */ +/* Truncate a string to a given maxlength, avoiding cutting off midword + * if reasonably possible. */ +string +truncate_to_word(string & input, string::size_type maxlen) { - LOGDEB(("Rcl::Db::add: fn %s\n", fn.c_str())); + string output; + if (input.length() <= maxlen) { + output = input; + } else { + output = input.substr(0, maxlen); + const char *SEPAR = " \t\n\r-:.;,/[]{}"; + string::size_type space = output.find_last_of(SEPAR); + // Original version only truncated at space if space was found after + // maxlen/2. But we HAVE to truncate at space, else we'd need to do + // utf8 stuff to avoid truncating at multibyte char. In any case, + // not finding space means that the text probably has no value. + // Except probably for Asian languages, so we may want to fix this + // one day + if (space == string::npos) { + output.erase(); + } else { + output.erase(space); + } + + output += " ..."; + } + + // replace newlines with spaces + size_t i = 0; + while ((i = output.find('\n', i)) != string::npos) output[i] = ' '; + return output; +} + +bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc) +{ + LOGDEB(("Rcl::Db::add: fn %s %s\n", fn.c_str(), idoc.text.c_str())); if (pdata == 0) return false; Native *ndb = (Native *)pdata; + Rcl::Doc doc = idoc; + if (doc.abstract.empty()) + doc.abstract = truncate_to_word(doc.text, 100); + else + doc.abstract = truncate_to_word(doc.abstract, 100); + doc.title = truncate_to_word(doc.title, 100); + doc.keywords = truncate_to_word(doc.keywords, 300); + Xapian::Document newdocument; wsData splitData(newdocument); @@ -248,21 +294,21 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc) splitData.basepos += splitData.curpos + 100; if (!dumb_string(doc.text, noacc)) { - LOGERR(("Rcl::Db::add: dum_string failed\n")); + LOGERR(("Rcl::Db::add: dumb_string failed\n")); return false; } splitter.text_to_words(noacc); splitData.basepos += splitData.curpos + 100; if (!dumb_string(doc.keywords, noacc)) { - LOGERR(("Rcl::Db::add: dum_string failed\n")); + LOGERR(("Rcl::Db::add: dumb_string failed\n")); return false; } splitter.text_to_words(noacc); splitData.basepos += splitData.curpos + 100; if (!dumb_string(doc.abstract, noacc)) { - LOGERR(("Rcl::Db::add: dum_string failed\n")); + LOGERR(("Rcl::Db::add: dumb_string failed\n")); return false; } splitter.text_to_words(noacc); @@ -271,7 +317,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc) string pathterm = "P" + fn; newdocument.add_term(pathterm); const char *fnc = fn.c_str(); - + // Document data record. omindex has the following nl separated fields: // - url // - sample @@ -288,6 +334,20 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc) LOGDEB(("Newdocument data: %s\n", record.c_str())); newdocument.set_data(record); + + time_t mtime = atol(doc.mtime.c_str()); + struct tm *tm = localtime(&mtime); + char buf[9]; + sprintf(buf, "%04d%02d%02d",tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday); + newdocument.add_term("D" + string(buf)); // Date (YYYYMMDD) + buf[7] = '\0'; + if (buf[6] == '3') buf[6] = '2'; + newdocument.add_term("W" + string(buf)); // "Weak" - 10ish day interval + buf[6] = '\0'; + newdocument.add_term("M" + string(buf)); // Month (YYYYMM) + buf[4] = '\0'; + newdocument.add_term("Y" + string(buf)); // Year (YYYY) + // If this document has already been indexed, update the existing // entry. try { diff --git a/src/utils/Makefile b/src/utils/Makefile index 92200367..44bd7699 100644 --- a/src/utils/Makefile +++ b/src/utils/Makefile @@ -2,7 +2,7 @@ include ../mk/FreeBSD BIGLIB = ../lib/librcl.a -PROGS = smallut trfstreewalk trpathut execmd transcode trmimeparse +PROGS = smallut trfstreewalk trpathut transcode trmimeparse trexecmd all: $(PROGS) FSTREEWALK_OBJS= trfstreewalk.o fstreewalk.o pathut.o @@ -16,11 +16,13 @@ PATHUT_OBJS= trpathut.o pathut.o trpathut : $(PATHUT_OBJS) $(CXX) $(CXXFLAGS) -o trpathut $(PATHUT_OBJS) trpathut.o : pathut.cpp pathut.h - $(CXX) -o trpathut.o -c $(CXXFLAGS) \ - -DTEST_PATHUT pathut.cpp + $(CXX) -o trpathut.o -c $(CXXFLAGS) -DTEST_PATHUT pathut.cpp -execmd: pathut.o - $(CXX) -o execmd $(CXXFLAGS) execmd.cpp pathut.o +EXECMD_OBJS= trexecmd.o $(BIGLIB) +trexecmd : $(EXECMD_OBJS) + $(CXX) $(CXXFLAGS) -o trexecmd $(EXECMD_OBJS) +trexecmd.o : execmd.cpp execmd.h + $(CXX) -o trexecmd.o -c $(CXXFLAGS) -DTEST_EXECMD execmd.cpp TRANSCODE_OBJS= trtranscode.o $(BIGLIB) transcode : $(TRANSCODE_OBJS) @@ -31,12 +33,13 @@ trtranscode.o : ../utils/transcode.cpp transcode.cpp MIMEPARSE_OBJS= trmimeparse.o $(BIGLIB) -mimeparse : $(MIMEPARSE_OBJS) +trmimeparse : $(MIMEPARSE_OBJS) $(CXX) $(CXXFLAGS) -o mimeparse $(MIMEPARSE_OBJS) \ -L/usr/local/lib -liconv -trmimeparse.o : ../utils/mimeparse.cpp +trmimeparse.o : mimeparse.cpp $(CXX) $(CXXFLAGS) -DTEST_MIMEPARSE -c -o trmimeparse.o \ mimeparse.cpp + SMALLUT_OBJS= trsmallut.o $(BIGLIB) smallut : $(SMALLUT_OBJS) $(CXX) $(CXXFLAGS) -o smallut $(SMALLUT_OBJS) \ diff --git a/src/utils/execmd.cpp b/src/utils/execmd.cpp index 961749df..56490f0c 100644 --- a/src/utils/execmd.cpp +++ b/src/utils/execmd.cpp @@ -1,12 +1,13 @@ #ifndef lint -static char rcsid[] = "@(#$Id: execmd.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: execmd.cpp,v 1.3 2005-02-01 17:20:06 dockes Exp $ (C) 2004 J.F.Dockes"; #endif - +#ifndef TEST_EXECMD #include #include #include #include #include +#include #include #include @@ -15,6 +16,7 @@ static char rcsid[] = "@(#$Id: execmd.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ #include "execmd.h" #include "pathut.h" +#include "debuglog.h" using namespace std; #define MAX(A,B) (A>B?A:B) @@ -23,15 +25,25 @@ int ExecCmd::doexec(const string &cmd, const list args, const string *input, string *output) { + { + string command = cmd + " "; + for (list::const_iterator it = args.begin();it != args.end(); + it++) { + command += "{" + *it + "} "; + } + LOGDEB(("ExecCmd::doexec: %s\n", command.c_str())); + } int pipein[2]; // subproc input int pipeout[2]; // subproc output pipein[0] = pipein[1] = pipeout[0] = pipeout[1] = -1; if (input && pipe(pipein) < 0) { + LOGERR(("ExecCmd::doexec: pipe(2) failed. errno %d\n", errno)); return -1; } if (output && pipe(pipeout) < 0) { + LOGERR(("ExecCmd::doexec: pipe(2) failed. errno %d\n", errno)); close(pipein[0]); close(pipein[1]); return -1; @@ -39,6 +51,7 @@ ExecCmd::doexec(const string &cmd, const list args, pid_t pid = fork(); if (pid < 0) { + LOGERR(("ExecCmd::doexec: fork(2) failed. errno %d\n", errno)); return -1; } @@ -71,17 +84,20 @@ ExecCmd::doexec(const string &cmd, const list args, //cerr << "pipein[1] "<< pipein[1] << " pipeout[0] " << //pipeout[0] << " nfds " << nfds << endl; if (select(nfds, &readfds, &writefds, 0, 0) <= 0) { - perror("select"); + LOGERR(("ExecCmd::doexec: select(2) failed. errno %d\n", + errno)); break; } if (pipein[1] >= 0 && FD_ISSET(pipein[1], &writefds)) { int n = write(pipein[1], input->c_str()+nwritten, input->length() - nwritten); if (n < 0) { + LOGERR(("ExecCmd::doexec: write(2) failed. errno %d\n", + errno)); goto out; } nwritten += n; - if (nwritten == input->length()) { + if (nwritten == (int)input->length()) { // cerr << "Closing output" << endl; close(pipein[1]); pipein[1] = -1; @@ -93,7 +109,8 @@ ExecCmd::doexec(const string &cmd, const list args, if (n == 0) { goto out; } else if (n < 0) { - perror("read"); + LOGERR(("ExecCmd::doexec: read(2) failed. errno %d\n", + errno)); goto out; } else if (n > 0) { // cerr << "READ: " << n << endl; @@ -114,6 +131,7 @@ ExecCmd::doexec(const string &cmd, const list args, close(pipeout[0]); if (pipeout[1] >= 0) close(pipeout[1]); + LOGDEB(("ExecCmd::doexec: father got status 0x%x\n", status)); return status; } else { if (input) { @@ -130,10 +148,12 @@ ExecCmd::doexec(const string &cmd, const list args, pipeout[0] = -1; if (pipeout[1] != 1) { if (dup2(pipeout[1], 1) < 0) { - perror("dup2"); + LOGERR(("ExecCmd::doexec: dup2(2) failed. errno %d\n", + errno)); } if (close(pipeout[1]) < 0) { - perror("close"); + LOGERR(("ExecCmd::doexec: close(2) failed. errno %d\n", + errno)); } pipeout[1] = -1; } @@ -148,7 +168,8 @@ ExecCmd::doexec(const string &cmd, const list args, Ccharp *argv; argv = (Ccharp *)malloc((i+2) * sizeof(char *)); if (argv == 0) { - cerr << "Malloc error" << endl; + LOGERR(("ExecCmd::doexec: malloc() failed. errno %d\n", + errno)); exit(1); } @@ -165,17 +186,31 @@ ExecCmd::doexec(const string &cmd, const list args, while (argv[i]) cerr << argv[i++] << endl;} #endif + LOGDEB(("ExecCmd::doexec: execvp(%s)\n", cmd.c_str())); execvp(cmd.c_str(), (char *const*)argv); // Hu ho - //cerr << "Exec failed" << endl; - exit(1); + LOGERR(("ExecCmd::doexec: execvp(%s) failed. errno %d\n", cmd.c_str(), + errno)); + exit(128); } } +#else // TEST +#include +#include +#include +#include +#include "debuglog.h" +using namespace std; + +#include "execmd.h" + const char *data = "Une ligne de donnees\n"; int main(int argc, const char **argv) { + DebugLog::getdbl()->setloglevel(DEBDEB1); + DebugLog::setfilename("stderr"); if (argc < 2) { cerr << "Usage: execmd cmd arg1 arg2 ..." << endl; exit(1); @@ -191,7 +226,8 @@ int main(int argc, const char **argv) string *ip = 0; //ip = &input; int status = mexec.doexec(cmd, l, ip, &output); - cout << "Status: " << status << endl; + fprintf(stderr, "Status: 0x%x\n", status); cout << "Output:" << output << endl; exit (status >> 8); } +#endif // TEST