From c8a88029f7c7e8f0ef490c21fe184efd8bd81030 Mon Sep 17 00:00:00 2001 From: dockes Date: Fri, 9 Oct 2009 13:58:32 +0000 Subject: [PATCH] execm persistent filters --- src/filters/rclimg | 130 ++++++++++++++++++++------- src/internfile/mh_exec.cpp | 12 ++- src/internfile/mh_exec.h | 26 ++++-- src/internfile/mh_execm.cpp | 157 +++++++++++++++++++++++++++++++++ src/internfile/mh_execm.h | 79 +++++++++++++++++ src/internfile/mimehandler.cpp | 17 +++- src/lib/Makefile | 10 ++- src/lib/mkMake | 1 + src/sampleconf/mimeconf | 8 +- 9 files changed, 389 insertions(+), 51 deletions(-) create mode 100644 src/internfile/mh_execm.cpp create mode 100644 src/internfile/mh_execm.h diff --git a/src/filters/rclimg b/src/filters/rclimg index 71d7c2d2..60b8697e 100755 --- a/src/filters/rclimg +++ b/src/filters/rclimg @@ -55,8 +55,81 @@ sub xapianTag { return undef; } +sub imgTagsToHtml { + my $imageFile = shift; + my $output = ""; + $imageFile = '-' if $imageFile eq ''; + unless ( open(IMGF, $imageFile) ) { + print STDERR "$0: can't open file $imageFile\n"; + return $output; # file doesn't exist or can't be read + } + $info = ImageInfo(\*IMGF); + return $output unless $info; + $fields = []; + $other = []; + $titleHtmlTag = ""; + foreach $tagname ( sort keys %{$info} ) { + $xapiantag = xapianTag($tagname); + if (defined $xapiantag ) { + push @{$fields}, [ $xapiantag, $info->{$tagname} ]; + if ($xapiantag eq 'title') { + $titleHtmlTag = "$info->{$tagname}"; + } + push @{$other}, [ $tagname, $info->{$tagname} ] if $headAndBody; + } else { + push @{$other}, [ $tagname, $info->{$tagname} ]; + } + } + $output = "\n\n$titleHtmlTag\n" . + "\n"; + foreach $tagpair ( @{$fields} ) { + ($tagname, $value) = @{$tagpair}; + $output = $output . "\n"; + } + $output = $output . "\n"; + foreach $tagpair (@{$other} ) { + ($tagname, $value) = @{$tagpair}; + $output = $output . sprintf("%30s : %s
\n", $tagname, $value); + } + $output = $output . "\n\n"; + return $output; +} + +# Get one line from stdin, exit on eof +sub readlineorexit { + my $s = ; + unless ($s) { + # print STDERR "RCLIMG: EOF\n"; + exit 0; + } + return $s +} + +# Read one named parameter +sub readparam { + my $s = readlineorexit(); + if ($s eq "\n") { + return ("",""); + } + my @l = split(' ', $s); + + if (scalar(@l) != 2) { + print STDERR "RCLIMG: bad line:", $s; + exit 1; + } + my $paramname = lc $l[0]; + my $paramsize = $l[1]; + my $n = read STDIN, $paramdata, $paramsize; + if ($n != $paramsize) { + print STDERR "RCLIMG: [$paramname] expected $paramsize, got $n\n"; + exit 1; + } + # print STDERR "RCLIMG: [$paramname] $paramsize bytes: [$paramdata]\n"; + return ($paramname, $paramdata); +} + # -# start here +# Main program starts here # # JFD: replaced the "use" call with a runtime load with error checking, @@ -68,37 +141,30 @@ if ($@) { exit(1); } +$| = 1; +while (1) { + # print STDERR "RCLIMG: waiting for command\n"; -$imageFile = shift; -$imageFile = '-' if $imageFile eq ''; -unless ( open(IMGF, $imageFile) ) { - print STDERR "$0: can't open file $imageFile\n"; - exit(1); # file doesn't exist or can't be read -} -$info = ImageInfo(\*IMGF); -die unless $info; -$fields = []; -$other = []; -$titleHtmlTag = ""; -foreach $tagname ( sort keys %{$info} ) { - $xapiantag = xapianTag($tagname); - if (defined $xapiantag ) { - push @{$fields}, [ $xapiantag, $info->{$tagname} ]; - $titleHtmlTag = "$info->{$tagname}" if $xapiantag eq 'title'; - push @{$other}, [ $tagname, $info->{$tagname} ] if $headAndBody; - } else { - push @{$other}, [ $tagname, $info->{$tagname} ]; + my %params = (); + # Read at most 10 parameters (we only actually use one), stop at empty line + for($i = 1; $i < 10; $i++) { + my ($name, $value) = readparam; + if ($name eq "") { + last; } + $params{$name} = $value; + } + unless (defined $params{"filename:"}) { + print STDERR "RCLIMG: no filename ??\n"; + exit 1; + } + + my $data = imgTagsToHtml($params{"filename:"}); + my $l = length($data); + print "Data: $l\n"; + # print STDERR "RCLIMG: writing $l bytes of data\n"; + print $data; + # End of output parameters: print empty line + print "\n"; + # print STDERR "RCLIMG: done writing data\n"; } -print "\n\n$titleHtmlTag\n"; -print "\n"; -foreach $tagpair ( @{$fields} ) { - ($tagname, $value) = @{$tagpair}; - print "\n"; -} -print "\n"; -foreach $tagpair (@{$other} ) { - ($tagname, $value) = @{$tagpair}; - printf "%30s : %s
\n", $tagname, $value; -} -print "\n\n"; diff --git a/src/internfile/mh_exec.cpp b/src/internfile/mh_exec.cpp index 329a88fb..e0f7c6e3 100644 --- a/src/internfile/mh_exec.cpp +++ b/src/internfile/mh_exec.cpp @@ -108,7 +108,15 @@ bool MimeHandlerExec::next_document() return false; } - // if output is text, we must handle the conversion to utf-8 + finaldetails(); + return true; +} + + +void MimeHandlerExec::finaldetails() +{ + string& output = m_metaData["content"]; + // if output is text/plain (not text/html), we must convert it to utf-8 string charset = cfgCharset.empty() ? "utf-8" : cfgCharset; string mt = cfgMtype.empty() ? "text/html" : cfgMtype; if (!mt.compare("text/plain") && charset.compare("utf-8")) { @@ -139,6 +147,4 @@ bool MimeHandlerExec::next_document() LOGERR(("MimeHandlerExec: cant compute md5 for [%s]: %s\n", m_fn.c_str(), reason.c_str())); } - - return true; } diff --git a/src/internfile/mh_exec.h b/src/internfile/mh_exec.h index e137e9be..db798aa3 100644 --- a/src/internfile/mh_exec.h +++ b/src/internfile/mh_exec.h @@ -30,21 +30,31 @@ using std::string; * * The command to execute, and its parameters, are stored in the "params" * which is built in mimehandler.cpp out of data from the mimeconf file. + * + * As any RecollFilter, a MimeHandlerExec object can be reset + * by calling clear(), and will stay initialised for the same mtype + * (cmd, params etc.) */ class MimeHandlerExec : public RecollFilter { public: - // Members not reset by clear(). params, cfgMtype and chgCharset - // actually define what I am. missingHelper is a permanent error + /////////////////////// + // Members not reset by clear(). params, cfgMtype and chgCharset + // define what I am. missingHelper is a permanent error // (no use to try and execute over and over something that's not // here). + + // Parameter list: this has been built by our creator, from config file + // data. We always add the file name at the end before actual execution list params; - // The defaults for external filters is to output html except if defined - // otherwise in the config. + // Filter output type. The default for ext. filters is to output html, + // but some don't, in which case the type is defined in the config. string cfgMtype; - // For ext programs which don't output html, the output charset - // has to be known: ie they have a --charset utf-8 like option. + // Output character set if the above type is not text/html. For + // those filters, the output charset has to be known: ie set by a command + // line option. string cfgCharset; bool missingHelper; + //////////////// MimeHandlerExec(const string& mt) : RecollFilter(mt), missingHelper(false) {} @@ -66,9 +76,11 @@ class MimeHandlerExec : public RecollFilter { RecollFilter::clear(); } -private: +protected: string m_fn; string m_ipath; + + virtual void finaldetails(); }; #endif /* _MH_EXEC_H_INCLUDED_ */ diff --git a/src/internfile/mh_execm.cpp b/src/internfile/mh_execm.cpp new file mode 100644 index 00000000..6e886392 --- /dev/null +++ b/src/internfile/mh_execm.cpp @@ -0,0 +1,157 @@ +#ifndef lint +static char rcsid[] = "@(#$Id: mh_exec.cpp,v 1.14 2008-10-09 09:19:37 dockes Exp $ (C) 2005 J.F.Dockes"; +#endif +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include +#include + +#include "mh_execm.h" +#include "mh_html.h" +#include "debuglog.h" +#include "cancelcheck.h" +#include "smallut.h" +#include "transcode.h" +#include "md5.h" + +#include +#include + +#ifndef NO_NAMESPACES +using namespace std; +#endif /* NO_NAMESPACES */ + +bool MimeHandlerExecMultiple::startCmd() +{ + LOGDEB(("MimeHandlerExecMultiple::startCmd\n")); + // Command name + string cmd = params.front(); + + // Build parameter list: delete cmd name + list::iterator it = params.begin(); + listmyparams(++it, params.end()); + + // Start filter + m_cmd.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" : + "RECOLL_FILTER_FORPREVIEW=no"); + if (m_cmd.startExec(cmd, myparams, 1, 1) < 0) { + missingHelper = true; + return false; + } + return true; +} + +bool MimeHandlerExecMultiple::readDataElement(string& name) +{ + string ibuf; + if (m_cmd.getline(ibuf) <= 0) { + LOGERR(("MHExecMultiple: getline error\n")); + return false; + } + if (!ibuf.compare("\n")) { + LOGDEB(("MHExecMultiple: Got empty line\n")); + name = ""; + return true; + } + + // We're expecting something like paramname: len\n + list tokens; + stringToTokens(ibuf, tokens); + if (tokens.size() != 2) { + LOGERR(("MHExecMultiple: bad line in filter output: [%s]\n", + ibuf.c_str())); + return false; + } + list::iterator it = tokens.begin(); + name = *it++; + string& slen = *it; + int len; + if (sscanf(slen.c_str(), "%d", &len) != 1) { + LOGERR(("MHExecMultiple: bad line in filter output: [%s]\n", + ibuf.c_str())); + return false; + } + LOGDEB(("MHExecMultiple: got paramname [%s] len: %d\n", + name.c_str(), len)); + // We only care about the "data:" field for now + string discard; + string *datap; + if (!stringlowercmp("data:", name)) { + datap = &m_metaData["content"]; + } else { + datap = &discard; + } + // Then the data. + datap->erase(); + if (m_cmd.receive(*datap, len) != len) { + LOGERR(("MHExecMultiple: expected %d bytes of data, got %d\n", + len, datap->length())); + return false; + } + return true; +} + +// Execute an external program to translate a file from its native +// format to text or html. +bool MimeHandlerExecMultiple::next_document() +{ + if (m_havedoc == false) + return false; + if (missingHelper) { + LOGDEB(("MHExecMultiple::next_document(): helper known missing\n")); + return false; + } + if (params.empty()) { + // Hu ho + LOGERR(("MHExecMultiple::mkDoc: empty params\n")); + m_reason = "RECFILTERROR BADCONFIG"; + return false; + } + + if (m_cmd.getChildPid() < 0 && !startCmd()) { + return false; + } + + // Send request to child process + ostringstream obuf; + obuf << "FileName: " << m_fn.length() << endl << m_fn << endl; + if (m_cmd.send(obuf.str()) < 0) { + LOGERR(("MHExecMultiple: send error\n")); + return false; + } + + // Read answer + LOGDEB(("MHExecMultiple: reading answer\n")); + for (int loop=0;;loop++) { + string name; + if (!readDataElement(name)) { + return false; + } + if (name.empty()) + break; + if (loop == 10) { + // ?? + LOGERR(("MHExecMultiple: filter sent too many parameters\n")); + return false; + } + } + + finaldetails(); + m_havedoc = false; + return true; +} diff --git a/src/internfile/mh_execm.h b/src/internfile/mh_execm.h new file mode 100644 index 00000000..7cead3f7 --- /dev/null +++ b/src/internfile/mh_execm.h @@ -0,0 +1,79 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#ifndef _MH_EXECM_H_INCLUDED_ +#define _MH_EXECM_H_INCLUDED_ +/* @(#$Id: mh_exec.h,v 1.8 2008-10-06 06:22:46 dockes Exp $ (C) 2004 J.F.Dockes */ + +#include "mh_exec.h" +#include "execmd.h" + +/** + * Turn external document into internal one by executing an external filter. + * + * The command to execute, and its parameters, are stored in the "params" + * which is built in mimehandler.cpp out of data from the mimeconf file. + * + * This version uses persistent filters which can handle multiple requests + * without exiting, with a simple question/response protocol. + * + * The data is exchanged in TLV fashion, in a way that should be + * usable in most script languages. The basic unit has one line with a + * data type and a count, followed by the data. A 'message' ends with + * one empty line. A possible exchange: + * + * From recollindex (the message begins before 'Filename'): + * +Filename: 24 +/my/home/mail/somefolderIpath: 2 +22 +semicolist; stringToTokens(hs, semicolist, ";"); @@ -86,7 +88,9 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs) return 0; } - MimeHandlerExec *h = new MimeHandlerExec(mtype.c_str()); + MimeHandlerExec *h = multiple ? + new MimeHandlerExecMultiple(mtype.c_str()) : + new MimeHandlerExec(mtype.c_str()); list::iterator it; @@ -181,7 +185,14 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg, mtype.c_str(), hs.c_str())); return 0; } - return mhExecFactory(cfg, mtype, hs); + return mhExecFactory(cfg, mtype, hs, false); + } else if (!stringlowercmp("execm", *it)) { + if (toks.size() < 2) { + LOGERR(("getMimeHandler: bad line for %s: %s\n", + mtype.c_str(), hs.c_str())); + return 0; + } + return mhExecFactory(cfg, mtype, hs, true); } } diff --git a/src/lib/Makefile b/src/lib/Makefile index f8f05975..7625390e 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -6,8 +6,8 @@ LIBS = librcl.a all: $(LIBS) -OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o filtseq.o history.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-getpart.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime-printdoc.o mime-printheader.o mime.o convert.o iodevice.o iofactory.o -DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp history.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-getpart.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime-printdoc.dep.stamp mime-printheader.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp +OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o filtseq.o history.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-getpart.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime-printdoc.o mime-printheader.o mime.o convert.o iodevice.o iofactory.o +DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp history.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-getpart.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime-printdoc.dep.stamp mime-printheader.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp librcl.a : $(DEPS) $(OBJS) unac.o ar ru librcl.a $(OBJS) unac.o @@ -41,6 +41,8 @@ internfile.o : ../internfile/internfile.cpp $(CXX) $(ALL_CXXFLAGS) -c ../internfile/internfile.cpp mh_exec.o : ../internfile/mh_exec.cpp $(CXX) $(ALL_CXXFLAGS) -c ../internfile/mh_exec.cpp +mh_execm.o : ../internfile/mh_execm.cpp + $(CXX) $(ALL_CXXFLAGS) -c ../internfile/mh_execm.cpp mh_html.o : ../internfile/mh_html.cpp $(CXX) $(ALL_CXXFLAGS) -c ../internfile/mh_html.cpp mh_mail.o : ../internfile/mh_mail.cpp @@ -184,6 +186,9 @@ internfile.dep.stamp : ../internfile/internfile.cpp mh_exec.dep.stamp : ../internfile/mh_exec.cpp $(CXX) -M $(ALL_CXXFLAGS) ../internfile/mh_exec.cpp > mh_exec.dep touch mh_exec.dep.stamp +mh_execm.dep.stamp : ../internfile/mh_execm.cpp + $(CXX) -M $(ALL_CXXFLAGS) ../internfile/mh_execm.cpp > mh_execm.dep + touch mh_execm.dep.stamp mh_html.dep.stamp : ../internfile/mh_html.cpp $(CXX) -M $(ALL_CXXFLAGS) ../internfile/mh_html.cpp > mh_html.dep touch mh_html.dep.stamp @@ -314,6 +319,7 @@ include myhtmlparse.dep include mimehandler.dep include internfile.dep include mh_exec.dep +include mh_execm.dep include mh_html.dep include mh_mail.dep include mh_mbox.dep diff --git a/src/lib/mkMake b/src/lib/mkMake index 14f3f1c2..811cd046 100755 --- a/src/lib/mkMake +++ b/src/lib/mkMake @@ -17,6 +17,7 @@ ${depth}/internfile/myhtmlparse.cpp \ ${depth}/internfile/mimehandler.cpp \ ${depth}/internfile/internfile.cpp \ ${depth}/internfile/mh_exec.cpp \ +${depth}/internfile/mh_execm.cpp \ ${depth}/internfile/mh_html.cpp \ ${depth}/internfile/mh_mail.cpp \ ${depth}/internfile/mh_mbox.cpp \ diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf index 69a96bb1..10fb38a3 100644 --- a/src/sampleconf/mimeconf +++ b/src/sampleconf/mimeconf @@ -72,10 +72,10 @@ application/x-lyx = exec rcllyx application/x-scribus = exec rclscribus application/x-tex = exec rcltex audio/mpeg = exec rclid3 -image/gif = exec rclimg -image/jpeg = exec rclimg -image/png = exec rclimg -image/tiff = exec rclimg +image/gif = execm rclimg +image/jpeg = execm rclimg +image/png = execm rclimg +image/tiff = execm rclimg image/vnd.djvu = exec rcldjvu image/svg+xml = exec rclsvg message/rfc822 = internal