execm persistent filters

This commit is contained in:
dockes 2009-10-09 13:58:32 +00:00
parent 3223d1245a
commit c8a88029f7
9 changed files with 389 additions and 51 deletions

View File

@ -55,8 +55,81 @@ sub xapianTag {
return undef;
}
# Extract the metadata tags of one image file and render them as an HTML
# document.
#
# Parameter: the image file name. An empty name (or '-') means standard
#            input, as it did with the previous two-argument open.
# Returns:   the HTML text, or an empty string if the file can't be opened
#            or ImageInfo() yields nothing.
#
# Tags for which xapianTag() returns a name become <meta> elements in the
# head; the other ones (all of them if the file-level $headAndBody flag is
# set) are listed in the body.
sub imgTagsToHtml {
    my $imageFile = shift;
    my $output = "";
    $imageFile = '-' if !defined($imageFile) || $imageFile eq '';

    # Use a lexical handle and the three-argument open: the names we get
    # come from the indexed file tree and must never be interpreted as a
    # mode or pipe specification (leading '>', trailing '|', ...).
    my $readStdin = ($imageFile eq '-');
    my $fh;
    if ($readStdin) {
        $fh = \*STDIN;
    } elsif (!open($fh, '<', $imageFile)) {
        print STDERR "$0: can't open file $imageFile\n";
        return $output; # file doesn't exist or can't be read
    }

    my $info = ImageInfo($fh);
    # We are a long-running process: don't keep the image open until the
    # next request.
    close($fh) unless $readStdin;
    return $output unless $info;

    # Tag values are arbitrary text coming from the image: make them safe
    # for inclusion in element content and double-quoted attributes.
    my $escape = sub {
        my $text = shift;
        $text = '' unless defined $text;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return $text;
    };

    my @fields;             # [ xapian field name, value ] pairs for the head
    my @other;              # [ original tag name, value ] pairs for the body
    my $titleHtmlTag = "";
    foreach my $tagname ( sort keys %{$info} ) {
        my $xapiantag = xapianTag($tagname);
        if (defined $xapiantag) {
            push @fields, [ $xapiantag, $info->{$tagname} ];
            if ($xapiantag eq 'title') {
                $titleHtmlTag = "<title>" . $escape->($info->{$tagname}) .
                    "</title>";
            }
            push @other, [ $tagname, $info->{$tagname} ] if $headAndBody;
        } else {
            push @other, [ $tagname, $info->{$tagname} ];
        }
    }

    $output = "<html>\n<head>\n$titleHtmlTag\n" .
        "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=UTF-8\">\n";
    foreach my $tagpair (@fields) {
        my ($name, $value) = @{$tagpair};
        $output .= "<meta name=\"$name\" content=\"" . $escape->($value) .
            "\">\n";
    }
    $output .= "</head><body>\n";
    foreach my $tagpair (@other) {
        my ($name, $value) = @{$tagpair};
        $output .= sprintf("%30s : %s<br>\n", $name, $escape->($value));
    }
    $output .= "</body>\n</html>\n";
    return $output;
}
# Get one line from stdin (trailing newline included). Exit with status 0
# on end of file or read error: this is how the filter terminates when the
# other end closes the pipe.
sub readlineorexit {
    my $s = <STDIN>;
    # Test for definedness, not truth: a last line holding just "0" with no
    # trailing newline is false in boolean context but is not an EOF.
    unless (defined $s) {
        # print STDERR "RCLIMG: EOF\n";
        exit 0;
    }
    return $s;
}
# Read one named parameter from stdin.
#
# A parameter is a header line made of a name and a byte count separated
# by white space, immediately followed by that many bytes of data.
#
# Returns: (lowercased name, data), or ("", "") when the line read is empty
#          (which marks the end of a message).
# Exits with status 1 on a malformed header line or short/failed data read,
# and with status 0 on end of file (through readlineorexit()).
sub readparam {
    my $s = readlineorexit();
    if ($s eq "\n") {
        return ("","");
    }

    my @l = split(' ', $s);
    if (scalar(@l) != 2) {
        print STDERR "RCLIMG: bad line:", $s;
        exit 1;
    }
    my $paramname = lc $l[0];
    my $paramsize = $l[1];
    # The count must be a plain non-negative integer: read() dies on a
    # negative length and would quietly numify garbage.
    unless ($paramsize =~ /\A\d+\z/) {
        print STDERR "RCLIMG: bad line:", $s;
        exit 1;
    }

    # Lexical buffer: nothing from a previous request can leak into this one.
    my $paramdata = '';
    my $n = read STDIN, $paramdata, $paramsize;
    # read() returns undef on error, which must not be compared as a number.
    unless (defined $n && $n == $paramsize) {
        my $got = defined $n ? $n : "read error: $!";
        print STDERR "RCLIMG: [$paramname] expected $paramsize, got $got\n";
        exit 1;
    }
    # print STDERR "RCLIMG: [$paramname] $paramsize bytes: [$paramdata]\n";
    return ($paramname, $paramdata);
}
#
# start here
# Main program starts here
#
# JFD: replaced the "use" call with a runtime load with error checking,
@ -68,37 +141,30 @@ if ($@) {
exit(1);
}
$| = 1;
while (1) {
# print STDERR "RCLIMG: waiting for command\n";
$imageFile = shift;
$imageFile = '-' if $imageFile eq '';
unless ( open(IMGF, $imageFile) ) {
print STDERR "$0: can't open file $imageFile\n";
exit(1); # file doesn't exist or can't be read
}
$info = ImageInfo(\*IMGF);
die unless $info;
$fields = [];
$other = [];
$titleHtmlTag = "";
foreach $tagname ( sort keys %{$info} ) {
$xapiantag = xapianTag($tagname);
if (defined $xapiantag ) {
push @{$fields}, [ $xapiantag, $info->{$tagname} ];
$titleHtmlTag = "<title>$info->{$tagname}</title>" if $xapiantag eq 'title';
push @{$other}, [ $tagname, $info->{$tagname} ] if $headAndBody;
} else {
push @{$other}, [ $tagname, $info->{$tagname} ];
my %params = ();
# Read at most 9 parameters (we only actually use one), stop at empty line
for($i = 1; $i < 10; $i++) {
my ($name, $value) = readparam;
if ($name eq "") {
last;
}
$params{$name} = $value;
}
unless (defined $params{"filename:"}) {
print STDERR "RCLIMG: no filename ??\n";
exit 1;
}
my $data = imgTagsToHtml($params{"filename:"});
my $l = length($data);
print "Data: $l\n";
# print STDERR "RCLIMG: writing $l bytes of data\n";
print $data;
# End of output parameters: print empty line
print "\n";
# print STDERR "RCLIMG: done writing data\n";
}
print "<html>\n<head>\n$titleHtmlTag\n";
print "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=UTF-8\">\n";
foreach $tagpair ( @{$fields} ) {
($tagname, $value) = @{$tagpair};
print "<meta name=\"$tagname\" content=\"$value\">\n";
}
print "</head><body>\n";
foreach $tagpair (@{$other} ) {
($tagname, $value) = @{$tagpair};
printf "%30s : %s<br>\n", $tagname, $value;
}
print "</body>\n</html>\n";

View File

@ -108,7 +108,15 @@ bool MimeHandlerExec::next_document()
return false;
}
// if output is text, we must handle the conversion to utf-8
finaldetails();
return true;
}
void MimeHandlerExec::finaldetails()
{
string& output = m_metaData["content"];
// if output is text/plain (not text/html), we must convert it to utf-8
string charset = cfgCharset.empty() ? "utf-8" : cfgCharset;
string mt = cfgMtype.empty() ? "text/html" : cfgMtype;
if (!mt.compare("text/plain") && charset.compare("utf-8")) {
@ -139,6 +147,4 @@ bool MimeHandlerExec::next_document()
LOGERR(("MimeHandlerExec: cant compute md5 for [%s]: %s\n",
m_fn.c_str(), reason.c_str()));
}
return true;
}

View File

@ -30,21 +30,31 @@ using std::string;
*
* The command to execute, and its parameters, are stored in the "params"
* which is built in mimehandler.cpp out of data from the mimeconf file.
*
* As any RecollFilter, a MimeHandlerExec object can be reset
* by calling clear(), and will stay initialised for the same mtype
* (cmd, params etc.)
*/
class MimeHandlerExec : public RecollFilter {
public:
// Members not reset by clear(). params, cfgMtype and chgCharset
// actually define what I am. missingHelper is a permanent error
///////////////////////
// Members not reset by clear(). params, cfgMtype and cfgCharset
// define what I am. missingHelper is a permanent error
// (no use to try and execute over and over something that's not
// here).
// Parameter list: this has been built by our creator, from config file
// data. We always add the file name at the end before actual execution
list<string> params;
// The defaults for external filters is to output html except if defined
// otherwise in the config.
// Filter output type. The default for ext. filters is to output html,
// but some don't, in which case the type is defined in the config.
string cfgMtype;
// For ext programs which don't output html, the output charset
// has to be known: ie they have a --charset utf-8 like option.
// Output character set if the above type is not text/html. For
// those filters, the output charset has to be known: ie set by a command
// line option.
string cfgCharset;
bool missingHelper;
////////////////
MimeHandlerExec(const string& mt) : RecollFilter(mt), missingHelper(false)
{}
@ -66,9 +76,11 @@ class MimeHandlerExec : public RecollFilter {
RecollFilter::clear();
}
private:
protected:
string m_fn;
string m_ipath;
virtual void finaldetails();
};
#endif /* _MH_EXEC_H_INCLUDED_ */

157
src/internfile/mh_execm.cpp Normal file
View File

@ -0,0 +1,157 @@
#ifndef lint
static char rcsid[] = "@(#$Id: mh_exec.cpp,v 1.14 2008-10-09 09:19:37 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <iostream>
#include <sstream>
#include "mh_execm.h"
#include "mh_html.h"
#include "debuglog.h"
#include "cancelcheck.h"
#include "smallut.h"
#include "transcode.h"
#include "md5.h"
#include <sys/types.h>
#include <sys/wait.h>
#ifndef NO_NAMESPACES
using namespace std;
#endif /* NO_NAMESPACES */
bool MimeHandlerExecMultiple::startCmd()
{
LOGDEB(("MimeHandlerExecMultiple::startCmd\n"));
// Command name
string cmd = params.front();
// Build parameter list: delete cmd name
list<string>::iterator it = params.begin();
list<string>myparams(++it, params.end());
// Start filter
m_cmd.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
"RECOLL_FILTER_FORPREVIEW=no");
if (m_cmd.startExec(cmd, myparams, 1, 1) < 0) {
missingHelper = true;
return false;
}
return true;
}
// Read one element (header line + counted data) of the filter's answer.
//
// On return, 'name' holds the element name as sent by the filter, or is
// empty if an empty line (end of message) was read. Only the data for the
// "data:" element is kept (in m_metaData["content"]), the data for any
// other element is read and thrown away.
//
// Returns false on read error or malformed input, true otherwise.
bool MimeHandlerExecMultiple::readDataElement(string& name)
{
    string ibuf;
    if (m_cmd.getline(ibuf) <= 0) {
        LOGERR(("MHExecMultiple: getline error\n"));
        return false;
    }
    if (!ibuf.compare("\n")) {
        LOGDEB(("MHExecMultiple: Got empty line\n"));
        name = "";
        return true;
    }

    // We're expecting something like paramname: len\n
    list<string> tokens;
    stringToTokens(ibuf, tokens);
    if (tokens.size() != 2) {
        LOGERR(("MHExecMultiple: bad line in filter output: [%s]\n",
                ibuf.c_str()));
        return false;
    }
    list<string>::iterator it = tokens.begin();
    name = *it++;
    string& slen = *it;
    int len;
    // A negative count can't be valid: reject it here instead of handing
    // it over to receive()
    if (sscanf(slen.c_str(), "%d", &len) != 1 || len < 0) {
        LOGERR(("MHExecMultiple: bad line in filter output: [%s]\n",
                ibuf.c_str()));
        return false;
    }
    LOGDEB(("MHExecMultiple: got paramname [%s] len: %d\n",
            name.c_str(), len));

    // We only care about the "data:" field for now
    string discard;
    string *datap;
    if (!stringlowercmp("data:", name)) {
        datap = &m_metaData["content"];
    } else {
        datap = &discard;
    }

    // Then the data.
    datap->erase();
    if (m_cmd.receive(*datap, len) != len) {
        // length() is a size_t: convert explicitly for the %d format
        LOGERR(("MHExecMultiple: expected %d bytes of data, got %d\n",
                len, (int)datap->length()));
        return false;
    }
    return true;
}
// Execute an external program to translate a file from its native
// format to text or html.
bool MimeHandlerExecMultiple::next_document()
{
if (m_havedoc == false)
return false;
if (missingHelper) {
LOGDEB(("MHExecMultiple::next_document(): helper known missing\n"));
return false;
}
if (params.empty()) {
// Hu ho
LOGERR(("MHExecMultiple::mkDoc: empty params\n"));
m_reason = "RECFILTERROR BADCONFIG";
return false;
}
if (m_cmd.getChildPid() < 0 && !startCmd()) {
return false;
}
// Send request to child process
ostringstream obuf;
obuf << "FileName: " << m_fn.length() << endl << m_fn << endl;
if (m_cmd.send(obuf.str()) < 0) {
LOGERR(("MHExecMultiple: send error\n"));
return false;
}
// Read answer
LOGDEB(("MHExecMultiple: reading answer\n"));
for (int loop=0;;loop++) {
string name;
if (!readDataElement(name)) {
return false;
}
if (name.empty())
break;
if (loop == 10) {
// ??
LOGERR(("MHExecMultiple: filter sent too many parameters\n"));
return false;
}
}
finaldetails();
m_havedoc = false;
return true;
}

79
src/internfile/mh_execm.h Normal file
View File

@ -0,0 +1,79 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _MH_EXECM_H_INCLUDED_
#define _MH_EXECM_H_INCLUDED_
/* @(#$Id: mh_exec.h,v 1.8 2008-10-06 06:22:46 dockes Exp $ (C) 2004 J.F.Dockes */
#include "mh_exec.h"
#include "execmd.h"
/**
* Turn external document into internal one by executing an external filter.
*
* The command to execute, and its parameters, are stored in the "params"
* which is built in mimehandler.cpp out of data from the mimeconf file.
*
* This version uses persistent filters which can handle multiple requests
* without exiting, with a simple question/response protocol.
*
* The data is exchanged in TLV fashion, in a way that should be
* usable in most script languages. The basic unit has one line with a
* data type and a count, followed by the data. A 'message' ends with
* one empty line. A possible exchange:
*
* From recollindex (the message begins before 'Filename'):
*
Filename: 24
/my/home/mail/somefolderIpath: 2
22
<Message ends here: because of the empty line after '22'
*
* Example answer:
*
Mimetype: 10
text/plainData: 10
0123456789
<Message ends here because of empty line
*
* Until proven otherwise, this format is both extensible and
* reasonably easy to parse. While it's more destined for python or
* perl on the script side, it should even be sort of usable from the shell
* (ie: use dd to read the counted data). Most alternatives would need data
* encoding in some cases.
*/
class MimeHandlerExecMultiple : public MimeHandlerExec {
public:
    MimeHandlerExecMultiple(const string& mt) : MimeHandlerExec(mt) {}

    // Nothing to release explicitly: the ExecCmd destructor does the
    // cleanup.
    virtual ~MimeHandlerExecMultiple() {}

    // Produce the next document by querying the filter process.
    virtual bool next_document();

    // Reset the per-document state. m_cmd is deliberately left alone.
    virtual void clear()
    {
        MimeHandlerExec::clear();
    }

private:
    // State which survives clear(), in addition to what MimeHandlerExec
    // already keeps: the handle on the filter process.
    ExecCmd m_cmd;

    // Start the filter process.
    bool startCmd();
    // Read one named and counted element from the filter's answer.
    bool readDataElement(string& name);
};
#endif /* _MH_EXECM_H_INCLUDED_ */

View File

@ -31,6 +31,7 @@ using namespace std;
#include "smallut.h"
#include "mh_exec.h"
#include "mh_execm.h"
#include "mh_html.h"
#include "mh_mail.h"
#include "mh_mbox.h"
@ -68,7 +69,8 @@ static Dijon::Filter *mhFactory(const string &mime)
* We don't support ';' inside a quoted string for now. Can't see a use
* for it
*/
MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs)
MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
bool multiple)
{
list<string>semicolist;
stringToTokens(hs, semicolist, ";");
@ -86,7 +88,9 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs)
return 0;
}
MimeHandlerExec *h = new MimeHandlerExec(mtype.c_str());
MimeHandlerExec *h = multiple ?
new MimeHandlerExecMultiple(mtype.c_str()) :
new MimeHandlerExec(mtype.c_str());
list<string>::iterator it;
@ -181,7 +185,14 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
mtype.c_str(), hs.c_str()));
return 0;
}
return mhExecFactory(cfg, mtype, hs);
return mhExecFactory(cfg, mtype, hs, false);
} else if (!stringlowercmp("execm", *it)) {
if (toks.size() < 2) {
LOGERR(("getMimeHandler: bad line for %s: %s\n",
mtype.c_str(), hs.c_str()));
return 0;
}
return mhExecFactory(cfg, mtype, hs, true);
}
}

View File

@ -6,8 +6,8 @@ LIBS = librcl.a
all: $(LIBS)
OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o filtseq.o history.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-getpart.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime-printdoc.o mime-printheader.o mime.o convert.o iodevice.o iofactory.o
DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp history.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-getpart.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime-printdoc.dep.stamp mime-printheader.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp
OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o filtseq.o history.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-getpart.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime-printdoc.o mime-printheader.o mime.o convert.o iodevice.o iofactory.o
DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp history.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-getpart.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime-printdoc.dep.stamp mime-printheader.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp
librcl.a : $(DEPS) $(OBJS) unac.o
ar ru librcl.a $(OBJS) unac.o
@ -41,6 +41,8 @@ internfile.o : ../internfile/internfile.cpp
$(CXX) $(ALL_CXXFLAGS) -c ../internfile/internfile.cpp
mh_exec.o : ../internfile/mh_exec.cpp
$(CXX) $(ALL_CXXFLAGS) -c ../internfile/mh_exec.cpp
mh_execm.o : ../internfile/mh_execm.cpp
$(CXX) $(ALL_CXXFLAGS) -c ../internfile/mh_execm.cpp
mh_html.o : ../internfile/mh_html.cpp
$(CXX) $(ALL_CXXFLAGS) -c ../internfile/mh_html.cpp
mh_mail.o : ../internfile/mh_mail.cpp
@ -184,6 +186,9 @@ internfile.dep.stamp : ../internfile/internfile.cpp
mh_exec.dep.stamp : ../internfile/mh_exec.cpp
$(CXX) -M $(ALL_CXXFLAGS) ../internfile/mh_exec.cpp > mh_exec.dep
touch mh_exec.dep.stamp
mh_execm.dep.stamp : ../internfile/mh_execm.cpp
$(CXX) -M $(ALL_CXXFLAGS) ../internfile/mh_execm.cpp > mh_execm.dep
touch mh_execm.dep.stamp
mh_html.dep.stamp : ../internfile/mh_html.cpp
$(CXX) -M $(ALL_CXXFLAGS) ../internfile/mh_html.cpp > mh_html.dep
touch mh_html.dep.stamp
@ -314,6 +319,7 @@ include myhtmlparse.dep
include mimehandler.dep
include internfile.dep
include mh_exec.dep
include mh_execm.dep
include mh_html.dep
include mh_mail.dep
include mh_mbox.dep

View File

@ -17,6 +17,7 @@ ${depth}/internfile/myhtmlparse.cpp \
${depth}/internfile/mimehandler.cpp \
${depth}/internfile/internfile.cpp \
${depth}/internfile/mh_exec.cpp \
${depth}/internfile/mh_execm.cpp \
${depth}/internfile/mh_html.cpp \
${depth}/internfile/mh_mail.cpp \
${depth}/internfile/mh_mbox.cpp \

View File

@ -72,10 +72,10 @@ application/x-lyx = exec rcllyx
application/x-scribus = exec rclscribus
application/x-tex = exec rcltex
audio/mpeg = exec rclid3
image/gif = exec rclimg
image/jpeg = exec rclimg
image/png = exec rclimg
image/tiff = exec rclimg
image/gif = execm rclimg
image/jpeg = execm rclimg
image/png = execm rclimg
image/tiff = execm rclimg
image/vnd.djvu = exec rcldjvu
image/svg+xml = exec rclsvg
message/rfc822 = internal