execm first working zip version

This commit is contained in:
dockes 2009-10-14 06:21:44 +00:00
parent 3fcde55ff4
commit ce0078081a
7 changed files with 157 additions and 43 deletions

View File

@ -119,10 +119,12 @@ sub readparam {
} }
my $paramname = lc $l[0]; my $paramname = lc $l[0];
my $paramsize = $l[1]; my $paramsize = $l[1];
my $n = read STDIN, $paramdata, $paramsize; if ($paramsize > 0) {
if ($n != $paramsize) { my $n = read STDIN, $paramdata, $paramsize;
print STDERR "RCLIMG: [$paramname] expected $paramsize, got $n\n"; if ($n != $paramsize) {
exit 1; print STDERR "RCLIMG: [$paramname] expected $paramsize, got $n\n";
exit 1;
}
} }
# print STDERR "RCLIMG: [$paramname] $paramsize bytes: [$paramdata]\n"; # print STDERR "RCLIMG: [$paramname] $paramsize bytes: [$paramdata]\n";
return ($paramname, $paramdata); return ($paramname, $paramdata);
@ -156,14 +158,19 @@ while (1) {
} }
unless (defined $params{"filename:"}) { unless (defined $params{"filename:"}) {
print STDERR "RCLIMG: no filename ??\n"; print STDERR "RCLIMG: no filename ??\n";
exit 1; # Recoll is requesting next subdocument, but we have none, just say
# so:
print "Document: 0\n\n";
next;
} }
my $data = imgTagsToHtml($params{"filename:"}); my $data = imgTagsToHtml($params{"filename:"});
my $l = length($data); my $l = length($data);
print "Data: $l\n"; print "Document: $l\n";
# print STDERR "RCLIMG: writing $l bytes of data\n"; # print STDERR "RCLIMG: writing $l bytes of data\n";
print $data; print $data;
# Say we have no further documents for this file
print "Eof: 0\n";
# End of output parameters: print empty line # End of output parameters: print empty line
print "\n"; print "\n";
# print STDERR "RCLIMG: done writing data\n"; # print STDERR "RCLIMG: done writing data\n";

View File

@ -40,6 +40,7 @@ using namespace std;
#include "rclmon.h" #include "rclmon.h"
#include "x11mon.h" #include "x11mon.h"
#include "rclversion.h" #include "rclversion.h"
#include "cancelcheck.h"
// Globals for exit cleanup // Globals for exit cleanup
ConfIndexer *confindexer; ConfIndexer *confindexer;
@ -63,6 +64,7 @@ static void sigcleanup(int sig)
{ {
fprintf(stderr, "sigcleanup\n"); fprintf(stderr, "sigcleanup\n");
LOGDEB(("sigcleanup\n")); LOGDEB(("sigcleanup\n"));
CancelCheck::instance().setCancel();
stopindexing = 1; stopindexing = 1;
} }

View File

@ -28,6 +28,9 @@ static char rcsid[] = "@(#$Id: mh_exec.cpp,v 1.14 2008-10-09 09:19:37 dockes Exp
#include "smallut.h" #include "smallut.h"
#include "transcode.h" #include "transcode.h"
#include "md5.h" #include "md5.h"
#include "rclconfig.h"
#include "mimetype.h"
#include "idfile.h"
#include <sys/types.h> #include <sys/types.h>
#include <sys/wait.h> #include <sys/wait.h>
@ -39,6 +42,13 @@ using namespace std;
bool MimeHandlerExecMultiple::startCmd() bool MimeHandlerExecMultiple::startCmd()
{ {
LOGDEB(("MimeHandlerExecMultiple::startCmd\n")); LOGDEB(("MimeHandlerExecMultiple::startCmd\n"));
if (params.empty()) {
// Uh oh: no filter command was configured for this handler
LOGERR(("MHExecMultiple::mkDoc: empty params\n"));
m_reason = "RECFILTERROR BADCONFIG";
return false;
}
// Command name // Command name
string cmd = params.front(); string cmd = params.front();
@ -56,20 +66,31 @@ bool MimeHandlerExecMultiple::startCmd()
return true; return true;
} }
bool MimeHandlerExecMultiple::readDataElement(string& name) // Note: data is not used if this is the "document:" field: it goes
// directly to m_metaData["content"] to avoid an extra copy
//
// Messages are made of data elements. Each element is like:
// name: len\ndata
// An empty line signals the end of the message, so the whole thing
// would look like:
// Name1: Len1\nData1Name2: Len2\nData2\n
bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
{ {
string ibuf; string ibuf;
// Read name and length
if (m_cmd.getline(ibuf) <= 0) { if (m_cmd.getline(ibuf) <= 0) {
LOGERR(("MHExecMultiple: getline error\n")); LOGERR(("MHExecMultiple: getline error\n"));
return false; return false;
} }
// Empty line (end of message) ?
if (!ibuf.compare("\n")) { if (!ibuf.compare("\n")) {
LOGDEB(("MHExecMultiple: Got empty line\n")); LOGDEB(("MHExecMultiple: Got empty line\n"));
name = ""; name = "";
return true; return true;
} }
// We're expecting something like paramname: len\n // We're expecting something like Name: len\n
list<string> tokens; list<string> tokens;
stringToTokens(ibuf, tokens); stringToTokens(ibuf, tokens);
if (tokens.size() != 2) { if (tokens.size() != 2) {
@ -86,19 +107,21 @@ bool MimeHandlerExecMultiple::readDataElement(string& name)
ibuf.c_str())); ibuf.c_str()));
return false; return false;
} }
LOGDEB(("MHExecMultiple: got paramname [%s] len: %d\n", LOGDEB1(("MHExecMultiple: got name [%s] len: %d\n", name.c_str(), len));
name.c_str(), len));
// We only care about the "data:" field for now // Hack: check for 'Document:' and read directly the document data
string discard; // to m_metaData["content"] to avoid an extra copy of the bulky
string *datap; // piece
if (!stringlowercmp("data:", name)) { string *datap = &data;
if (!stringlowercmp("document:", name)) {
datap = &m_metaData["content"]; datap = &m_metaData["content"];
} else { } else {
datap = &discard; datap = &data;
} }
// Then the data.
// Read element data
datap->erase(); datap->erase();
if (m_cmd.receive(*datap, len) != len) { if (len > 0 && m_cmd.receive(*datap, len) != len) {
LOGERR(("MHExecMultiple: expected %d bytes of data, got %d\n", LOGERR(("MHExecMultiple: expected %d bytes of data, got %d\n",
len, datap->length())); len, datap->length()));
return false; return false;
@ -106,52 +129,116 @@ bool MimeHandlerExecMultiple::readDataElement(string& name)
return true; return true;
} }
// Execute an external program to translate a file from its native
// format to text or html.
bool MimeHandlerExecMultiple::next_document() bool MimeHandlerExecMultiple::next_document()
{ {
LOGDEB(("MimeHandlerExecMultiple::next_document(): [%s]\n", m_fn.c_str()));
if (m_havedoc == false) if (m_havedoc == false)
return false; return false;
if (missingHelper) { if (missingHelper) {
LOGDEB(("MHExecMultiple::next_document(): helper known missing\n")); LOGDEB(("MHExecMultiple::next_document(): helper known missing\n"));
return false; return false;
} }
if (params.empty()) {
// Hu ho
LOGERR(("MHExecMultiple::mkDoc: empty params\n"));
m_reason = "RECFILTERROR BADCONFIG";
return false;
}
if (m_cmd.getChildPid() < 0 && !startCmd()) { if (m_cmd.getChildPid() < 0 && !startCmd()) {
return false; return false;
} }
// Send request to child process // Send request to child process. This may be the first/only
// request for a given file, or a continuation request. We send an
// empty file name in the latter case.
ostringstream obuf; ostringstream obuf;
obuf << "FileName: " << m_fn.length() << endl << m_fn << endl; if (m_filefirst) {
obuf << "FileName: " << m_fn.length() << "\n" << m_fn;
// m_filefirst is set to true by set_document_file()
m_filefirst = false;
} else {
obuf << "Filename: " << 0 << "\n";
}
if (m_ipath.length()) {
obuf << "Ipath: " << m_ipath.length() << "\n" << m_ipath;
}
obuf << "\n";
if (m_cmd.send(obuf.str()) < 0) { if (m_cmd.send(obuf.str()) < 0) {
LOGERR(("MHExecMultiple: send error\n")); LOGERR(("MHExecMultiple: send error\n"));
return false; return false;
} }
// Read answer // Read answer (multiple elements)
LOGDEB(("MHExecMultiple: reading answer\n")); LOGDEB1(("MHExecMultiple: reading answer\n"));
bool eof_received = false;
string ipath;
string mtype;
for (int loop=0;;loop++) { for (int loop=0;;loop++) {
string name; string name, data;
if (!readDataElement(name)) { if (!readDataElement(name, data)) {
return false; return false;
} }
if (name.empty()) if (name.empty())
break; break;
if (!stringlowercmp("eof:", name)) {
LOGDEB(("MHExecMultiple: got EOF\n"));
eof_received = true;
}
if (!stringlowercmp("ipath:", name)) {
ipath = data;
LOGDEB(("MHExecMultiple: got ipath [%s]\n", data.c_str()));
}
if (!stringlowercmp("mimetype:", name)) {
mtype = data;
LOGDEB(("MHExecMultiple: got mimetype [%s]\n", data.c_str()));
}
if (loop == 10) { if (loop == 10) {
// ?? // ??
LOGERR(("MHExecMultiple: filter sent too many parameters\n")); LOGERR(("MHExecMultiple: filter sent too many parameters\n"));
return false; return false;
} }
} }
// The end of data can be signaled from the filter in two ways:
finaldetails(); // either by returning an empty document (if the filter just hits
m_havedoc = false; // eof while trying to read the doc), or with an "eof:" field
// accompanying a normal document (if the filter hit eof at the
// end of the current doc, which is the preferred way).
if (m_metaData["content"].length() == 0) {
LOGDEB(("MHExecMultiple: got empty document\n"));
m_havedoc = false;
return false;
}
// If this has an ipath, it is an internal doc from a
// multi-document file. In this case, either the filter supplies the
// mimetype, or the ipath MUST be a filename-like string which we can use
// to compute a mime type
if (!ipath.empty()) {
m_metaData["ipath"] = ipath;
if (mtype.empty()) {
mtype = mimetype(ipath, 0, RclConfig::getMainConfig(), false);
if (mtype.empty()) {
// mimetype() won't call idFile when there is no file: identify the in-memory data instead
mtype = idFileMem(m_metaData["content"]);
if (mtype.empty()) {
LOGERR(("MHExecMultiple: cant guess mime type\n"));
mtype = "application/octet-stream";
}
}
}
m_metaData["mimetype"] = mtype;
string md5, xmd5;
MD5String(m_metaData["content"], md5);
m_metaData["md5"] = MD5HexPrint(md5, xmd5);
} else {
m_metaData.erase("ipath");
string md5, xmd5, reason;
if (MD5File(m_fn, md5, &reason)) {
m_metaData["md5"] = MD5HexPrint(md5, xmd5);
} else {
LOGERR(("MimeHandlerExecM: cant compute md5 for [%s]: %s\n",
m_fn.c_str(), reason.c_str()));
}
}
if (eof_received)
m_havedoc = false;
return true; return true;
} }

View File

@ -28,7 +28,8 @@
* which is built in mimehandler.cpp out of data from the mimeconf file. * which is built in mimehandler.cpp out of data from the mimeconf file.
* *
* This version uses persistent filters which can handle multiple requests * This version uses persistent filters which can handle multiple requests
* without exiting, with a simple question/response protocol. * without exiting (both multiple files and multiple documents per file),
* with a simple question/response protocol.
* *
* The data is exchanged in TLV fashion, in a way that should be * The data is exchanged in TLV fashion, in a way that should be
* usable in most script languages. The basic unit has one line with a * usable in most script languages. The basic unit has one line with a
@ -49,11 +50,11 @@ text/plainData: 10
0123456789 0123456789
<Message ends here because of empty line <Message ends here because of empty line
* *
* Until proven otherwise, this format is both extensible and * This format is both extensible and reasonably easy to parse.
* reasonably easy to parse. While it's more destined for python or * While it's more destined for python or perl on the script side, it
* perl on the script side, it should even be sort of usable from the shell * should even be sort of usable from the shell (ie: use dd to read
* (ie: use dd to read the counted data). Most alternatives would need data * the counted data). Most alternatives would need data encoding in
* encoding in some cases. * some cases.
*/ */
class MimeHandlerExecMultiple : public MimeHandlerExec { class MimeHandlerExecMultiple : public MimeHandlerExec {
///////// /////////
@ -71,9 +72,14 @@ class MimeHandlerExecMultiple : public MimeHandlerExec {
virtual void clear() { virtual void clear() {
MimeHandlerExec::clear(); MimeHandlerExec::clear();
} }
virtual bool set_document_file(const string &file_path) {
m_filefirst = true;
return MimeHandlerExec::set_document_file(file_path);
}
private: private:
bool startCmd(); bool startCmd();
bool readDataElement(string& name); bool readDataElement(string& name, string& data);
bool m_filefirst;
}; };
#endif /* _MH_EXECM_H_INCLUDED_ */ #endif /* _MH_EXECM_H_INCLUDED_ */

View File

@ -71,6 +71,7 @@ application/x-kword = exec rclkwd
application/x-lyx = exec rcllyx application/x-lyx = exec rcllyx
application/x-scribus = exec rclscribus application/x-scribus = exec rclscribus
application/x-tex = exec rcltex application/x-tex = exec rcltex
application/zip = execm rclzip
audio/mpeg = exec rclid3 audio/mpeg = exec rclid3
image/gif = execm rclimg image/gif = execm rclimg
image/jpeg = execm rclimg image/jpeg = execm rclimg

View File

@ -39,6 +39,7 @@
.Z = application/x-gzip .Z = application/x-gzip
.bz2 = application/x-bzip2 .bz2 = application/x-bzip2
#.Z = application/x-compress #.Z = application/x-compress
.zip = application/zip
.doc = application/msword .doc = application/msword
.ppt = application/vnd.ms-powerpoint .ppt = application/vnd.ms-powerpoint

View File

@ -226,7 +226,6 @@ int ExecCmd::startExec(const string &cmd, const list<string>& args,
NetconCli *iclicon = new NetconCli(); NetconCli *iclicon = new NetconCli();
iclicon->setconn(m_pipein[1]); iclicon->setconn(m_pipein[1]);
m_tocmd = NetconP(iclicon); m_tocmd = NetconP(iclicon);
m_pipein[1] = 0;
} }
if (has_output) { if (has_output) {
close(m_pipeout[1]); close(m_pipeout[1]);
@ -234,7 +233,6 @@ int ExecCmd::startExec(const string &cmd, const list<string>& args,
NetconCli *oclicon = new NetconCli(); NetconCli *oclicon = new NetconCli();
oclicon->setconn(m_pipeout[0]); oclicon->setconn(m_pipeout[0]);
m_fromcmd = NetconP(oclicon); m_fromcmd = NetconP(oclicon);
m_pipeout[0] = -1;
} }
/* Don't want to undo what we just did ! */ /* Don't want to undo what we just did ! */
@ -361,6 +359,18 @@ int ExecCmd::doexec(const string &cmd, const list<string>& args,
} }
} }
LOGDEB0(("ExecCmd::doexec: selectloop returned %d\n", ret)); LOGDEB0(("ExecCmd::doexec: selectloop returned %d\n", ret));
// The netcons don't take ownership of the fds: we have to close them
// ourselves (this must be done before wait, as the close may be the
// signal the child is waiting for before it exits).
if (input) {
close(m_pipein[1]);
m_pipein[1] = -1;
}
if (output) {
close(m_pipeout[0]);
m_pipeout[0] = -1;
}
} }
// Normal return: deactivate cleaner, wait() will do the cleanup // Normal return: deactivate cleaner, wait() will do the cleanup