execm first working zip version

This commit is contained in:
dockes 2009-10-14 06:21:44 +00:00
parent 3fcde55ff4
commit ce0078081a
7 changed files with 157 additions and 43 deletions

View File

@ -119,10 +119,12 @@ sub readparam {
}
my $paramname = lc $l[0];
my $paramsize = $l[1];
my $n = read STDIN, $paramdata, $paramsize;
if ($n != $paramsize) {
print STDERR "RCLIMG: [$paramname] expected $paramsize, got $n\n";
exit 1;
if ($paramsize > 0) {
my $n = read STDIN, $paramdata, $paramsize;
if ($n != $paramsize) {
print STDERR "RCLIMG: [$paramname] expected $paramsize, got $n\n";
exit 1;
}
}
# print STDERR "RCLIMG: [$paramname] $paramsize bytes: [$paramdata]\n";
return ($paramname, $paramdata);
@ -156,14 +158,19 @@ while (1) {
}
unless (defined $params{"filename:"}) {
print STDERR "RCLIMG: no filename ??\n";
exit 1;
# Recoll is requesting next subdocument, but we have none, just say
# so:
print "Document: 0\n\n";
next;
}
my $data = imgTagsToHtml($params{"filename:"});
my $l = length($data);
print "Data: $l\n";
print "Document: $l\n";
# print STDERR "RCLIMG: writing $l bytes of data\n";
print $data;
# Say we have no further documents for this file
print "Eof: 0\n";
# End of output parameters: print empty line
print "\n";
# print STDERR "RCLIMG: done writing data\n";

View File

@ -40,6 +40,7 @@ using namespace std;
#include "rclmon.h"
#include "x11mon.h"
#include "rclversion.h"
#include "cancelcheck.h"
// Globals for exit cleanup
ConfIndexer *confindexer;
@ -63,6 +64,7 @@ static void sigcleanup(int sig)
{
fprintf(stderr, "sigcleanup\n");
LOGDEB(("sigcleanup\n"));
CancelCheck::instance().setCancel();
stopindexing = 1;
}

View File

@ -28,6 +28,9 @@ static char rcsid[] = "@(#$Id: mh_exec.cpp,v 1.14 2008-10-09 09:19:37 dockes Exp
#include "smallut.h"
#include "transcode.h"
#include "md5.h"
#include "rclconfig.h"
#include "mimetype.h"
#include "idfile.h"
#include <sys/types.h>
#include <sys/wait.h>
@ -39,6 +42,13 @@ using namespace std;
bool MimeHandlerExecMultiple::startCmd()
{
LOGDEB(("MimeHandlerExecMultiple::startCmd\n"));
if (params.empty()) {
// Hu ho
LOGERR(("MHExecMultiple::mkDoc: empty params\n"));
m_reason = "RECFILTERROR BADCONFIG";
return false;
}
// Command name
string cmd = params.front();
@ -56,20 +66,31 @@ bool MimeHandlerExecMultiple::startCmd()
return true;
}
bool MimeHandlerExecMultiple::readDataElement(string& name)
// Note: data is not used if this is the "document:" field: it goes
// directly to m_metaData["content"] to avoid an extra copy
//
// Messages are made of data elements. Each element is like:
// name: len\ndata
// An empty line signals the end of the message, so the whole thing
// would look like:
// Name1: Len1\nData1Name2: Len2\nData2\n
bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
{
string ibuf;
// Read name and length
if (m_cmd.getline(ibuf) <= 0) {
LOGERR(("MHExecMultiple: getline error\n"));
return false;
}
// Empty line (end of message) ?
if (!ibuf.compare("\n")) {
LOGDEB(("MHExecMultiple: Got empty line\n"));
name = "";
return true;
}
// We're expecting something like paramname: len\n
// We're expecting something like Name: len\n
list<string> tokens;
stringToTokens(ibuf, tokens);
if (tokens.size() != 2) {
@ -86,19 +107,21 @@ bool MimeHandlerExecMultiple::readDataElement(string& name)
ibuf.c_str()));
return false;
}
LOGDEB(("MHExecMultiple: got paramname [%s] len: %d\n",
name.c_str(), len));
// We only care about the "data:" field for now
string discard;
string *datap;
if (!stringlowercmp("data:", name)) {
LOGDEB1(("MHExecMultiple: got name [%s] len: %d\n", name.c_str(), len));
// Hack: check for 'Document:' and read the document data directly
// into m_metaData["content"] to avoid an extra copy of the bulky
// piece
string *datap = &data;
if (!stringlowercmp("document:", name)) {
datap = &m_metaData["content"];
} else {
datap = &discard;
datap = &data;
}
// Then the data.
// Read element data
datap->erase();
if (m_cmd.receive(*datap, len) != len) {
if (len > 0 && m_cmd.receive(*datap, len) != len) {
LOGERR(("MHExecMultiple: expected %d bytes of data, got %d\n",
len, datap->length()));
return false;
@ -106,52 +129,116 @@ bool MimeHandlerExecMultiple::readDataElement(string& name)
return true;
}
// Execute an external program to translate a file from its native
// format to text or html.
bool MimeHandlerExecMultiple::next_document()
{
LOGDEB(("MimeHandlerExecMultiple::next_document(): [%s]\n", m_fn.c_str()));
if (m_havedoc == false)
return false;
if (missingHelper) {
LOGDEB(("MHExecMultiple::next_document(): helper known missing\n"));
return false;
}
if (params.empty()) {
// Hu ho
LOGERR(("MHExecMultiple::mkDoc: empty params\n"));
m_reason = "RECFILTERROR BADCONFIG";
return false;
}
if (m_cmd.getChildPid() < 0 && !startCmd()) {
return false;
}
// Send request to child process
// Send request to child process. This may be the first/only
// request for a given file, or a continuation request. We send an
// empty file name in the latter case.
ostringstream obuf;
obuf << "FileName: " << m_fn.length() << endl << m_fn << endl;
if (m_filefirst) {
obuf << "FileName: " << m_fn.length() << "\n" << m_fn;
// m_filefirst is set to true by set_document_file()
m_filefirst = false;
} else {
obuf << "Filename: " << 0 << "\n";
}
if (m_ipath.length()) {
obuf << "Ipath: " << m_ipath.length() << "\n" << m_ipath;
}
obuf << "\n";
if (m_cmd.send(obuf.str()) < 0) {
LOGERR(("MHExecMultiple: send error\n"));
return false;
}
// Read answer
LOGDEB(("MHExecMultiple: reading answer\n"));
// Read answer (multiple elements)
LOGDEB1(("MHExecMultiple: reading answer\n"));
bool eof_received = false;
string ipath;
string mtype;
for (int loop=0;;loop++) {
string name;
if (!readDataElement(name)) {
string name, data;
if (!readDataElement(name, data)) {
return false;
}
if (name.empty())
break;
if (!stringlowercmp("eof:", name)) {
LOGDEB(("MHExecMultiple: got EOF\n"));
eof_received = true;
}
if (!stringlowercmp("ipath:", name)) {
ipath = data;
LOGDEB(("MHExecMultiple: got ipath [%s]\n", data.c_str()));
}
if (!stringlowercmp("mimetype:", name)) {
mtype = data;
LOGDEB(("MHExecMultiple: got mimetype [%s]\n", data.c_str()));
}
if (loop == 10) {
// ??
LOGERR(("MHExecMultiple: filter sent too many parameters\n"));
return false;
}
}
finaldetails();
m_havedoc = false;
// The end of data can be signaled from the filter in two ways:
// either by returning an empty document (if the filter just hits
// eof while trying to read the doc), or with an "eof:" field
// accompanying a normal document (if the filter hit eof at the
// end of the current doc, which is the preferred way).
if (m_metaData["content"].length() == 0) {
LOGDEB(("MHExecMultiple: got empty document\n"));
m_havedoc = false;
return false;
}
// If this has an ipath, it is an internal doc from a
// multi-document file. In this case, either the filter supplies the
// mimetype, or the ipath MUST be a filename-like string which we can use
// to compute a mime type
if (!ipath.empty()) {
m_metaData["ipath"] = ipath;
if (mtype.empty()) {
mtype = mimetype(ipath, 0, RclConfig::getMainConfig(), false);
if (mtype.empty()) {
// mimetype() won't call idFile when there is no file, so try to
// identify the type from the in-memory document content instead
mtype = idFileMem(m_metaData["content"]);
if (mtype.empty()) {
LOGERR(("MHExecMultiple: cant guess mime type\n"));
mtype = "application/octet-stream";
}
}
}
m_metaData["mimetype"] = mtype;
string md5, xmd5;
MD5String(m_metaData["content"], md5);
m_metaData["md5"] = MD5HexPrint(md5, xmd5);
} else {
m_metaData.erase("ipath");
string md5, xmd5, reason;
if (MD5File(m_fn, md5, &reason)) {
m_metaData["md5"] = MD5HexPrint(md5, xmd5);
} else {
LOGERR(("MimeHandlerExecM: cant compute md5 for [%s]: %s\n",
m_fn.c_str(), reason.c_str()));
}
}
if (eof_received)
m_havedoc = false;
return true;
}

View File

@ -28,7 +28,8 @@
* which is built in mimehandler.cpp out of data from the mimeconf file.
*
* This version uses persistent filters which can handle multiple requests
* without exiting, with a simple question/response protocol.
* without exiting (both multiple files and multiple documents per file),
* with a simple question/response protocol.
*
* The data is exchanged in TLV fashion, in a way that should be
* usable in most script languages. The basic unit has one line with a
@ -49,11 +50,11 @@ text/plainData: 10
0123456789
<Message ends here because of empty line
*
* Until proven otherwise, this format is both extensible and
* reasonably easy to parse. While it's more destined for python or
* perl on the script side, it should even be sort of usable from the shell
* (ie: use dd to read the counted data). Most alternatives would need data
* encoding in some cases.
* This format is both extensible and reasonably easy to parse.
* While it is mostly intended for python or perl on the script side, it
* should even be sort of usable from the shell (i.e. use dd to read
* the counted data). Most alternatives would need data encoding in
* some cases.
*/
class MimeHandlerExecMultiple : public MimeHandlerExec {
/////////
@ -71,9 +72,14 @@ class MimeHandlerExecMultiple : public MimeHandlerExec {
virtual void clear() {
MimeHandlerExec::clear();
}
virtual bool set_document_file(const string &file_path) {
m_filefirst = true;
return MimeHandlerExec::set_document_file(file_path);
}
private:
bool startCmd();
bool readDataElement(string& name);
bool readDataElement(string& name, string& data);
bool m_filefirst;
};
#endif /* _MH_EXECM_H_INCLUDED_ */

View File

@ -71,6 +71,7 @@ application/x-kword = exec rclkwd
application/x-lyx = exec rcllyx
application/x-scribus = exec rclscribus
application/x-tex = exec rcltex
application/zip = execm rclzip
audio/mpeg = exec rclid3
image/gif = execm rclimg
image/jpeg = execm rclimg

View File

@ -39,6 +39,7 @@
.Z = application/x-gzip
.bz2 = application/x-bzip2
#.Z = application/x-compress
.zip = application/zip
.doc = application/msword
.ppt = application/vnd.ms-powerpoint

View File

@ -226,7 +226,6 @@ int ExecCmd::startExec(const string &cmd, const list<string>& args,
NetconCli *iclicon = new NetconCli();
iclicon->setconn(m_pipein[1]);
m_tocmd = NetconP(iclicon);
m_pipein[1] = 0;
}
if (has_output) {
close(m_pipeout[1]);
@ -234,7 +233,6 @@ int ExecCmd::startExec(const string &cmd, const list<string>& args,
NetconCli *oclicon = new NetconCli();
oclicon->setconn(m_pipeout[0]);
m_fromcmd = NetconP(oclicon);
m_pipeout[0] = -1;
}
/* Don't want to undo what we just did ! */
@ -361,6 +359,18 @@ int ExecCmd::doexec(const string &cmd, const list<string>& args,
}
}
LOGDEB0(("ExecCmd::doexec: selectloop returned %d\n", ret));
// The netcons don't take ownership of the fds: we have to close them
// (this has to be done before wait: closing them may be the signal
// the child is waiting for before exiting).
if (input) {
close(m_pipein[1]);
m_pipein[1] = -1;
}
if (output) {
close(m_pipeout[0]);
m_pipeout[0] = -1;
}
}
// Normal return: deactivate cleaner, wait() will do the cleanup