execm first working zip version
This commit is contained in:
parent
3fcde55ff4
commit
ce0078081a
@ -119,10 +119,12 @@ sub readparam {
|
||||
}
|
||||
my $paramname = lc $l[0];
|
||||
my $paramsize = $l[1];
|
||||
my $n = read STDIN, $paramdata, $paramsize;
|
||||
if ($n != $paramsize) {
|
||||
print STDERR "RCLIMG: [$paramname] expected $paramsize, got $n\n";
|
||||
exit 1;
|
||||
if ($paramsize > 0) {
|
||||
my $n = read STDIN, $paramdata, $paramsize;
|
||||
if ($n != $paramsize) {
|
||||
print STDERR "RCLIMG: [$paramname] expected $paramsize, got $n\n";
|
||||
exit 1;
|
||||
}
|
||||
}
|
||||
# print STDERR "RCLIMG: [$paramname] $paramsize bytes: [$paramdata]\n";
|
||||
return ($paramname, $paramdata);
|
||||
@ -156,14 +158,19 @@ while (1) {
|
||||
}
|
||||
unless (defined $params{"filename:"}) {
|
||||
print STDERR "RCLIMG: no filename ??\n";
|
||||
exit 1;
|
||||
# Recoll is requesting next subdocument, but we have none, just say
|
||||
# so:
|
||||
print "Document: 0\n\n";
|
||||
next;
|
||||
}
|
||||
|
||||
my $data = imgTagsToHtml($params{"filename:"});
|
||||
my $l = length($data);
|
||||
print "Data: $l\n";
|
||||
print "Document: $l\n";
|
||||
# print STDERR "RCLIMG: writing $l bytes of data\n";
|
||||
print $data;
|
||||
# Say we have no further documents for this file
|
||||
print "Eof: 0\n";
|
||||
# End of output parameters: print empty line
|
||||
print "\n";
|
||||
# print STDERR "RCLIMG: done writing data\n";
|
||||
|
||||
@ -40,6 +40,7 @@ using namespace std;
|
||||
#include "rclmon.h"
|
||||
#include "x11mon.h"
|
||||
#include "rclversion.h"
|
||||
#include "cancelcheck.h"
|
||||
|
||||
// Globals for exit cleanup
|
||||
ConfIndexer *confindexer;
|
||||
@ -63,6 +64,7 @@ static void sigcleanup(int sig)
|
||||
{
|
||||
fprintf(stderr, "sigcleanup\n");
|
||||
LOGDEB(("sigcleanup\n"));
|
||||
CancelCheck::instance().setCancel();
|
||||
stopindexing = 1;
|
||||
}
|
||||
|
||||
|
||||
@ -28,6 +28,9 @@ static char rcsid[] = "@(#$Id: mh_exec.cpp,v 1.14 2008-10-09 09:19:37 dockes Exp
|
||||
#include "smallut.h"
|
||||
#include "transcode.h"
|
||||
#include "md5.h"
|
||||
#include "rclconfig.h"
|
||||
#include "mimetype.h"
|
||||
#include "idfile.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
@ -39,6 +42,13 @@ using namespace std;
|
||||
bool MimeHandlerExecMultiple::startCmd()
|
||||
{
|
||||
LOGDEB(("MimeHandlerExecMultiple::startCmd\n"));
|
||||
if (params.empty()) {
|
||||
// Hu ho
|
||||
LOGERR(("MHExecMultiple::mkDoc: empty params\n"));
|
||||
m_reason = "RECFILTERROR BADCONFIG";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Command name
|
||||
string cmd = params.front();
|
||||
|
||||
@ -56,20 +66,31 @@ bool MimeHandlerExecMultiple::startCmd()
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MimeHandlerExecMultiple::readDataElement(string& name)
|
||||
// Note: data is not used if this is the "document:" field: it goes
|
||||
// directly to m_metaData["content"] to avoid an extra copy
|
||||
//
|
||||
// Messages are made of data elements. Each element is like:
|
||||
// name: len\ndata
|
||||
// An empty line signals the end of the message, so the whole thing
|
||||
// would look like:
|
||||
// Name1: Len1\nData1Name2: Len2\nData2\n
|
||||
bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
|
||||
{
|
||||
string ibuf;
|
||||
|
||||
// Read name and length
|
||||
if (m_cmd.getline(ibuf) <= 0) {
|
||||
LOGERR(("MHExecMultiple: getline error\n"));
|
||||
return false;
|
||||
}
|
||||
// Empty line (end of message) ?
|
||||
if (!ibuf.compare("\n")) {
|
||||
LOGDEB(("MHExecMultiple: Got empty line\n"));
|
||||
name = "";
|
||||
return true;
|
||||
}
|
||||
|
||||
// We're expecting something like paramname: len\n
|
||||
// We're expecting something like Name: len\n
|
||||
list<string> tokens;
|
||||
stringToTokens(ibuf, tokens);
|
||||
if (tokens.size() != 2) {
|
||||
@ -86,19 +107,21 @@ bool MimeHandlerExecMultiple::readDataElement(string& name)
|
||||
ibuf.c_str()));
|
||||
return false;
|
||||
}
|
||||
LOGDEB(("MHExecMultiple: got paramname [%s] len: %d\n",
|
||||
name.c_str(), len));
|
||||
// We only care about the "data:" field for now
|
||||
string discard;
|
||||
string *datap;
|
||||
if (!stringlowercmp("data:", name)) {
|
||||
LOGDEB1(("MHExecMultiple: got name [%s] len: %d\n", name.c_str(), len));
|
||||
|
||||
// Hack: check for 'Document:' and read directly the document data
|
||||
// to m_metaData["content"] to avoid an extra copy of the bulky
|
||||
// piece
|
||||
string *datap = &data;
|
||||
if (!stringlowercmp("document:", name)) {
|
||||
datap = &m_metaData["content"];
|
||||
} else {
|
||||
datap = &discard;
|
||||
datap = &data;
|
||||
}
|
||||
// Then the data.
|
||||
|
||||
// Read element data
|
||||
datap->erase();
|
||||
if (m_cmd.receive(*datap, len) != len) {
|
||||
if (len > 0 && m_cmd.receive(*datap, len) != len) {
|
||||
LOGERR(("MHExecMultiple: expected %d bytes of data, got %d\n",
|
||||
len, datap->length()));
|
||||
return false;
|
||||
@ -106,52 +129,116 @@ bool MimeHandlerExecMultiple::readDataElement(string& name)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Execute an external program to translate a file from its native
|
||||
// format to text or html.
|
||||
bool MimeHandlerExecMultiple::next_document()
|
||||
{
|
||||
LOGDEB(("MimeHandlerExecMultiple::next_document(): [%s]\n", m_fn.c_str()));
|
||||
if (m_havedoc == false)
|
||||
return false;
|
||||
|
||||
if (missingHelper) {
|
||||
LOGDEB(("MHExecMultiple::next_document(): helper known missing\n"));
|
||||
return false;
|
||||
}
|
||||
if (params.empty()) {
|
||||
// Hu ho
|
||||
LOGERR(("MHExecMultiple::mkDoc: empty params\n"));
|
||||
m_reason = "RECFILTERROR BADCONFIG";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_cmd.getChildPid() < 0 && !startCmd()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Send request to child process
|
||||
// Send request to child process. This may be the first/only
|
||||
// request for a given file, or a continuation request. We send an
|
||||
// empty file name in the latter case.
|
||||
ostringstream obuf;
|
||||
obuf << "FileName: " << m_fn.length() << endl << m_fn << endl;
|
||||
if (m_filefirst) {
|
||||
obuf << "FileName: " << m_fn.length() << "\n" << m_fn;
|
||||
// m_filefirst is set to true by set_document_file()
|
||||
m_filefirst = false;
|
||||
} else {
|
||||
obuf << "Filename: " << 0 << "\n";
|
||||
}
|
||||
if (m_ipath.length()) {
|
||||
obuf << "Ipath: " << m_ipath.length() << "\n" << m_ipath;
|
||||
}
|
||||
obuf << "\n";
|
||||
if (m_cmd.send(obuf.str()) < 0) {
|
||||
LOGERR(("MHExecMultiple: send error\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read answer
|
||||
LOGDEB(("MHExecMultiple: reading answer\n"));
|
||||
// Read answer (multiple elements)
|
||||
LOGDEB1(("MHExecMultiple: reading answer\n"));
|
||||
bool eof_received = false;
|
||||
string ipath;
|
||||
string mtype;
|
||||
for (int loop=0;;loop++) {
|
||||
string name;
|
||||
if (!readDataElement(name)) {
|
||||
string name, data;
|
||||
if (!readDataElement(name, data)) {
|
||||
return false;
|
||||
}
|
||||
if (name.empty())
|
||||
break;
|
||||
if (!stringlowercmp("eof:", name)) {
|
||||
LOGDEB(("MHExecMultiple: got EOF\n"));
|
||||
eof_received = true;
|
||||
}
|
||||
if (!stringlowercmp("ipath:", name)) {
|
||||
ipath = data;
|
||||
LOGDEB(("MHExecMultiple: got ipath [%s]\n", data.c_str()));
|
||||
}
|
||||
if (!stringlowercmp("mimetype:", name)) {
|
||||
mtype = data;
|
||||
LOGDEB(("MHExecMultiple: got mimetype [%s]\n", data.c_str()));
|
||||
}
|
||||
if (loop == 10) {
|
||||
// ??
|
||||
LOGERR(("MHExecMultiple: filter sent too many parameters\n"));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
finaldetails();
|
||||
m_havedoc = false;
|
||||
// The end of data can be signaled from the filter in two ways:
|
||||
// either by returning an empty document (if the filter just hits
|
||||
// eof while trying to read the doc), or with an "eof:" field
|
||||
// accompanying a normal document (if the filter hit eof at the
|
||||
// end of the current doc, which is the preferred way).
|
||||
if (m_metaData["content"].length() == 0) {
|
||||
LOGDEB(("MHExecMultiple: got empty document\n"));
|
||||
m_havedoc = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
// If this has an ipath, it is an internal doc from a
|
||||
// multi-document file. In this case, either the filter supplies the
|
||||
// mimetype, or the ipath MUST be a filename-like string which we can use
|
||||
// to compute a mime type
|
||||
if (!ipath.empty()) {
|
||||
m_metaData["ipath"] = ipath;
|
||||
if (mtype.empty()) {
|
||||
mtype = mimetype(ipath, 0, RclConfig::getMainConfig(), false);
|
||||
if (mtype.empty()) {
|
||||
// mimetype() won't call idFile when there is no file. Do it on the in-memory data
|
||||
mtype = idFileMem(m_metaData["content"]);
|
||||
if (mtype.empty()) {
|
||||
LOGERR(("MHExecMultiple: cant guess mime type\n"));
|
||||
mtype = "application/octet-stream";
|
||||
}
|
||||
}
|
||||
}
|
||||
m_metaData["mimetype"] = mtype;
|
||||
string md5, xmd5;
|
||||
MD5String(m_metaData["content"], md5);
|
||||
m_metaData["md5"] = MD5HexPrint(md5, xmd5);
|
||||
} else {
|
||||
m_metaData.erase("ipath");
|
||||
string md5, xmd5, reason;
|
||||
if (MD5File(m_fn, md5, &reason)) {
|
||||
m_metaData["md5"] = MD5HexPrint(md5, xmd5);
|
||||
} else {
|
||||
LOGERR(("MimeHandlerExecM: cant compute md5 for [%s]: %s\n",
|
||||
m_fn.c_str(), reason.c_str()));
|
||||
}
|
||||
}
|
||||
|
||||
if (eof_received)
|
||||
m_havedoc = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -28,7 +28,8 @@
|
||||
* which is built in mimehandler.cpp out of data from the mimeconf file.
|
||||
*
|
||||
* This version uses persistent filters which can handle multiple requests
|
||||
* without exiting, with a simple question/response protocol.
|
||||
* without exiting (both multiple files and multiple documents per file),
|
||||
* with a simple question/response protocol.
|
||||
*
|
||||
* The data is exchanged in TLV fashion, in a way that should be
|
||||
* usable in most script languages. The basic unit has one line with a
|
||||
@ -49,11 +50,11 @@ text/plainData: 10
|
||||
0123456789
|
||||
<Message ends here because of empty line
|
||||
*
|
||||
* Until proven otherwise, this format is both extensible and
|
||||
* reasonably easy to parse. While it's more destined for python or
|
||||
* perl on the script side, it should even be sort of usable from the shell
|
||||
* (ie: use dd to read the counted data). Most alternatives would need data
|
||||
* encoding in some cases.
|
||||
* This format is both extensible and reasonably easy to parse.
|
||||
* While it's mainly intended for Python or Perl on the script side, it
|
||||
* should even be sort of usable from the shell (i.e. use dd to read
|
||||
* the counted data). Most alternatives would need data encoding in
|
||||
* some cases.
|
||||
*/
|
||||
class MimeHandlerExecMultiple : public MimeHandlerExec {
|
||||
/////////
|
||||
@ -71,9 +72,14 @@ class MimeHandlerExecMultiple : public MimeHandlerExec {
|
||||
virtual void clear() {
|
||||
MimeHandlerExec::clear();
|
||||
}
|
||||
virtual bool set_document_file(const string &file_path) {
|
||||
m_filefirst = true;
|
||||
return MimeHandlerExec::set_document_file(file_path);
|
||||
}
|
||||
private:
|
||||
bool startCmd();
|
||||
bool readDataElement(string& name);
|
||||
bool readDataElement(string& name, string& data);
|
||||
bool m_filefirst;
|
||||
};
|
||||
|
||||
#endif /* _MH_EXECM_H_INCLUDED_ */
|
||||
|
||||
@ -71,6 +71,7 @@ application/x-kword = exec rclkwd
|
||||
application/x-lyx = exec rcllyx
|
||||
application/x-scribus = exec rclscribus
|
||||
application/x-tex = exec rcltex
|
||||
application/zip = execm rclzip
|
||||
audio/mpeg = exec rclid3
|
||||
image/gif = execm rclimg
|
||||
image/jpeg = execm rclimg
|
||||
|
||||
@ -39,6 +39,7 @@
|
||||
.Z = application/x-gzip
|
||||
.bz2 = application/x-bzip2
|
||||
#.Z = application/x-compress
|
||||
.zip = application/zip
|
||||
|
||||
.doc = application/msword
|
||||
.ppt = application/vnd.ms-powerpoint
|
||||
|
||||
@ -226,7 +226,6 @@ int ExecCmd::startExec(const string &cmd, const list<string>& args,
|
||||
NetconCli *iclicon = new NetconCli();
|
||||
iclicon->setconn(m_pipein[1]);
|
||||
m_tocmd = NetconP(iclicon);
|
||||
m_pipein[1] = 0;
|
||||
}
|
||||
if (has_output) {
|
||||
close(m_pipeout[1]);
|
||||
@ -234,7 +233,6 @@ int ExecCmd::startExec(const string &cmd, const list<string>& args,
|
||||
NetconCli *oclicon = new NetconCli();
|
||||
oclicon->setconn(m_pipeout[0]);
|
||||
m_fromcmd = NetconP(oclicon);
|
||||
m_pipeout[0] = -1;
|
||||
}
|
||||
|
||||
/* Don't want to undo what we just did ! */
|
||||
@ -361,6 +359,18 @@ int ExecCmd::doexec(const string &cmd, const list<string>& args,
|
||||
}
|
||||
}
|
||||
LOGDEB0(("ExecCmd::doexec: selectloop returned %d\n", ret));
|
||||
|
||||
// The netcons don't take ownership of the fds: we have to close them
|
||||
// (have to do it before wait, this may be the signal the child is
|
||||
// waiting for before exiting).
|
||||
if (input) {
|
||||
close(m_pipein[1]);
|
||||
m_pipein[1] = -1;
|
||||
}
|
||||
if (output) {
|
||||
close(m_pipeout[0]);
|
||||
m_pipeout[0] = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Normal return: deactivate cleaner, wait() will do the cleanup
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user