From b9e672abda7f680ae66195b64ef589a1136a75fe Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Mon, 11 Jul 2016 18:13:39 +0200 Subject: [PATCH] Allow execm input handlers to set arbitrary data fields --- src/filters/rclexecm.py | 40 +++++++++++++++++++++++++------------ src/filters/rclzip | 11 ++++++++++ src/internfile/mh_execm.cpp | 10 +++++++++- 3 files changed, 47 insertions(+), 14 deletions(-) diff --git a/src/filters/rclexecm.py b/src/filters/rclexecm.py index 26c9764e..4bb86390 100644 --- a/src/filters/rclexecm.py +++ b/src/filters/rclexecm.py @@ -66,6 +66,8 @@ class RclExecM: self.myname = "???" self.mimetype = b"" + self.fields = {} + if os.environ.get("RECOLL_FILTER_MAXMEMBERKB"): self.maxmembersize = \ int(os.environ.get("RECOLL_FILTER_MAXMEMBERKB")) @@ -125,6 +127,9 @@ class RclExecM: def setmimetype(self, mt): self.mimetype = makebytes(mt) + def setfield(self, nm, value): + self.fields[nm] = value + # Read single parameter from process input: line with param name and size # followed by data. The param name is returned as str/unicode, the data # as bytes @@ -160,35 +165,44 @@ class RclExecM: return (paramname, paramdata) if PY3: - def senditem(self, nm, len, data): - sys.stdout.buffer.write(makebytes("%s: %d\n" % (nm, len))) - self.breakwrite(sys.stdout.buffer, makebytes(data)) + def senditem(self, nm, data): + data = makebytes(data) + l = len(data) + sys.stdout.buffer.write(makebytes("%s: %d\n" % (nm, l))) + self.breakwrite(sys.stdout.buffer, data) else: - def senditem(self, nm, len, data): - sys.stdout.write(makebytes("%s: %d\n" % (nm, len))) - self.breakwrite(sys.stdout, makebytes(data)) + def senditem(self, nm, data): + data = makebytes(data) + l = len(data) + sys.stdout.write(makebytes("%s: %d\n" % (nm, l))) + self.breakwrite(sys.stdout, data) # Send answer: document, ipath, possible eof. 
def answer(self, docdata, ipath, iseof = noteof, iserror = noerror): if iserror != RclExecM.fileerror and iseof != RclExecM.eofnow: - self.senditem("Document", len(docdata), docdata) + self.senditem("Document", docdata) if len(ipath): - self.senditem("Ipath", len(ipath), ipath) + self.senditem("Ipath", ipath) if len(self.mimetype): - self.senditem("Mimetype", len(self.mimetype), self.mimetype) + self.senditem("Mimetype", self.mimetype) + for nm,value in self.fields.items(): + #self.rclog("Senditem: [%s] -> [%s]" % (nm, value)) + self.senditem("%s:"%nm, value) + self.fields = {} + # If we're at the end of the contents, say so if iseof == RclExecM.eofnow: - self.senditem("Eofnow", 0, b'') + self.senditem("Eofnow", b'') elif iseof == RclExecM.eofnext: - self.senditem("Eofnext", 0, b'') + self.senditem("Eofnext", b'') if iserror == RclExecM.subdocerror: - self.senditem("Subdocerror", 0, b'') + self.senditem("Subdocerror", b'') elif iserror == RclExecM.fileerror: - self.senditem("Fileerror", 0, b'') + self.senditem("Fileerror", b'') # End of message print() diff --git a/src/filters/rclzip b/src/filters/rclzip index 82974e54..2131bf1d 100755 --- a/src/filters/rclzip +++ b/src/filters/rclzip @@ -21,6 +21,7 @@ from __future__ import print_function import os +import posixpath import fnmatch import rclexecm from zipfile import ZipFile @@ -89,6 +90,16 @@ class ZipExtractor: #raise BadZipfile() else: docdata = self.zip.read(ipath) + try: + # We are assuming here that the zip uses forward slash + # separators, which is not necessarily the case. At + # worst, we'll get a wrong or no file name, which is + # no big deal (the ipath is the important data + # element). 
+ filename = posixpath.basename(ipath) + self.em.setfield("filename", filename) + except Exception: + pass ok = True except Exception as err: self.em.rclog("extractone: failed: [%s]" % err) diff --git a/src/internfile/mh_execm.cpp b/src/internfile/mh_execm.cpp index 05ecdce0..2e294d7e 100644 --- a/src/internfile/mh_execm.cpp +++ b/src/internfile/mh_execm.cpp @@ -169,6 +169,8 @@ bool MimeHandlerExecMultiple::next_document() return false; } + m_metaData.clear(); + // Send request to child process. This maybe the first/only // request for a given file, or a continuation request. We send an // empty file name in the latter case. @@ -260,8 +262,14 @@ bool MimeHandlerExecMultiple::next_document() } else if (!stringlowercmp("mimetype:", name)) { mtype = data; LOGDEB(("MHExecMultiple: got mimetype [%s]\n", data.c_str())); + } else { + string nm = stringtolower((const string&)name); + trimstring(nm, ":"); + LOGDEB(("MHExecMultiple: got [%s] -> [%s]\n", nm.c_str(), + data.c_str())); + m_metaData[nm] += data; } - if (loop == 10) { + if (loop == 20) { // ?? LOGERR(("MHExecMultiple: filter sent too many parameters\n")); return false;