###########################################
## Generic recoll multifilter communication code
import os
import sys


class RclExecM:
    """Protocol handler for a filter process driven over stdin/stdout.

    A request is a sequence of parameters terminated by an empty line.
    Each parameter is a header line ``<name> <size>`` followed by exactly
    ``<size>`` characters of data.  The reply is written by answer().

    The ``processor`` object handed to mainloop()/processmessage() must
    provide ``openfile(params)``, ``getipath(params)`` and
    ``getnext(params)``; the latter two return ``(ok, data, ipath, eof)``.
    """

    def __init__(self):
        # Program name, used as a prefix for log messages.
        self.myname = os.path.basename(sys.argv[0])

    def rclog(self, s, doexit=0, exitvalue=1):
        """Write a message to stderr; if doexit is true, exit with exitvalue."""
        sys.stderr.write("RCLMFILT: %s : %s\n" % (self.myname, s))
        if doexit:
            # sys.exit() rather than the exit() helper, which only exists
            # when the site module has been loaded.
            sys.exit(exitvalue)

    def readparam(self):
        """Read one parameter from stdin.

        Returns a ``(name, data)`` tuple with the name lowercased, or
        ``("", "")`` for the empty line which terminates a message.
        Exits with status 0 on end of input, and with status 1 on a
        malformed header line or a short read.
        """
        s = sys.stdin.readline()
        if s == '':
            self.rclog(": EOF on input", 1, 0)

        s = s.rstrip("\n")

        if s == "":
            return ("", "")
        l = s.split()
        if len(l) != 2:
            self.rclog("bad line: [" + s + "]", 1, 1)

        paramname = l[0].lower()
        try:
            paramsize = int(l[1])
        except ValueError:
            # A non-numeric size is a malformed header like any other:
            # report it the same way instead of dying with a traceback.
            self.rclog("bad line: [" + s + "]", 1, 1)

        if paramsize > 0:
            paramdata = sys.stdin.read(paramsize)
            if len(paramdata) != paramsize:
                self.rclog("Bad read: wanted %d, got %d" %
                           (paramsize, len(paramdata)), 1, 1)
        else:
            paramdata = ""

        #self.rclog("paramname [%s] paramsize %d value [%s]" %
        #           (paramname, paramsize, paramdata))
        return (paramname, paramdata)

    # Send answer: document, ipath, possible eof.
    def answer(self, docdata, ipath, iseof):
        """Write one reply message to stdout and flush it.

        The message is ``Document: <len>`` followed by the data, then, if
        ipath is not empty, ``Ipath: <len>`` followed by the ipath, then
        ``Eof: 0`` if iseof is true, and finally an empty line.
        """
        out = sys.stdout
        out.write("Document: %d\n" % len(docdata))
        out.write(docdata)

        if len(ipath):
            out.write("Ipath: %d\n" % len(ipath))
            out.write(ipath)

        # If we're at the end of the contents, say so
        if iseof:
            out.write("Eof: 0\n")
        # End of message
        out.write("\n")
        out.flush()
        #self.rclog("done writing data")

    def processmessage(self, processor, params):
        """Act on one decoded request and send the reply.

        params maps lowercased parameter names (e.g. ``"filename:"``,
        ``"ipath:"``) to their data.  Exits with status 1 if there is no
        ``"filename:"`` entry.
        """
        # We must have a filename entry (even empty). Else exit
        if "filename:" not in params:
            self.rclog("no filename ??", 1, 1)

        # If we're given a file name, open it.
        if len(params["filename:"]) != 0:
            processor.openfile(params)

        # If we have an ipath, that's what we look for, else ask for next entry
        ipath = ""
        if "ipath:" in params and len(params["ipath:"]):
            ok, data, ipath, eof = processor.getipath(params)
        else:
            ok, data, ipath, eof = processor.getnext(params)
        #self.rclog("processmessage: ok %s eof %s ipath %s"%(ok, eof, ipath))
        if ok:
            self.answer(data, ipath, eof)
        else:
            # On failure send an empty document, but still pass on eof.
            self.answer("", "", eof)

    # Loop on messages from our master
    def mainloop(self, processor):
        """Read and process requests forever.

        Only returns through the exits in readparam()/processmessage(),
        normally on end of input.
        """
        while True:
            #self.rclog("waiting for command")

            params = dict()

            # Read at most 10 parameters (normally 1 or 2), stop at empty line
            # End of message is signalled by empty paramname
            for _ in range(10):
                paramname, paramdata = self.readparam()
                if paramname == "":
                    break
                params[paramname] = paramdata

            # Got message, act on it
            self.processmessage(processor, params)
(paramname, paramdata) - -# Send answer: document, ipath, possible eof. -def answer(docdata, ipath, iseof): - - print "Document:", len(docdata) - sys.stdout.write(docdata) - - if len(ipath): - print "Ipath:", len(ipath) - sys.stdout.write(ipath) - - # If we're at the end of the contents, say so - if iseof: - print "Eof: 0" - # End of message - print - sys.stdout.flush() - #rclog("done writing data") - -def processmessage(processor, params): - - # We must have a filename entry (even empty). Else exit - if not params.has_key("filename:"): - rclog("no filename ??", 1, 1) - - # If we're given a file name, open it. - if len(params["filename:"]) != 0: - processor.openfile(params) - - # If we have an ipath, that's what we look for, else ask for next entry - ipath = "" - if params.has_key("ipath:") and len(params["ipath:"]): - ok, data, ipath, eof = processor.getipath(params) - else: - ok, data, ipath, eof = processor.getnext(params) - #rclog("processmessage: ok %s eof %s ipath %s"%(ok, eof, ipath)) - if ok: - answer(data, ipath, eof) - else: - answer("", "", eof) - -# Loop on messages from our master -def mainloop(processor): - while 1: - #rclog("waiting for command") - - params = dict() - - # Read at most 10 parameters (normally 1 or 2), stop at empty line - # End of message is signalled by empty paramname - for i in range(10): - paramname, paramdata = readparam() - if paramname == "": - break - params[paramname] = paramdata - - # Got message, act on it - processmessage(processor, params) - -################################################################### -# Code specific to the zip file filter from here +import rclexecm class ZipExtractor: - def __init__(self): + def __init__(self, em): self.currentindex = 0 self.contents = [] self.zipfile = "" - + self.em = em # Execute unzip to retrieve TOC list. 
Store in self.contents def listzip(self): cmd = "unzip -Z -1 '%s'"%(self.zipfile) @@ -121,7 +26,7 @@ class ZipExtractor: cmd = "unzip -p '%s' '%s'"%(self.zipfile, name) f = os.popen(cmd, "r") data = f.read() - # rclog("data: %s" % data) + # em.rclog("data: %s" % data) status = f.close() if status: return (False, "") @@ -132,7 +37,7 @@ class ZipExtractor: eof = (self.currentindex >= len(self.contents) -1) return (ok, docdata, ipath, eof) - ###### File type handler api, used by the protocol handler above: + ###### File type handler api, used by rclexecm ----------> def openfile(self, params): self.zipfile = params["filename:"] self.currentindex = 0 @@ -143,11 +48,12 @@ class ZipExtractor: def getnext(self, params): if self.currentindex >= len(self.contents): - #rclog("getnext: EOF hit") + #em.rclog("getnext: EOF hit") return (False, "", "", 1) else: ret= self.extractone(self.contents[self.currentindex].rstrip("\n")) self.currentindex += 1 return ret - -mainloop(ZipExtractor()) + +e = rclexecm.RclExecM() +e.mainloop(ZipExtractor(e))