#!/usr/bin/env python

###########################################
## Generic recoll multifilter communication code
import sys
import os

# Script name, used as a prefix on every diagnostic message.
myname = os.path.basename(sys.argv[0])


def ermsg(s, fatal=0, exitvalue=1):
    """Write a diagnostic line to stderr, optionally ending the process.

    s         -- message text.
    fatal     -- when true, the process exits after the message is written.
    exitvalue -- exit status used when fatal is true.
    """
    # write() instead of the print statement: same bytes on stderr, and the
    # line parses under both Python 2 and Python 3.
    sys.stderr.write("RCLMFILT: %s : %s\n" % (myname, s))
    if fatal:
        # sys.exit, not the bare exit() helper: the latter is only installed
        # by the site module and is missing e.g. under "python -S".
        sys.exit(exitvalue)


def readparam():
    """Read one parameter (header line plus value) from stdin.

    A header line has the form "<name> <size>"; <size> characters of value
    are then read straight from stdin.  The name is lower-cased.

    Returns a (paramname, paramdata) tuple.  An empty input line yields
    ("", "").  A size of zero (or less) yields an empty value.

    Exits the process with status 0 on EOF, and with status 1 on a
    malformed header line, a non-numeric size or a short read.
    """
    s = sys.stdin.readline()
    if s == '':
        ermsg(": EOF on input", 1, 0)

    s = s.rstrip("\n")

    if s == "":
        return ("", "")
    l = s.split()
    if len(l) != 2:
        ermsg("bad line: [" + s + "]", 1, 1)

    paramname = l[0].lower()
    try:
        paramsize = int(l[1])
    except ValueError:
        # Report a protocol error the same way as other bad lines rather
        # than dying with a traceback.
        ermsg("bad size in line: [" + s + "]", 1, 1)

    if paramsize > 0:
        paramdata = sys.stdin.read(paramsize)
        if len(paramdata) != paramsize:
            ermsg("Bad read: wanted %d, got %d" % (paramsize, len(paramdata)),
                  1, 1)
    else:
        paramdata = ""

    ermsg("paramname [%s] paramsize %d value [%s]" %
          (paramname, paramsize, paramdata))
    return (paramname, paramdata)

# Send answer: document, ipath, possible eof.
def answer(docdata, ipath, iseof):
    """Write one reply message to stdout and flush it.

    docdata -- document contents, sent after a "Document: <length>" line.
    ipath   -- entry path, sent after an "Ipath: <length>" line; the whole
               field is omitted when ipath is empty.
    iseof   -- when true, an "Eof: 0" line is added.

    The message is terminated by an empty line.
    """
    out = sys.stdout
    # write() instead of the print statement: identical output, and it
    # parses under both Python 2 and Python 3.
    out.write("Document: %d\n" % len(docdata))
    out.write(docdata)

    if len(ipath):
        out.write("Ipath: %d\n" % len(ipath))
        out.write(ipath)

    # If we're at the end of the contents, say so
    if iseof:
        out.write("Eof: 0\n")
    # End of message
    out.write("\n")
    out.flush()
    ermsg("done writing data")


def mainloop(processor):
    """Read requests from stdin forever, handing each to the processor.

    processor -- object with an extractone(params) method; params is a dict
                 mapping lower-cased parameter names to their values.

    The loop has no exit of its own; readparam() ends the process on EOF.
    """
    while 1:
        ermsg("waiting for command")

        params = dict()

        # Read at most 10 parameters (we only actually use one), stop
        # at empty line
        for _ in range(10):
            paramname, paramdata = readparam()
            if paramname == "":
                break
            params[paramname] = paramdata

        # Call worker
        processor.extractone(params)

###################################################################
# Code specific to the zip file filter from here


class ZipExtractor:
    """Serve the entries of a zip archive one at a time via unzip."""

    def __init__(self):
        # Index into self.contents of the entry served last.
        self.currentindex = 0
        # Entry names of the current archive, one per element, each still
        # carrying its trailing newline.
        self.contents = []
        # Path of the archive currently being processed.
        self.zipfile = ""

    def _popen_unzip(self, args):
        """Start unzip with the given argument list, stdout piped."""
        import subprocess
        try:
            # Argument list, no shell: names with spaces or shell
            # metacharacters are passed through untouched.
            return subprocess.Popen(["unzip"] + args, stdout=subprocess.PIPE)
        except OSError:
            ermsg("cannot execute unzip", 1, 1)

    # Open zipfile and retrieve TOC
    def listzip(self, fname):
        """Store and return the list of entry names in archive fname."""
        proc = self._popen_unzip(["-Z", "-1", fname])
        try:
            self.contents = proc.stdout.readlines()
        finally:
            proc.stdout.close()
            proc.wait()
        return self.contents

    # Extract given file
    def extractzipentry(self, fname, name):
        """Return the data of entry name from archive fname.

        NOTE(review): unzip interprets member arguments as wildcard
        patterns, so names containing * ? [ may match differently than a
        literal lookup -- confirm whether that matters here.
        """
        proc = self._popen_unzip(["-p", fname, name])
        # communicate() reads to EOF, closes the pipe and reaps the child
        # (the original never actually closed the pipe).
        data = proc.communicate()[0]
        return data

    def extractone(self, params):
        """Handle one request and send the reply through answer().

        params must contain "filename:".  A non-empty value opens that
        archive and restarts at its first entry; an empty value advances to
        the next entry of the current archive.  A non-empty "ipath:" selects
        that entry by name instead of by position.

        When the position runs past the last entry an empty document is
        sent.  Always returns True.
        """
        # See what's asked of us: open new zip file, next entry or specific one
        if "filename:" not in params:
            ermsg("no filename ??", 1, 1)

        # If we're given a file name, open it. Else increment position in
        # current file (but wait for possible ipath)
        if len(params["filename:"]) != 0:
            self.zipfile = params["filename:"]
            self.currentindex = 0
            ermsg("opening [%s]" % (self.zipfile,))
            self.listzip(self.zipfile)
        else:
            self.currentindex += 1

        # If we have an ipath, that's what we look for, else process next entry
        if "ipath:" in params and len(params["ipath:"]):
            entryname = params["ipath:"]
        else:
            if self.currentindex >= len(self.contents):
                # EOF: "Document: 0\n\n"
                answer("", "", 0)
                return True
            entryname = self.contents[self.currentindex].rstrip("\n")

        docdata = self.extractzipentry(self.zipfile, entryname)
        # Flag EOF together with the last entry of the listing.
        answer(docdata, entryname,
               (self.currentindex >= len(self.contents) - 1))
        return True


# Only start serving when run as a script, so that importing the module
# does not block on stdin.
if __name__ == "__main__":
    mainloop(ZipExtractor())