This commit is contained in:
dockes 2009-10-21 21:00:06 +00:00
parent 665de364af
commit d930b35f9f

133
src/filters/rclzip Executable file
View File

@ -0,0 +1,133 @@
#!/usr/bin/env python
###########################################
## Generic recoll multifilter communication code
import os
import subprocess
import sys
# Name this filter was started under; used to tag every diagnostic line
myname = os.path.basename(sys.argv[0])


def ermsg(s, fatal=0, exitvalue=1):
    """Write a diagnostic line to stderr and optionally end the process.

    s -- message text
    fatal -- when true, exit after the message has been written
    exitvalue -- process exit status used when fatal is set

    Raises SystemExit when fatal is true.
    """
    sys.stderr.write("RCLMFILT: %s : %s\n" % (myname, s))
    if fatal:
        # sys.exit() instead of the bare exit() helper: the latter is added
        # by the site module for interactive use and is not available when
        # the interpreter runs with -S
        sys.exit(exitvalue)
def readparam():
    """Read one parameter of the current request from stdin.

    A parameter is a header line made of two whitespace-separated fields,
    a name and a data size, followed by exactly that many characters of
    data. An empty line marks the end of the parameter list.

    Returns a (paramname, paramdata) tuple, paramname being lower-cased;
    ("", "") is returned for the terminating empty line.

    The process exits through ermsg() with status 0 on end of input, and
    with status 1 on a malformed header or a short data read.
    """
    s = sys.stdin.readline()
    if s == '':
        ermsg(": EOF on input", 1, 0)
    s = s.rstrip("\n")
    if s == "":
        return ("", "")
    fields = s.split()
    if len(fields) != 2:
        ermsg("bad line: [" + s + "]", 1, 1)
    paramname = fields[0].lower()
    try:
        paramsize = int(fields[1])
    except ValueError:
        # A non-numeric size is a malformed header like any other: report
        # it the same way instead of dying on an uncaught exception
        ermsg("bad line: [" + s + "]", 1, 1)
    if paramsize > 0:
        paramdata = sys.stdin.read(paramsize)
        if len(paramdata) != paramsize:
            ermsg("Bad read: wanted %d, got %d" % (paramsize, len(paramdata)),
                  1, 1)
    else:
        paramdata = ""
    ermsg("paramname [%s] paramsize %d value [%s]" %
          (paramname, paramsize, paramdata))
    return (paramname, paramdata)
# Send answer: document, ipath, possible eof.
def answer(docdata, ipath, iseof):
    """Write one reply message to stdout and flush it.

    docdata -- document data; always announced, even when empty
    ipath -- path of the entry inside the archive; the "Ipath:" section is
             left out when it is empty
    iseof -- when true, an "Eof: 0" line is added to signal that the end
             of the contents has been reached

    Every section is a "Name: size" header line followed by the raw data,
    and an empty line closes the message.
    """
    out = sys.stdout
    out.write("Document: %d\n" % len(docdata))
    out.write(docdata)
    if len(ipath) != 0:
        out.write("Ipath: %d\n" % len(ipath))
        out.write(ipath)
    # If we're at the end of the contents, say so
    if iseof:
        out.write("Eof: 0\n")
    # Empty line: end of message
    out.write("\n")
    out.flush()
    ermsg("done writing data")
def mainloop(processor):
    """Serve requests forever, handing each one to the processor.

    processor -- object with an extractone(params) method; it receives a
                 dict mapping the parameter names to their data

    The function never returns by itself: the process ends from inside
    readparam() when the input is exhausted or malformed.
    """
    # We only actually use one parameter, so a small upper bound on what
    # is read for a single request is enough
    max_params = 10
    while True:
        ermsg("waiting for command")
        request = {}
        remaining = max_params
        while remaining > 0:
            remaining -= 1
            name, value = readparam()
            if name == "":
                # Empty line: the request is complete
                break
            request[name] = value
        # Call worker
        processor.extractone(request)
###################################################################
# Code specific to the zip file filter from here
class ZipExtractor:
    """Request processor handing out the members of a zip archive.

    State kept from one request to the next:
      zipfile      -- path of the archive currently being processed
      contents     -- member names, one newline-terminated line each, as
                      printed by "unzip -Z -1"
      currentindex -- index in contents of the entry for the current
                      request
    """

    def __init__(self):
        self.currentindex = 0
        self.contents = []
        self.zipfile = ""

    def _unzip(self, args, aslines=False):
        """Run unzip with the given arguments and return its standard output.

        The arguments are passed as a list, without going through a shell,
        so that file and member names holding spaces or shell
        metacharacters reach unzip unchanged.

        Returns the output as a list of lines when aslines is true, as a
        single string otherwise. The result is empty when unzip could not
        be started.
        """
        try:
            proc = subprocess.Popen(["unzip"] + list(args),
                                    stdout=subprocess.PIPE)
        except OSError:
            ermsg("could not execute unzip")
            if aslines:
                return []
            return ""
        try:
            if aslines:
                return proc.stdout.readlines()
            return proc.stdout.read()
        finally:
            # Always release the pipe and reap the child process
            proc.stdout.close()
            proc.wait()

    # Open zipfile and retrieve TOC
    def listzip(self, fname):
        """Store and return the member list of archive fname."""
        self.contents = self._unzip(["-Z", "-1", fname], aslines=True)
        return self.contents

    # Extract given file
    def extractzipentry(self, fname, name):
        """Return the data of member name from archive fname."""
        # NOTE(review): unzip treats member arguments as wildcard patterns,
        # so names holding characters like '*', '?' or '[' may need
        # escaping -- to be confirmed against the unzip documentation
        return self._unzip(["-p", fname, name])

    def extractone(self, params):
        """Process one request and write the reply with answer().

        params -- request parameters. "filename:" must be present: a
                  non-empty value opens that archive and restarts from
                  its first entry, an empty one moves on to the next
                  entry of the current archive. A non-empty "ipath:"
                  selects a specific entry by name instead.

        Always returns True. The process exits through ermsg() when
        "filename:" is missing.
        """
        # See what's asked of us: open new zip file, next entry or
        # specific one
        if "filename:" not in params:
            ermsg("no filename ??", 1, 1)
        # If we're given a file name, open it. Else increment position in
        # current file (but wait for possible ipath)
        filename = params["filename:"]
        if filename:
            self.zipfile = filename
            self.currentindex = 0
            ermsg("opening [%s]" % (self.zipfile,))
            self.listzip(self.zipfile)
        else:
            self.currentindex += 1
        # If we have an ipath, that's what we look for, else process next
        # entry
        ipath = params.get("ipath:", "")
        if ipath:
            entryname = ipath
        elif self.currentindex >= len(self.contents):
            # EOF: "Document: 0\n\n"
            answer("", "", 0)
            return True
        else:
            entryname = self.contents[self.currentindex].rstrip("\n")
        docdata = self.extractzipentry(self.zipfile, entryname)
        # Flag end of contents along with the last entry of the list
        answer(docdata, entryname,
               (self.currentindex >= len(self.contents) - 1))
        return True
# Script entry point: serve the requests read from stdin with a zip
# extractor as the processor
mainloop(ZipExtractor())