#!/usr/bin/env python
###########################################
## Generic recoll multifilter communication code
#
# NOTE: the syntax used here is valid for Python 2.6+ and Python 3.
# NOTE(review): parameter and answer sizes are computed with len() on the
# native str type. This equals a byte count for Python 2 byte strings only;
# confirm the stream handling before running the protocol under Python 3.
import sys
import os


class RclExecM:
    """Protocol engine for a filter driven through stdin/stdout messages.

    Incoming messages are sequences of parameters, each made of a
    "name size" line followed by `size` characters of data, and are
    terminated by an empty line. Answers use the same "Name: size" + data
    layout and are also terminated by an empty line.

    The actual data extraction is delegated to a `processor` object which
    must provide:
      openfile(params) -> true value on success
      getipath(params) -> (ok, data, ipath, eof)
      getnext(params)  -> (ok, data, ipath, eof)
    """

    # End-of-data indicators for answer()
    noteof = 0
    eofnext = 1
    eofnow = 2

    # Error indicators for answer()
    noerror = 0
    subdocerror = 1
    fileerror = 2

    def __init__(self):
        # Program name, used as a prefix for log messages
        self.myname = os.path.basename(sys.argv[0])
        # MIME type sent along with the next answer ("" means: not sent)
        self.mimetype = ""

    def rclog(self, s, doexit=0, exitvalue=1):
        """Write message `s` to stderr, then exit with `exitvalue` if
        `doexit` is true."""
        sys.stderr.write("RCLMFILT: %s : %s\n" % (self.myname, s))
        if doexit:
            sys.exit(exitvalue)

    def htmlescape(self, txt):
        """Return `txt` with '&', '<' and '"' replaced by HTML entities."""
        # The '&' replacement must stay first: otherwise the ampersands
        # introduced by the following replacements would be escaped again.
        txt = txt.replace("&", "&amp;")
        txt = txt.replace("<", "&lt;")
        txt = txt.replace('"', "&quot;")
        return txt

    # Our worker sometimes knows the mime types of the data it sends
    def setmimetype(self, mt):
        """Record the MIME type to send with the next answer."""
        self.mimetype = mt

    def readparam(self):
        """Read a single parameter from stdin.

        A parameter is a line holding the parameter name and the data
        size, followed by the data itself.

        Returns a (name, data) tuple, with the name lowercased. An empty
        input line yields ("", ""), which marks the end of a message.
        Exits the process with status 0 on end of input, and with status 1
        (after logging) on a malformed line or a short read.
        """
        line = sys.stdin.readline()
        if line == '':
            # EOF on input: nothing more to do, leave quietly
            sys.exit(0)

        line = line.rstrip("\n")
        if line == "":
            return ("", "")

        fields = line.split()
        if len(fields) != 2:
            self.rclog("bad line: [" + line + "]", 1, 1)

        paramname = fields[0].lower()
        try:
            paramsize = int(fields[1])
        except ValueError:
            # A non-numeric size is just another kind of malformed line
            self.rclog("bad line: [" + line + "]", 1, 1)

        if paramsize > 0:
            paramdata = sys.stdin.read(paramsize)
            if len(paramdata) != paramsize:
                self.rclog("Bad read: wanted %d, got %d" %
                           (paramsize, len(paramdata)), 1, 1)
        else:
            paramdata = ""

        return (paramname, paramdata)

    def answer(self, docdata, ipath, iseof=noteof, iserror=noerror):
        """Send an answer message on stdout and flush it.

        docdata: document data
        ipath: path of the entry inside the file (skipped when empty)
        iseof: one of noteof, eofnext, eofnow
        iserror: one of noerror, subdocerror, fileerror

        No document section is sent for a file error or when iseof is
        eofnow. The current mimetype, if set, is sent with the document.
        """
        write = sys.stdout.write

        if iserror != RclExecM.fileerror and iseof != RclExecM.eofnow:
            write("Document: %d\n" % len(docdata))
            write(docdata)

            if len(ipath):
                write("Ipath: %d\n" % len(ipath))
                write(ipath)

            if len(self.mimetype):
                write("Mimetype: %d\n" % len(self.mimetype))
                write(self.mimetype)

        # If we're at the end of the contents, say so
        if iseof == RclExecM.eofnow:
            write("Eofnow: 0\n")
        elif iseof == RclExecM.eofnext:
            write("Eofnext: 0\n")

        if iserror == RclExecM.subdocerror:
            write("Subdocerror: 0\n")
        elif iserror == RclExecM.fileerror:
            write("Fileerror: 0\n")

        # End of message
        write("\n")
        sys.stdout.flush()

    def processmessage(self, processor, params):
        """Act on one incoming message and send the answer.

        params maps parameter names (e.g. "filename:", "ipath:") to their
        data. A message without a "filename:" entry terminates the process.
        Exceptions raised by the processor are logged and answered with a
        file error.
        """
        # We must have a filename entry (even empty). Else exit
        if "filename:" not in params:
            self.rclog("no filename ??", 1, 1)

        # If we're given a file name, open it.
        if len(params["filename:"]) != 0:
            # Only the processor call is guarded, so that a failure while
            # writing the answer is not mistaken for an openfile failure.
            try:
                opened = processor.openfile(params)
            except Exception as err:
                self.rclog("processmessage: openfile raised: [%s]" % (err,))
                opened = False
            if not opened:
                self.answer("", "", iserror=RclExecM.fileerror)
                return

        # If we have an ipath, that's what we look for, else ask for next
        # entry
        ipath = ""
        # Reported if the processor raises before returning an eof value
        # (same value as the True used historically, since True == 1)
        eof = RclExecM.eofnext
        self.mimetype = ""
        try:
            if "ipath:" in params and len(params["ipath:"]):
                ok, data, ipath, eof = processor.getipath(params)
            else:
                ok, data, ipath, eof = processor.getnext(params)
        except Exception as err:
            self.rclog("processmessage: getipath/getnext raised: [%s]" %
                       (err,))
            self.answer("", "", eof, RclExecM.fileerror)
            return

        if ok:
            self.answer(data, ipath, eof)
        else:
            self.answer("", "", eof, RclExecM.subdocerror)

    def mainloop(self, processor):
        """Loop forever on messages from our master.

        The loop only ends when readparam() exits the process (end of
        input or protocol error).
        """
        while 1:
            params = dict()

            # Read at most 10 parameters (normally 1 or 2), stop at empty
            # line. End of message is signalled by empty paramname. If no
            # empty line shows up within 10 parameters, the message is
            # processed with what was read so far.
            for i in range(10):
                paramname, paramdata = self.readparam()
                if paramname == "":
                    break
                params[paramname] = paramdata

            # Got message, act on it
            self.processmessage(processor, params)


# Common main routine for all python execm filters: either run the
# normal protocol engine or a local loop to test without recollindex
def main(proto, extract):
    """Run the protocol engine, or a local test when given arguments.

    proto: RclExecM instance
    extract: processor object (openfile / getipath / getnext)

    Without command line arguments, runs proto.mainloop(extract).
    With a file name argument (and an optional ipath as second argument),
    prints the requested entry, or all entries, on stdout.
    """
    if len(sys.argv) == 1:
        proto.mainloop(extract)
        return

    write = sys.stdout.write

    # Got a file name parameter: TESTING without an execm parent
    # Loop on all entries or get specific ipath
    params = {'filename:': sys.argv[1]}
    if not extract.openfile(params):
        write("Open error\n")
        sys.exit(1)

    ipath = ""
    if len(sys.argv) == 3:
        ipath = sys.argv[2]

    if ipath != "":
        params['ipath:'] = ipath
        ok, data, ipath, eof = extract.getipath(params)
        if ok:
            write("== Found entry for ipath %s (mimetype [%s]):\n" %
                  (ipath, proto.mimetype))
            write("%s\n" % (data,))
            write("\n")
        else:
            write("Got error, eof %d\n" % eof)
        sys.exit(0)

    ecnt = 0
    while 1:
        ok, data, ipath, eof = extract.getnext(params)
        if not ok:
            write("Not ok, eof %d\n" % eof)
            break
        ecnt = ecnt + 1
        write("== Entry %d ipath %s (mimetype [%s]):\n" %
              (ecnt, ipath, proto.mimetype))
        write("%s\n" % (data,))
        write("\n")
        if eof != RclExecM.noteof:
            break