first pass at converting the filters for python 2/3 compat

2015-11-06 16:49:03 +01:00 · 2015-11-06 16:49:03 +01:00 · f344e8fedd
commit f344e8fedd
parent cc68331f3d
21 changed files with 298 additions and 255 deletions
--- a/src/filters/ppt-dump.py
+++ b/src/filters/ppt-dump.py
@ -52,7 +52,7 @@ class PPTDumper(object):
            try:
                dirstrm = strm.getDirectoryStreamByName(dirname)
-            except Exception, err:
+            except Exception as err:
                error("getDirectoryStreamByName(%s): %s - %s\n" % (dirname,str(err),self.filepath))
                # The previous version was killed by the exception
                # here, so the equivalent is to break, but maybe there
--- a/src/filters/rcl7z
+++ b/src/filters/rcl7z
@ -15,7 +15,7 @@ try:
    import pylzma
    from py7zlib import Archive7z
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:pylzma"
+    print("RECFILTERROR HELPERNOTFOUND python:pylzma")
    sys.exit(1);
 try:
@ -40,19 +40,17 @@ class SevenZipExtractor:
    def extractone(self, ipath):
        #self.em.rclog("extractone: [%s]" % ipath)
-        docdata = ""
+        docdata = b''
        try:
            docdata = self.sevenzip.getmember(ipath).read()
            ok = True
-        except Exception, err:
+        except Exception as err:
            self.em.rclog("extractone: failed: [%s]" % err)
            ok = False
        iseof = rclexecm.RclExecM.noteof
        if self.currentindex >= len(self.sevenzip.getnames()) -1:
            iseof = rclexecm.RclExecM.eofnext
-        if isinstance(ipath, unicode):
+        return (ok, docdata, rclexecm.makebytes(ipath), iseof)
            ipath = ipath.encode("utf-8")
        return (ok, docdata, ipath, iseof)
    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
@ -71,7 +69,7 @@ class SevenZipExtractor:
            fp = open(filename, 'rb')
            self.sevenzip = Archive7z(fp)
            return True
-        except Exception, err:
+        except Exception as err:
            self.em.rclog("openfile: failed: [%s]" % err)
            return False
@ -84,7 +82,7 @@ class SevenZipExtractor:
        try:
            ipath = ipath.decode("utf-8")
            return self.extractone(ipath)
-        except Exception, err:
+        except Exception as err:
            return (ok, data, ipath, eof)
    def getnext(self, params):
--- a/src/filters/rclaudio
+++ b/src/filters/rclaudio
@ -12,7 +12,7 @@ try:
    from mutagen.flac import FLAC
    from mutagen.oggvorbis import OggVorbis
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:mutagen"
+    print("RECFILTERROR HELPERNOTFOUND python:mutagen")
    sys.exit(1);
 # prototype for the html document we're returning
@ -42,23 +42,24 @@ class AudioTagExtractor:
        #self.em.rclog("extractone %s %s" % (params["filename:"], params["mimetype:"]))
        docdata = ""
        ok = False
-        if not params.has_key("mimetype:") or  not params.has_key("filename:"):
+        if not "mimetype:" in params or not "filename:" in params:
            self.em.rclog("extractone: no mime or file name")
            return (ok, docdata, "", rclexecm.RclExecM.eofnow)
        filename = params["filename:"]
        mimetype = params["mimetype:"]
        try:
-            if mimetype == "audio/mpeg":
+            if mimetype == b'audio/mpeg':
                tags = MP3(filename, ID3=EasyID3)
-            elif mimetype == "application/ogg":
+            elif mimetype == b'application/ogg' or \
                     mimetype == b'audio/x-vorbis+ogg':
                tags = OggVorbis(filename)
-            elif mimetype == "application/x-flac" or \
+            elif mimetype == b'application/x-flac' or \
-                     mimetype == "audio/x-flac" or \
+                     mimetype == 'audio/x-flac' or \
-                     mimetype == "audio/flac":
+                     mimetype == b'audio/flac':
                tags = FLAC(filename)
            else:
-                raise Exception, "Bad mime type %s" % mimetype
+                raise Exception("Bad mime type %s" % mimetype)
-        except Exception, err:
+        except Exception as err:
            self.em.rclog("extractone: extract failed: [%s]" % err)
            return (ok, docdata, "", rclexecm.RclExecM.eofnow)
@ -66,21 +67,22 @@ class AudioTagExtractor:
        artist = ""
        title = ""
        try:
-            album = self.em.htmlescape(tags["album"][0].encode("utf-8"))
+            album = self.em.htmlescape(tags["album"][0])
        except:
            pass
        try:
-            artist = self.em.htmlescape(tags["artist"][0].encode("utf-8"))
+            artist = self.em.htmlescape(tags["artist"][0])
        except:
            pass
        try:
-            title = self.em.htmlescape(tags["title"][0].encode("utf-8"))
+            title = self.em.htmlescape(tags["title"][0])
        except:
            pass
        self.em.setmimetype("text/html")
-        alldata = self.em.htmlescape(tags.pprint().encode("utf-8"))
+        alldata = self.em.htmlescape(tags.pprint())
        alldata = alldata.replace("\n", "<br>")
-        docdata = htmltemplate % (album, artist, title, alldata)
+        docdata = (htmltemplate % (album, artist, title, alldata))\
                  .encode('UTF-8')
        ok = True
        return (ok, docdata, "", rclexecm.RclExecM.eofnext)
--- a/src/filters/rclchm
+++ b/src/filters/rclchm
@ -2,6 +2,11 @@
 """Extract Html files from a Microsoft Compiled Html Help file (.chm)
 Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
 from __future__ import print_function
 # Note: this is not converted to python3, libchm does not have a
 # python3 wrapper at this point (2015-11)
 # Do we return individual chapters as html pages or concatenate everything?
 rclchm_catenate = 0
 # Use special html type to allow for mimeconf/mimeview Open magic,
@ -23,13 +28,13 @@ import rclexecm
 try:
    from chm import chm,chmlib
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:chm"
+    print("RECFILTERROR HELPERNOTFOUND python:chm")
    sys.exit(1);
 try:
    from HTMLParser import HTMLParser
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:HTMLParser"
+    print("RECFILTERROR HELPERNOTFOUND python:HTMLParser")
    sys.exit(1);
 # Small helper routines
@ -37,11 +42,11 @@ def getfile(chmfile, path):
    """Extract internal file text from chm object, given path"""
    res, ui = chmfile.ResolveObject(path)
    if res != chmlib.CHM_RESOLVE_SUCCESS:
-        #print "ResolveObject failed", path
+        #print("ResolveObject failed: %s" % path, file=sys.stderr)
        return ""
    res, doc = chmfile.RetrieveObject(ui)
    if not res:
-        print "RetrieveObject failed", path
+        print("RetrieveObject failed: %s" % path, file=sys.stderr)
        return ""
    return doc
--- a/src/filters/rcldia
+++ b/src/filters/rcldia
@ -1,5 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 from __future__ import print_function
 # dia (http://live.gnome.org/Dia) file filter for recoll
 # stefan.friedel@iwr.uni-heidelberg.de 2012
 #
@ -66,7 +68,7 @@ class DiaExtractor:
        try:
            docdata = self.ExtractDiaText()
            ok = True
-        except Exception, err:
+        except Exception as err:
            ok = False
        iseof = rclexecm.RclExecM.eofnext
        self.em.setmimetype("text/plain")
@ -76,7 +78,7 @@ class DiaExtractor:
    def openfile(self, params):
        try:
            self.dia = GzipFile(params["filename:"], 'r')
-            # Dial files are sometimes not compressed. Quite weirdly,
+            # Dia files are sometimes not compressed. Quite weirdly,
            # GzipFile does not complain until we try to read. Have to do it
            # here to be able to retry an uncompressed open.
            data = self.dia.readline()
--- a/src/filters/rcldoc.py
+++ b/src/filters/rcldoc.py
@ -1,4 +1,5 @@
 #!/usr/bin/env python
 from __future__ import print_function
 import rclexecm
 import rclexec1
@ -11,32 +12,32 @@ import os
 class WordProcessData:
    def __init__(self, em):
        self.em = em
-        self.out = ""
+        self.out = b''
-        self.cont = ""
+        self.cont = b''
        self.gotdata = False
        # Line with continued word (ending in -)
        # we strip the - which is not nice for actually hyphenated word.
        # What to do ?
-        self.patcont = re.compile('''[\w][-]$''')
+        self.patcont = re.compile(b'''[\w][-]$''')
        # Pattern for breaking continuation at last word start
-        self.patws = re.compile('''([\s])([\w]+)(-)$''')
+        self.patws = re.compile(b'''([\s])([\w]+)(-)$''')
    def takeLine(self, line):
        if not self.gotdata:
-            if line == "":
+            if line == b'':
                return
-            self.out = '<html><head><title></title>' + \
+            self.out = b'<html><head><title></title>' + \
-                       '<meta http-equiv="Content-Type"' + \
+                       b'<meta http-equiv="Content-Type"' + \
-                       'content="text/html;charset=UTF-8">' + \
+                       b'content="text/html;charset=UTF-8">' + \
-                       '</head><body><p>'
+                       b'</head><body><p>'
            self.gotdata = True
        if self.cont:
            line = self.cont + line
            self.cont = ""
-        if line == "\f":
+        if line == b'\f':
-            self.out += "</p><hr><p>"
+            self.out += '</p><hr><p>'
            return
        if self.patcont.search(line):
@ -47,16 +48,16 @@ class WordProcessData:
                line = line[0:match.start(1)]
            else:
                self.cont = line
-                line = ""
+                line = b''
        if line:
-            self.out += self.em.htmlescape(line) + "<br>"
+            self.out += self.em.htmlescape(line) + b'<br>'
        else:
-            self.out += "<br>"
+            self.out += b'<br>'
    def wrapData(self):
        if self.gotdata:
-            self.out += "</p></body></html>"
+            self.out += b'</p></body></html>'
        self.em.setmimetype("text/html")
        return self.out
@ -65,7 +66,7 @@ class WordProcessData:
 # output HTML
 class WordPassData:
    def __init__(self, em):
-        self.out = ""
+        self.out = b''
        self.em = em
    def takeLine(self, line):
@ -96,8 +97,8 @@ class WordFilter:
        return False
    def mimetype(self, fn):
-        rtfprolog ="{\\rtf1"
+        rtfprolog = b'{\\rtf1'
-        docprolog = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
+        docprolog = b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1'
        try:
            f = open(fn, "rb")
        except:
@ -132,7 +133,7 @@ class WordFilter:
            mt = self.mimetype(fn)
            self.em.rclog("rcldoc.py: actual MIME type %s" % mt)
            if mt == "text/plain":
-                return ([python, os.path.join(self.execdir, "rcltext.py")],
+                return (["python", os.path.join(self.execdir, "rcltext.py")],
                       WordPassData(self.em))
            elif mt == "text/rtf":
                cmd = ["python", os.path.join(self.execdir, "rclrtf.py"),
--- a/src/filters/rclepub
+++ b/src/filters/rclepub
@ -1,5 +1,6 @@
 #!/usr/bin/env python
 """Extract Html content from an EPUB file (.chm)"""
 from __future__ import print_function
 rclepub_html_mtype = "text/html"
@ -12,7 +13,7 @@ import rclexecm
 try:
    import epub
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:epub"
+    print("RECFILTERROR HELPERNOTFOUND python:epub")
    sys.exit(1);
 class rclEPUB:
@ -63,11 +64,11 @@ class rclEPUB:
            if item is None:
                raise Exception("Item not found for id %s" % (id,))
            doc = self.book.read_item(item)
-            doc = re.sub('''</[hH][eE][aA][dD]>''',
+            doc = re.sub(b'''</[hH][eE][aA][dD]>''',
-                         '''<meta name="rclaptg" content="epub"></head>''', doc)
+                        b'''<meta name="rclaptg" content="epub"></head>''', doc)
            self.em.setmimetype(rclepub_html_mtype)
            return (True, doc, id, iseof)
-        except Exception, err:
+        except Exception as err:
            self.em.rclog("extractone: failed: [%s]" % err)
            return (False, "", id, iseof)
@ -76,11 +77,11 @@ class rclEPUB:
        self.currentindex = -1
        self.contents = []
        try:
-            self.book = epub.open(params["filename:"])
+            self.book = epub.open_epub(params["filename:"].decode('UTF-8'))
-        except Exception, err:
+        except Exception as err:
            self.em.rclog("openfile: epub.open failed: [%s]" % err)
            return False
-        for id, item in self.book.opf.manifest.iteritems():
+        for id, item in self.book.opf.manifest.items():
            if item.media_type == 'application/xhtml+xml':
                self.contents.append(id)
        return True
--- a/src/filters/rclexec1.py
+++ b/src/filters/rclexec1.py
@ -26,6 +26,8 @@
 # this would be to slow. So this helps implementing a permanent script
 # to repeatedly execute single commands.
 from __future__ import print_function
 import subprocess
 import rclexecm
@ -74,8 +76,8 @@ class Executor:
        # params["mimetype:"]))
        self.flt.reset()
        ok = False
-        if not params.has_key("filename:"):
+        if not "filename:" in params:
-            self.em.rclog("extractone: no mime or file name")
+            self.em.rclog("extractone: no file name")
            return (ok, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]
--- a/src/filters/rclexecm.py
+++ b/src/filters/rclexecm.py
@ -16,6 +16,9 @@
 #   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 ########################################################
 ## Recoll multifilter communication module and utilities
 #
 # All data is binary. This is important for Python3
 # All parameter names are converted to and processed as str/unicode
 from __future__ import print_function
@ -26,6 +29,21 @@ import shutil
 import getopt
 import rclconfig
 PY3 = sys.version > '3'
 if PY3:
    def makebytes(data):
        if isinstance(data, bytes):
            return data
        else:
            return data.encode("UTF-8")
 else:
    def makebytes(data):
        if isinstance(data, unicode):
            return data.encode("UTF-8")
        else:
            return data
 my_config = rclconfig.RclConfig()
 ############################################
@ -33,7 +51,7 @@ my_config = rclconfig.RclConfig()
 # communication protocol with the recollindex process. It calls the
 # object specific of the document type to actually get the data.
 class RclExecM:
-    noteof  = 0
+    noteof = 0
    eofnext = 1
    eofnow = 2
@ -46,7 +64,7 @@ class RclExecM:
            self.myname = os.path.basename(sys.argv[0])
        except:
            self.myname = "???"
-        self.mimetype = ""
+        self.mimetype = b""
        if os.environ.get("RECOLL_FILTER_MAXMEMBERKB"):
            self.maxmembersize = \
@ -60,7 +78,7 @@ class RclExecM:
            msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
        self.debugfile = None
        if self.debugfile:
-            self.errfout = open(self.debugfile, "ab")
+            self.errfout = open(self.debugfile, "a")
        else:
            self.errfout = sys.stderr
@ -93,77 +111,84 @@ class RclExecM:
    # Note: tried replacing this with a multiple replacer according to
    #  http://stackoverflow.com/a/15221068, which was **10 times** slower
    def htmlescape(self, txt):
-        # This must stay first (it somehow had managed to skip after
+        # &amp must stay first (it somehow had managed to skip
-        # the next line, with rather interesting results)
+        # after the next replace, with rather interesting results)
-        txt = txt.replace("&", "&amp;")
+        try:
-
+            txt = txt.replace(b'&', b'&amp;').replace(b'<', b'&lt;').\
-        txt = txt.replace("<", "&lt;")
+                  replace(b'>', b'&gt;').replace(b'"', b'&quot;')
-        txt = txt.replace(">", "&gt;")
+        except:
-        txt = txt.replace('"', "&quot;")
+            txt = txt.replace("&", "&amp;").replace("<", "&lt;").\
                  replace(">", "&gt;").replace("\"", "&quot;")
        return txt
    # Our worker sometimes knows the mime types of the data it sends
    def setmimetype(self, mt):
-        self.mimetype = mt
+        self.mimetype = makebytes(mt)
    # Read single parameter from process input: line with param name and size
-    # followed by data.
+    # followed by data. The param name is returned as str/unicode, the data
    # as bytes
    def readparam(self):
-        s = sys.stdin.readline()
+        if PY3:
-        if s == '':
+            inf = sys.stdin.buffer
        else:
            inf = sys.stdin
        s = inf.readline()
        if s == b'':
            sys.exit(0)
 #           self.rclog(": EOF on input", 1, 0)
-        s = s.rstrip("\n")
+        s = s.rstrip(b'\n')
-        if s == "":
+        if s == b'':
-            return ("","")
+            return ('', b'')
        l = s.split()
        if len(l) != 2:
-            self.rclog("bad line: [" + s + "]", 1, 1)
+            self.rclog(b'bad line: [' + s + b']', 1, 1)
-        paramname = l[0].lower()
+        paramname = l[0].decode('ASCII').lower()
        paramsize = int(l[1])
        if paramsize > 0:
-            paramdata = sys.stdin.read(paramsize)
+            paramdata = inf.read(paramsize)
            if len(paramdata) != paramsize:
                self.rclog("Bad read: wanted %d, got %d" %
-                      (paramsize, len(paramdata)), 1,1)
+                      (paramsize, len(paramdata)), 1, 1)
        else:
-            paramdata = ""
+            paramdata = b''
        #self.rclog("paramname [%s] paramsize %d value [%s]" %
        #          (paramname, paramsize, paramdata))
        return (paramname, paramdata)
    if PY3:
        def senditem(self, nm, len, data):
            sys.stdout.buffer.write(makebytes("%s: %d\n" % (nm, len)))
            self.breakwrite(sys.stdout.buffer, makebytes(data))
    else:
        def senditem(self, nm, len, data):
            sys.stdout.write(makebytes("%s: %d\n" % (nm, len)))
            self.breakwrite(sys.stdout, makebytes(data))
    # Send answer: document, ipath, possible eof.
    def answer(self, docdata, ipath, iseof = noteof, iserror = noerror):
        if iserror != RclExecM.fileerror and iseof != RclExecM.eofnow:
-            if isinstance(docdata, unicode):
+            self.senditem("Document", len(docdata), docdata)
                self.rclog("GOT UNICODE for ipath [%s]" % (ipath,))
                docdata = docdata.encode("UTF-8")
            print("Document: %d" % len(docdata))
            self.breakwrite(sys.stdout, docdata)
            if len(ipath):
-                print("Ipath: %d" % len(ipath))
+                self.senditem("Ipath", len(ipath), ipath)
                sys.stdout.write(ipath)
            if len(self.mimetype):
-                print("Mimetype: %d" % len(self.mimetype))
+                self.senditem("Mimetype", len(self.mimetype), self.mimetype)
                sys.stdout.write(self.mimetype)
        # If we're at the end of the contents, say so
        if iseof == RclExecM.eofnow:
-            print("Eofnow: 0")
+            self.senditem("Eofnow", 0, b'')
        elif iseof == RclExecM.eofnext:
-            print("Eofnext: 0")
+            self.senditem("Eofnext", 0, b'')
        if iserror == RclExecM.subdocerror:
-            print("Subdocerror: 0")
+            self.senditem("Subdocerror", 0, b'')
        elif iserror == RclExecM.fileerror:
-            print("Fileerror: 0")
+            self.senditem("Fileerror", 0, b'')
        # End of message
        print()
@ -173,7 +198,8 @@ class RclExecM:
    def processmessage(self, processor, params):
        # We must have a filename entry (even empty). Else exit
-        if not params.has_key("filename:"):
+        if "filename:" not in params:
            print("%s" % params, file=sys.stderr)
            self.rclog("no filename ??", 1, 1)
        # If we're given a file name, open it. 
@ -182,7 +208,7 @@ class RclExecM:
                if not processor.openfile(params):
                    self.answer("", "", iserror = RclExecM.fileerror)
                    return
-            except Exception, err:
+            except Exception as err:
                self.rclog("processmessage: openfile raised: [%s]" % err)
                self.answer("", "", iserror = RclExecM.fileerror)
                return
@ -192,11 +218,11 @@ class RclExecM:
        eof = True
        self.mimetype = ""
        try:
-            if params.has_key("ipath:") and len(params["ipath:"]):
+            if "ipath:" in params and len(params["ipath:"]):
                ok, data, ipath, eof = processor.getipath(params)
            else:
                ok, data, ipath, eof = processor.getnext(params)
-        except Exception, err:
+        except Exception as err:
            self.answer("", "", eof, RclExecM.fileerror)
            return
@ -311,7 +337,7 @@ def main(proto, extract):
    actAsSingle = False
    debugDumpData = False
-    ipath = ""
+    ipath = b""
    args = sys.argv[1:]
    opts, args = getopt.getopt(args, "hdsi:w:")
@ -321,7 +347,7 @@ def main(proto, extract):
        elif opt in ['-s']:
            actAsSingle = True
        elif opt in ['-i']:
-            ipath = arg
+            ipath = makebytes(arg)
        elif opt in ['-w']:
            ret = which(arg)
            if ret:
@ -344,17 +370,17 @@ def main(proto, extract):
        lst = fileout.split(':')
        mimetype = lst[len(lst)-1].strip()
        lst = mimetype.split(';')
-        return lst[0].strip()
+        return makebytes(lst[0].strip())
    def mimetype_with_xdg(f):
        cmd = 'xdg-mime query filetype "' + f + '"'
-        return os.popen(cmd).read().strip()
+        return makebytes(os.popen(cmd).read().strip())
-    def debprint(s):
+    def debprint(out, s):
        if not actAsSingle:
-            print(s)
+            proto.breakwrite(out, makebytes(s+'\n'))
-    params = {'filename:': args[0]}
+    params = {'filename:': makebytes(args[0])}
    # Some filters (e.g. rclaudio) need/get a MIME type from the indexer
    mimetype = mimetype_with_xdg(args[0])
    params['mimetype:'] = mimetype
@ -363,19 +389,20 @@ def main(proto, extract):
        print("Open error", file=sys.stderr)
        sys.exit(1)
-    if ipath != "" or actAsSingle:
+    if PY3:
        ioout = sys.stdout.buffer
    else:
        ioout = sys.stdout
    if ipath != b"" or actAsSingle:
        params['ipath:'] = ipath
        ok, data, ipath, eof = extract.getipath(params)
        if ok:
-            debprint("== Found entry for ipath %s (mimetype [%s]):" % \
+            debprint(ioout, "== Found entry for ipath %s (mimetype [%s]):" % \
                  (ipath, proto.mimetype))
-            if isinstance(data, unicode):
+            bdata = makebytes(data)
                bdata = data.encode("UTF-8")
            else:
                bdata = data
            if debugDumpData or actAsSingle:
-                proto.breakwrite(sys.stdout, bdata)
+                proto.breakwrite(ioout, bdata)
-                print()
+                ioout.write(b'\n')
            sys.exit(0)
        else:
            print("Got error, eof %d"%eof, file=sys.stderr)
@ -386,15 +413,12 @@ def main(proto, extract):
        ok, data, ipath, eof = extract.getnext(params)
        if ok:
            ecnt = ecnt + 1
-            debprint("== Entry %d ipath %s (mimetype [%s]):" % \
+            bdata = makebytes(data)
-                  (ecnt, ipath, proto.mimetype))
+            debprint(ioout, "== Entry %d dlen %d ipath %s (mimetype [%s]):" % \
-            if isinstance(data, unicode):
+                  (ecnt, len(data), ipath, proto.mimetype))
                bdata = data.encode("UTF-8")
            else:
                bdata = data
            if debugDumpData:
-                proto.breakwrite(sys.stdout, bdata)
+                proto.breakwrite(ioout, bdata)
-                print()
+                ioout.write(b'\n')
            if eof != RclExecM.noteof:
                sys.exit(0)
        else:
--- a/src/filters/rclics
+++ b/src/filters/rclics
@ -1,4 +1,5 @@
 #!/usr/bin/env python
 from __future__ import print_function
 # Read an ICS file, break it into "documents" which are events, todos,
 # or journal entries, and interface with recoll execm
@ -13,36 +14,36 @@ import rclexecm
 import sys
 # Decide how we'll process the file.
-modules = ('internal', 'icalendar', 'vobject')
+modules = ("internal", "icalendar", "vobject")
-usemodule = 'internal'
+usemodule = "internal"
 forcevobject = 0
-if usemodule != 'internal':
+if usemodule != "internal":
    try:
        if forcevobject:
            raise Exception
        from icalendar import Calendar, Event
-        usemodule = 'icalendar'
+        usemodule = "icalendar"
    except:
        try:
            import vobject
-            usemodule = 'vobject'
+            usemodule = "vobject"
        except:
-            print "RECFILTERROR HELPERNOTFOUND python:icalendar"
+            print("RECFILTERROR HELPERNOTFOUND python:icalendar")
-            print "RECFILTERROR HELPERNOTFOUND python:vobject"
+            print("RECFILTERROR HELPERNOTFOUND python:vobject")
            sys.exit(1);
 class IcalExtractor:
    def __init__(self, em):
        self.file = ""
-	self.contents = []
+        self.contents = []
        self.em = em
    def extractone(self, index):
        if index >= len(self.contents):
            return(False, "", "", True)
        docdata = self.contents[index]
-	#self.em.rclog(docdata)
+        #self.em.rclog(docdata)
        iseof = rclexecm.RclExecM.noteof
        if self.currentindex >= len(self.contents) -1:
@ -55,32 +56,32 @@ class IcalExtractor:
        self.file = params["filename:"]
        try:
-            calstr = open(self.file, 'rb')
+            calstr = open(self.file, "rb")
-        except Exception, e:
+        except Exception as e:
            self.em.rclog("Openfile: open: %s" % str(e))
            return False
        self.currentindex = -1
-        if usemodule == 'internal':
+        if usemodule == "internal":
            self.contents = ICalSimpleSplitter().splitcalendar(calstr)
-        elif usemodule == 'icalendar':
+        elif usemodule == "icalendar":
            try:
                cal = Calendar.from_string(calstr.read())
-            except Exception, e:
+            except Exception as e:
                self.em.rclog("Openfile: read or parse error: %s" % str(e))
                return False
            self.contents = cal.walk()
            self.contents = [item.as_string() for item in self.contents
-                             if (item.name == 'VEVENT' or item.name == 'VTODO'
+                             if (item.name == "VEVENT" or item.name == "VTODO"
-                                 or item.name == 'VJOURNAL')]
+                                 or item.name == "VJOURNAL")]
        else:
            try:
                cal = vobject.readOne(calstr)
-            except Exception, e:
+            except Exception as e:
                self.em.rclog("Openfile: cant parse object: %s" % str(e))
                return False
-            for lstnm in ('vevent_list', 'vtodo_list', 'vjournal_list'):
+            for lstnm in ("vevent_list", "vtodo_list", "vjournal_list"):
                lst = getattr(cal, lstnm, [])
                for ev in lst:
                    self.contents.append(ev.serialize())
@ -90,7 +91,10 @@ class IcalExtractor:
    def getipath(self, params):
        try:
-            index = int(params["ipath:"])
+            if params["ipath:"] == b'':
                index = 0
            else:
                index = int(params["ipath:"])
        except:
            return (False, "", "", True)
        return self.extractone(index)
@ -100,7 +104,7 @@ class IcalExtractor:
        if self.currentindex == -1:
            # Return "self" doc
            self.currentindex = 0
-            self.em.setmimetype('text/plain')
+            self.em.setmimetype(b'text/plain')
            if len(self.contents) == 0:
                eof = rclexecm.RclExecM.eofnext
            else:
@ -121,44 +125,44 @@ class ICalSimpleSplitter:
    # Note that if an 'interesting' element is nested inside another one,
    # it will not be extracted (stay as text in external event). This is
    # not an issue and I don't think it can happen with the current list
-    interesting = ('VTODO', 'VEVENT', 'VJOURNAL')
+    interesting = (b'VTODO', b'VEVENT', b'VJOURNAL')
    def splitcalendar(self, fin):
-        curblkname = ''
+        curblkname = b''
-        curblk = ''
+        curblk = b''
        lo = []
        for line in fin:
            line = line.rstrip()
-            if line == '':
+            if line == b'':
                continue
            if curblkname:
-                curblk = curblk + line + "\n"
+                curblk = curblk + line + b'\n'
-            l = line.split(":")
+            l = line.split(b':')
            if len(l) < 2:
                continue
            # If not currently inside a block and we see an
            # 'interesting' BEGIN, start block
-            if curblkname == '' and l[0].upper() == "BEGIN" :
+            if curblkname == b'' and l[0].upper() == b'BEGIN':
                name = l[1].upper()
                if name in ICalSimpleSplitter.interesting:
                    curblkname = name
-                    curblk = curblk + line + "\n"
+                    curblk = curblk + line + b'\n'
            # If currently accumulating block lines, check for end
-            if curblkname and l[0].upper() == "END" and \
+            if curblkname and l[0].upper() == b'END' and \
                   l[1].upper() == curblkname:
                lo.append(curblk)
-                curblkname = ''
+                curblkname = b''
-                curblk = ''
+                curblk = b''
        if curblk:
            lo.append(curblk)
-            curblkname = ''
+            curblkname = b''
-            curblk = ''
+            curblk = b''
        return lo
--- a/src/filters/rclimg.py
+++ b/src/filters/rclimg.py
@ -1,11 +1,12 @@
 #!/usr/bin/env python
-# Python-based Image Tag extractor for Recoll. This is less thorough than the 
+# Python-based Image Tag extractor for Recoll. This is less thorough
-# Perl-based rclimg script, but useful if you don't want to have to install Perl
+# than the Perl-based rclimg script, but useful if you don't want to
-# (e.g. on Windows).
+# have to install Perl (e.g. on Windows).
 #
 # Uses pyexiv2. Also tried Pillow, found it useless for tags.
 #
 from __future__ import print_function
 import sys
 import os
@ -15,7 +16,7 @@ import re
 try:
    import pyexiv2
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:pyexiv2"
+    print("RECFILTERROR HELPERNOTFOUND python:pyexiv2")
    sys.exit(1);
 khexre = re.compile('.*\.0[xX][0-9a-fA-F]+$')
@ -48,7 +49,7 @@ class ImgTagExtractor:
    def extractone(self, params):
        #self.em.rclog("extractone %s" % params["filename:"])
        ok = False
-        if not params.has_key("filename:"):
+        if "filename:" not in params:
            self.em.rclog("extractone: no file name")
            return (ok, docdata, "", rclexecm.RclExecM.eofnow)
        filename = params["filename:"]
@ -62,11 +63,11 @@ class ImgTagExtractor:
                # we skip numeric keys and undecoded makernote data
                if k != 'Exif.Photo.MakerNote' and not khexre.match(k):
                    mdic[k] = str(metadata[k].raw_value)
-        except Exception, err:
+        except Exception as err:
            self.em.rclog("extractone: extract failed: [%s]" % err)
            return (ok, "", "", rclexecm.RclExecM.eofnow)
-        docdata = "<html><head>\n"
+        docdata = b'<html><head>\n'
        ttdata = set()
        for k in pyexiv2_titles:
@ -77,25 +78,28 @@ class ImgTagExtractor:
            for v in ttdata:
                v = v.replace('[', '').replace(']', '').replace("'", "")
                title += v + " "
-            docdata += '<title>' + title + '</title>\n'
+            docdata += rclexecm.makebytes("<title>" + title + "</title>\n")
        for k in exiv2_dates:
            if k in mdic:
                # Recoll wants: %Y-%m-%d %H:%M:%S.
                # We get 2014:06:27 14:58:47
-                dt = mdic[k].replace(':', '-', 2)
+                dt = mdic[k].replace(":", "-", 2)
-                docdata += '<meta name="date" content="' + dt + '">\n'
+                docdata += b'<meta name="date" content="' + \
                           rclexecm.makebytes(dt) + b'">\n'
                break
-        for k,v in mdic.iteritems():
+        for k,v in mdic.items():
            if k ==  'Xmp.digiKam.TagsList':
-                docdata += '<meta name="keywords" content="' + \
+                docdata += b'<meta name="keywords" content="' + \
-                           self.em.htmlescape(mdic[k]) + '">\n'
+                           rclexecm.makebytes(self.em.htmlescape(mdic[k])) + \
                           b'">\n'
-        docdata += "</head><body>\n"
+        docdata += b'</head><body>\n'
-        for k,v in mdic.iteritems():
+        for k,v in mdic.items():
-            docdata += k + " : " + self.em.htmlescape(mdic[k]) + "<br />\n"
+            docdata += rclexecm.makebytes(k + " : " + \
-        docdata += "</body></html>"
+                                     self.em.htmlescape(mdic[k]) + "<br />\n")
        docdata += b'</body></html>'
        self.em.setmimetype("text/html")
--- a/src/filters/rclinfo
+++ b/src/filters/rclinfo
@ -3,6 +3,7 @@
 # Read a file in GNU info format and output its nodes as subdocs,
 # interfacing with recoll execm
 from __future__ import print_function
 import rclexecm
 import sys
@ -16,24 +17,12 @@ import subprocess
 # Some info source docs contain charset info like:
 # @documentencoding ISO-2022-JP
 # But this seems to be absent from outputs.
 htmltemplate = '''
 <html>
  <head>
      <title>%s</title>
      <meta name="rclaptg" content="gnuinfo">
   </head>
   <body>
   <pre style="white-space: pre-wrap">
   %s
   </pre></body>
 </html>
 '''
 # RclExecm interface
 class InfoExtractor:
    def __init__(self, em):
        self.file = ""
-	self.contents = []
+        self.contents = []
        self.em = em
    def extractone(self, index):
@ -43,8 +32,13 @@ class InfoExtractor:
        nodename, docdata = self.contents[index]
        nodename = self.em.htmlescape(nodename)
        docdata = self.em.htmlescape(docdata)
-
+        # strange whitespace to avoid changing the module tests (same as old)
-        docdata = htmltemplate % (nodename, docdata)
+        docdata = b'\n<html>\n  <head>\n      <title>' + nodename + \
                  b'</title>\n' + \
                  b'      <meta name="rclaptg" content="gnuinfo">\n' + \
                  b'   </head>\n   <body>\n' + \
                  b'   <pre style="white-space: pre-wrap">\n   ' + \
                  docdata + b'\n   </pre></body>\n</html>\n'
        iseof = rclexecm.RclExecM.noteof
        if self.currentindex >= len(self.contents) -1:
@ -60,19 +54,18 @@ class InfoExtractor:
            self.em.rclog("Openfile: %s is not a file" % self.file)
            return False
-        cmd = "info --subnodes -o - -f " + self.file
+        cmd = b'info --subnodes -o - -f ' + self.file
        nullstream = open("/dev/null", 'w')
        try:
            infostream = subprocess.Popen(cmd, shell=True, bufsize=1,
                                          stderr=nullstream,
                                          stdout=subprocess.PIPE).stdout
-        except Exception, e:
+        except Exception as e:
            # Consider this as permanently fatal. 
            self.em.rclog("Openfile: exec info: %s" % str(e))
-            print "RECFILTERROR HELPERNOTFOUND info"
+            print("RECFILTERROR HELPERNOTFOUND info")
            sys.exit(1);
        self.currentindex = -1
        self.contents = InfoSimpleSplitter().splitinfo(self.file, infostream)
@ -117,9 +110,9 @@ class InfoSimpleSplitter:
        index = 0
        listout = []
        node_dict = {}
-        node = ""
+        node = b''
        infofile = os.path.basename(filename)
-        nodename = "Unknown"
+        nodename = b'Unknown'
        for line in fin:
@ -128,41 +121,41 @@ class InfoSimpleSplitter:
            # beginning with spaces (it's a bug probably, only seen it once)
            # Maybe we'd actually be better off directly interpreting the
            # info files
-            if gotblankline and line.lstrip(" ").startswith("File: "):
+            if gotblankline and line.lstrip(b' ').startswith(b'File: '):
                prevnodename = nodename
-                line = line.rstrip("\n\r")
+                line = line.rstrip(b'\n\r')
-                pairs = line.split(",")
+                pairs = line.split(b',')
-                up = "Top"
+                up = b'Top'
                nodename = str(index)
                try:
                    for pair in pairs:
-                        name, value = pair.split(':')
+                        name, value = pair.split(b':')
-                        name = name.strip(" ")
+                        name = name.strip(b' ')
-                        value = value.strip(" ")
+                        value = value.strip(b' ')
-                        if name == "Node":
+                        if name == b'Node':
                            nodename = value
-                        if name == "Up":
+                        if name == b'Up':
                            up = value
-                        if name == "File":
+                        if name == b'File':
                            infofile = value
-                except:
+                except Exception as err:
-                    print >> sys.stderr, "rclinfo: bad line in %s: [%s]\n" % \
+                    print("rclinfo: bad line in %s: [%s] %s\n" % \
-                          (infofile, line)
+                          (infofile, line, err), file = sys.stderr)
                    nodename = prevnodename
                    node += line
                    continue
-                if node_dict.has_key(nodename):
+                if nodename in node_dict:
-                    print >> sys.stderr, "Info file", filename, \
+                    print("Info file %s Dup node: %s" % (filename, nodename), \
-                          "Dup node: ", nodename
+                          file=sys.stderr)
                node_dict[nodename] = up
                if index != 0:
                    listout.append((prevnodename, node))
-                node = ""
+                node = b''
                index += 1
-            if line.rstrip("\n\r") == '':
+            if line.rstrip(b'\n\r') == b'':
                gotblankline = 1
            else:
                gotblankline = 0
@ -170,7 +163,7 @@ class InfoSimpleSplitter:
            node += line
        # File done, add last dangling node
-        if node != "":
+        if node != b'':
            listout.append((nodename, node))
        # Compute node paths (concatenate "Up" values), to be used
@ -178,34 +171,34 @@ class InfoSimpleSplitter:
        # the info file tree is bad
        listout1 = []
        for nodename, node in listout:
-            title = ""
+            title = b''
            loop = 0
            error = 0
-            while nodename != "Top":
+            while nodename != b'Top':
-                title = nodename + " / " + title
+                title = nodename + b' / ' + title
-                if node_dict.has_key(nodename):
+                if nodename in node_dict:
                    nodename = node_dict[nodename]
                else:
-                    print >> sys.stderr, \
+                    print(
           "Infofile: node's Up does not exist: file %s, path %s, up [%s]" % \
-                    (infofile, title, nodename)
+                    (infofile, title, nodename), file=sys.stderr)
                    error = 1
                    break
                loop += 1
                if loop > 50:
-                    print >> sys.stderr, "Infofile: bad tree (looping)", \
+                    print("Infofile: bad tree (looping) %s" % infofile, \
-                          infofile
+                          file = sys.stderr)
                    error = 1
                    break
            if error:
                continue
-            if title == "":
+            if title == b'':
                title = infofile
            else:
-                title = infofile + " / " + title
+                title = infofile + b' / ' + title
-            title = title.rstrip(" / ")
+            title = title.rstrip(b' / ')
            listout1.append((title, node))
        return listout1
--- a/src/filters/rclkar
+++ b/src/filters/rclkar
@ -1,6 +1,8 @@
 #!/usr/bin/env python
 # Read a .kar midi karaoke file and translate to recoll indexable format
 # This does not work with Python3 yet because python:midi doesn't support it
 from __future__ import print_function
 import rclexecm
 import sys
@ -15,9 +17,9 @@ except:
    pass
 try:
-    import midi
+    from midi import midi
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:midi"
+    print("RECFILTERROR HELPERNOTFOUND python:midi")
    sys.exit(1);
 try:
@ -106,12 +108,12 @@ class KarTextExtractor:
        if data:
            try:
                data = data.decode(self.encoding, 'ignore')
-            except Exception, err:
+            except Exception as err:
                self.em.rclog("Decode failed: " + str(err))
                return ""
            try:
                data = data.encode('utf-8')
-            except Exception, err:
+            except Exception as err:
                self.em.rclog("Encode failed: " + str(err))
                return ""
@ -127,7 +129,7 @@ class KarTextExtractor:
        just one our users could use if there is trouble with guessing
        encodings'''
-        rexp = r'\(([^\)]+)\)\.[a-zA-Z]+$'
+        rexp = br'''\(([^\)]+)\)\.[a-zA-Z]+$'''
        m = re.search(rexp, fn)
        if m:
            return m.group(1)
@ -165,7 +167,7 @@ class KarTextExtractor:
                if count > 0:
                    confidence = 1.0
                    encoding = code
-            except Exception, err:
+            except Exception as err:
                self.em.rclog("stopwords-based classifier failed: %s" % err)
                return (encoding, confidence)
@ -177,7 +179,7 @@ class KarTextExtractor:
        docdata = ""
        ok = False
-        if not params.has_key("filename:"):
+        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (ok, docdata, "", rclexecm.RclExecM.eofnow)
        filename = params["filename:"]
@ -191,7 +193,7 @@ class KarTextExtractor:
                self.encoding = ""
        # Mimetype not used for now
-        if not params.has_key("mimetype:"):
+        if "mimetype:" not in params:
            mimetype = 'audio/x-midi'
        else:
            mimetype = params["mimetype:"]
@ -199,8 +201,8 @@ class KarTextExtractor:
        # Read in and midi-decode the file
        try:
            stream = midi.read_midifile(filename)
-        except Exception, err:
+        except Exception as err:
-            self.em.rclog("extractone: midi extract failed: [%s]" % err)
+            self.em.rclog("extractone: read_midifile failed: [%s]" % err)
            return (ok, docdata, "", rclexecm.RclExecM.eofnow)
        title = None
--- a/src/filters/rcllatinclass.py
+++ b/src/filters/rcllatinclass.py
@ -13,6 +13,8 @@ epsilon with dasia (in unicode but not iso). Can this be replaced by either epsi
 with acute accent ?
 """
 from __future__ import print_function
 import sys
 import string
 import glob
@ -117,7 +119,7 @@ if __name__ == "__main__":
    lang,code,count = classifier.classify(rawtext)
    if count > 0:
-        print "%s %s %d" % (code, lang, count)
+        print("%s %s %d" % (code, lang, count))
    else:
-        print "UNKNOWN UNKNOWN 0"
+        print("UNKNOWN UNKNOWN 0")
--- a/src/filters/rclrar
+++ b/src/filters/rclrar
@ -43,7 +43,7 @@ class RarExtractor:
        try:
            rarinfo = self.rar.getinfo(ipath)
            isdir = rarinfo.isdir()
-        except Exception, err:
+        except Exception as err:
            self.em.rclog("extractone: getinfo failed: [%s]" % err)
            return (True, docdata, ipath, false)
@ -56,7 +56,7 @@ class RarExtractor:
                else:
                    docdata = self.rar.read(ipath)
                ok = True
-            except Exception, err:
+            except Exception as err:
                self.em.rclog("extractone: failed: [%s]" % err)
                ok = False
        else:
@ -89,7 +89,7 @@ class RarExtractor:
        try:
            ipath = ipath.decode("utf-8")
            return self.extractone(ipath)
-        except Exception, err:
+        except Exception as err:
            return (ok, data, ipath, eof)
    def getnext(self, params):
--- a/src/filters/rclrtf.py
+++ b/src/filters/rclrtf.py
@ -1,4 +1,5 @@
 #!/usr/bin/env python
 from __future__ import print_function
 import rclexecm
 import rclexec1
@ -10,24 +11,24 @@ import os
 class RTFProcessData:
    def __init__(self, em):
        self.em = em
-        self.out = ""
+        self.out = b''
        self.gothead = 0
-        self.patendhead = re.compile('''</head>''')
+        self.patendhead = re.compile(b'''</head>''')
-        self.patcharset = re.compile('''^<meta http-equiv=''')
+        self.patcharset = re.compile(b'''^<meta http-equiv=''')
    # Some versions of unrtf put out a garbled charset line.
    # Apart from this, we pass the data untouched.
    def takeLine(self, line):
        if not self.gothead:
            if self.patendhead.search(line):
-                self.out +=  '<meta http-equiv="Content-Type" ' + \
+                self.out +=  b'<meta http-equiv="Content-Type" ' + \
-                             'content="text/html;charset=UTF-8">' + "\n"
+                             b'content="text/html;charset=UTF-8">' + b'\n'
-                self.out += line + "\n"
+                self.out += line + b'\n'
                self.gothead = 1
            elif not self.patcharset.search(line):
-                self.out += line + "\n"
+                self.out += line + b'\n'
        else:
-            self.out += line + "\n"
+            self.out += line + b'\n'
    def wrapData(self):
        return self.out
@ -52,7 +53,7 @@ class RTFFilter:
 if __name__ == '__main__':
    if not rclexecm.which("unrtf"):
-        print("RECFILTERROR HELPERNOTFOUND antiword")
+        print("RECFILTERROR HELPERNOTFOUND unrtf")
        sys.exit(1)
    proto = rclexecm.RclExecM()
    filter = RTFFilter(proto)
--- a/src/filters/rcltar
+++ b/src/filters/rcltar
@ -33,7 +33,7 @@ class TarExtractor:
            else:
                docdata = self.tar.extractfile(ipath).read()
            ok = True
-        except Exception, err:
+        except Exception as err:
            ok = False
        iseof = rclexecm.RclExecM.noteof
        if self.currentindex >= len(self.namen) -1:
@ -59,7 +59,7 @@ class TarExtractor:
        try:
            ipath = ipath.decode("utf-8")
            return self.extractone(ipath)
-        except Exception, err:
+        except Exception as err:
            return (ok, data, ipath, eof)
    def getnext(self, params):
--- a/src/filters/rclwar
+++ b/src/filters/rclwar
@ -15,7 +15,7 @@ class WarExtractor:
            member = self.tar.extractfile(tarinfo)
            docdata = member.read()
            ok = True
-        except Exception, err:
+        except Exception as err:
            self.em.rclog("extractone: failed: [%s]" % err)
            ok = False
        return (ok, docdata, tarinfo.name, rclexecm.RclExecM.noteof)
@ -26,7 +26,7 @@ class WarExtractor:
        try:
            self.tar = tarfile.open(params["filename:"])
            return True
-        except Exception, err:
+        except Exception as err:
            self.em.rclog(str(err))
            return False
@ -34,7 +34,7 @@ class WarExtractor:
        ipath = params["ipath:"]
        try:
            tarinfo = self.tar.getmember(ipath)
-        except Exception, err:
+        except Exception as err:
            self.em.rclog(str(err))
            return (False, "", ipath, rclexecm.RclExecM.noteof)
        return self.extractone(tarinfo)
--- a/src/filters/rclzip
+++ b/src/filters/rclzip
@ -72,7 +72,7 @@ class ZipExtractor:
            else:
                docdata = self.zip.read(ipath)
            ok = True
-        except Exception, err:
+        except Exception as err:
            self.em.rclog("extractone: failed: [%s]" % err)
            ok = False
        iseof = rclexecm.RclExecM.noteof
@ -98,7 +98,7 @@ class ZipExtractor:
        try:
            self.zip = ZipFile(filename)
            return True
-        except Exception, err:
+        except Exception as err:
            self.em.rclog("openfile: failed: [%s]" % err)
            return False
@ -111,7 +111,7 @@ class ZipExtractor:
        try:
            ipath = ipath.decode("utf-8")
            return self.extractone(ipath)
-        except Exception, err:
+        except Exception as err:
            return (ok, data, ipath, eof)
    def getnext(self, params):
--- a/src/python/recoll/recoll/rclconfig.py
+++ b/src/python/recoll/recoll/rclconfig.py
@ -75,7 +75,7 @@ class ConfSimple:
    def getNames(self, sk = ''):
        if not sk in self.submaps:
            return None
-        return self.submaps[sk].keys()
+        return list(self.submaps[sk].keys())
 class ConfTree(ConfSimple):
    """A ConfTree adds path-hierarchical interpretation of the section keys,
--- a/tests/config/recoll.conf
+++ b/tests/config/recoll.conf
@ -4,6 +4,8 @@ logfilename = /tmp/logrcltst
 daemloglevel = 6
 daemlogfilename = /tmp/rclmontrace
 systemfilecommand = xdg-mime query filetype
 indexStripChars = 1
 detectxattronly = 1