From f344e8feddcbc5d31a4d6130f62d91fd8a14ff95 Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes <jfd@recoll.org>
Date: Fri, 6 Nov 2015 16:49:03 +0100
Subject: [PATCH] first pass at converting the filters for python 2/3 compat

---
 src/filters/ppt-dump.py               |   2 +-
 src/filters/rcl7z                     |  14 +--
 src/filters/rclaudio                  |  30 ++---
 src/filters/rclchm                    |  13 ++-
 src/filters/rcldia                    |   6 +-
 src/filters/rcldoc.py                 |  39 +++----
 src/filters/rclepub                   |  15 +--
 src/filters/rclexec1.py               |   6 +-
 src/filters/rclexecm.py               | 152 +++++++++++++++-----------
 src/filters/rclics                    |  70 ++++++------
 src/filters/rclimg.py                 |  38 ++++---
 src/filters/rclinfo                   |  95 ++++++++--------
 src/filters/rclkar                    |  22 ++--
 src/filters/rcllatinclass.py          |   6 +-
 src/filters/rclrar                    |   6 +-
 src/filters/rclrtf.py                 |  19 ++--
 src/filters/rcltar                    |   4 +-
 src/filters/rclwar                    |   6 +-
 src/filters/rclzip                    |   6 +-
 src/python/recoll/recoll/rclconfig.py |   2 +-
 tests/config/recoll.conf              |   2 +
 21 files changed, 298 insertions(+), 255 deletions(-)

diff --git a/src/filters/ppt-dump.py b/src/filters/ppt-dump.py
index f05a5789..0a05559f 100755
--- a/src/filters/ppt-dump.py
+++ b/src/filters/ppt-dump.py
@@ -52,7 +52,7 @@ class PPTDumper(object):
 
             try:
                 dirstrm = strm.getDirectoryStreamByName(dirname)
-            except Exception, err:
+            except Exception as err:
                 error("getDirectoryStreamByName(%s): %s - %s\n" % (dirname,str(err),self.filepath))
                 # The previous version was killed by the exception
                 # here, so the equivalent is to break, but maybe there
diff --git a/src/filters/rcl7z b/src/filters/rcl7z
index c7ea935d..2af73ae6 100755
--- a/src/filters/rcl7z
+++ b/src/filters/rcl7z
@@ -15,7 +15,7 @@ try:
     import pylzma
     from py7zlib import Archive7z
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:pylzma"
+    print("RECFILTERROR HELPERNOTFOUND python:pylzma")
     sys.exit(1);
 
 try:
@@ -40,19 +40,17 @@ class SevenZipExtractor:
             
     def extractone(self, ipath):
         #self.em.rclog("extractone: [%s]" % ipath)
-        docdata = ""
+        docdata = b''
         try:
             docdata = self.sevenzip.getmember(ipath).read()
             ok = True
-        except Exception, err:
+        except Exception as err:
             self.em.rclog("extractone: failed: [%s]" % err)
             ok = False
         iseof = rclexecm.RclExecM.noteof
         if self.currentindex >= len(self.sevenzip.getnames()) -1:
             iseof = rclexecm.RclExecM.eofnext
-        if isinstance(ipath, unicode):
-            ipath = ipath.encode("utf-8")
-        return (ok, docdata, ipath, iseof)
+        return (ok, docdata, rclexecm.makebytes(ipath), iseof)
 
     ###### File type handler api, used by rclexecm ---------->
     def openfile(self, params):
@@ -71,7 +69,7 @@ class SevenZipExtractor:
             fp = open(filename, 'rb')
             self.sevenzip = Archive7z(fp)
             return True
-        except Exception, err:
+        except Exception as err:
             self.em.rclog("openfile: failed: [%s]" % err)
             return False
 
@@ -84,7 +82,7 @@ class SevenZipExtractor:
         try:
             ipath = ipath.decode("utf-8")
             return self.extractone(ipath)
-        except Exception, err:
+        except Exception as err:
             return (ok, data, ipath, eof)
         
     def getnext(self, params):
diff --git a/src/filters/rclaudio b/src/filters/rclaudio
index d717adc1..03f95ad9 100755
--- a/src/filters/rclaudio
+++ b/src/filters/rclaudio
@@ -12,7 +12,7 @@ try:
     from mutagen.flac import FLAC
     from mutagen.oggvorbis import OggVorbis
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:mutagen"
+    print("RECFILTERROR HELPERNOTFOUND python:mutagen")
     sys.exit(1);
 
 # prototype for the html document we're returning
@@ -42,23 +42,24 @@ class AudioTagExtractor:
         #self.em.rclog("extractone %s %s" % (params["filename:"], params["mimetype:"]))
         docdata = ""
         ok = False
-        if not params.has_key("mimetype:") or  not params.has_key("filename:"):
+        if "mimetype:" not in params or "filename:" not in params:
             self.em.rclog("extractone: no mime or file name")
             return (ok, docdata, "", rclexecm.RclExecM.eofnow)
         filename = params["filename:"]
         mimetype = params["mimetype:"]
         try:
-            if mimetype == "audio/mpeg":
+            if mimetype == b'audio/mpeg':
                 tags = MP3(filename, ID3=EasyID3)
-            elif mimetype == "application/ogg":
+            elif mimetype == b'application/ogg' or \
+                     mimetype == b'audio/x-vorbis+ogg':
                 tags = OggVorbis(filename)
-            elif mimetype == "application/x-flac" or \
-                     mimetype == "audio/x-flac" or \
-                     mimetype == "audio/flac":
+            elif mimetype == b'application/x-flac' or \
+                     mimetype == b'audio/x-flac' or \
+                     mimetype == b'audio/flac':
                 tags = FLAC(filename)
             else:
-                raise Exception, "Bad mime type %s" % mimetype
-        except Exception, err:
+                raise Exception("Bad mime type %s" % mimetype)
+        except Exception as err:
             self.em.rclog("extractone: extract failed: [%s]" % err)
             return (ok, docdata, "", rclexecm.RclExecM.eofnow)
 
@@ -66,21 +67,22 @@ class AudioTagExtractor:
         artist = ""
         title = ""
         try:
-            album = self.em.htmlescape(tags["album"][0].encode("utf-8"))
+            album = self.em.htmlescape(tags["album"][0])
         except:
             pass
         try:
-            artist = self.em.htmlescape(tags["artist"][0].encode("utf-8"))
+            artist = self.em.htmlescape(tags["artist"][0])
         except:
             pass
         try:
-            title = self.em.htmlescape(tags["title"][0].encode("utf-8"))
+            title = self.em.htmlescape(tags["title"][0])
         except:
             pass
         self.em.setmimetype("text/html")
-        alldata = self.em.htmlescape(tags.pprint().encode("utf-8"))
+        alldata = self.em.htmlescape(tags.pprint())
         alldata = alldata.replace("\n", "<br>")
-        docdata = htmltemplate % (album, artist, title, alldata)
+        docdata = (htmltemplate % (album, artist, title, alldata))\
+                  .encode('UTF-8')
         ok = True
         return (ok, docdata, "", rclexecm.RclExecM.eofnext)
 
diff --git a/src/filters/rclchm b/src/filters/rclchm
index a9c2bbc7..e9cf0291 100755
--- a/src/filters/rclchm
+++ b/src/filters/rclchm
@@ -2,6 +2,11 @@
 """Extract Html files from a Microsoft Compiled Html Help file (.chm)
 Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
 
+from __future__ import print_function
+
+# Note: this is not converted to python3, libchm does not have a
+# python3 wrapper at this point (2015-11)
+
 # Do we return individual chapters as html pages or concatenate everything?
 rclchm_catenate = 0
 # Use special html type to allow for mimeconf/mimeview Open magic,
@@ -23,13 +28,13 @@ import rclexecm
 try:
     from chm import chm,chmlib
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:chm"
+    print("RECFILTERROR HELPERNOTFOUND python:chm")
     sys.exit(1);
 
 try:
     from HTMLParser import HTMLParser
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:HTMLParser"
+    print("RECFILTERROR HELPERNOTFOUND python:HTMLParser")
     sys.exit(1);
 
 # Small helper routines
@@ -37,11 +42,11 @@ def getfile(chmfile, path):
     """Extract internal file text from chm object, given path"""
     res, ui = chmfile.ResolveObject(path)
     if res != chmlib.CHM_RESOLVE_SUCCESS:
-        #print "ResolveObject failed", path
+        #print("ResolveObject failed: %s" % path, file=sys.stderr)
         return ""
     res, doc = chmfile.RetrieveObject(ui)
     if not res:
-        print "RetrieveObject failed", path
+        print("RetrieveObject failed: %s" % path, file=sys.stderr)
         return ""
     return doc
 
diff --git a/src/filters/rcldia b/src/filters/rcldia
index 937204f5..1d00ea76 100755
--- a/src/filters/rcldia
+++ b/src/filters/rcldia
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+from __future__ import print_function
+
 # dia (http://live.gnome.org/Dia) file filter for recoll
 # stefan.friedel@iwr.uni-heidelberg.de 2012
 #
@@ -66,7 +68,7 @@ class DiaExtractor:
         try:
             docdata = self.ExtractDiaText()
             ok = True
-        except Exception, err:
+        except Exception as err:
             ok = False
         iseof = rclexecm.RclExecM.eofnext
         self.em.setmimetype("text/plain")
@@ -76,7 +78,7 @@ class DiaExtractor:
     def openfile(self, params):
         try:
             self.dia = GzipFile(params["filename:"], 'r')
-            # Dial files are sometimes not compressed. Quite weirdly,
+            # Dia files are sometimes not compressed. Quite weirdly,
             # GzipFile does not complain until we try to read. Have to do it
             # here to be able to retry an uncompressed open.
             data = self.dia.readline()
diff --git a/src/filters/rcldoc.py b/src/filters/rcldoc.py
index 75078f16..262226cb 100755
--- a/src/filters/rcldoc.py
+++ b/src/filters/rcldoc.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from __future__ import print_function
 
 import rclexecm
 import rclexec1
@@ -11,32 +12,32 @@ import os
 class WordProcessData:
     def __init__(self, em):
         self.em = em
-        self.out = ""
-        self.cont = ""
+        self.out = b''
+        self.cont = b''
         self.gotdata = False
         # Line with continued word (ending in -)
         # we strip the - which is not nice for actually hyphenated word.
         # What to do ?
-        self.patcont = re.compile('''[\w][-]$''')
+        self.patcont = re.compile(b'''[\w][-]$''')
         # Pattern for breaking continuation at last word start
-        self.patws = re.compile('''([\s])([\w]+)(-)$''')
+        self.patws = re.compile(b'''([\s])([\w]+)(-)$''')
 
     def takeLine(self, line):
         if not self.gotdata:
-            if line == "":
+            if line == b'':
                 return
-            self.out = '<html><head><title></title>' + \
-                       '<meta http-equiv="Content-Type"' + \
-                       'content="text/html;charset=UTF-8">' + \
-                       '</head><body><p>'
+            self.out = b'<html><head><title></title>' + \
+                       b'<meta http-equiv="Content-Type"' + \
+                       b'content="text/html;charset=UTF-8">' + \
+                       b'</head><body><p>'
             self.gotdata = True
 
         if self.cont:
             line = self.cont + line
             self.cont = ""
 
-        if line == "\f":
-            self.out += "</p><hr><p>"
+        if line == b'\f':
+            self.out += b'</p><hr><p>'
             return
 
         if self.patcont.search(line):
@@ -47,16 +48,16 @@ class WordProcessData:
                 line = line[0:match.start(1)]
             else:
                 self.cont = line
-                line = ""
+                line = b''
 
         if line:
-            self.out += self.em.htmlescape(line) + "<br>"
+            self.out += self.em.htmlescape(line) + b'<br>'
         else:
-            self.out += "<br>"
+            self.out += b'<br>'
 
     def wrapData(self):
         if self.gotdata:
-            self.out += "</p></body></html>"
+            self.out += b'</p></body></html>'
         self.em.setmimetype("text/html")
         return self.out
 
@@ -65,7 +66,7 @@ class WordProcessData:
 # output HTML
 class WordPassData:
     def __init__(self, em):
-        self.out = ""
+        self.out = b''
         self.em = em
 
     def takeLine(self, line):
@@ -96,8 +97,8 @@ class WordFilter:
         return False
 
     def mimetype(self, fn):
-        rtfprolog ="{\\rtf1"
-        docprolog = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
+        rtfprolog = b'{\\rtf1'
+        docprolog = b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1'
         try:
             f = open(fn, "rb")
         except:
@@ -132,7 +133,7 @@ class WordFilter:
             mt = self.mimetype(fn)
             self.em.rclog("rcldoc.py: actual MIME type %s" % mt)
             if mt == "text/plain":
-                return ([python, os.path.join(self.execdir, "rcltext.py")],
+                return (["python", os.path.join(self.execdir, "rcltext.py")],
                        WordPassData(self.em))
             elif mt == "text/rtf":
                 cmd = ["python", os.path.join(self.execdir, "rclrtf.py"),
diff --git a/src/filters/rclepub b/src/filters/rclepub
index 1c50592f..c4868d26 100755
--- a/src/filters/rclepub
+++ b/src/filters/rclepub
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 """Extract Html content from an EPUB file (.chm)"""
+from __future__ import print_function
 
 rclepub_html_mtype = "text/html"
 
@@ -12,7 +13,7 @@ import rclexecm
 try:
     import epub
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:epub"
+    print("RECFILTERROR HELPERNOTFOUND python:epub")
     sys.exit(1);
 
 class rclEPUB:
@@ -63,11 +64,11 @@ class rclEPUB:
             if item is None:
                 raise Exception("Item not found for id %s" % (id,))
             doc = self.book.read_item(item)
-            doc = re.sub('''</[hH][eE][aA][dD]>''',
-                         '''<meta name="rclaptg" content="epub"></head>''', doc)
+            doc = re.sub(b'''</[hH][eE][aA][dD]>''',
+                        b'''<meta name="rclaptg" content="epub"></head>''', doc)
             self.em.setmimetype(rclepub_html_mtype)
             return (True, doc, id, iseof)
-        except Exception, err:
+        except Exception as err:
             self.em.rclog("extractone: failed: [%s]" % err)
             return (False, "", id, iseof)
 
@@ -76,11 +77,11 @@ class rclEPUB:
         self.currentindex = -1
         self.contents = []
         try:
-            self.book = epub.open(params["filename:"])
-        except Exception, err:
+            self.book = epub.open_epub(params["filename:"].decode('UTF-8'))
+        except Exception as err:
             self.em.rclog("openfile: epub.open failed: [%s]" % err)
             return False
-        for id, item in self.book.opf.manifest.iteritems():
+        for id, item in self.book.opf.manifest.items():
             if item.media_type == 'application/xhtml+xml':
                 self.contents.append(id)
         return True
diff --git a/src/filters/rclexec1.py b/src/filters/rclexec1.py
index ffa68c53..d26d9b60 100644
--- a/src/filters/rclexec1.py
+++ b/src/filters/rclexec1.py
@@ -26,6 +26,8 @@
 # this would be to slow. So this helps implementing a permanent script
 # to repeatedly execute single commands.
 
+from __future__ import print_function
+
 import subprocess
 import rclexecm
 
@@ -74,8 +76,8 @@ class Executor:
         # params["mimetype:"]))
         self.flt.reset()
         ok = False
-        if not params.has_key("filename:"):
-            self.em.rclog("extractone: no mime or file name")
+        if "filename:" not in params:
+            self.em.rclog("extractone: no file name")
             return (ok, "", "", rclexecm.RclExecM.eofnow)
 
         fn = params["filename:"]
diff --git a/src/filters/rclexecm.py b/src/filters/rclexecm.py
index adcb54e5..26c9764e 100644
--- a/src/filters/rclexecm.py
+++ b/src/filters/rclexecm.py
@@ -16,6 +16,9 @@
 #   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 ########################################################
 ## Recoll multifilter communication module and utilities
+#
+# All data is binary. This is important for Python3
+# All parameter names are converted to and processed as str/unicode
 
 from __future__ import print_function
 
@@ -26,6 +29,21 @@ import shutil
 import getopt
 import rclconfig
 
+PY3 = sys.version_info[0] >= 3
+# makebytes(data): return data as bytes, encoding text as UTF-8 if needed
+if PY3:
+    def makebytes(data):
+        if isinstance(data, bytes):
+            return data
+        else:
+            return data.encode("UTF-8")
+else:
+    def makebytes(data):
+        if isinstance(data, unicode):
+            return data.encode("UTF-8")
+        else:
+            return data
+
 my_config = rclconfig.RclConfig()
 
 ############################################
@@ -33,7 +51,7 @@ my_config = rclconfig.RclConfig()
 # communication protocol with the recollindex process. It calls the
 # object specific of the document type to actually get the data.
 class RclExecM:
-    noteof  = 0
+    noteof = 0
     eofnext = 1
     eofnow = 2
 
@@ -46,7 +64,7 @@ class RclExecM:
             self.myname = os.path.basename(sys.argv[0])
         except:
             self.myname = "???"
-        self.mimetype = ""
+        self.mimetype = b""
 
         if os.environ.get("RECOLL_FILTER_MAXMEMBERKB"):
             self.maxmembersize = \
@@ -60,7 +78,7 @@ class RclExecM:
             msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
         self.debugfile = None
         if self.debugfile:
-            self.errfout = open(self.debugfile, "ab")
+            self.errfout = open(self.debugfile, "a")
         else:
             self.errfout = sys.stderr
         
@@ -93,77 +111,84 @@ class RclExecM:
     # Note: tried replacing this with a multiple replacer according to
     #  http://stackoverflow.com/a/15221068, which was **10 times** slower
     def htmlescape(self, txt):
-        # This must stay first (it somehow had managed to skip after
-        # the next line, with rather interesting results)
-        txt = txt.replace("&", "&amp;")
-
-        txt = txt.replace("<", "&lt;")
-        txt = txt.replace(">", "&gt;")
-        txt = txt.replace('"', "&quot;")
+        # &amp must stay first (it somehow had managed to skip
+        # after the next replace, with rather interesting results)
+        try:
+            txt = txt.replace(b'&', b'&amp;').replace(b'<', b'&lt;').\
+                  replace(b'>', b'&gt;').replace(b'"', b'&quot;')
+        except TypeError:
+            txt = txt.replace("&", "&amp;").replace("<", "&lt;").\
+                  replace(">", "&gt;").replace("\"", "&quot;")
         return txt
 
     # Our worker sometimes knows the mime types of the data it sends
     def setmimetype(self, mt):
-        self.mimetype = mt
+        self.mimetype = makebytes(mt)
 
     # Read single parameter from process input: line with param name and size
-    # followed by data.
+    # followed by data. The param name is returned as str/unicode, the data
+    # as bytes
     def readparam(self):
-        s = sys.stdin.readline()
-        if s == '':
+        if PY3:
+            inf = sys.stdin.buffer
+        else:
+            inf = sys.stdin
+        s = inf.readline()
+        if s == b'':
             sys.exit(0)
-#           self.rclog(": EOF on input", 1, 0)
 
-        s = s.rstrip("\n")
+        s = s.rstrip(b'\n')
 
-        if s == "":
-            return ("","")
+        if s == b'':
+            return ('', b'')
         l = s.split()
         if len(l) != 2:
-            self.rclog("bad line: [" + s + "]", 1, 1)
+            self.rclog(b'bad line: [' + s + b']', 1, 1)
 
-        paramname = l[0].lower()
+        paramname = l[0].decode('ASCII').lower()
         paramsize = int(l[1])
         if paramsize > 0:
-            paramdata = sys.stdin.read(paramsize)
+            paramdata = inf.read(paramsize)
             if len(paramdata) != paramsize:
                 self.rclog("Bad read: wanted %d, got %d" %
-                      (paramsize, len(paramdata)), 1,1)
+                      (paramsize, len(paramdata)), 1, 1)
         else:
-            paramdata = ""
+            paramdata = b''
     
         #self.rclog("paramname [%s] paramsize %d value [%s]" %
         #          (paramname, paramsize, paramdata))
         return (paramname, paramdata)
 
+    if PY3:
+        def senditem(self, nm, len, data):
+            sys.stdout.buffer.write(makebytes("%s: %d\n" % (nm, len)))
+            self.breakwrite(sys.stdout.buffer, makebytes(data))
+    else:
+        def senditem(self, nm, len, data):
+            sys.stdout.write(makebytes("%s: %d\n" % (nm, len)))
+            self.breakwrite(sys.stdout, makebytes(data))
+        
     # Send answer: document, ipath, possible eof.
     def answer(self, docdata, ipath, iseof = noteof, iserror = noerror):
 
         if iserror != RclExecM.fileerror and iseof != RclExecM.eofnow:
-            if isinstance(docdata, unicode):
-                self.rclog("GOT UNICODE for ipath [%s]" % (ipath,))
-                docdata = docdata.encode("UTF-8")
-
-            print("Document: %d" % len(docdata))
-            self.breakwrite(sys.stdout, docdata)
+            self.senditem("Document", len(docdata), docdata)
 
             if len(ipath):
-                print("Ipath: %d" % len(ipath))
-                sys.stdout.write(ipath)
+                self.senditem("Ipath", len(ipath), ipath)
 
             if len(self.mimetype):
-                print("Mimetype: %d" % len(self.mimetype))
-                sys.stdout.write(self.mimetype)
+                self.senditem("Mimetype", len(self.mimetype), self.mimetype)
 
         # If we're at the end of the contents, say so
         if iseof == RclExecM.eofnow:
-            print("Eofnow: 0")
+            self.senditem("Eofnow", 0, b'')
         elif iseof == RclExecM.eofnext:
-            print("Eofnext: 0")
+            self.senditem("Eofnext", 0, b'')
         if iserror == RclExecM.subdocerror:
-            print("Subdocerror: 0")
+            self.senditem("Subdocerror", 0, b'')
         elif iserror == RclExecM.fileerror:
-            print("Fileerror: 0")
+            self.senditem("Fileerror", 0, b'')
   
         # End of message
         print()
@@ -173,7 +198,8 @@ class RclExecM:
     def processmessage(self, processor, params):
 
         # We must have a filename entry (even empty). Else exit
-        if not params.has_key("filename:"):
+        if "filename:" not in params:
+            print("%s" % params, file=sys.stderr)
             self.rclog("no filename ??", 1, 1)
 
         # If we're given a file name, open it. 
@@ -182,7 +208,7 @@ class RclExecM:
                 if not processor.openfile(params):
                     self.answer("", "", iserror = RclExecM.fileerror)
                     return
-            except Exception, err:
+            except Exception as err:
                 self.rclog("processmessage: openfile raised: [%s]" % err)
                 self.answer("", "", iserror = RclExecM.fileerror)
                 return
@@ -192,11 +218,11 @@ class RclExecM:
         eof = True
         self.mimetype = ""
         try:
-            if params.has_key("ipath:") and len(params["ipath:"]):
+            if "ipath:" in params and len(params["ipath:"]):
                 ok, data, ipath, eof = processor.getipath(params)
             else:
                 ok, data, ipath, eof = processor.getnext(params)
-        except Exception, err:
+        except Exception as err:
             self.answer("", "", eof, RclExecM.fileerror)
             return
 
@@ -311,7 +337,7 @@ def main(proto, extract):
         
     actAsSingle = False
     debugDumpData = False
-    ipath = ""
+    ipath = b""
 
     args = sys.argv[1:]
     opts, args = getopt.getopt(args, "hdsi:w:")
@@ -321,7 +347,7 @@ def main(proto, extract):
         elif opt in ['-s']:
             actAsSingle = True
         elif opt in ['-i']:
-            ipath = arg
+            ipath = makebytes(arg)
         elif opt in ['-w']:
             ret = which(arg)
             if ret:
@@ -344,17 +370,17 @@ def main(proto, extract):
         lst = fileout.split(':')
         mimetype = lst[len(lst)-1].strip()
         lst = mimetype.split(';')
-        return lst[0].strip()
+        return makebytes(lst[0].strip())
 
     def mimetype_with_xdg(f):
         cmd = 'xdg-mime query filetype "' + f + '"'
-        return os.popen(cmd).read().strip()
+        return makebytes(os.popen(cmd).read().strip())
 
-    def debprint(s):
+    def debprint(out, s):
         if not actAsSingle:
-            print(s)
+            proto.breakwrite(out, makebytes(s+'\n'))
             
-    params = {'filename:': args[0]}
+    params = {'filename:': makebytes(args[0])}
     # Some filters (e.g. rclaudio) need/get a MIME type from the indexer
     mimetype = mimetype_with_xdg(args[0])
     params['mimetype:'] = mimetype
@@ -363,19 +389,20 @@ def main(proto, extract):
         print("Open error", file=sys.stderr)
         sys.exit(1)
 
-    if ipath != "" or actAsSingle:
+    if PY3:
+        ioout = sys.stdout.buffer
+    else:
+        ioout = sys.stdout
+    if ipath != b"" or actAsSingle:
         params['ipath:'] = ipath
         ok, data, ipath, eof = extract.getipath(params)
         if ok:
-            debprint("== Found entry for ipath %s (mimetype [%s]):" % \
+            debprint(ioout, "== Found entry for ipath %s (mimetype [%s]):" % \
                   (ipath, proto.mimetype))
-            if isinstance(data, unicode):
-                bdata = data.encode("UTF-8")
-            else:
-                bdata = data
+            bdata = makebytes(data)
             if debugDumpData or actAsSingle:
-                proto.breakwrite(sys.stdout, bdata)
-                print()
+                proto.breakwrite(ioout, bdata)
+                ioout.write(b'\n')
             sys.exit(0)
         else:
             print("Got error, eof %d"%eof, file=sys.stderr)
@@ -386,15 +413,12 @@ def main(proto, extract):
         ok, data, ipath, eof = extract.getnext(params)
         if ok:
             ecnt = ecnt + 1
-            debprint("== Entry %d ipath %s (mimetype [%s]):" % \
-                  (ecnt, ipath, proto.mimetype))
-            if isinstance(data, unicode):
-                bdata = data.encode("UTF-8")
-            else:
-                bdata = data
+            bdata = makebytes(data)
+            debprint(ioout, "== Entry %d dlen %d ipath %s (mimetype [%s]):" % \
+                  (ecnt, len(bdata), ipath, proto.mimetype))
             if debugDumpData:
-                proto.breakwrite(sys.stdout, bdata)
-                print()
+                proto.breakwrite(ioout, bdata)
+                ioout.write(b'\n')
             if eof != RclExecM.noteof:
                 sys.exit(0)
         else:
diff --git a/src/filters/rclics b/src/filters/rclics
index 6ad3f632..3f28a057 100755
--- a/src/filters/rclics
+++ b/src/filters/rclics
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from __future__ import print_function
 
 # Read an ICS file, break it into "documents" which are events, todos,
 # or journal entries, and interface with recoll execm
@@ -13,36 +14,36 @@ import rclexecm
 import sys
 
 # Decide how we'll process the file.
-modules = ('internal', 'icalendar', 'vobject')
-usemodule = 'internal'
+modules = ("internal", "icalendar", "vobject")
+usemodule = "internal"
 forcevobject = 0
-if usemodule != 'internal':
+if usemodule != "internal":
     try:
         if forcevobject:
             raise Exception
         from icalendar import Calendar, Event
-        usemodule = 'icalendar'
+        usemodule = "icalendar"
     except:
         try:
             import vobject
-            usemodule = 'vobject'
+            usemodule = "vobject"
         except:
-            print "RECFILTERROR HELPERNOTFOUND python:icalendar"
-            print "RECFILTERROR HELPERNOTFOUND python:vobject"
+            print("RECFILTERROR HELPERNOTFOUND python:icalendar")
+            print("RECFILTERROR HELPERNOTFOUND python:vobject")
             sys.exit(1);
 
 
 class IcalExtractor:
     def __init__(self, em):
         self.file = ""
-	self.contents = []
+        self.contents = []
         self.em = em
 
     def extractone(self, index):
         if index >= len(self.contents):
             return(False, "", "", True)
         docdata = self.contents[index]
-	#self.em.rclog(docdata)
+        #self.em.rclog(docdata)
 
         iseof = rclexecm.RclExecM.noteof
         if self.currentindex >= len(self.contents) -1:
@@ -55,32 +56,32 @@ class IcalExtractor:
         self.file = params["filename:"]
 
         try:
-            calstr = open(self.file, 'rb')
-        except Exception, e:
+            calstr = open(self.file, "rb")
+        except Exception as e:
             self.em.rclog("Openfile: open: %s" % str(e))
             return False
 
         self.currentindex = -1
 
-        if usemodule == 'internal':
+        if usemodule == "internal":
             self.contents = ICalSimpleSplitter().splitcalendar(calstr)
-        elif usemodule == 'icalendar':
+        elif usemodule == "icalendar":
             try:
                 cal = Calendar.from_string(calstr.read())
-            except Exception, e:
+            except Exception as e:
                 self.em.rclog("Openfile: read or parse error: %s" % str(e))
                 return False
             self.contents = cal.walk()
             self.contents = [item.as_string() for item in self.contents
-                             if (item.name == 'VEVENT' or item.name == 'VTODO'
-                                 or item.name == 'VJOURNAL')]
+                             if (item.name == "VEVENT" or item.name == "VTODO"
+                                 or item.name == "VJOURNAL")]
         else:
             try:
                 cal = vobject.readOne(calstr)
-            except Exception, e:
+            except Exception as e:
                 self.em.rclog("Openfile: cant parse object: %s" % str(e))
                 return False
-            for lstnm in ('vevent_list', 'vtodo_list', 'vjournal_list'):
+            for lstnm in ("vevent_list", "vtodo_list", "vjournal_list"):
                 lst = getattr(cal, lstnm, [])
                 for ev in lst:
                     self.contents.append(ev.serialize())
@@ -90,7 +91,10 @@ class IcalExtractor:
 
     def getipath(self, params):
         try:
-            index = int(params["ipath:"])
+            if params["ipath:"] == b'':
+                index = 0
+            else:
+                index = int(params["ipath:"])
         except:
             return (False, "", "", True)
         return self.extractone(index)
@@ -100,7 +104,7 @@ class IcalExtractor:
         if self.currentindex == -1:
             # Return "self" doc
             self.currentindex = 0
-            self.em.setmimetype('text/plain')
+            self.em.setmimetype(b'text/plain')
             if len(self.contents) == 0:
                 eof = rclexecm.RclExecM.eofnext
             else:
@@ -121,44 +125,44 @@ class ICalSimpleSplitter:
     # Note that if an 'interesting' element is nested inside another one,
     # it will not be extracted (stay as text in external event). This is
     # not an issue and I don't think it can happen with the current list
-    interesting = ('VTODO', 'VEVENT', 'VJOURNAL')
+    interesting = (b'VTODO', b'VEVENT', b'VJOURNAL')
 
     def splitcalendar(self, fin):
-        curblkname = ''
-        curblk = ''
+        curblkname = b''
+        curblk = b''
 
         lo = []
         for line in fin:
             line = line.rstrip()
-            if line == '':
+            if line == b'':
                 continue
 
             if curblkname:
-                curblk = curblk + line + "\n"
+                curblk = curblk + line + b'\n'
 
-            l = line.split(":")
+            l = line.split(b':')
             if len(l) < 2:
                 continue
 
             # If not currently inside a block and we see an
             # 'interesting' BEGIN, start block
-            if curblkname == '' and l[0].upper() == "BEGIN" :
+            if curblkname == b'' and l[0].upper() == b'BEGIN':
                 name = l[1].upper()
                 if name in ICalSimpleSplitter.interesting:
                     curblkname = name
-                    curblk = curblk + line + "\n"
+                    curblk = curblk + line + b'\n'
 
             # If currently accumulating block lines, check for end
-            if curblkname and l[0].upper() == "END" and \
+            if curblkname and l[0].upper() == b'END' and \
                    l[1].upper() == curblkname:
                 lo.append(curblk)
-                curblkname = ''
-                curblk = ''
+                curblkname = b''
+                curblk = b''
 
         if curblk:
             lo.append(curblk)
-            curblkname = ''
-            curblk = ''
+            curblkname = b''
+            curblk = b''
 
         return lo
  
diff --git a/src/filters/rclimg.py b/src/filters/rclimg.py
index ac21d130..8892a9ae 100755
--- a/src/filters/rclimg.py
+++ b/src/filters/rclimg.py
@@ -1,11 +1,12 @@
 #!/usr/bin/env python
 
-# Python-based Image Tag extractor for Recoll. This is less thorough than the 
-# Perl-based rclimg script, but useful if you don't want to have to install Perl
-# (e.g. on Windows).
+# Python-based Image Tag extractor for Recoll. This is less thorough
+# than the Perl-based rclimg script, but useful if you don't want to
+# have to install Perl (e.g. on Windows).
 #
 # Uses pyexiv2. Also tried Pillow, found it useless for tags.
 #
+from __future__ import print_function
 
 import sys
 import os
@@ -15,7 +16,7 @@ import re
 try:
     import pyexiv2
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:pyexiv2"
+    print("RECFILTERROR HELPERNOTFOUND python:pyexiv2")
     sys.exit(1);
 
 khexre = re.compile('.*\.0[xX][0-9a-fA-F]+$')
@@ -48,7 +49,7 @@ class ImgTagExtractor:
     def extractone(self, params):
         #self.em.rclog("extractone %s" % params["filename:"])
         ok = False
-        if not params.has_key("filename:"):
+        if "filename:" not in params:
             self.em.rclog("extractone: no file name")
             return (ok, docdata, "", rclexecm.RclExecM.eofnow)
         filename = params["filename:"]
@@ -62,11 +63,11 @@ class ImgTagExtractor:
                 # we skip numeric keys and undecoded makernote data
                 if k != 'Exif.Photo.MakerNote' and not khexre.match(k):
                     mdic[k] = str(metadata[k].raw_value)
-        except Exception, err:
+        except Exception as err:
             self.em.rclog("extractone: extract failed: [%s]" % err)
             return (ok, "", "", rclexecm.RclExecM.eofnow)
 
-        docdata = "<html><head>\n"
+        docdata = b'<html><head>\n'
 
         ttdata = set()
         for k in pyexiv2_titles:
@@ -77,25 +78,28 @@ class ImgTagExtractor:
             for v in ttdata:
                 v = v.replace('[', '').replace(']', '').replace("'", "")
                 title += v + " "
-            docdata += '<title>' + title + '</title>\n'
+            docdata += rclexecm.makebytes("<title>" + title + "</title>\n")
 
         for k in exiv2_dates:
             if k in mdic:
                 # Recoll wants: %Y-%m-%d %H:%M:%S.
                 # We get 2014:06:27 14:58:47
-                dt = mdic[k].replace(':', '-', 2)
-                docdata += '<meta name="date" content="' + dt + '">\n'
+                dt = mdic[k].replace(":", "-", 2)
+                docdata += b'<meta name="date" content="' + \
+                           rclexecm.makebytes(dt) + b'">\n'
                 break
 
-        for k,v in mdic.iteritems():
+        for k,v in mdic.items():
             if k ==  'Xmp.digiKam.TagsList':
-                docdata += '<meta name="keywords" content="' + \
-                           self.em.htmlescape(mdic[k]) + '">\n'
+                docdata += b'<meta name="keywords" content="' + \
+                           rclexecm.makebytes(self.em.htmlescape(mdic[k])) + \
+                           b'">\n'
 
-        docdata += "</head><body>\n"
-        for k,v in mdic.iteritems():
-            docdata += k + " : " + self.em.htmlescape(mdic[k]) + "<br />\n"
-        docdata += "</body></html>"
+        docdata += b'</head><body>\n'
+        for k,v in mdic.items():
+            docdata += rclexecm.makebytes(k + " : " + \
+                                     self.em.htmlescape(mdic[k]) + "<br />\n")
+        docdata += b'</body></html>'
 
         self.em.setmimetype("text/html")
 
diff --git a/src/filters/rclinfo b/src/filters/rclinfo
index c6b8a8b1..575121cc 100755
--- a/src/filters/rclinfo
+++ b/src/filters/rclinfo
@@ -3,6 +3,7 @@
 # Read a file in GNU info format and output its nodes as subdocs,
 # interfacing with recoll execm
 
+from __future__ import print_function
 
 import rclexecm
 import sys
@@ -16,24 +17,12 @@ import subprocess
 # Some info source docs contain charset info like:
 # @documentencoding ISO-2022-JP
 # But this seems to be absent from outputs.
-htmltemplate = '''
-<html>
-  <head>
-      <title>%s</title>
-      <meta name="rclaptg" content="gnuinfo">
-   </head>
-   <body>
-   <pre style="white-space: pre-wrap">
-   %s
-   </pre></body>
-</html>
-'''
 
 # RclExecm interface
 class InfoExtractor:
     def __init__(self, em):
         self.file = ""
-	self.contents = []
+        self.contents = []
         self.em = em
 
     def extractone(self, index):
@@ -43,8 +32,13 @@ class InfoExtractor:
         nodename, docdata = self.contents[index]
         nodename = self.em.htmlescape(nodename)
         docdata = self.em.htmlescape(docdata)
-
-        docdata = htmltemplate % (nodename, docdata)
+        # strange whitespace to avoid changing the module tests (same as old)
+        docdata = b'\n<html>\n  <head>\n      <title>' + nodename + \
+                  b'</title>\n' + \
+                  b'      <meta name="rclaptg" content="gnuinfo">\n' + \
+                  b'   </head>\n   <body>\n' + \
+                  b'   <pre style="white-space: pre-wrap">\n   ' + \
+                  docdata + b'\n   </pre></body>\n</html>\n'
 
         iseof = rclexecm.RclExecM.noteof
         if self.currentindex >= len(self.contents) -1:
@@ -60,19 +54,18 @@ class InfoExtractor:
             self.em.rclog("Openfile: %s is not a file" % self.file)
             return False
 
-        cmd = "info --subnodes -o - -f " + self.file
+        cmd = b'info --subnodes -o - -f ' + self.file
         nullstream = open("/dev/null", 'w')
         try:
             infostream = subprocess.Popen(cmd, shell=True, bufsize=1,
                                           stderr=nullstream,
                                           stdout=subprocess.PIPE).stdout
-        except Exception, e:
+        except Exception as e:
             # Consider this as permanently fatal. 
             self.em.rclog("Openfile: exec info: %s" % str(e))
-            print "RECFILTERROR HELPERNOTFOUND info"
+            print("RECFILTERROR HELPERNOTFOUND info")
             sys.exit(1);
 
-
         self.currentindex = -1
 
         self.contents = InfoSimpleSplitter().splitinfo(self.file, infostream)
@@ -117,9 +110,9 @@ class InfoSimpleSplitter:
         index = 0
         listout = []
         node_dict = {}
-        node = ""
+        node = b''
         infofile = os.path.basename(filename)
-        nodename = "Unknown"
+        nodename = b'Unknown'
         
         for line in fin:
 
@@ -128,41 +121,41 @@ class InfoSimpleSplitter:
             # beginning with spaces (it's a bug probably, only seen it once)
             # Maybe we'd actually be better off directly interpreting the
             # info files
-            if gotblankline and line.lstrip(" ").startswith("File: "):
+            if gotblankline and line.lstrip(b' ').startswith(b'File: '):
                 prevnodename = nodename
-                line = line.rstrip("\n\r")
-                pairs = line.split(",")
-                up = "Top"
+                line = line.rstrip(b'\n\r')
+                pairs = line.split(b',')
+                up = b'Top'
                 nodename = str(index)
                 try:
                     for pair in pairs:
-                        name, value = pair.split(':')
-                        name = name.strip(" ")
-                        value = value.strip(" ")
-                        if name == "Node":
+                        name, value = pair.split(b':')
+                        name = name.strip(b' ')
+                        value = value.strip(b' ')
+                        if name == b'Node':
                             nodename = value
-                        if name == "Up":
+                        if name == b'Up':
                             up = value
-                        if name == "File":
+                        if name == b'File':
                             infofile = value
-                except:
-                    print >> sys.stderr, "rclinfo: bad line in %s: [%s]\n" % \
-                          (infofile, line)
+                except Exception as err:
+                    print("rclinfo: bad line in %s: [%s] %s\n" % \
+                          (infofile, line, err), file = sys.stderr)
                     nodename = prevnodename
                     node += line
                     continue
 
-                if node_dict.has_key(nodename):
-                    print >> sys.stderr, "Info file", filename, \
-                          "Dup node: ", nodename
+                if nodename in node_dict:
+                    print("Info file %s Dup node: %s" % (filename, nodename), \
+                          file=sys.stderr)
                 node_dict[nodename] = up
 
                 if index != 0:
                     listout.append((prevnodename, node))
-                node = ""
+                node = b''
                 index += 1
 
-            if line.rstrip("\n\r") == '':
+            if line.rstrip(b'\n\r') == b'':
                 gotblankline = 1
             else:
                 gotblankline = 0
@@ -170,7 +163,7 @@ class InfoSimpleSplitter:
             node += line
 
         # File done, add last dangling node
-        if node != "":
+        if node != b'':
             listout.append((nodename, node))
 
         # Compute node paths (concatenate "Up" values), to be used
@@ -178,34 +171,34 @@ class InfoSimpleSplitter:
         # the info file tree is bad
         listout1 = []
         for nodename, node in listout:
-            title = ""
+            title = b''
             loop = 0
             error = 0
-            while nodename != "Top":
-                title = nodename + " / " + title
-                if node_dict.has_key(nodename):
+            while nodename != b'Top':
+                title = nodename + b' / ' + title
+                if nodename in node_dict:
                     nodename = node_dict[nodename]
                 else:
-                    print >> sys.stderr, \
+                    print(
            "Infofile: node's Up does not exist: file %s, path %s, up [%s]" % \
-                    (infofile, title, nodename)
+                    (infofile, title, nodename), file=sys.stderr)
                     error = 1
                     break
                 loop += 1
                 if loop > 50:
-                    print >> sys.stderr, "Infofile: bad tree (looping)", \
-                          infofile
+                    print("Infofile: bad tree (looping) %s" % infofile, \
+                          file = sys.stderr)
                     error = 1
                     break
 
             if error:
                 continue
 
-            if title == "":
+            if title == b'':
                 title = infofile
             else:
-                title = infofile + " / " + title
-            title = title.rstrip(" / ")
+                title = infofile + b' / ' + title
+            title = title.rstrip(b' / ')
             listout1.append((title, node))
 
         return listout1
diff --git a/src/filters/rclkar b/src/filters/rclkar
index 83c0207c..00432b15 100755
--- a/src/filters/rclkar
+++ b/src/filters/rclkar
@@ -1,6 +1,8 @@
 #!/usr/bin/env python
 
 # Read a .kar midi karaoke file and translate to recoll indexable format
+# This does not work with Python3 yet because python:midi doesn't support it
+from __future__ import print_function
 
 import rclexecm
 import sys
@@ -15,9 +17,9 @@ except:
     pass
 
 try:
-    import midi
+    from midi import midi
 except:
-    print "RECFILTERROR HELPERNOTFOUND python:midi"
+    print("RECFILTERROR HELPERNOTFOUND python:midi")
     sys.exit(1);
 
 try:
@@ -106,12 +108,12 @@ class KarTextExtractor:
         if data:
             try:
                 data = data.decode(self.encoding, 'ignore')
-            except Exception, err:
+            except Exception as err:
                 self.em.rclog("Decode failed: " + str(err))
                 return ""
             try:
                 data = data.encode('utf-8')
-            except Exception, err:
+            except Exception as err:
                 self.em.rclog("Encode failed: " + str(err))
                 return ""
             
@@ -127,7 +129,7 @@ class KarTextExtractor:
         just one our users could use if there is trouble with guessing
         encodings'''
 
-        rexp = r'\(([^\)]+)\)\.[a-zA-Z]+$'
+        rexp = br'''\(([^\)]+)\)\.[a-zA-Z]+$'''
         m = re.search(rexp, fn)
         if m:
             return m.group(1)
@@ -165,7 +167,7 @@ class KarTextExtractor:
                 if count > 0:
                     confidence = 1.0
                     encoding = code
-            except Exception, err:
+            except Exception as err:
                 self.em.rclog("stopwords-based classifier failed: %s" % err)
                 return (encoding, confidence)
 
@@ -177,7 +179,7 @@ class KarTextExtractor:
         docdata = ""
         ok = False
 
-        if not params.has_key("filename:"):
+        if "filename:" not in params:
             self.em.rclog("extractone: no mime or file name")
             return (ok, docdata, "", rclexecm.RclExecM.eofnow)
         filename = params["filename:"]
@@ -191,7 +193,7 @@ class KarTextExtractor:
                 self.encoding = ""
 
         # Mimetype not used for now
-        if not params.has_key("mimetype:"):
+        if "mimetype:" not in params:
             mimetype = 'audio/x-midi'
         else:
             mimetype = params["mimetype:"]
@@ -199,8 +201,8 @@ class KarTextExtractor:
         # Read in and midi-decode the file
         try:
             stream = midi.read_midifile(filename)
-        except Exception, err:
-            self.em.rclog("extractone: midi extract failed: [%s]" % err)
+        except Exception as err:
+            self.em.rclog("extractone: read_midifile failed: [%s]" % err)
             return (ok, docdata, "", rclexecm.RclExecM.eofnow)
 
         title = None
diff --git a/src/filters/rcllatinclass.py b/src/filters/rcllatinclass.py
index 529aadab..ad5d3efe 100755
--- a/src/filters/rcllatinclass.py
+++ b/src/filters/rcllatinclass.py
@@ -13,6 +13,8 @@ epsilon with dasia (in unicode but not iso). Can this be replaced by either epsi
 with acute accent ?
 """
 
+from __future__ import print_function
+
 import sys
 import string
 import glob
@@ -117,7 +119,7 @@ if __name__ == "__main__":
 
     lang,code,count = classifier.classify(rawtext)
     if count > 0:
-        print "%s %s %d" % (code, lang, count)
+        print("%s %s %d" % (code, lang, count))
     else:
-        print "UNKNOWN UNKNOWN 0"
+        print("UNKNOWN UNKNOWN 0")
         
diff --git a/src/filters/rclrar b/src/filters/rclrar
index b661f510..0846263c 100755
--- a/src/filters/rclrar
+++ b/src/filters/rclrar
@@ -43,7 +43,7 @@ class RarExtractor:
         try:
             rarinfo = self.rar.getinfo(ipath)
             isdir = rarinfo.isdir()
-        except Exception, err:
+        except Exception as err:
             self.em.rclog("extractone: getinfo failed: [%s]" % err)
             return (True, docdata, ipath, false)
 
@@ -56,7 +56,7 @@ class RarExtractor:
                 else:
                     docdata = self.rar.read(ipath)
                 ok = True
-            except Exception, err:
+            except Exception as err:
                 self.em.rclog("extractone: failed: [%s]" % err)
                 ok = False
         else:
@@ -89,7 +89,7 @@ class RarExtractor:
         try:
             ipath = ipath.decode("utf-8")
             return self.extractone(ipath)
-        except Exception, err:
+        except Exception as err:
             return (ok, data, ipath, eof)
         
     def getnext(self, params):
diff --git a/src/filters/rclrtf.py b/src/filters/rclrtf.py
index c7031030..5a9a68ac 100755
--- a/src/filters/rclrtf.py
+++ b/src/filters/rclrtf.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from __future__ import print_function
 
 import rclexecm
 import rclexec1
@@ -10,24 +11,24 @@ import os
 class RTFProcessData:
     def __init__(self, em):
         self.em = em
-        self.out = ""
+        self.out = b''
         self.gothead = 0
-        self.patendhead = re.compile('''</head>''')
-        self.patcharset = re.compile('''^<meta http-equiv=''')
+        self.patendhead = re.compile(b'''</head>''')
+        self.patcharset = re.compile(b'''^<meta http-equiv=''')
 
     # Some versions of unrtf put out a garbled charset line.
     # Apart from this, we pass the data untouched.
     def takeLine(self, line):
         if not self.gothead:
             if self.patendhead.search(line):
-                self.out +=  '<meta http-equiv="Content-Type" ' + \
-                             'content="text/html;charset=UTF-8">' + "\n"
-                self.out += line + "\n"
+                self.out +=  b'<meta http-equiv="Content-Type" ' + \
+                             b'content="text/html;charset=UTF-8">' + b'\n'
+                self.out += line + b'\n'
                 self.gothead = 1
             elif not self.patcharset.search(line):
-                self.out += line + "\n"
+                self.out += line + b'\n'
         else:
-            self.out += line + "\n"
+            self.out += line + b'\n'
 
     def wrapData(self):
         return self.out
@@ -52,7 +53,7 @@ class RTFFilter:
 
 if __name__ == '__main__':
     if not rclexecm.which("unrtf"):
-        print("RECFILTERROR HELPERNOTFOUND antiword")
+        print("RECFILTERROR HELPERNOTFOUND unrtf")
         sys.exit(1)
     proto = rclexecm.RclExecM()
     filter = RTFFilter(proto)
diff --git a/src/filters/rcltar b/src/filters/rcltar
index 3d6508e0..7dba94d3 100755
--- a/src/filters/rcltar
+++ b/src/filters/rcltar
@@ -33,7 +33,7 @@ class TarExtractor:
             else:
                 docdata = self.tar.extractfile(ipath).read()
             ok = True
-        except Exception, err:
+        except Exception as err:
             ok = False
         iseof = rclexecm.RclExecM.noteof
         if self.currentindex >= len(self.namen) -1:
@@ -59,7 +59,7 @@ class TarExtractor:
         try:
             ipath = ipath.decode("utf-8")
             return self.extractone(ipath)
-        except Exception, err:
+        except Exception as err:
             return (ok, data, ipath, eof)
 
     def getnext(self, params):
diff --git a/src/filters/rclwar b/src/filters/rclwar
index 8fe46638..30a95e9f 100755
--- a/src/filters/rclwar
+++ b/src/filters/rclwar
@@ -15,7 +15,7 @@ class WarExtractor:
             member = self.tar.extractfile(tarinfo)
             docdata = member.read()
             ok = True
-        except Exception, err:
+        except Exception as err:
             self.em.rclog("extractone: failed: [%s]" % err)
             ok = False
         return (ok, docdata, tarinfo.name, rclexecm.RclExecM.noteof)
@@ -26,7 +26,7 @@ class WarExtractor:
         try:
             self.tar = tarfile.open(params["filename:"])
             return True
-        except Exception, err:
+        except Exception as err:
             self.em.rclog(str(err))
             return False
 
@@ -34,7 +34,7 @@ class WarExtractor:
         ipath = params["ipath:"]
         try:
             tarinfo = self.tar.getmember(ipath)
-        except Exception, err:
+        except Exception as err:
             self.em.rclog(str(err))
             return (False, "", ipath, rclexecm.RclExecM.noteof)
         return self.extractone(tarinfo)
diff --git a/src/filters/rclzip b/src/filters/rclzip
index a3afb06e..9d88dc76 100755
--- a/src/filters/rclzip
+++ b/src/filters/rclzip
@@ -72,7 +72,7 @@ class ZipExtractor:
             else:
                 docdata = self.zip.read(ipath)
             ok = True
-        except Exception, err:
+        except Exception as err:
             self.em.rclog("extractone: failed: [%s]" % err)
             ok = False
         iseof = rclexecm.RclExecM.noteof
@@ -98,7 +98,7 @@ class ZipExtractor:
         try:
             self.zip = ZipFile(filename)
             return True
-        except Exception, err:
+        except Exception as err:
             self.em.rclog("openfile: failed: [%s]" % err)
             return False
 
@@ -111,7 +111,7 @@ class ZipExtractor:
         try:
             ipath = ipath.decode("utf-8")
             return self.extractone(ipath)
-        except Exception, err:
+        except Exception as err:
             return (ok, data, ipath, eof)
         
     def getnext(self, params):
diff --git a/src/python/recoll/recoll/rclconfig.py b/src/python/recoll/recoll/rclconfig.py
index 28cb4e5a..8fc8aaff 100755
--- a/src/python/recoll/recoll/rclconfig.py
+++ b/src/python/recoll/recoll/rclconfig.py
@@ -75,7 +75,7 @@ class ConfSimple:
     def getNames(self, sk = ''):
         if not sk in self.submaps:
             return None
-        return self.submaps[sk].keys()
+        return list(self.submaps[sk].keys())
     
 class ConfTree(ConfSimple):
     """A ConfTree adds path-hierarchical interpretation of the section keys,
diff --git a/tests/config/recoll.conf b/tests/config/recoll.conf
index 4e66ddb2..19f3d8d6 100644
--- a/tests/config/recoll.conf
+++ b/tests/config/recoll.conf
@@ -4,6 +4,8 @@ logfilename = /tmp/logrcltst
 daemloglevel = 6
 daemlogfilename = /tmp/rclmontrace
 
+systemfilecommand = xdg-mime query filetype
+
 indexStripChars = 1
 detectxattronly = 1