All format handlers compatible with python3 except chm

2018-03-09 15:25:11 +01:00 · 2018-03-09 15:25:11 +01:00 · 93ac830079
commit 93ac830079
parent 7f49de5d97
13 changed files with 45 additions and 58 deletions
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -613,6 +613,7 @@ filters/rcllatinclass.py \
 filters/rcllatinstops.zip \
 filters/rcllyx \
 filters/rclman \
+filters/rclmidi.py \
 filters/rclpdf.py \
 filters/rclokulnote \
 filters/rclopxml.py \
--- a/src/filters/rclaudio
+++ b/src/filters/rclaudio
@ -18,7 +18,7 @@ except:
    sys.exit(1);


-re_pairnum = re.compile(r'\(([0-9]+),\s*([0-9]+)\)')
+re_pairnum = re.compile(b'''\(([0-9]+),\s*([0-9]+)\)''')

 # The 'Easy' mutagen tags conversions are incomplete. We do it ourselves.
 # TPA,TPOS,disc DISCNUMBER/TOTALDISCS
@ -186,7 +186,7 @@ class AudioTagExtractor:
    def _embeddedImageFormat(self, mutf):
        #self.em.rclog("_embeddedImage: MIME: %s"%mutf.mime)
        if 'audio/mp3' in mutf.mime:
-            for tagname in mutf.iterkeys():
+            for tagname in mutf.keys():
                if tagname.startswith('APIC:'):
                    #self.em.rclog("mp3 img: %s" % mutf[tagname].mime)
                    return 'jpg' if mutf[tagname].mime == 'image/jpeg' else 'png'
@ -194,7 +194,7 @@ class AudioTagExtractor:
            if mutf.pictures:
                return 'jpg' if mutf.pictures[0].mime == 'image/jpeg' else 'png'
        elif 'audio/mp4' in mutf.mime:
-            if 'covr' in mutf.iterkeys():
+            if 'covr' in mutf.keys():
                format = mutf['covr'][0].imageformat 
                if format == mutagen.mp4.AtomDataType.JPEG:
                    return 'jpg'
@ -273,7 +273,7 @@ class AudioTagExtractor:
                #self.em.rclog("using default bits_per_sample")
                minf['bits_per_sample'] = 16

-        for tag,val in minf.iteritems():
+        for tag,val in minf.items():
            minf[tag] = str(val)

        #self.em.rclog("minf after audio %s\n" % minf)
@ -281,7 +281,7 @@ class AudioTagExtractor:
        ####################
        # Metadata tags. The names vary depending on the file type. We
        # just have a big translation dictionary for all
-        for tag,val in mutf.iteritems():
+        for tag,val in mutf.items():
            #self.em.rclog("Original tag: <%s>, val <%s>" % (tag, val))
            if tag.upper() in tagdict:
                tag = tag.upper()
@ -297,7 +297,7 @@ class AudioTagExtractor:
                        except:
                            val0 = val
                    if val0:
-                        if isinstance(val0, unicode):
+                        if type(val0) == type(u""):
                            val0 = val0.encode('utf-8', errors='replace')
                        else:
                            val0 = str(val0)
@ -320,7 +320,7 @@ class AudioTagExtractor:
                    if mo:
                        l = (mo.group(1), mo.group(2))
                    else:
-                        l = l.split('/')
+                        l = l.split(b'/')
                else:
                    self.em.rclog("l is tuple: %s" %l)
                if len(l) == 2:
@ -345,7 +345,7 @@ class AudioTagExtractor:
        self.em.setmimetype("text/plain")
        self.em.setfield("charset", 'utf-8')

-        for tag,val in minf.iteritems():
+        for tag,val in minf.items():
            #self.em.rclog("%s -> %s" % (tag, val))
            self.em.setfield(tag, val)
            # Compat with old version
--- a/src/filters/rclchm
+++ b/src/filters/rclchm
@ -4,8 +4,8 @@ Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""

 from __future__ import print_function

-# Note: this is not converted to python3, libchm does not have a
-# python3 wrapper at this point (2015-11)
+# Note: this is not converted to Py3, libchm does not have a
+# Py3 wrapper at this point (2018-03)

 rclchm_html_mtype = "text/html"

--- a/src/filters/rcldia
+++ b/src/filters/rcldia
@ -69,6 +69,7 @@ class DiaExtractor:
            docdata = self.ExtractDiaText()
            ok = True
        except Exception as err:
+            self.em.rclog("Dia parse failed: %s"%err)
            ok = False
        iseof = rclexecm.RclExecM.eofnext
        self.em.setmimetype("text/plain")
@ -77,7 +78,7 @@ class DiaExtractor:
    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        try:
-            self.dia = GzipFile(params["filename:"], 'r')
+            self.dia = GzipFile(params["filename:"], 'rb')
            # Dia files are sometimes not compressed. Quite weirdly,
            # GzipFile does not complain until we try to read. Have to do it
            # here to be able to retry an uncompressed open.
@ -87,7 +88,7 @@ class DiaExtractor:
        except:
            # File not compressed ?
            try:
-                self.dia = open(params["filename:"], 'r')
+                self.dia = open(params["filename:"], 'rb')
            except:
                return False
            return True
--- a/src/filters/rclexecm.py
+++ b/src/filters/rclexecm.py
@ -31,18 +31,10 @@ import rclconfig

 PY3 = sys.version > '3'

-if PY3:
-    def makebytes(data):
-        if isinstance(data, bytes):
-            return data
-        else:
-            return data.encode("UTF-8")
-else:
-    def makebytes(data):
-        if isinstance(data, unicode):
-            return data.encode("UTF-8")
-        else:
-            return data
+def makebytes(data):
+    if type(data) == type(u''):
+        return data.encode("UTF-8")
+    return data

 my_config = rclconfig.RclConfig()

@ -189,7 +181,7 @@ class RclExecM:
            if len(self.mimetype):
                self.senditem("Mimetype", self.mimetype)

-            for nm,value in self.fields.iteritems():
+            for nm,value in self.fields.items():
                #self.rclog("Senditem: [%s] -> [%s]" % (nm, value))
                self.senditem("%s:"%nm, value)
            self.fields = {}
@ -412,7 +404,7 @@ def main(proto, extract):
        ok, data, ipath, eof = extract.getipath(params)
        if ok:
            debprint(ioout, "== Found entry for ipath %s (mimetype [%s]):" % \
-                  (ipath, proto.mimetype))
+                  (ipath, proto.mimetype.decode('cp1252')))
            bdata = makebytes(data)
            if debugDumpData or actAsSingle:
                proto.breakwrite(ioout, bdata)
@ -429,7 +421,7 @@ def main(proto, extract):
            ecnt = ecnt + 1
            bdata = makebytes(data)
            debprint(ioout, "== Entry %d dlen %d ipath %s (mimetype [%s]):" % \
-                  (ecnt, len(data), ipath, proto.mimetype))
+                  (ecnt, len(data), ipath, proto.mimetype.decode('cp1252')))
            if debugDumpData:
                proto.breakwrite(ioout, bdata)
                ioout.write(b'\n')
--- a/src/filters/rclmidi.py
+++ b/src/filters/rclmidi.py
@ -854,12 +854,8 @@ class EventStreamReader(object):
    def parse(self, instream, outstream):
        self.midistream = outstream
        self.instream = instream
-        if PY3:
-            if type(instream) in (str, bytes):
-                self.instream = open(instream, 'rb')
-        else:
-            if type(instream) in (str, unicode):
-                self.instream = open(instream, 'rb')
+        if type(instream) in (type(b''), type(u'')):
+            self.instream = open(instream, 'rb')
        self.parse_file_header()
        for track in range(self.midistream.trackcount):  
            trksz = self.parse_track_header()
--- a/src/filters/rclpython
+++ b/src/filters/rclpython
@ -39,18 +39,10 @@ else:
    import io
 import keyword, token, tokenize

-if PY2:
-    def makebytes(data):
-        if isinstance(data, unicode):
-            return data.encode("UTF-8")
-        else:
-            return data
-else:
-    def makebytes(data):
-        if isinstance(data, bytes):
-            return data
-        else:
-            return data.encode("UTF-8")
+def makebytes(data):
+    if type(data) == type(u''):
+        return data.encode("UTF-8")
+    return data

 #############################################################################
 ### Python Source Parser (does Hilighting)
--- a/src/filters/rclrar
+++ b/src/filters/rclrar
@ -80,9 +80,15 @@ class RarExtractor:
    def openfile(self, params):
        self.currentindex = -1
        try:
-            self.rar = RarFile(params["filename:"])
+            # Previous versions passed the file name to RarFile, but
+            # the py3 version of RarFile wants a str there, which is
+            # wrong, as file names are binary. We work around this by
+            # passing an already open binary file object instead.
+            f = open(params["filename:"], 'rb')
+            self.rar = RarFile(f)
            return True
-        except:
+        except Exception as err:
+            self.em.rclog("RarFile: %s"%err)
            return False

    def getipath(self, params):
--- a/src/filters/rclsoff-flat.py
+++ b/src/filters/rclsoff-flat.py
@ -151,7 +151,7 @@ class OOExtractor:
        fn = params["filename:"]

        try:
-            f = open(fn)
+            f = open(fn, 'rb')
            data = f.read()
            f.close()
        except Exception as err:
--- a/src/filters/rclsoff.py
+++ b/src/filters/rclsoff.py
@ -154,7 +154,7 @@ class OOExtractor:
        except:
            # To be checked. I'm under the impression that I get this when
            # nothing matches?
-            #self.em.rclog("no/bad metadata in %s" % fn)
+            #self.em.rclog("No/bad metadata in %s" % fn)
            pass

        docdata += b'</head>\n<body>\n'
--- a/src/filters/rclxslt.py
+++ b/src/filters/rclxslt.py
@ -61,10 +61,10 @@ else:
        styledoc = etree.fromstring(sheet)
        transform = etree.XSLT(styledoc)
        doc = etree.fromstring(data)
-        return etree.tostring(transform(doc))
+        return bytes(transform(doc))
    def apply_sheet_file(sheet, fn):
        styledoc = etree.fromstring(sheet)
        transform = etree.XSLT(styledoc)
        doc = etree.parse(fn)
-        return etree.tostring(transform(doc))
+        return bytes(transform(doc))

--- a/src/filters/rclzip
+++ b/src/filters/rclzip
@ -124,7 +124,7 @@ class ZipExtractor:

        try:
            if rclexecm.PY3:
-                # Note: python3 ZipFile wants an str file name, which
+                # Note: py3 ZipFile wants an str file name, which
                # is wrong: file names are binary. But it accepts an
                # open file, and open() has no such restriction
                f = open(filename, 'rb')
--- a/src/filters/xlsxmltocsv.py
+++ b/src/filters/xlsxmltocsv.py
@ -17,12 +17,11 @@

 # Transform XML output from xls-dump.py into csv format.
 #
-# Note: this would be difficult to make compatible with python 3 <= 3.4
-# because of the use of % interpolation on what should be bytes.
-# The python2 restriction is not a big issue at this point because
-# msodumper is not compatible with python3 anyway
-# % interpolation for bytes is planned for python 3.5, at which point
-# porting this module will become trivial.
+# Note: this would be difficult to make compatible with python 3 <=
+# 3.4 because of the use of % interpolation on what should be bytes.
+# % interpolation for bytes is available as of python 3.5, which is
+# the minimum version supported.
+

 from __future__ import print_function