From 93ac8300795205786bc6182d81d371b151466902 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 9 Mar 2018 15:25:11 +0100 Subject: [PATCH] All format handlers compatible with python3 except chm --- src/Makefile.am | 1 + src/filters/rclaudio | 16 ++++++++-------- src/filters/rclchm | 4 ++-- src/filters/rcldia | 5 +++-- src/filters/rclexecm.py | 22 +++++++--------------- src/filters/rclmidi.py | 8 ++------ src/filters/rclpython | 16 ++++------------ src/filters/rclrar | 10 ++++++++-- src/filters/rclsoff-flat.py | 2 +- src/filters/rclsoff.py | 2 +- src/filters/rclxslt.py | 4 ++-- src/filters/rclzip | 2 +- src/filters/xlsxmltocsv.py | 11 +++++------ 13 files changed, 45 insertions(+), 58 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 5aa1aea4..39911e74 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -613,6 +613,7 @@ filters/rcllatinclass.py \ filters/rcllatinstops.zip \ filters/rcllyx \ filters/rclman \ +filters/rclmidi.py \ filters/rclpdf.py \ filters/rclokulnote \ filters/rclopxml.py \ diff --git a/src/filters/rclaudio b/src/filters/rclaudio index 672ddd3d..94ca0be7 100755 --- a/src/filters/rclaudio +++ b/src/filters/rclaudio @@ -18,7 +18,7 @@ except: sys.exit(1); -re_pairnum = re.compile(r'\(([0-9]+),\s*([0-9]+)\)') +re_pairnum = re.compile(b'''\(([0-9]+),\s*([0-9]+)\)''') # The 'Easy' mutagen tags conversions are incomplete. We do it ourselves. # TPA,TPOS,disc DISCNUMBER/TOTALDISCS @@ -186,7 +186,7 @@ class AudioTagExtractor: def _embeddedImageFormat(self, mutf): #self.em.rclog("_embeddedImage: MIME: %s"%mutf.mime) if 'audio/mp3' in mutf.mime: - for tagname in mutf.iterkeys(): + for tagname in mutf.keys(): if tagname.startswith('APIC:'): #self.em.rclog("mp3 img: %s" % mutf[tagname].mime) return 'jpg' if mutf[tagname].mime == 'image/jpeg' else 'png' @@ -194,7 +194,7 @@ class AudioTagExtractor: if mutf.pictures: return 'jpg' if mutf.pictures[0].mime == 'image/jpeg' else 'png' elif 'audio/mp4' in mutf.mime: - if 'covr' in mutf.iterkeys(): + if 'covr' in mutf.keys(): format = mutf['covr'][0].imageformat if format == mutagen.mp4.AtomDataType.JPEG: return 'jpg' @@ -273,7 +273,7 @@ class AudioTagExtractor: #self.em.rclog("using default bits_per_sample") minf['bits_per_sample'] = 16 - for tag,val in minf.iteritems(): + for tag,val in minf.items(): minf[tag] = str(val) #self.em.rclog("minf after audio %s\n" % minf) @@ -281,7 +281,7 @@ class AudioTagExtractor: #################### # Metadata tags. The names vary depending on the file type. We # just have a big translation dictionary for all - for tag,val in mutf.iteritems(): + for tag,val in mutf.items(): #self.em.rclog("Original tag: <%s>, val <%s>" % (tag, val)) if tag.upper() in tagdict: tag = tag.upper() @@ -297,7 +297,7 @@ class AudioTagExtractor: except: val0 = val if val0: - if isinstance(val0, unicode): + if type(val0) == type(u""): val0 = val0.encode('utf-8', errors='replace') else: val0 = str(val0) @@ -320,7 +320,7 @@ class AudioTagExtractor: if mo: l = (mo.group(1), mo.group(2)) else: - l = l.split('/') + l = l.split(b'/') else: self.em.rclog("l is tuple: %s" %l) if len(l) == 2: @@ -345,7 +345,7 @@ class AudioTagExtractor: self.em.setmimetype("text/plain") self.em.setfield("charset", 'utf-8') - for tag,val in minf.iteritems(): + for tag,val in minf.items(): #self.em.rclog("%s -> %s" % (tag, val)) self.em.setfield(tag, val) # Compat with old version diff --git a/src/filters/rclchm b/src/filters/rclchm index 2e8fa600..49f8728a 100755 --- a/src/filters/rclchm +++ b/src/filters/rclchm @@ -4,8 +4,8 @@ Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)""" from __future__ import print_function -# Note: this is not converted to python3, libchm does not have a -# python3 wrapper at this point (2015-11) +# Note: this is not converted to Py3, libchm does not have a +# Py3 wrapper at this point (2018-03) rclchm_html_mtype = "text/html" diff --git a/src/filters/rcldia b/src/filters/rcldia index e8e8608f..282148eb 100755 --- a/src/filters/rcldia +++ b/src/filters/rcldia @@ -69,6 +69,7 @@ class DiaExtractor: docdata = self.ExtractDiaText() ok = True except Exception as err: + self.em.rclog("Dia parse failed: %s"%err) ok = False iseof = rclexecm.RclExecM.eofnext self.em.setmimetype("text/plain") @@ -77,7 +78,7 @@ class DiaExtractor: ###### File type handler api, used by rclexecm ----------> def openfile(self, params): try: - self.dia = GzipFile(params["filename:"], 'r') + self.dia = GzipFile(params["filename:"], 'rb') # Dia files are sometimes not compressed. Quite weirdly, # GzipFile does not complain until we try to read. Have to do it # here to be able to retry an uncompressed open. @@ -87,7 +88,7 @@ class DiaExtractor: except: # File not compressed ? try: - self.dia = open(params["filename:"], 'r') + self.dia = open(params["filename:"], 'rb') except: return False return True diff --git a/src/filters/rclexecm.py b/src/filters/rclexecm.py index 4bb86390..21f54d5d 100644 --- a/src/filters/rclexecm.py +++ b/src/filters/rclexecm.py @@ -31,18 +31,10 @@ import rclconfig PY3 = sys.version > '3' -if PY3: - def makebytes(data): - if isinstance(data, bytes): - return data - else: - return data.encode("UTF-8") -else: - def makebytes(data): - if isinstance(data, unicode): - return data.encode("UTF-8") - else: - return data +def makebytes(data): + if type(data) == type(u''): + return data.encode("UTF-8") + return data my_config = rclconfig.RclConfig() @@ -189,7 +181,7 @@ class RclExecM: if len(self.mimetype): self.senditem("Mimetype", self.mimetype) - for nm,value in self.fields.iteritems(): + for nm,value in self.fields.items(): #self.rclog("Senditem: [%s] -> [%s]" % (nm, value)) self.senditem("%s:"%nm, value) self.fields = {} @@ -412,7 +404,7 @@ def main(proto, extract): ok, data, ipath, eof = extract.getipath(params) if ok: debprint(ioout, "== Found entry for ipath %s (mimetype [%s]):" % \ - (ipath, proto.mimetype)) + (ipath, proto.mimetype.decode('cp1252'))) bdata = makebytes(data) if debugDumpData or actAsSingle: proto.breakwrite(ioout, bdata) @@ -429,7 +421,7 @@ def main(proto, extract): ecnt = ecnt + 1 bdata = makebytes(data) debprint(ioout, "== Entry %d dlen %d ipath %s (mimetype [%s]):" % \ - (ecnt, len(data), ipath, proto.mimetype)) + (ecnt, len(data), ipath, proto.mimetype.decode('cp1252'))) if debugDumpData: proto.breakwrite(ioout, bdata) ioout.write(b'\n') diff --git a/src/filters/rclmidi.py b/src/filters/rclmidi.py index c0ebedb7..c181df18 100755 --- a/src/filters/rclmidi.py +++ b/src/filters/rclmidi.py @@ -854,12 +854,8 @@ class EventStreamReader(object): def parse(self, instream, outstream): self.midistream = outstream self.instream = instream - if PY3: - if type(instream) in (str, bytes): - self.instream = open(instream, 'rb') - else: - if type(instream) in (str, unicode): - self.instream = open(instream, 'rb') + if type(instream) in (type(b''), type(u'')): + self.instream = open(instream, 'rb') self.parse_file_header() for track in range(self.midistream.trackcount): trksz = self.parse_track_header() diff --git a/src/filters/rclpython b/src/filters/rclpython index 04b9502a..615455b3 100755 --- a/src/filters/rclpython +++ b/src/filters/rclpython @@ -39,18 +39,10 @@ else: import io import keyword, token, tokenize -if PY2: - def makebytes(data): - if isinstance(data, unicode): - return data.encode("UTF-8") - else: - return data -else: - def makebytes(data): - if isinstance(data, bytes): - return data - else: - return data.encode("UTF-8") +def makebytes(data): + if type(data) == type(u''): + return data.encode("UTF-8") + return data ############################################################################# ### Python Source Parser (does Hilighting) diff --git a/src/filters/rclrar b/src/filters/rclrar index be9d3f98..8f723fa5 100755 --- a/src/filters/rclrar +++ b/src/filters/rclrar @@ -80,9 +80,15 @@ class RarExtractor: def openfile(self, params): self.currentindex = -1 try: - self.rar = RarFile(params["filename:"]) + # The previous versions passed the file name to + # RarFile. But the py3 version of this wants an str as + # input, which is wrong of course, as filenames are + # binary. Circumvented by passing the open file + f = open(params["filename:"], 'rb') + self.rar = RarFile(f) return True - except: + except Exception as err: + self.em.rclog("RarFile: %s"%err) return False def getipath(self, params): diff --git a/src/filters/rclsoff-flat.py b/src/filters/rclsoff-flat.py index 0827da1e..337a5f94 100755 --- a/src/filters/rclsoff-flat.py +++ b/src/filters/rclsoff-flat.py @@ -151,7 +151,7 @@ class OOExtractor: fn = params["filename:"] try: - f = open(fn) + f = open(fn, 'rb') data = f.read() f.close() except Exception as err: diff --git a/src/filters/rclsoff.py b/src/filters/rclsoff.py index 85ca9aaf..5730d97c 100755 --- a/src/filters/rclsoff.py +++ b/src/filters/rclsoff.py @@ -154,7 +154,7 @@ class OOExtractor: except: # To be checked. I'm under the impression that I get this when # nothing matches? - #self.em.rclog("no/bad metadata in %s" % fn) + #self.em.rclog("No/bad metadata in %s" % fn) pass docdata += b'\n\n' diff --git a/src/filters/rclxslt.py b/src/filters/rclxslt.py index 2441294e..7d9232c2 100644 --- a/src/filters/rclxslt.py +++ b/src/filters/rclxslt.py @@ -61,10 +61,10 @@ else: styledoc = etree.fromstring(sheet) transform = etree.XSLT(styledoc) doc = etree.fromstring(data) - return etree.tostring(transform(doc)) + return bytes(transform(doc)) def apply_sheet_file(sheet, fn): styledoc = etree.fromstring(sheet) transform = etree.XSLT(styledoc) doc = etree.parse(fn) - return etree.tostring(transform(doc)) + return bytes(transform(doc)) diff --git a/src/filters/rclzip b/src/filters/rclzip index 5504c985..35739625 100755 --- a/src/filters/rclzip +++ b/src/filters/rclzip @@ -124,7 +124,7 @@ class ZipExtractor: try: if rclexecm.PY3: - # Note: python3 ZipFile wants an str file name, which + # Note: py3 ZipFile wants an str file name, which # is wrong: file names are binary. But it accepts an # open file, and open() has no such restriction f = open(filename, 'rb') diff --git a/src/filters/xlsxmltocsv.py b/src/filters/xlsxmltocsv.py index a8930167..0c9a5047 100755 --- a/src/filters/xlsxmltocsv.py +++ b/src/filters/xlsxmltocsv.py @@ -17,12 +17,11 @@ # Transform XML output from xls-dump.py into csv format. # -# Note: this would be difficult to make compatible with python 3 <= 3.4 -# because of the use of % interpolation on what should be bytes. -# The python2 restriction is not a big issue at this point because -# msodumper is not compatible with python3 anyway -# % interpolation for bytes is planned for python 3.5, at which point -# porting this module will become trivial. +# Note: this would be difficult to make compatible with python 3 <= +# 3.4 because of the use of % interpolation on what should be bytes. +# # % terpolation for bytes is available as of python 3.5, which is +# the minimum version supported. + from __future__ import print_function