All format handlers compatible with python3 except chm

This commit is contained in:
Jean-Francois Dockes 2018-03-09 15:25:11 +01:00
parent 7f49de5d97
commit 93ac830079
13 changed files with 45 additions and 58 deletions

View File

@ -613,6 +613,7 @@ filters/rcllatinclass.py \
filters/rcllatinstops.zip \
filters/rcllyx \
filters/rclman \
filters/rclmidi.py \
filters/rclpdf.py \
filters/rclokulnote \
filters/rclopxml.py \

View File

@ -18,7 +18,7 @@ except:
sys.exit(1);
re_pairnum = re.compile(r'\(([0-9]+),\s*([0-9]+)\)')
re_pairnum = re.compile(b'''\(([0-9]+),\s*([0-9]+)\)''')
# The 'Easy' mutagen tags conversions are incomplete. We do it ourselves.
# TPA,TPOS,disc DISCNUMBER/TOTALDISCS
@ -186,7 +186,7 @@ class AudioTagExtractor:
def _embeddedImageFormat(self, mutf):
#self.em.rclog("_embeddedImage: MIME: %s"%mutf.mime)
if 'audio/mp3' in mutf.mime:
for tagname in mutf.iterkeys():
for tagname in mutf.keys():
if tagname.startswith('APIC:'):
#self.em.rclog("mp3 img: %s" % mutf[tagname].mime)
return 'jpg' if mutf[tagname].mime == 'image/jpeg' else 'png'
@ -194,7 +194,7 @@ class AudioTagExtractor:
if mutf.pictures:
return 'jpg' if mutf.pictures[0].mime == 'image/jpeg' else 'png'
elif 'audio/mp4' in mutf.mime:
if 'covr' in mutf.iterkeys():
if 'covr' in mutf.keys():
format = mutf['covr'][0].imageformat
if format == mutagen.mp4.AtomDataType.JPEG:
return 'jpg'
@ -273,7 +273,7 @@ class AudioTagExtractor:
#self.em.rclog("using default bits_per_sample")
minf['bits_per_sample'] = 16
for tag,val in minf.iteritems():
for tag,val in minf.items():
minf[tag] = str(val)
#self.em.rclog("minf after audio %s\n" % minf)
@ -281,7 +281,7 @@ class AudioTagExtractor:
####################
# Metadata tags. The names vary depending on the file type. We
# just have a big translation dictionary for all
for tag,val in mutf.iteritems():
for tag,val in mutf.items():
#self.em.rclog("Original tag: <%s>, val <%s>" % (tag, val))
if tag.upper() in tagdict:
tag = tag.upper()
@ -297,7 +297,7 @@ class AudioTagExtractor:
except:
val0 = val
if val0:
if isinstance(val0, unicode):
if type(val0) == type(u""):
val0 = val0.encode('utf-8', errors='replace')
else:
val0 = str(val0)
@ -320,7 +320,7 @@ class AudioTagExtractor:
if mo:
l = (mo.group(1), mo.group(2))
else:
l = l.split('/')
l = l.split(b'/')
else:
self.em.rclog("l is tuple: %s" %l)
if len(l) == 2:
@ -345,7 +345,7 @@ class AudioTagExtractor:
self.em.setmimetype("text/plain")
self.em.setfield("charset", 'utf-8')
for tag,val in minf.iteritems():
for tag,val in minf.items():
#self.em.rclog("%s -> %s" % (tag, val))
self.em.setfield(tag, val)
# Compat with old version

View File

@ -4,8 +4,8 @@ Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
from __future__ import print_function
# Note: this is not converted to python3, libchm does not have a
# python3 wrapper at this point (2015-11)
# Note: this is not converted to Py3, libchm does not have a
# Py3 wrapper at this point (2018-03)
rclchm_html_mtype = "text/html"

View File

@ -69,6 +69,7 @@ class DiaExtractor:
docdata = self.ExtractDiaText()
ok = True
except Exception as err:
self.em.rclog("Dia parse failed: %s"%err)
ok = False
iseof = rclexecm.RclExecM.eofnext
self.em.setmimetype("text/plain")
@ -77,7 +78,7 @@ class DiaExtractor:
###### File type handler api, used by rclexecm ---------->
def openfile(self, params):
try:
self.dia = GzipFile(params["filename:"], 'r')
self.dia = GzipFile(params["filename:"], 'rb')
# Dia files are sometimes not compressed. Quite weirdly,
# GzipFile does not complain until we try to read. Have to do it
# here to be able to retry an uncompressed open.
@ -87,7 +88,7 @@ class DiaExtractor:
except:
# File not compressed ?
try:
self.dia = open(params["filename:"], 'r')
self.dia = open(params["filename:"], 'rb')
except:
return False
return True

View File

@ -31,18 +31,10 @@ import rclconfig
PY3 = sys.version > '3'
if PY3:
def makebytes(data):
if isinstance(data, bytes):
return data
else:
return data.encode("UTF-8")
else:
def makebytes(data):
if isinstance(data, unicode):
return data.encode("UTF-8")
else:
return data
def makebytes(data):
if type(data) == type(u''):
return data.encode("UTF-8")
return data
my_config = rclconfig.RclConfig()
@ -189,7 +181,7 @@ class RclExecM:
if len(self.mimetype):
self.senditem("Mimetype", self.mimetype)
for nm,value in self.fields.iteritems():
for nm,value in self.fields.items():
#self.rclog("Senditem: [%s] -> [%s]" % (nm, value))
self.senditem("%s:"%nm, value)
self.fields = {}
@ -412,7 +404,7 @@ def main(proto, extract):
ok, data, ipath, eof = extract.getipath(params)
if ok:
debprint(ioout, "== Found entry for ipath %s (mimetype [%s]):" % \
(ipath, proto.mimetype))
(ipath, proto.mimetype.decode('cp1252')))
bdata = makebytes(data)
if debugDumpData or actAsSingle:
proto.breakwrite(ioout, bdata)
@ -429,7 +421,7 @@ def main(proto, extract):
ecnt = ecnt + 1
bdata = makebytes(data)
debprint(ioout, "== Entry %d dlen %d ipath %s (mimetype [%s]):" % \
(ecnt, len(data), ipath, proto.mimetype))
(ecnt, len(data), ipath, proto.mimetype.decode('cp1252')))
if debugDumpData:
proto.breakwrite(ioout, bdata)
ioout.write(b'\n')

View File

@ -854,12 +854,8 @@ class EventStreamReader(object):
def parse(self, instream, outstream):
self.midistream = outstream
self.instream = instream
if PY3:
if type(instream) in (str, bytes):
self.instream = open(instream, 'rb')
else:
if type(instream) in (str, unicode):
self.instream = open(instream, 'rb')
if type(instream) in (type(b''), type(u'')):
self.instream = open(instream, 'rb')
self.parse_file_header()
for track in range(self.midistream.trackcount):
trksz = self.parse_track_header()

View File

@ -39,18 +39,10 @@ else:
import io
import keyword, token, tokenize
if PY2:
def makebytes(data):
if isinstance(data, unicode):
return data.encode("UTF-8")
else:
return data
else:
def makebytes(data):
if isinstance(data, bytes):
return data
else:
return data.encode("UTF-8")
def makebytes(data):
if type(data) == type(u''):
return data.encode("UTF-8")
return data
#############################################################################
### Python Source Parser (does Hilighting)

View File

@ -80,9 +80,15 @@ class RarExtractor:
def openfile(self, params):
self.currentindex = -1
try:
self.rar = RarFile(params["filename:"])
# Previous versions passed the file name to RarFile. But the
# py3 version of RarFile wants a str as input, which is wrong
# of course, as file names are binary. We work around this by
# passing an open file object instead.
f = open(params["filename:"], 'rb')
self.rar = RarFile(f)
return True
except:
except Exception as err:
self.em.rclog("RarFile: %s"%err)
return False
def getipath(self, params):

View File

@ -151,7 +151,7 @@ class OOExtractor:
fn = params["filename:"]
try:
f = open(fn)
f = open(fn, 'rb')
data = f.read()
f.close()
except Exception as err:

View File

@ -154,7 +154,7 @@ class OOExtractor:
except:
# To be checked. I'm under the impression that I get this when
# nothing matches?
#self.em.rclog("no/bad metadata in %s" % fn)
#self.em.rclog("No/bad metadata in %s" % fn)
pass
docdata += b'</head>\n<body>\n'

View File

@ -61,10 +61,10 @@ else:
styledoc = etree.fromstring(sheet)
transform = etree.XSLT(styledoc)
doc = etree.fromstring(data)
return etree.tostring(transform(doc))
return bytes(transform(doc))
def apply_sheet_file(sheet, fn):
styledoc = etree.fromstring(sheet)
transform = etree.XSLT(styledoc)
doc = etree.parse(fn)
return etree.tostring(transform(doc))
return bytes(transform(doc))

View File

@ -124,7 +124,7 @@ class ZipExtractor:
try:
if rclexecm.PY3:
# Note: python3 ZipFile wants an str file name, which
# Note: py3 ZipFile wants an str file name, which
# is wrong: file names are binary. But it accepts an
# open file, and open() has no such restriction
f = open(filename, 'rb')

View File

@ -17,12 +17,11 @@
# Transform XML output from xls-dump.py into csv format.
#
# Note: this would be difficult to make compatible with python 3 <= 3.4
# because of the use of % interpolation on what should be bytes.
# The python2 restriction is not a big issue at this point because
# msodumper is not compatible with python3 anyway
# % interpolation for bytes is planned for python 3.5, at which point
# porting this module will become trivial.
# Note: this would be difficult to make compatible with python 3 <=
# 3.4 because of the use of % interpolation on what should be bytes.
# % interpolation for bytes is available as of python 3.5, which is
# the minimum version supported.
from __future__ import print_function