All format handlers compatible with python3 except chm
This commit is contained in:
parent
7f49de5d97
commit
93ac830079
@ -613,6 +613,7 @@ filters/rcllatinclass.py \
|
|||||||
filters/rcllatinstops.zip \
|
filters/rcllatinstops.zip \
|
||||||
filters/rcllyx \
|
filters/rcllyx \
|
||||||
filters/rclman \
|
filters/rclman \
|
||||||
|
filters/rclmidi.py \
|
||||||
filters/rclpdf.py \
|
filters/rclpdf.py \
|
||||||
filters/rclokulnote \
|
filters/rclokulnote \
|
||||||
filters/rclopxml.py \
|
filters/rclopxml.py \
|
||||||
|
|||||||
@ -18,7 +18,7 @@ except:
|
|||||||
sys.exit(1);
|
sys.exit(1);
|
||||||
|
|
||||||
|
|
||||||
re_pairnum = re.compile(r'\(([0-9]+),\s*([0-9]+)\)')
|
re_pairnum = re.compile(b'''\(([0-9]+),\s*([0-9]+)\)''')
|
||||||
|
|
||||||
# The 'Easy' mutagen tags conversions are incomplete. We do it ourselves.
|
# The 'Easy' mutagen tags conversions are incomplete. We do it ourselves.
|
||||||
# TPA,TPOS,disc DISCNUMBER/TOTALDISCS
|
# TPA,TPOS,disc DISCNUMBER/TOTALDISCS
|
||||||
@ -186,7 +186,7 @@ class AudioTagExtractor:
|
|||||||
def _embeddedImageFormat(self, mutf):
|
def _embeddedImageFormat(self, mutf):
|
||||||
#self.em.rclog("_embeddedImage: MIME: %s"%mutf.mime)
|
#self.em.rclog("_embeddedImage: MIME: %s"%mutf.mime)
|
||||||
if 'audio/mp3' in mutf.mime:
|
if 'audio/mp3' in mutf.mime:
|
||||||
for tagname in mutf.iterkeys():
|
for tagname in mutf.keys():
|
||||||
if tagname.startswith('APIC:'):
|
if tagname.startswith('APIC:'):
|
||||||
#self.em.rclog("mp3 img: %s" % mutf[tagname].mime)
|
#self.em.rclog("mp3 img: %s" % mutf[tagname].mime)
|
||||||
return 'jpg' if mutf[tagname].mime == 'image/jpeg' else 'png'
|
return 'jpg' if mutf[tagname].mime == 'image/jpeg' else 'png'
|
||||||
@ -194,7 +194,7 @@ class AudioTagExtractor:
|
|||||||
if mutf.pictures:
|
if mutf.pictures:
|
||||||
return 'jpg' if mutf.pictures[0].mime == 'image/jpeg' else 'png'
|
return 'jpg' if mutf.pictures[0].mime == 'image/jpeg' else 'png'
|
||||||
elif 'audio/mp4' in mutf.mime:
|
elif 'audio/mp4' in mutf.mime:
|
||||||
if 'covr' in mutf.iterkeys():
|
if 'covr' in mutf.keys():
|
||||||
format = mutf['covr'][0].imageformat
|
format = mutf['covr'][0].imageformat
|
||||||
if format == mutagen.mp4.AtomDataType.JPEG:
|
if format == mutagen.mp4.AtomDataType.JPEG:
|
||||||
return 'jpg'
|
return 'jpg'
|
||||||
@ -273,7 +273,7 @@ class AudioTagExtractor:
|
|||||||
#self.em.rclog("using default bits_per_sample")
|
#self.em.rclog("using default bits_per_sample")
|
||||||
minf['bits_per_sample'] = 16
|
minf['bits_per_sample'] = 16
|
||||||
|
|
||||||
for tag,val in minf.iteritems():
|
for tag,val in minf.items():
|
||||||
minf[tag] = str(val)
|
minf[tag] = str(val)
|
||||||
|
|
||||||
#self.em.rclog("minf after audio %s\n" % minf)
|
#self.em.rclog("minf after audio %s\n" % minf)
|
||||||
@ -281,7 +281,7 @@ class AudioTagExtractor:
|
|||||||
####################
|
####################
|
||||||
# Metadata tags. The names vary depending on the file type. We
|
# Metadata tags. The names vary depending on the file type. We
|
||||||
# just have a big translation dictionary for all
|
# just have a big translation dictionary for all
|
||||||
for tag,val in mutf.iteritems():
|
for tag,val in mutf.items():
|
||||||
#self.em.rclog("Original tag: <%s>, val <%s>" % (tag, val))
|
#self.em.rclog("Original tag: <%s>, val <%s>" % (tag, val))
|
||||||
if tag.upper() in tagdict:
|
if tag.upper() in tagdict:
|
||||||
tag = tag.upper()
|
tag = tag.upper()
|
||||||
@ -297,7 +297,7 @@ class AudioTagExtractor:
|
|||||||
except:
|
except:
|
||||||
val0 = val
|
val0 = val
|
||||||
if val0:
|
if val0:
|
||||||
if isinstance(val0, unicode):
|
if type(val0) == type(u""):
|
||||||
val0 = val0.encode('utf-8', errors='replace')
|
val0 = val0.encode('utf-8', errors='replace')
|
||||||
else:
|
else:
|
||||||
val0 = str(val0)
|
val0 = str(val0)
|
||||||
@ -320,7 +320,7 @@ class AudioTagExtractor:
|
|||||||
if mo:
|
if mo:
|
||||||
l = (mo.group(1), mo.group(2))
|
l = (mo.group(1), mo.group(2))
|
||||||
else:
|
else:
|
||||||
l = l.split('/')
|
l = l.split(b'/')
|
||||||
else:
|
else:
|
||||||
self.em.rclog("l is tuple: %s" %l)
|
self.em.rclog("l is tuple: %s" %l)
|
||||||
if len(l) == 2:
|
if len(l) == 2:
|
||||||
@ -345,7 +345,7 @@ class AudioTagExtractor:
|
|||||||
self.em.setmimetype("text/plain")
|
self.em.setmimetype("text/plain")
|
||||||
self.em.setfield("charset", 'utf-8')
|
self.em.setfield("charset", 'utf-8')
|
||||||
|
|
||||||
for tag,val in minf.iteritems():
|
for tag,val in minf.items():
|
||||||
#self.em.rclog("%s -> %s" % (tag, val))
|
#self.em.rclog("%s -> %s" % (tag, val))
|
||||||
self.em.setfield(tag, val)
|
self.em.setfield(tag, val)
|
||||||
# Compat with old version
|
# Compat with old version
|
||||||
|
|||||||
@ -4,8 +4,8 @@ Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
|
|||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
# Note: this is not converted to python3, libchm does not have a
|
# Note: this is not converted to Py3, libchm does not have a
|
||||||
# python3 wrapper at this point (2015-11)
|
# Py3 wrapper at this point (2018-03)
|
||||||
|
|
||||||
rclchm_html_mtype = "text/html"
|
rclchm_html_mtype = "text/html"
|
||||||
|
|
||||||
|
|||||||
@ -69,6 +69,7 @@ class DiaExtractor:
|
|||||||
docdata = self.ExtractDiaText()
|
docdata = self.ExtractDiaText()
|
||||||
ok = True
|
ok = True
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
|
self.em.rclog("Dia parse failed: %s"%err)
|
||||||
ok = False
|
ok = False
|
||||||
iseof = rclexecm.RclExecM.eofnext
|
iseof = rclexecm.RclExecM.eofnext
|
||||||
self.em.setmimetype("text/plain")
|
self.em.setmimetype("text/plain")
|
||||||
@ -77,7 +78,7 @@ class DiaExtractor:
|
|||||||
###### File type handler api, used by rclexecm ---------->
|
###### File type handler api, used by rclexecm ---------->
|
||||||
def openfile(self, params):
|
def openfile(self, params):
|
||||||
try:
|
try:
|
||||||
self.dia = GzipFile(params["filename:"], 'r')
|
self.dia = GzipFile(params["filename:"], 'rb')
|
||||||
# Dia files are sometimes not compressed. Quite weirdly,
|
# Dia files are sometimes not compressed. Quite weirdly,
|
||||||
# GzipFile does not complain until we try to read. Have to do it
|
# GzipFile does not complain until we try to read. Have to do it
|
||||||
# here to be able to retry an uncompressed open.
|
# here to be able to retry an uncompressed open.
|
||||||
@ -87,7 +88,7 @@ class DiaExtractor:
|
|||||||
except:
|
except:
|
||||||
# File not compressed ?
|
# File not compressed ?
|
||||||
try:
|
try:
|
||||||
self.dia = open(params["filename:"], 'r')
|
self.dia = open(params["filename:"], 'rb')
|
||||||
except:
|
except:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|||||||
@ -31,18 +31,10 @@ import rclconfig
|
|||||||
|
|
||||||
PY3 = sys.version > '3'
|
PY3 = sys.version > '3'
|
||||||
|
|
||||||
if PY3:
|
def makebytes(data):
|
||||||
def makebytes(data):
|
if type(data) == type(u''):
|
||||||
if isinstance(data, bytes):
|
return data.encode("UTF-8")
|
||||||
return data
|
return data
|
||||||
else:
|
|
||||||
return data.encode("UTF-8")
|
|
||||||
else:
|
|
||||||
def makebytes(data):
|
|
||||||
if isinstance(data, unicode):
|
|
||||||
return data.encode("UTF-8")
|
|
||||||
else:
|
|
||||||
return data
|
|
||||||
|
|
||||||
my_config = rclconfig.RclConfig()
|
my_config = rclconfig.RclConfig()
|
||||||
|
|
||||||
@ -189,7 +181,7 @@ class RclExecM:
|
|||||||
if len(self.mimetype):
|
if len(self.mimetype):
|
||||||
self.senditem("Mimetype", self.mimetype)
|
self.senditem("Mimetype", self.mimetype)
|
||||||
|
|
||||||
for nm,value in self.fields.iteritems():
|
for nm,value in self.fields.items():
|
||||||
#self.rclog("Senditem: [%s] -> [%s]" % (nm, value))
|
#self.rclog("Senditem: [%s] -> [%s]" % (nm, value))
|
||||||
self.senditem("%s:"%nm, value)
|
self.senditem("%s:"%nm, value)
|
||||||
self.fields = {}
|
self.fields = {}
|
||||||
@ -412,7 +404,7 @@ def main(proto, extract):
|
|||||||
ok, data, ipath, eof = extract.getipath(params)
|
ok, data, ipath, eof = extract.getipath(params)
|
||||||
if ok:
|
if ok:
|
||||||
debprint(ioout, "== Found entry for ipath %s (mimetype [%s]):" % \
|
debprint(ioout, "== Found entry for ipath %s (mimetype [%s]):" % \
|
||||||
(ipath, proto.mimetype))
|
(ipath, proto.mimetype.decode('cp1252')))
|
||||||
bdata = makebytes(data)
|
bdata = makebytes(data)
|
||||||
if debugDumpData or actAsSingle:
|
if debugDumpData or actAsSingle:
|
||||||
proto.breakwrite(ioout, bdata)
|
proto.breakwrite(ioout, bdata)
|
||||||
@ -429,7 +421,7 @@ def main(proto, extract):
|
|||||||
ecnt = ecnt + 1
|
ecnt = ecnt + 1
|
||||||
bdata = makebytes(data)
|
bdata = makebytes(data)
|
||||||
debprint(ioout, "== Entry %d dlen %d ipath %s (mimetype [%s]):" % \
|
debprint(ioout, "== Entry %d dlen %d ipath %s (mimetype [%s]):" % \
|
||||||
(ecnt, len(data), ipath, proto.mimetype))
|
(ecnt, len(data), ipath, proto.mimetype.decode('cp1252')))
|
||||||
if debugDumpData:
|
if debugDumpData:
|
||||||
proto.breakwrite(ioout, bdata)
|
proto.breakwrite(ioout, bdata)
|
||||||
ioout.write(b'\n')
|
ioout.write(b'\n')
|
||||||
|
|||||||
@ -854,12 +854,8 @@ class EventStreamReader(object):
|
|||||||
def parse(self, instream, outstream):
|
def parse(self, instream, outstream):
|
||||||
self.midistream = outstream
|
self.midistream = outstream
|
||||||
self.instream = instream
|
self.instream = instream
|
||||||
if PY3:
|
if type(instream) in (type(b''), type(u'')):
|
||||||
if type(instream) in (str, bytes):
|
self.instream = open(instream, 'rb')
|
||||||
self.instream = open(instream, 'rb')
|
|
||||||
else:
|
|
||||||
if type(instream) in (str, unicode):
|
|
||||||
self.instream = open(instream, 'rb')
|
|
||||||
self.parse_file_header()
|
self.parse_file_header()
|
||||||
for track in range(self.midistream.trackcount):
|
for track in range(self.midistream.trackcount):
|
||||||
trksz = self.parse_track_header()
|
trksz = self.parse_track_header()
|
||||||
|
|||||||
@ -39,18 +39,10 @@ else:
|
|||||||
import io
|
import io
|
||||||
import keyword, token, tokenize
|
import keyword, token, tokenize
|
||||||
|
|
||||||
if PY2:
|
def makebytes(data):
|
||||||
def makebytes(data):
|
if type(data) == type(u''):
|
||||||
if isinstance(data, unicode):
|
return data.encode("UTF-8")
|
||||||
return data.encode("UTF-8")
|
return data
|
||||||
else:
|
|
||||||
return data
|
|
||||||
else:
|
|
||||||
def makebytes(data):
|
|
||||||
if isinstance(data, bytes):
|
|
||||||
return data
|
|
||||||
else:
|
|
||||||
return data.encode("UTF-8")
|
|
||||||
|
|
||||||
#############################################################################
|
#############################################################################
|
||||||
### Python Source Parser (does Highlighting)
|
### Python Source Parser (does Highlighting)
|
||||||
|
|||||||
@ -80,9 +80,15 @@ class RarExtractor:
|
|||||||
def openfile(self, params):
|
def openfile(self, params):
|
||||||
self.currentindex = -1
|
self.currentindex = -1
|
||||||
try:
|
try:
|
||||||
self.rar = RarFile(params["filename:"])
|
# The previous versions passed the file name to
|
||||||
|
# RarFile. But the py3 version of this wants an str as
|
||||||
|
# input, which is wrong of course, as filenames are
|
||||||
|
# binary. Circumvented by passing the open file
|
||||||
|
f = open(params["filename:"], 'rb')
|
||||||
|
self.rar = RarFile(f)
|
||||||
return True
|
return True
|
||||||
except:
|
except Exception as err:
|
||||||
|
self.em.rclog("RarFile: %s"%err)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def getipath(self, params):
|
def getipath(self, params):
|
||||||
|
|||||||
@ -151,7 +151,7 @@ class OOExtractor:
|
|||||||
fn = params["filename:"]
|
fn = params["filename:"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
f = open(fn)
|
f = open(fn, 'rb')
|
||||||
data = f.read()
|
data = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
|
|||||||
@ -154,7 +154,7 @@ class OOExtractor:
|
|||||||
except:
|
except:
|
||||||
# To be checked. I'm under the impression that I get this when
|
# To be checked. I'm under the impression that I get this when
|
||||||
# nothing matches?
|
# nothing matches?
|
||||||
#self.em.rclog("no/bad metadata in %s" % fn)
|
#self.em.rclog("No/bad metadata in %s" % fn)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
docdata += b'</head>\n<body>\n'
|
docdata += b'</head>\n<body>\n'
|
||||||
|
|||||||
@ -61,10 +61,10 @@ else:
|
|||||||
styledoc = etree.fromstring(sheet)
|
styledoc = etree.fromstring(sheet)
|
||||||
transform = etree.XSLT(styledoc)
|
transform = etree.XSLT(styledoc)
|
||||||
doc = etree.fromstring(data)
|
doc = etree.fromstring(data)
|
||||||
return etree.tostring(transform(doc))
|
return bytes(transform(doc))
|
||||||
def apply_sheet_file(sheet, fn):
|
def apply_sheet_file(sheet, fn):
|
||||||
styledoc = etree.fromstring(sheet)
|
styledoc = etree.fromstring(sheet)
|
||||||
transform = etree.XSLT(styledoc)
|
transform = etree.XSLT(styledoc)
|
||||||
doc = etree.parse(fn)
|
doc = etree.parse(fn)
|
||||||
return etree.tostring(transform(doc))
|
return bytes(transform(doc))
|
||||||
|
|
||||||
|
|||||||
@ -124,7 +124,7 @@ class ZipExtractor:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if rclexecm.PY3:
|
if rclexecm.PY3:
|
||||||
# Note: python3 ZipFile wants an str file name, which
|
# Note: py3 ZipFile wants an str file name, which
|
||||||
# is wrong: file names are binary. But it accepts an
|
# is wrong: file names are binary. But it accepts an
|
||||||
# open file, and open() has no such restriction
|
# open file, and open() has no such restriction
|
||||||
f = open(filename, 'rb')
|
f = open(filename, 'rb')
|
||||||
|
|||||||
@ -17,12 +17,11 @@
|
|||||||
|
|
||||||
# Transform XML output from xls-dump.py into csv format.
|
# Transform XML output from xls-dump.py into csv format.
|
||||||
#
|
#
|
||||||
# Note: this would be difficult to make compatible with python 3 <= 3.4
|
# Note: this would be difficult to make compatible with python 3 <=
|
||||||
# because of the use of % interpolation on what should be bytes.
|
# 3.4 because of the use of % interpolation on what should be bytes.
|
||||||
# The python2 restriction is not a big issue at this point because
|
# % interpolation for bytes is available as of python 3.5, which is
|
||||||
# msodumper is not compatible with python3 anyway
|
# the minimum version supported.
|
||||||
# % interpolation for bytes is planned for python 3.5, at which point
|
|
||||||
# porting this module will become trivial.
|
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user