diff --git a/src/filters/rclinfo b/src/filters/rclinfo index 203e7e57..8047c842 100755 --- a/src/filters/rclinfo +++ b/src/filters/rclinfo @@ -32,13 +32,17 @@ class InfoExtractor: nodename, docdata = self.contents[index] nodename = self.em.htmlescape(nodename) docdata = self.em.htmlescape(docdata) + print("type(docdata) = %s type(nodename) = %s"% \ + (type(docdata), type(nodename)), file=sys.stderr) # strange whitespace to avoid changing the module tests (same as old) - docdata = b'\n\n
\n\n ' + \ - docdata + b'\n\n\n' + docdata + \ + b'\n \n\n' iseof = rclexecm.RclExecM.noteof if self.currentindex >= len(self.contents) -1: diff --git a/src/filters/rclkar b/src/filters/rclkar index 00432b15..940f13d0 100755 --- a/src/filters/rclkar +++ b/src/filters/rclkar @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # Read a .kar midi karaoke file and translate to recoll indexable format # This does not work with Python3 yet because python:midi doesn't diff --git a/src/filters/rclmpdf.py b/src/filters/rclmpdf.py index 0b5ba836..4d7e9634 100755 --- a/src/filters/rclmpdf.py +++ b/src/filters/rclmpdf.py @@ -120,24 +120,24 @@ class PDFExtractor: inheader = False inbody = False didcs = False - output = '' - cont = '' - for line in input.split('\n'): + output = b'' + cont = b'' + for line in input.split(b'\n'): line = cont + line - cont = '' - if re.search('', line): + cont = b'' + if re.search(b'', line): inheader = False - if re.search('', line): + if re.search(b'', line): inbody = False if inheader: if not didcs: - output += '\n' + output += b'\n' didcs = True - m = re.search(r'(.*
', line):
+ if re.search(b'', line):
inbody = True
- output += line + '\n'
+ output += line + b'\n'
return output
diff --git a/src/filters/rcltar b/src/filters/rcltar
index f597bb79..74aaecbd 100755
--- a/src/filters/rcltar
+++ b/src/filters/rcltar
@@ -23,15 +23,15 @@ class TarExtractor:
self.namen = []
def extractone(self, ipath):
- docdata = ""
+ docdata = b''
try:
info = self.tar.getmember(ipath)
if info.size > self.em.maxmembersize:
# skip
- docdata = ""
+ docdata = b''
self.em.rclog("extractone: entry %s size %d too big" %
(ipath, info.size))
- docdata = "" # raise TarError("Member too big")
+ docdata = b'' # raise TarError("Member too big")
else:
docdata = self.tar.extractfile(ipath).read()
ok = True
@@ -45,7 +45,7 @@ class TarExtractor:
def openfile(self, params):
self.currentindex = -1
try:
- self.tar = tarfile.open(name=params["filename:"],mode='r')
+ self.tar = tarfile.open(name=params["filename:"], mode='r')
#self.namen = [ y.name for y in filter(lambda z:z.isfile(),self.tar.getmembers())]
self.namen = [ y.name for y in [z for z in self.tar.getmembers() if z.isfile()]]
diff --git a/src/filters/rcltext.py b/src/filters/rcltext.py
index 847a80b2..f449dfe6 100755
--- a/src/filters/rcltext.py
+++ b/src/filters/rcltext.py
@@ -21,7 +21,7 @@ class TxtDump:
fn = params["filename:"]
# No charset, so recoll will have to use its config to guess it
- txt = ''
+ txt = b''
try:
f = open(fn, "rb")
txt += self.em.htmlescape(f.read())
@@ -29,7 +29,7 @@ class TxtDump:
self.em.rclog("TxtDump: %s : %s" % (fn, err))
return (False, "", "", rclexecm.RclExecM.eofnow)
- txt += ''
+ txt += b'
'
return (True, txt, "", rclexecm.RclExecM.eofnext)
###### File type handler api, used by rclexecm ---------->
diff --git a/src/filters/xlsxmltocsv.py b/src/filters/xlsxmltocsv.py
index cfc39304..7fa12e58 100755
--- a/src/filters/xlsxmltocsv.py
+++ b/src/filters/xlsxmltocsv.py
@@ -1,14 +1,33 @@
#!/usr/bin/env python2
+# Copyright (C) 2015 J.F.Dockes
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Transform XML output from xls-dump.py into csv format.
-# Note: msodumper is not compatible with python3.
+#
+# Note: this would be difficult to make compatible with python 3 <= 3.4
+# because of the use of % interpolation on what should be bytes.
+# The python2 restriction is not a big issue at this point because
+# msodumper is not compatible with python3 anyway.
+# % interpolation for bytes is planned for python 3.5, at which point
+# porting this module will become trivial.
from __future__ import print_function
import sys
import xml.sax
-sys.path.append(sys.path[0]+"/msodump.zip")
-from msodumper.globals import error
dtt = True
@@ -62,7 +81,7 @@ if __name__ == '__main__':
xml.sax.parse(sys.stdin, handler)
print(handler.output)
except BaseException as err:
- error("xml-parse: %s\n" % (str(sys.exc_info()[:2]),))
+ print("xml-parse: %s\n" % (str(sys.exc_info()[:2]),), file=sys.stderr)
sys.exit(1)
sys.exit(0)