first pass at converting the filters for python 2/3 compat
This commit is contained in:
parent
cc68331f3d
commit
f344e8fedd
@ -52,7 +52,7 @@ class PPTDumper(object):
|
||||
|
||||
try:
|
||||
dirstrm = strm.getDirectoryStreamByName(dirname)
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
error("getDirectoryStreamByName(%s): %s - %s\n" % (dirname,str(err),self.filepath))
|
||||
# The previous version was killed by the exception
|
||||
# here, so the equivalent is to break, but maybe there
|
||||
|
||||
@ -15,7 +15,7 @@ try:
|
||||
import pylzma
|
||||
from py7zlib import Archive7z
|
||||
except:
|
||||
print "RECFILTERROR HELPERNOTFOUND python:pylzma"
|
||||
print("RECFILTERROR HELPERNOTFOUND python:pylzma")
|
||||
sys.exit(1);
|
||||
|
||||
try:
|
||||
@ -40,19 +40,17 @@ class SevenZipExtractor:
|
||||
|
||||
def extractone(self, ipath):
|
||||
#self.em.rclog("extractone: [%s]" % ipath)
|
||||
docdata = ""
|
||||
docdata = b''
|
||||
try:
|
||||
docdata = self.sevenzip.getmember(ipath).read()
|
||||
ok = True
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog("extractone: failed: [%s]" % err)
|
||||
ok = False
|
||||
iseof = rclexecm.RclExecM.noteof
|
||||
if self.currentindex >= len(self.sevenzip.getnames()) -1:
|
||||
iseof = rclexecm.RclExecM.eofnext
|
||||
if isinstance(ipath, unicode):
|
||||
ipath = ipath.encode("utf-8")
|
||||
return (ok, docdata, ipath, iseof)
|
||||
return (ok, docdata, rclexecm.makebytes(ipath), iseof)
|
||||
|
||||
###### File type handler api, used by rclexecm ---------->
|
||||
def openfile(self, params):
|
||||
@ -71,7 +69,7 @@ class SevenZipExtractor:
|
||||
fp = open(filename, 'rb')
|
||||
self.sevenzip = Archive7z(fp)
|
||||
return True
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog("openfile: failed: [%s]" % err)
|
||||
return False
|
||||
|
||||
@ -84,7 +82,7 @@ class SevenZipExtractor:
|
||||
try:
|
||||
ipath = ipath.decode("utf-8")
|
||||
return self.extractone(ipath)
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
return (ok, data, ipath, eof)
|
||||
|
||||
def getnext(self, params):
|
||||
|
||||
@ -12,7 +12,7 @@ try:
|
||||
from mutagen.flac import FLAC
|
||||
from mutagen.oggvorbis import OggVorbis
|
||||
except:
|
||||
print "RECFILTERROR HELPERNOTFOUND python:mutagen"
|
||||
print("RECFILTERROR HELPERNOTFOUND python:mutagen")
|
||||
sys.exit(1);
|
||||
|
||||
# prototype for the html document we're returning
|
||||
@ -42,23 +42,24 @@ class AudioTagExtractor:
|
||||
#self.em.rclog("extractone %s %s" % (params["filename:"], params["mimetype:"]))
|
||||
docdata = ""
|
||||
ok = False
|
||||
if not params.has_key("mimetype:") or not params.has_key("filename:"):
|
||||
if not "mimetype:" in params or not "filename:" in params:
|
||||
self.em.rclog("extractone: no mime or file name")
|
||||
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
||||
filename = params["filename:"]
|
||||
mimetype = params["mimetype:"]
|
||||
try:
|
||||
if mimetype == "audio/mpeg":
|
||||
if mimetype == b'audio/mpeg':
|
||||
tags = MP3(filename, ID3=EasyID3)
|
||||
elif mimetype == "application/ogg":
|
||||
elif mimetype == b'application/ogg' or \
|
||||
mimetype == b'audio/x-vorbis+ogg':
|
||||
tags = OggVorbis(filename)
|
||||
elif mimetype == "application/x-flac" or \
|
||||
mimetype == "audio/x-flac" or \
|
||||
mimetype == "audio/flac":
|
||||
elif mimetype == b'application/x-flac' or \
|
||||
mimetype == 'audio/x-flac' or \
|
||||
mimetype == b'audio/flac':
|
||||
tags = FLAC(filename)
|
||||
else:
|
||||
raise Exception, "Bad mime type %s" % mimetype
|
||||
except Exception, err:
|
||||
raise Exception("Bad mime type %s" % mimetype)
|
||||
except Exception as err:
|
||||
self.em.rclog("extractone: extract failed: [%s]" % err)
|
||||
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
||||
|
||||
@ -66,21 +67,22 @@ class AudioTagExtractor:
|
||||
artist = ""
|
||||
title = ""
|
||||
try:
|
||||
album = self.em.htmlescape(tags["album"][0].encode("utf-8"))
|
||||
album = self.em.htmlescape(tags["album"][0])
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
artist = self.em.htmlescape(tags["artist"][0].encode("utf-8"))
|
||||
artist = self.em.htmlescape(tags["artist"][0])
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
title = self.em.htmlescape(tags["title"][0].encode("utf-8"))
|
||||
title = self.em.htmlescape(tags["title"][0])
|
||||
except:
|
||||
pass
|
||||
self.em.setmimetype("text/html")
|
||||
alldata = self.em.htmlescape(tags.pprint().encode("utf-8"))
|
||||
alldata = self.em.htmlescape(tags.pprint())
|
||||
alldata = alldata.replace("\n", "<br>")
|
||||
docdata = htmltemplate % (album, artist, title, alldata)
|
||||
docdata = (htmltemplate % (album, artist, title, alldata))\
|
||||
.encode('UTF-8')
|
||||
ok = True
|
||||
return (ok, docdata, "", rclexecm.RclExecM.eofnext)
|
||||
|
||||
|
||||
@ -2,6 +2,11 @@
|
||||
"""Extract Html files from a Microsoft Compiled Html Help file (.chm)
|
||||
Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
# Note: this is not converted to python3, libchm does not have a
|
||||
# python3 wrapper at this point (2015-11)
|
||||
|
||||
# Do we return individual chapters as html pages or concatenate everything?
|
||||
rclchm_catenate = 0
|
||||
# Use special html type to allow for mimeconf/mimeview Open magic,
|
||||
@ -23,13 +28,13 @@ import rclexecm
|
||||
try:
|
||||
from chm import chm,chmlib
|
||||
except:
|
||||
print "RECFILTERROR HELPERNOTFOUND python:chm"
|
||||
print("RECFILTERROR HELPERNOTFOUND python:chm")
|
||||
sys.exit(1);
|
||||
|
||||
try:
|
||||
from HTMLParser import HTMLParser
|
||||
except:
|
||||
print "RECFILTERROR HELPERNOTFOUND python:HTMLParser"
|
||||
print("RECFILTERROR HELPERNOTFOUND python:HTMLParser")
|
||||
sys.exit(1);
|
||||
|
||||
# Small helper routines
|
||||
@ -37,11 +42,11 @@ def getfile(chmfile, path):
|
||||
"""Extract internal file text from chm object, given path"""
|
||||
res, ui = chmfile.ResolveObject(path)
|
||||
if res != chmlib.CHM_RESOLVE_SUCCESS:
|
||||
#print "ResolveObject failed", path
|
||||
#print("ResolveObject failed: %s" % path, file=sys.stderr)
|
||||
return ""
|
||||
res, doc = chmfile.RetrieveObject(ui)
|
||||
if not res:
|
||||
print "RetrieveObject failed", path
|
||||
print("RetrieveObject failed: %s" % path, file=sys.stderr)
|
||||
return ""
|
||||
return doc
|
||||
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import print_function
|
||||
|
||||
# dia (http://live.gnome.org/Dia) file filter for recoll
|
||||
# stefan.friedel@iwr.uni-heidelberg.de 2012
|
||||
#
|
||||
@ -66,7 +68,7 @@ class DiaExtractor:
|
||||
try:
|
||||
docdata = self.ExtractDiaText()
|
||||
ok = True
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
ok = False
|
||||
iseof = rclexecm.RclExecM.eofnext
|
||||
self.em.setmimetype("text/plain")
|
||||
@ -76,7 +78,7 @@ class DiaExtractor:
|
||||
def openfile(self, params):
|
||||
try:
|
||||
self.dia = GzipFile(params["filename:"], 'r')
|
||||
# Dial files are sometimes not compressed. Quite weirdly,
|
||||
# Dia files are sometimes not compressed. Quite weirdly,
|
||||
# GzipFile does not complain until we try to read. Have to do it
|
||||
# here to be able to retry an uncompressed open.
|
||||
data = self.dia.readline()
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
import rclexec1
|
||||
@ -11,32 +12,32 @@ import os
|
||||
class WordProcessData:
|
||||
def __init__(self, em):
|
||||
self.em = em
|
||||
self.out = ""
|
||||
self.cont = ""
|
||||
self.out = b''
|
||||
self.cont = b''
|
||||
self.gotdata = False
|
||||
# Line with continued word (ending in -)
|
||||
# we strip the - which is not nice for actually hyphenated words.
|
||||
# What to do ?
|
||||
self.patcont = re.compile('''[\w][-]$''')
|
||||
self.patcont = re.compile(b'''[\w][-]$''')
|
||||
# Pattern for breaking continuation at last word start
|
||||
self.patws = re.compile('''([\s])([\w]+)(-)$''')
|
||||
self.patws = re.compile(b'''([\s])([\w]+)(-)$''')
|
||||
|
||||
def takeLine(self, line):
|
||||
if not self.gotdata:
|
||||
if line == "":
|
||||
if line == b'':
|
||||
return
|
||||
self.out = '<html><head><title></title>' + \
|
||||
'<meta http-equiv="Content-Type"' + \
|
||||
'content="text/html;charset=UTF-8">' + \
|
||||
'</head><body><p>'
|
||||
self.out = b'<html><head><title></title>' + \
|
||||
b'<meta http-equiv="Content-Type"' + \
|
||||
b'content="text/html;charset=UTF-8">' + \
|
||||
b'</head><body><p>'
|
||||
self.gotdata = True
|
||||
|
||||
if self.cont:
|
||||
line = self.cont + line
|
||||
self.cont = ""
|
||||
|
||||
if line == "\f":
|
||||
self.out += "</p><hr><p>"
|
||||
if line == b'\f':
|
||||
self.out += '</p><hr><p>'
|
||||
return
|
||||
|
||||
if self.patcont.search(line):
|
||||
@ -47,16 +48,16 @@ class WordProcessData:
|
||||
line = line[0:match.start(1)]
|
||||
else:
|
||||
self.cont = line
|
||||
line = ""
|
||||
line = b''
|
||||
|
||||
if line:
|
||||
self.out += self.em.htmlescape(line) + "<br>"
|
||||
self.out += self.em.htmlescape(line) + b'<br>'
|
||||
else:
|
||||
self.out += "<br>"
|
||||
self.out += b'<br>'
|
||||
|
||||
def wrapData(self):
|
||||
if self.gotdata:
|
||||
self.out += "</p></body></html>"
|
||||
self.out += b'</p></body></html>'
|
||||
self.em.setmimetype("text/html")
|
||||
return self.out
|
||||
|
||||
@ -65,7 +66,7 @@ class WordProcessData:
|
||||
# output HTML
|
||||
class WordPassData:
|
||||
def __init__(self, em):
|
||||
self.out = ""
|
||||
self.out = b''
|
||||
self.em = em
|
||||
|
||||
def takeLine(self, line):
|
||||
@ -96,8 +97,8 @@ class WordFilter:
|
||||
return False
|
||||
|
||||
def mimetype(self, fn):
|
||||
rtfprolog ="{\\rtf1"
|
||||
docprolog = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
|
||||
rtfprolog = b'{\\rtf1'
|
||||
docprolog = b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1'
|
||||
try:
|
||||
f = open(fn, "rb")
|
||||
except:
|
||||
@ -132,7 +133,7 @@ class WordFilter:
|
||||
mt = self.mimetype(fn)
|
||||
self.em.rclog("rcldoc.py: actual MIME type %s" % mt)
|
||||
if mt == "text/plain":
|
||||
return ([python, os.path.join(self.execdir, "rcltext.py")],
|
||||
return (["python", os.path.join(self.execdir, "rcltext.py")],
|
||||
WordPassData(self.em))
|
||||
elif mt == "text/rtf":
|
||||
cmd = ["python", os.path.join(self.execdir, "rclrtf.py"),
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
"""Extract Html content from an EPUB file (.epub)"""
|
||||
from __future__ import print_function
|
||||
|
||||
rclepub_html_mtype = "text/html"
|
||||
|
||||
@ -12,7 +13,7 @@ import rclexecm
|
||||
try:
|
||||
import epub
|
||||
except:
|
||||
print "RECFILTERROR HELPERNOTFOUND python:epub"
|
||||
print("RECFILTERROR HELPERNOTFOUND python:epub")
|
||||
sys.exit(1);
|
||||
|
||||
class rclEPUB:
|
||||
@ -63,11 +64,11 @@ class rclEPUB:
|
||||
if item is None:
|
||||
raise Exception("Item not found for id %s" % (id,))
|
||||
doc = self.book.read_item(item)
|
||||
doc = re.sub('''</[hH][eE][aA][dD]>''',
|
||||
'''<meta name="rclaptg" content="epub"></head>''', doc)
|
||||
doc = re.sub(b'''</[hH][eE][aA][dD]>''',
|
||||
b'''<meta name="rclaptg" content="epub"></head>''', doc)
|
||||
self.em.setmimetype(rclepub_html_mtype)
|
||||
return (True, doc, id, iseof)
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog("extractone: failed: [%s]" % err)
|
||||
return (False, "", id, iseof)
|
||||
|
||||
@ -76,11 +77,11 @@ class rclEPUB:
|
||||
self.currentindex = -1
|
||||
self.contents = []
|
||||
try:
|
||||
self.book = epub.open(params["filename:"])
|
||||
except Exception, err:
|
||||
self.book = epub.open_epub(params["filename:"].decode('UTF-8'))
|
||||
except Exception as err:
|
||||
self.em.rclog("openfile: epub.open failed: [%s]" % err)
|
||||
return False
|
||||
for id, item in self.book.opf.manifest.iteritems():
|
||||
for id, item in self.book.opf.manifest.items():
|
||||
if item.media_type == 'application/xhtml+xml':
|
||||
self.contents.append(id)
|
||||
return True
|
||||
|
||||
@ -26,6 +26,8 @@
|
||||
# this would be too slow. So this helps implement a permanent script
|
||||
# to repeatedly execute single commands.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import subprocess
|
||||
import rclexecm
|
||||
|
||||
@ -74,8 +76,8 @@ class Executor:
|
||||
# params["mimetype:"]))
|
||||
self.flt.reset()
|
||||
ok = False
|
||||
if not params.has_key("filename:"):
|
||||
self.em.rclog("extractone: no mime or file name")
|
||||
if not "filename:" in params:
|
||||
self.em.rclog("extractone: no file name")
|
||||
return (ok, "", "", rclexecm.RclExecM.eofnow)
|
||||
|
||||
fn = params["filename:"]
|
||||
|
||||
@ -16,6 +16,9 @@
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
########################################################
|
||||
## Recoll multifilter communication module and utilities
|
||||
#
|
||||
# All data is binary. This is important for Python3
|
||||
# All parameter names are converted to and processed as str/unicode
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
@ -26,6 +29,21 @@ import shutil
|
||||
import getopt
|
||||
import rclconfig
|
||||
|
||||
PY3 = sys.version > '3'
|
||||
|
||||
if PY3:
|
||||
def makebytes(data):
|
||||
if isinstance(data, bytes):
|
||||
return data
|
||||
else:
|
||||
return data.encode("UTF-8")
|
||||
else:
|
||||
def makebytes(data):
|
||||
if isinstance(data, unicode):
|
||||
return data.encode("UTF-8")
|
||||
else:
|
||||
return data
|
||||
|
||||
my_config = rclconfig.RclConfig()
|
||||
|
||||
############################################
|
||||
@ -46,7 +64,7 @@ class RclExecM:
|
||||
self.myname = os.path.basename(sys.argv[0])
|
||||
except:
|
||||
self.myname = "???"
|
||||
self.mimetype = ""
|
||||
self.mimetype = b""
|
||||
|
||||
if os.environ.get("RECOLL_FILTER_MAXMEMBERKB"):
|
||||
self.maxmembersize = \
|
||||
@ -60,7 +78,7 @@ class RclExecM:
|
||||
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
|
||||
self.debugfile = None
|
||||
if self.debugfile:
|
||||
self.errfout = open(self.debugfile, "ab")
|
||||
self.errfout = open(self.debugfile, "a")
|
||||
else:
|
||||
self.errfout = sys.stderr
|
||||
|
||||
@ -93,77 +111,84 @@ class RclExecM:
|
||||
# Note: tried replacing this with a multiple replacer according to
|
||||
# http://stackoverflow.com/a/15221068, which was **10 times** slower
|
||||
def htmlescape(self, txt):
|
||||
# This must stay first (it somehow had managed to skip after
|
||||
# the next line, with rather interesting results)
|
||||
txt = txt.replace("&", "&")
|
||||
|
||||
txt = txt.replace("<", "<")
|
||||
txt = txt.replace(">", ">")
|
||||
txt = txt.replace('"', """)
|
||||
# & must stay first (it somehow had managed to skip
|
||||
# after the next replace, with rather interesting results)
|
||||
try:
|
||||
txt = txt.replace(b'&', b'&').replace(b'<', b'<').\
|
||||
replace(b'>', b'>').replace(b'"', b'"')
|
||||
except:
|
||||
txt = txt.replace("&", "&").replace("<", "<").\
|
||||
replace(">", ">").replace("\"", """)
|
||||
return txt
|
||||
|
||||
# Our worker sometimes knows the mime types of the data it sends
|
||||
def setmimetype(self, mt):
|
||||
self.mimetype = mt
|
||||
self.mimetype = makebytes(mt)
|
||||
|
||||
# Read single parameter from process input: line with param name and size
|
||||
# followed by data.
|
||||
# followed by data. The param name is returned as str/unicode, the data
|
||||
# as bytes
|
||||
def readparam(self):
|
||||
s = sys.stdin.readline()
|
||||
if s == '':
|
||||
if PY3:
|
||||
inf = sys.stdin.buffer
|
||||
else:
|
||||
inf = sys.stdin
|
||||
s = inf.readline()
|
||||
if s == b'':
|
||||
sys.exit(0)
|
||||
# self.rclog(": EOF on input", 1, 0)
|
||||
|
||||
s = s.rstrip("\n")
|
||||
s = s.rstrip(b'\n')
|
||||
|
||||
if s == "":
|
||||
return ("","")
|
||||
if s == b'':
|
||||
return ('', b'')
|
||||
l = s.split()
|
||||
if len(l) != 2:
|
||||
self.rclog("bad line: [" + s + "]", 1, 1)
|
||||
self.rclog(b'bad line: [' + s + b']', 1, 1)
|
||||
|
||||
paramname = l[0].lower()
|
||||
paramname = l[0].decode('ASCII').lower()
|
||||
paramsize = int(l[1])
|
||||
if paramsize > 0:
|
||||
paramdata = sys.stdin.read(paramsize)
|
||||
paramdata = inf.read(paramsize)
|
||||
if len(paramdata) != paramsize:
|
||||
self.rclog("Bad read: wanted %d, got %d" %
|
||||
(paramsize, len(paramdata)), 1, 1)
|
||||
else:
|
||||
paramdata = ""
|
||||
paramdata = b''
|
||||
|
||||
#self.rclog("paramname [%s] paramsize %d value [%s]" %
|
||||
# (paramname, paramsize, paramdata))
|
||||
return (paramname, paramdata)
|
||||
|
||||
if PY3:
|
||||
def senditem(self, nm, len, data):
|
||||
sys.stdout.buffer.write(makebytes("%s: %d\n" % (nm, len)))
|
||||
self.breakwrite(sys.stdout.buffer, makebytes(data))
|
||||
else:
|
||||
def senditem(self, nm, len, data):
|
||||
sys.stdout.write(makebytes("%s: %d\n" % (nm, len)))
|
||||
self.breakwrite(sys.stdout, makebytes(data))
|
||||
|
||||
# Send answer: document, ipath, possible eof.
|
||||
def answer(self, docdata, ipath, iseof = noteof, iserror = noerror):
|
||||
|
||||
if iserror != RclExecM.fileerror and iseof != RclExecM.eofnow:
|
||||
if isinstance(docdata, unicode):
|
||||
self.rclog("GOT UNICODE for ipath [%s]" % (ipath,))
|
||||
docdata = docdata.encode("UTF-8")
|
||||
|
||||
print("Document: %d" % len(docdata))
|
||||
self.breakwrite(sys.stdout, docdata)
|
||||
self.senditem("Document", len(docdata), docdata)
|
||||
|
||||
if len(ipath):
|
||||
print("Ipath: %d" % len(ipath))
|
||||
sys.stdout.write(ipath)
|
||||
self.senditem("Ipath", len(ipath), ipath)
|
||||
|
||||
if len(self.mimetype):
|
||||
print("Mimetype: %d" % len(self.mimetype))
|
||||
sys.stdout.write(self.mimetype)
|
||||
self.senditem("Mimetype", len(self.mimetype), self.mimetype)
|
||||
|
||||
# If we're at the end of the contents, say so
|
||||
if iseof == RclExecM.eofnow:
|
||||
print("Eofnow: 0")
|
||||
self.senditem("Eofnow", 0, b'')
|
||||
elif iseof == RclExecM.eofnext:
|
||||
print("Eofnext: 0")
|
||||
self.senditem("Eofnext", 0, b'')
|
||||
if iserror == RclExecM.subdocerror:
|
||||
print("Subdocerror: 0")
|
||||
self.senditem("Subdocerror", 0, b'')
|
||||
elif iserror == RclExecM.fileerror:
|
||||
print("Fileerror: 0")
|
||||
self.senditem("Fileerror", 0, b'')
|
||||
|
||||
# End of message
|
||||
print()
|
||||
@ -173,7 +198,8 @@ class RclExecM:
|
||||
def processmessage(self, processor, params):
|
||||
|
||||
# We must have a filename entry (even empty). Else exit
|
||||
if not params.has_key("filename:"):
|
||||
if "filename:" not in params:
|
||||
print("%s" % params, file=sys.stderr)
|
||||
self.rclog("no filename ??", 1, 1)
|
||||
|
||||
# If we're given a file name, open it.
|
||||
@ -182,7 +208,7 @@ class RclExecM:
|
||||
if not processor.openfile(params):
|
||||
self.answer("", "", iserror = RclExecM.fileerror)
|
||||
return
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.rclog("processmessage: openfile raised: [%s]" % err)
|
||||
self.answer("", "", iserror = RclExecM.fileerror)
|
||||
return
|
||||
@ -192,11 +218,11 @@ class RclExecM:
|
||||
eof = True
|
||||
self.mimetype = ""
|
||||
try:
|
||||
if params.has_key("ipath:") and len(params["ipath:"]):
|
||||
if "ipath:" in params and len(params["ipath:"]):
|
||||
ok, data, ipath, eof = processor.getipath(params)
|
||||
else:
|
||||
ok, data, ipath, eof = processor.getnext(params)
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.answer("", "", eof, RclExecM.fileerror)
|
||||
return
|
||||
|
||||
@ -311,7 +337,7 @@ def main(proto, extract):
|
||||
|
||||
actAsSingle = False
|
||||
debugDumpData = False
|
||||
ipath = ""
|
||||
ipath = b""
|
||||
|
||||
args = sys.argv[1:]
|
||||
opts, args = getopt.getopt(args, "hdsi:w:")
|
||||
@ -321,7 +347,7 @@ def main(proto, extract):
|
||||
elif opt in ['-s']:
|
||||
actAsSingle = True
|
||||
elif opt in ['-i']:
|
||||
ipath = arg
|
||||
ipath = makebytes(arg)
|
||||
elif opt in ['-w']:
|
||||
ret = which(arg)
|
||||
if ret:
|
||||
@ -344,17 +370,17 @@ def main(proto, extract):
|
||||
lst = fileout.split(':')
|
||||
mimetype = lst[len(lst)-1].strip()
|
||||
lst = mimetype.split(';')
|
||||
return lst[0].strip()
|
||||
return makebytes(lst[0].strip())
|
||||
|
||||
def mimetype_with_xdg(f):
|
||||
cmd = 'xdg-mime query filetype "' + f + '"'
|
||||
return os.popen(cmd).read().strip()
|
||||
return makebytes(os.popen(cmd).read().strip())
|
||||
|
||||
def debprint(s):
|
||||
def debprint(out, s):
|
||||
if not actAsSingle:
|
||||
print(s)
|
||||
proto.breakwrite(out, makebytes(s+'\n'))
|
||||
|
||||
params = {'filename:': args[0]}
|
||||
params = {'filename:': makebytes(args[0])}
|
||||
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer
|
||||
mimetype = mimetype_with_xdg(args[0])
|
||||
params['mimetype:'] = mimetype
|
||||
@ -363,19 +389,20 @@ def main(proto, extract):
|
||||
print("Open error", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if ipath != "" or actAsSingle:
|
||||
if PY3:
|
||||
ioout = sys.stdout.buffer
|
||||
else:
|
||||
ioout = sys.stdout
|
||||
if ipath != b"" or actAsSingle:
|
||||
params['ipath:'] = ipath
|
||||
ok, data, ipath, eof = extract.getipath(params)
|
||||
if ok:
|
||||
debprint("== Found entry for ipath %s (mimetype [%s]):" % \
|
||||
debprint(ioout, "== Found entry for ipath %s (mimetype [%s]):" % \
|
||||
(ipath, proto.mimetype))
|
||||
if isinstance(data, unicode):
|
||||
bdata = data.encode("UTF-8")
|
||||
else:
|
||||
bdata = data
|
||||
bdata = makebytes(data)
|
||||
if debugDumpData or actAsSingle:
|
||||
proto.breakwrite(sys.stdout, bdata)
|
||||
print()
|
||||
proto.breakwrite(ioout, bdata)
|
||||
ioout.write(b'\n')
|
||||
sys.exit(0)
|
||||
else:
|
||||
print("Got error, eof %d"%eof, file=sys.stderr)
|
||||
@ -386,15 +413,12 @@ def main(proto, extract):
|
||||
ok, data, ipath, eof = extract.getnext(params)
|
||||
if ok:
|
||||
ecnt = ecnt + 1
|
||||
debprint("== Entry %d ipath %s (mimetype [%s]):" % \
|
||||
(ecnt, ipath, proto.mimetype))
|
||||
if isinstance(data, unicode):
|
||||
bdata = data.encode("UTF-8")
|
||||
else:
|
||||
bdata = data
|
||||
bdata = makebytes(data)
|
||||
debprint(ioout, "== Entry %d dlen %d ipath %s (mimetype [%s]):" % \
|
||||
(ecnt, len(data), ipath, proto.mimetype))
|
||||
if debugDumpData:
|
||||
proto.breakwrite(sys.stdout, bdata)
|
||||
print()
|
||||
proto.breakwrite(ioout, bdata)
|
||||
ioout.write(b'\n')
|
||||
if eof != RclExecM.noteof:
|
||||
sys.exit(0)
|
||||
else:
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
|
||||
# Read an ICS file, break it into "documents" which are events, todos,
|
||||
# or journal entries, and interface with recoll execm
|
||||
@ -13,22 +14,22 @@ import rclexecm
|
||||
import sys
|
||||
|
||||
# Decide how we'll process the file.
|
||||
modules = ('internal', 'icalendar', 'vobject')
|
||||
usemodule = 'internal'
|
||||
modules = ("internal", "icalendar", "vobject")
|
||||
usemodule = "internal"
|
||||
forcevobject = 0
|
||||
if usemodule != 'internal':
|
||||
if usemodule != "internal":
|
||||
try:
|
||||
if forcevobject:
|
||||
raise Exception
|
||||
from icalendar import Calendar, Event
|
||||
usemodule = 'icalendar'
|
||||
usemodule = "icalendar"
|
||||
except:
|
||||
try:
|
||||
import vobject
|
||||
usemodule = 'vobject'
|
||||
usemodule = "vobject"
|
||||
except:
|
||||
print "RECFILTERROR HELPERNOTFOUND python:icalendar"
|
||||
print "RECFILTERROR HELPERNOTFOUND python:vobject"
|
||||
print("RECFILTERROR HELPERNOTFOUND python:icalendar")
|
||||
print("RECFILTERROR HELPERNOTFOUND python:vobject")
|
||||
sys.exit(1);
|
||||
|
||||
|
||||
@ -55,32 +56,32 @@ class IcalExtractor:
|
||||
self.file = params["filename:"]
|
||||
|
||||
try:
|
||||
calstr = open(self.file, 'rb')
|
||||
except Exception, e:
|
||||
calstr = open(self.file, "rb")
|
||||
except Exception as e:
|
||||
self.em.rclog("Openfile: open: %s" % str(e))
|
||||
return False
|
||||
|
||||
self.currentindex = -1
|
||||
|
||||
if usemodule == 'internal':
|
||||
if usemodule == "internal":
|
||||
self.contents = ICalSimpleSplitter().splitcalendar(calstr)
|
||||
elif usemodule == 'icalendar':
|
||||
elif usemodule == "icalendar":
|
||||
try:
|
||||
cal = Calendar.from_string(calstr.read())
|
||||
except Exception, e:
|
||||
except Exception as e:
|
||||
self.em.rclog("Openfile: read or parse error: %s" % str(e))
|
||||
return False
|
||||
self.contents = cal.walk()
|
||||
self.contents = [item.as_string() for item in self.contents
|
||||
if (item.name == 'VEVENT' or item.name == 'VTODO'
|
||||
or item.name == 'VJOURNAL')]
|
||||
if (item.name == "VEVENT" or item.name == "VTODO"
|
||||
or item.name == "VJOURNAL")]
|
||||
else:
|
||||
try:
|
||||
cal = vobject.readOne(calstr)
|
||||
except Exception, e:
|
||||
except Exception as e:
|
||||
self.em.rclog("Openfile: cant parse object: %s" % str(e))
|
||||
return False
|
||||
for lstnm in ('vevent_list', 'vtodo_list', 'vjournal_list'):
|
||||
for lstnm in ("vevent_list", "vtodo_list", "vjournal_list"):
|
||||
lst = getattr(cal, lstnm, [])
|
||||
for ev in lst:
|
||||
self.contents.append(ev.serialize())
|
||||
@ -90,6 +91,9 @@ class IcalExtractor:
|
||||
|
||||
def getipath(self, params):
|
||||
try:
|
||||
if params["ipath:"] == b'':
|
||||
index = 0
|
||||
else:
|
||||
index = int(params["ipath:"])
|
||||
except:
|
||||
return (False, "", "", True)
|
||||
@ -100,7 +104,7 @@ class IcalExtractor:
|
||||
if self.currentindex == -1:
|
||||
# Return "self" doc
|
||||
self.currentindex = 0
|
||||
self.em.setmimetype('text/plain')
|
||||
self.em.setmimetype(b'text/plain')
|
||||
if len(self.contents) == 0:
|
||||
eof = rclexecm.RclExecM.eofnext
|
||||
else:
|
||||
@ -121,44 +125,44 @@ class ICalSimpleSplitter:
|
||||
# Note that if an 'interesting' element is nested inside another one,
|
||||
# it will not be extracted (stay as text in external event). This is
|
||||
# not an issue and I don't think it can happen with the current list
|
||||
interesting = ('VTODO', 'VEVENT', 'VJOURNAL')
|
||||
interesting = (b'VTODO', b'VEVENT', b'VJOURNAL')
|
||||
|
||||
def splitcalendar(self, fin):
|
||||
curblkname = ''
|
||||
curblk = ''
|
||||
curblkname = b''
|
||||
curblk = b''
|
||||
|
||||
lo = []
|
||||
for line in fin:
|
||||
line = line.rstrip()
|
||||
if line == '':
|
||||
if line == b'':
|
||||
continue
|
||||
|
||||
if curblkname:
|
||||
curblk = curblk + line + "\n"
|
||||
curblk = curblk + line + b'\n'
|
||||
|
||||
l = line.split(":")
|
||||
l = line.split(b':')
|
||||
if len(l) < 2:
|
||||
continue
|
||||
|
||||
# If not currently inside a block and we see an
|
||||
# 'interesting' BEGIN, start block
|
||||
if curblkname == '' and l[0].upper() == "BEGIN" :
|
||||
if curblkname == b'' and l[0].upper() == b'BEGIN':
|
||||
name = l[1].upper()
|
||||
if name in ICalSimpleSplitter.interesting:
|
||||
curblkname = name
|
||||
curblk = curblk + line + "\n"
|
||||
curblk = curblk + line + b'\n'
|
||||
|
||||
# If currently accumulating block lines, check for end
|
||||
if curblkname and l[0].upper() == "END" and \
|
||||
if curblkname and l[0].upper() == b'END' and \
|
||||
l[1].upper() == curblkname:
|
||||
lo.append(curblk)
|
||||
curblkname = ''
|
||||
curblk = ''
|
||||
curblkname = b''
|
||||
curblk = b''
|
||||
|
||||
if curblk:
|
||||
lo.append(curblk)
|
||||
curblkname = ''
|
||||
curblk = ''
|
||||
curblkname = b''
|
||||
curblk = b''
|
||||
|
||||
return lo
|
||||
|
||||
|
||||
@ -1,11 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Python-based Image Tag extractor for Recoll. This is less thorough than the
|
||||
# Perl-based rclimg script, but useful if you don't want to have to install Perl
|
||||
# (e.g. on Windows).
|
||||
# Python-based Image Tag extractor for Recoll. This is less thorough
|
||||
# than the Perl-based rclimg script, but useful if you don't want to
|
||||
# have to install Perl (e.g. on Windows).
|
||||
#
|
||||
# Uses pyexiv2. Also tried Pillow, found it useless for tags.
|
||||
#
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import os
|
||||
@ -15,7 +16,7 @@ import re
|
||||
try:
|
||||
import pyexiv2
|
||||
except:
|
||||
print "RECFILTERROR HELPERNOTFOUND python:pyexiv2"
|
||||
print("RECFILTERROR HELPERNOTFOUND python:pyexiv2")
|
||||
sys.exit(1);
|
||||
|
||||
khexre = re.compile('.*\.0[xX][0-9a-fA-F]+$')
|
||||
@ -48,7 +49,7 @@ class ImgTagExtractor:
|
||||
def extractone(self, params):
|
||||
#self.em.rclog("extractone %s" % params["filename:"])
|
||||
ok = False
|
||||
if not params.has_key("filename:"):
|
||||
if "filename:" not in params:
|
||||
self.em.rclog("extractone: no file name")
|
||||
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
||||
filename = params["filename:"]
|
||||
@ -62,11 +63,11 @@ class ImgTagExtractor:
|
||||
# we skip numeric keys and undecoded makernote data
|
||||
if k != 'Exif.Photo.MakerNote' and not khexre.match(k):
|
||||
mdic[k] = str(metadata[k].raw_value)
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog("extractone: extract failed: [%s]" % err)
|
||||
return (ok, "", "", rclexecm.RclExecM.eofnow)
|
||||
|
||||
docdata = "<html><head>\n"
|
||||
docdata = b'<html><head>\n'
|
||||
|
||||
ttdata = set()
|
||||
for k in pyexiv2_titles:
|
||||
@ -77,25 +78,28 @@ class ImgTagExtractor:
|
||||
for v in ttdata:
|
||||
v = v.replace('[', '').replace(']', '').replace("'", "")
|
||||
title += v + " "
|
||||
docdata += '<title>' + title + '</title>\n'
|
||||
docdata += rclexecm.makebytes("<title>" + title + "</title>\n")
|
||||
|
||||
for k in exiv2_dates:
|
||||
if k in mdic:
|
||||
# Recoll wants: %Y-%m-%d %H:%M:%S.
|
||||
# We get 2014:06:27 14:58:47
|
||||
dt = mdic[k].replace(':', '-', 2)
|
||||
docdata += '<meta name="date" content="' + dt + '">\n'
|
||||
dt = mdic[k].replace(":", "-", 2)
|
||||
docdata += b'<meta name="date" content="' + \
|
||||
rclexecm.makebytes(dt) + b'">\n'
|
||||
break
|
||||
|
||||
for k,v in mdic.iteritems():
|
||||
for k,v in mdic.items():
|
||||
if k == 'Xmp.digiKam.TagsList':
|
||||
docdata += '<meta name="keywords" content="' + \
|
||||
self.em.htmlescape(mdic[k]) + '">\n'
|
||||
docdata += b'<meta name="keywords" content="' + \
|
||||
rclexecm.makebytes(self.em.htmlescape(mdic[k])) + \
|
||||
b'">\n'
|
||||
|
||||
docdata += "</head><body>\n"
|
||||
for k,v in mdic.iteritems():
|
||||
docdata += k + " : " + self.em.htmlescape(mdic[k]) + "<br />\n"
|
||||
docdata += "</body></html>"
|
||||
docdata += b'</head><body>\n'
|
||||
for k,v in mdic.items():
|
||||
docdata += rclexecm.makebytes(k + " : " + \
|
||||
self.em.htmlescape(mdic[k]) + "<br />\n")
|
||||
docdata += b'</body></html>'
|
||||
|
||||
self.em.setmimetype("text/html")
|
||||
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
# Read a file in GNU info format and output its nodes as subdocs,
|
||||
# interfacing with recoll execm
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
import sys
|
||||
@ -16,18 +17,6 @@ import subprocess
|
||||
# Some info source docs contain charset info like:
|
||||
# @documentencoding ISO-2022-JP
|
||||
# But this seems to be absent from outputs.
|
||||
htmltemplate = '''
|
||||
<html>
|
||||
<head>
|
||||
<title>%s</title>
|
||||
<meta name="rclaptg" content="gnuinfo">
|
||||
</head>
|
||||
<body>
|
||||
<pre style="white-space: pre-wrap">
|
||||
%s
|
||||
</pre></body>
|
||||
</html>
|
||||
'''
|
||||
|
||||
# RclExecm interface
|
||||
class InfoExtractor:
|
||||
@ -43,8 +32,13 @@ class InfoExtractor:
|
||||
nodename, docdata = self.contents[index]
|
||||
nodename = self.em.htmlescape(nodename)
|
||||
docdata = self.em.htmlescape(docdata)
|
||||
|
||||
docdata = htmltemplate % (nodename, docdata)
|
||||
# strange whitespace to avoid changing the module tests (same as old)
|
||||
docdata = b'\n<html>\n <head>\n <title>' + nodename + \
|
||||
b'</title>\n' + \
|
||||
' <meta name="rclaptg" content="gnuinfo">\n' + \
|
||||
b' </head>\n <body>\n' + \
|
||||
b' <pre style="white-space: pre-wrap">\n ' + \
|
||||
docdata + b'\n </pre></body>\n</html>\n'
|
||||
|
||||
iseof = rclexecm.RclExecM.noteof
|
||||
if self.currentindex >= len(self.contents) -1:
|
||||
@ -60,19 +54,18 @@ class InfoExtractor:
|
||||
self.em.rclog("Openfile: %s is not a file" % self.file)
|
||||
return False
|
||||
|
||||
cmd = "info --subnodes -o - -f " + self.file
|
||||
cmd = b'info --subnodes -o - -f ' + self.file
|
||||
nullstream = open("/dev/null", 'w')
|
||||
try:
|
||||
infostream = subprocess.Popen(cmd, shell=True, bufsize=1,
|
||||
stderr=nullstream,
|
||||
stdout=subprocess.PIPE).stdout
|
||||
except Exception, e:
|
||||
except Exception as e:
|
||||
# Consider this as permanently fatal.
|
||||
self.em.rclog("Openfile: exec info: %s" % str(e))
|
||||
print "RECFILTERROR HELPERNOTFOUND info"
|
||||
print("RECFILTERROR HELPERNOTFOUND info")
|
||||
sys.exit(1);
|
||||
|
||||
|
||||
self.currentindex = -1
|
||||
|
||||
self.contents = InfoSimpleSplitter().splitinfo(self.file, infostream)
|
||||
@ -117,9 +110,9 @@ class InfoSimpleSplitter:
|
||||
index = 0
|
||||
listout = []
|
||||
node_dict = {}
|
||||
node = ""
|
||||
node = b''
|
||||
infofile = os.path.basename(filename)
|
||||
nodename = "Unknown"
|
||||
nodename = b'Unknown'
|
||||
|
||||
for line in fin:
|
||||
|
||||
@ -128,41 +121,41 @@ class InfoSimpleSplitter:
|
||||
# beginning with spaces (it's a bug probably, only seen it once)
|
||||
# Maybe we'd actually be better off directly interpreting the
|
||||
# info files
|
||||
if gotblankline and line.lstrip(" ").startswith("File: "):
|
||||
if gotblankline and line.lstrip(b' ').startswith(b'File: '):
|
||||
prevnodename = nodename
|
||||
line = line.rstrip("\n\r")
|
||||
pairs = line.split(",")
|
||||
up = "Top"
|
||||
line = line.rstrip(b'\n\r')
|
||||
pairs = line.split(b',')
|
||||
up = b'Top'
|
||||
nodename = str(index)
|
||||
try:
|
||||
for pair in pairs:
|
||||
name, value = pair.split(':')
|
||||
name = name.strip(" ")
|
||||
value = value.strip(" ")
|
||||
if name == "Node":
|
||||
name, value = pair.split(b':')
|
||||
name = name.strip(b' ')
|
||||
value = value.strip(b' ')
|
||||
if name == b'Node':
|
||||
nodename = value
|
||||
if name == "Up":
|
||||
if name == b'Up':
|
||||
up = value
|
||||
if name == "File":
|
||||
if name == b'File':
|
||||
infofile = value
|
||||
except:
|
||||
print >> sys.stderr, "rclinfo: bad line in %s: [%s]\n" % \
|
||||
(infofile, line)
|
||||
except Exception as err:
|
||||
print("rclinfo: bad line in %s: [%s] %s\n" % \
|
||||
(infofile, line, err), file = sys.stderr)
|
||||
nodename = prevnodename
|
||||
node += line
|
||||
continue
|
||||
|
||||
if node_dict.has_key(nodename):
|
||||
print >> sys.stderr, "Info file", filename, \
|
||||
"Dup node: ", nodename
|
||||
if nodename in node_dict:
|
||||
print("Info file %s Dup node: %s" % (filename, nodename), \
|
||||
file=sys.stderr)
|
||||
node_dict[nodename] = up
|
||||
|
||||
if index != 0:
|
||||
listout.append((prevnodename, node))
|
||||
node = ""
|
||||
node = b''
|
||||
index += 1
|
||||
|
||||
if line.rstrip("\n\r") == '':
|
||||
if line.rstrip(b'\n\r') == b'':
|
||||
gotblankline = 1
|
||||
else:
|
||||
gotblankline = 0
|
||||
@ -170,7 +163,7 @@ class InfoSimpleSplitter:
|
||||
node += line
|
||||
|
||||
# File done, add last dangling node
|
||||
if node != "":
|
||||
if node != b'':
|
||||
listout.append((nodename, node))
|
||||
|
||||
# Compute node paths (concatenate "Up" values), to be used
|
||||
@ -178,34 +171,34 @@ class InfoSimpleSplitter:
|
||||
# the info file tree is bad
|
||||
listout1 = []
|
||||
for nodename, node in listout:
|
||||
title = ""
|
||||
title = b''
|
||||
loop = 0
|
||||
error = 0
|
||||
while nodename != "Top":
|
||||
title = nodename + " / " + title
|
||||
if node_dict.has_key(nodename):
|
||||
while nodename != b'Top':
|
||||
title = nodename + b' / ' + title
|
||||
if nodename in node_dict:
|
||||
nodename = node_dict[nodename]
|
||||
else:
|
||||
print >> sys.stderr, \
|
||||
print(
|
||||
"Infofile: node's Up does not exist: file %s, path %s, up [%s]" % \
|
||||
(infofile, title, nodename)
|
||||
(infofile, title, nodename), sys.stderr)
|
||||
error = 1
|
||||
break
|
||||
loop += 1
|
||||
if loop > 50:
|
||||
print >> sys.stderr, "Infofile: bad tree (looping)", \
|
||||
infofile
|
||||
print("Infofile: bad tree (looping) %s" % infofile, \
|
||||
file = sys.stderr)
|
||||
error = 1
|
||||
break
|
||||
|
||||
if error:
|
||||
continue
|
||||
|
||||
if title == "":
|
||||
if title == b'':
|
||||
title = infofile
|
||||
else:
|
||||
title = infofile + " / " + title
|
||||
title = title.rstrip(" / ")
|
||||
title = infofile + b' / ' + title
|
||||
title = title.rstrip(b' / ')
|
||||
listout1.append((title, node))
|
||||
|
||||
return listout1
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Read a .kar midi karaoke file and translate to recoll indexable format
|
||||
# This does not work with Python3 yet because python:midi doesn't
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
import sys
|
||||
@ -15,9 +17,9 @@ except:
|
||||
pass
|
||||
|
||||
try:
|
||||
import midi
|
||||
from midi import midi
|
||||
except:
|
||||
print "RECFILTERROR HELPERNOTFOUND python:midi"
|
||||
print("RECFILTERROR HELPERNOTFOUND python:midi")
|
||||
sys.exit(1);
|
||||
|
||||
try:
|
||||
@ -106,12 +108,12 @@ class KarTextExtractor:
|
||||
if data:
|
||||
try:
|
||||
data = data.decode(self.encoding, 'ignore')
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog("Decode failed: " + str(err))
|
||||
return ""
|
||||
try:
|
||||
data = data.encode('utf-8')
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog("Encode failed: " + str(err))
|
||||
return ""
|
||||
|
||||
@ -127,7 +129,7 @@ class KarTextExtractor:
|
||||
just one our users could use if there is trouble with guessing
|
||||
encodings'''
|
||||
|
||||
rexp = r'\(([^\)]+)\)\.[a-zA-Z]+$'
|
||||
rexp = b'''\(([^\)]+)\)\.[a-zA-Z]+$'''
|
||||
m = re.search(rexp, fn)
|
||||
if m:
|
||||
return m.group(1)
|
||||
@ -165,7 +167,7 @@ class KarTextExtractor:
|
||||
if count > 0:
|
||||
confidence = 1.0
|
||||
encoding = code
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog("stopwords-based classifier failed: %s" % err)
|
||||
return (encoding, confidence)
|
||||
|
||||
@ -177,7 +179,7 @@ class KarTextExtractor:
|
||||
docdata = ""
|
||||
ok = False
|
||||
|
||||
if not params.has_key("filename:"):
|
||||
if "filename:" not in params:
|
||||
self.em.rclog("extractone: no mime or file name")
|
||||
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
||||
filename = params["filename:"]
|
||||
@ -191,7 +193,7 @@ class KarTextExtractor:
|
||||
self.encoding = ""
|
||||
|
||||
# Mimetype not used for now
|
||||
if not params.has_key("mimetype:"):
|
||||
if "mimetype:" not in params:
|
||||
mimetype = 'audio/x-midi'
|
||||
else:
|
||||
mimetype = params["mimetype:"]
|
||||
@ -199,8 +201,8 @@ class KarTextExtractor:
|
||||
# Read in and midi-decode the file
|
||||
try:
|
||||
stream = midi.read_midifile(filename)
|
||||
except Exception, err:
|
||||
self.em.rclog("extractone: midi extract failed: [%s]" % err)
|
||||
except Exception as err:
|
||||
self.em.rclog("extractone: read_midifile failed: [%s]" % err)
|
||||
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
||||
|
||||
title = None
|
||||
|
||||
@ -13,6 +13,8 @@ epsilon with dasia (in unicode but not iso). Can this be replaced by either epsi
|
||||
with acute accent ?
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import string
|
||||
import glob
|
||||
@ -117,7 +119,7 @@ if __name__ == "__main__":
|
||||
|
||||
lang,code,count = classifier.classify(rawtext)
|
||||
if count > 0:
|
||||
print "%s %s %d" % (code, lang, count)
|
||||
print("%s %s %d" % (code, lang, count))
|
||||
else:
|
||||
print "UNKNOWN UNKNOWN 0"
|
||||
print("UNKNOWN UNKNOWN 0")
|
||||
|
||||
|
||||
@ -43,7 +43,7 @@ class RarExtractor:
|
||||
try:
|
||||
rarinfo = self.rar.getinfo(ipath)
|
||||
isdir = rarinfo.isdir()
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog("extractone: getinfo failed: [%s]" % err)
|
||||
return (True, docdata, ipath, false)
|
||||
|
||||
@ -56,7 +56,7 @@ class RarExtractor:
|
||||
else:
|
||||
docdata = self.rar.read(ipath)
|
||||
ok = True
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog("extractone: failed: [%s]" % err)
|
||||
ok = False
|
||||
else:
|
||||
@ -89,7 +89,7 @@ class RarExtractor:
|
||||
try:
|
||||
ipath = ipath.decode("utf-8")
|
||||
return self.extractone(ipath)
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
return (ok, data, ipath, eof)
|
||||
|
||||
def getnext(self, params):
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
import rclexec1
|
||||
@ -10,24 +11,24 @@ import os
|
||||
class RTFProcessData:
|
||||
def __init__(self, em):
|
||||
self.em = em
|
||||
self.out = ""
|
||||
self.out = b''
|
||||
self.gothead = 0
|
||||
self.patendhead = re.compile('''</head>''')
|
||||
self.patcharset = re.compile('''^<meta http-equiv=''')
|
||||
self.patendhead = re.compile(b'''</head>''')
|
||||
self.patcharset = re.compile(b'''^<meta http-equiv=''')
|
||||
|
||||
# Some versions of unrtf put out a garbled charset line.
|
||||
# Apart from this, we pass the data untouched.
|
||||
def takeLine(self, line):
|
||||
if not self.gothead:
|
||||
if self.patendhead.search(line):
|
||||
self.out += '<meta http-equiv="Content-Type" ' + \
|
||||
'content="text/html;charset=UTF-8">' + "\n"
|
||||
self.out += line + "\n"
|
||||
self.out += b'<meta http-equiv="Content-Type" ' + \
|
||||
b'content="text/html;charset=UTF-8">' + b'\n'
|
||||
self.out += line + b'\n'
|
||||
self.gothead = 1
|
||||
elif not self.patcharset.search(line):
|
||||
self.out += line + "\n"
|
||||
self.out += line + b'\n'
|
||||
else:
|
||||
self.out += line + "\n"
|
||||
self.out += line + b'\n'
|
||||
|
||||
def wrapData(self):
|
||||
return self.out
|
||||
@ -52,7 +53,7 @@ class RTFFilter:
|
||||
|
||||
if __name__ == '__main__':
|
||||
if not rclexecm.which("unrtf"):
|
||||
print("RECFILTERROR HELPERNOTFOUND antiword")
|
||||
print("RECFILTERROR HELPERNOTFOUND unrtf")
|
||||
sys.exit(1)
|
||||
proto = rclexecm.RclExecM()
|
||||
filter = RTFFilter(proto)
|
||||
|
||||
@ -33,7 +33,7 @@ class TarExtractor:
|
||||
else:
|
||||
docdata = self.tar.extractfile(ipath).read()
|
||||
ok = True
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
ok = False
|
||||
iseof = rclexecm.RclExecM.noteof
|
||||
if self.currentindex >= len(self.namen) -1:
|
||||
@ -59,7 +59,7 @@ class TarExtractor:
|
||||
try:
|
||||
ipath = ipath.decode("utf-8")
|
||||
return self.extractone(ipath)
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
return (ok, data, ipath, eof)
|
||||
|
||||
def getnext(self, params):
|
||||
|
||||
@ -15,7 +15,7 @@ class WarExtractor:
|
||||
member = self.tar.extractfile(tarinfo)
|
||||
docdata = member.read()
|
||||
ok = True
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog("extractone: failed: [%s]" % err)
|
||||
ok = False
|
||||
return (ok, docdata, tarinfo.name, rclexecm.RclExecM.noteof)
|
||||
@ -26,7 +26,7 @@ class WarExtractor:
|
||||
try:
|
||||
self.tar = tarfile.open(params["filename:"])
|
||||
return True
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog(str(err))
|
||||
return False
|
||||
|
||||
@ -34,7 +34,7 @@ class WarExtractor:
|
||||
ipath = params["ipath:"]
|
||||
try:
|
||||
tarinfo = self.tar.getmember(ipath)
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog(str(err))
|
||||
return (False, "", ipath, rclexecm.RclExecM.noteof)
|
||||
return self.extractone(tarinfo)
|
||||
|
||||
@ -72,7 +72,7 @@ class ZipExtractor:
|
||||
else:
|
||||
docdata = self.zip.read(ipath)
|
||||
ok = True
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog("extractone: failed: [%s]" % err)
|
||||
ok = False
|
||||
iseof = rclexecm.RclExecM.noteof
|
||||
@ -98,7 +98,7 @@ class ZipExtractor:
|
||||
try:
|
||||
self.zip = ZipFile(filename)
|
||||
return True
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
self.em.rclog("openfile: failed: [%s]" % err)
|
||||
return False
|
||||
|
||||
@ -111,7 +111,7 @@ class ZipExtractor:
|
||||
try:
|
||||
ipath = ipath.decode("utf-8")
|
||||
return self.extractone(ipath)
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
return (ok, data, ipath, eof)
|
||||
|
||||
def getnext(self, params):
|
||||
|
||||
@ -75,7 +75,7 @@ class ConfSimple:
|
||||
def getNames(self, sk = ''):
|
||||
if not sk in self.submaps:
|
||||
return None
|
||||
return self.submaps[sk].keys()
|
||||
return list(self.submaps[sk].keys())
|
||||
|
||||
class ConfTree(ConfSimple):
|
||||
"""A ConfTree adds path-hierarchical interpretation of the section keys,
|
||||
|
||||
@ -4,6 +4,8 @@ logfilename = /tmp/logrcltst
|
||||
daemloglevel = 6
|
||||
daemlogfilename = /tmp/rclmontrace
|
||||
|
||||
systemfilecommand = xdg-mime query filetype
|
||||
|
||||
indexStripChars = 1
|
||||
detectxattronly = 1
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user