first pass at converting the filters for python 2/3 compat
This commit is contained in:
parent
cc68331f3d
commit
f344e8fedd
@ -52,7 +52,7 @@ class PPTDumper(object):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
dirstrm = strm.getDirectoryStreamByName(dirname)
|
dirstrm = strm.getDirectoryStreamByName(dirname)
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
error("getDirectoryStreamByName(%s): %s - %s\n" % (dirname,str(err),self.filepath))
|
error("getDirectoryStreamByName(%s): %s - %s\n" % (dirname,str(err),self.filepath))
|
||||||
# The previous version was killed by the exception
|
# The previous version was killed by the exception
|
||||||
# here, so the equivalent is to break, but maybe there
|
# here, so the equivalent is to break, but maybe there
|
||||||
|
|||||||
@ -15,7 +15,7 @@ try:
|
|||||||
import pylzma
|
import pylzma
|
||||||
from py7zlib import Archive7z
|
from py7zlib import Archive7z
|
||||||
except:
|
except:
|
||||||
print "RECFILTERROR HELPERNOTFOUND python:pylzma"
|
print("RECFILTERROR HELPERNOTFOUND python:pylzma")
|
||||||
sys.exit(1);
|
sys.exit(1);
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -40,19 +40,17 @@ class SevenZipExtractor:
|
|||||||
|
|
||||||
def extractone(self, ipath):
    """Extract the archive member named ipath.

    Returns a tuple (ok, docdata, ipath, iseof):
      ok      -- True if the member could be read, False otherwise
      docdata -- the member contents, or b'' if the read failed
      ipath   -- the member path passed through rclexecm.makebytes
      iseof   -- RclExecM.eofnext once self.currentindex has reached the
                 last name in the archive, RclExecM.noteof before that
    """
    #self.em.rclog("extractone: [%s]" % ipath)
    ok = False
    docdata = b''
    try:
        docdata = self.sevenzip.getmember(ipath).read()
    except Exception as err:
        # Best effort: log the failure and report it through 'ok'
        self.em.rclog("extractone: failed: [%s]" % err)
    else:
        ok = True

    lastindex = len(self.sevenzip.getnames()) - 1
    if self.currentindex >= lastindex:
        iseof = rclexecm.RclExecM.eofnext
    else:
        iseof = rclexecm.RclExecM.noteof

    return (ok, docdata, rclexecm.makebytes(ipath), iseof)
|
|
||||||
|
|
||||||
###### File type handler api, used by rclexecm ---------->
|
###### File type handler api, used by rclexecm ---------->
|
||||||
def openfile(self, params):
|
def openfile(self, params):
|
||||||
@ -71,7 +69,7 @@ class SevenZipExtractor:
|
|||||||
fp = open(filename, 'rb')
|
fp = open(filename, 'rb')
|
||||||
self.sevenzip = Archive7z(fp)
|
self.sevenzip = Archive7z(fp)
|
||||||
return True
|
return True
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("openfile: failed: [%s]" % err)
|
self.em.rclog("openfile: failed: [%s]" % err)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -84,7 +82,7 @@ class SevenZipExtractor:
|
|||||||
try:
|
try:
|
||||||
ipath = ipath.decode("utf-8")
|
ipath = ipath.decode("utf-8")
|
||||||
return self.extractone(ipath)
|
return self.extractone(ipath)
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
return (ok, data, ipath, eof)
|
return (ok, data, ipath, eof)
|
||||||
|
|
||||||
def getnext(self, params):
|
def getnext(self, params):
|
||||||
|
|||||||
@ -12,7 +12,7 @@ try:
|
|||||||
from mutagen.flac import FLAC
|
from mutagen.flac import FLAC
|
||||||
from mutagen.oggvorbis import OggVorbis
|
from mutagen.oggvorbis import OggVorbis
|
||||||
except:
|
except:
|
||||||
print "RECFILTERROR HELPERNOTFOUND python:mutagen"
|
print("RECFILTERROR HELPERNOTFOUND python:mutagen")
|
||||||
sys.exit(1);
|
sys.exit(1);
|
||||||
|
|
||||||
# prototype for the html document we're returning
|
# prototype for the html document we're returning
|
||||||
@ -42,23 +42,24 @@ class AudioTagExtractor:
|
|||||||
#self.em.rclog("extractone %s %s" % (params["filename:"], params["mimetype:"]))
|
#self.em.rclog("extractone %s %s" % (params["filename:"], params["mimetype:"]))
|
||||||
docdata = ""
|
docdata = ""
|
||||||
ok = False
|
ok = False
|
||||||
if not params.has_key("mimetype:") or not params.has_key("filename:"):
|
if not "mimetype:" in params or not "filename:" in params:
|
||||||
self.em.rclog("extractone: no mime or file name")
|
self.em.rclog("extractone: no mime or file name")
|
||||||
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
||||||
filename = params["filename:"]
|
filename = params["filename:"]
|
||||||
mimetype = params["mimetype:"]
|
mimetype = params["mimetype:"]
|
||||||
try:
|
try:
|
||||||
if mimetype == "audio/mpeg":
|
if mimetype == b'audio/mpeg':
|
||||||
tags = MP3(filename, ID3=EasyID3)
|
tags = MP3(filename, ID3=EasyID3)
|
||||||
elif mimetype == "application/ogg":
|
elif mimetype == b'application/ogg' or \
|
||||||
|
mimetype == b'audio/x-vorbis+ogg':
|
||||||
tags = OggVorbis(filename)
|
tags = OggVorbis(filename)
|
||||||
elif mimetype == "application/x-flac" or \
|
elif mimetype == b'application/x-flac' or \
|
||||||
mimetype == "audio/x-flac" or \
|
mimetype == 'audio/x-flac' or \
|
||||||
mimetype == "audio/flac":
|
mimetype == b'audio/flac':
|
||||||
tags = FLAC(filename)
|
tags = FLAC(filename)
|
||||||
else:
|
else:
|
||||||
raise Exception, "Bad mime type %s" % mimetype
|
raise Exception("Bad mime type %s" % mimetype)
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("extractone: extract failed: [%s]" % err)
|
self.em.rclog("extractone: extract failed: [%s]" % err)
|
||||||
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
||||||
|
|
||||||
@ -66,21 +67,22 @@ class AudioTagExtractor:
|
|||||||
artist = ""
|
artist = ""
|
||||||
title = ""
|
title = ""
|
||||||
try:
|
try:
|
||||||
album = self.em.htmlescape(tags["album"][0].encode("utf-8"))
|
album = self.em.htmlescape(tags["album"][0])
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
artist = self.em.htmlescape(tags["artist"][0].encode("utf-8"))
|
artist = self.em.htmlescape(tags["artist"][0])
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
title = self.em.htmlescape(tags["title"][0].encode("utf-8"))
|
title = self.em.htmlescape(tags["title"][0])
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
self.em.setmimetype("text/html")
|
self.em.setmimetype("text/html")
|
||||||
alldata = self.em.htmlescape(tags.pprint().encode("utf-8"))
|
alldata = self.em.htmlescape(tags.pprint())
|
||||||
alldata = alldata.replace("\n", "<br>")
|
alldata = alldata.replace("\n", "<br>")
|
||||||
docdata = htmltemplate % (album, artist, title, alldata)
|
docdata = (htmltemplate % (album, artist, title, alldata))\
|
||||||
|
.encode('UTF-8')
|
||||||
ok = True
|
ok = True
|
||||||
return (ok, docdata, "", rclexecm.RclExecM.eofnext)
|
return (ok, docdata, "", rclexecm.RclExecM.eofnext)
|
||||||
|
|
||||||
|
|||||||
@ -2,6 +2,11 @@
|
|||||||
"""Extract Html files from a Microsoft Compiled Html Help file (.chm)
|
"""Extract Html files from a Microsoft Compiled Html Help file (.chm)
|
||||||
Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
|
Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
# Note: this is not converted to python3, libchm does not have a
|
||||||
|
# python3 wrapper at this point (2015-11)
|
||||||
|
|
||||||
# Do we return individual chapters as html pages or concatenate everything?
|
# Do we return individual chapters as html pages or concatenate everything?
|
||||||
rclchm_catenate = 0
|
rclchm_catenate = 0
|
||||||
# Use special html type to allow for mimeconf/mimeview Open magic,
|
# Use special html type to allow for mimeconf/mimeview Open magic,
|
||||||
@ -23,13 +28,13 @@ import rclexecm
|
|||||||
try:
|
try:
|
||||||
from chm import chm,chmlib
|
from chm import chm,chmlib
|
||||||
except:
|
except:
|
||||||
print "RECFILTERROR HELPERNOTFOUND python:chm"
|
print("RECFILTERROR HELPERNOTFOUND python:chm")
|
||||||
sys.exit(1);
|
sys.exit(1);
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from HTMLParser import HTMLParser
|
from HTMLParser import HTMLParser
|
||||||
except:
|
except:
|
||||||
print "RECFILTERROR HELPERNOTFOUND python:HTMLParser"
|
print("RECFILTERROR HELPERNOTFOUND python:HTMLParser")
|
||||||
sys.exit(1);
|
sys.exit(1);
|
||||||
|
|
||||||
# Small helper routines
|
# Small helper routines
|
||||||
@ -37,11 +42,11 @@ def getfile(chmfile, path):
|
|||||||
"""Extract internal file text from chm object, given path"""
|
"""Extract internal file text from chm object, given path"""
|
||||||
res, ui = chmfile.ResolveObject(path)
|
res, ui = chmfile.ResolveObject(path)
|
||||||
if res != chmlib.CHM_RESOLVE_SUCCESS:
|
if res != chmlib.CHM_RESOLVE_SUCCESS:
|
||||||
#print "ResolveObject failed", path
|
#print("ResolveObject failed: %s" % path, file=sys.stderr)
|
||||||
return ""
|
return ""
|
||||||
res, doc = chmfile.RetrieveObject(ui)
|
res, doc = chmfile.RetrieveObject(ui)
|
||||||
if not res:
|
if not res:
|
||||||
print "RetrieveObject failed", path
|
print("RetrieveObject failed: %s" % path, file=sys.stderr)
|
||||||
return ""
|
return ""
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
# dia (http://live.gnome.org/Dia) file filter for recoll
|
# dia (http://live.gnome.org/Dia) file filter for recoll
|
||||||
# stefan.friedel@iwr.uni-heidelberg.de 2012
|
# stefan.friedel@iwr.uni-heidelberg.de 2012
|
||||||
#
|
#
|
||||||
@ -66,7 +68,7 @@ class DiaExtractor:
|
|||||||
try:
|
try:
|
||||||
docdata = self.ExtractDiaText()
|
docdata = self.ExtractDiaText()
|
||||||
ok = True
|
ok = True
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
ok = False
|
ok = False
|
||||||
iseof = rclexecm.RclExecM.eofnext
|
iseof = rclexecm.RclExecM.eofnext
|
||||||
self.em.setmimetype("text/plain")
|
self.em.setmimetype("text/plain")
|
||||||
@ -76,7 +78,7 @@ class DiaExtractor:
|
|||||||
def openfile(self, params):
|
def openfile(self, params):
|
||||||
try:
|
try:
|
||||||
self.dia = GzipFile(params["filename:"], 'r')
|
self.dia = GzipFile(params["filename:"], 'r')
|
||||||
# Dial files are sometimes not compressed. Quite weirdly,
|
# Dia files are sometimes not compressed. Quite weirdly,
|
||||||
# GzipFile does not complain until we try to read. Have to do it
|
# GzipFile does not complain until we try to read. Have to do it
|
||||||
# here to be able to retry an uncompressed open.
|
# here to be able to retry an uncompressed open.
|
||||||
data = self.dia.readline()
|
data = self.dia.readline()
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import rclexec1
|
import rclexec1
|
||||||
@ -11,32 +12,32 @@ import os
|
|||||||
class WordProcessData:
|
class WordProcessData:
|
||||||
def __init__(self, em):
|
def __init__(self, em):
|
||||||
self.em = em
|
self.em = em
|
||||||
self.out = ""
|
self.out = b''
|
||||||
self.cont = ""
|
self.cont = b''
|
||||||
self.gotdata = False
|
self.gotdata = False
|
||||||
# Line with continued word (ending in -)
|
# Line with continued word (ending in -)
|
||||||
# we strip the - which is not nice for actually hyphenated word.
|
# we strip the - which is not nice for actually hyphenated word.
|
||||||
# What to do ?
|
# What to do ?
|
||||||
self.patcont = re.compile('''[\w][-]$''')
|
self.patcont = re.compile(b'''[\w][-]$''')
|
||||||
# Pattern for breaking continuation at last word start
|
# Pattern for breaking continuation at last word start
|
||||||
self.patws = re.compile('''([\s])([\w]+)(-)$''')
|
self.patws = re.compile(b'''([\s])([\w]+)(-)$''')
|
||||||
|
|
||||||
def takeLine(self, line):
|
def takeLine(self, line):
|
||||||
if not self.gotdata:
|
if not self.gotdata:
|
||||||
if line == "":
|
if line == b'':
|
||||||
return
|
return
|
||||||
self.out = '<html><head><title></title>' + \
|
self.out = b'<html><head><title></title>' + \
|
||||||
'<meta http-equiv="Content-Type"' + \
|
b'<meta http-equiv="Content-Type"' + \
|
||||||
'content="text/html;charset=UTF-8">' + \
|
b'content="text/html;charset=UTF-8">' + \
|
||||||
'</head><body><p>'
|
b'</head><body><p>'
|
||||||
self.gotdata = True
|
self.gotdata = True
|
||||||
|
|
||||||
if self.cont:
|
if self.cont:
|
||||||
line = self.cont + line
|
line = self.cont + line
|
||||||
self.cont = ""
|
self.cont = ""
|
||||||
|
|
||||||
if line == "\f":
|
if line == b'\f':
|
||||||
self.out += "</p><hr><p>"
|
self.out += '</p><hr><p>'
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.patcont.search(line):
|
if self.patcont.search(line):
|
||||||
@ -47,16 +48,16 @@ class WordProcessData:
|
|||||||
line = line[0:match.start(1)]
|
line = line[0:match.start(1)]
|
||||||
else:
|
else:
|
||||||
self.cont = line
|
self.cont = line
|
||||||
line = ""
|
line = b''
|
||||||
|
|
||||||
if line:
|
if line:
|
||||||
self.out += self.em.htmlescape(line) + "<br>"
|
self.out += self.em.htmlescape(line) + b'<br>'
|
||||||
else:
|
else:
|
||||||
self.out += "<br>"
|
self.out += b'<br>'
|
||||||
|
|
||||||
def wrapData(self):
|
def wrapData(self):
|
||||||
if self.gotdata:
|
if self.gotdata:
|
||||||
self.out += "</p></body></html>"
|
self.out += b'</p></body></html>'
|
||||||
self.em.setmimetype("text/html")
|
self.em.setmimetype("text/html")
|
||||||
return self.out
|
return self.out
|
||||||
|
|
||||||
@ -65,7 +66,7 @@ class WordProcessData:
|
|||||||
# output HTML
|
# output HTML
|
||||||
class WordPassData:
|
class WordPassData:
|
||||||
def __init__(self, em):
|
def __init__(self, em):
|
||||||
self.out = ""
|
self.out = b''
|
||||||
self.em = em
|
self.em = em
|
||||||
|
|
||||||
def takeLine(self, line):
|
def takeLine(self, line):
|
||||||
@ -96,8 +97,8 @@ class WordFilter:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def mimetype(self, fn):
|
def mimetype(self, fn):
|
||||||
rtfprolog ="{\\rtf1"
|
rtfprolog = b'{\\rtf1'
|
||||||
docprolog = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
|
docprolog = b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1'
|
||||||
try:
|
try:
|
||||||
f = open(fn, "rb")
|
f = open(fn, "rb")
|
||||||
except:
|
except:
|
||||||
@ -132,7 +133,7 @@ class WordFilter:
|
|||||||
mt = self.mimetype(fn)
|
mt = self.mimetype(fn)
|
||||||
self.em.rclog("rcldoc.py: actual MIME type %s" % mt)
|
self.em.rclog("rcldoc.py: actual MIME type %s" % mt)
|
||||||
if mt == "text/plain":
|
if mt == "text/plain":
|
||||||
return ([python, os.path.join(self.execdir, "rcltext.py")],
|
return (["python", os.path.join(self.execdir, "rcltext.py")],
|
||||||
WordPassData(self.em))
|
WordPassData(self.em))
|
||||||
elif mt == "text/rtf":
|
elif mt == "text/rtf":
|
||||||
cmd = ["python", os.path.join(self.execdir, "rclrtf.py"),
|
cmd = ["python", os.path.join(self.execdir, "rclrtf.py"),
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
"""Extract Html content from an EPUB file (.epub)"""
|
"""Extract Html content from an EPUB file (.epub)"""
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
rclepub_html_mtype = "text/html"
|
rclepub_html_mtype = "text/html"
|
||||||
|
|
||||||
@ -12,7 +13,7 @@ import rclexecm
|
|||||||
try:
|
try:
|
||||||
import epub
|
import epub
|
||||||
except:
|
except:
|
||||||
print "RECFILTERROR HELPERNOTFOUND python:epub"
|
print("RECFILTERROR HELPERNOTFOUND python:epub")
|
||||||
sys.exit(1);
|
sys.exit(1);
|
||||||
|
|
||||||
class rclEPUB:
|
class rclEPUB:
|
||||||
@ -63,11 +64,11 @@ class rclEPUB:
|
|||||||
if item is None:
|
if item is None:
|
||||||
raise Exception("Item not found for id %s" % (id,))
|
raise Exception("Item not found for id %s" % (id,))
|
||||||
doc = self.book.read_item(item)
|
doc = self.book.read_item(item)
|
||||||
doc = re.sub('''</[hH][eE][aA][dD]>''',
|
doc = re.sub(b'''</[hH][eE][aA][dD]>''',
|
||||||
'''<meta name="rclaptg" content="epub"></head>''', doc)
|
b'''<meta name="rclaptg" content="epub"></head>''', doc)
|
||||||
self.em.setmimetype(rclepub_html_mtype)
|
self.em.setmimetype(rclepub_html_mtype)
|
||||||
return (True, doc, id, iseof)
|
return (True, doc, id, iseof)
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("extractone: failed: [%s]" % err)
|
self.em.rclog("extractone: failed: [%s]" % err)
|
||||||
return (False, "", id, iseof)
|
return (False, "", id, iseof)
|
||||||
|
|
||||||
@ -76,11 +77,11 @@ class rclEPUB:
|
|||||||
self.currentindex = -1
|
self.currentindex = -1
|
||||||
self.contents = []
|
self.contents = []
|
||||||
try:
|
try:
|
||||||
self.book = epub.open(params["filename:"])
|
self.book = epub.open_epub(params["filename:"].decode('UTF-8'))
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("openfile: epub.open failed: [%s]" % err)
|
self.em.rclog("openfile: epub.open failed: [%s]" % err)
|
||||||
return False
|
return False
|
||||||
for id, item in self.book.opf.manifest.iteritems():
|
for id, item in self.book.opf.manifest.items():
|
||||||
if item.media_type == 'application/xhtml+xml':
|
if item.media_type == 'application/xhtml+xml':
|
||||||
self.contents.append(id)
|
self.contents.append(id)
|
||||||
return True
|
return True
|
||||||
|
|||||||
@ -26,6 +26,8 @@
|
|||||||
# this would be too slow. So this helps implementing a permanent script
|
# this would be too slow. So this helps implementing a permanent script
|
||||||
# to repeatedly execute single commands.
|
# to repeatedly execute single commands.
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import rclexecm
|
import rclexecm
|
||||||
|
|
||||||
@ -74,8 +76,8 @@ class Executor:
|
|||||||
# params["mimetype:"]))
|
# params["mimetype:"]))
|
||||||
self.flt.reset()
|
self.flt.reset()
|
||||||
ok = False
|
ok = False
|
||||||
if not params.has_key("filename:"):
|
if not "filename:" in params:
|
||||||
self.em.rclog("extractone: no mime or file name")
|
self.em.rclog("extractone: no file name")
|
||||||
return (ok, "", "", rclexecm.RclExecM.eofnow)
|
return (ok, "", "", rclexecm.RclExecM.eofnow)
|
||||||
|
|
||||||
fn = params["filename:"]
|
fn = params["filename:"]
|
||||||
|
|||||||
@ -16,6 +16,9 @@
|
|||||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
########################################################
|
########################################################
|
||||||
## Recoll multifilter communication module and utilities
|
## Recoll multifilter communication module and utilities
|
||||||
|
#
|
||||||
|
# All data is binary. This is important for Python3
|
||||||
|
# All parameter names are converted to and processed as str/unicode
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
@ -26,6 +29,21 @@ import shutil
|
|||||||
import getopt
|
import getopt
|
||||||
import rclconfig
|
import rclconfig
|
||||||
|
|
||||||
|
# True when running under Python 3 or later. The numeric major version is
# compared: comparing the sys.version string lexically would misclassify a
# two-digit major version ('10...' sorts before '3').
PY3 = sys.version_info[0] >= 3


def makebytes(data):
    """Return data as a byte string, encoding text as UTF-8.

    Byte strings are returned unchanged and text strings are encoded as
    UTF-8. Under Python 2 any other object is returned unchanged; under
    Python 3 any other object is expected to provide encode().
    """
    if isinstance(data, bytes):
        return data
    # 'unicode' is only evaluated under Python 2 (short-circuit on PY3)
    if PY3 or isinstance(data, unicode):
        return data.encode("UTF-8")
    return data
|
||||||
|
|
||||||
my_config = rclconfig.RclConfig()
|
my_config = rclconfig.RclConfig()
|
||||||
|
|
||||||
############################################
|
############################################
|
||||||
@ -33,7 +51,7 @@ my_config = rclconfig.RclConfig()
|
|||||||
# communication protocol with the recollindex process. It calls the
|
# communication protocol with the recollindex process. It calls the
|
||||||
# object specific of the document type to actually get the data.
|
# object specific of the document type to actually get the data.
|
||||||
class RclExecM:
|
class RclExecM:
|
||||||
noteof = 0
|
noteof = 0
|
||||||
eofnext = 1
|
eofnext = 1
|
||||||
eofnow = 2
|
eofnow = 2
|
||||||
|
|
||||||
@ -46,7 +64,7 @@ class RclExecM:
|
|||||||
self.myname = os.path.basename(sys.argv[0])
|
self.myname = os.path.basename(sys.argv[0])
|
||||||
except:
|
except:
|
||||||
self.myname = "???"
|
self.myname = "???"
|
||||||
self.mimetype = ""
|
self.mimetype = b""
|
||||||
|
|
||||||
if os.environ.get("RECOLL_FILTER_MAXMEMBERKB"):
|
if os.environ.get("RECOLL_FILTER_MAXMEMBERKB"):
|
||||||
self.maxmembersize = \
|
self.maxmembersize = \
|
||||||
@ -60,7 +78,7 @@ class RclExecM:
|
|||||||
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
|
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
|
||||||
self.debugfile = None
|
self.debugfile = None
|
||||||
if self.debugfile:
|
if self.debugfile:
|
||||||
self.errfout = open(self.debugfile, "ab")
|
self.errfout = open(self.debugfile, "a")
|
||||||
else:
|
else:
|
||||||
self.errfout = sys.stderr
|
self.errfout = sys.stderr
|
||||||
|
|
||||||
@ -93,77 +111,84 @@ class RclExecM:
|
|||||||
# Note: tried replacing this with a multiple replacer according to
|
# Note: tried replacing this with a multiple replacer according to
|
||||||
# http://stackoverflow.com/a/15221068, which was **10 times** slower
|
# http://stackoverflow.com/a/15221068, which was **10 times** slower
|
||||||
def htmlescape(self, txt):
    """Escape the HTML special characters &, <, > and " in txt.

    txt may be a byte string or a text string; the result has the same
    type as the input.
    """
    # & must stay first (it somehow had managed to skip
    # after the next replace, with rather interesting results):
    # otherwise the & of the entities produced by the other
    # replacements would be escaped a second time.
    try:
        txt = txt.replace(b'&', b'&amp;').replace(b'<', b'&lt;').\
              replace(b'>', b'&gt;').replace(b'"', b'&quot;')
    except TypeError:
        # Text string under Python 3: bytes arguments are rejected
        txt = txt.replace("&", "&amp;").replace("<", "&lt;").\
              replace(">", "&gt;").replace("\"", "&quot;")
    return txt
|
||||||
|
|
||||||
# Our worker sometimes knows the mime types of the data it sends
|
# Our worker sometimes knows the mime types of the data it sends
|
||||||
def setmimetype(self, mt):
    """Record the MIME type announced by the worker.

    The value is passed through makebytes() before being stored in
    self.mimetype.
    """
    mtbytes = makebytes(mt)
    self.mimetype = mtbytes
|
||||||
|
|
||||||
# Read single parameter from process input: line with param name and size
|
# Read single parameter from process input: line with param name and size
|
||||||
# followed by data.
|
# followed by data. The param name is returned as str/unicode, the data
|
||||||
|
# as bytes
|
||||||
def readparam(self):
    """Read a single parameter from the process standard input.

    The input is a line holding the parameter name and the data size,
    followed by that many bytes of data.

    Returns a tuple (paramname, paramdata): the name is lower-cased
    str/unicode decoded as ASCII, the data is bytes. An empty line
    yields ('', b''). End of file on input exits the process with
    status 0.
    """
    # Binary layer of stdin: Python 3 exposes it as sys.stdin.buffer,
    # a Python 2 file object has no such attribute and yields byte
    # strings itself.
    inf = getattr(sys.stdin, 'buffer', sys.stdin)

    s = inf.readline()
    if s == b'':
        sys.exit(0)

    s = s.rstrip(b'\n')
    if s == b'':
        return ('', b'')

    fields = s.split()
    if len(fields) != 2:
        # Log a text message like every other rclog() call; %r keeps
        # arbitrary input bytes printable.
        # NOTE(review): rclog(..., 1, 1) presumably exits - confirm
        self.rclog("bad line: [%r]" % (s,), 1, 1)

    paramname = fields[0].decode('ASCII').lower()
    paramsize = int(fields[1])
    if paramsize > 0:
        paramdata = inf.read(paramsize)
        if len(paramdata) != paramsize:
            self.rclog("Bad read: wanted %d, got %d" %
                       (paramsize, len(paramdata)), 1, 1)
    else:
        paramdata = b''

    #self.rclog("paramname [%s] paramsize %d value [%s]" %
    #          (paramname, paramsize, paramdata))
    return (paramname, paramdata)
|
||||||
|
|
||||||
|
def senditem(self, nm, len, data):
    """Write one item to standard output.

    A header line "nm: len" is written first, then data is handed to
    self.breakwrite(). Both are converted with makebytes() and go to
    the binary layer of stdout (sys.stdout.buffer under Python 3,
    sys.stdout itself under Python 2).

    Note: the 'len' parameter hides the builtin of the same name
    inside this method.
    """
    out = sys.stdout.buffer if PY3 else sys.stdout
    header = "%s: %d\n" % (nm, len)
    out.write(makebytes(header))
    self.breakwrite(out, makebytes(data))
|
||||||
|
|
||||||
# Send answer: document, ipath, possible eof.
|
# Send answer: document, ipath, possible eof.
|
||||||
def answer(self, docdata, ipath, iseof = noteof, iserror = noerror):
|
def answer(self, docdata, ipath, iseof = noteof, iserror = noerror):
|
||||||
|
|
||||||
if iserror != RclExecM.fileerror and iseof != RclExecM.eofnow:
|
if iserror != RclExecM.fileerror and iseof != RclExecM.eofnow:
|
||||||
if isinstance(docdata, unicode):
|
self.senditem("Document", len(docdata), docdata)
|
||||||
self.rclog("GOT UNICODE for ipath [%s]" % (ipath,))
|
|
||||||
docdata = docdata.encode("UTF-8")
|
|
||||||
|
|
||||||
print("Document: %d" % len(docdata))
|
|
||||||
self.breakwrite(sys.stdout, docdata)
|
|
||||||
|
|
||||||
if len(ipath):
|
if len(ipath):
|
||||||
print("Ipath: %d" % len(ipath))
|
self.senditem("Ipath", len(ipath), ipath)
|
||||||
sys.stdout.write(ipath)
|
|
||||||
|
|
||||||
if len(self.mimetype):
|
if len(self.mimetype):
|
||||||
print("Mimetype: %d" % len(self.mimetype))
|
self.senditem("Mimetype", len(self.mimetype), self.mimetype)
|
||||||
sys.stdout.write(self.mimetype)
|
|
||||||
|
|
||||||
# If we're at the end of the contents, say so
|
# If we're at the end of the contents, say so
|
||||||
if iseof == RclExecM.eofnow:
|
if iseof == RclExecM.eofnow:
|
||||||
print("Eofnow: 0")
|
self.senditem("Eofnow", 0, b'')
|
||||||
elif iseof == RclExecM.eofnext:
|
elif iseof == RclExecM.eofnext:
|
||||||
print("Eofnext: 0")
|
self.senditem("Eofnext", 0, b'')
|
||||||
if iserror == RclExecM.subdocerror:
|
if iserror == RclExecM.subdocerror:
|
||||||
print("Subdocerror: 0")
|
self.senditem("Subdocerror", 0, b'')
|
||||||
elif iserror == RclExecM.fileerror:
|
elif iserror == RclExecM.fileerror:
|
||||||
print("Fileerror: 0")
|
self.senditem("Fileerror", 0, b'')
|
||||||
|
|
||||||
# End of message
|
# End of message
|
||||||
print()
|
print()
|
||||||
@ -173,7 +198,8 @@ class RclExecM:
|
|||||||
def processmessage(self, processor, params):
|
def processmessage(self, processor, params):
|
||||||
|
|
||||||
# We must have a filename entry (even empty). Else exit
|
# We must have a filename entry (even empty). Else exit
|
||||||
if not params.has_key("filename:"):
|
if "filename:" not in params:
|
||||||
|
print("%s" % params, file=sys.stderr)
|
||||||
self.rclog("no filename ??", 1, 1)
|
self.rclog("no filename ??", 1, 1)
|
||||||
|
|
||||||
# If we're given a file name, open it.
|
# If we're given a file name, open it.
|
||||||
@ -182,7 +208,7 @@ class RclExecM:
|
|||||||
if not processor.openfile(params):
|
if not processor.openfile(params):
|
||||||
self.answer("", "", iserror = RclExecM.fileerror)
|
self.answer("", "", iserror = RclExecM.fileerror)
|
||||||
return
|
return
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.rclog("processmessage: openfile raised: [%s]" % err)
|
self.rclog("processmessage: openfile raised: [%s]" % err)
|
||||||
self.answer("", "", iserror = RclExecM.fileerror)
|
self.answer("", "", iserror = RclExecM.fileerror)
|
||||||
return
|
return
|
||||||
@ -192,11 +218,11 @@ class RclExecM:
|
|||||||
eof = True
|
eof = True
|
||||||
self.mimetype = ""
|
self.mimetype = ""
|
||||||
try:
|
try:
|
||||||
if params.has_key("ipath:") and len(params["ipath:"]):
|
if "ipath:" in params and len(params["ipath:"]):
|
||||||
ok, data, ipath, eof = processor.getipath(params)
|
ok, data, ipath, eof = processor.getipath(params)
|
||||||
else:
|
else:
|
||||||
ok, data, ipath, eof = processor.getnext(params)
|
ok, data, ipath, eof = processor.getnext(params)
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.answer("", "", eof, RclExecM.fileerror)
|
self.answer("", "", eof, RclExecM.fileerror)
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -311,7 +337,7 @@ def main(proto, extract):
|
|||||||
|
|
||||||
actAsSingle = False
|
actAsSingle = False
|
||||||
debugDumpData = False
|
debugDumpData = False
|
||||||
ipath = ""
|
ipath = b""
|
||||||
|
|
||||||
args = sys.argv[1:]
|
args = sys.argv[1:]
|
||||||
opts, args = getopt.getopt(args, "hdsi:w:")
|
opts, args = getopt.getopt(args, "hdsi:w:")
|
||||||
@ -321,7 +347,7 @@ def main(proto, extract):
|
|||||||
elif opt in ['-s']:
|
elif opt in ['-s']:
|
||||||
actAsSingle = True
|
actAsSingle = True
|
||||||
elif opt in ['-i']:
|
elif opt in ['-i']:
|
||||||
ipath = arg
|
ipath = makebytes(arg)
|
||||||
elif opt in ['-w']:
|
elif opt in ['-w']:
|
||||||
ret = which(arg)
|
ret = which(arg)
|
||||||
if ret:
|
if ret:
|
||||||
@ -344,17 +370,17 @@ def main(proto, extract):
|
|||||||
lst = fileout.split(':')
|
lst = fileout.split(':')
|
||||||
mimetype = lst[len(lst)-1].strip()
|
mimetype = lst[len(lst)-1].strip()
|
||||||
lst = mimetype.split(';')
|
lst = mimetype.split(';')
|
||||||
return lst[0].strip()
|
return makebytes(lst[0].strip())
|
||||||
|
|
||||||
def mimetype_with_xdg(f):
|
def mimetype_with_xdg(f):
|
||||||
cmd = 'xdg-mime query filetype "' + f + '"'
|
cmd = 'xdg-mime query filetype "' + f + '"'
|
||||||
return os.popen(cmd).read().strip()
|
return makebytes(os.popen(cmd).read().strip())
|
||||||
|
|
||||||
def debprint(s):
|
def debprint(out, s):
|
||||||
if not actAsSingle:
|
if not actAsSingle:
|
||||||
print(s)
|
proto.breakwrite(out, makebytes(s+'\n'))
|
||||||
|
|
||||||
params = {'filename:': args[0]}
|
params = {'filename:': makebytes(args[0])}
|
||||||
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer
|
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer
|
||||||
mimetype = mimetype_with_xdg(args[0])
|
mimetype = mimetype_with_xdg(args[0])
|
||||||
params['mimetype:'] = mimetype
|
params['mimetype:'] = mimetype
|
||||||
@ -363,19 +389,20 @@ def main(proto, extract):
|
|||||||
print("Open error", file=sys.stderr)
|
print("Open error", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
if ipath != "" or actAsSingle:
|
if PY3:
|
||||||
|
ioout = sys.stdout.buffer
|
||||||
|
else:
|
||||||
|
ioout = sys.stdout
|
||||||
|
if ipath != b"" or actAsSingle:
|
||||||
params['ipath:'] = ipath
|
params['ipath:'] = ipath
|
||||||
ok, data, ipath, eof = extract.getipath(params)
|
ok, data, ipath, eof = extract.getipath(params)
|
||||||
if ok:
|
if ok:
|
||||||
debprint("== Found entry for ipath %s (mimetype [%s]):" % \
|
debprint(ioout, "== Found entry for ipath %s (mimetype [%s]):" % \
|
||||||
(ipath, proto.mimetype))
|
(ipath, proto.mimetype))
|
||||||
if isinstance(data, unicode):
|
bdata = makebytes(data)
|
||||||
bdata = data.encode("UTF-8")
|
|
||||||
else:
|
|
||||||
bdata = data
|
|
||||||
if debugDumpData or actAsSingle:
|
if debugDumpData or actAsSingle:
|
||||||
proto.breakwrite(sys.stdout, bdata)
|
proto.breakwrite(ioout, bdata)
|
||||||
print()
|
ioout.write(b'\n')
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
else:
|
else:
|
||||||
print("Got error, eof %d"%eof, file=sys.stderr)
|
print("Got error, eof %d"%eof, file=sys.stderr)
|
||||||
@ -386,15 +413,12 @@ def main(proto, extract):
|
|||||||
ok, data, ipath, eof = extract.getnext(params)
|
ok, data, ipath, eof = extract.getnext(params)
|
||||||
if ok:
|
if ok:
|
||||||
ecnt = ecnt + 1
|
ecnt = ecnt + 1
|
||||||
debprint("== Entry %d ipath %s (mimetype [%s]):" % \
|
bdata = makebytes(data)
|
||||||
(ecnt, ipath, proto.mimetype))
|
debprint(ioout, "== Entry %d dlen %d ipath %s (mimetype [%s]):" % \
|
||||||
if isinstance(data, unicode):
|
(ecnt, len(data), ipath, proto.mimetype))
|
||||||
bdata = data.encode("UTF-8")
|
|
||||||
else:
|
|
||||||
bdata = data
|
|
||||||
if debugDumpData:
|
if debugDumpData:
|
||||||
proto.breakwrite(sys.stdout, bdata)
|
proto.breakwrite(ioout, bdata)
|
||||||
print()
|
ioout.write(b'\n')
|
||||||
if eof != RclExecM.noteof:
|
if eof != RclExecM.noteof:
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
# Read an ICS file, break it into "documents" which are events, todos,
|
# Read an ICS file, break it into "documents" which are events, todos,
|
||||||
# or journal entries, and interface with recoll execm
|
# or journal entries, and interface with recoll execm
|
||||||
@ -13,36 +14,36 @@ import rclexecm
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
# Decide how we'll process the file.
|
# Decide how we'll process the file.
|
||||||
modules = ('internal', 'icalendar', 'vobject')
|
modules = ("internal", "icalendar", "vobject")
|
||||||
usemodule = 'internal'
|
usemodule = "internal"
|
||||||
forcevobject = 0
|
forcevobject = 0
|
||||||
if usemodule != 'internal':
|
if usemodule != "internal":
|
||||||
try:
|
try:
|
||||||
if forcevobject:
|
if forcevobject:
|
||||||
raise Exception
|
raise Exception
|
||||||
from icalendar import Calendar, Event
|
from icalendar import Calendar, Event
|
||||||
usemodule = 'icalendar'
|
usemodule = "icalendar"
|
||||||
except:
|
except:
|
||||||
try:
|
try:
|
||||||
import vobject
|
import vobject
|
||||||
usemodule = 'vobject'
|
usemodule = "vobject"
|
||||||
except:
|
except:
|
||||||
print "RECFILTERROR HELPERNOTFOUND python:icalendar"
|
print("RECFILTERROR HELPERNOTFOUND python:icalendar")
|
||||||
print "RECFILTERROR HELPERNOTFOUND python:vobject"
|
print("RECFILTERROR HELPERNOTFOUND python:vobject")
|
||||||
sys.exit(1);
|
sys.exit(1);
|
||||||
|
|
||||||
|
|
||||||
class IcalExtractor:
|
class IcalExtractor:
|
||||||
def __init__(self, em):
|
def __init__(self, em):
|
||||||
self.file = ""
|
self.file = ""
|
||||||
self.contents = []
|
self.contents = []
|
||||||
self.em = em
|
self.em = em
|
||||||
|
|
||||||
def extractone(self, index):
|
def extractone(self, index):
|
||||||
if index >= len(self.contents):
|
if index >= len(self.contents):
|
||||||
return(False, "", "", True)
|
return(False, "", "", True)
|
||||||
docdata = self.contents[index]
|
docdata = self.contents[index]
|
||||||
#self.em.rclog(docdata)
|
#self.em.rclog(docdata)
|
||||||
|
|
||||||
iseof = rclexecm.RclExecM.noteof
|
iseof = rclexecm.RclExecM.noteof
|
||||||
if self.currentindex >= len(self.contents) -1:
|
if self.currentindex >= len(self.contents) -1:
|
||||||
@ -55,32 +56,32 @@ class IcalExtractor:
|
|||||||
self.file = params["filename:"]
|
self.file = params["filename:"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
calstr = open(self.file, 'rb')
|
calstr = open(self.file, "rb")
|
||||||
except Exception, e:
|
except Exception as e:
|
||||||
self.em.rclog("Openfile: open: %s" % str(e))
|
self.em.rclog("Openfile: open: %s" % str(e))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.currentindex = -1
|
self.currentindex = -1
|
||||||
|
|
||||||
if usemodule == 'internal':
|
if usemodule == "internal":
|
||||||
self.contents = ICalSimpleSplitter().splitcalendar(calstr)
|
self.contents = ICalSimpleSplitter().splitcalendar(calstr)
|
||||||
elif usemodule == 'icalendar':
|
elif usemodule == "icalendar":
|
||||||
try:
|
try:
|
||||||
cal = Calendar.from_string(calstr.read())
|
cal = Calendar.from_string(calstr.read())
|
||||||
except Exception, e:
|
except Exception as e:
|
||||||
self.em.rclog("Openfile: read or parse error: %s" % str(e))
|
self.em.rclog("Openfile: read or parse error: %s" % str(e))
|
||||||
return False
|
return False
|
||||||
self.contents = cal.walk()
|
self.contents = cal.walk()
|
||||||
self.contents = [item.as_string() for item in self.contents
|
self.contents = [item.as_string() for item in self.contents
|
||||||
if (item.name == 'VEVENT' or item.name == 'VTODO'
|
if (item.name == "VEVENT" or item.name == "VTODO"
|
||||||
or item.name == 'VJOURNAL')]
|
or item.name == "VJOURNAL")]
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
cal = vobject.readOne(calstr)
|
cal = vobject.readOne(calstr)
|
||||||
except Exception, e:
|
except Exception as e:
|
||||||
self.em.rclog("Openfile: cant parse object: %s" % str(e))
|
self.em.rclog("Openfile: cant parse object: %s" % str(e))
|
||||||
return False
|
return False
|
||||||
for lstnm in ('vevent_list', 'vtodo_list', 'vjournal_list'):
|
for lstnm in ("vevent_list", "vtodo_list", "vjournal_list"):
|
||||||
lst = getattr(cal, lstnm, [])
|
lst = getattr(cal, lstnm, [])
|
||||||
for ev in lst:
|
for ev in lst:
|
||||||
self.contents.append(ev.serialize())
|
self.contents.append(ev.serialize())
|
||||||
@ -90,7 +91,10 @@ class IcalExtractor:
|
|||||||
|
|
||||||
def getipath(self, params):
|
def getipath(self, params):
|
||||||
try:
|
try:
|
||||||
index = int(params["ipath:"])
|
if params["ipath:"] == b'':
|
||||||
|
index = 0
|
||||||
|
else:
|
||||||
|
index = int(params["ipath:"])
|
||||||
except:
|
except:
|
||||||
return (False, "", "", True)
|
return (False, "", "", True)
|
||||||
return self.extractone(index)
|
return self.extractone(index)
|
||||||
@ -100,7 +104,7 @@ class IcalExtractor:
|
|||||||
if self.currentindex == -1:
|
if self.currentindex == -1:
|
||||||
# Return "self" doc
|
# Return "self" doc
|
||||||
self.currentindex = 0
|
self.currentindex = 0
|
||||||
self.em.setmimetype('text/plain')
|
self.em.setmimetype(b'text/plain')
|
||||||
if len(self.contents) == 0:
|
if len(self.contents) == 0:
|
||||||
eof = rclexecm.RclExecM.eofnext
|
eof = rclexecm.RclExecM.eofnext
|
||||||
else:
|
else:
|
||||||
@ -121,44 +125,44 @@ class ICalSimpleSplitter:
|
|||||||
# Note that if an 'interesting' element is nested inside another one,
|
# Note that if an 'interesting' element is nested inside another one,
|
||||||
# it will not be extracted (stay as text in external event). This is
|
# it will not be extracted (stay as text in external event). This is
|
||||||
# not an issue and I don't think it can happen with the current list
|
# not an issue and I don't think it can happen with the current list
|
||||||
interesting = ('VTODO', 'VEVENT', 'VJOURNAL')
|
interesting = (b'VTODO', b'VEVENT', b'VJOURNAL')
|
||||||
|
|
||||||
def splitcalendar(self, fin):
|
def splitcalendar(self, fin):
|
||||||
curblkname = ''
|
curblkname = b''
|
||||||
curblk = ''
|
curblk = b''
|
||||||
|
|
||||||
lo = []
|
lo = []
|
||||||
for line in fin:
|
for line in fin:
|
||||||
line = line.rstrip()
|
line = line.rstrip()
|
||||||
if line == '':
|
if line == b'':
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if curblkname:
|
if curblkname:
|
||||||
curblk = curblk + line + "\n"
|
curblk = curblk + line + b'\n'
|
||||||
|
|
||||||
l = line.split(":")
|
l = line.split(b':')
|
||||||
if len(l) < 2:
|
if len(l) < 2:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# If not currently inside a block and we see an
|
# If not currently inside a block and we see an
|
||||||
# 'interesting' BEGIN, start block
|
# 'interesting' BEGIN, start block
|
||||||
if curblkname == '' and l[0].upper() == "BEGIN" :
|
if curblkname == b'' and l[0].upper() == b'BEGIN':
|
||||||
name = l[1].upper()
|
name = l[1].upper()
|
||||||
if name in ICalSimpleSplitter.interesting:
|
if name in ICalSimpleSplitter.interesting:
|
||||||
curblkname = name
|
curblkname = name
|
||||||
curblk = curblk + line + "\n"
|
curblk = curblk + line + b'\n'
|
||||||
|
|
||||||
# If currently accumulating block lines, check for end
|
# If currently accumulating block lines, check for end
|
||||||
if curblkname and l[0].upper() == "END" and \
|
if curblkname and l[0].upper() == b'END' and \
|
||||||
l[1].upper() == curblkname:
|
l[1].upper() == curblkname:
|
||||||
lo.append(curblk)
|
lo.append(curblk)
|
||||||
curblkname = ''
|
curblkname = b''
|
||||||
curblk = ''
|
curblk = b''
|
||||||
|
|
||||||
if curblk:
|
if curblk:
|
||||||
lo.append(curblk)
|
lo.append(curblk)
|
||||||
curblkname = ''
|
curblkname = b''
|
||||||
curblk = ''
|
curblk = b''
|
||||||
|
|
||||||
return lo
|
return lo
|
||||||
|
|
||||||
|
|||||||
@ -1,11 +1,12 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
# Python-based Image Tag extractor for Recoll. This is less thorough than the
|
# Python-based Image Tag extractor for Recoll. This is less thorough
|
||||||
# Perl-based rclimg script, but useful if you don't want to have to install Perl
|
# than the Perl-based rclimg script, but useful if you don't want to
|
||||||
# (e.g. on Windows).
|
# have to install Perl (e.g. on Windows).
|
||||||
#
|
#
|
||||||
# Uses pyexiv2. Also tried Pillow, found it useless for tags.
|
# Uses pyexiv2. Also tried Pillow, found it useless for tags.
|
||||||
#
|
#
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
@ -15,7 +16,7 @@ import re
|
|||||||
try:
|
try:
|
||||||
import pyexiv2
|
import pyexiv2
|
||||||
except:
|
except:
|
||||||
print "RECFILTERROR HELPERNOTFOUND python:pyexiv2"
|
print("RECFILTERROR HELPERNOTFOUND python:pyexiv2")
|
||||||
sys.exit(1);
|
sys.exit(1);
|
||||||
|
|
||||||
khexre = re.compile('.*\.0[xX][0-9a-fA-F]+$')
|
khexre = re.compile('.*\.0[xX][0-9a-fA-F]+$')
|
||||||
@ -48,7 +49,7 @@ class ImgTagExtractor:
|
|||||||
def extractone(self, params):
|
def extractone(self, params):
|
||||||
#self.em.rclog("extractone %s" % params["filename:"])
|
#self.em.rclog("extractone %s" % params["filename:"])
|
||||||
ok = False
|
ok = False
|
||||||
if not params.has_key("filename:"):
|
if "filename:" not in params:
|
||||||
self.em.rclog("extractone: no file name")
|
self.em.rclog("extractone: no file name")
|
||||||
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
||||||
filename = params["filename:"]
|
filename = params["filename:"]
|
||||||
@ -62,11 +63,11 @@ class ImgTagExtractor:
|
|||||||
# we skip numeric keys and undecoded makernote data
|
# we skip numeric keys and undecoded makernote data
|
||||||
if k != 'Exif.Photo.MakerNote' and not khexre.match(k):
|
if k != 'Exif.Photo.MakerNote' and not khexre.match(k):
|
||||||
mdic[k] = str(metadata[k].raw_value)
|
mdic[k] = str(metadata[k].raw_value)
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("extractone: extract failed: [%s]" % err)
|
self.em.rclog("extractone: extract failed: [%s]" % err)
|
||||||
return (ok, "", "", rclexecm.RclExecM.eofnow)
|
return (ok, "", "", rclexecm.RclExecM.eofnow)
|
||||||
|
|
||||||
docdata = "<html><head>\n"
|
docdata = b'<html><head>\n'
|
||||||
|
|
||||||
ttdata = set()
|
ttdata = set()
|
||||||
for k in pyexiv2_titles:
|
for k in pyexiv2_titles:
|
||||||
@ -77,25 +78,28 @@ class ImgTagExtractor:
|
|||||||
for v in ttdata:
|
for v in ttdata:
|
||||||
v = v.replace('[', '').replace(']', '').replace("'", "")
|
v = v.replace('[', '').replace(']', '').replace("'", "")
|
||||||
title += v + " "
|
title += v + " "
|
||||||
docdata += '<title>' + title + '</title>\n'
|
docdata += rclexecm.makebytes("<title>" + title + "</title>\n")
|
||||||
|
|
||||||
for k in exiv2_dates:
|
for k in exiv2_dates:
|
||||||
if k in mdic:
|
if k in mdic:
|
||||||
# Recoll wants: %Y-%m-%d %H:%M:%S.
|
# Recoll wants: %Y-%m-%d %H:%M:%S.
|
||||||
# We get 2014:06:27 14:58:47
|
# We get 2014:06:27 14:58:47
|
||||||
dt = mdic[k].replace(':', '-', 2)
|
dt = mdic[k].replace(":", "-", 2)
|
||||||
docdata += '<meta name="date" content="' + dt + '">\n'
|
docdata += b'<meta name="date" content="' + \
|
||||||
|
rclexecm.makebytes(dt) + b'">\n'
|
||||||
break
|
break
|
||||||
|
|
||||||
for k,v in mdic.iteritems():
|
for k,v in mdic.items():
|
||||||
if k == 'Xmp.digiKam.TagsList':
|
if k == 'Xmp.digiKam.TagsList':
|
||||||
docdata += '<meta name="keywords" content="' + \
|
docdata += b'<meta name="keywords" content="' + \
|
||||||
self.em.htmlescape(mdic[k]) + '">\n'
|
rclexecm.makebytes(self.em.htmlescape(mdic[k])) + \
|
||||||
|
b'">\n'
|
||||||
|
|
||||||
docdata += "</head><body>\n"
|
docdata += b'</head><body>\n'
|
||||||
for k,v in mdic.iteritems():
|
for k,v in mdic.items():
|
||||||
docdata += k + " : " + self.em.htmlescape(mdic[k]) + "<br />\n"
|
docdata += rclexecm.makebytes(k + " : " + \
|
||||||
docdata += "</body></html>"
|
self.em.htmlescape(mdic[k]) + "<br />\n")
|
||||||
|
docdata += b'</body></html>'
|
||||||
|
|
||||||
self.em.setmimetype("text/html")
|
self.em.setmimetype("text/html")
|
||||||
|
|
||||||
|
|||||||
@ -3,6 +3,7 @@
|
|||||||
# Read a file in GNU info format and output its nodes as subdocs,
|
# Read a file in GNU info format and output its nodes as subdocs,
|
||||||
# interfacing with recoll execm
|
# interfacing with recoll execm
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import sys
|
import sys
|
||||||
@ -16,24 +17,12 @@ import subprocess
|
|||||||
# Some info source docs contain charset info like:
|
# Some info source docs contain charset info like:
|
||||||
# @documentencoding ISO-2022-JP
|
# @documentencoding ISO-2022-JP
|
||||||
# But this seems to be absent from outputs.
|
# But this seems to be absent from outputs.
|
||||||
htmltemplate = '''
|
|
||||||
<html>
|
|
||||||
<head>
|
|
||||||
<title>%s</title>
|
|
||||||
<meta name="rclaptg" content="gnuinfo">
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<pre style="white-space: pre-wrap">
|
|
||||||
%s
|
|
||||||
</pre></body>
|
|
||||||
</html>
|
|
||||||
'''
|
|
||||||
|
|
||||||
# RclExecm interface
|
# RclExecm interface
|
||||||
class InfoExtractor:
|
class InfoExtractor:
|
||||||
def __init__(self, em):
|
def __init__(self, em):
|
||||||
self.file = ""
|
self.file = ""
|
||||||
self.contents = []
|
self.contents = []
|
||||||
self.em = em
|
self.em = em
|
||||||
|
|
||||||
def extractone(self, index):
|
def extractone(self, index):
|
||||||
@ -43,8 +32,13 @@ class InfoExtractor:
|
|||||||
nodename, docdata = self.contents[index]
|
nodename, docdata = self.contents[index]
|
||||||
nodename = self.em.htmlescape(nodename)
|
nodename = self.em.htmlescape(nodename)
|
||||||
docdata = self.em.htmlescape(docdata)
|
docdata = self.em.htmlescape(docdata)
|
||||||
|
# strange whitespace to avoid changing the module tests (same as old)
|
||||||
docdata = htmltemplate % (nodename, docdata)
|
docdata = b'\n<html>\n <head>\n <title>' + nodename + \
|
||||||
|
b'</title>\n' + \
|
||||||
|
' <meta name="rclaptg" content="gnuinfo">\n' + \
|
||||||
|
b' </head>\n <body>\n' + \
|
||||||
|
b' <pre style="white-space: pre-wrap">\n ' + \
|
||||||
|
docdata + b'\n </pre></body>\n</html>\n'
|
||||||
|
|
||||||
iseof = rclexecm.RclExecM.noteof
|
iseof = rclexecm.RclExecM.noteof
|
||||||
if self.currentindex >= len(self.contents) -1:
|
if self.currentindex >= len(self.contents) -1:
|
||||||
@ -60,19 +54,18 @@ class InfoExtractor:
|
|||||||
self.em.rclog("Openfile: %s is not a file" % self.file)
|
self.em.rclog("Openfile: %s is not a file" % self.file)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
cmd = "info --subnodes -o - -f " + self.file
|
cmd = b'info --subnodes -o - -f ' + self.file
|
||||||
nullstream = open("/dev/null", 'w')
|
nullstream = open("/dev/null", 'w')
|
||||||
try:
|
try:
|
||||||
infostream = subprocess.Popen(cmd, shell=True, bufsize=1,
|
infostream = subprocess.Popen(cmd, shell=True, bufsize=1,
|
||||||
stderr=nullstream,
|
stderr=nullstream,
|
||||||
stdout=subprocess.PIPE).stdout
|
stdout=subprocess.PIPE).stdout
|
||||||
except Exception, e:
|
except Exception as e:
|
||||||
# Consider this as permanently fatal.
|
# Consider this as permanently fatal.
|
||||||
self.em.rclog("Openfile: exec info: %s" % str(e))
|
self.em.rclog("Openfile: exec info: %s" % str(e))
|
||||||
print "RECFILTERROR HELPERNOTFOUND info"
|
print("RECFILTERROR HELPERNOTFOUND info")
|
||||||
sys.exit(1);
|
sys.exit(1);
|
||||||
|
|
||||||
|
|
||||||
self.currentindex = -1
|
self.currentindex = -1
|
||||||
|
|
||||||
self.contents = InfoSimpleSplitter().splitinfo(self.file, infostream)
|
self.contents = InfoSimpleSplitter().splitinfo(self.file, infostream)
|
||||||
@ -117,9 +110,9 @@ class InfoSimpleSplitter:
|
|||||||
index = 0
|
index = 0
|
||||||
listout = []
|
listout = []
|
||||||
node_dict = {}
|
node_dict = {}
|
||||||
node = ""
|
node = b''
|
||||||
infofile = os.path.basename(filename)
|
infofile = os.path.basename(filename)
|
||||||
nodename = "Unknown"
|
nodename = b'Unknown'
|
||||||
|
|
||||||
for line in fin:
|
for line in fin:
|
||||||
|
|
||||||
@ -128,41 +121,41 @@ class InfoSimpleSplitter:
|
|||||||
# beginning with spaces (it's a bug probably, only seen it once)
|
# beginning with spaces (it's a bug probably, only seen it once)
|
||||||
# Maybe we'd actually be better off directly interpreting the
|
# Maybe we'd actually be better off directly interpreting the
|
||||||
# info files
|
# info files
|
||||||
if gotblankline and line.lstrip(" ").startswith("File: "):
|
if gotblankline and line.lstrip(b' ').startswith(b'File: '):
|
||||||
prevnodename = nodename
|
prevnodename = nodename
|
||||||
line = line.rstrip("\n\r")
|
line = line.rstrip(b'\n\r')
|
||||||
pairs = line.split(",")
|
pairs = line.split(b',')
|
||||||
up = "Top"
|
up = b'Top'
|
||||||
nodename = str(index)
|
nodename = str(index)
|
||||||
try:
|
try:
|
||||||
for pair in pairs:
|
for pair in pairs:
|
||||||
name, value = pair.split(':')
|
name, value = pair.split(b':')
|
||||||
name = name.strip(" ")
|
name = name.strip(b' ')
|
||||||
value = value.strip(" ")
|
value = value.strip(b' ')
|
||||||
if name == "Node":
|
if name == b'Node':
|
||||||
nodename = value
|
nodename = value
|
||||||
if name == "Up":
|
if name == b'Up':
|
||||||
up = value
|
up = value
|
||||||
if name == "File":
|
if name == b'File':
|
||||||
infofile = value
|
infofile = value
|
||||||
except:
|
except Exception as err:
|
||||||
print >> sys.stderr, "rclinfo: bad line in %s: [%s]\n" % \
|
print("rclinfo: bad line in %s: [%s] %s\n" % \
|
||||||
(infofile, line)
|
(infofile, line, err), file = sys.stderr)
|
||||||
nodename = prevnodename
|
nodename = prevnodename
|
||||||
node += line
|
node += line
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if node_dict.has_key(nodename):
|
if nodename in node_dict:
|
||||||
print >> sys.stderr, "Info file", filename, \
|
print("Info file %s Dup node: %s" % (filename, nodename), \
|
||||||
"Dup node: ", nodename
|
file=sys.stderr)
|
||||||
node_dict[nodename] = up
|
node_dict[nodename] = up
|
||||||
|
|
||||||
if index != 0:
|
if index != 0:
|
||||||
listout.append((prevnodename, node))
|
listout.append((prevnodename, node))
|
||||||
node = ""
|
node = b''
|
||||||
index += 1
|
index += 1
|
||||||
|
|
||||||
if line.rstrip("\n\r") == '':
|
if line.rstrip(b'\n\r') == b'':
|
||||||
gotblankline = 1
|
gotblankline = 1
|
||||||
else:
|
else:
|
||||||
gotblankline = 0
|
gotblankline = 0
|
||||||
@ -170,7 +163,7 @@ class InfoSimpleSplitter:
|
|||||||
node += line
|
node += line
|
||||||
|
|
||||||
# File done, add last dangling node
|
# File done, add last dangling node
|
||||||
if node != "":
|
if node != b'':
|
||||||
listout.append((nodename, node))
|
listout.append((nodename, node))
|
||||||
|
|
||||||
# Compute node paths (concatenate "Up" values), to be used
|
# Compute node paths (concatenate "Up" values), to be used
|
||||||
@ -178,34 +171,34 @@ class InfoSimpleSplitter:
|
|||||||
# the info file tree is bad
|
# the info file tree is bad
|
||||||
listout1 = []
|
listout1 = []
|
||||||
for nodename, node in listout:
|
for nodename, node in listout:
|
||||||
title = ""
|
title = b''
|
||||||
loop = 0
|
loop = 0
|
||||||
error = 0
|
error = 0
|
||||||
while nodename != "Top":
|
while nodename != b'Top':
|
||||||
title = nodename + " / " + title
|
title = nodename + b' / ' + title
|
||||||
if node_dict.has_key(nodename):
|
if nodename in node_dict:
|
||||||
nodename = node_dict[nodename]
|
nodename = node_dict[nodename]
|
||||||
else:
|
else:
|
||||||
print >> sys.stderr, \
|
print(
|
||||||
"Infofile: node's Up does not exist: file %s, path %s, up [%s]" % \
|
"Infofile: node's Up does not exist: file %s, path %s, up [%s]" % \
|
||||||
(infofile, title, nodename)
|
(infofile, title, nodename), sys.stderr)
|
||||||
error = 1
|
error = 1
|
||||||
break
|
break
|
||||||
loop += 1
|
loop += 1
|
||||||
if loop > 50:
|
if loop > 50:
|
||||||
print >> sys.stderr, "Infofile: bad tree (looping)", \
|
print("Infofile: bad tree (looping) %s" % infofile, \
|
||||||
infofile
|
file = sys.stderr)
|
||||||
error = 1
|
error = 1
|
||||||
break
|
break
|
||||||
|
|
||||||
if error:
|
if error:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if title == "":
|
if title == b'':
|
||||||
title = infofile
|
title = infofile
|
||||||
else:
|
else:
|
||||||
title = infofile + " / " + title
|
title = infofile + b' / ' + title
|
||||||
title = title.rstrip(" / ")
|
title = title.rstrip(b' / ')
|
||||||
listout1.append((title, node))
|
listout1.append((title, node))
|
||||||
|
|
||||||
return listout1
|
return listout1
|
||||||
|
|||||||
@ -1,6 +1,8 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
# Read a .kar midi karaoke file and translate to recoll indexable format
|
# Read a .kar midi karaoke file and translate to recoll indexable format
|
||||||
|
# This does not work with Python3 yet because python:midi doesn't
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import sys
|
import sys
|
||||||
@ -15,9 +17,9 @@ except:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import midi
|
from midi import midi
|
||||||
except:
|
except:
|
||||||
print "RECFILTERROR HELPERNOTFOUND python:midi"
|
print("RECFILTERROR HELPERNOTFOUND python:midi")
|
||||||
sys.exit(1);
|
sys.exit(1);
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -106,12 +108,12 @@ class KarTextExtractor:
|
|||||||
if data:
|
if data:
|
||||||
try:
|
try:
|
||||||
data = data.decode(self.encoding, 'ignore')
|
data = data.decode(self.encoding, 'ignore')
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("Decode failed: " + str(err))
|
self.em.rclog("Decode failed: " + str(err))
|
||||||
return ""
|
return ""
|
||||||
try:
|
try:
|
||||||
data = data.encode('utf-8')
|
data = data.encode('utf-8')
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("Encode failed: " + str(err))
|
self.em.rclog("Encode failed: " + str(err))
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
@ -127,7 +129,7 @@ class KarTextExtractor:
|
|||||||
just one our users could use if there is trouble with guessing
|
just one our users could use if there is trouble with guessing
|
||||||
encodings'''
|
encodings'''
|
||||||
|
|
||||||
rexp = r'\(([^\)]+)\)\.[a-zA-Z]+$'
|
rexp = b'''\(([^\)]+)\)\.[a-zA-Z]+$'''
|
||||||
m = re.search(rexp, fn)
|
m = re.search(rexp, fn)
|
||||||
if m:
|
if m:
|
||||||
return m.group(1)
|
return m.group(1)
|
||||||
@ -165,7 +167,7 @@ class KarTextExtractor:
|
|||||||
if count > 0:
|
if count > 0:
|
||||||
confidence = 1.0
|
confidence = 1.0
|
||||||
encoding = code
|
encoding = code
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("stopwords-based classifier failed: %s" % err)
|
self.em.rclog("stopwords-based classifier failed: %s" % err)
|
||||||
return (encoding, confidence)
|
return (encoding, confidence)
|
||||||
|
|
||||||
@ -177,7 +179,7 @@ class KarTextExtractor:
|
|||||||
docdata = ""
|
docdata = ""
|
||||||
ok = False
|
ok = False
|
||||||
|
|
||||||
if not params.has_key("filename:"):
|
if "filename:" not in params:
|
||||||
self.em.rclog("extractone: no mime or file name")
|
self.em.rclog("extractone: no mime or file name")
|
||||||
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
||||||
filename = params["filename:"]
|
filename = params["filename:"]
|
||||||
@ -191,7 +193,7 @@ class KarTextExtractor:
|
|||||||
self.encoding = ""
|
self.encoding = ""
|
||||||
|
|
||||||
# Mimetype not used for now
|
# Mimetype not used for now
|
||||||
if not params.has_key("mimetype:"):
|
if "mimetype:" not in params:
|
||||||
mimetype = 'audio/x-midi'
|
mimetype = 'audio/x-midi'
|
||||||
else:
|
else:
|
||||||
mimetype = params["mimetype:"]
|
mimetype = params["mimetype:"]
|
||||||
@ -199,8 +201,8 @@ class KarTextExtractor:
|
|||||||
# Read in and midi-decode the file
|
# Read in and midi-decode the file
|
||||||
try:
|
try:
|
||||||
stream = midi.read_midifile(filename)
|
stream = midi.read_midifile(filename)
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("extractone: midi extract failed: [%s]" % err)
|
self.em.rclog("extractone: read_midifile failed: [%s]" % err)
|
||||||
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
||||||
|
|
||||||
title = None
|
title = None
|
||||||
|
|||||||
@ -13,6 +13,8 @@ epsilon with dasia (in unicode but not iso). Can this be replaced by either epsi
|
|||||||
with acute accent ?
|
with acute accent ?
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import string
|
import string
|
||||||
import glob
|
import glob
|
||||||
@ -117,7 +119,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
lang,code,count = classifier.classify(rawtext)
|
lang,code,count = classifier.classify(rawtext)
|
||||||
if count > 0:
|
if count > 0:
|
||||||
print "%s %s %d" % (code, lang, count)
|
print("%s %s %d" % (code, lang, count))
|
||||||
else:
|
else:
|
||||||
print "UNKNOWN UNKNOWN 0"
|
print("UNKNOWN UNKNOWN 0")
|
||||||
|
|
||||||
|
|||||||
@ -43,7 +43,7 @@ class RarExtractor:
|
|||||||
try:
|
try:
|
||||||
rarinfo = self.rar.getinfo(ipath)
|
rarinfo = self.rar.getinfo(ipath)
|
||||||
isdir = rarinfo.isdir()
|
isdir = rarinfo.isdir()
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("extractone: getinfo failed: [%s]" % err)
|
self.em.rclog("extractone: getinfo failed: [%s]" % err)
|
||||||
return (True, docdata, ipath, false)
|
return (True, docdata, ipath, false)
|
||||||
|
|
||||||
@ -56,7 +56,7 @@ class RarExtractor:
|
|||||||
else:
|
else:
|
||||||
docdata = self.rar.read(ipath)
|
docdata = self.rar.read(ipath)
|
||||||
ok = True
|
ok = True
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("extractone: failed: [%s]" % err)
|
self.em.rclog("extractone: failed: [%s]" % err)
|
||||||
ok = False
|
ok = False
|
||||||
else:
|
else:
|
||||||
@ -89,7 +89,7 @@ class RarExtractor:
|
|||||||
try:
|
try:
|
||||||
ipath = ipath.decode("utf-8")
|
ipath = ipath.decode("utf-8")
|
||||||
return self.extractone(ipath)
|
return self.extractone(ipath)
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
return (ok, data, ipath, eof)
|
return (ok, data, ipath, eof)
|
||||||
|
|
||||||
def getnext(self, params):
|
def getnext(self, params):
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import rclexec1
|
import rclexec1
|
||||||
@ -10,24 +11,24 @@ import os
|
|||||||
class RTFProcessData:
|
class RTFProcessData:
|
||||||
def __init__(self, em):
|
def __init__(self, em):
|
||||||
self.em = em
|
self.em = em
|
||||||
self.out = ""
|
self.out = b''
|
||||||
self.gothead = 0
|
self.gothead = 0
|
||||||
self.patendhead = re.compile('''</head>''')
|
self.patendhead = re.compile(b'''</head>''')
|
||||||
self.patcharset = re.compile('''^<meta http-equiv=''')
|
self.patcharset = re.compile(b'''^<meta http-equiv=''')
|
||||||
|
|
||||||
# Some versions of unrtf put out a garbled charset line.
|
# Some versions of unrtf put out a garbled charset line.
|
||||||
# Apart from this, we pass the data untouched.
|
# Apart from this, we pass the data untouched.
|
||||||
def takeLine(self, line):
|
def takeLine(self, line):
|
||||||
if not self.gothead:
|
if not self.gothead:
|
||||||
if self.patendhead.search(line):
|
if self.patendhead.search(line):
|
||||||
self.out += '<meta http-equiv="Content-Type" ' + \
|
self.out += b'<meta http-equiv="Content-Type" ' + \
|
||||||
'content="text/html;charset=UTF-8">' + "\n"
|
b'content="text/html;charset=UTF-8">' + b'\n'
|
||||||
self.out += line + "\n"
|
self.out += line + b'\n'
|
||||||
self.gothead = 1
|
self.gothead = 1
|
||||||
elif not self.patcharset.search(line):
|
elif not self.patcharset.search(line):
|
||||||
self.out += line + "\n"
|
self.out += line + b'\n'
|
||||||
else:
|
else:
|
||||||
self.out += line + "\n"
|
self.out += line + b'\n'
|
||||||
|
|
||||||
def wrapData(self):
|
def wrapData(self):
|
||||||
return self.out
|
return self.out
|
||||||
@ -52,7 +53,7 @@ class RTFFilter:
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
if not rclexecm.which("unrtf"):
|
if not rclexecm.which("unrtf"):
|
||||||
print("RECFILTERROR HELPERNOTFOUND antiword")
|
print("RECFILTERROR HELPERNOTFOUND unrtf")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
proto = rclexecm.RclExecM()
|
proto = rclexecm.RclExecM()
|
||||||
filter = RTFFilter(proto)
|
filter = RTFFilter(proto)
|
||||||
|
|||||||
@ -33,7 +33,7 @@ class TarExtractor:
|
|||||||
else:
|
else:
|
||||||
docdata = self.tar.extractfile(ipath).read()
|
docdata = self.tar.extractfile(ipath).read()
|
||||||
ok = True
|
ok = True
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
ok = False
|
ok = False
|
||||||
iseof = rclexecm.RclExecM.noteof
|
iseof = rclexecm.RclExecM.noteof
|
||||||
if self.currentindex >= len(self.namen) -1:
|
if self.currentindex >= len(self.namen) -1:
|
||||||
@ -59,7 +59,7 @@ class TarExtractor:
|
|||||||
try:
|
try:
|
||||||
ipath = ipath.decode("utf-8")
|
ipath = ipath.decode("utf-8")
|
||||||
return self.extractone(ipath)
|
return self.extractone(ipath)
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
return (ok, data, ipath, eof)
|
return (ok, data, ipath, eof)
|
||||||
|
|
||||||
def getnext(self, params):
|
def getnext(self, params):
|
||||||
|
|||||||
@ -15,7 +15,7 @@ class WarExtractor:
|
|||||||
member = self.tar.extractfile(tarinfo)
|
member = self.tar.extractfile(tarinfo)
|
||||||
docdata = member.read()
|
docdata = member.read()
|
||||||
ok = True
|
ok = True
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("extractone: failed: [%s]" % err)
|
self.em.rclog("extractone: failed: [%s]" % err)
|
||||||
ok = False
|
ok = False
|
||||||
return (ok, docdata, tarinfo.name, rclexecm.RclExecM.noteof)
|
return (ok, docdata, tarinfo.name, rclexecm.RclExecM.noteof)
|
||||||
@ -26,7 +26,7 @@ class WarExtractor:
|
|||||||
try:
|
try:
|
||||||
self.tar = tarfile.open(params["filename:"])
|
self.tar = tarfile.open(params["filename:"])
|
||||||
return True
|
return True
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog(str(err))
|
self.em.rclog(str(err))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -34,7 +34,7 @@ class WarExtractor:
|
|||||||
ipath = params["ipath:"]
|
ipath = params["ipath:"]
|
||||||
try:
|
try:
|
||||||
tarinfo = self.tar.getmember(ipath)
|
tarinfo = self.tar.getmember(ipath)
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog(str(err))
|
self.em.rclog(str(err))
|
||||||
return (False, "", ipath, rclexecm.RclExecM.noteof)
|
return (False, "", ipath, rclexecm.RclExecM.noteof)
|
||||||
return self.extractone(tarinfo)
|
return self.extractone(tarinfo)
|
||||||
|
|||||||
@ -72,7 +72,7 @@ class ZipExtractor:
|
|||||||
else:
|
else:
|
||||||
docdata = self.zip.read(ipath)
|
docdata = self.zip.read(ipath)
|
||||||
ok = True
|
ok = True
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("extractone: failed: [%s]" % err)
|
self.em.rclog("extractone: failed: [%s]" % err)
|
||||||
ok = False
|
ok = False
|
||||||
iseof = rclexecm.RclExecM.noteof
|
iseof = rclexecm.RclExecM.noteof
|
||||||
@ -98,7 +98,7 @@ class ZipExtractor:
|
|||||||
try:
|
try:
|
||||||
self.zip = ZipFile(filename)
|
self.zip = ZipFile(filename)
|
||||||
return True
|
return True
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
self.em.rclog("openfile: failed: [%s]" % err)
|
self.em.rclog("openfile: failed: [%s]" % err)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -111,7 +111,7 @@ class ZipExtractor:
|
|||||||
try:
|
try:
|
||||||
ipath = ipath.decode("utf-8")
|
ipath = ipath.decode("utf-8")
|
||||||
return self.extractone(ipath)
|
return self.extractone(ipath)
|
||||||
except Exception, err:
|
except Exception as err:
|
||||||
return (ok, data, ipath, eof)
|
return (ok, data, ipath, eof)
|
||||||
|
|
||||||
def getnext(self, params):
|
def getnext(self, params):
|
||||||
|
|||||||
@ -75,7 +75,7 @@ class ConfSimple:
|
|||||||
def getNames(self, sk = ''):
|
def getNames(self, sk = ''):
|
||||||
if not sk in self.submaps:
|
if not sk in self.submaps:
|
||||||
return None
|
return None
|
||||||
return self.submaps[sk].keys()
|
return list(self.submaps[sk].keys())
|
||||||
|
|
||||||
class ConfTree(ConfSimple):
|
class ConfTree(ConfSimple):
|
||||||
"""A ConfTree adds path-hierarchical interpretation of the section keys,
|
"""A ConfTree adds path-hierarchical interpretation of the section keys,
|
||||||
|
|||||||
@ -4,6 +4,8 @@ logfilename = /tmp/logrcltst
|
|||||||
daemloglevel = 6
|
daemloglevel = 6
|
||||||
daemlogfilename = /tmp/rclmontrace
|
daemlogfilename = /tmp/rclmontrace
|
||||||
|
|
||||||
|
systemfilecommand = xdg-mime query filetype
|
||||||
|
|
||||||
indexStripChars = 1
|
indexStripChars = 1
|
||||||
detectxattronly = 1
|
detectxattronly = 1
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user