rcl7z: use py7zr if available, rather than pylzma, which does not work on some archives
This commit is contained in:
parent
3479e7cd85
commit
53edd7b213
@ -4,18 +4,27 @@
|
|||||||
|
|
||||||
# Thanks to Recoll user Martin Ziegler
|
# Thanks to Recoll user Martin Ziegler
|
||||||
# This is a modified version of rclzip, with some help from rcltar
|
# This is a modified version of rclzip, with some help from rcltar
|
||||||
# Python pylzma library required. See http://www.joachim-bauch.de/projects/pylzma/
|
#
|
||||||
|
# Normally using py7zr https://github.com/miurahr/py7zr
|
||||||
|
#
|
||||||
|
# Otherwise, as a fallback which does not work on all archives, it may use:
|
||||||
|
# the Python pylzma library. See http://www.joachim-bauch.de/projects/pylzma/
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import fnmatch
|
import fnmatch
|
||||||
import rclexecm
|
import rclexecm
|
||||||
|
|
||||||
|
usingpy7zr = False
|
||||||
try:
|
try:
|
||||||
from py7zlib import Archive7z
|
from py7zr import SevenZipFile as Archive7z
|
||||||
|
usingpy7zr = True
|
||||||
except:
|
except:
|
||||||
print("RECFILTERROR HELPERNOTFOUND python3:pylzma")
|
try:
|
||||||
sys.exit(1);
|
from py7zlib import Archive7z
|
||||||
|
except:
|
||||||
|
print("RECFILTERROR HELPERNOTFOUND python3:py7zr or python3:pylzma")
|
||||||
|
sys.exit(1);
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from recoll import rclconfig
|
from recoll import rclconfig
|
||||||
@ -41,14 +50,18 @@ class SevenZipExtractor:
|
|||||||
def extractone(self, ipath):
|
def extractone(self, ipath):
|
||||||
#self.em.rclog("extractone: [%s]" % ipath)
|
#self.em.rclog("extractone: [%s]" % ipath)
|
||||||
docdata = b''
|
docdata = b''
|
||||||
|
ok = False
|
||||||
try:
|
try:
|
||||||
docdata = self.sevenzip.getmember(ipath).read()
|
if usingpy7zr:
|
||||||
|
docdata = self.sevenzdic[ipath].read()
|
||||||
|
else:
|
||||||
|
docdata = self.sevenzip.getmember(ipath).read()
|
||||||
ok = True
|
ok = True
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
self.em.rclog("extractone: failed: [%s]" % err)
|
self.em.rclog("extractone: failed: [%s]" % err)
|
||||||
ok = False
|
|
||||||
iseof = rclexecm.RclExecM.noteof
|
iseof = rclexecm.RclExecM.noteof
|
||||||
if self.currentindex >= len(self.sevenzip.getnames()) -1:
|
if self.currentindex >= len(self.names) -1:
|
||||||
iseof = rclexecm.RclExecM.eofnext
|
iseof = rclexecm.RclExecM.eofnext
|
||||||
return (ok, docdata, rclexecm.makebytes(ipath), iseof)
|
return (ok, docdata, rclexecm.makebytes(ipath), iseof)
|
||||||
|
|
||||||
@ -72,6 +85,11 @@ class SevenZipExtractor:
|
|||||||
try:
|
try:
|
||||||
self.fp = open(filename, 'rb')
|
self.fp = open(filename, 'rb')
|
||||||
self.sevenzip = Archive7z(self.fp)
|
self.sevenzip = Archive7z(self.fp)
|
||||||
|
if usingpy7zr:
|
||||||
|
self.sevenzdic = self.sevenzip.readall()
|
||||||
|
self.names = [k[0] for k in self.sevenzdic.items()]
|
||||||
|
else:
|
||||||
|
self.names = self.sevenzip.getnames()
|
||||||
return True
|
return True
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
self.em.rclog("openfile: failed: [%s]" % err)
|
self.em.rclog("openfile: failed: [%s]" % err)
|
||||||
@ -94,40 +112,40 @@ class SevenZipExtractor:
|
|||||||
# Return "self" doc
|
# Return "self" doc
|
||||||
self.currentindex = 0
|
self.currentindex = 0
|
||||||
self.em.setmimetype('text/plain')
|
self.em.setmimetype('text/plain')
|
||||||
if len(self.sevenzip.getnames()) == 0:
|
if len(self.names) == 0:
|
||||||
self.closefile()
|
self.closefile()
|
||||||
eof = rclexecm.RclExecM.eofnext
|
eof = rclexecm.RclExecM.eofnext
|
||||||
else:
|
else:
|
||||||
eof = rclexecm.RclExecM.noteof
|
eof = rclexecm.RclExecM.noteof
|
||||||
return (True, "", "", eof)
|
return (True, "", "", eof)
|
||||||
|
|
||||||
if self.currentindex >= len(self.sevenzip.getnames()):
|
if self.currentindex >= len(self.names):
|
||||||
#self.em.rclog("getnext: EOF hit")
|
#self.em.rclog("getnext: EOF hit")
|
||||||
self.closefile()
|
self.closefile()
|
||||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
else:
|
|
||||||
entryname = self.sevenzip.getnames()[self.currentindex]
|
|
||||||
|
|
||||||
if hasrclconfig and len(self.skiplist) != 0:
|
entryname = self.names[self.currentindex]
|
||||||
while self.currentindex < len(self.sevenzip.getnames()):
|
|
||||||
entryname = self.sevenzip.getnames()[self.currentindex]
|
if hasrclconfig and len(self.skiplist) != 0:
|
||||||
for pat in self.skiplist:
|
while self.currentindex < len(self.names):
|
||||||
if fnmatch.fnmatch(entryname, pat):
|
entryname = self.names[self.currentindex]
|
||||||
entryname = None
|
for pat in self.skiplist:
|
||||||
break
|
if fnmatch.fnmatch(entryname, pat):
|
||||||
if entryname is not None:
|
entryname = None
|
||||||
break
|
break
|
||||||
self.currentindex += 1
|
if entryname is not None:
|
||||||
if entryname is None:
|
break
|
||||||
self.closefile()
|
self.currentindex += 1
|
||||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
if entryname is None:
|
||||||
|
|
||||||
ret = self.extractone(entryname)
|
|
||||||
if ret[3] == rclexecm.RclExecM.eofnext or \
|
|
||||||
ret[3] == rclexecm.RclExecM.eofnow:
|
|
||||||
self.closefile()
|
self.closefile()
|
||||||
self.currentindex += 1
|
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
return ret
|
|
||||||
|
ret = self.extractone(entryname)
|
||||||
|
if ret[3] == rclexecm.RclExecM.eofnext or \
|
||||||
|
ret[3] == rclexecm.RclExecM.eofnow:
|
||||||
|
self.closefile()
|
||||||
|
self.currentindex += 1
|
||||||
|
return ret
|
||||||
|
|
||||||
# Main program: create protocol handler and extractor and run them
|
# Main program: create protocol handler and extractor and run them
|
||||||
proto = rclexecm.RclExecM()
|
proto = rclexecm.RclExecM()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user