Merged in igitur/recoll (pull request #4)

Implement filter for .7z files. Based on rclzip and rcltar
This commit is contained in:
medoc 2015-04-07 08:27:19 +02:00
commit f201f71940
7 changed files with 137 additions and 2 deletions

View File

@ -55,6 +55,7 @@ share/pixmaps/recoll.png
%%DATADIR%%/filters/rclwpd
%%DATADIR%%/filters/rclxls
%%DATADIR%%/filters/rclzip
%%DATADIR%%/filters/rcl7z
%%DATADIR%%/filters/xdg-open
%%DATADIR%%/images/aptosid-book.png
%%DATADIR%%/images/aptosid-manual.png

127
src/filters/rcl7z Executable file
View File

@ -0,0 +1,127 @@
#!/usr/bin/env python
# 7-Zip file filter for Recoll
# Thanks to Recoll user Martin Ziegler
# This is a modified version of rclzip, with some help from rcltar
# Python pylzma library required. See http://www.joachim-bauch.de/projects/pylzma/
import fnmatch
import os
import sys

import rclexecm

# pylzma (which ships py7zlib) is mandatory: without it no archive can be
# read. Report the missing helper on stdout and stop. 'sys' must be imported
# for the exit call below to work (it previously was not, which turned the
# intended clean exit into a NameError).
try:
    import pylzma
    from py7zlib import Archive7z
except Exception:
    # Single parenthesised argument: prints the same text under Python 2.
    print("RECFILTERROR HELPERNOTFOUND python:pylzma")
    sys.exit(1)

# rclconfig is optional: it is only used to read the skipped-names list.
# Prefer the copy inside the 'recoll' package.
try:
    from recoll import rclconfig
    hasrclconfig = True
except Exception:
    hasrclconfig = False

# As a temporary measure, we also look for rclconfig as a bare
# module. This is so that the intermediate releases of the filter can
# ship and use rclconfig.py with the filter code
if not hasrclconfig:
    try:
        import rclconfig
        hasrclconfig = True
    except Exception:
        # Deliberate best effort: run without skip-list support.
        pass
class SevenZipExtractor:
    """Handler giving rclexecm access to the members of a 7-Zip archive.

    The object is driven through openfile() / getnext() / getipath().
    Every extraction result is a 4-tuple (ok, data, ipath, eof) where
    'eof' is one of the rclexecm.RclExecM noteof / eofnext / eofnow values.

    Attributes:
      em           protocol object, used here for rclog() and setmimetype()
      currentindex index of the next member to return; -1 means the
                   "self" document for the archive has not been returned yet
      skiplist     fnmatch patterns of member names to skip
      sevenzip     the currently opened Archive7z (set by openfile())
      fp           file object backing 'sevenzip', or None
    """

    def __init__(self, em):
        self.currentindex = 0
        self.em = em
        # File object backing the current archive. Kept so that it can be
        # closed explicitly when another archive is opened.
        self.fp = None

    def _closearchive(self):
        """Close the file backing the current archive, if there is one."""
        if self.fp is not None:
            self.fp.close()
            self.fp = None

    def extractone(self, ipath):
        """Read the archive member named 'ipath'.

        Any exception raised while fetching or reading the member is
        logged through em.rclog() and reported as ok == False with empty
        data. The returned ipath is encoded to UTF-8 if it was a unicode
        object. eof is eofnext when currentindex designates the last
        member (or is beyond it), else noteof.
        """
        #self.em.rclog("extractone: [%s]" % ipath)
        docdata = ""
        try:
            docdata = self.sevenzip.getmember(ipath).read()
            ok = True
        except Exception as err:
            self.em.rclog("extractone: failed: [%s]" % err)
            ok = False

        iseof = rclexecm.RclExecM.noteof
        if self.currentindex >= len(self.sevenzip.getnames()) - 1:
            iseof = rclexecm.RclExecM.eofnext

        if isinstance(ipath, unicode):
            ipath = ipath.encode("utf-8")
        return (ok, docdata, ipath, iseof)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Open the archive named by params["filename:"].

        Resets the iteration state, reloads the skip list (from the
        "zipSkippedNames" configuration parameter, when rclconfig is
        available) and opens the archive. Returns True on success; on
        failure the error is logged and False is returned.
        """
        filename = params["filename:"]
        self.currentindex = -1
        self.skiplist = []

        # Release the file of the previously opened archive, if any.
        self._closearchive()

        if hasrclconfig:
            config = rclconfig.RclConfig()
            config.setKeyDir(os.path.dirname(filename))
            skipped = config.getConfParam("zipSkippedNames")
            if skipped is not None:
                # split() without argument: repeated, leading or trailing
                # blanks do not produce empty patterns.
                self.skiplist = skipped.split()

        fp = None
        try:
            fp = open(filename, 'rb')
            self.sevenzip = Archive7z(fp)
        except Exception as err:
            self.em.rclog("openfile: failed: [%s]" % err)
            # Do not leave the file open if the archive could not be parsed.
            if fp is not None:
                fp.close()
            return False

        # The file must stay open while members are being read; it is
        # closed by the next openfile() call.
        self.fp = fp
        return True

    def getipath(self, params):
        """Extract the member named by params["ipath:"].

        The path is first tried as received. If that fails, it is decoded
        from UTF-8 and tried again; if the retry itself raises, the result
        of the first attempt is returned.
        """
        ipath = params["ipath:"]
        ok, data, ipath, eof = self.extractone(ipath)
        if ok:
            return (ok, data, ipath, eof)

        # Not found. Maybe we need to decode the path?
        try:
            ipath = ipath.decode("utf-8")
            return self.extractone(ipath)
        except Exception:
            return (ok, data, ipath, eof)

    def getnext(self, params):
        """Return the next document of the archive.

        The first call after openfile() returns an empty text/plain "self"
        document for the archive. Later calls return the members in
        getnames() order, skipping names matching a skip-list pattern.
        (False, "", "", eofnow) is returned once no member is left.
        """
        names = self.sevenzip.getnames()

        if self.currentindex == -1:
            # Return "self" doc
            self.currentindex = 0
            self.em.setmimetype('text/plain')
            if len(names) == 0:
                eof = rclexecm.RclExecM.eofnext
            else:
                eof = rclexecm.RclExecM.noteof
            return (True, "", "", eof)

        if self.currentindex >= len(names):
            #self.em.rclog("getnext: EOF hit")
            return (False, "", "", rclexecm.RclExecM.eofnow)

        entryname = names[self.currentindex]

        if hasrclconfig and len(self.skiplist) != 0:
            # Move forward to the first member not matching any pattern.
            while self.currentindex < len(names):
                entryname = names[self.currentindex]
                if not any(fnmatch.fnmatch(entryname, pat)
                           for pat in self.skiplist):
                    break
                entryname = None
                self.currentindex += 1
            if entryname is None:
                # Every remaining member was skipped.
                return (False, "", "", rclexecm.RclExecM.eofnow)

        # extractone() computes eof from currentindex, so the index is only
        # advanced after the call.
        ret = self.extractone(entryname)
        self.currentindex += 1
        return ret
# Main program: create protocol handler and extractor and run them.
# The RclExecM object is handed to the extractor (which uses it for logging
# and for setting the MIME type), then both are passed to rclexecm.main().
proto = rclexecm.RclExecM()
extract = SevenZipExtractor(proto)
rclexecm.main(proto, extract)

View File

@ -162,6 +162,7 @@ filters/rclwpd
filters/rclxls
filters/rclxml
filters/rclzip
filters/rcl7z
filters/recfiltcommon
filters/xls-dump.py
filters/xlsxmltocsv.py

View File

@ -119,6 +119,7 @@ application/x-shellscript = internal text/plain
application/x-tex = exec rcltex
application/x-webarchive = execm rclwar
application/zip = execm rclzip;charset=default
application/x-7z-compressed = execm rcl7z
audio/mpeg = execm rclaudio
audio/mp4 = execm rclaudio
audio/aac = execm rclaudio
@ -220,6 +221,7 @@ application/x-tex = wordprocessing
application/x-webarchive = archive
application/xml = document
application/zip = archive
application/x-7z-compressed = archive
audio/mpeg = sownd
audio/x-karaoke = sownd
image/bmp = image
@ -359,9 +361,10 @@ other = application/vnd.sun.xml.draw \
application/x-fsdirectory \
application/x-mimehtml \
application/x-rar \
application/x-tar \
application/x-tar \
application/x-webarchive \
application/zip \
application/zip \
application/x-7z-compressed \
inode/directory \
inode/symlink \

View File

@ -61,6 +61,7 @@
.rar = application/x-rar
#.Z = application/x-compress
.zip = application/zip
.7z = application/x-7z-compressed
# The rcltar module can handle compressed tar formats internally so we
# use application/x-tar for all tar files compressed or not. Note that tar

View File

@ -124,6 +124,7 @@ application/x-okular-notes = okular %f
application/x-rar = ark %f
application/x-tar = ark %f
application/zip = ark %f
application/x-7z-compressed = ark %f
application/x-awk = emacsclient --no-wait %f
application/x-perl = emacsclient --no-wait %f

View File

@ -107,6 +107,7 @@ application/x-okular-notes = okular %f
application/x-rar = ark %f
application/x-tar = ark %f
application/zip = ark %f
application/x-7z-compressed = ark %f
application/x-awk = emacsclient %f
application/x-perl = emacsclient %f