Merged in igitur/recoll (pull request #4)
Implement filter for .7z files. Based on rclzip and rcltar
This commit is contained in:
commit
f201f71940
@ -55,6 +55,7 @@ share/pixmaps/recoll.png
|
|||||||
%%DATADIR%%/filters/rclwpd
|
%%DATADIR%%/filters/rclwpd
|
||||||
%%DATADIR%%/filters/rclxls
|
%%DATADIR%%/filters/rclxls
|
||||||
%%DATADIR%%/filters/rclzip
|
%%DATADIR%%/filters/rclzip
|
||||||
|
%%DATADIR%%/filters/rcl7z
|
||||||
%%DATADIR%%/filters/xdg-open
|
%%DATADIR%%/filters/xdg-open
|
||||||
%%DATADIR%%/images/aptosid-book.png
|
%%DATADIR%%/images/aptosid-book.png
|
||||||
%%DATADIR%%/images/aptosid-manual.png
|
%%DATADIR%%/images/aptosid-manual.png
|
||||||
|
|||||||
127
src/filters/rcl7z
Executable file
127
src/filters/rcl7z
Executable file
@ -0,0 +1,127 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# 7-Zip file filter for Recoll
|
||||||
|
|
||||||
|
# Thanks to Recoll user Martin Ziegler
|
||||||
|
# This is a modified version of rclzip, with some help from rcltar
|
||||||
|
# Python pylzma library required. See http://www.joachim-bauch.de/projects/pylzma/
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
|
import fnmatch
|
||||||
|
import rclexecm
|
||||||
|
|
||||||
|
# sys is needed for the error exit below; the original script called
# sys.exit() without ever importing sys, so a missing pylzma raised a
# NameError instead of exiting cleanly.
import sys

# The filter cannot work without pylzma / py7zlib. If either import fails,
# report the missing helper on stdout and exit with a non-zero status.
try:
    import pylzma
    from py7zlib import Archive7z
except ImportError:
    # Parenthesized single-string form prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("RECFILTERROR HELPERNOTFOUND python:pylzma")
    sys.exit(1)
|
||||||
|
|
||||||
|
# rclconfig is optional: it is only used to read the list of skipped member
# names. hasrclconfig records whether it could be imported.
# Only ImportError is caught, so that unrelated failures (or a
# KeyboardInterrupt) are not silently swallowed.
try:
    from recoll import rclconfig
    hasrclconfig = True
except ImportError:
    hasrclconfig = False

# As a temporary measure, we also look for rclconfig as a bare
# module. This is so that the intermediate releases of the filter can
# ship and use rclconfig.py with the filter code
if not hasrclconfig:
    try:
        import rclconfig
        hasrclconfig = True
    except ImportError:
        # Deliberate best effort: run without configuration support.
        pass
|
||||||
|
|
||||||
|
class SevenZipExtractor:
    """Recoll execm handler that exposes the members of a 7-Zip archive.

    The object is driven by rclexecm through openfile(), getipath() and
    getnext(). Every extraction method returns a 4-tuple
    (ok, data, ipath, eof) where eof is one of the rclexecm.RclExecM
    end-of-file markers (noteof, eofnext, eofnow).
    """

    def __init__(self, em):
        """Remember the protocol handler *em* (used for logging and for
        setting the MIME type) and start with no archive opened."""
        self.currentindex = 0
        self.em = em
        # File object backing the current archive, kept so it can be closed.
        self.fp = None
        # Archive7z instance for the current file, or None if none is open.
        self.sevenzip = None
        # fnmatch patterns for member names that getnext() must not return.
        self.skiplist = []

    def _close(self):
        """Forget the current archive and close its underlying file, if any."""
        self.sevenzip = None
        fp, self.fp = self.fp, None
        if fp is not None:
            fp.close()

    def _skipped(self, entryname):
        """Return True if *entryname* matches one of the skip patterns."""
        return any(fnmatch.fnmatch(entryname, pat) for pat in self.skiplist)

    def extractone(self, ipath):
        """Read the archive member named *ipath*.

        Returns (ok, docdata, ipath, iseof). On failure the error is logged
        through the protocol handler, ok is False and docdata is "".
        A unicode ipath is returned encoded as UTF-8.
        """
        #self.em.rclog("extractone: [%s]" % ipath)
        docdata = ""
        try:
            docdata = self.sevenzip.getmember(ipath).read()
            ok = True
        except Exception as err:
            self.em.rclog("extractone: failed: [%s]" % err)
            ok = False
        # Announce end of file together with the last entry, so the caller
        # knows in advance that the next getnext() will have nothing.
        iseof = rclexecm.RclExecM.noteof
        if self.currentindex >= len(self.sevenzip.getnames()) - 1:
            iseof = rclexecm.RclExecM.eofnext
        if isinstance(ipath, unicode):
            ipath = ipath.encode("utf-8")
        return (ok, docdata, ipath, iseof)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Open the archive named by params["filename:"].

        Resets the iteration state, loads the skip patterns from the
        "zipSkippedNames" configuration parameter when rclconfig is
        available, and returns True on success. On failure the error is
        logged and False is returned.
        """
        filename = params["filename:"]
        self.currentindex = -1
        self.skiplist = []
        # Release the previously processed archive: the original code never
        # closed its file and kept the stale archive around if the new open
        # failed.
        self._close()

        if hasrclconfig:
            config = rclconfig.RclConfig()
            config.setKeyDir(os.path.dirname(filename))
            skipped = config.getConfParam("zipSkippedNames")
            if skipped is not None:
                # split() without argument ignores repeated blanks instead
                # of producing empty patterns.
                self.skiplist = skipped.split()

        try:
            # The file object is handed to the archive and must stay open
            # while members are extracted; it is closed by the next
            # openfile() call or on failure below.
            self.fp = open(filename, 'rb')
            self.sevenzip = Archive7z(self.fp)
            return True
        except Exception as err:
            self.em.rclog("openfile: failed: [%s]" % err)
            self._close()
            return False

    def getipath(self, params):
        """Extract the member designated by params["ipath:"].

        The path is first tried as received; if that fails, it is retried
        after decoding it from UTF-8. If decoding or the retry raises, the
        result of the first attempt is returned.
        """
        ipath = params["ipath:"]
        ok, data, ipath, eof = self.extractone(ipath)
        if ok:
            return (ok, data, ipath, eof)
        # Not found. Maybe we need to decode the path?
        try:
            ipath = ipath.decode("utf-8")
            return self.extractone(ipath)
        except Exception:
            return (ok, data, ipath, eof)

    def getnext(self, params):
        """Return the next document of the archive.

        The first call after openfile() returns an empty "self" document
        for the archive itself. Later calls return the members in archive
        order, skipping names that match the skip list, and finally
        (False, "", "", eofnow) when nothing is left.
        """
        names = self.sevenzip.getnames()

        if self.currentindex == -1:
            # Return "self" doc
            self.currentindex = 0
            self.em.setmimetype('text/plain')
            if len(names) == 0:
                eof = rclexecm.RclExecM.eofnext
            else:
                eof = rclexecm.RclExecM.noteof
            return (True, "", "", eof)

        # Advance to the first entry that is not filtered out.
        entryname = None
        while self.currentindex < len(names):
            candidate = names[self.currentindex]
            if not self._skipped(candidate):
                entryname = candidate
                break
            self.currentindex += 1

        if entryname is None:
            #self.em.rclog("getnext: EOF hit")
            return (False, "", "", rclexecm.RclExecM.eofnow)

        ret = self.extractone(entryname)
        self.currentindex += 1
        return ret
|
||||||
|
|
||||||
|
# Main program: create protocol handler and extractor and run them.
# Guarded so that the protocol loop only starts when the filter is executed
# as a script, not when the module is merely imported.
if __name__ == "__main__":
    proto = rclexecm.RclExecM()
    extract = SevenZipExtractor(proto)
    rclexecm.main(proto, extract)
|
||||||
@ -162,6 +162,7 @@ filters/rclwpd
|
|||||||
filters/rclxls
|
filters/rclxls
|
||||||
filters/rclxml
|
filters/rclxml
|
||||||
filters/rclzip
|
filters/rclzip
|
||||||
|
filters/rcl7z
|
||||||
filters/recfiltcommon
|
filters/recfiltcommon
|
||||||
filters/xls-dump.py
|
filters/xls-dump.py
|
||||||
filters/xlsxmltocsv.py
|
filters/xlsxmltocsv.py
|
||||||
|
|||||||
@ -119,6 +119,7 @@ application/x-shellscript = internal text/plain
|
|||||||
application/x-tex = exec rcltex
|
application/x-tex = exec rcltex
|
||||||
application/x-webarchive = execm rclwar
|
application/x-webarchive = execm rclwar
|
||||||
application/zip = execm rclzip;charset=default
|
application/zip = execm rclzip;charset=default
|
||||||
|
application/x-7z-compressed = execm rcl7z
|
||||||
audio/mpeg = execm rclaudio
|
audio/mpeg = execm rclaudio
|
||||||
audio/mp4 = execm rclaudio
|
audio/mp4 = execm rclaudio
|
||||||
audio/aac = execm rclaudio
|
audio/aac = execm rclaudio
|
||||||
@ -220,6 +221,7 @@ application/x-tex = wordprocessing
|
|||||||
application/x-webarchive = archive
|
application/x-webarchive = archive
|
||||||
application/xml = document
|
application/xml = document
|
||||||
application/zip = archive
|
application/zip = archive
|
||||||
|
application/x-7z-compressed = archive
|
||||||
audio/mpeg = sownd
|
audio/mpeg = sownd
|
||||||
audio/x-karaoke = sownd
|
audio/x-karaoke = sownd
|
||||||
image/bmp = image
|
image/bmp = image
|
||||||
@ -359,9 +361,10 @@ other = application/vnd.sun.xml.draw \
|
|||||||
application/x-fsdirectory \
|
application/x-fsdirectory \
|
||||||
application/x-mimehtml \
|
application/x-mimehtml \
|
||||||
application/x-rar \
|
application/x-rar \
|
||||||
application/x-tar \
|
application/x-tar \
|
||||||
application/x-webarchive \
|
application/x-webarchive \
|
||||||
application/zip \
|
application/zip \
|
||||||
|
application/x-7z-compressed \
|
||||||
inode/directory \
|
inode/directory \
|
||||||
inode/symlink \
|
inode/symlink \
|
||||||
|
|
||||||
|
|||||||
@ -61,6 +61,7 @@
|
|||||||
.rar = application/x-rar
|
.rar = application/x-rar
|
||||||
#.Z = application/x-compress
|
#.Z = application/x-compress
|
||||||
.zip = application/zip
|
.zip = application/zip
|
||||||
|
.7z = application/x-7z-compressed
|
||||||
|
|
||||||
# The rcltar module can handle compressed tar formats internally so we
|
# The rcltar module can handle compressed tar formats internally so we
|
||||||
# use application/x-tar for all tar files compressed or not. Note that tar
|
# use application/x-tar for all tar files compressed or not. Note that tar
|
||||||
|
|||||||
@ -124,6 +124,7 @@ application/x-okular-notes = okular %f
|
|||||||
application/x-rar = ark %f
|
application/x-rar = ark %f
|
||||||
application/x-tar = ark %f
|
application/x-tar = ark %f
|
||||||
application/zip = ark %f
|
application/zip = ark %f
|
||||||
|
application/x-7z-compressed = ark %f
|
||||||
|
|
||||||
application/x-awk = emacsclient --no-wait %f
|
application/x-awk = emacsclient --no-wait %f
|
||||||
application/x-perl = emacsclient --no-wait %f
|
application/x-perl = emacsclient --no-wait %f
|
||||||
|
|||||||
@ -107,6 +107,7 @@ application/x-okular-notes = okular %f
|
|||||||
application/x-rar = ark %f
|
application/x-rar = ark %f
|
||||||
application/x-tar = ark %f
|
application/x-tar = ark %f
|
||||||
application/zip = ark %f
|
application/zip = ark %f
|
||||||
|
application/x-7z-compressed = ark %f
|
||||||
|
|
||||||
application/x-awk = emacsclient %f
|
application/x-awk = emacsclient %f
|
||||||
application/x-perl = emacsclient %f
|
application/x-perl = emacsclient %f
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user