From d80db8c09f50929e7d8848c167aa3b1d1a8aace2 Mon Sep 17 00:00:00 2001
From: Francois Botha
Date: Mon, 6 Apr 2015 09:57:00 +0200
Subject: [PATCH] Implement filter for .7z files. Based on rclzip and rcltar

---
Review note: compared to the original submission, rcl7z now imports sys
(the pylzma failure path calls sys.exit()), loses a stray semicolon and
gains comments; the line counts below were updated accordingly. The blob
ids on the "index" lines are still those of the original submission.
NOTE(review): whitespace inside the configuration hunks (alignment around
"=", indentation of the continuation lines in mimeconf, and what exactly
differs in the whitespace-only x-tar / zip changes) was reconstructed;
verify against the tree before applying.

 packaging/FreeBSD/recoll/pkg-plist |   1 +
 src/filters/rcl7z                  | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/mk/manifest.txt                |   1 +
 src/sampleconf/mimeconf            |   7 +-
 src/sampleconf/mimemap             |   1 +
 src/sampleconf/mimeview            |   1 +
 src/sampleconf/mimeview.mac        |   1 +
 7 files changed, 152 insertions(+), 2 deletions(-)
 create mode 100755 src/filters/rcl7z

diff --git a/packaging/FreeBSD/recoll/pkg-plist b/packaging/FreeBSD/recoll/pkg-plist
index a11a10dd..bfcd9397 100644
--- a/packaging/FreeBSD/recoll/pkg-plist
+++ b/packaging/FreeBSD/recoll/pkg-plist
@@ -55,6 +55,7 @@ share/pixmaps/recoll.png
 %%DATADIR%%/filters/rclwpd
 %%DATADIR%%/filters/rclxls
 %%DATADIR%%/filters/rclzip
+%%DATADIR%%/filters/rcl7z
 %%DATADIR%%/filters/xdg-open
 %%DATADIR%%/images/aptosid-book.png
 %%DATADIR%%/images/aptosid-manual.png
diff --git a/src/filters/rcl7z b/src/filters/rcl7z
new file mode 100755
index 00000000..0d96c6b2
--- /dev/null
+++ b/src/filters/rcl7z
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+
+# 7-Zip file filter for Recoll
+
+# Thanks to Recoll user Martin Ziegler
+# This is a modified version of rclzip, with some help from rcltar
+# Python pylzma library required. See http://www.joachim-bauch.de/projects/pylzma/
+
+
+import os
+import sys
+import fnmatch
+import rclexecm
+
+try:
+    import pylzma
+    from py7zlib import Archive7z
+except:
+    # Report the missing helper on stdout, then exit with an error status
+    print "RECFILTERROR HELPERNOTFOUND python:pylzma"
+    sys.exit(1)
+
+try:
+    from recoll import rclconfig
+    hasrclconfig = True
+except:
+    hasrclconfig = False
+# As a temporary measure, we also look for rclconfig as a bare
+# module. This is so that the intermediate releases of the filter can
+# ship and use rclconfig.py with the filter code
+if not hasrclconfig:
+    try:
+        import rclconfig
+        hasrclconfig = True
+    except:
+        pass
+
+class SevenZipExtractor:
+    """Hand out the members of a 7-Zip archive to rclexecm, one at a time."""
+
+    def __init__(self, em):
+        # em is the protocol handler: used for logging and for setting the
+        # mime type of the archive's own ("self") document
+        self.currentindex = 0
+        self.em = em
+
+    def extractone(self, ipath):
+        """Read archive member ipath. Returns (ok, data, ipath, eof status)"""
+        #self.em.rclog("extractone: [%s]" % ipath)
+        docdata = ""
+        try:
+            docdata = self.sevenzip.getmember(ipath).read()
+            ok = True
+        except Exception, err:
+            self.em.rclog("extractone: failed: [%s]" % err)
+            ok = False
+        # Set eofnext when currentindex is at or past the last entry
+        iseof = rclexecm.RclExecM.noteof
+        if self.currentindex >= len(self.sevenzip.getnames()) -1:
+            iseof = rclexecm.RclExecM.eofnext
+        # Unicode ipaths are returned utf-8 encoded
+        if isinstance(ipath, unicode):
+            ipath = ipath.encode("utf-8")
+        return (ok, docdata, ipath, iseof)
+
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        """Open the archive named by params["filename:"]. Returns True on success"""
+        filename = params["filename:"]
+        # -1: the next getnext() call returns the "self" document
+        self.currentindex = -1
+        self.skiplist = []
+
+        # Note: the skip patterns are read from the zipSkippedNames
+        # parameter, there is no 7z-specific one
+        if hasrclconfig:
+            config = rclconfig.RclConfig()
+            config.setKeyDir(os.path.dirname(filename))
+            skipped = config.getConfParam("zipSkippedNames")
+            if skipped is not None:
+                self.skiplist = skipped.split(" ")
+
+        try:
+            fp = open(filename, 'rb')
+            self.sevenzip = Archive7z(fp)
+            return True
+        except Exception, err:
+            self.em.rclog("openfile: failed: [%s]" % err)
+            return False
+
+    def getipath(self, params):
+        """Extract the member named by params["ipath:"]"""
+        ipath = params["ipath:"]
+        ok, data, ipath, eof = self.extractone(ipath)
+        if ok:
+            return (ok, data, ipath, eof)
+        # Not found. Maybe we need to decode the path?
+        try:
+            ipath = ipath.decode("utf-8")
+            return self.extractone(ipath)
+        except Exception, err:
+            return (ok, data, ipath, eof)
+
+    def getnext(self, params):
+        """Return the next document: first the archive itself, then each member"""
+        if self.currentindex == -1:
+            # Return "self" doc
+            self.currentindex = 0
+            self.em.setmimetype('text/plain')
+            if len(self.sevenzip.getnames()) == 0:
+                eof = rclexecm.RclExecM.eofnext
+            else:
+                eof = rclexecm.RclExecM.noteof
+            return (True, "", "", eof)
+
+        if self.currentindex >= len(self.sevenzip.getnames()):
+            #self.em.rclog("getnext: EOF hit")
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        else:
+            entryname = self.sevenzip.getnames()[self.currentindex]
+
+        if hasrclconfig and len(self.skiplist) != 0:
+            # Move forward to the first entry which matches no skip pattern
+            while self.currentindex < len(self.sevenzip.getnames()):
+                entryname = self.sevenzip.getnames()[self.currentindex]
+                for pat in self.skiplist:
+                    if fnmatch.fnmatch(entryname, pat):
+                        entryname = None
+                        break
+                if entryname is not None:
+                    break
+                self.currentindex += 1
+            if entryname is None:
+                return (False, "", "", rclexecm.RclExecM.eofnow)
+
+        ret= self.extractone(entryname)
+        self.currentindex += 1
+        return ret
+
+# Main program: create protocol handler and extractor and run them
+proto = rclexecm.RclExecM()
+extract = SevenZipExtractor(proto)
+rclexecm.main(proto, extract)
diff --git a/src/mk/manifest.txt b/src/mk/manifest.txt
index 26bb545a..417a49c7 100644
--- a/src/mk/manifest.txt
+++ b/src/mk/manifest.txt
@@ -162,6 +162,7 @@ filters/rclwpd
 filters/rclxls
 filters/rclxml
 filters/rclzip
+filters/rcl7z
 filters/recfiltcommon
 filters/xls-dump.py
 filters/xlsxmltocsv.py
diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf
index 33feb411..beb23c0e 100644
--- a/src/sampleconf/mimeconf
+++ b/src/sampleconf/mimeconf
@@ -119,6 +119,7 @@ application/x-shellscript = internal text/plain
 application/x-tex = exec rcltex
 application/x-webarchive = execm rclwar
 application/zip = execm rclzip;charset=default
+application/x-7z-compressed = execm rcl7z
 audio/mpeg = execm rclaudio
 audio/mp4 = execm rclaudio
 audio/aac = execm rclaudio
@@ -220,6 +221,7 @@ application/x-tex = wordprocessing
 application/x-webarchive = archive
 application/xml = document
 application/zip = archive
+application/x-7z-compressed = archive
 audio/mpeg = sownd
 audio/x-karaoke = sownd
 image/bmp = image
@@ -359,9 +361,10 @@ other = application/vnd.sun.xml.draw \
       application/x-fsdirectory \
       application/x-mimehtml \
       application/x-rar \
-	application/x-tar \
+      application/x-tar \
       application/x-webarchive \
-	application/zip \
+      application/zip \
+      application/x-7z-compressed \
       inode/directory \
       inode/symlink \
 
diff --git a/src/sampleconf/mimemap b/src/sampleconf/mimemap
index 4b2cddd0..d100237f 100644
--- a/src/sampleconf/mimemap
+++ b/src/sampleconf/mimemap
@@ -61,6 +61,7 @@
 .rar = application/x-rar
 #.Z = application/x-compress
 .zip = application/zip
+.7z = application/x-7z-compressed
 
 # The rcltar module can handle compressed tar formats internally so we
 # use application/x-tar for all tar files compressed or not. Note that tar
diff --git a/src/sampleconf/mimeview b/src/sampleconf/mimeview
index 6f6e9de4..654d52ce 100644
--- a/src/sampleconf/mimeview
+++ b/src/sampleconf/mimeview
@@ -124,6 +124,7 @@ application/x-okular-notes = okular %f
 application/x-rar = ark %f
 application/x-tar = ark %f
 application/zip = ark %f
+application/x-7z-compressed = ark %f
 
 application/x-awk = emacsclient --no-wait %f
 application/x-perl = emacsclient --no-wait %f
diff --git a/src/sampleconf/mimeview.mac b/src/sampleconf/mimeview.mac
index 5c54526b..fc24b8b1 100644
--- a/src/sampleconf/mimeview.mac
+++ b/src/sampleconf/mimeview.mac
@@ -107,6 +107,7 @@ application/x-okular-notes = okular %f
 application/x-rar = ark %f
 application/x-tar = ark %f
 application/zip = ark %f
+application/x-7z-compressed = ark %f
 
 application/x-awk = emacsclient %f
 application/x-perl = emacsclient %f