def stringToStrings(s):
    '''Split a configuration value into a list of strings.

    The input is made of whitespace-separated words and C-style strings
    (double-quoted, with backslash escapes). E.g.:
        word1 word2 "compound \\"quoted\\" string" ->
          ['word1', 'word2', 'compound "quoted" string']

    Words are split on whitespace only, so wildcard patterns such as
    *.txt or #* are returned whole. An empty quoted string ("") yields
    an empty list element.

    Returns an empty list if s is None or empty.
    Raises ValueError (from shlex) on an unterminated quote or a
    trailing backslash.'''
    import shlex
    if not s:
        # shlex.shlex() falls back to reading sys.stdin when given None,
        # which would block the caller: treat "no value" as "no words".
        return []
    lex = shlex.shlex(s, posix=True)
    lex.quotes = '"'
    lex.escape = '\\'
    lex.escapedquotes = '"'
    # Only whitespace separates words. Without this, shlex emits every
    # punctuation character as its own token ('*.txt' -> '*', '.', 'txt').
    lex.whitespace_split = True
    # The value has no comment syntax: keep '#' as an ordinary character
    # so that patterns like '#*' do not swallow the rest of the list.
    lex.commenters = ''
    # Iteration stops on the EOF marker (None in posix mode), not on an
    # empty token, so '""' elements are preserved.
    return list(lex)
Skip the patterns defined by skippedNames +# inside Zip archives. Can be redefined for subdirectories. +# See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html +# +#zipUseSkippedNames = + # # # Space-separated list of wildcard expressions for names that should # be ignored inside zip archives.This is used directly by -# the zip handler, and has a function similar to skippedNames, but works -# independantly. Can be redefined for subdirectories. Supported by recoll -# 1.20 and newer. See -# https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html +# the zip handler. If zipUseSkippedNames is not set, zipSkippedNames +# defines the patterns to be skipped inside archives. If zipUseSkippedNames +# is set, the two lists are concatenated and used. Can be redefined for +# subdirectories. +# See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html # #zipSkippedNames =