diff --git a/src/filters/rclzip b/src/filters/rclzip
index e7e807e6..ad7759b7 100755
--- a/src/filters/rclzip
+++ b/src/filters/rclzip
@@ -28,6 +28,7 @@ from zipfile import ZipFile
try:
from recoll import rclconfig
+ from recoll import conftree
hasrclconfig = True
except:
hasrclconfig = False
@@ -118,10 +119,14 @@ class ZipExtractor:
if hasrclconfig:
config = rclconfig.RclConfig()
config.setKeyDir(os.path.dirname(filename))
+ usebaseskipped = config.getConfParam("zipUseSkippedNames")
+ if usebaseskipped:
+ skipped = config.getConfParam("skippedNames")
+ self.em.rclog("skippedNames: %s"%self.skiplist)
+ self.skiplist += conftree.stringToStrings(skipped)
skipped = config.getConfParam("zipSkippedNames")
if skipped is not None:
- self.skiplist = skipped.split(" ")
-
+ self.skiplist += conftree.stringToStrings(skipped)
try:
if rclexecm.PY3:
# Note: py3 ZipFile wants an str file name, which
diff --git a/src/python/recoll/recoll/conftree.py b/src/python/recoll/recoll/conftree.py
index 4ddc6117..90c241bb 100644
--- a/src/python/recoll/recoll/conftree.py
+++ b/src/python/recoll/recoll/conftree.py
@@ -251,3 +251,21 @@ class ConfStack(object):
if v and dodecode:
v = v.decode('utf-8')
return v
+
+def stringToStrings(s):
+    '''Parse a string made of space-separated words and C-style strings
+    (double-quoted, with backslash escapes) into a list of strings. E.g.:
+        word1 *.ext "compound \\"quoted\\" string" ->
+        ['word1', '*.ext', 'compound "quoted" string']
+    A None or empty input yields an empty list.'''
+    import shlex
+    if not s:
+        return []  # shlex would otherwise read from stdin when given None
+    lex = shlex.shlex(s, posix=True)
+    lex.quotes = '"'
+    lex.escape = '\\'
+    lex.escapedquotes = '"'
+    lex.commenters = ''  # '#' is ordinary text in a pattern, not a comment
+    lex.whitespace_split = True  # keep '*.ext' whole, not '*', '.', 'ext'
+    # Iteration stops at EOF (None), so an empty "" token does not end parsing.
+    return list(lex)
diff --git a/src/sampleconf/recoll.conf b/src/sampleconf/recoll.conf
index e1072507..e425326a 100644
--- a/src/sampleconf/recoll.conf
+++ b/src/sampleconf/recoll.conf
@@ -120,14 +120,24 @@ skippedPaths = /media
#daemSkippedPaths =
+#
+#
+# Use skippedNames inside Zip archives. Fetched
+# directly by the rclzip handler. Skip the patterns defined by skippedNames
+# inside Zip archives. Can be redefined for subdirectories.
+# See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
+#
+#zipUseSkippedNames =
+
#
#
# Space-separated list of wildcard expressions for names that should
# be ignored inside zip archives.This is used directly by
-# the zip handler, and has a function similar to skippedNames, but works
-# independantly. Can be redefined for subdirectories. Supported by recoll
-# 1.20 and newer. See
-# https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
+# the zip handler. If zipUseSkippedNames is not set, zipSkippedNames
+# defines the patterns to be skipped inside archives. If zipUseSkippedNames
+# is set, the two lists are concatenated and used. Can be redefined for
+# subdirectories.
+# See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
#
#zipSkippedNames =