rclzip: add useSkippedNames variable to also use the base skippedNames

This commit is contained in:
Jean-Francois Dockes 2018-06-04 09:05:23 +02:00
parent ec4bb12df0
commit f3d3f5b0bf
3 changed files with 39 additions and 6 deletions

View File

@ -28,6 +28,7 @@ from zipfile import ZipFile
try:
from recoll import rclconfig
from recoll import conftree
hasrclconfig = True
except:
hasrclconfig = False
@ -118,10 +119,14 @@ class ZipExtractor:
if hasrclconfig:
config = rclconfig.RclConfig()
config.setKeyDir(os.path.dirname(filename))
usebaseskipped = config.getConfParam("zipUseSkippedNames")
if usebaseskipped:
skipped = config.getConfParam("skippedNames")
self.em.rclog("skippedNames: %s"%self.skiplist)
self.skiplist += conftree.stringToStrings(skipped)
skipped = config.getConfParam("zipSkippedNames")
if skipped is not None:
self.skiplist = skipped.split(" ")
self.skiplist += conftree.stringToStrings(skipped)
try:
if rclexecm.PY3:
# Note: py3 ZipFile wants an str file name, which

View File

@ -251,3 +251,21 @@ class ConfStack(object):
if v and dodecode:
v = v.decode('utf-8')
return v
def stringToStrings(s):
    '''Split a configuration value into a list of strings.

    The input is made of whitespace-separated words and C-style strings
    (double-quoted, with backslash escapes). E.g.:
    word1 word2 "compound \\"quoted\\" string" ->
       ['word1', 'word2', 'compound "quoted" string']

    Words are split on whitespace only, so wildcard patterns such as
    *.txt or #* come back as single, unmodified elements. An empty quoted
    string ("") yields an empty element. A None or empty input yields an
    empty list.
    '''
    import shlex
    # shlex.shlex(None) would read from standard input: never let a
    # missing configuration value get that far.
    if not s:
        return []
    lex = shlex.shlex(s, posix=True)
    lex.quotes = '"'
    lex.escape = '\\'
    lex.escapedquotes = '"'
    # Only whitespace separates words. Without this, every character
    # outside of the shlex word set ('*', '.', '~', ...) would be
    # returned as a separate token.
    lex.whitespace_split = True
    # '#' is a legitimate pattern character here, not a comment introducer.
    lex.commenters = ''
    # Iteration stops on the end-of-input marker (None in posix mode), not
    # on the first empty token, so "" does not truncate the result.
    return list(lex)

View File

@ -120,14 +120,24 @@ skippedPaths = /media
#daemSkippedPaths =
# <var name="zipUseSkippedNames" type="bool">
#
# <brief>Use skippedNames inside Zip archives.</brief><descr>Fetched
# directly by the rclzip handler. Skip the patterns defined by skippedNames
# inside Zip archives. Can be redefined for subdirectories.
# See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
# </descr></var>
#zipUseSkippedNames =
# <var name="zipSkippedNames" type="string">
#
# <brief>Space-separated list of wildcard expressions for names that should
# be ignored inside zip archives.</brief><descr>This is used directly by
# the zip handler, and has a function similar to skippedNames, but works
# independently. Can be redefined for subdirectories. Supported by recoll
# 1.20 and newer. See
# https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
# the zip handler. If zipUseSkippedNames is not set, zipSkippedNames
# defines the patterns to be skipped inside archives. If zipUseSkippedNames
# is set, the two lists are concatenated and used. Can be redefined for
# subdirectories.
# See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
# </descr></var>
#zipSkippedNames =