From af42fe8f5e69f885eeb6cdd165a3c1bf36527455 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sun, 6 Oct 2019 07:44:50 +0200 Subject: [PATCH] rclconfig.py, rclexecm.py: implement part of mimetype identification for rclexecm test mode --- src/filters/rclexecm.py | 16 +++++++++++----- src/python/recoll/recoll/rclconfig.py | 23 +++++++++++++++++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/filters/rclexecm.py b/src/filters/rclexecm.py index 7717fb9e..e889eab6 100644 --- a/src/filters/rclexecm.py +++ b/src/filters/rclexecm.py @@ -386,6 +386,7 @@ def main(proto, extract): if len(args) != 1: usage() + path = args[0] def mimetype_with_file(f): cmd = 'file -i "' + f + '"' @@ -402,11 +403,16 @@ def main(proto, extract): def debprint(out, s): if not actAsSingle: proto.breakwrite(out, makebytes(s+'\n')) - - params = {'filename:': makebytes(args[0])} - # Some filters (e.g. rclaudio) need/get a MIME type from the indexer - mimetype = mimetype_with_file(args[0]) - params['mimetype:'] = mimetype + + params = {'filename:': makebytes(path)} + + # Some filters (e.g. rclaudio) need/get a MIME type from the indexer. + # We make a half-assed attempt to emulate: + mimetype = my_config.mimeType(path) + if not mimetype and not _mswindows: + mimetype = mimetype_with_file(path) + if mimetype: + params['mimetype:'] = mimetype if not extract.openfile(params): print("Open error", file=sys.stderr) diff --git a/src/python/recoll/recoll/rclconfig.py b/src/python/recoll/recoll/rclconfig.py index 26542cc5..67d8661a 100644 --- a/src/python/recoll/recoll/rclconfig.py +++ b/src/python/recoll/recoll/rclconfig.py @@ -28,6 +28,7 @@ class RclDynConf: class RclConfig: def __init__(self, argcnf = None): self.config = None + self.mimemap = None platsys = platform.system() # Find configuration directory if argcnf is not None: @@ -88,6 +89,28 @@ class RclConfig: if not self.config: self.config = conftree.ConfStack("recoll.conf", self.cdirs, "tree") return self.config.get(nm, self.keydir) + + # This is a simplified version of the c++ code, intended mostly for the + # test mode of rclexecm.py. We don't attempt to check the data, so this + # will not work on extension-less paths (e.g. mbox/mail/etc.) + def mimeType(self, path): + if not self.mimemap: + self.mimemap = conftree.ConfStack("mimemap", self.cdirs, "tree") + if os.path.exists(path): + if os.path.isdir(path): + return "inode/directory" + if os.path.islink(path): + return "inode/symlink" + if not os.path.isfile(path): + return "inode/x-fsspecial" + try: + size = os.path.getsize(path) + if size == 0: + return "inode/x-empty" + except: + pass + ext = os.path.splitext(path)[1] + return self.mimemap.get(ext, self.keydir) class RclExtraDbs: def __init__(self, config):