From 3716ea3dacac64113a823af5b7c3ff6925b13013 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Mon, 28 Sep 2020 14:04:09 +0200 Subject: [PATCH] unify processing for executing a python script --- src/common/rclconfig.cpp | 59 ++++++++++++++++++++++------------ src/common/rclconfig.h | 10 ++++++ src/common/textsplitko.cpp | 14 ++++---- src/index/indexer.cpp | 21 +++--------- src/internfile/mimehandler.cpp | 38 +++++----------------- 5 files changed, 68 insertions(+), 74 deletions(-) diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index f2122afa..34151455 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -1677,6 +1677,43 @@ string RclConfig::findFilter(const string &icmd) const } } +bool RclConfig::processFilterCmd(std::vector& cmd) const +{ + LOGDEB0("processFilterCmd: in: " << stringsToString(cmd) << "\n"); + auto it = cmd.begin(); + + // Special-case python and perl on windows: we need to also locate the + // first argument which is the script name "python somescript.py". + // On Unix, thanks to #!, we usually just run "somescript.py", but need + // the same change if we ever want to use the same cmd line as windows + bool hasinterp = !stringlowercmp("python", *it) || + !stringlowercmp("perl", *it); + + *it++ = findFilter(*it); + if (hasinterp) { + if (cmd.size() < 2) { + LOGERR("processFilterCmd: python/perl cmd: no script?. [" << + stringsToString(cmd) << "]\n"); + return false; + } else { + *it = findFilter(*it); + } + } + LOGDEB0("processFilterCmd: out: " << stringsToString(cmd) << "\n"); + return true; +} + +bool RclConfig::pythonCmd(const std::string& scriptname, + std::vector& cmd) const +{ +#ifdef _WIN32 + cmd = {"python", scriptname}; +#else + cmd = {scriptname}; +#endif + return processFilterCmd(cmd); +} + /** * Return decompression command line for given mime type */ @@ -1693,32 +1730,14 @@ bool RclConfig::getUncompressor(const string &mtype, vector& cmd) const LOGERR("getUncompressor: empty spec for mtype " << mtype << "\n"); return false; } - vector::iterator it = tokens.begin(); + auto it = tokens.begin(); if (tokens.size() < 2) return false; if (stringlowercmp("uncompress", *it++)) return false; cmd.clear(); - cmd.push_back(findFilter(*it)); - - // Special-case python and perl on windows: we need to also locate the - // first argument which is the script name "python somescript.py". - // On Unix, thanks to #!, we usually just run "somescript.py", but need - // the same change if we ever want to use the same cmdling as windows - if (!stringlowercmp("python", *it) || !stringlowercmp("perl", *it)) { - it++; - if (tokens.size() < 3) { - LOGERR("getUncpressor: python/perl cmd: no script?. [" << - mtype << "]\n"); - } else { - *it = findFilter(*it); - } - } else { - it++; - } - cmd.insert(cmd.end(), it, tokens.end()); - return true; + return processFilterCmd(cmd); } static const char blurb0[] = diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index 6ac19be6..7e7bb22b 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -332,6 +332,16 @@ class RclConfig { bool getMissingHelperDesc(string&) const; void storeMissingHelperDesc(const string &s); + /** Replace simple command name(s) inside vector with full + * paths. May have to replace two if the first entry is an + * interpreter name */ + bool processFilterCmd(std::vector& cmd) const; + + /** Build command vector for python script, possibly prepending + interpreter on Windows */ + bool pythonCmd( + const std::string& script, std::vector& cmd) const; + /** Find exec file for external filter. * * If the input is an absolute path, we just return it. Else We diff --git a/src/common/textsplitko.cpp b/src/common/textsplitko.cpp index a634de6d..7de523d3 100644 --- a/src/common/textsplitko.cpp +++ b/src/common/textsplitko.cpp @@ -59,13 +59,13 @@ static const string magicpage{"NEWPPPAGE"}; void TextSplit::koStaticConfInit(RclConfig *config, const string& tagger) { -#ifdef _WIN32 - o_cmdpath = config->findFilter("python"); - o_cmdargs.clear(); - o_cmdargs.push_back(config->findFilter("kosplitter.py")); -#else - o_cmdpath = config->findFilter("kosplitter.py"); -#endif + std::vector cmdvec; + if (config->pythonCmd("kosplitter.py", cmdvec)) { + auto it = cmdvec.begin(); + o_cmdpath = *it++; + o_cmdargs.clear(); + o_cmdargs.insert(o_cmdargs.end(), it, cmdvec.end()); + } if (tagger == "Okt" || tagger == "Mecab" || tagger == "Komoran") { o_taggername = tagger; if (tagger == "Komoran") diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index 9413c3cb..5d646020 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -66,22 +66,9 @@ bool runWebFilesMoverScript(RclConfig *config) downloadsdir = path_tildexpand("~/Downloads"); } } - static string cmdpath; - vector args; -#ifdef _WIN32 - const static string cmdnm{"python"}; - args.push_back(config->findFilter("recoll-we-move-files.py")); -#else - const static string cmdnm{"recoll-we-move-files.py"}; -#endif - if (cmdpath.empty()) { - cmdpath = config->findFilter(cmdnm); - if (cmdpath.empty()) { - LOGERR("runWFMoverScript: recoll-we-move-files.py not found\n"); - return false; - } - } - + vector cmdvec; + config->pythonCmd("recoll-we-move-files.py", cmdvec); + /* Arrange to not actually run the script if the directory did not change */ static time_t dirmtime; time_t ndirmtime = 0; @@ -100,7 +87,7 @@ bool runWebFilesMoverScript(RclConfig *config) are created during the run. */ dirmtime = ndirmtime; ExecCmd cmd; - int status = cmd.doexec(cmdpath, args); + int status = cmd.doexec1(cmdvec); return status == 0; } return true; diff --git a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp index f0fadae2..0581ebd5 100644 --- a/src/internfile/mimehandler.cpp +++ b/src/internfile/mimehandler.cpp @@ -231,27 +231,12 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs, "]: [" << hs << "]\n"); return 0; } - MimeHandlerExec *h = multiple ? - new MimeHandlerExecMultiple(cfg, id) : - new MimeHandlerExec(cfg, id); - vector::iterator it = cmdtoks.begin(); - - // Special-case python and perl on windows: we need to also locate the - // first argument which is the script name "python somescript.py". - // On Unix, thanks to #!, we usually just run "somescript.py", but need - // the same change if we ever want to use the same cmdling as windows - if (!stringlowercmp("python", *it) || !stringlowercmp("perl", *it)) { - if (cmdtoks.size() < 2) { - LOGERR("mhExecFactory: python/perl cmd: no script?. [" << - mtype << "]: [" << hs << "]\n"); - } - vector::iterator it1(it); - it1++; - *it1 = cfg->findFilter(*it1); + if (!cfg->processFilterCmd(cmdtoks)) { + return nullptr; } - - h->params.push_back(cfg->findFilter(*it++)); - h->params.insert(h->params.end(), it, cmdtoks.end()); + MimeHandlerExec *h = multiple ? new MimeHandlerExecMultiple(cfg, id) : + new MimeHandlerExec(cfg, id); + h->params = cmdtoks; // Handle additional attributes. We substitute the semi-colons // with newlines and use a ConfSimple @@ -261,16 +246,9 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs, if (attrs.get(cstr_dj_keymt, value)) h->cfgFilterOutputMtype = stringtolower((const string&)value); -#if 0 - string scmd; - for (it = h->params.begin(); it != h->params.end(); it++) { - scmd += string("[") + *it + "] "; - } - LOGDEB("mhExecFactory:mt [" << mtype << "] cfgmt [" << - h->cfgFilterOutputMtype << "] cfgcs [" << - h->cfgFilterOutputCharset << "] cmd: [" << scmd << "]\n"); -#endif - + LOGDEB2("mhExecFactory:mt [" << mtype << "] cfgmt [" << + h->cfgFilterOutputMtype << "] cfgcs ["<cfgFilterOutputCharset << + "] cmd: [" << stringsToString(h->params) << "]\n"); return h; }