diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index ab05143f..9ce91572 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -410,28 +410,6 @@ const string& RclConfig::getDefCharset(bool filename) const } } -bool RclConfig::addLocalFields(map *tgt) const -{ - LOGDEB0(("RclConfig::addLocalFields: keydir [%s]\n", m_keydir.c_str())); - string sfields; - if (tgt == 0 || ! getConfParam("localfields", sfields)) - return false; - // Substitute ':' with '\n' inside the string. There is no way to escape ':' - for (string::size_type i = 0; i < sfields.size(); i++) - if (sfields[i] == ':') - sfields[i] = '\n'; - // Parse the result with a confsimple and add the results to the metadata - ConfSimple conf(sfields, 1, true); - vector nmlst = conf.getNames(cstr_null); - for (vector::const_iterator it = nmlst.begin(); - it != nmlst.end(); it++) { - conf.get(*it, (*tgt)[*it]); - LOGDEB(("RclConfig::addLocalFields: [%s] => [%s]\n", - (*it).c_str(), (*tgt)[*it].c_str())); - } - return true; -} - // Get all known document mime values. We get them from the mimeconf // 'index' submap. // It's quite possible that there are other mime types in the index @@ -630,15 +608,18 @@ bool RclConfig::valueSplitAttributes(const string& whole, string& value, // Handle additional attributes. 
We substitute the semi-colons // with newlines and use a ConfSimple if (!attrstr.empty()) { - for (string::size_type i = 0; i < attrstr.size(); i++) + for (string::size_type i = 0; i < attrstr.size(); i++) { if (attrstr[i] == ';') attrstr[i] = '\n'; - attrs = ConfSimple(attrstr); + } + attrs.reparse(attrstr); + } else { + attrs.clear(); } + return true; } - bool RclConfig::getMissingHelperDesc(string& out) const { string fmiss = path_cat(getConfDir(), "missing"); diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index cc5a2a82..e7e546b4 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -163,9 +163,6 @@ class RclConfig { Doesn't depend on the keydir */ vector getDaemSkippedPaths() const; - /** conf: Add local fields to target dic */ - bool addLocalFields(map *tgt) const; - /** * mimemap: Check if file name should be ignored because of suffix * diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml index c7a1f5cc..eb7e77db 100644 --- a/src/doc/user/usermanual.sgml +++ b/src/doc/user/usermanual.sgml @@ -4644,9 +4644,10 @@ unac_except_trans = under a given directory. Typical usage would be to set an "rclaptg" field, to be used in mimeview to select a specific viewer. If several fields are to be set, they - should be separated with a colon (':') character (which there - is currently no way to escape). Ie: - localfields= rclaptg=gnus:other = val, then + should be separated with a semi-colon (';') character, which there + is currently no way to escape. Also note the initial semi-colon. + Example: + localfields= ;rclaptg=gnus;other = val, then select specifier viewer with mimetype|tag=... in mimeview. 
diff --git a/src/index/Makefile b/src/index/Makefile index fe88d73c..e30e4caa 100644 --- a/src/index/Makefile +++ b/src/index/Makefile @@ -29,7 +29,7 @@ subtreelist : $(SUBTREELIST_OBJS) subtreelist.o : subtreelist.cpp $(CXX) $(ALL_CXXFLAGS) -DTEST_SUBTREELIST -c subtreelist.cpp -MIMETYPE_OBJS= trmimetype.o $(BIGLIB) +MIMETYPE_OBJS= trmimetype.o mimetype : $(MIMETYPE_OBJS) $(CXX) $(ALL_CXXFLAGS) -o mimetype $(MIMETYPE_OBJS) \ $(LIBRECOLL) $(LIBICONV) $(LIBSYS) diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp index cb1a36d8..66442402 100644 --- a/src/index/fsindexer.cpp +++ b/src/index/fsindexer.cpp @@ -45,6 +45,7 @@ #include "fileudi.h" #include "cancelcheck.h" #include "rclinit.h" +#include "execmd.h" // When using extended attributes, we have to use the ctime. // This is quite an expensive price to pay... @@ -71,12 +72,13 @@ extern void *FsIndexerDbUpdWorker(void*); class InternfileTask { public: InternfileTask(const std::string &f, const struct stat *i_stp, - map lfields) - : fn(f), statbuf(*i_stp), localfields(lfields) + map lfields, vector reapers) + : fn(f), statbuf(*i_stp), localfields(lfields), mdreapers(reapers) {} string fn; struct stat statbuf; map localfields; + vector mdreapers; }; extern void *FsIndexerInternfileWorker(void*); #endif // IDX_THREADS @@ -108,6 +110,7 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc) { LOGDEB1(("FsIndexer::FsIndexer\n")); m_havelocalfields = m_config->hasNameAnywhere("localfields"); + m_havemdreapers = m_config->hasNameAnywhere("metadatacmds"); #ifdef IDX_THREADS m_stableconfig = new RclConfig(*m_config); @@ -311,6 +314,9 @@ bool FsIndexer::indexFiles(list& files, ConfIndexer::IxFlag flag) m_config->setKeyDir(path_getfather(*it)); if (m_havelocalfields) localfieldsfromconf(); + if (m_havemdreapers) + mdreapersfromconf(); + bool follow = false; m_config->getConfParam("followLinks", &follow); @@ -396,21 +402,90 @@ out: // Local fields can be set for fs subtrees in the 
configuration file void FsIndexer::localfieldsfromconf() { - LOGDEB0(("FsIndexer::localfieldsfromconf\n")); + LOGDEB1(("FsIndexer::localfieldsfromconf\n")); + + string sfields; + m_config->getConfParam("localfields", sfields); + if (!sfields.compare(m_slocalfields)) + return; + + m_slocalfields = sfields; m_localfields.clear(); - m_config->addLocalFields(&m_localfields); + if (sfields.empty()) + return; + + string value; + ConfSimple attrs; + m_config->valueSplitAttributes(sfields, value, attrs); + vector nmlst = attrs.getNames(cstr_null); + for (vector::const_iterator it = nmlst.begin(); + it != nmlst.end(); it++) { + attrs.get(*it, m_localfields[*it]); + } } + // -void FsIndexer::setlocalfields(map fields, Rcl::Doc& doc) +void FsIndexer::setlocalfields(const map& fields, Rcl::Doc& doc) { for (map::const_iterator it = fields.begin(); - it != fields.end(); it++) { + it != fields.end(); it++) { // Should local fields override those coming from the document - // ? I think not, but not too sure + // ? I think not, but not too sure. We could also chose to + // concatenate the values ? 
if (doc.meta.find(it->second) == doc.meta.end()) { doc.meta[it->first] = it->second; - } + } + } +} + +// Metadata gathering commands +void FsIndexer::mdreapersfromconf() +{ + LOGDEB1(("FsIndexer::mdreapersfromconf\n")); + + string sreapers; + m_config->getConfParam("metadatacmds", sreapers); + if (!sreapers.compare(m_smdreapers)) + return; + + m_smdreapers = sreapers; + m_mdreapers.clear(); + if (sreapers.empty()) + return; + + string value; + ConfSimple attrs; + m_config->valueSplitAttributes(sreapers, value, attrs); + vector nmlst = attrs.getNames(cstr_null); + for (vector::const_iterator it = nmlst.begin(); + it != nmlst.end(); it++) { + MDReaper reaper; + reaper.fieldname = m_config->fieldCanon(*it); + string s; + attrs.get(*it, s); + stringToStrings(s, reaper.cmdv); + m_mdreapers.push_back(reaper); + } +} + +void FsIndexer::reapmetadata(const vector& reapers, const string& fn, + Rcl::Doc& doc) +{ + map smap = create_map('f', fn); + for (vector::const_iterator rp = reapers.begin(); + rp != reapers.end(); rp++) { + vector cmd; + for (vector::const_iterator it = rp->cmdv.begin(); + it != rp->cmdv.end(); it++) { + string s; + pcSubst(*it, s, smap); + cmd.push_back(s); + } + string output; + if (ExecCmd::backtick(cmd, output)) { + doc.meta[rp->fieldname] += string(" ") + output; + } } } @@ -515,6 +590,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, // Adjust local fields from config for this subtree if (m_havelocalfields) localfieldsfromconf(); + if (m_havemdreapers) + mdreapersfromconf(); if (flg == FsTreeWalker::FtwDirReturn) return FsTreeWalker::FtwOk; @@ -522,7 +599,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, #ifdef IDX_THREADS if (m_haveInternQ) { - InternfileTask *tp = new InternfileTask(fn, stp, m_localfields); + InternfileTask *tp = new InternfileTask(fn, stp, m_localfields, + m_mdreapers); if (m_iwqueue.put(tp)) { return FsTreeWalker::FtwOk; } else { @@ -531,14 +609,16 @@ FsIndexer::processone(const 
std::string &fn, const struct stat *stp, } #endif - return processonefile(m_config, m_tmpdir, fn, stp, m_localfields); + return processonefile(m_config, m_tmpdir, fn, stp, m_localfields, + m_mdreapers); } FsTreeWalker::Status FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir, const std::string &fn, const struct stat *stp, - map localfields) + const map& localfields, + const vector& mdreapers) { //////////////////// // Check db up to date ? Doing this before file type @@ -623,9 +703,13 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir, // We'll change the signature to ensure that the indexing will // be retried every time. - // Internal access path for multi-document files - if (doc.ipath.empty()) + // Internal access path for multi-document files. If empty, this is + // for the main file. + if (doc.ipath.empty()) { hadNullIpath = true; + if (m_havemdreapers) + reapmetadata(mdreapers, fn, doc); + } // Set file name, mod time and url if not done by filter if (doc.fmtime.empty()) @@ -708,7 +792,8 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir, fileDoc.url = cstr_fileu + fn; if (m_havelocalfields) setlocalfields(localfields, fileDoc); - + if (m_havemdreapers) + reapmetadata(mdreapers, fn, fileDoc); char cbuf[100]; sprintf(cbuf, OFFTPC, stp->st_size); fileDoc.pcbytes = cbuf; diff --git a/src/index/fsindexer.h b/src/index/fsindexer.h index 8db85fb0..5503b1a5 100644 --- a/src/index/fsindexer.h +++ b/src/index/fsindexer.h @@ -88,13 +88,24 @@ class FsIndexer : public FsTreeWalkerCB { std::vector m_tdl; FIMissingStore *m_missing; - // The configuration can set attribute fields to be inherited by // all files in a file system area. Ie: set "rclaptg = thunderbird" // inside ~/.thunderbird. The boolean is set at init to avoid // further wasteful processing if no local fields are set. 
bool m_havelocalfields; - map m_localfields; + string m_slocalfields; + map m_localfields; + + // Same idea with the metadata-gathering external commands, + // (e.g. used to reap tagging info: "tmsu tags %f") + /* Hold the description for an external metadata-gathering command */ + struct MDReaper { + string fieldname; + vector cmdv; + }; + bool m_havemdreapers; + string m_smdreapers; + vector m_mdreapers; #ifdef IDX_THREADS friend void *FsIndexerDbUpdWorker(void*); @@ -109,11 +120,15 @@ class FsIndexer : public FsTreeWalkerCB { bool init(); void localfieldsfromconf(); - void setlocalfields(const map flds, Rcl::Doc& doc); + void mdreapersfromconf(); + void setlocalfields(const map& flds, Rcl::Doc& doc); + void reapmetadata(const vector& reapers, const string &fn, + Rcl::Doc& doc); string getDbDir() {return m_config->getDbDir();} FsTreeWalker::Status processonefile(RclConfig *config, TempDir& tmpdir, const string &fn, - const struct stat *, map localfields); + const struct stat *, const map& localfields, + const vector& mdreapers); }; #endif /* _fsindexer_h_included_ */ diff --git a/src/index/mimetype.cpp b/src/index/mimetype.cpp index 1beb5d7e..8f557abd 100644 --- a/src/index/mimetype.cpp +++ b/src/index/mimetype.cpp @@ -23,10 +23,7 @@ #include #include #include - -#ifndef NO_NAMESPACES using namespace std; -#endif /* NO_NAMESPACES */ #include "mimetype.h" #include "debuglog.h" @@ -53,20 +50,13 @@ static string mimetypefromdata(const string &fn, bool usfc) // First try the internal identifying routine string mime = idFile(fn.c_str()); - // Then exec 'file -i' #ifdef USE_SYSTEM_FILE_COMMAND if (usfc && mime.empty()) { // Last resort: use "file -i" - vector args; - - args.push_back("-i"); - args.push_back(fn); - ExecCmd ex; + vector cmd = create_vector(FILE_PROG) ("-i") (fn); string result; - string cmd = FILE_PROG; - int status = ex.doexec(cmd, args, 0, &result); - if (status) { - LOGERR(("mimetypefromdata: doexec: status 0x%x\n", status)); + if 
(!ExecCmd::backtick(cmd, result)) { + LOGERR(("mimetypefromdata: exec %s failed\n", FILE_PROG)); return string(); } LOGDEB2(("mimetype: [%s] \"file\" output [%s]\n", @@ -160,7 +150,6 @@ string mimetype(const string &fn, const struct stat *stp, } - #else // TEST-> #include diff --git a/src/qtgui/preview_w.cpp b/src/qtgui/preview_w.cpp index 1e10a1d2..b30aea81 100644 --- a/src/qtgui/preview_w.cpp +++ b/src/qtgui/preview_w.cpp @@ -832,8 +832,8 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum) //////////////////////////////////////////////////////////////////////// // Load and convert document - // idoc came out of the index data (main text and other fields missing). - // foc is the complete one what we are going to extract from storage. + // idoc came out of the index data (main text and some fields missing). + // fdoc is the complete one what we are going to extract from storage. Rcl::Doc fdoc; int status = 1; LoadThread lthr(&status, fdoc, idoc); diff --git a/src/sampleconf/recoll.conf.in b/src/sampleconf/recoll.conf.in index 307458d4..965cacb0 100644 --- a/src/sampleconf/recoll.conf.in +++ b/src/sampleconf/recoll.conf.in @@ -277,19 +277,20 @@ snippetMaxPosWalk = 1000000 #defaultcharset = iso-8859-2 # You can set fields on all files of a specific fs area. (rclaptg can be -# used for application selection inside mimeview +# used for application selection inside mimeview). +# Syntax is the usual name = value ; attr1 = val1 ; ... with an empty value +# so needs initial semi-colon #[/some/app/directory] -#localfields = rclaptg = someapp; otherfield = somevalue +#localfields = ; rclaptg = someapp; otherfield = somevalue -# Use app tag to enable using gnu info to open info files (as the subnodes -# are indexed as html, we'd use firefox on a temp file else. 
Set this on -# some known info storage places -[/usr/share/info] -localfields = rclaptg=gnuinfo -[/usr/local/share/info] -localfields = rclaptg=gnuinfo -[/usr/local/info] -localfields = rclaptg=gnuinfo +# It's also possible to execute external commands to gather external +# metadata, for example tmsu tags. +# There can be several entries, separated by semi-colons, each defining +# which field name the data goes into and the command to use. Don't forget the +# initial semi-colon. All the field names must be different. You can use +# aliases in the "field" file if necessary. +#[/some/area/of/the/fs] +#metadatacmds = ; tags = tmsu tags %f [/usr/share/man] followSymlinks = 1 diff --git a/src/utils/conftree.cpp b/src/utils/conftree.cpp index a4eb1ccc..70db1ed6 100644 --- a/src/utils/conftree.cpp +++ b/src/utils/conftree.cpp @@ -146,6 +146,13 @@ ConfSimple::ConfSimple(int readonly, bool tildexp) status = readonly ? STATUS_RO : STATUS_RW; } +void ConfSimple::reparse(const string& d) +{ + clear(); + stringstream input(d, ios::in); + parseinput(input); +} + ConfSimple::ConfSimple(const string& d, int readonly, bool tildexp) : dotildexpand(tildexp), m_fmtime(0), m_holdWrites(false) { diff --git a/src/utils/conftree.h b/src/utils/conftree.h index 8bbe1bfd..08d316c1 100644 --- a/src/utils/conftree.h +++ b/src/utils/conftree.h @@ -158,6 +158,16 @@ public: return true; } + /** Clear, then reparse from string */ + void reparse(const string& in); + + /** Clear all content */ + void clear() + { + m_submaps.clear(); + m_order.clear(); + } + /** * Get value for named parameter, from specified subsection (looks in * global space if sk is empty). 
diff --git a/src/utils/execmd.cpp b/src/utils/execmd.cpp index 70211666..968f9355 100644 --- a/src/utils/execmd.cpp +++ b/src/utils/execmd.cpp @@ -242,7 +242,7 @@ inline void ExecCmd::dochild(const string &cmd, const char **argv, execve(cmd.c_str(), (char *const*)argv, (char *const*)envv); // Hu ho - LOGERR(("ExecCmd::doexec: execvp(%s) failed. errno %d\n", cmd.c_str(), + LOGERR(("ExecCmd::doexec: execve(%s) failed. errno %d\n", cmd.c_str(), errno)); _exit(127); } @@ -634,6 +634,18 @@ bool ExecCmd::maybereap(int *status) } } +// Static +bool ExecCmd::backtick(const std::vector cmd, std::string& out) +{ + vector::const_iterator it = cmd.begin(); + it++; + vector args(it, cmd.end()); + ExecCmd mexec; + int status = mexec.doexec(*cmd.begin(), args, 0, &out); + return status == 0; +} + +/// ReExec class methods /////////////////////////////////////////////////// ReExec::ReExec(int argc, char *args[]) { init(argc, args); @@ -747,18 +759,21 @@ void ReExec::reexec() execvp(m_argv[0].c_str(), (char *const*)argv); } + //////////////////////////////////////////////////////////////////// #else // TEST #include #include +#include +#include + #include #include #include -#include "debuglog.h" -#include "cancelcheck.h" - using namespace std; +#include "debuglog.h" +#include "cancelcheck.h" #include "execmd.h" static int op_flags; diff --git a/src/utils/execmd.h b/src/utils/execmd.h index 3ef81416..c8dcea1a 100644 --- a/src/utils/execmd.h +++ b/src/utils/execmd.h @@ -178,6 +178,14 @@ class ExecCmd { */ static bool which(const string& cmd, string& exe, const char* path = 0); + /** + * Execute command and return stdout output in a string + * @param cmd input: command and args + * @param out output: what the command printed + * @return true if exec status was 0 + */ + static bool backtick(const std::vector cmd, std::string& out); + friend class ExecCmdRsrc; private: static bool o_useVfork; diff --git a/src/utils/pathut.h b/src/utils/pathut.h index a2cf2317..15bd10ee 100644 --- 
a/src/utils/pathut.h +++ b/src/utils/pathut.h @@ -109,7 +109,7 @@ private: typedef RefCntr TempFile; -/// Temporary directory class +/// Temporary directory class. Recursively deleted by destructor. class TempDir { public: TempDir(); @@ -117,6 +117,7 @@ public: const char *dirname() {return m_dirname.c_str();} const string &getreason() {return m_reason;} bool ok() {return !m_dirname.empty();} + /// Recursively delete contents but not self. bool wipe(); private: string m_dirname; diff --git a/src/utils/smallut.h b/src/utils/smallut.h index b6465695..ae8791df 100644 --- a/src/utils/smallut.h +++ b/src/utils/smallut.h @@ -225,6 +225,28 @@ public: return m_map; } }; +template +class create_vector +{ +private: + std::vector m_vector; +public: + create_vector(const T& val) + { + m_vector.push_back(val); + } + + create_vector& operator()(const T& val) + { + m_vector.push_back(val); + return *this; + } + + operator std::vector() + { + return m_vector; + } +}; #ifndef MIN #define MIN(A,B) (((A)<(B)) ? (A) : (B)) diff --git a/website/release-1.19.html b/website/release-1.19.html index fffa802f..36dbe24c 100644 --- a/website/release-1.19.html +++ b/website/release-1.19.html @@ -73,6 +73,13 @@

Recoll 1.19 .... Changes documented from 1.18.1 to rev 3159

    +
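  • Changed format for the localfields configuration variable (used, for example, to set the rclaptg field). Was colon-separated, + now uses the normal value/attributes syntax, semi-colon separated, with an empty value + (hence the initial semi-colon), like: +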
    +            localfields = ; attr1 = val1 ; attr2 = val2
    +          
    +
  • There are new GUI configuration options to run in "search as you type" mode, and to disable the Qt auto-completion inside the simple search string, which was often more confusing