From 91e66a13f9c457bc5186be5ee7b3313d14e9c412 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Mon, 24 Apr 2017 10:15:44 +0200 Subject: [PATCH] Moved docsToPaths() method from confindexer to function in rcldoc to simplify linking. confindexer.o not in librecoll any more.+small std namespace fixes --- src/Makefile.am | 4 +- src/index/bglfetcher.h | 3 +- src/index/fetcher.h | 3 +- src/index/indexer.cpp | 25 ----------- src/index/indexer.h | 2 +- src/qtgui/rclm_idx.cpp | 2 +- src/qtgui/reslist.cpp | 2 +- src/qtgui/restable.cpp | 2 +- src/rcldb/rcldoc.cpp | 29 +++++++++++++ src/rcldb/rcldoc.h | 98 +++++++++++++++++++++--------------------- 10 files changed, 88 insertions(+), 82 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index ce279f70..e63d56cd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -91,8 +91,6 @@ index/fsfetcher.cpp \ index/fsfetcher.h \ index/fsindexer.cpp \ index/fsindexer.h \ -index/indexer.cpp \ -index/indexer.h \ index/mimetype.cpp \ index/mimetype.h \ index/rclmon.h \ @@ -289,6 +287,8 @@ endif recollindex_SOURCES = \ index/recollindex.cpp \ + index/indexer.cpp \ + index/indexer.h \ index/rclmonprc.cpp \ index/rclmonrcv.cpp \ utils/x11mon.cpp \ diff --git a/src/index/bglfetcher.h b/src/index/bglfetcher.h index 0690ef66..16eab4a1 100644 --- a/src/index/bglfetcher.h +++ b/src/index/bglfetcher.h @@ -23,7 +23,8 @@ */ class BGLDocFetcher : public DocFetcher{ virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out); - virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc, string& sig); + virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc, + std::string& sig); virtual ~BGLDocFetcher() {} }; diff --git a/src/index/fetcher.h b/src/index/fetcher.h index 418aa604..b6e540cb 100644 --- a/src/index/fetcher.h +++ b/src/index/fetcher.h @@ -66,7 +66,8 @@ public: * @param idoc the data gathered from the index for this doc (udi/ipath) * @param sig output. */ - virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc, string& sig) = 0; + virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc, + std::string& sig) = 0; virtual ~DocFetcher() {} }; diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index 7de91e42..a58e466d 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -211,31 +211,6 @@ bool ConfIndexer::indexFiles(list& ifiles, int flag) return ret; } -bool ConfIndexer::docsToPaths(vector &docs, vector &paths) -{ - for (vector::iterator it = docs.begin(); it != docs.end(); it++) { - Rcl::Doc &idoc = *it; - string backend; - idoc.getmeta(Rcl::Doc::keybcknd, &backend); - - // This only makes sense for file system files: beagle docs are - // always up to date because they can't be updated in the cache, - // only added/removed. Same remark as made inside internfile, we - // need a generic way to handle backends. - if (!backend.empty() && backend.compare("FS")) - continue; - - // Filesystem document. The url has to be like file:// - if (idoc.url.find(cstr_fileu) != 0) { - LOGERR("idx::docsToPaths: FS backend and non fs url: [" << - idoc.url << "]\n"); - continue; - } - paths.push_back(idoc.url.substr(7, string::npos)); - } - return true; -} - // Update index for specific documents. The docs come from an index // query, so the udi, backend etc. fields are filled. bool ConfIndexer::updateDocs(std::vector &docs, IxFlag flag) diff --git a/src/index/indexer.h b/src/index/indexer.h index fa7f0067..fb51206b 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -132,7 +132,7 @@ class ConfIndexer { /** Update index for list of documents given as list of docs (out of query) */ bool updateDocs(vector &docs, IxFlag f = IxFNone); - static bool docsToPaths(vector &docs, vector &paths); + /** Purge a list of files. */ bool purgeFiles(list &files, int f = IxFNone); diff --git a/src/qtgui/rclm_idx.cpp b/src/qtgui/rclm_idx.cpp index c4b15173..16e73417 100644 --- a/src/qtgui/rclm_idx.cpp +++ b/src/qtgui/rclm_idx.cpp @@ -428,7 +428,7 @@ void RclMain::updateIdxForDocs(vector& docs) } vector paths; - if (ConfIndexer::docsToPaths(docs, paths)) { + if (Rcl::docsToPaths(docs, paths)) { vector args{"-c", theconfig->getConfDir(), "-e", "-i"}; args.insert(args.end(), paths.begin(), paths.end()); m_idxproc = new ExecCmd; diff --git a/src/qtgui/reslist.cpp b/src/qtgui/reslist.cpp index 3368c143..651e5f26 100644 --- a/src/qtgui/reslist.cpp +++ b/src/qtgui/reslist.cpp @@ -236,7 +236,7 @@ string QtGuiResListPager::iconUrl(RclConfig *config, Rcl::Doc& doc) vector docs; docs.push_back(doc); vector paths; - ConfIndexer::docsToPaths(docs, paths); + Rcl::docsToPaths(docs, paths); if (!paths.empty()) { string path; LOGDEB2("ResList::iconUrl: source path [" << paths[0] << "]\n"); diff --git a/src/qtgui/restable.cpp b/src/qtgui/restable.cpp index 8f54755a..8a5a3ef6 100644 --- a/src/qtgui/restable.cpp +++ b/src/qtgui/restable.cpp @@ -117,7 +117,7 @@ string ResTablePager::iconUrl(RclConfig *config, Rcl::Doc& doc) vector docs; docs.push_back(doc); vector paths; - ConfIndexer::docsToPaths(docs, paths); + Rcl::docsToPaths(docs, paths); if (!paths.empty()) { string path; if (thumbPathForUrl(cstr_fileu + paths[0], 128, path)) { diff --git a/src/rcldb/rcldoc.cpp b/src/rcldb/rcldoc.cpp index 09700d07..fc8db24a 100644 --- a/src/rcldb/rcldoc.cpp +++ b/src/rcldb/rcldoc.cpp @@ -20,6 +20,8 @@ #include "log.h" #include "rclutil.h" +using namespace std; + namespace Rcl { const string Doc::keyabs("abstract"); const string Doc::keyapptg("rclaptg"); @@ -97,6 +99,33 @@ namespace Rcl { d->haschildren = haschildren; d->onlyxattr = onlyxattr; } + +static const string cstr_fileu("file://"); +bool docsToPaths(vector &docs, vector &paths) +{ + for (vector::iterator it = docs.begin(); it != docs.end(); it++) { + Rcl::Doc &idoc = *it; + string backend; + idoc.getmeta(Rcl::Doc::keybcknd, &backend); + + // This only makes sense for file system files: beagle docs are + // always up to date because they can't be updated in the cache, + // only added/removed. Same remark as made inside internfile, we + // need a generic way to handle backends. + if (!backend.empty() && backend.compare("FS")) + continue; + + // Filesystem document. The url has to be like file:// + if (idoc.url.find(cstr_fileu) != 0) { + LOGERR("idx::docsToPaths: FS backend and non fs url: [" << + idoc.url << "]\n"); + continue; + } + paths.push_back(idoc.url.substr(7, string::npos)); + } + return true; +} + } diff --git a/src/rcldb/rcldoc.h b/src/rcldb/rcldoc.h index 08a08dd7..4a6107ee 100644 --- a/src/rcldb/rcldoc.h +++ b/src/rcldb/rcldoc.h @@ -19,8 +19,7 @@ #include #include -using std::string; -using std::map; +#include #include "smallut.h" @@ -46,33 +45,33 @@ class Doc { // Binary or url-encoded url. No transcoding: this is used to access files // Index: computed by Db::add caller. // Query: from doc data. - string url; + std::string url; // When we do path translation for documents from external indexes, we // save the original path: - string idxurl; + std::string idxurl; // And the originating db. 0 is base, 1 first external etc. int idxi; // Internal path for multi-doc files. Ascii // Set by FsIndexer::processone - string ipath; + std::string ipath; // Mime type. Set by FileInterner::internfile - string mimetype; + std::string mimetype; // File modification time as decimal ascii unix time // Set by FsIndexer::processone - string fmtime; + std::string fmtime; // Data reference date (same format). Ie: mail date // Possibly set by mimetype-specific handler // Filter::metaData["modificationdate"] - string dmtime; + std::string dmtime; // Charset we transcoded the 'text' field from (in case we want back) // Possibly set by handler - string origcharset; + std::string origcharset; // A map for textual metadata like, author, keywords, abstract, // title. The entries are possibly set by the mimetype-specific @@ -81,7 +80,7 @@ class Doc { // Only some predefined fields are stored in the data record: // "title", "keywords", "abstract", "author", but if a field name is // in the "stored" configuration list, it will be stored too. - map meta; + std::map meta; // Attribute for the "abstract" entry. true if it is just the top // of doc, not a native document attribute. Not stored directly, but @@ -92,23 +91,23 @@ class Doc { // external containing archive. // Index: Set by caller prior to Db::Add. // Query: Set from data record - string pcbytes; + std::string pcbytes; // Document size, ie, size of the .odt or .xls. // Index: Set in internfile from the filter stack // Query: set from data record - string fbytes; + std::string fbytes; // Doc text size. // Index: from text.length(). // Query: set by rcldb from index data record - string dbytes; + std::string dbytes; // Doc signature. Used for up to date checks. // Index: set by Db::Add caller. Query: set from doc data. // This is opaque to rcldb, and could just as well be ctime, size, // ctime+size, md5, whatever. - string sig; + std::string sig; ///////////////////////////////////////////////// // The following fields don't go to the db record, so they can't @@ -116,7 +115,7 @@ class Doc { // Main document text. This is plaintext utf-8 text to be split // and indexed - string text; + std::string text; ///////////////////////////////////////////////// // Misc stuff @@ -170,9 +169,9 @@ class Doc { haspages(false), haschildren(false), onlyxattr(false) { } /** Get value for named field. If value pointer is 0, just test existence */ - bool getmeta(const string& nm, string *value = 0) const + bool getmeta(const std::string& nm, std::string *value = 0) const { - map::const_iterator it = meta.find(nm); + const auto it = meta.find(nm); if (it != meta.end()) { if (value) *value = it->second; @@ -182,9 +181,9 @@ class Doc { } } /** Nocopy getvalue. sets pointer to entry value if exists */ - bool peekmeta(const string& nm, const string **value = 0) const + bool peekmeta(const std::string& nm, const std::string **value = 0) const { - map::const_iterator it = meta.find(nm); + const auto it = meta.find(nm); if (it != meta.end()) { if (value) *value = &(it->second); @@ -195,9 +194,9 @@ class Doc { } // Create entry or append text to existing entry. - bool addmeta(const string& nm, const string& value) + bool addmeta(const std::string& nm, const std::string& value) { - map::iterator mit = meta.find(nm); + auto mit = meta.find(nm); if (mit == meta.end()) { meta[nm] = value; } else if (mit->second.empty()) { @@ -206,7 +205,7 @@ class Doc { // It may happen that the same attr exists several times // in the internfile stack. Avoid duplicating values. if (mit->second != value) - mit->second += string(" - ") + value; + mit->second += std::string(" - ") + value; } return true; } @@ -215,7 +214,7 @@ class Doc { * (as opposed to e.g. a webcache file), not a subdoc, */ bool isFsFile() { - string backend; + std::string backend; getmeta(keybcknd, &backend); if (!backend.empty() && backend.compare("FS")) return false; @@ -232,10 +231,10 @@ class Doc { // author), _must_ use these canonical values, not aliases. This is // enforced in internfile.cpp and misc other bits of metadata-gathering // code - static const string keyurl; // url + static const std::string keyurl; // url // childurl. This is set when working with the parent of the result, to hold // the child of interest url, typically to highlight a directory entry - static const string keychildurl; + static const std::string keychildurl; // file name. This is set for filesystem-level containers or // documents, and not inherited by subdocuments (which can get a // keyfn anyway from, e.g, an attachment filename value). Subdocs @@ -243,37 +242,38 @@ class Doc { // usually don't want to see all subdocs when searching for the // file name). Instead the container file name is now set in the // document record but not indexed (see next entry). - static const string keyfn; + static const std::string keyfn; // Container file name. This is set for all subdocuments of a // given top level container. It is not indexed by default but // stored in the document record keyfn field if this is still // empty when we create it, for display purposes. - static const string keytcfn; - static const string keyipt; // ipath - static const string keytp; // mime type - static const string keyfmt; // file mtime - static const string keydmt; // document mtime - static const string keymt; // mtime dmtime if set else fmtime - static const string keyoc; // original charset - static const string keypcs; // document outer container size - static const string keyfs; // document size - static const string keyds; // document text size - static const string keysz; // dbytes if set else fbytes else pcbytes - static const string keysig; // sig - static const string keyrr; // relevancy rating - static const string keycc; // Collapse count - static const string keyabs; // abstract - static const string keyau; // author - static const string keytt; // title - static const string keykw; // keywords - static const string keymd5; // file md5 checksum - static const string keybcknd; // backend type for data not from the filesys + static const std::string keytcfn; + static const std::string keyipt; // ipath + static const std::string keytp; // mime type + static const std::string keyfmt; // file mtime + static const std::string keydmt; // document mtime + static const std::string keymt; // mtime dmtime if set else fmtime + static const std::string keyoc; // original charset + static const std::string keypcs; // document outer container size + static const std::string keyfs; // document size + static const std::string keyds; // document text size + static const std::string keysz; // dbytes if set else fbytes else pcbytes + static const std::string keysig; // sig + static const std::string keyrr; // relevancy rating + static const std::string keycc; // Collapse count + static const std::string keyabs; // abstract + static const std::string keyau; // author + static const std::string keytt; // title + static const std::string keykw; // keywords + static const std::string keymd5; // file md5 checksum + static const std::string keybcknd; // backend type for data not from the filesys // udi back from index. Only set by Rcl::Query::getdoc(). - static const string keyudi; - static const string keyapptg; // apptag. Set from localfields (fsindexer) - static const string keybght; // beagle hit type ("beagleHitType") + static const std::string keyudi; + static const std::string keyapptg; // apptag. Set from localfields (fsindexer) + static const std::string keybght; // beagle hit type ("beagleHitType") }; +extern bool docsToPaths(std::vector &docs,std::vector &paths); }