diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index b1540e31..466a443f 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -55,6 +55,44 @@ using namespace std; #define MAX(A,B) (((A)>(B)) ? (A) : (B)) #endif +bool ParamStale::needrecompute() +{ + if (parent->m_keydirgen != savedkeydirgen) { + savedkeydirgen = parent->m_keydirgen; + string newvalue; + if (!conffile) + return false; + conffile->get(paramname, newvalue, parent->m_keydir); + if (newvalue.compare(savedvalue)) { + savedvalue = newvalue; + return true; + } + } + return false; +} +void ParamStale::init(RclConfig *rconf, ConfNull *cnf, const string& nm) +{ + parent = rconf; + conffile = cnf; + paramname = nm; + savedkeydirgen = -1; +} + +void RclConfig::zeroMe() { + m_ok = false; + m_keydirgen = 0; + m_conf = 0; + mimemap = 0; + mimeconf = 0; + mimeview = 0; + m_fields = 0; + m_stopsuffixes = 0; + m_maxsufflen = 0; + m_stpsuffstate.init(this, 0, "recoll_noindex"); + m_skpnstate.init(this, 0, "skippedNames"); + m_rmtstate.init(this, 0, "indexedmimetypes"); +} + RclConfig::RclConfig(const string *argcnf) { zeroMe(); @@ -134,6 +172,10 @@ RclConfig::RclConfig(const string *argcnf) m_ok = true; setKeyDir(""); + + m_stpsuffstate.init(this, mimemap, "recoll_noindex"); + m_skpnstate.init(this, m_conf, "skippedNames"); + m_rmtstate.init(this, m_conf, "indexedmimetypes"); return; } @@ -145,6 +187,8 @@ bool RclConfig::updateMainConfig() stringsToString(m_cdirs, where); m_reason = string("No/bad main configuration file in: ") + where; m_ok = false; + m_skpnstate.init(this, 0, "skippedNames"); + m_rmtstate.init(this, 0, "indexedmimetypes"); return false; } setKeyDir(""); @@ -159,6 +203,8 @@ bool RclConfig::updateMainConfig() TextSplit::cjkProcessing(true); } } + m_skpnstate.init(this, m_conf, "skippedNames"); + m_rmtstate.init(this, m_conf, "indexedmimetypes"); return true; } @@ -176,6 +222,10 @@ ConfNull *RclConfig::cloneMainConfig() // prefetch a few common values. void RclConfig::setKeyDir(const string &dir) { + if (!dir.compare(m_keydir)) + return; + + m_keydirgen++; m_keydir = dir; if (m_conf == 0) return; @@ -184,21 +234,6 @@ void RclConfig::setKeyDir(const string &dir) defcharset.erase(); getConfParam("guesscharset", &guesscharset); - - string rmtstr; - if (m_conf->get("indexedmimetypes", rmtstr, m_keydir)) { - stringtolower(rmtstr); - if (rmtstr != m_rmtstr) { - LOGDEB2(("RclConfig::setKeyDir: rmtstr [%s]\n", rmtstr.c_str())); - m_rmtstr = rmtstr; - list l; - // Yea, no good to go string->list->set. Lazy me. - stringToStrings(rmtstr, l); - for (list::iterator it = l.begin(); it !=l.end(); it++) { - m_restrictMTypes.insert(*it); - } - } - } } bool RclConfig::getConfParam(const std::string &name, int *ivp) @@ -250,15 +285,17 @@ list RclConfig::getTopdirs() // Get charset to be used for transcoding to utf-8 if unspecified by doc // For document contents: -// If defcharset was set (from the config or a previous call), use it. -// Else, try to guess it from the locale -// Use iso8859-1 as ultimate default -// defcharset is reset on setKeyDir() +// If defcharset was set (from the config or a previous call, this +// is done in setKeydir), use it. +// Else, try to guess it from the locale +// Use iso8859-1 as ultimate default +// // For filenames, same thing except that we do not use the config file value // (only the locale). const string& RclConfig::getDefCharset(bool filename) { - static string localecharset; // This supposedly never changes + // This can't change once computed inside a process. + static string localecharset; if (localecharset.empty()) { const char *cp; cp = nl_langinfo(CODESET); @@ -301,7 +338,6 @@ std::list RclConfig::getAllMimeTypes() std::list lst; if (mimeconf == 0) return lst; - // mimeconf->sortwalk(mtypesWalker, &lst); lst = mimeconf->getNames("index"); lst.sort(); lst.unique(); @@ -349,24 +385,20 @@ typedef multiset SuffixStore; bool RclConfig::inStopSuffixes(const string& fni) { - if (m_stopsuffixes == 0) { + if (m_stopsuffixes == 0 || m_stpsuffstate.needrecompute()) { // Need to initialize the suffixes + delete STOPSUFFIXES; if ((m_stopsuffixes = new SuffixStore) == 0) { LOGERR(("RclConfig::inStopSuffixes: out of memory\n")); return false; } - string stp; list stoplist; - if (mimemap && mimemap->get("recoll_noindex", stp, m_keydir)) { - stringToStrings(stp, stoplist); - } + stringToStrings(m_stpsuffstate.savedvalue, stoplist); for (list::const_iterator it = stoplist.begin(); it != stoplist.end(); it++) { - string lower(*it); - stringtolower(lower); - STOPSUFFIXES->insert(SfString(lower)); - if (m_maxsufflen < lower.length()) - m_maxsufflen = lower.length(); + STOPSUFFIXES->insert(SfString(stringtolower(*it))); + if (m_maxsufflen < it->length()) + m_maxsufflen = it->length(); } } @@ -444,9 +476,14 @@ bool RclConfig::getMimeCatTypes(const string& cat, list& tps) string RclConfig::getMimeHandlerDef(const std::string &mtype, bool filtertypes) { string hs; + if (filtertypes && m_rmtstate.needrecompute()) { + m_restrictMTypes.clear(); + stringToStrings(stringtolower((const string&)m_rmtstate.savedvalue), + m_restrictMTypes); + } if (filtertypes && !m_restrictMTypes.empty()) { string mt = mtype; - stringtolower(mt); + stringtolower(mt); if (m_restrictMTypes.find(mt) == m_restrictMTypes.end()) return hs; } @@ -455,6 +492,7 @@ string RclConfig::getMimeHandlerDef(const std::string &mtype, bool filtertypes) } return hs; } + string RclConfig::getMissingHelperDesc() { string fmiss = path_cat(getConfDir(), "missing"); @@ -462,6 +500,7 @@ string RclConfig::getMissingHelperDesc() file_to_string(fmiss, out); return out; } + void RclConfig::storeMissingHelperDesc(const string &s) { string fmiss = path_cat(getConfDir(), "missing"); @@ -709,14 +748,12 @@ string RclConfig::getStopfile() return path_cat(getConfDir(), "stoplist.txt"); } -list RclConfig::getSkippedNames() +list& RclConfig::getSkippedNames() { - list skpl; - string skipped; - if (getConfParam("skippedNames", skipped)) { - stringToStrings(skipped, skpl); + if (m_skpnstate.needrecompute()) { + stringToStrings(m_skpnstate.savedvalue, m_skpnlist); } - return skpl; + return m_skpnlist; } list RclConfig::getSkippedPaths() @@ -726,7 +763,9 @@ list RclConfig::getSkippedPaths() if (getConfParam("skippedPaths", skipped)) { stringToStrings(skipped, skpl); } - // Always add the dbdir and confdir to the skipped paths + // Always add the dbdir and confdir to the skipped paths. This is + // especially important for the rt monitor which will go into a loop if we + // don't do this. skpl.push_back(getDbDir()); skpl.push_back(getConfDir()); for (list::iterator it = skpl.begin(); it != skpl.end(); it++) { @@ -916,8 +955,10 @@ void RclConfig::initFrom(const RclConfig& r) m_maxsufflen = r.m_maxsufflen; defcharset = r.defcharset; guesscharset = r.guesscharset; - m_rmtstr = r.m_rmtstr; - m_restrictMTypes = r.m_restrictMTypes; + + m_stpsuffstate.init(this, mimemap, r.m_stpsuffstate.paramname); + m_skpnstate.init(this, m_conf, r.m_skpnstate.paramname); + m_rmtstate.init(this, m_conf, r.m_rmtstate.paramname); } #else // -> Test diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index 8f5c1acb..3c769df4 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -38,6 +38,24 @@ using std::set; #include "conftree.h" #include "smallut.h" +class RclConfig; + +// A small class used for parameters that need to be computed from the +// config string, and which can change with the keydir. Minimize work +// by using the keydirgen and a saved string to avoid unneeded +// recomputations +class ParamStale { +public: + RclConfig *parent; + ConfNull *conffile; + string paramname; + int savedkeydirgen; + string savedvalue; + + void init(RclConfig *rconf, ConfNull *cnf, const string& nm); + bool needrecompute(); +}; + class RclConfig { public: @@ -115,11 +133,10 @@ class RclConfig { string getStopfile(); /** Get list of skipped file names for current keydir */ - list getSkippedNames(); + list& getSkippedNames(); /** Get list of skipped paths patterns. Doesn't depend on the keydir */ list getSkippedPaths(); - /** Get list of skipped paths patterns, daemon version (may add some) Doesn't depend on the keydir */ list getDaemSkippedPaths(); @@ -203,12 +220,16 @@ class RclConfig { return *this; } + friend class ParamStale; + private: int m_ok; string m_reason; // Explanation for bad state string m_confdir; // User directory where the customized files are stored string m_datadir; // Example: /usr/local/share/recoll string m_keydir; // Current directory used for parameter fetches. + int m_keydirgen; // To help with knowing when to update computed data. + list m_cdirs; // directory stack for the confstacks ConfStack *m_conf; // Parsed configuration files @@ -223,12 +244,16 @@ class RclConfig { void *m_stopsuffixes; unsigned int m_maxsufflen; + ParamStale m_stpsuffstate; + + ParamStale m_skpnstate; + list m_skpnlist; // Parameters auto-fetched on setkeydir string defcharset; // These are stored locally to avoid bool guesscharset; // They are fetched initially or on setKeydir() // Limiting set of mime types to be processed. Normally empty. - string m_rmtstr; + ParamStale m_rmtstate; set m_restrictMTypes; /** Create initial user configuration */ @@ -236,16 +261,7 @@ class RclConfig { /** Copy from other */ void initFrom(const RclConfig& r); /** Init pointers to 0 */ - void zeroMe() { - m_ok = false; - m_conf = 0; - mimemap = 0; - mimeconf = 0; - mimeview = 0; - m_fields = 0; - m_stopsuffixes = 0; - m_maxsufflen = 0; - } + void zeroMe(); /** Free data then zero pointers */ void freeAll(); bool readFieldsConfig(const string& errloc); diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp index e351b2f9..9091e6d0 100644 --- a/src/index/fsindexer.cpp +++ b/src/index/fsindexer.cpp @@ -130,10 +130,6 @@ bool FsIndexer::index() if (m_config->getConfParam("idxabsmlen", &abslen)) m_db->setAbstractParams(abslen, -1, -1); - // Set up skipped patterns for this subtree. This probably should be - // done in the directory change code in processone() instead. - m_walker.setSkippedNames(m_config->getSkippedNames()); - // Walk the directory tree if (m_walker.walk(*it, *this) != FsTreeWalker::FtwOk) { LOGERR(("FsIndexer::index: error while indexing %s: %s\n", @@ -153,8 +149,7 @@ bool FsIndexer::index() } static bool matchesSkipped(const list& tdl, - const list& skpnl, - const list& skppl, + FsTreeWalker& walker, const string& path) { // First check what (if any) topdir this is in: @@ -170,46 +165,33 @@ static bool matchesSkipped(const list& tdl, return true; } - // Check path against skippedPaths. If we find a system where - // FNM_LEADING_DIR is undefined (its unposixy), will have to do this for - // all ascendant paths up to the topdir - for (list::const_iterator it = skppl.begin(); - it != skppl.end(); it++) { - if (fnmatch(it->c_str(), path.c_str(), FNM_PATHNAME|FNM_LEADING_DIR) - == 0) { - LOGDEB(("FsIndexer::indexFiles: skipping [%s] (skpp)\n", - path.c_str())); - return true; - } + // Check path against skippedPaths. + if (walker.inSkippedPaths(path)) { + LOGDEB(("FsIndexer::indexFiles: skipping [%s] (skpp)\n", path.c_str())); + return true; } // Then check all path components up to the topdir against skippedNames - if (!skpnl.empty()) { - string mpath = path; - while (mpath.length() >= td.length() && mpath.length() > 1) { - string fn = path_getsimple(mpath); - for (list::const_iterator it = skpnl.begin(); - it != skpnl.end(); it++) { - LOGDEB2(("Checking [%s] against [%s]\n", - fn.c_str(), it->c_str())); - if (fnmatch(it->c_str(), fn.c_str(), 0) == 0) { - LOGDEB(("FsIndexer::indexFiles: skipping [%s] (skpn)\n", - path.c_str())); - return true; - } - } - string::size_type len = mpath.length(); - mpath = path_getfather(mpath); - // getfather normally returns a path ending with /, getsimple - // would then return '' - if (!mpath.empty() && mpath[mpath.size()-1] == '/') - mpath.erase(mpath.size()-1); - // should not be necessary, but lets be prudent. If the - // path did not shorten, something is seriously amiss - // (could be an assert actually) - if (mpath.length() >= len) - return true; + string mpath = path; + while (mpath.length() >= td.length() && mpath.length() > 1) { + string fn = path_getsimple(mpath); + if (walker.inSkippedNames(fn)) { + LOGDEB(("FsIndexer::indexFiles: skipping [%s] (skpn)\n", + path.c_str())); + return true; } + + string::size_type len = mpath.length(); + mpath = path_getfather(mpath); + // getfather normally returns a path ending with /, getsimple + // would then return '' + if (!mpath.empty() && mpath[mpath.size()-1] == '/') + mpath.erase(mpath.size()-1); + // should not be necessary, but lets be prudent. If the + // path did not shorten, something is seriously amiss + // (could be an assert actually) + if (mpath.length() >= len) + return true; } return false; } @@ -222,8 +204,21 @@ bool FsIndexer::indexFiles(list& files) if (!init()) return false; + // We use an FsTreeWalker just for handling the skipped path/name lists + FsTreeWalker walker; + walker.setSkippedPaths(m_config->getSkippedPaths()); + for (list::iterator it = files.begin(); it != files.end(); ) { LOGDEB2(("FsIndexer::indexFiles: [%s]\n", it->c_str())); + + m_config->setKeyDir(path_getfather(*it)); + walker.setSkippedNames(m_config->getSkippedNames()); + + // Check path against indexed areas and skipped names/paths + if (matchesSkipped(m_tdl, walker, *it)) { + it++; continue; + } + struct stat stb; if (lstat(it->c_str(), &stb) != 0) { LOGERR(("FsIndexer::indexFiles: lstat(%s): %s", it->c_str(), @@ -238,23 +233,6 @@ bool FsIndexer::indexFiles(list& files) it++; continue; } - string dir = path_getfather(*it); - m_config->setKeyDir(dir); - static string lstdir; - static list skpnl; - static list skppl; - if (lstdir.compare(dir)) { - LOGDEB(("Recomputing list of skipped names\n")); - skpnl = m_config->getSkippedNames(); - skppl = m_config->getSkippedPaths(); - lstdir = dir; - } - - // Check path against indexed areas and skipped names/paths - if (matchesSkipped(m_tdl, skpnl, skppl, *it)) { - it++; continue; - } - int abslen; if (m_config->getConfParam("idxabsmlen", &abslen)) m_db->setAbstractParams(abslen, -1, -1); @@ -363,6 +341,9 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, flg == FsTreeWalker::FtwDirReturn) { m_config->setKeyDir(fn); + // Set up skipped patterns for this subtree. + m_walker.setSkippedNames(m_config->getSkippedNames()); + int abslen; if (m_config->getConfParam("idxabsmlen", &abslen)) m_db->setAbstractParams(abslen, -1, -1); diff --git a/src/index/rclmonrcv.cpp b/src/index/rclmonrcv.cpp index 6fb749d7..5e81e2b7 100644 --- a/src/index/rclmonrcv.cpp +++ b/src/index/rclmonrcv.cpp @@ -59,18 +59,27 @@ static RclMonitor *makeMonitor(); while we create the watches)*/ class WalkCB : public FsTreeWalkerCB { public: - WalkCB(RclConfig *conf, RclMonitor *mon, RclMonEventQueue *queue) - : m_conf(conf), m_mon(mon), m_queue(queue) + WalkCB(RclConfig *conf, RclMonitor *mon, RclMonEventQueue *queue + FsTreeWalker& walker) + : m_config(conf), m_mon(mon), m_queue(queue), m_walker(walker) {} virtual ~WalkCB() {} virtual FsTreeWalker::Status - processone(const string &fn, const struct stat *st, FsTreeWalker::CbFlag flg) + processone(const string &fn, const struct stat *st, + FsTreeWalker::CbFlag flg) { LOGDEB2(("rclMonRcvRun: processone %s m_mon %p m_mon->ok %d\n", fn.c_str(), m_mon, m_mon?m_mon->ok():0)); + if (flg == FsTreeWalker::FtwDirEnter || + flg == FsTreeWalker::FtwDirReturn) { + m_config->setKeyDir(fn); + // Set up skipped patterns for this subtree. + m_walker.setSkippedNames(m_config->getSkippedNames()); + } + if (flg == FsTreeWalker::FtwDirEnter) { // Create watch when entering directory, but first empty // whatever events we may already have on queue @@ -99,9 +108,10 @@ public: } private: - RclConfig *m_conf; + RclConfig *m_config; RclMonitor *m_mon; RclMonEventQueue *m_queue; + FsTreeWalker& m_walker; }; /** Main thread routine: create watches, then forever wait for and queue events */ @@ -133,7 +143,7 @@ void *rclMonRcvRun(void *q) // Walk the directory trees to add watches FsTreeWalker walker; walker.setSkippedPaths(queue->getConfig()->getDaemSkippedPaths()); - WalkCB walkcb(queue->getConfig(), mon, queue); + WalkCB walkcb(queue->getConfig(), mon, queue, walker); for (list::iterator it = tdl.begin(); it != tdl.end(); it++) { queue->getConfig()->setKeyDir(*it); // Adjust the follow symlinks options @@ -144,15 +154,6 @@ void *rclMonRcvRun(void *q) } else { walker.setOpts(FsTreeWalker::FtwOptNone); } - // Adjust the skipped names according to config - walker.setSkippedNames(queue->getConfig()->getSkippedNames()); - // Add the dbdir to skipped paths. Note that adding the dbdir - // is probably not useful as we'll probably never have - // multiple dbs per config file, and the global dbdir is - // included by - // config->getSkippedPaths(). Still, better to be safe here as - // config->including dbdir in the walk will get us into a loop - walker.addSkippedPath(queue->getConfig()->getDbDir()); LOGDEB(("rclMonRcvRun: walking %s\n", it->c_str())); walker.walk(*it, walkcb); } diff --git a/src/utils/fstreewalk.cpp b/src/utils/fstreewalk.cpp index e9277757..15973d3e 100644 --- a/src/utils/fstreewalk.cpp +++ b/src/utils/fstreewalk.cpp @@ -94,8 +94,6 @@ bool FsTreeWalker::addSkippedName(const string& pattern) bool FsTreeWalker::setSkippedNames(const list &patterns) { data->skippedNames = patterns; - data->skippedNames.sort(); - data->skippedNames.unique(); return true; } bool FsTreeWalker::inSkippedNames(const string& name) @@ -125,8 +123,6 @@ bool FsTreeWalker::setSkippedPaths(const list &paths) it != data->skippedPaths.end(); it++) if (!(data->options & FtwNoCanon)) *it = path_canon(*it); - data->skippedPaths.sort(); - data->skippedPaths.unique(); return true; } bool FsTreeWalker::inSkippedPaths(const string& path) @@ -134,7 +130,14 @@ bool FsTreeWalker::inSkippedPaths(const string& path) list::const_iterator it; for (it = data->skippedPaths.begin(); it != data->skippedPaths.end(); it++) { - if (fnmatch(it->c_str(), path.c_str(), FNM_PATHNAME) == 0) + // If we find a system where FNM_LEADING_DIR is undefined (its + // unposixy), will have to do this for all ascendant paths up + // to the topdir. We'll then have a constructor option because + // this is only useful when called externally. When used + // internally, we don't descend in skipped paths, and so don't + // need FNM_LEADING_DIR + if (fnmatch(it->c_str(), path.c_str(), FNM_PATHNAME | + FNM_LEADING_DIR) == 0) return true; } return false; diff --git a/src/utils/fstreewalk.h b/src/utils/fstreewalk.h index 89a10be6..e7116aa2 100644 --- a/src/utils/fstreewalk.h +++ b/src/utils/fstreewalk.h @@ -79,8 +79,11 @@ class FsTreeWalker { /** Set the ignored paths list */ bool setSkippedPaths(const list &pathlist); + /** Test if path/name should be skipped. This can be used independantly of + * an actual tree walk */ bool inSkippedPaths(const string& path); bool inSkippedNames(const string& name); + private: Status iwalk(const string &dir, struct stat *stp, FsTreeWalkerCB& cb); class Internal; diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp index 2dc00185..2ae63c1d 100644 --- a/src/utils/smallut.cpp +++ b/src/utils/smallut.cpp @@ -182,71 +182,71 @@ template bool stringToStrings(const string &s, T &tokens) states state = SPACE; for (unsigned int i = 0; i < s.length(); i++) { switch (s[i]) { - case '"': + case '"': switch(state) { - case SPACE: + case SPACE: state=INQUOTE; continue; - case TOKEN: + case TOKEN: current += '"'; continue; - case INQUOTE: - tokens.push_back(current); + case INQUOTE: + tokens.insert(tokens.end(), current); current.clear(); state = SPACE; continue; - case ESCAPE: + case ESCAPE: current += '"'; state = INQUOTE; - continue; + continue; } break; - case '\\': + case '\\': switch(state) { - case SPACE: - case TOKEN: - current += '\\'; - state=TOKEN; - continue; - case INQUOTE: - state = ESCAPE; - continue; - case ESCAPE: - current += '\\'; - state = INQUOTE; - continue; + case SPACE: + case TOKEN: + current += '\\'; + state=TOKEN; + continue; + case INQUOTE: + state = ESCAPE; + continue; + case ESCAPE: + current += '\\'; + state = INQUOTE; + continue; } break; - case ' ': - case '\t': - case '\n': - case '\r': + case ' ': + case '\t': + case '\n': + case '\r': switch(state) { - case SPACE: - continue; - case TOKEN: - tokens.push_back(current); + case SPACE: + continue; + case TOKEN: + tokens.insert(tokens.end(), current); current.clear(); state = SPACE; continue; - case INQUOTE: - case ESCAPE: - current += s[i]; - continue; + case INQUOTE: + case ESCAPE: + current += s[i]; + continue; } break; - default: + default: switch(state) { - case ESCAPE: - state = INQUOTE; - break; - case SPACE: - state = TOKEN; - break; - case TOKEN: - case INQUOTE: - break; + case ESCAPE: + state = INQUOTE; + break; + case SPACE: + state = TOKEN; + break; + case TOKEN: + case INQUOTE: + break; } current += s[i]; } @@ -255,7 +255,7 @@ template bool stringToStrings(const string &s, T &tokens) case SPACE: break; case TOKEN: - tokens.push_back(current); + tokens.insert(tokens.end(), current); break; case INQUOTE: case ESCAPE: @@ -271,6 +271,10 @@ bool stringToStrings(const string &s, vector &tokens) { return stringToStrings >(s, tokens); } +bool stringToStrings(const string &s, set &tokens) +{ + return stringToStrings >(s, tokens); +} template void stringsToString(const T &tokens, string &s) { diff --git a/src/utils/smallut.h b/src/utils/smallut.h index 1b499f56..4adb5ec6 100644 --- a/src/utils/smallut.h +++ b/src/utils/smallut.h @@ -54,6 +54,7 @@ extern bool samecharset(const string &cs1, const string &cs2); */ extern bool stringToStrings(const string &s, list &tokens); extern bool stringToStrings(const string &s, vector &tokens); +extern bool stringToStrings(const string &s, set &tokens); /** * Inverse operation: