rationalized how we recompute things on setkeydir. recoll_noindex and skippedNames can now be changed at any point in the tree

This commit is contained in:
dockes 2009-11-15 08:38:43 +00:00
parent 1406bca35b
commit 0ccf8fccd9
8 changed files with 225 additions and 175 deletions

View File

@ -55,6 +55,44 @@ using namespace std;
#define MAX(A,B) (((A)>(B)) ? (A) : (B))
#endif
bool ParamStale::needrecompute()
{
if (parent->m_keydirgen != savedkeydirgen) {
savedkeydirgen = parent->m_keydirgen;
string newvalue;
if (!conffile)
return false;
conffile->get(paramname, newvalue, parent->m_keydir);
if (newvalue.compare(savedvalue)) {
savedvalue = newvalue;
return true;
}
}
return false;
}
// (Re)attach this tracker to a configuration.
//   rconf: the RclConfig whose m_keydir / m_keydirgen are consulted
//   cnf:   the config object the parameter is read from (may be 0)
//   nm:    the name of the parameter to watch
// savedvalue is left untouched, so the next needrecompute() call compares
// the newly fetched string against whatever was saved before.
void ParamStale::init(RclConfig *rconf, ConfNull *cnf, const string& nm)
{
    // Force the next needrecompute() to query the configuration: the
    // parent's generation counter is set to 0 by zeroMe() and only ever
    // incremented by setKeyDir(), so -1 does not match it.
    savedkeydirgen = -1;
    paramname = nm;
    conffile = cnf;
    parent = rconf;
}
void RclConfig::zeroMe() {
m_ok = false;
m_keydirgen = 0;
m_conf = 0;
mimemap = 0;
mimeconf = 0;
mimeview = 0;
m_fields = 0;
m_stopsuffixes = 0;
m_maxsufflen = 0;
m_stpsuffstate.init(this, 0, "recoll_noindex");
m_skpnstate.init(this, 0, "skippedNames");
m_rmtstate.init(this, 0, "indexedmimetypes");
}
RclConfig::RclConfig(const string *argcnf)
{
zeroMe();
@ -134,6 +172,10 @@ RclConfig::RclConfig(const string *argcnf)
m_ok = true;
setKeyDir("");
m_stpsuffstate.init(this, mimemap, "recoll_noindex");
m_skpnstate.init(this, m_conf, "skippedNames");
m_rmtstate.init(this, m_conf, "indexedmimetypes");
return;
}
@ -145,6 +187,8 @@ bool RclConfig::updateMainConfig()
stringsToString(m_cdirs, where);
m_reason = string("No/bad main configuration file in: ") + where;
m_ok = false;
m_skpnstate.init(this, 0, "skippedNames");
m_rmtstate.init(this, 0, "indexedmimetypes");
return false;
}
setKeyDir("");
@ -159,6 +203,8 @@ bool RclConfig::updateMainConfig()
TextSplit::cjkProcessing(true);
}
}
m_skpnstate.init(this, m_conf, "skippedNames");
m_rmtstate.init(this, m_conf, "indexedmimetypes");
return true;
}
@ -176,6 +222,10 @@ ConfNull *RclConfig::cloneMainConfig()
// prefetch a few common values.
void RclConfig::setKeyDir(const string &dir)
{
if (!dir.compare(m_keydir))
return;
m_keydirgen++;
m_keydir = dir;
if (m_conf == 0)
return;
@ -184,21 +234,6 @@ void RclConfig::setKeyDir(const string &dir)
defcharset.erase();
getConfParam("guesscharset", &guesscharset);
string rmtstr;
if (m_conf->get("indexedmimetypes", rmtstr, m_keydir)) {
stringtolower(rmtstr);
if (rmtstr != m_rmtstr) {
LOGDEB2(("RclConfig::setKeyDir: rmtstr [%s]\n", rmtstr.c_str()));
m_rmtstr = rmtstr;
list<string> l;
// Yea, no good to go string->list->set. Lazy me.
stringToStrings(rmtstr, l);
for (list<string>::iterator it = l.begin(); it !=l.end(); it++) {
m_restrictMTypes.insert(*it);
}
}
}
}
bool RclConfig::getConfParam(const std::string &name, int *ivp)
@ -250,15 +285,17 @@ list<string> RclConfig::getTopdirs()
// Get charset to be used for transcoding to utf-8 if unspecified by doc
// For document contents:
// If defcharset was set (from the config or a previous call), use it.
// Else, try to guess it from the locale
// Use iso8859-1 as ultimate default
// defcharset is reset on setKeyDir()
// If defcharset was set (from the config or a previous call, this
// is done in setKeydir), use it.
// Else, try to guess it from the locale
// Use iso8859-1 as ultimate default
//
// For filenames, same thing except that we do not use the config file value
// (only the locale).
const string& RclConfig::getDefCharset(bool filename)
{
static string localecharset; // This supposedly never changes
// This can't change once computed inside a process.
static string localecharset;
if (localecharset.empty()) {
const char *cp;
cp = nl_langinfo(CODESET);
@ -301,7 +338,6 @@ std::list<string> RclConfig::getAllMimeTypes()
std::list<string> lst;
if (mimeconf == 0)
return lst;
// mimeconf->sortwalk(mtypesWalker, &lst);
lst = mimeconf->getNames("index");
lst.sort();
lst.unique();
@ -349,24 +385,20 @@ typedef multiset<SfString, SuffCmp> SuffixStore;
bool RclConfig::inStopSuffixes(const string& fni)
{
if (m_stopsuffixes == 0) {
if (m_stopsuffixes == 0 || m_stpsuffstate.needrecompute()) {
// Need to initialize the suffixes
delete STOPSUFFIXES;
if ((m_stopsuffixes = new SuffixStore) == 0) {
LOGERR(("RclConfig::inStopSuffixes: out of memory\n"));
return false;
}
string stp;
list<string> stoplist;
if (mimemap && mimemap->get("recoll_noindex", stp, m_keydir)) {
stringToStrings(stp, stoplist);
}
stringToStrings(m_stpsuffstate.savedvalue, stoplist);
for (list<string>::const_iterator it = stoplist.begin();
it != stoplist.end(); it++) {
string lower(*it);
stringtolower(lower);
STOPSUFFIXES->insert(SfString(lower));
if (m_maxsufflen < lower.length())
m_maxsufflen = lower.length();
STOPSUFFIXES->insert(SfString(stringtolower(*it)));
if (m_maxsufflen < it->length())
m_maxsufflen = it->length();
}
}
@ -444,9 +476,14 @@ bool RclConfig::getMimeCatTypes(const string& cat, list<string>& tps)
string RclConfig::getMimeHandlerDef(const std::string &mtype, bool filtertypes)
{
string hs;
if (filtertypes && m_rmtstate.needrecompute()) {
m_restrictMTypes.clear();
stringToStrings(stringtolower((const string&)m_rmtstate.savedvalue),
m_restrictMTypes);
}
if (filtertypes && !m_restrictMTypes.empty()) {
string mt = mtype;
stringtolower(mt);
stringtolower(mt);
if (m_restrictMTypes.find(mt) == m_restrictMTypes.end())
return hs;
}
@ -455,6 +492,7 @@ string RclConfig::getMimeHandlerDef(const std::string &mtype, bool filtertypes)
}
return hs;
}
string RclConfig::getMissingHelperDesc()
{
string fmiss = path_cat(getConfDir(), "missing");
@ -462,6 +500,7 @@ string RclConfig::getMissingHelperDesc()
file_to_string(fmiss, out);
return out;
}
void RclConfig::storeMissingHelperDesc(const string &s)
{
string fmiss = path_cat(getConfDir(), "missing");
@ -709,14 +748,12 @@ string RclConfig::getStopfile()
return path_cat(getConfDir(), "stoplist.txt");
}
list<string> RclConfig::getSkippedNames()
list<string>& RclConfig::getSkippedNames()
{
list<string> skpl;
string skipped;
if (getConfParam("skippedNames", skipped)) {
stringToStrings(skipped, skpl);
if (m_skpnstate.needrecompute()) {
stringToStrings(m_skpnstate.savedvalue, m_skpnlist);
}
return skpl;
return m_skpnlist;
}
list<string> RclConfig::getSkippedPaths()
@ -726,7 +763,9 @@ list<string> RclConfig::getSkippedPaths()
if (getConfParam("skippedPaths", skipped)) {
stringToStrings(skipped, skpl);
}
// Always add the dbdir and confdir to the skipped paths
// Always add the dbdir and confdir to the skipped paths. This is
// especially important for the rt monitor which will go into a loop if we
// don't do this.
skpl.push_back(getDbDir());
skpl.push_back(getConfDir());
for (list<string>::iterator it = skpl.begin(); it != skpl.end(); it++) {
@ -916,8 +955,10 @@ void RclConfig::initFrom(const RclConfig& r)
m_maxsufflen = r.m_maxsufflen;
defcharset = r.defcharset;
guesscharset = r.guesscharset;
m_rmtstr = r.m_rmtstr;
m_restrictMTypes = r.m_restrictMTypes;
m_stpsuffstate.init(this, mimemap, r.m_stpsuffstate.paramname);
m_skpnstate.init(this, m_conf, r.m_skpnstate.paramname);
m_rmtstate.init(this, m_conf, r.m_rmtstate.paramname);
}
#else // -> Test

View File

@ -38,6 +38,24 @@ using std::set;
#include "conftree.h"
#include "smallut.h"
class RclConfig;
// A small class used for parameters that need to be computed from the
// config string, and which can change with the keydir. Minimize work
// by using the keydirgen and a saved string to avoid unneeded
// recomputations
class ParamStale {
public:
RclConfig *parent;
ConfNull *conffile;
string paramname;
int savedkeydirgen;
string savedvalue;
void init(RclConfig *rconf, ConfNull *cnf, const string& nm);
bool needrecompute();
};
class RclConfig {
public:
@ -115,11 +133,10 @@ class RclConfig {
string getStopfile();
/** Get list of skipped file names for current keydir */
list<string> getSkippedNames();
list<string>& getSkippedNames();
/** Get list of skipped paths patterns. Doesn't depend on the keydir */
list<string> getSkippedPaths();
/** Get list of skipped paths patterns, daemon version (may add some)
Doesn't depend on the keydir */
list<string> getDaemSkippedPaths();
@ -203,12 +220,16 @@ class RclConfig {
return *this;
}
friend class ParamStale;
private:
int m_ok;
string m_reason; // Explanation for bad state
string m_confdir; // User directory where the customized files are stored
string m_datadir; // Example: /usr/local/share/recoll
string m_keydir; // Current directory used for parameter fetches.
int m_keydirgen; // To help with knowing when to update computed data.
list<string> m_cdirs; // directory stack for the confstacks
ConfStack<ConfTree> *m_conf; // Parsed configuration files
@ -223,12 +244,16 @@ class RclConfig {
void *m_stopsuffixes;
unsigned int m_maxsufflen;
ParamStale m_stpsuffstate;
ParamStale m_skpnstate;
list<string> m_skpnlist;
// Parameters auto-fetched on setkeydir
string defcharset; // These are stored locally to avoid
bool guesscharset; // They are fetched initially or on setKeydir()
// Limiting set of mime types to be processed. Normally empty.
string m_rmtstr;
ParamStale m_rmtstate;
set<string> m_restrictMTypes;
/** Create initial user configuration */
@ -236,16 +261,7 @@ class RclConfig {
/** Copy from other */
void initFrom(const RclConfig& r);
/** Init pointers to 0 */
void zeroMe() {
m_ok = false;
m_conf = 0;
mimemap = 0;
mimeconf = 0;
mimeview = 0;
m_fields = 0;
m_stopsuffixes = 0;
m_maxsufflen = 0;
}
void zeroMe();
/** Free data then zero pointers */
void freeAll();
bool readFieldsConfig(const string& errloc);

View File

@ -130,10 +130,6 @@ bool FsIndexer::index()
if (m_config->getConfParam("idxabsmlen", &abslen))
m_db->setAbstractParams(abslen, -1, -1);
// Set up skipped patterns for this subtree. This probably should be
// done in the directory change code in processone() instead.
m_walker.setSkippedNames(m_config->getSkippedNames());
// Walk the directory tree
if (m_walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
LOGERR(("FsIndexer::index: error while indexing %s: %s\n",
@ -153,8 +149,7 @@ bool FsIndexer::index()
}
static bool matchesSkipped(const list<string>& tdl,
const list<string>& skpnl,
const list<string>& skppl,
FsTreeWalker& walker,
const string& path)
{
// First check what (if any) topdir this is in:
@ -170,46 +165,33 @@ static bool matchesSkipped(const list<string>& tdl,
return true;
}
// Check path against skippedPaths. If we find a system where
// FNM_LEADING_DIR is undefined (its unposixy), will have to do this for
// all ascendant paths up to the topdir
for (list<string>::const_iterator it = skppl.begin();
it != skppl.end(); it++) {
if (fnmatch(it->c_str(), path.c_str(), FNM_PATHNAME|FNM_LEADING_DIR)
== 0) {
LOGDEB(("FsIndexer::indexFiles: skipping [%s] (skpp)\n",
path.c_str()));
return true;
}
// Check path against skippedPaths.
if (walker.inSkippedPaths(path)) {
LOGDEB(("FsIndexer::indexFiles: skipping [%s] (skpp)\n", path.c_str()));
return true;
}
// Then check all path components up to the topdir against skippedNames
if (!skpnl.empty()) {
string mpath = path;
while (mpath.length() >= td.length() && mpath.length() > 1) {
string fn = path_getsimple(mpath);
for (list<string>::const_iterator it = skpnl.begin();
it != skpnl.end(); it++) {
LOGDEB2(("Checking [%s] against [%s]\n",
fn.c_str(), it->c_str()));
if (fnmatch(it->c_str(), fn.c_str(), 0) == 0) {
LOGDEB(("FsIndexer::indexFiles: skipping [%s] (skpn)\n",
path.c_str()));
return true;
}
}
string::size_type len = mpath.length();
mpath = path_getfather(mpath);
// getfather normally returns a path ending with /, getsimple
// would then return ''
if (!mpath.empty() && mpath[mpath.size()-1] == '/')
mpath.erase(mpath.size()-1);
// should not be necessary, but lets be prudent. If the
// path did not shorten, something is seriously amiss
// (could be an assert actually)
if (mpath.length() >= len)
return true;
string mpath = path;
while (mpath.length() >= td.length() && mpath.length() > 1) {
string fn = path_getsimple(mpath);
if (walker.inSkippedNames(fn)) {
LOGDEB(("FsIndexer::indexFiles: skipping [%s] (skpn)\n",
path.c_str()));
return true;
}
string::size_type len = mpath.length();
mpath = path_getfather(mpath);
// getfather normally returns a path ending with /, getsimple
// would then return ''
if (!mpath.empty() && mpath[mpath.size()-1] == '/')
mpath.erase(mpath.size()-1);
// should not be necessary, but lets be prudent. If the
// path did not shorten, something is seriously amiss
// (could be an assert actually)
if (mpath.length() >= len)
return true;
}
return false;
}
@ -222,8 +204,21 @@ bool FsIndexer::indexFiles(list<string>& files)
if (!init())
return false;
// We use an FsTreeWalker just for handling the skipped path/name lists
FsTreeWalker walker;
walker.setSkippedPaths(m_config->getSkippedPaths());
for (list<string>::iterator it = files.begin(); it != files.end(); ) {
LOGDEB2(("FsIndexer::indexFiles: [%s]\n", it->c_str()));
m_config->setKeyDir(path_getfather(*it));
walker.setSkippedNames(m_config->getSkippedNames());
// Check path against indexed areas and skipped names/paths
if (matchesSkipped(m_tdl, walker, *it)) {
it++; continue;
}
struct stat stb;
if (lstat(it->c_str(), &stb) != 0) {
LOGERR(("FsIndexer::indexFiles: lstat(%s): %s", it->c_str(),
@ -238,23 +233,6 @@ bool FsIndexer::indexFiles(list<string>& files)
it++; continue;
}
string dir = path_getfather(*it);
m_config->setKeyDir(dir);
static string lstdir;
static list<string> skpnl;
static list<string> skppl;
if (lstdir.compare(dir)) {
LOGDEB(("Recomputing list of skipped names\n"));
skpnl = m_config->getSkippedNames();
skppl = m_config->getSkippedPaths();
lstdir = dir;
}
// Check path against indexed areas and skipped names/paths
if (matchesSkipped(m_tdl, skpnl, skppl, *it)) {
it++; continue;
}
int abslen;
if (m_config->getConfParam("idxabsmlen", &abslen))
m_db->setAbstractParams(abslen, -1, -1);
@ -363,6 +341,9 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
flg == FsTreeWalker::FtwDirReturn) {
m_config->setKeyDir(fn);
// Set up skipped patterns for this subtree.
m_walker.setSkippedNames(m_config->getSkippedNames());
int abslen;
if (m_config->getConfParam("idxabsmlen", &abslen))
m_db->setAbstractParams(abslen, -1, -1);

View File

@ -59,18 +59,27 @@ static RclMonitor *makeMonitor();
while we create the watches)*/
class WalkCB : public FsTreeWalkerCB {
public:
WalkCB(RclConfig *conf, RclMonitor *mon, RclMonEventQueue *queue)
: m_conf(conf), m_mon(mon), m_queue(queue)
/** Build the callback used while walking the tree to set up watches.
 * @param conf configuration, used to set the key directory and fetch
 *   the skipped names for each directory.
 * @param mon the monitor object.
 * @param queue the event queue.
 * @param walker the tree walker performing the walk. It is kept by
 *   reference (its skipped names are adjusted from processone()), so it
 *   must outlive this object.
 */
WalkCB(RclConfig *conf, RclMonitor *mon, RclMonEventQueue *queue,
       FsTreeWalker& walker)
    : m_config(conf), m_mon(mon), m_queue(queue), m_walker(walker)
{}
virtual ~WalkCB()
{}
virtual FsTreeWalker::Status
processone(const string &fn, const struct stat *st, FsTreeWalker::CbFlag flg)
processone(const string &fn, const struct stat *st,
FsTreeWalker::CbFlag flg)
{
LOGDEB2(("rclMonRcvRun: processone %s m_mon %p m_mon->ok %d\n",
fn.c_str(), m_mon, m_mon?m_mon->ok():0));
if (flg == FsTreeWalker::FtwDirEnter ||
flg == FsTreeWalker::FtwDirReturn) {
m_config->setKeyDir(fn);
// Set up skipped patterns for this subtree.
m_walker.setSkippedNames(m_config->getSkippedNames());
}
if (flg == FsTreeWalker::FtwDirEnter) {
// Create watch when entering directory, but first empty
// whatever events we may already have on queue
@ -99,9 +108,10 @@ public:
}
private:
RclConfig *m_conf;
RclConfig *m_config;
RclMonitor *m_mon;
RclMonEventQueue *m_queue;
FsTreeWalker& m_walker;
};
/** Main thread routine: create watches, then forever wait for and queue events */
@ -133,7 +143,7 @@ void *rclMonRcvRun(void *q)
// Walk the directory trees to add watches
FsTreeWalker walker;
walker.setSkippedPaths(queue->getConfig()->getDaemSkippedPaths());
WalkCB walkcb(queue->getConfig(), mon, queue);
WalkCB walkcb(queue->getConfig(), mon, queue, walker);
for (list<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
queue->getConfig()->setKeyDir(*it);
// Adjust the follow symlinks options
@ -144,15 +154,6 @@ void *rclMonRcvRun(void *q)
} else {
walker.setOpts(FsTreeWalker::FtwOptNone);
}
// Adjust the skipped names according to config
walker.setSkippedNames(queue->getConfig()->getSkippedNames());
// Add the dbdir to skipped paths. Note that adding the dbdir
// is probably not useful as we'll probably never have
// multiple dbs per config file, and the global dbdir is
// included by
// config->getSkippedPaths(). Still, better to be safe here as
// config->including dbdir in the walk will get us into a loop
walker.addSkippedPath(queue->getConfig()->getDbDir());
LOGDEB(("rclMonRcvRun: walking %s\n", it->c_str()));
walker.walk(*it, walkcb);
}

View File

@ -94,8 +94,6 @@ bool FsTreeWalker::addSkippedName(const string& pattern)
bool FsTreeWalker::setSkippedNames(const list<string> &patterns)
{
data->skippedNames = patterns;
data->skippedNames.sort();
data->skippedNames.unique();
return true;
}
bool FsTreeWalker::inSkippedNames(const string& name)
@ -125,8 +123,6 @@ bool FsTreeWalker::setSkippedPaths(const list<string> &paths)
it != data->skippedPaths.end(); it++)
if (!(data->options & FtwNoCanon))
*it = path_canon(*it);
data->skippedPaths.sort();
data->skippedPaths.unique();
return true;
}
bool FsTreeWalker::inSkippedPaths(const string& path)
@ -134,7 +130,14 @@ bool FsTreeWalker::inSkippedPaths(const string& path)
list<string>::const_iterator it;
for (it = data->skippedPaths.begin();
it != data->skippedPaths.end(); it++) {
if (fnmatch(it->c_str(), path.c_str(), FNM_PATHNAME) == 0)
// If we find a system where FNM_LEADING_DIR is undefined (its
// unposixy), will have to do this for all ascendant paths up
// to the topdir. We'll then have a constructor option because
// this is only useful when called externally. When used
// internally, we don't descend in skipped paths, and so don't
// need FNM_LEADING_DIR
if (fnmatch(it->c_str(), path.c_str(), FNM_PATHNAME |
FNM_LEADING_DIR) == 0)
return true;
}
return false;

View File

@ -79,8 +79,11 @@ class FsTreeWalker {
/** Set the ignored paths list */
bool setSkippedPaths(const list<string> &pathlist);
/** Test if path/name should be skipped. This can be used independently of
* an actual tree walk */
bool inSkippedPaths(const string& path);
bool inSkippedNames(const string& name);
private:
Status iwalk(const string &dir, struct stat *stp, FsTreeWalkerCB& cb);
class Internal;

View File

@ -182,71 +182,71 @@ template <class T> bool stringToStrings(const string &s, T &tokens)
states state = SPACE;
for (unsigned int i = 0; i < s.length(); i++) {
switch (s[i]) {
case '"':
case '"':
switch(state) {
case SPACE:
case SPACE:
state=INQUOTE; continue;
case TOKEN:
case TOKEN:
current += '"';
continue;
case INQUOTE:
tokens.push_back(current);
case INQUOTE:
tokens.insert(tokens.end(), current);
current.clear();
state = SPACE;
continue;
case ESCAPE:
case ESCAPE:
current += '"';
state = INQUOTE;
continue;
continue;
}
break;
case '\\':
case '\\':
switch(state) {
case SPACE:
case TOKEN:
current += '\\';
state=TOKEN;
continue;
case INQUOTE:
state = ESCAPE;
continue;
case ESCAPE:
current += '\\';
state = INQUOTE;
continue;
case SPACE:
case TOKEN:
current += '\\';
state=TOKEN;
continue;
case INQUOTE:
state = ESCAPE;
continue;
case ESCAPE:
current += '\\';
state = INQUOTE;
continue;
}
break;
case ' ':
case '\t':
case '\n':
case '\r':
case ' ':
case '\t':
case '\n':
case '\r':
switch(state) {
case SPACE:
continue;
case TOKEN:
tokens.push_back(current);
case SPACE:
continue;
case TOKEN:
tokens.insert(tokens.end(), current);
current.clear();
state = SPACE;
continue;
case INQUOTE:
case ESCAPE:
current += s[i];
continue;
case INQUOTE:
case ESCAPE:
current += s[i];
continue;
}
break;
default:
default:
switch(state) {
case ESCAPE:
state = INQUOTE;
break;
case SPACE:
state = TOKEN;
break;
case TOKEN:
case INQUOTE:
break;
case ESCAPE:
state = INQUOTE;
break;
case SPACE:
state = TOKEN;
break;
case TOKEN:
case INQUOTE:
break;
}
current += s[i];
}
@ -255,7 +255,7 @@ template <class T> bool stringToStrings(const string &s, T &tokens)
case SPACE:
break;
case TOKEN:
tokens.push_back(current);
tokens.insert(tokens.end(), current);
break;
case INQUOTE:
case ESCAPE:
@ -271,6 +271,10 @@ bool stringToStrings(const string &s, vector<string> &tokens)
{
return stringToStrings<vector<string> >(s, tokens);
}
// Split s into tokens and store them in a set<string>. This is the
// non-template entry point for set<string>: it just forwards to the
// generic stringToStrings<T>() implementation and hands back its result.
bool stringToStrings(const string &s, set<string> &tokens)
{
    const bool parsed = stringToStrings<set<string> >(s, tokens);
    return parsed;
}
template <class T> void stringsToString(const T &tokens, string &s)
{

View File

@ -54,6 +54,7 @@ extern bool samecharset(const string &cs1, const string &cs2);
*/
extern bool stringToStrings(const string &s, list<string> &tokens);
extern bool stringToStrings(const string &s, vector<string> &tokens);
extern bool stringToStrings(const string &s, set<string> &tokens);
/**
* Inverse operation: