Allow fields local to a subtree to be set in the configuration

This commit is contained in:
dockes 2009-10-30 08:59:30 +00:00
parent ccae4eee18
commit e7dfd57d6e
9 changed files with 142 additions and 39 deletions

View File

@ -1 +1 @@
1.12.0 1.13.0

View File

@ -90,6 +90,14 @@ class RclConfig {
list<string> getConfNames(const char *pattern = 0) { list<string> getConfNames(const char *pattern = 0) {
return m_conf->getNames(m_keydir, pattern); return m_conf->getNames(m_keydir, pattern);
} }
/** Tell whether parameter nm is set somewhere in the configuration.
 *  Returns false when there is no configuration object (m_conf is
 *  null), else forwards the question to it. */
bool hasNameAnywhere(const string& nm)
{
    if (!m_conf)
        return false;
    return m_conf->hasNameAnywhere(nm);
}
/** Get default charset for current keydir (was set during setKeydir) /** Get default charset for current keydir (was set during setKeydir)
* filenames are handled differently */ * filenames are handled differently */
const string &getDefCharset(bool filename = false); const string &getDefCharset(bool filename = false);

View File

@ -362,6 +362,46 @@ bool DbIndexer::purgeFiles(const list<string> &filenames)
return true; return true;
} }
// Local fields can be set for fs subtrees in the configuration file
void DbIndexer::localfieldsfromconf()
{
LOGDEB(("DbIndexer::localfieldsfromconf\n"));
m_localfields.clear();
string sfields;
if (!m_config->getConfParam("localfields", sfields))
return;
list<string> lfields;
if (!stringToStrings(sfields, lfields)) {
LOGERR(("DbIndexer::localfieldsfromconf: bad syntax for [%s]\n",
sfields.c_str()));
return;
}
for (list<string>::const_iterator it = lfields.begin();
it != lfields.end(); it++) {
ConfSimple conf(*it, 1, true);
list<string> nmlst = conf.getNames("");
for (list<string>::const_iterator it1 = nmlst.begin();
it1 != nmlst.end(); it1++) {
conf.get(*it1, m_localfields[*it1]);
LOGDEB2(("DbIndexer::localfieldsfromconf: [%s] => [%s]\n",
(*it1).c_str(), m_localfields[*it1].c_str()));
}
}
}
//
// Copy the local fields currently held in m_localfields (filled by
// localfieldsfromconf()) into the document metadata.
// @param doc document whose meta map receives the fields.
// A field which the document already defines is left untouched: values
// coming from the document take precedence over the configuration.
void DbIndexer::setlocalfields(Rcl::Doc& doc)
{
    for (map<string, string>::const_iterator it = m_localfields.begin();
         it != m_localfields.end(); ++it) {
        // Should local fields override those coming from the document
        // ? I think not, but not too sure.
        // The existence test has to use the field name (it->first):
        // doc.meta is indexed by name, so looking up the field value
        // would practically never match and the document's own value
        // would get overwritten.
        if (doc.meta.find(it->first) == doc.meta.end()) {
            doc.meta[it->first] = it->second;
        }
    }
}
/// This method gets called for every file and directory found by the /// This method gets called for every file and directory found by the
/// tree walker. /// tree walker.
@ -381,28 +421,37 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
if (m_updater && !m_updater->update()) { if (m_updater && !m_updater->update()) {
return FsTreeWalker::FtwStop; return FsTreeWalker::FtwStop;
} }
// If we're changing directories, possibly adjust parameters (set // If we're changing directories, possibly adjust parameters (set
// the current directory in configuration object) // the current directory in configuration object)
if (flg == FsTreeWalker::FtwDirEnter || if (flg == FsTreeWalker::FtwDirEnter ||
flg == FsTreeWalker::FtwDirReturn) { flg == FsTreeWalker::FtwDirReturn) {
m_config->setKeyDir(fn); m_config->setKeyDir(fn);
int abslen; int abslen;
if (m_config->getConfParam("idxabsmlen", &abslen)) if (m_config->getConfParam("idxabsmlen", &abslen))
m_db.setAbstractParams(abslen, -1, -1); m_db.setAbstractParams(abslen, -1, -1);
// Adjust local fields from config for this subtree
if (m_havelocalfields)
localfieldsfromconf();
if (flg == FsTreeWalker::FtwDirReturn) if (flg == FsTreeWalker::FtwDirReturn)
return FsTreeWalker::FtwOk; return FsTreeWalker::FtwOk;
} }
////////////////////
// Check db up to date ? Doing this before file type // Check db up to date ? Doing this before file type
// identification means that, if usesystemfilecommand is switched // identification means that, if usesystemfilecommand is switched
// from on to off it may happen that some files which are now // from on to off it may happen that some files which are now
// without mime type will not be purged from the db, resulting // without mime type will not be purged from the db, resulting
// in possible 'cannot intern file' messages at query time... // in possible 'cannot intern file' messages at query time...
char cbuf[100];
// Document signature. This is based on m/ctime and size and used // Document signature. This is based on m/ctime and size and used
// for the uptodate check (the value computed here is checked // for the uptodate check (the value computed here is checked
// against the stored one). Changing the computation forces a full // against the stored one). Changing the computation forces a full
// reindex of course. // reindex of course.
char cbuf[100];
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME); sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
string sig = cbuf; string sig = cbuf;
string udi; string udi;
@ -507,6 +556,9 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
doc.sig += plus; doc.sig += plus;
} }
// Possibly add fields from local config
if (m_havelocalfields)
setlocalfields(doc);
// Add document to database. If there is an ipath, add it as a children // Add document to database. If there is an ipath, add it as a children
// of the file document. // of the file document.
string udi; string udi;

View File

@ -20,10 +20,12 @@
#include <string> #include <string>
#include <list> #include <list>
#include <map>
#ifndef NO_NAMESPACES #ifndef NO_NAMESPACES
using std::string; using std::string;
using std::list; using std::list;
using std::map;
#endif #endif
#include "rclconfig.h" #include "rclconfig.h"
@ -101,7 +103,9 @@ class DbIndexer : public FsTreeWalkerCB {
DbIxStatusUpdater *updfunc = 0 // status updater callback DbIxStatusUpdater *updfunc = 0 // status updater callback
) )
: m_config(cnf), m_db(cnf), m_updater(updfunc) : m_config(cnf), m_db(cnf), m_updater(updfunc)
{} {
m_havelocalfields = m_config->hasNameAnywhere("localfields");
}
virtual ~DbIndexer(); virtual ~DbIndexer();
@ -150,7 +154,16 @@ class DbIndexer : public FsTreeWalkerCB {
string m_tmpdir; string m_tmpdir;
DbIxStatusUpdater *m_updater; DbIxStatusUpdater *m_updater;
// The configuration can set attribute fields to be inherited by
// all files in a file system area. E.g.: set "apptag = thunderbird"
// inside ~/.thunderbird. The boolean is set at init to avoid
// further wasteful processing if no local fields are set.
bool m_havelocalfields;
map<string, string> m_localfields;
bool init(bool rst = false, bool rdonly = false); bool init(bool rst = false, bool rdonly = false);
void localfieldsfromconf();
void setlocalfields(Rcl::Doc& doc);
}; };
/** Helper methods in recollindex.cpp for initial checks/setup to index /** Helper methods in recollindex.cpp for initial checks/setup to index

View File

@ -196,9 +196,18 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
} }
} }
// We are supposed to get here if there was no specific error, but // We get here if there was no specific error, but there is no
// there is no identified mime type, or no handler // identified mime type, or no handler associated.
// associated. These files are either ignored or their name is
#ifdef INDEX_UNKNOWN_TEXT_AS_PLAIN
// If the type is an unknown text/xxx, index as text/plain and
// hope for the best (this wouldn't work too well with text/rtf...)
if (mtype.find("text/") == 0) {
return mhFactory("text/plain");
}
#endif
// Finally, unhandled files are either ignored or their name is
// indexed, depending on configuration // indexed, depending on configuration
bool indexunknown = false; bool indexunknown = false;
cfg->getConfParam("indexallfilenames", &indexunknown); cfg->getConfParam("indexallfilenames", &indexunknown);

View File

@ -134,7 +134,7 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
Doc &doc, int percent) Doc &doc, int percent)
{ {
LOGDEB0(("Db::dbDataToRclDoc: data: %s\n", data.c_str())); LOGDEB0(("Db::dbDataToRclDoc: data: %s\n", data.c_str()));
ConfSimple parms(&data); ConfSimple parms(data);
if (!parms.ok()) if (!parms.ok())
return false; return false;
parms.get(Doc::keyurl, doc.url); parms.get(Doc::keyurl, doc.url);

View File

@ -42,7 +42,7 @@ public:
{ {
// Parse xapian document's data and populate doc fields // Parse xapian document's data and populate doc fields
string data = xdoc.get_data(); string data = xdoc.get_data();
ConfSimple parms(&data); ConfSimple parms(data);
// The only filtering for now is on file path (subtree) // The only filtering for now is on file path (subtree)
string url; string url;

View File

@ -46,7 +46,7 @@ using std::list;
#define MIN(A,B) ((A)<(B) ? (A) : (B)) #define MIN(A,B) ((A)<(B) ? (A) : (B))
#endif #endif
//#define DEBUG #undef DEBUG
#ifdef DEBUG #ifdef DEBUG
#define LOGDEB(X) fprintf X #define LOGDEB(X) fprintf X
#else #else
@ -63,22 +63,20 @@ void ConfSimple::parseinput(istream &input)
bool eof = false; bool eof = false;
for (;;) { for (;;) {
cline[0] = 0;
input.getline(cline, LL-1); input.getline(cline, LL-1);
LOGDEB((stderr, "Parse:line: [%s] status %d\n", cline, int(status))); LOGDEB((stderr, "Parse:line: [%s] status %d\n", cline, int(status)));
if (!input.good()) { if (!input.good()) {
if (input.bad()) { if (input.bad()) {
LOGDEB((stderr, "Parse: input.bad()\n"));
status = STATUS_ERROR; status = STATUS_ERROR;
return; return;
} }
LOGDEB((stderr, "Parse: eof\n"));
// Must be eof ? But maybe we have a partial line which // Must be eof ? But maybe we have a partial line which
// must be processed. This happens if the last line before // must be processed. This happens if the last line before
// eof ends with a backslash // eof ends with a backslash, or there is no final \n
if (appending) { eof = true;
eof = true;
goto processline;
}
break;
} }
{ {
@ -94,11 +92,12 @@ void ConfSimple::parseinput(istream &input)
else else
line = cline; line = cline;
processline:
// Note that we trim whitespace before checking for backslash-eol // Note that we trim whitespace before checking for backslash-eol
// This avoids invisible problems. // This avoids invisible whitespace problems.
trimstring(line); trimstring(line);
if (line.empty() || line.at(0) == '#') { if (line.empty() || line.at(0) == '#') {
if (eof)
break;
m_order.push_back(ConfLine(ConfLine::CFL_COMMENT, line)); m_order.push_back(ConfLine(ConfLine::CFL_COMMENT, line));
continue; continue;
} }
@ -142,29 +141,29 @@ void ConfSimple::parseinput(istream &input)
continue; continue;
} }
i_set(nm, val, submapkey, true); i_set(nm, val, submapkey, true);
if (eof == true) if (eof)
break; break;
} }
} }
ConfSimple::ConfSimple(int readonly, bool tildexp) ConfSimple::ConfSimple(int readonly, bool tildexp)
: dotildexpand(tildexp), m_data(0), m_holdWrites(false) : dotildexpand(tildexp), m_holdWrites(false)
{ {
status = readonly ? STATUS_RO : STATUS_RW; status = readonly ? STATUS_RO : STATUS_RW;
} }
ConfSimple::ConfSimple(string *d, int readonly, bool tildexp) ConfSimple::ConfSimple(const string& d, int readonly, bool tildexp)
: dotildexpand(tildexp), m_data(d), m_holdWrites(false) : dotildexpand(tildexp), m_holdWrites(false)
{ {
status = readonly ? STATUS_RO : STATUS_RW; status = readonly ? STATUS_RO : STATUS_RW;
stringstream input(*d, ios::in); stringstream input(d, ios::in);
parseinput(input); parseinput(input);
} }
ConfSimple::ConfSimple(const char *fname, int readonly, bool tildexp) ConfSimple::ConfSimple(const char *fname, int readonly, bool tildexp)
: dotildexpand(tildexp), m_filename(fname), m_data(0), m_holdWrites(false) : dotildexpand(tildexp), m_filename(fname), m_holdWrites(false)
{ {
status = readonly ? STATUS_RO : STATUS_RW; status = readonly ? STATUS_RO : STATUS_RW;
@ -418,7 +417,8 @@ ConfSimple::sortwalk(WalkerCode (*walker)(void *,const string&,const string&),
return WALK_CONTINUE; return WALK_CONTINUE;
} }
// Write to default output: // Write to default output. This currently only does something if output is
// a file
bool ConfSimple::write() bool ConfSimple::write()
{ {
if (!ok()) if (!ok())
@ -430,11 +430,11 @@ bool ConfSimple::write()
if (!output.is_open()) if (!output.is_open())
return 0; return 0;
return write(output); return write(output);
} else if (m_data) {
ostringstream output(*m_data, ios::out | ios::trunc);
return write(output);
} else { } else {
// No backing store, no writing // No backing store, no writing. Maybe one day we'll need it with
// some kind of output string. This can't be the original string which
// is currently readonly.
//ostringstream output(m_ostring, ios::out | ios::trunc);
return 1; return 1;
} }
} }
@ -529,6 +529,18 @@ list<string> ConfSimple::getSubKeys()
return mylist; return mylist;
} }
bool ConfSimple::hasNameAnywhere(const string& nm)
{
list<string>keys = getSubKeys();
for (list<string>::const_iterator it = keys.begin();
it != keys.end(); it++) {
string val;
if (get(nm, val, *it))
return true;
}
return false;
}
// ////////////////////////////////////////////////////////////////////////// // //////////////////////////////////////////////////////////////////////////
// ConfTree Methods: conftree interpret keys like a hierarchical file tree // ConfTree Methods: conftree interpret keys like a hierarchical file tree
// ////////////////////////////////////////////////////////////////////////// // //////////////////////////////////////////////////////////////////////////
@ -917,7 +929,7 @@ int main(int argc, char **argv)
if (argc != 0) if (argc != 0)
Usage(); Usage();
string s; string s;
ConfSimple c(&s); ConfSimple c(s);
memtest(c); memtest(c);
exit(0); exit(0);
} else if ((op_flags & OPT_V)) { } else if ((op_flags & OPT_V)) {

View File

@ -96,6 +96,7 @@ public:
virtual ~ConfNull() {}; virtual ~ConfNull() {};
virtual int get(const string &name, string &value, virtual int get(const string &name, string &value,
const string &sk = string()) = 0; const string &sk = string()) = 0;
virtual bool hasNameAnywhere(const string& nm) = 0;
virtual int set(const string &nm, const string &val, virtual int set(const string &nm, const string &val,
const string &sk = string()) = 0; const string &sk = string()) = 0;
virtual bool ok() = 0; virtual bool ok() = 0;
@ -127,7 +128,7 @@ public:
* @param readonly if true open readonly, else rw * @param readonly if true open readonly, else rw
* @param tildexp try tilde (home dir) expansion for subsection names * @param tildexp try tilde (home dir) expansion for subsection names
*/ */
ConfSimple(string *data, int readonly = 0, bool tildexp = false); ConfSimple(const string& data, int readonly = 0, bool tildexp = false);
/** /**
* Build an empty object. This will be memory only, with no backing store. * Build an empty object. This will be memory only, with no backing store.
@ -196,6 +197,10 @@ public:
/** Return all names in given submap. */ /** Return all names in given submap. */
virtual list<string> getNames(const string &sk, const char *pattern = 0); virtual list<string> getNames(const string &sk, const char *pattern = 0);
/** Check if name is present in any submap. This is relatively expensive
* but useful for saving further processing sometimes */
virtual bool hasNameAnywhere(const string& nm);
/** /**
* Return all subkeys * Return all subkeys
*/ */
@ -207,13 +212,11 @@ public:
* Copy constructor. Expensive but less so than a full rebuild * Copy constructor. Expensive but less so than a full rebuild
*/ */
ConfSimple(const ConfSimple &rhs) ConfSimple(const ConfSimple &rhs)
: ConfNull(), m_data(0) : ConfNull()
{ {
if ((status = rhs.status) == STATUS_ERROR) if ((status = rhs.status) == STATUS_ERROR)
return; return;
m_filename = rhs.m_filename; m_filename = rhs.m_filename;
// Note: we just share the pointer, this doesnt belong to us
m_data = rhs.m_data;
m_submaps = rhs.m_submaps; m_submaps = rhs.m_submaps;
} }
@ -224,8 +227,6 @@ public:
{ {
if (this != &rhs && (status = rhs.status) != STATUS_ERROR) { if (this != &rhs && (status = rhs.status) != STATUS_ERROR) {
m_filename = rhs.m_filename; m_filename = rhs.m_filename;
// Note: we don't own data. Just share the pointer
m_data = rhs.m_data;
m_submaps = rhs.m_submaps; m_submaps = rhs.m_submaps;
} }
return *this; return *this;
@ -237,8 +238,6 @@ protected:
private: private:
// Set if we're working with a file // Set if we're working with a file
string m_filename; string m_filename;
// Set if we're working with an in-memory string
string *m_data;
// Configuration data submaps (one per subkey, the main data has a // Configuration data submaps (one per subkey, the main data has a
// null subkey) // null subkey)
map<string, map<string, string> > m_submaps; map<string, map<string, string> > m_submaps;
@ -281,7 +280,7 @@ public:
* expansion */ * expansion */
ConfTree(const char *fname, int readonly = 0) ConfTree(const char *fname, int readonly = 0)
: ConfSimple(fname, readonly, true) {} : ConfSimple(fname, readonly, true) {}
ConfTree(string *data, int readonly = 0) ConfTree(const string &data, int readonly = 0)
: ConfSimple(data, readonly, true) {} : ConfSimple(data, readonly, true) {}
ConfTree(int readonly = 0) ConfTree(int readonly = 0)
: ConfSimple(readonly, true) {} : ConfSimple(readonly, true) {}
@ -364,6 +363,16 @@ public:
return false; return false;
} }
virtual bool hasNameAnywhere(const string& nm)
{
typename list<T*>::iterator it;
for (it = m_confs.begin();it != m_confs.end();it++) {
if ((*it)->hasNameAnywhere(nm))
return true;
}
return false;
}
virtual int set(const string &nm, const string &val, const string &sk = string()) virtual int set(const string &nm, const string &val, const string &sk = string())
{ {
if (!m_ok) if (!m_ok)