Allow fields local to a subtree to be set in the configuration

This commit is contained in:
dockes 2009-10-30 08:59:30 +00:00
parent ccae4eee18
commit e7dfd57d6e
9 changed files with 142 additions and 39 deletions

View File

@ -1 +1 @@
1.12.0
1.13.0

View File

@ -90,6 +90,14 @@ class RclConfig {
// Return the names that m_conf reports for the m_keydir key; the optional
// pattern is forwarded unchanged to getNames().
list<string> getConfNames(const char *pattern = 0)
{
    list<string> names = m_conf->getNames(m_keydir, pattern);
    return names;
}
/** Tell whether the name nm is set anywhere in the configuration.
 *  Returns false when there is no configuration object (m_conf is null). */
bool hasNameAnywhere(const string& nm)
{
    if (!m_conf)
        return false;
    return m_conf->hasNameAnywhere(nm);
}
/** Get default charset for current keydir (was set during setKeydir)
* filenames are handled differently */
const string &getDefCharset(bool filename = false);

View File

@ -362,6 +362,46 @@ bool DbIndexer::purgeFiles(const list<string> &filenames)
return true;
}
// Local fields can be set for fs subtrees in the configuration file
void DbIndexer::localfieldsfromconf()
{
LOGDEB(("DbIndexer::localfieldsfromconf\n"));
m_localfields.clear();
string sfields;
if (!m_config->getConfParam("localfields", sfields))
return;
list<string> lfields;
if (!stringToStrings(sfields, lfields)) {
LOGERR(("DbIndexer::localfieldsfromconf: bad syntax for [%s]\n",
sfields.c_str()));
return;
}
for (list<string>::const_iterator it = lfields.begin();
it != lfields.end(); it++) {
ConfSimple conf(*it, 1, true);
list<string> nmlst = conf.getNames("");
for (list<string>::const_iterator it1 = nmlst.begin();
it1 != nmlst.end(); it1++) {
conf.get(*it1, m_localfields[*it1]);
LOGDEB2(("DbIndexer::localfieldsfromconf: [%s] => [%s]\n",
(*it1).c_str(), m_localfields[*it1].c_str()));
}
}
}
//
// Copy the entries of m_localfields (field name => value) into the
// metadata of doc. A field which the document already has is left
// untouched.
void DbIndexer::setlocalfields(Rcl::Doc& doc)
{
    for (map<string, string>::const_iterator it = m_localfields.begin();
         it != m_localfields.end(); ++it) {
        // Should local fields override those coming from the document
        // ? I think not, but not too sure.
        // The existence test has to look up the field *name* (it->first),
        // which is the key the value gets stored under. Looking up the
        // value (it->second) would practically never match, and the local
        // field would then always overwrite the document's own value.
        if (doc.meta.find(it->first) == doc.meta.end()) {
            doc.meta[it->first] = it->second;
        }
    }
}
/// This method gets called for every file and directory found by the
/// tree walker.
@ -381,28 +421,37 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
if (m_updater && !m_updater->update()) {
return FsTreeWalker::FtwStop;
}
// If we're changing directories, possibly adjust parameters (set
// the current directory in configuration object)
if (flg == FsTreeWalker::FtwDirEnter ||
flg == FsTreeWalker::FtwDirReturn) {
m_config->setKeyDir(fn);
int abslen;
if (m_config->getConfParam("idxabsmlen", &abslen))
m_db.setAbstractParams(abslen, -1, -1);
// Adjust local fields from config for this subtree
if (m_havelocalfields)
localfieldsfromconf();
if (flg == FsTreeWalker::FtwDirReturn)
return FsTreeWalker::FtwOk;
}
////////////////////
// Check db up to date ? Doing this before file type
// identification means that, if usesystemfilecommand is switched
// from on to off it may happen that some files which are now
// without mime type will not be purged from the db, resulting
// in possible 'cannot intern file' messages at query time...
char cbuf[100];
// Document signature. This is based on m/ctime and size and used
// for the uptodate check (the value computed here is checked
// against the stored one). Changing the computation forces a full
// reindex of course.
char cbuf[100];
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
string sig = cbuf;
string udi;
@ -507,6 +556,9 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
doc.sig += plus;
}
// Possibly add fields from local config
if (m_havelocalfields)
setlocalfields(doc);
// Add document to database. If there is an ipath, add it as a child
// of the file document.
string udi;

View File

@ -20,10 +20,12 @@
#include <string>
#include <list>
#include <map>
#ifndef NO_NAMESPACES
using std::string;
using std::list;
using std::map;
#endif
#include "rclconfig.h"
@ -101,7 +103,9 @@ class DbIndexer : public FsTreeWalkerCB {
DbIxStatusUpdater *updfunc = 0 // status updater callback
)
: m_config(cnf), m_db(cnf), m_updater(updfunc)
{}
{
m_havelocalfields = m_config->hasNameAnywhere("localfields");
}
virtual ~DbIndexer();
@ -150,7 +154,16 @@ class DbIndexer : public FsTreeWalkerCB {
string m_tmpdir;
DbIxStatusUpdater *m_updater;
// The configuration can set attribute fields to be inherited by
// all files in a file system area. E.g.: set "apptag = thunderbird"
// inside ~/.thunderbird. The boolean is set at init to avoid
// further wasteful processing if no local fields are set.
bool m_havelocalfields;
map<string, string> m_localfields;
bool init(bool rst = false, bool rdonly = false);
void localfieldsfromconf();
void setlocalfields(Rcl::Doc& doc);
};
/** Helper methods in recollindex.cpp for initial checks/setup to index

View File

@ -196,9 +196,18 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
}
}
// We are supposed to get here if there was no specific error, but
// there is no identified mime type, or no handler
// associated. These files are either ignored or their name is
// We get here if there was no specific error, but there is no
// identified mime type, or no handler associated.
#ifdef INDEX_UNKNOWN_TEXT_AS_PLAIN
// If the type is an unknown text/xxx, index as text/plain and
// hope for the best (this wouldn't work too well with text/rtf...)
if (mtype.find("text/") == 0) {
return mhFactory("text/plain");
}
#endif
// Finally, unhandled files are either ignored or their name is
// indexed, depending on configuration
bool indexunknown = false;
cfg->getConfParam("indexallfilenames", &indexunknown);

View File

@ -134,7 +134,7 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
Doc &doc, int percent)
{
LOGDEB0(("Db::dbDataToRclDoc: data: %s\n", data.c_str()));
ConfSimple parms(&data);
ConfSimple parms(data);
if (!parms.ok())
return false;
parms.get(Doc::keyurl, doc.url);

View File

@ -42,7 +42,7 @@ public:
{
// Parse xapian document's data and populate doc fields
string data = xdoc.get_data();
ConfSimple parms(&data);
ConfSimple parms(data);
// The only filtering for now is on file path (subtree)
string url;

View File

@ -46,7 +46,7 @@ using std::list;
#define MIN(A,B) ((A)<(B) ? (A) : (B))
#endif
//#define DEBUG
#undef DEBUG
#ifdef DEBUG
#define LOGDEB(X) fprintf X
#else
@ -63,22 +63,20 @@ void ConfSimple::parseinput(istream &input)
bool eof = false;
for (;;) {
cline[0] = 0;
input.getline(cline, LL-1);
LOGDEB((stderr, "Parse:line: [%s] status %d\n", cline, int(status)));
if (!input.good()) {
if (input.bad()) {
LOGDEB((stderr, "Parse: input.bad()\n"));
status = STATUS_ERROR;
return;
}
LOGDEB((stderr, "Parse: eof\n"));
// Must be eof ? But maybe we have a partial line which
// must be processed. This happens if the last line before
// eof ends with a backslash
if (appending) {
eof = true;
goto processline;
}
break;
// eof ends with a backslash, or there is no final \n
eof = true;
}
{
@ -94,11 +92,12 @@ void ConfSimple::parseinput(istream &input)
else
line = cline;
processline:
// Note that we trim whitespace before checking for backslash-eol
// This avoids invisible problems.
// This avoids invisible whitespace problems.
trimstring(line);
if (line.empty() || line.at(0) == '#') {
if (eof)
break;
m_order.push_back(ConfLine(ConfLine::CFL_COMMENT, line));
continue;
}
@ -142,29 +141,29 @@ void ConfSimple::parseinput(istream &input)
continue;
}
i_set(nm, val, submapkey, true);
if (eof == true)
if (eof)
break;
}
}
ConfSimple::ConfSimple(int readonly, bool tildexp)
: dotildexpand(tildexp), m_data(0), m_holdWrites(false)
: dotildexpand(tildexp), m_holdWrites(false)
{
status = readonly ? STATUS_RO : STATUS_RW;
}
ConfSimple::ConfSimple(string *d, int readonly, bool tildexp)
: dotildexpand(tildexp), m_data(d), m_holdWrites(false)
ConfSimple::ConfSimple(const string& d, int readonly, bool tildexp)
: dotildexpand(tildexp), m_holdWrites(false)
{
status = readonly ? STATUS_RO : STATUS_RW;
stringstream input(*d, ios::in);
stringstream input(d, ios::in);
parseinput(input);
}
ConfSimple::ConfSimple(const char *fname, int readonly, bool tildexp)
: dotildexpand(tildexp), m_filename(fname), m_data(0), m_holdWrites(false)
: dotildexpand(tildexp), m_filename(fname), m_holdWrites(false)
{
status = readonly ? STATUS_RO : STATUS_RW;
@ -418,7 +417,8 @@ ConfSimple::sortwalk(WalkerCode (*walker)(void *,const string&,const string&),
return WALK_CONTINUE;
}
// Write to default output:
// Write to default output. This currently only does something if output is
// a file
bool ConfSimple::write()
{
if (!ok())
@ -430,11 +430,11 @@ bool ConfSimple::write()
if (!output.is_open())
return 0;
return write(output);
} else if (m_data) {
ostringstream output(*m_data, ios::out | ios::trunc);
return write(output);
} else {
// No backing store, no writing
// No backing store, no writing. Maybe one day we'll need it with
// some kind of output string. This can't be the original string which
// is currently readonly.
//ostringstream output(m_ostring, ios::out | ios::trunc);
return 1;
}
}
@ -529,6 +529,18 @@ list<string> ConfSimple::getSubKeys()
return mylist;
}
bool ConfSimple::hasNameAnywhere(const string& nm)
{
list<string>keys = getSubKeys();
for (list<string>::const_iterator it = keys.begin();
it != keys.end(); it++) {
string val;
if (get(nm, val, *it))
return true;
}
return false;
}
// //////////////////////////////////////////////////////////////////////////
// ConfTree Methods: conftree interpret keys like a hierarchical file tree
// //////////////////////////////////////////////////////////////////////////
@ -917,7 +929,7 @@ int main(int argc, char **argv)
if (argc != 0)
Usage();
string s;
ConfSimple c(&s);
ConfSimple c(s);
memtest(c);
exit(0);
} else if ((op_flags & OPT_V)) {

View File

@ -96,6 +96,7 @@ public:
virtual ~ConfNull() {};
virtual int get(const string &name, string &value,
const string &sk = string()) = 0;
virtual bool hasNameAnywhere(const string& nm) = 0;
virtual int set(const string &nm, const string &val,
const string &sk = string()) = 0;
virtual bool ok() = 0;
@ -127,7 +128,7 @@ public:
* @param readonly if true open readonly, else rw
* @param tildexp try tilde (home dir) expansion for subsection names
*/
ConfSimple(string *data, int readonly = 0, bool tildexp = false);
ConfSimple(const string& data, int readonly = 0, bool tildexp = false);
/**
* Build an empty object. This will be memory only, with no backing store.
@ -196,6 +197,10 @@ public:
/** Return all names in given submap. */
virtual list<string> getNames(const string &sk, const char *pattern = 0);
/** Check if name is present in any submap. This is relatively expensive
* but useful for saving further processing sometimes */
virtual bool hasNameAnywhere(const string& nm);
/**
* Return all subkeys
*/
@ -207,13 +212,11 @@ public:
* Copy constructor. Expensive but less so than a full rebuild
*/
ConfSimple(const ConfSimple &rhs)
: ConfNull(), m_data(0)
: ConfNull()
{
if ((status = rhs.status) == STATUS_ERROR)
return;
m_filename = rhs.m_filename;
// Note: we just share the pointer, this doesnt belong to us
m_data = rhs.m_data;
m_submaps = rhs.m_submaps;
}
@ -224,8 +227,6 @@ public:
{
if (this != &rhs && (status = rhs.status) != STATUS_ERROR) {
m_filename = rhs.m_filename;
// Note: we don't own data. Just share the pointer
m_data = rhs.m_data;
m_submaps = rhs.m_submaps;
}
return *this;
@ -237,8 +238,6 @@ protected:
private:
// Set if we're working with a file
string m_filename;
// Set if we're working with an in-memory string
string *m_data;
// Configuration data submaps (one per subkey, the main data has a
// null subkey)
map<string, map<string, string> > m_submaps;
@ -281,7 +280,7 @@ public:
* expansion */
ConfTree(const char *fname, int readonly = 0)
: ConfSimple(fname, readonly, true) {}
ConfTree(string *data, int readonly = 0)
ConfTree(const string &data, int readonly = 0)
: ConfSimple(data, readonly, true) {}
ConfTree(int readonly = 0)
: ConfSimple(readonly, true) {}
@ -364,6 +363,16 @@ public:
return false;
}
virtual bool hasNameAnywhere(const string& nm)
{
typename list<T*>::iterator it;
for (it = m_confs.begin();it != m_confs.end();it++) {
if ((*it)->hasNameAnywhere(nm))
return true;
}
return false;
}
virtual int set(const string &nm, const string &val, const string &sk = string())
{
if (!m_ok)