diff --git a/src/VERSION b/src/VERSION index 0eed1a29..feaae22b 100644 --- a/src/VERSION +++ b/src/VERSION @@ -1 +1 @@ -1.12.0 +1.13.0 diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index cf7a829e..88cf9980 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -90,6 +90,14 @@ class RclConfig { list getConfNames(const char *pattern = 0) { return m_conf->getNames(m_keydir, pattern); } + + /** Check if name exists anywhere in config */ + bool hasNameAnywhere(const string& nm) + { + return m_conf? m_conf->hasNameAnywhere(nm) : false; + } + + /** Get default charset for current keydir (was set during setKeydir) * filenames are handled differently */ const string &getDefCharset(bool filename = false); diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index 4db0f3af..aabc5417 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -362,6 +362,46 @@ bool DbIndexer::purgeFiles(const list &filenames) return true; } +// Local fields can be set for fs subtrees in the configuration file: clear m_localfields and reload it from the "localfields" parameter +void DbIndexer::localfieldsfromconf() +{ + LOGDEB(("DbIndexer::localfieldsfromconf\n")); + m_localfields.clear(); + string sfields; + if (!m_config->getConfParam("localfields", sfields)) + return; + list lfields; + if (!stringToStrings(sfields, lfields)) { + LOGERR(("DbIndexer::localfieldsfromconf: bad syntax for [%s]\n", + sfields.c_str())); + return; + } + for (list::const_iterator it = lfields.begin(); + it != lfields.end(); it++) { + ConfSimple conf(*it, 1, true); + list nmlst = conf.getNames(""); + for (list::const_iterator it1 = nmlst.begin(); + it1 != nmlst.end(); it1++) { + conf.get(*it1, m_localfields[*it1]); + LOGDEB2(("DbIndexer::localfieldsfromconf: [%s] => [%s]\n", + (*it1).c_str(), m_localfields[*it1].c_str())); + } + } +} + +// Add the current local fields (m_localfields) to the document's meta map +void DbIndexer::setlocalfields(Rcl::Doc& doc) +{ + for (map::const_iterator it = m_localfields.begin(); + it != m_localfields.end(); it++) { + // Should local fields override those coming from the document + 
// ? I think not, but not too sure: only set a field which the document did not supply itself (the lookup key is the field name, it->first) + if (doc.meta.find(it->first) == doc.meta.end()) { + doc.meta[it->first] = it->second; + } + } +} + /// This method gets called for every file and directory found by the /// tree walker. @@ -381,28 +421,37 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp, if (m_updater && !m_updater->update()) { return FsTreeWalker::FtwStop; } + // If we're changing directories, possibly adjust parameters (set // the current directory in configuration object) if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwDirReturn) { m_config->setKeyDir(fn); + int abslen; if (m_config->getConfParam("idxabsmlen", &abslen)) m_db.setAbstractParams(abslen, -1, -1); + + // Adjust local fields from config for this subtree + if (m_havelocalfields) + localfieldsfromconf(); + if (flg == FsTreeWalker::FtwDirReturn) return FsTreeWalker::FtwOk; } + //////////////////// // Check db up to date ? Doing this before file type // identification means that, if usesystemfilecommand is switched // from on to off it may happen that some files which are now // without mime type will not be purged from the db, resulting // in possible 'cannot intern file' messages at query time... - char cbuf[100]; + // Document signature. This is based on m/ctime and size and used // for the uptodate check (the value computed here is checked // against the stored one). Changing the computation forces a full // reindex of course. + char cbuf[100]; sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME); string sig = cbuf; string udi; @@ -507,6 +556,9 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp, doc.sig += plus; } + // Possibly add fields from local config + if (m_havelocalfields) + setlocalfields(doc); // Add document to database. If there is an ipath, add it as a children // of the file document. 
string udi; diff --git a/src/index/indexer.h b/src/index/indexer.h index a055e34d..2eff4196 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -20,10 +20,12 @@ #include #include +#include #ifndef NO_NAMESPACES using std::string; using std::list; +using std::map; #endif #include "rclconfig.h" @@ -101,7 +103,9 @@ class DbIndexer : public FsTreeWalkerCB { DbIxStatusUpdater *updfunc = 0 // status updater callback ) : m_config(cnf), m_db(cnf), m_updater(updfunc) - {} + { + m_havelocalfields = m_config->hasNameAnywhere("localfields"); + } virtual ~DbIndexer(); @@ -150,7 +154,16 @@ class DbIndexer : public FsTreeWalkerCB { string m_tmpdir; DbIxStatusUpdater *m_updater; + // The configuration can set attribute fields to be inherited by + // all files in a file system area. Ie: set "apptag = thunderbird" + // inside ~/.thunderbird. The boolean is set at init to avoid + // further wasteful processing if no local fields are set. + bool m_havelocalfields; + map m_localfields; + bool init(bool rst = false, bool rdonly = false); + void localfieldsfromconf(); + void setlocalfields(Rcl::Doc& doc); }; /** Helper methods in recollindex.cpp for initial checks/setup to index diff --git a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp index 35292cd7..6fd44a12 100644 --- a/src/internfile/mimehandler.cpp +++ b/src/internfile/mimehandler.cpp @@ -196,9 +196,18 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg, } } - // We are supposed to get here if there was no specific error, but - // there is no identified mime type, or no handler - // associated. These files are either ignored or their name is + // We get here if there was no specific error, but there is no + // identified mime type, or no handler associated. + +#ifdef INDEX_UNKNOWN_TEXT_AS_PLAIN + // If the type is an unknown text/xxx, index as text/plain and + // hope for the best (this wouldn't work too well with text/rtf...) 
+ if (mtype.find("text/") == 0) { + return mhFactory("text/plain"); + } +#endif + + // Finally, unhandled files are either ignored or their name is // indexed, depending on configuration bool indexunknown = false; cfg->getConfParam("indexallfilenames", &indexunknown); diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 5ed57706..d0d68f23 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -134,7 +134,7 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc, int percent) { LOGDEB0(("Db::dbDataToRclDoc: data: %s\n", data.c_str())); - ConfSimple parms(&data); + ConfSimple parms(data); if (!parms.ok()) return false; parms.get(Doc::keyurl, doc.url); diff --git a/src/rcldb/rclquery.cpp b/src/rcldb/rclquery.cpp index eecd4893..42a04807 100644 --- a/src/rcldb/rclquery.cpp +++ b/src/rcldb/rclquery.cpp @@ -42,7 +42,7 @@ public: { // Parse xapian document's data and populate doc fields string data = xdoc.get_data(); - ConfSimple parms(&data); + ConfSimple parms(data); // The only filtering for now is on file path (subtree) string url; diff --git a/src/utils/conftree.cpp b/src/utils/conftree.cpp index bd4a0f0a..dd7f27e3 100755 --- a/src/utils/conftree.cpp +++ b/src/utils/conftree.cpp @@ -46,7 +46,7 @@ using std::list; #define MIN(A,B) ((A)<(B) ? (A) : (B)) #endif -//#define DEBUG +#undef DEBUG #ifdef DEBUG #define LOGDEB(X) fprintf X #else @@ -63,22 +63,20 @@ void ConfSimple::parseinput(istream &input) bool eof = false; for (;;) { + cline[0] = 0; input.getline(cline, LL-1); LOGDEB((stderr, "Parse:line: [%s] status %d\n", cline, int(status))); if (!input.good()) { if (input.bad()) { + LOGDEB((stderr, "Parse: input.bad()\n")); status = STATUS_ERROR; return; } + LOGDEB((stderr, "Parse: eof\n")); // Must be eof ? But maybe we have a partial line which // must be processed. 
This happens if the last line before - // eof ends with a backslash - if (appending) { - eof = true; - goto processline; - } - - break; + // eof ends with a backslash, or there is no final \n + eof = true; } { @@ -94,11 +92,12 @@ void ConfSimple::parseinput(istream &input) else line = cline; - processline: // Note that we trim whitespace before checking for backslash-eol - // This avoids invisible problems. + // This avoids invisible whitespace problems. trimstring(line); if (line.empty() || line.at(0) == '#') { + if (eof && line.empty()) // Nothing left to record. A final comment line lacking '\n' must still be pushed to m_order, the next pass then ends the loop + break; m_order.push_back(ConfLine(ConfLine::CFL_COMMENT, line)); continue; } @@ -142,29 +141,29 @@ void ConfSimple::parseinput(istream &input) continue; } i_set(nm, val, submapkey, true); - if (eof == true) + if (eof) break; } } ConfSimple::ConfSimple(int readonly, bool tildexp) - : dotildexpand(tildexp), m_data(0), m_holdWrites(false) + : dotildexpand(tildexp), m_holdWrites(false) { status = readonly ? STATUS_RO : STATUS_RW; } -ConfSimple::ConfSimple(string *d, int readonly, bool tildexp) - : dotildexpand(tildexp), m_data(d), m_holdWrites(false) +ConfSimple::ConfSimple(const string& d, int readonly, bool tildexp) + : dotildexpand(tildexp), m_holdWrites(false) { status = readonly ? STATUS_RO : STATUS_RW; - stringstream input(*d, ios::in); + stringstream input(d, ios::in); parseinput(input); } ConfSimple::ConfSimple(const char *fname, int readonly, bool tildexp) - : dotildexpand(tildexp), m_filename(fname), m_data(0), m_holdWrites(false) + : dotildexpand(tildexp), m_filename(fname), m_holdWrites(false) { status = readonly ? STATUS_RO : STATUS_RW; @@ -418,7 +417,8 @@ ConfSimple::sortwalk(WalkerCode (*walker)(void *,const string&,const string&), return WALK_CONTINUE; } -// Write to default output: +// Write to default output. 
This currently only does something if output is +// a file bool ConfSimple::write() { if (!ok()) @@ -430,11 +430,11 @@ bool ConfSimple::write() if (!output.is_open()) return 0; return write(output); - } else if (m_data) { - ostringstream output(*m_data, ios::out | ios::trunc); - return write(output); } else { - // No backing store, no writing + // No backing store, no writing. Maybe one day we'll need it with + // some kind of output string. This can't be the original string which + // is currently readonly. + //ostringstream output(m_ostring, ios::out | ios::trunc); return 1; } } @@ -529,6 +529,18 @@ list ConfSimple::getSubKeys() return mylist; } +bool ConfSimple::hasNameAnywhere(const string& nm) +{ + listkeys = getSubKeys(); + for (list::const_iterator it = keys.begin(); + it != keys.end(); it++) { + string val; + if (get(nm, val, *it)) + return true; + } + return false; +} + // ////////////////////////////////////////////////////////////////////////// // ConfTree Methods: conftree interpret keys like a hierarchical file tree // ////////////////////////////////////////////////////////////////////////// @@ -917,7 +929,7 @@ int main(int argc, char **argv) if (argc != 0) Usage(); string s; - ConfSimple c(&s); + ConfSimple c(s); memtest(c); exit(0); } else if ((op_flags & OPT_V)) { diff --git a/src/utils/conftree.h b/src/utils/conftree.h index e93ac208..dda0df1e 100755 --- a/src/utils/conftree.h +++ b/src/utils/conftree.h @@ -96,6 +96,7 @@ public: virtual ~ConfNull() {}; virtual int get(const string &name, string &value, const string &sk = string()) = 0; + virtual bool hasNameAnywhere(const string& nm) = 0; virtual int set(const string &nm, const string &val, const string &sk = string()) = 0; virtual bool ok() = 0; @@ -127,7 +128,7 @@ public: * @param readonly if true open readonly, else rw * @param tildexp try tilde (home dir) expansion for subsection names */ - ConfSimple(string *data, int readonly = 0, bool tildexp = false); + ConfSimple(const string& data, int 
readonly = 0, bool tildexp = false); /** * Build an empty object. This will be memory only, with no backing store. @@ -196,6 +197,10 @@ public: /** Return all names in given submap. */ virtual list getNames(const string &sk, const char *pattern = 0); + /** Check if name is present in any submap. This is relatively expensive + * but useful for saving further processing sometimes */ + virtual bool hasNameAnywhere(const string& nm); + /** * Return all subkeys */ @@ -207,13 +212,11 @@ public: * Copy constructor. Expensive but less so than a full rebuild */ ConfSimple(const ConfSimple &rhs) - : ConfNull(), m_data(0) + : ConfNull() { if ((status = rhs.status) == STATUS_ERROR) return; m_filename = rhs.m_filename; - // Note: we just share the pointer, this doesnt belong to us - m_data = rhs.m_data; m_submaps = rhs.m_submaps; } @@ -224,8 +227,6 @@ public: { if (this != &rhs && (status = rhs.status) != STATUS_ERROR) { m_filename = rhs.m_filename; - // Note: we don't own data. Just share the pointer - m_data = rhs.m_data; m_submaps = rhs.m_submaps; } return *this; @@ -237,8 +238,6 @@ protected: private: // Set if we're working with a file string m_filename; - // Set if we're working with an in-memory string - string *m_data; // Configuration data submaps (one per subkey, the main data has a // null subkey) map > m_submaps; @@ -281,7 +280,7 @@ public: * expansion */ ConfTree(const char *fname, int readonly = 0) : ConfSimple(fname, readonly, true) {} - ConfTree(string *data, int readonly = 0) + ConfTree(const string &data, int readonly = 0) : ConfSimple(data, readonly, true) {} ConfTree(int readonly = 0) : ConfSimple(readonly, true) {} @@ -364,6 +363,16 @@ public: return false; } + virtual bool hasNameAnywhere(const string& nm) + { + typename list::iterator it; + for (it = m_confs.begin();it != m_confs.end();it++) { + if ((*it)->hasNameAnywhere(nm)) + return true; + } + return false; + } + virtual int set(const string &nm, const string &val, const string &sk = string()) { if 
(!m_ok)