dbindexer->fsindexer, split into its own file

2009-11-10 18:10:54 +00:00 · 2009-11-10 18:10:54 +00:00 · d14601bde9
commit d14601bde9
parent 69dcb93059
7 changed files with 750 additions and 675 deletions
--- a/src/index/fsindexer.cpp
+++ b/src/index/fsindexer.cpp
@ -0,0 +1,582 @@
+#ifndef lint
+static char rcsid[] = "@(#$Id: $ (C) 2009 J.F.Dockes";
+#endif
+/*
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#ifdef HAVE_CONFIG_H
+#include "autoconfig.h"
+#endif
+
+#include <stdio.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <cstring>
+#include <fnmatch.h>
+
+#include <iostream>
+#include <list>
+#include <map>
+#include <algorithm>
+
+#include "pathut.h"
+#include "conftree.h"
+#include "rclconfig.h"
+#include "fstreewalk.h"
+#include "rcldb.h"
+#include "readfile.h"
+#include "indexer.h"
+#include "fsindexer.h"
+#include "csguess.h"
+#include "transcode.h"
+#include "debuglog.h"
+#include "internfile.h"
+#include "smallut.h"
+#include "wipedir.h"
+#include "fileudi.h"
+
+#ifdef RCL_USE_ASPELL
+#include "rclaspell.h"
+#endif
+
+// When using extended attributes, we have to use the ctime. 
+// This is quite an expensive price to pay...
+#ifdef RCL_USE_XATTR
+#define RCL_STTIME st_ctime
+#else
+#define RCL_STTIME st_mtime
+#endif // RCL_USE_XATTR
+
+#ifndef NO_NAMESPACES
+using namespace std;
+#endif /* NO_NAMESPACES */
+
+#ifndef deleteZ
+#define deleteZ(X) {delete X;X = 0;}
+#endif
+
+// Destructor: remove the temporary directory if one was set, then close
+// the database.
+FsIndexer::~FsIndexer()
+{
+    if (!m_tmpdir.empty()) {
+        // Empty the directory first, rmdir() only works on empty ones.
+        wipedir(m_tmpdir);
+        const int rc = rmdir(m_tmpdir.c_str());
+        if (rc < 0) {
+            LOGERR(("FsIndexer::~FsIndexer: cannot clear temp dir %s\n",
+                    m_tmpdir.c_str()));
+        }
+    }
+    m_db.close();
+}
+
+// Return the stemmer names, as reported by the database layer.
+list<string> FsIndexer::getStemmerNames()
+{
+    list<string> names = Rcl::Db::getStemmerNames();
+    return names;
+}
+
+// Index each directory in the topdirs list into our database.
+//
+// Steps: open the db (truncating it first if resetbefore is set), walk
+// every top directory (processone() is called back for each entry),
+// purge the db, rebuild the stemming databases and the aspell
+// dictionary, close the db and finally store the description of
+// missing helper programs in the configuration.
+//
+// @param resetbefore if true the database is opened in truncate mode.
+// @param topdirs list of top directories to walk. Must not be null.
+// @return false if the list is null, if the db can't be opened or
+//   closed, or if a tree walk does not return FtwOk. true otherwise.
+bool FsIndexer::indexTrees(bool resetbefore, list<string> *topdirs)
+{
+    // Refuse a null list instead of crashing on the dereference below.
+    if (topdirs == 0) {
+        LOGERR(("FsIndexer::index: null topdirs list\n"));
+        return false;
+    }
+
+    if (!init(resetbefore))
+        return false;
+
+    if (m_updater) {
+        m_updater->status.reset();
+        m_updater->status.dbtotdocs = m_db.docCnt();
+    }
+
+    m_walker.setSkippedPaths(m_config->getSkippedPaths());
+
+    for (list<string>::const_iterator it = topdirs->begin();
+         it != topdirs->end(); ++it) {
+        LOGDEB(("FsIndexer::index: Indexing %s into %s\n", it->c_str(), 
+                getDbDir().c_str()));
+
+        // Set the current directory in config so that subsequent
+        // getConfParams() will get local values
+        m_config->setKeyDir(*it);
+
+        // Adjust the "follow symlinks" option. The flag has a defined
+        // value even if the lookup succeeds without storing anything.
+        bool follow = false;
+        if (m_config->getConfParam("followLinks", &follow) && follow) {
+            m_walker.setOpts(FsTreeWalker::FtwFollow);
+        } else {
+            m_walker.setOpts(FsTreeWalker::FtwOptNone);
+        }
+
+        int abslen;
+        if (m_config->getConfParam("idxabsmlen", &abslen))
+            m_db.setAbstractParams(abslen, -1, -1);
+
+        // Set up skipped patterns for this subtree. This probably should be
+        // done in the directory change code in processone() instead.
+        m_walker.setSkippedNames(m_config->getSkippedNames());
+
+        // Walk the directory tree
+        if (m_walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
+            LOGERR(("FsIndexer::index: error while indexing %s: %s\n", 
+                    it->c_str(), m_walker.getReason().c_str()));
+            return false;
+        }
+    }
+
+    if (m_updater) {
+        m_updater->status.fn.erase();
+        m_updater->status.phase = DbIxStatus::DBIXS_PURGE;
+        m_updater->update();
+    }
+
+    // Get rid of all database entries that don't exist in the
+    // filesystem anymore.
+    m_db.purge();
+
+    // The results of these two are deliberately not checked here: a
+    // failure does not change the return value of the indexing pass.
+    createStemmingDatabases();
+    createAspellDict();
+
+    if (m_updater) {
+        m_updater->status.phase = DbIxStatus::DBIXS_CLOSING;
+        m_updater->status.fn.erase();
+        m_updater->update();
+    }
+    // The close would be done in our destructor, but we want status here
+    if (!m_db.close()) {
+        LOGERR(("FsIndexer::index: error closing database in %s\n", 
+                getDbDir().c_str()));
+        return false;
+    }
+
+    // Record which helper programs were found missing during this pass.
+    string missing;
+    FileInterner::getMissingDescription(missing);
+    if (!missing.empty()) {
+        LOGINFO(("FsIndexer::index missing helper program(s):\n%s\n", 
+                 missing.c_str()));
+    }
+    m_config->storeMissingHelperDesc(missing);
+    return true;
+}
+
+// Create the stemming databases for the languages listed in the
+// "indexstemminglanguages" configuration parameter. We also remove
+// the existing ones which are not configured.
+//
+// @return false if the configured value can't be parsed, in which case
+//   nothing is deleted or created. true otherwise (this includes the
+//   case where the parameter is not set at all).
+bool FsIndexer::createStemmingDatabases()
+{
+    string slangs;
+    if (!m_config->getConfParam("indexstemminglanguages", slangs))
+        return true;
+
+    list<string> langs;
+    // A badly quoted value could leave us with a partial list, which
+    // would then be used below to decide which stem dbs to delete. Bail
+    // out instead.
+    if (!stringToStrings(slangs, langs)) {
+        LOGERR(("FsIndexer::createStemmingDatabases: bad syntax for [%s]\n",
+                slangs.c_str()));
+        return false;
+    }
+
+    // Get the list of existing stem dbs from the database (some may have 
+    // been manually created), we just keep those from the config
+    list<string> dblangs = m_db.getStemLangs();
+    list<string>::const_iterator it;
+    for (it = dblangs.begin(); it != dblangs.end(); ++it) {
+        if (find(langs.begin(), langs.end(), *it) == langs.end())
+            m_db.deleteStemDb(*it);
+    }
+
+    // (Re)create one stem db per configured language, reporting the
+    // language being processed through the status updater if we have one.
+    for (it = langs.begin(); it != langs.end(); ++it) {
+        if (m_updater) {
+            m_updater->status.phase = DbIxStatus::DBIXS_STEMDB;
+            m_updater->status.fn = *it;
+            m_updater->update();
+        }
+        m_db.createStemDb(*it);
+    }
+    return true;
+}
+
+// Common setup: make sure that we have a temporary directory (not
+// needed for read-only use), then open the database.
+//
+// @param resetbefore open the db in truncate mode (ignored if rdonly).
+// @param rdonly open the db read-only and skip the temp dir creation.
+// @return false if the temp dir can't be created or the open fails.
+bool FsIndexer::init(bool resetbefore, bool rdonly)
+{
+    if (!rdonly) {
+        // Reuse the directory from a previous call if it still exists
+        const bool havetmp = 
+            !m_tmpdir.empty() && access(m_tmpdir.c_str(), 0) >= 0;
+        if (!havetmp) {
+            string reason;
+            if (!maketmpdir(m_tmpdir, reason)) {
+                LOGERR(("FsIndexer: cannot create temporary directory: %s\n",
+                        reason.c_str()));
+                return false;
+            }
+        }
+    }
+
+    // Read-only wins over truncate, plain update is the default
+    Rcl::Db::OpenMode mode = Rcl::Db::DbUpd;
+    if (rdonly) {
+        mode = Rcl::Db::DbRO;
+    } else if (resetbefore) {
+        mode = Rcl::Db::DbTrunc;
+    }
+
+    if (m_db.open(mode))
+        return true;
+
+    LOGERR(("FsIndexer: error opening database %s\n", getDbDir().c_str()));
+    return false;
+}
+
+// Create the stem database for one language. The db is opened through
+// init() with the read-only flag set.
+bool FsIndexer::createStemDb(const string &lang)
+{
+    return init(false, true) && m_db.createStemDb(lang);
+}
+
+// Build the aspell dictionary from the index (only does something when
+// compiled with RCL_USE_ASPELL).
+// The language for the aspell dictionary is handled internally by the aspell
+// module, either from a configuration variable or the NLS environment.
+//
+// @return true if there was nothing to do (no aspell support, or aspell
+//   disabled) or if the dictionary was built. false if the db could not
+//   be opened or if aspell initialization or dictionary creation failed.
+bool FsIndexer::createAspellDict()
+{
+    LOGDEB2(("FsIndexer::createAspellDict()\n"));
+#ifdef RCL_USE_ASPELL
+    // For the benefit of the real-time indexer, the flag is read from
+    // the configuration only once (the sentinel value means "not read
+    // yet"). It is then set to true if dictionary generation fails,
+    // which avoids retrying it forever.
+    static const int unset = -12345;
+    static int disabled = unset;
+    if (disabled == unset) {
+        disabled = false;
+        m_config->getConfParam("noaspell", &disabled);
+    }
+    if (disabled)
+        return true;
+
+    if (!init(false, true))
+        return false;
+
+    Aspell aspell(m_config);
+    string reason;
+    if (!aspell.init(reason)) {
+        LOGERR(("FsIndexer::createAspellDict: aspell init failed: %s\n", 
+                reason.c_str()));
+        disabled = true;
+        return false;
+    }
+
+    LOGDEB(("FsIndexer::createAspellDict: creating dictionary\n"));
+    if (!aspell.buildDict(m_db, reason)) {
+        LOGERR(("FsIndexer::createAspellDict: aspell buildDict failed: %s\n", 
+                reason.c_str()));
+        disabled = true;
+        return false;
+    }
+#endif
+    return true;
+}
+
+/** 
+ * Index individual files, out of a full tree run. No database purging.
+ *
+ * Files which can't be lstat'ed, which are not regular files, or whose
+ * simple name matches one of the skipped names patterns for their
+ * directory are silently left out.
+ *
+ * @param filenames paths of the files to index.
+ * @return false if the db can't be opened or closed, or if processone()
+ *   does not return FtwOk for one of the files (the remaining ones are
+ *   then not processed). true otherwise.
+ */
+bool FsIndexer::indexFiles(const list<string> &filenames)
+{
+    bool called_init = false;
+
+    for (list<string>::const_iterator it = filenames.begin(); 
+         it != filenames.end(); ++it) {
+        // Switch the configuration to the file's directory so that we
+        // get the local parameter values.
+        string dir = path_getfather(*it);
+        m_config->setKeyDir(dir);
+        int abslen;
+        if (m_config->getConfParam("idxabsmlen", &abslen))
+            m_db.setAbstractParams(abslen, -1, -1);
+
+        struct stat stb;
+        if (lstat(it->c_str(), &stb) != 0) {
+            LOGERR(("FsIndexer::indexFiles: lstat(%s): %s\n", it->c_str(),
+                    strerror(errno)));
+            continue;
+        }
+
+        // If we get to indexing directory names one day, will need to test 
+        // against dbdir here to avoid modification loops (with rclmon).
+        if (!S_ISREG(stb.st_mode)) {
+            LOGDEB2(("FsIndexer::indexFiles: %s: not a regular file\n", 
+                    it->c_str()));
+            continue;
+        }
+
+        // The skipped names list is only recomputed when the directory
+        // changes. NOTE: these are function-level statics, so the cache
+        // is kept across calls and shared by all FsIndexer objects.
+        static string lstdir;
+        static list<string> skpl;
+        if (lstdir.compare(dir)) {
+            LOGDEB(("Recomputing list of skipped names\n"));
+            skpl = m_config->getSkippedNames();
+            lstdir = dir;
+        }
+
+        bool skip = false;
+        if (!skpl.empty()) {
+            const string fn = path_getsimple(*it);
+            for (list<string>::const_iterator skit = skpl.begin(); 
+                 !skip && skit != skpl.end(); ++skit) {
+                if (fnmatch(skit->c_str(), fn.c_str(), 0) == 0) {
+                    LOGDEB(("Skipping [%s] :matches skip list\n", fn.c_str()));
+                    skip = true;
+                }
+            }
+        }
+        if (skip)
+            continue;
+
+        // Defer opening db until really needed.
+        if (!called_init) {
+            if (!init())
+                return false;
+            called_init = true;
+        }
+        if (processone(*it, &stb, FsTreeWalker::FtwRegular) != 
+            FsTreeWalker::FtwOk) {
+            LOGERR(("FsIndexer::indexFiles: processone failed\n"));
+            return false;
+        }
+    }
+
+    // The close would be done in our destructor, but we want status here.
+    // Note that we get here even if init() was never called because all
+    // the files were skipped.
+    if (!m_db.close()) {
+        LOGERR(("FsIndexer::indexfiles: error closing database in %s\n", 
+                getDbDir().c_str()));
+        return false;
+    }
+    return true;
+}
+
+
+/** 
+ * Purge the docs for the given files out of the database.
+ *
+ * @param filenames paths of the files to purge.
+ * @return false if the db can't be opened or closed, or on the first
+ *   purge failure (the remaining files are then not processed).
+ */
+bool FsIndexer::purgeFiles(const list<string> &filenames)
+{
+    if (!init())
+        return false;
+
+    list<string>::const_iterator cur = filenames.begin();
+    while (cur != filenames.end()) {
+        // The db entry for the file itself is the one with an empty ipath
+        string udi;
+        make_udi(*cur, "", udi);
+        const bool purged = m_db.purgeFile(udi);
+        if (!purged) {
+            LOGERR(("FsIndexer::purgeFiles: Database error\n"));
+            return false;
+        }
+        ++cur;
+    }
+
+    // The close would be done in our destructor, but we want status here
+    if (m_db.close())
+        return true;
+
+    LOGERR(("FsIndexer::purgefiles: error closing database in %s\n", 
+            getDbDir().c_str()));
+    return false;
+}
+
+// Local fields can be set for fs subtrees in the configuration file.
+// This rebuilds m_localfields from the "localfields" parameter, as seen
+// from the current configuration directory. The map is left empty if
+// the parameter is not set or can't be split into a list.
+void FsIndexer::localfieldsfromconf()
+{
+    LOGDEB(("FsIndexer::localfieldsfromconf\n"));
+    m_localfields.clear();
+
+    string sfields;
+    const bool isset = m_config->getConfParam("localfields", sfields);
+    if (!isset)
+        return;
+
+    list<string> lfields;
+    if (!stringToStrings(sfields, lfields)) {
+        LOGERR(("FsIndexer::localfieldsfromconf: bad syntax for [%s]\n", 
+                sfields.c_str()));
+        return;
+    }
+
+    // Each list element is parsed by ConfSimple and all its top-level
+    // names are copied to the map.
+    list<string>::const_iterator elt = lfields.begin();
+    for (; elt != lfields.end(); elt++) {
+        ConfSimple conf(*elt, 1, true);
+        list<string> nmlst = conf.getNames("");
+        list<string>::const_iterator nm = nmlst.begin();
+        for (; nm != nmlst.end(); nm++) {
+            string& value = m_localfields[*nm];
+            conf.get(*nm, value);
+            LOGDEB2(("FsIndexer::localfieldsfromconf: [%s] => [%s]\n",
+                    (*nm).c_str(), value.c_str()));
+        }
+    }
+}
+
+// Copy the local fields (m_localfields, as set from the configuration
+// by localfieldsfromconf()) to the document metadata. A field which the
+// document already has is left alone.
+void FsIndexer::setlocalfields(Rcl::Doc& doc)
+{
+    for (map<string, string>::const_iterator it = m_localfields.begin();
+         it != m_localfields.end(); ++it) {
+        // Should local fields override those coming from the document
+        // ? I think not, but not too sure.
+        // The test must be on the field name (it->first): looking up
+        // the field value as a key would almost never match, and the
+        // document value would then be overwritten.
+        if (doc.meta.find(it->first) == doc.meta.end()) {
+            doc.meta[it->first] = it->second;
+        }
+    }
+}
+
+
+/// This method gets called for every file and directory found by the
+/// tree walker. 
+///
+/// It checks with the db if the file has changed and needs to be
+/// reindexed. If so, it calls internfile() which will identify the
+/// file type and call an appropriate handler to convert the document into
+/// internal format, which we then add to the database.
+///
+/// Accent and majuscule handling are performed by the db module when doing
+/// the actual indexing work. The Rcl::Doc created by internfile()
+/// mostly contains pretty raw utf8 data.
+///
+/// @param fn path of the file or directory.
+/// @param stp stat data for fn.
+/// @param flg kind of entry (directory entry/return, ...).
+/// @return FtwStop if the status updater requests an interruption,
+///   FtwError if a database update fails, FtwOk otherwise.
+FsTreeWalker::Status 
+FsIndexer::processone(const std::string &fn, const struct stat *stp, 
+                      FsTreeWalker::CbFlag flg)
+{
+    if (m_updater && !m_updater->update()) {
+        return FsTreeWalker::FtwStop;
+    }
+
+    // If we're changing directories, possibly adjust parameters (set
+    // the current directory in configuration object)
+    if (flg == FsTreeWalker::FtwDirEnter || 
+        flg == FsTreeWalker::FtwDirReturn) {
+        m_config->setKeyDir(fn);
+
+        int abslen;
+        if (m_config->getConfParam("idxabsmlen", &abslen))
+            m_db.setAbstractParams(abslen, -1, -1);
+
+        // Adjust local fields from config for this subtree
+        if (m_havelocalfields)
+            localfieldsfromconf();
+
+        if (flg == FsTreeWalker::FtwDirReturn)
+            return FsTreeWalker::FtwOk;
+    }
+
+    ////////////////////
+    // Check db up to date ? Doing this before file type
+    // identification means that, if usesystemfilecommand is switched
+    // from on to off it may happen that some files which are now
+    // without mime type will not be purged from the db, resulting
+    // in possible 'cannot intern file' messages at query time...
+
+    // Document signature. This is based on m/ctime and size and used
+    // for the uptodate check (the value computed here is checked
+    // against the stored one). Changing the computation forces a full
+    // reindex of course. Looking for changes only, no need to
+    // parseback so no need for reversible formatting.
+    // All formatting below goes through snprintf() so that it can't
+    // overflow the buffer, whatever the values in the stat data.
+    char cbuf[100]; 
+    snprintf(cbuf, sizeof(cbuf), "%ld%ld", 
+             (long)stp->st_size, (long)stp->RCL_STTIME);
+    string sig = cbuf;
+
+    // The udi for the file itself (empty ipath). Also used as parent
+    // udi for the subdocuments if any.
+    string parent_udi;
+    make_udi(fn, "", parent_udi);
+    if (!m_db.needUpdate(parent_udi, sig)) {
+        LOGDEB(("processone: up to date: %s\n", fn.c_str()));
+        if (m_updater) {
+            // Status bar update, abort request etc.
+            m_updater->status.fn = fn;
+            if (!m_updater->update()) {
+                return FsTreeWalker::FtwStop;
+            }
+        }
+        return FsTreeWalker::FtwOk;
+    }
+
+    LOGDEB0(("processone: processing: [%s] %s\n", 
+             displayableBytes(stp->st_size).c_str(), fn.c_str()));
+
+    FileInterner interner(fn, stp, m_config, m_tmpdir, FileInterner::FIF_none);
+
+    // File name transcoded to utf8 for indexation. 
+    string charset = m_config->getDefCharset(true);
+    // If this fails, the file name won't be indexed, no big deal
+    // Note that we used to do the full path here, but I ended up believing
+    // that it made more sense to use only the file name
+    string utf8fn; int ercnt;
+    if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) {
+        LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n",
+                charset.c_str(), path_getsimple(fn).c_str()));
+    } else if (ercnt) {
+        LOGDEB(("processone: fn transcode %d errors from [%s] to UTF-8: %s\n",
+                ercnt, charset.c_str(), path_getsimple(fn).c_str()));
+    }
+    LOGDEB2(("processone: fn transcoded from [%s] to [%s] (%s->%s)\n",
+             path_getsimple(fn).c_str(), utf8fn.c_str(), charset.c_str(), 
+             "UTF-8"));
+
+    // Size and modification time as strings: same values for the file
+    // and all its subdocuments, so they are formatted only once.
+    snprintf(cbuf, sizeof(cbuf), "%ld", (long)stp->st_size);
+    const string fbytes = cbuf;
+    snprintf(cbuf, sizeof(cbuf), "%ld", (long)stp->st_mtime);
+    const string ascdate = cbuf;
+
+    Rcl::Doc doc;
+    const string plus("+");
+
+    // internfile() returns FIAgain as long as there are more documents
+    // to extract from the file.
+    FileInterner::Status fis = FileInterner::FIAgain;
+    bool hadNullIpath = false;
+    while (fis == FileInterner::FIAgain) {
+        doc.erase();
+        string ipath;
+        fis = interner.internfile(doc, ipath);
+
+        // Index at least the file name even if there was an error.
+        // We'll change the signature to ensure that the indexing will
+        // be retried every time.
+
+        // Internal access path for multi-document files
+        if (ipath.empty())
+            hadNullIpath = true;
+        else
+            doc.ipath = ipath;
+
+        // Set file name, mod time and url if not done by filter
+        if (doc.fmtime.empty())
+            doc.fmtime = ascdate;
+        if (doc.url.empty())
+            doc.url = string("file://") + fn;
+        if (doc.utf8fn.empty())
+            doc.utf8fn = utf8fn;
+
+        doc.fbytes = fbytes;
+        // Document signature for up to date checks. Also set, but
+        // never used, for subdocs.
+        doc.sig = sig;
+        // If there was an error, ensure indexing will be
+        // retried. This is for the once missing, later installed
+        // filter case. It can make indexing much slower (if there are
+        // myriads of such files, the ext script is executed for them
+        // and fails every time)
+        if (fis == FileInterner::FIError) {
+            doc.sig += plus;
+        }
+
+        // Possibly add fields from local config
+        if (m_havelocalfields) 
+            setlocalfields(doc);
+
+        // Add document to database. If there is an ipath, add it as a child
+        // of the file document.
+        string udi;
+        make_udi(fn, ipath, udi);
+        if (!m_db.addOrUpdate(udi, ipath.empty() ? "" : parent_udi, doc)) 
+            return FsTreeWalker::FtwError;
+
+        // Tell what we are doing and check for interrupt request
+        if (m_updater) {
+            ++(m_updater->status.docsdone);
+            m_updater->status.fn = fn;
+            if (!ipath.empty())
+                m_updater->status.fn += "|" + ipath;
+            if (!m_updater->update()) {
+                return FsTreeWalker::FtwStop;
+            }
+        }
+    }
+
+    // If we had no instance with a null ipath, we create an empty
+    // document to stand for the file itself, to be used mainly for up
+    // to date checks. Typically this happens for an mbox file.
+    if (hadNullIpath == false) {
+        LOGDEB1(("Creating empty doc for file\n"));
+        Rcl::Doc fileDoc;
+        fileDoc.fmtime = ascdate;
+        fileDoc.utf8fn = utf8fn;
+        fileDoc.mimetype = interner.getMimetype();
+        fileDoc.url = string("file://") + fn;
+        fileDoc.fbytes = fbytes;
+        // Document signature for up to date checks.
+        fileDoc.sig = sig;
+        if (!m_db.addOrUpdate(parent_udi, "", fileDoc)) 
+            return FsTreeWalker::FtwError;
+    }
+
+    return FsTreeWalker::FtwOk;
+}
--- a/src/index/fsindexer.h
+++ b/src/index/fsindexer.h
@ -0,0 +1,108 @@
+/*
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#ifndef _fsindexer_h_included_
+#define _fsindexer_h_included_
+/* @(#$Id: $  (C) 2009 J.F.Dockes */
+
+#include "fstreewalk.h"
+#include "rcldb.h"
+
+class DbIxStatusUpdater;
+
+/** Index selected parts of the file system
+ 
+Tree indexing: we inherit from FsTreeWalkerCB so that the processone()
+method is called by the file-system tree walk code for each file and
+directory. We keep all state needed while indexing, and finally call
+the methods to purge the db of stale entries and create the stemming
+databases.
+
+Single file(s) indexing: there are also calls to index or purge lists of files.
+No database purging or stem db updating in this case.
+*/
+class FsIndexer : public FsTreeWalkerCB {
+ public:
+    /** Constructor does nothing but store parameters 
+     *
+     * @param cnf Configuration data. Must not be null: it is
+     *   dereferenced here to check if local fields are in use.
+     * @param updfunc Status updater callback. May be null.
+     */
+    FsIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0) 
+        : m_config(cnf), m_db(cnf), m_updater(updfunc)
+    {
+        m_havelocalfields = m_config->hasNameAnywhere("localfields");
+    }
+
+    virtual ~FsIndexer();
+
+    /** Top level file system tree index method for updating a
+        given database.
+
+        The list is supposed to have all the filename space for the
+        db, and we shall purge entries for non-existing files at the
+        end. We create the temporary directory, open the database,
+        then call a file system walk for each top-level directory.
+        When walking is done, we create the stem databases and close
+        the main db.
+    */
+    bool indexTrees(bool resetbefore, std::list<std::string> *topdirs);
+
+    /** Index a list of files. No db cleaning or stemdb updating */
+    bool indexFiles(const std::list<std::string> &files);
+
+    /** Purge a list of files. */
+    bool purgeFiles(const std::list<std::string> &files);
+
+    /** Stemming reset to config: create needed, delete unconfigured */
+    bool createStemmingDatabases();
+
+    /** Create stem database for given language */
+    bool createStemDb(const std::string &lang);
+
+    /** Create misspelling expansion dictionary if aspell i/f is available */
+    bool createAspellDict();
+
+    /**  Tree walker callback method */
+    FsTreeWalker::Status 
+    processone(const std::string &, const struct stat *, 
+               FsTreeWalker::CbFlag);
+
+    /** Return my db dir */
+    std::string getDbDir() {return m_config->getDbDir();}
+
+    /** List possible stemmer names */
+    static std::list<std::string> getStemmerNames();
+
+ private:
+    // All standard library names are qualified in this file, so that it
+    // does not depend on a using directive from an included header.
+    FsTreeWalker m_walker;
+    RclConfig   *m_config;
+    Rcl::Db      m_db;
+    std::string  m_tmpdir;
+    DbIxStatusUpdater *m_updater;
+
+    // The configuration can set attribute fields to be inherited by
+    // all files in a file system area. Ie: set "apptag = thunderbird"
+    // inside ~/.thunderbird. The boolean is set at init to avoid
+    // further wasteful processing if no local fields are set.
+    bool         m_havelocalfields;
+    std::map<std::string, std::string> m_localfields;
+
+    /** Create the temporary directory if needed (not for read-only use) 
+        and open the database */
+    bool init(bool rst = false, bool rdonly = false);
+    /** Rebuild m_localfields from the current configuration values */
+    void localfieldsfromconf();
+    /** Copy the local fields to the document metadata */
+    void setlocalfields(Rcl::Doc& doc);
+};
+
+#endif /* _fsindexer_h_included_ */
--- a/src/index/indexer.cpp
+++ b/src/index/indexer.cpp
@ -25,569 +25,13 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.71 2008-12-17 08:01:40 dockes Exp
 #include <sys/stat.h>
 #include <unistd.h>
 #include <errno.h>
-#include <cstring>
-#include <fnmatch.h>

-#include <iostream>
-#include <list>
-#include <map>
-#include <algorithm>
-
-#include "pathut.h"
-#include "conftree.h"
-#include "rclconfig.h"
-#include "fstreewalk.h"
-#include "rcldb.h"
-#include "readfile.h"
-#include "indexer.h"
-#include "csguess.h"
-#include "transcode.h"
 #include "debuglog.h"
-#include "internfile.h"
-#include "smallut.h"
-#include "wipedir.h"
-#include "fileudi.h"
-
-#ifdef RCL_USE_ASPELL
-#include "rclaspell.h"
-#endif
-
-// When using extended attributes, we have to use the ctime. 
-// This is quite an expensive price to pay...
-#ifdef RCL_USE_XATTR
-#define RCL_STTIME st_ctime
-#else
-#define RCL_STTIME st_mtime
-#endif // RCL_USE_XATTR
-
-#ifndef NO_NAMESPACES
-using namespace std;
-#endif /* NO_NAMESPACES */
-
-#ifndef deleteZ
-#define deleteZ(X) {delete X;X = 0;}
-#endif
-
-DbIndexer::~DbIndexer() {
-    // Maybe clean up temporary directory
-    if (m_tmpdir.length()) {
-	wipedir(m_tmpdir);
-	if (rmdir(m_tmpdir.c_str()) < 0) {
-	    LOGERR(("DbIndexer::~DbIndexer: cannot clear temp dir %s\n",
-		    m_tmpdir.c_str()));
-	}
-    }
-    m_db.close();
-}
-
-list<string> DbIndexer::getStemmerNames()
-{
-    return Rcl::Db::getStemmerNames();
-}
-
-// Index each directory in the topdirs for a given db
-bool DbIndexer::indexDb(bool resetbefore, list<string> *topdirs)
-{
-    if (!init(resetbefore))
-	return false;
-
-    if (m_updater) {
-	m_updater->status.reset();
-	m_updater->status.dbtotdocs = m_db.docCnt();
-    }
-
-    m_walker.setSkippedPaths(m_config->getSkippedPaths());
-
-    for (list<string>::const_iterator it = topdirs->begin();
-	 it != topdirs->end(); it++) {
-	LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(), 
-		getDbDir().c_str()));
-
-	// Set the current directory in config so that subsequent
-	// getConfParams() will get local values
-	m_config->setKeyDir(*it);
-
-	// Adjust the "follow symlinks" option
-	bool follow;
-	if (m_config->getConfParam("followLinks", &follow) && follow) {
-	    m_walker.setOpts(FsTreeWalker::FtwFollow);
-	} else {
-	    m_walker.setOpts(FsTreeWalker::FtwOptNone);
-	}	    
-
-	int abslen;
-	if (m_config->getConfParam("idxabsmlen", &abslen))
-	    m_db.setAbstractParams(abslen, -1, -1);
-
-	// Set up skipped patterns for this subtree. This probably should be
-	// done in the directory change code in processone() instead.
-	m_walker.setSkippedNames(m_config->getSkippedNames());
-
-	// Walk the directory tree
-	if (m_walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
-	    LOGERR(("DbIndexer::index: error while indexing %s: %s\n", 
-		    it->c_str(), m_walker.getReason().c_str()));
-	    return false;
-	}
-    }
-    if (m_updater) {
-	m_updater->status.fn.erase();
-	m_updater->status.phase = DbIxStatus::DBIXS_PURGE;
-	m_updater->update();
-    }
-
-    // Get rid of all database entries that don't exist in the
-    // filesystem anymore.
-    m_db.purge();
-
-    createStemmingDatabases();
-    createAspellDict();
-
-    if (m_updater) {
-	m_updater->status.phase = DbIxStatus::DBIXS_CLOSING;
-	m_updater->status.fn.erase();
-	m_updater->update();
-    }
-    // The close would be done in our destructor, but we want status here
-    if (!m_db.close()) {
-	LOGERR(("DbIndexer::index: error closing database in %s\n", 
-		getDbDir().c_str()));
-	return false;
-    }
-    string missing;
-    FileInterner::getMissingDescription(missing);
-    if (!missing.empty()) {
-	LOGINFO(("DbIndexer::index missing helper program(s):\n%s\n", 
-		 missing.c_str()));
-    }
-    m_config->storeMissingHelperDesc(missing);
-    return true;
-}
-
-// Create stemming databases. We also remove those which are not
-// configured. 
-bool DbIndexer::createStemmingDatabases()
-{
-    string slangs;
-    if (m_config->getConfParam("indexstemminglanguages", slangs)) {
-	list<string> langs;
-	stringToStrings(slangs, langs);
-
-	// Get the list of existing stem dbs from the database (some may have 
-	// been manually created, we just keep those from the config
-	list<string> dblangs = m_db.getStemLangs();
-	list<string>::const_iterator it;
-	for (it = dblangs.begin(); it != dblangs.end(); it++) {
-	    if (find(langs.begin(), langs.end(), *it) == langs.end())
-		m_db.deleteStemDb(*it);
-	}
-	for (it = langs.begin(); it != langs.end(); it++) {
-	    if (m_updater) {
-		m_updater->status.phase = DbIxStatus::DBIXS_STEMDB;
-		m_updater->status.fn = *it;
-		m_updater->update();
-	    }
-	    m_db.createStemDb(*it);
-	}
-    }
-    return true;
-}
-
-bool DbIndexer::init(bool resetbefore, bool rdonly)
-{
-    if (!rdonly && (m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0)) {
-	string reason;
-	if (!maketmpdir(m_tmpdir, reason)) {
-	    LOGERR(("DbIndexer: cannot create temporary directory: %s\n",
-		    reason.c_str()));
-	    return false;
-	}
-    }
-    Rcl::Db::OpenMode mode = rdonly ? Rcl::Db::DbRO :
-	resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd;
-    if (!m_db.open(mode)) {
-	LOGERR(("DbIndexer: error opening database %s\n", getDbDir().c_str()));
-	return false;
-    }
-
-    return true;
-}
-
-bool DbIndexer::createStemDb(const string &lang)
-{
-    if (!init(false, true))
-	return false;
-    return m_db.createStemDb(lang);
-}
-
-// The language for the aspell dictionary is handled internally by the aspell
-// module, either from a configuration variable or the NLS environment.
-bool DbIndexer::createAspellDict()
-{
-    LOGDEB2(("DbIndexer::createAspellDict()\n"));
-#ifdef RCL_USE_ASPELL
-    // For the benefit of the real-time indexer, we only initialize
-    // noaspell from the configuration once. It can then be set to
-    // true if dictionary generation fails, which avoids retrying
-    // it forever.
-    static int noaspell = -12345;
-    if (noaspell == -12345) {
-	noaspell = false;
-	m_config->getConfParam("noaspell", &noaspell);
-    }
-    if (noaspell)
-	return true;
-
-    if (!init(false, true))
-	return false;
-    Aspell aspell(m_config);
-    string reason;
-    if (!aspell.init(reason)) {
-	LOGERR(("DbIndexer::createAspellDict: aspell init failed: %s\n", 
-		reason.c_str()));
-	noaspell = true;
-	return false;
-    }
-    LOGDEB(("DbIndexer::createAspellDict: creating dictionary\n"));
-    if (!aspell.buildDict(m_db, reason)) {
-	LOGERR(("DbIndexer::createAspellDict: aspell buildDict failed: %s\n", 
-		reason.c_str()));
-	noaspell = true;
-	return false;
-    }
-#endif
-    return true;
-}
-
-/** 
- * Index individual files, out of a full tree run. No database purging
- */
-bool DbIndexer::indexFiles(const list<string> &filenames)
-{
-    bool called_init = false;
-
-    list<string>::const_iterator it;
-    for (it = filenames.begin(); it != filenames.end(); it++) {
-	string dir = path_getfather(*it);
-	m_config->setKeyDir(dir);
-	int abslen;
-	if (m_config->getConfParam("idxabsmlen", &abslen))
-	    m_db.setAbstractParams(abslen, -1, -1);
-	struct stat stb;
-	if (lstat(it->c_str(), &stb) != 0) {
-	    LOGERR(("DbIndexer::indexFiles: lstat(%s): %s", it->c_str(),
-		    strerror(errno)));
-	    continue;
-	}
-
-	// If we get to indexing directory names one day, will need to test 
-	// against dbdir here to avoid modification loops (with rclmon).
-	if (!S_ISREG(stb.st_mode)) {
-	    LOGDEB2(("DbIndexer::indexFiles: %s: not a regular file\n", 
-		    it->c_str()));
-	    continue;
-	}
-
-	static string lstdir;
-	static list<string> skpl;
-	if (lstdir.compare(dir)) {
-	    LOGDEB(("Recomputing list of skipped names\n"));
-	    skpl = m_config->getSkippedNames();
-	    lstdir = dir;
-	}
-	if (!skpl.empty()) {
-	    list<string>::const_iterator skit;
-	    string fn = path_getsimple(*it);
-	    for (skit = skpl.begin(); skit != skpl.end(); skit++) {
-		if (fnmatch(skit->c_str(), fn.c_str(), 0) == 0) {
-		    LOGDEB(("Skipping [%s] :matches skip list\n", fn.c_str()));
-		    goto skipped;
-		}
-	    }
-	}
-	// Defer opening db until really needed.
-	if (!called_init) {
-	    if (!init())
-		return false;
-	    called_init = true;
-	}
-	if (processone(*it, &stb, FsTreeWalker::FtwRegular) != 
-	    FsTreeWalker::FtwOk) {
-	    LOGERR(("DbIndexer::indexFiles: processone failed\n"));
-	    return false;
-	}
-    skipped: 
-	false; // Need a statement here to make compiler happy ??
-    }
-
-    // The close would be done in our destructor, but we want status here
-    if (!m_db.close()) {
-	LOGERR(("DbIndexer::indexfiles: error closing database in %s\n", 
-		getDbDir().c_str()));
-	return false;
-    }
-    return true;
-}
-
-
-/** Purge docs for given files out of the database */
-bool DbIndexer::purgeFiles(const list<string> &filenames)
-{
-    if (!init())
-	return false;
-
-    list<string>::const_iterator it;
-    for (it = filenames.begin(); it != filenames.end(); it++) {
-	string udi;
-	make_udi(*it, "", udi);
-	if (!m_db.purgeFile(udi)) {
-	    LOGERR(("DbIndexer::purgeFiles: Database error\n"));
-	    return false;
-	}
-    }
-
-    // The close would be done in our destructor, but we want status here
-    if (!m_db.close()) {
-	LOGERR(("DbIndexer::purgefiles: error closing database in %s\n", 
-		getDbDir().c_str()));
-	return false;
-    }
-    return true;
-}
-
-// Local fields can be set for fs subtrees in the configuration file.
-//
-// Rebuild m_localfields from the "localfields" configuration parameter:
-// the map is cleared first, the parameter value is split into a list of
-// strings, each string is parsed as a ConfSimple, and every name found in
-// it is copied with its value into m_localfields.
-// If the parameter is not set, or its value can't be split, m_localfields
-// is left empty.
-void DbIndexer::localfieldsfromconf()
-{
-    LOGDEB(("DbIndexer::localfieldsfromconf\n"));
-    m_localfields.clear();
-    string sfields;
-    if (!m_config->getConfParam("localfields", sfields))
-        return;
-    list<string> lfields;
-    if (!stringToStrings(sfields, lfields)) {
-        LOGERR(("DbIndexer::localfieldsfromconf: bad syntax for [%s]\n", 
-                sfields.c_str()));
-        return;
-    }
-    for (list<string>::const_iterator it = lfields.begin();
-         it != lfields.end(); it++) {
-        // NOTE(review): presumably the (1, true) arguments ask ConfSimple to
-        // parse the string itself as read-only data -- confirm in conftree.h
-        ConfSimple conf(*it, 1, true);
-        list<string> nmlst = conf.getNames("");
-        for (list<string>::const_iterator it1 = nmlst.begin();
-             it1 != nmlst.end(); it1++) {
-            // operator[] creates the entry, get() then fills in its value
-            conf.get(*it1, m_localfields[*it1]);
-            LOGDEB2(("DbIndexer::localfieldsfromconf: [%s] => [%s]\n",
-                    (*it1).c_str(), m_localfields[*it1].c_str()));
-        }
-    }
-}
-
-/**
- * Copy the local fields (m_localfields, computed from the configuration)
- * into a document's metadata.
- *
- * A local field is only set when the document has no metadata entry of the
- * same name yet, so that values coming from the document itself are not
- * overridden.
- *
- * @param doc document whose meta map is completed
- */
-void DbIndexer::setlocalfields(Rcl::Doc& doc)
-{
-    for (map<string, string>::const_iterator it = m_localfields.begin();
-         it != m_localfields.end(); it++) {
-        // Should local fields override those coming from the document
-        // ? I think not, but not too sure
-        // The existence check must use the field name (it->first), which is
-        // the key written below. Looking up the field value (it->second)
-        // would almost never match, and local fields would then always
-        // override the document ones.
-        if (doc.meta.find(it->first) == doc.meta.end()) {
-            doc.meta[it->first] = it->second;
-        }
-    }
-}
-
-
-/// This method gets called for every file and directory found by the
-/// tree walker. 
-///
-/// It checks with the db if the file has changed and needs to be
-/// reindexed. If so, it calls internfile() which will identify the
-/// file type and call an appropriate handler to convert the document into
-/// internal format, which we then add to the database.
-///
-/// Accent and majuscule handling are performed by the db module when doing
-/// the actual indexing work. The Rcl::Doc created by internfile()
-/// mostly contains pretty raw utf8 data.
-///
-/// @param fn  path of the entry reported by the tree walker
-/// @param stp stat data for fn; size and times are used to build the
-///            document signature and the fmtime/fbytes fields
-/// @param flg tree walker event type. Directory enter/return events
-///            refresh the per-directory configuration first.
-/// @return FtwStop when the status updater's update() returns false,
-///         FtwError when a database addOrUpdate() fails, FtwOk otherwise
-///         (including when the entry is found up to date).
-FsTreeWalker::Status 
-DbIndexer::processone(const std::string &fn, const struct stat *stp, 
-		      FsTreeWalker::CbFlag flg)
-{
-    // Give the updater a chance to stop the walk before doing any work
-    if (m_updater && !m_updater->update()) {
-        return FsTreeWalker::FtwStop;
-    }
-
-    // If we're changing directories, possibly adjust parameters (set
-    // the current directory in configuration object)
-    if (flg == FsTreeWalker::FtwDirEnter || 
-	flg == FsTreeWalker::FtwDirReturn) {
-	m_config->setKeyDir(fn);
-
-	int abslen;
-	if (m_config->getConfParam("idxabsmlen", &abslen))
-	    m_db.setAbstractParams(abslen, -1, -1);
-
-        // Adjust local fields from config for this subtree
-        if (m_havelocalfields)
-            localfieldsfromconf();
-
-	// Returning to a directory only needs the configuration refresh.
-	// A directory entry event falls through to the code below.
-	if (flg == FsTreeWalker::FtwDirReturn)
-	    return FsTreeWalker::FtwOk;
-    }
-
-    ////////////////////
-    // Check db up to date ? Doing this before file type
-    // identification means that, if usesystemfilecommand is switched
-    // from on to off it may happen that some files which are now
-    // without mime type will not be purged from the db, resulting
-    // in possible 'cannot intern file' messages at query time...
-
-    // Document signature. This is based on m/ctime and size and used
-    // for the uptodate check (the value computed here is checked
-    // against the stored one). Changing the computation forces a full
-    // reindex of course.
-    char cbuf[100]; 
-    sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
-    string sig = cbuf;
-    string udi;
-    make_udi(fn, "", udi);
-    if (!m_db.needUpdate(udi, sig)) {
-	LOGDEB(("processone: up to date: %s\n", fn.c_str()));
-	if (m_updater) {
-	    // Status bar update, abort request etc.
-	    m_updater->status.fn = fn;
-	    if (!m_updater->update()) {
-		return FsTreeWalker::FtwStop;
-	    }
-	}
-	return FsTreeWalker::FtwOk;
-    }
-
-    LOGDEB0(("processone: processing: [%s] %s\n", 
-             displayableBytes(stp->st_size).c_str(), fn.c_str()));
-
-    FileInterner interner(fn, stp, m_config, m_tmpdir, FileInterner::FIF_none);
-
-    // File name transcoded to utf8 for indexation.
-    string charset = m_config->getDefCharset(true);
-    // If this fails, the file name won't be indexed, no big deal
-    // Note that we used to do the full path here, but I ended up believing
-    // that it made more sense to use only the file name
-    string utf8fn; int ercnt;
-    if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) {
-	LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n",
-		charset.c_str(), path_getsimple(fn).c_str()));
-    } else if (ercnt) {
-	LOGDEB(("processone: fn transcode %d errors from [%s] to UTF-8: %s\n",
-		ercnt, charset.c_str(), path_getsimple(fn).c_str()));
-    }
-    LOGDEB2(("processone: fn transcoded from [%s] to [%s] (%s->%s)\n",
-	     path_getsimple(fn).c_str(), utf8fn.c_str(), charset.c_str(), 
-	     "UTF-8"));
-
-    // NOTE(review): parent_udi is built with the same arguments as udi
-    // above, so both presumably hold the same value -- one call may do.
-    string parent_udi;
-    make_udi(fn, "", parent_udi);
-    Rcl::Doc doc;
-    const string plus("+");
-    // File modification time as a decimal string, default for doc.fmtime.
-    // NOTE(review): sprintf is unchecked; 20 bytes hold at most 19
-    // characters plus the terminating NUL.
-    char ascdate[20];
-    sprintf(ascdate, "%ld", long(stp->st_mtime));
-
-    // Keep calling the interner as long as it answers FIAgain: each call
-    // fills one document, which is added to the db before the next call.
-    FileInterner::Status fis = FileInterner::FIAgain;
-    bool hadNullIpath = false;
-    while (fis == FileInterner::FIAgain) {
-	doc.erase();
-	string ipath;
-	fis = interner.internfile(doc, ipath);
-
-        // Index at least the file name even if there was an error.
-        // We'll change the signature to ensure that the indexing will
-        // be retried every time.
-
-
-	// Internal access path for multi-document files
-	if (ipath.empty())
-	    hadNullIpath = true;
-	else
-	    doc.ipath = ipath;
-
-	// Set file name, mod time and url if not done by filter
-	if (doc.fmtime.empty())
-	    doc.fmtime = ascdate;
-        if (doc.url.empty())
-            doc.url = string("file://") + fn;
-	if (doc.utf8fn.empty())
-	    doc.utf8fn = utf8fn;
-
-	// Note: this cbuf shadows the function-level one declared above
-	char cbuf[100]; 
-	sprintf(cbuf, "%ld", (long)stp->st_size);
-	doc.fbytes = cbuf;
-	// Document signature for up to date checks: concatenate
-	// m/ctime and size. Looking for changes only, no need to
-	// parseback so no need for reversible formatting. Also set,
-	// but never used, for subdocs.
-	sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
-	doc.sig = cbuf;
-	// If there was an error, ensure indexing will be
-	// retried. This is for the once missing, later installed
-	// filter case. It can make indexing much slower (if there are
-	// myriads of such files, the ext script is executed for them
-	// and fails every time)
-	if (fis == FileInterner::FIError) {
-	    doc.sig += plus;
-	}
-
-        // Possibly add fields from local config
-        if (m_havelocalfields) 
-            setlocalfields(doc);
-	// Add document to database. If there is an ipath, add it as a child
-	// of the file document.
-	// Note: this udi shadows the function-level one, and differs from
-	// it when ipath is not empty.
-	string udi;
-	make_udi(fn, ipath, udi);
-	if (!m_db.addOrUpdate(udi, ipath.empty() ? "" : parent_udi, doc)) 
-	    return FsTreeWalker::FtwError;
-
-	// Tell what we are doing and check for interrupt request
-	if (m_updater) {
-	    ++(m_updater->status.docsdone);
-            m_updater->status.fn = fn;
-            if (!ipath.empty())
-                m_updater->status.fn += "|" + ipath;
-            if (!m_updater->update()) {
-                return FsTreeWalker::FtwStop;
-            }
-	}
-    }
-
-    // If we had no instance with a null ipath, we create an empty
-    // document to stand for the file itself, to be used mainly for up
-    // to date checks. Typically this happens for an mbox file.
-    if (hadNullIpath == false) {
-	LOGDEB1(("Creating empty doc for file\n"));
-	Rcl::Doc fileDoc;
-	fileDoc.fmtime = ascdate;
-	fileDoc.utf8fn = utf8fn;
-	fileDoc.mimetype = interner.getMimetype();
-	fileDoc.url = string("file://") + fn;
-
-	char cbuf[100]; 
-	sprintf(cbuf, "%ld", (long)stp->st_size);
-	fileDoc.fbytes = cbuf;
-	// Document signature for up to date checks.
-	sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
-	fileDoc.sig = cbuf;
-	if (!m_db.addOrUpdate(parent_udi, "", fileDoc)) 
-	    return FsTreeWalker::FtwError;
-    }
-
-    return FsTreeWalker::FtwOk;
-}
-
-////////////////////////////////////////////////////////////////////////////
-// ConfIndexer methods: ConfIndexer is the top-level object, that could
-// in theory index multiple directories to multiple databases. In practice we
-// have a single database per configuration.
+#include "indexer.h"

 ConfIndexer::~ConfIndexer()
 {
-     deleteZ(m_dbindexer);
+     deleteZ(m_fsindexer);
 }

 bool ConfIndexer::index(bool resetbefore)
@ -634,13 +78,13 @@ bool ConfIndexer::index(bool resetbefore)
    // The dbmap now has dbdir as key and directory lists as values.
    // Index each directory group in turn
    for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
-	m_dbindexer = new DbIndexer(m_config, m_updater);
-	if (!m_dbindexer->indexDb(resetbefore, &dbit->second)) {
-	    deleteZ(m_dbindexer);
+	m_fsindexer = new FsIndexer(m_config, m_updater);
+	if (!m_fsindexer->indexTrees(resetbefore, &dbit->second)) {
+	    deleteZ(m_fsindexer);
 	    m_reason = "Failed indexing in " + dbit->first;
 	    return false;
 	}
-	deleteZ(m_dbindexer);
+	deleteZ(m_fsindexer);
    }
    return true;
 }
--- a/src/index/indexer.h
+++ b/src/index/indexer.h
@ -29,8 +29,7 @@ using std::map;
 #endif

 #include "rclconfig.h"
-#include "fstreewalk.h"
-#include "rcldb.h"
+#include "fsindexer.h"

 /* Forward decl for lower level indexing object */
 class DbIndexer;
@ -71,7 +70,7 @@ class ConfIndexer {
 public:
    enum runStatus {IndexerOk, IndexerError};
    ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0)
-	: m_config(cnf), m_dbindexer(0), m_updater(updfunc)
+	: m_config(cnf), m_fsindexer(0), m_updater(updfunc)
 	{}
    virtual ~ConfIndexer();
    /** Worker function: do the actual indexing */
@ -79,96 +78,9 @@ class ConfIndexer {
    const string &getReason() {return m_reason;}
 private:
    RclConfig *m_config;
-    DbIndexer *m_dbindexer; // Object to process directories for a given db
+    FsIndexer *m_fsindexer; // Object to process directories for a given db
    DbIxStatusUpdater *m_updater;
    string m_reason;
 };

-/** Index things into one database
- 
-Tree indexing: we inherit FsTreeWalkerCB so that the processone()
-  method is called by the file-system tree walk code for each file and
-  directory. We keep all state needed while indexing, and finally call
-  the methods to purge the db of stale entries and create the stemming
-  databases.
-
-Single file(s) indexing: no database purging or stem db updating.
-*/
-class DbIndexer : public FsTreeWalkerCB {
- public:
-    /** Constructor stores its parameters and records whether the
-     * configuration has a "localfields" entry anywhere.
-     *
-     * @param cnf Configuration data
-     * @param updfunc Status updater callback
-     */
-    DbIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0) 
-	: m_config(cnf), m_db(cnf), m_updater(updfunc)
-    {
-        m_havelocalfields = m_config->hasNameAnywhere("localfields");
-    }
-	
-    virtual ~DbIndexer();
-
-    /** Top level file system tree index method for updating a
-	given database.
-
-	The list is supposed to have all the filename space for the
-	db, and we shall purge entries for non-existing files at the
-	end. We create the temporary directory, open the database,
-	then call a file system walk for each top-level directory.
-	When walking is done, we create the stem databases and close
-	the main db.
-    */
-    bool indexDb(bool resetbefore, std::list<string> *topdirs);
-
-    /** Index a list of files. No db cleaning or stemdb updating */
-    bool indexFiles(const std::list<string> &files);
-
-    /** Purge the documents for a list of files out of the database. */
-    bool purgeFiles(const std::list<string> &files);
-
-    /** Stemming reset to config: create needed, delete unconfigured */
-    bool createStemmingDatabases();
-
-    /** Create stem database for given language */
-    bool createStemDb(const string &lang);
-
-    /** Create misspelling expansion dictionary if aspell i/f is available */
-    bool createAspellDict();
-
-    /** Tree walker callback method: called for each file and directory
-     *  found by the file system walk */
-    FsTreeWalker::Status 
-    processone(const string &, const struct stat *, FsTreeWalker::CbFlag);
-
-    /** Return my db dir (as given by the configuration) */
-    string getDbDir() {return m_config->getDbDir();}
-
-    /** List possible stemmer names */
-    static list<string> getStemmerNames();
-
- private:
-    FsTreeWalker m_walker;
-    RclConfig   *m_config;  // Configuration, set by the constructor
-    Rcl::Db      m_db;      // The database, constructed from the config
-    string       m_tmpdir;
-    DbIxStatusUpdater *m_updater; // Status updater callback, may be null
-
-    // The configuration can set attribute fields to be inherited by
-    // all files in a file system area. Ie: set "apptag = thunderbird"
-    // inside ~/.thunderbird. The boolean is set at init to avoid
-    // further wasteful processing if no local fields are set.
-    bool         m_havelocalfields;
-    map<string, string> m_localfields;
-
-    // NOTE(review): presumably rst asks for a db reset and rdonly for a
-    // read-only open -- confirm against the init() definition
-    bool init(bool rst = false, bool rdonly = false);
-    // Recompute m_localfields from the "localfields" config parameter
-    void localfieldsfromconf();
-    // Copy m_localfields entries into the document's metadata
-    void setlocalfields(Rcl::Doc& doc);
-};
-
-/** Helper methods in recollindex.cpp for initial checks/setup to index 
- * a list of files (either from the monitor or the command line) */
-extern bool indexfiles(RclConfig *config, const list<string> &filenames);
-extern bool purgefiles(RclConfig *config, const list<string> &filenames);
-extern bool createAuxDbs(RclConfig *config);
-
 #endif /* _INDEXER_H_INCLUDED_ */
--- a/src/index/rclmonprc.cpp
+++ b/src/index/rclmonprc.cpp
@ -39,7 +39,7 @@ static char rcsid[] = "@(#$Id: rclmonprc.cpp,v 1.14 2008-11-18 13:25:48 dockes E
 #include "debuglog.h"
 #include "rclmon.h"
 #include "debuglog.h"
-#include "indexer.h"
+#include "recollindex.h"
 #include "pathut.h"
 #include "x11mon.h"

@ -168,8 +168,6 @@ RclConfig *RclMonEventQueue::getConfig()
    return m_data->m_config;
 }

-extern int stopindexing;
-
 bool RclMonEventQueue::ok()
 {
    if (m_data == 0) {
--- a/src/index/recollindex.cpp
+++ b/src/index/recollindex.cpp
@ -42,16 +42,18 @@ using namespace std;
 #include "cancelcheck.h"
 #include "rcldb.h"
 #include "beaglequeue.h"
+#include "recollindex.h"
+#include "fsindexer.h"

 // Globals for atexit cleanup
 static ConfIndexer *confindexer;
-static DbIndexer *dbindexer;
+static FsIndexer *fsindexer;

 // This is set as an atexit routine, 
 static void cleanup()
 {
    deleteZ(confindexer);
-    deleteZ(dbindexer);
+    deleteZ(fsindexer);
 }

 // Global stop request flag. This is checked in a number of place in the
@ -79,11 +81,11 @@ static void sigcleanup(int sig)
    stopindexing = 1;
 }

-static bool makeDbIndexer(RclConfig *config)
+static bool makeFsIndexer(RclConfig *config)
 {
-    if (!dbindexer)
-	dbindexer = new DbIndexer(config, &updater);
-    return dbindexer ? true : false;
+    if (!fsindexer)
+	fsindexer = new FsIndexer(config, &updater);
+    return fsindexer ? true : false;
 }

 // The list of top directories/files wont change during program run,
@ -95,7 +97,7 @@ static list<string> o_tdl;
 //
 // This is called either from the command line or from the monitor. In
 // this case we're called repeatedly in the same process, and the
-// dbIndexer is only created once by makeDbIndexer (but the db is
+// fsindexer is only created once by makeFsIndexer (but the db is
 // flushed anyway)
 bool indexfiles(RclConfig *config, const list<string> &filenames)
 {
@ -139,10 +141,10 @@ bool indexfiles(RclConfig *config, const list<string> &filenames)
    // go:
    config->setKeyDir(path_getfather(*myfiles.begin()));

-    if (!makeDbIndexer(config))
+    if (!makeFsIndexer(config))
 	return false;

-    return dbindexer->indexFiles(myfiles);
+    return fsindexer->indexFiles(myfiles);
 }

 // Delete a list of files. Same comments about call contexts as indexfiles.
@ -173,21 +175,21 @@ bool purgefiles(RclConfig *config, const list<string> &filenames)
    // go:
    config->setKeyDir(path_getfather(*myfiles.begin()));

-    if (!makeDbIndexer(config))
+    if (!makeFsIndexer(config))
 	return false;
-    return dbindexer->purgeFiles(myfiles);
+    return fsindexer->purgeFiles(myfiles);
 }

 // Create stemming and spelling databases
 bool createAuxDbs(RclConfig *config)
 {
-    if (!makeDbIndexer(config))
+    if (!makeFsIndexer(config))
 	return false;

-    if (!dbindexer->createStemmingDatabases())
+    if (!fsindexer->createStemmingDatabases())
 	return false;

-    if (!dbindexer->createAspellDict())
+    if (!fsindexer->createAspellDict())
 	return false;

    return true;
@ -196,9 +198,9 @@ bool createAuxDbs(RclConfig *config)
 // Create additional stem database 
 static bool createstemdb(RclConfig *config, const string &lang)
 {
-    if (!makeDbIndexer(config))
+    if (!makeFsIndexer(config))
        return false;
-    return dbindexer->createStemDb(lang);
+    return fsindexer->createStemDb(lang);
 }

 static const char *thisprog;
@ -352,7 +354,7 @@ int main(int argc, const char **argv)
    } else if (op_flags & OPT_l) {
 	if (argc != 0) 
 	    Usage();
-	list<string> stemmers = DbIndexer::getStemmerNames();
+	list<string> stemmers = FsIndexer::getStemmerNames();
 	for (list<string>::const_iterator it = stemmers.begin(); 
 	     it != stemmers.end(); it++) {
 	    cout << *it << endl;
@ -395,9 +397,9 @@ int main(int argc, const char **argv)

 #ifdef RCL_USE_ASPELL
    } else if (op_flags & OPT_S) {
-	if (!makeDbIndexer(config))
+	if (!makeFsIndexer(config))
            exit(1);
-        exit(!dbindexer->createAspellDict());
+        exit(!fsindexer->createAspellDict());
 #endif // ASPELL
    } else if (op_flags & OPT_b) {
        BeagleQueueIndexer beagler(config);
--- a/src/index/recollindex.h
+++ b/src/index/recollindex.h
@ -0,0 +1,29 @@
+/*
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#ifndef _recollindex_h_included_
+#define _recollindex_h_included_
+/* @(#$Id: $  (C) 2009 J.F.Dockes */
+
+/* Declarations shared between recollindex.cpp and the other indexing
+ * modules. The header declares/includes what it uses, so that it does not
+ * depend on the includer's include order or using-declarations. */
+#include <list>
+#include <string>
+
+class RclConfig;
+
+/** Helper methods in recollindex.cpp for initial checks/setup to index 
+ * a list of files (either from the monitor or the command line) */
+extern bool indexfiles(RclConfig *config, 
+                       const std::list<std::string> &filenames);
+/** Same call contexts as indexfiles(): delete a list of files */
+extern bool purgefiles(RclConfig *config, 
+                       const std::list<std::string> &filenames);
+/** Create stemming and spelling databases */
+extern bool createAuxDbs(RclConfig *config);
+
+/** Global stop request flag, defined in recollindex.cpp (set to 1 by its
+ * signal cleanup handler) */
+extern int stopindexing;
+
+#endif /* _recollindex_h_included_ */