From 4503971dd0228607239211c20a82481b77e596d2 Mon Sep 17 00:00:00 2001
From: dockes <none@none>
Date: Fri, 13 Nov 2009 09:07:18 +0000
Subject: [PATCH] integrate beaglequeueindexer for indexing. Work remains for
 indexfiles() at least

---
 src/index/beaglequeue.cpp | 233 +++++++++++++++++++++++++++++++++-----
 src/index/beaglequeue.h   |  19 +++-
 src/index/fsindexer.cpp   |   3 +-
 src/index/fsindexer.h     |   2 +-
 src/index/indexer.cpp     | 119 +++++++++++++++----
 src/index/indexer.h       |  36 +++---
 src/index/recollindex.cpp |  99 ++++------------
 7 files changed, 357 insertions(+), 154 deletions(-)
diff --git a/src/index/beaglequeue.cpp b/src/index/beaglequeue.cpp
index e323d59b..a4911876 100644
--- a/src/index/beaglequeue.cpp
+++ b/src/index/beaglequeue.cpp
@@ -17,6 +17,10 @@
 #ifndef lint
 static char rcsid[] = "@(#$Id: $ (C) 2005 J.F.Dockes";
 #endif
+#include "autoconfig.h"
+
+#include <sys/types.h>
+
 #include "autoconfig.h"
 #include "pathut.h"
 #include "debuglog.h"
@@ -27,9 +31,14 @@ static char rcsid[] = "@(#$Id: $ (C) 2005 J.F.Dockes";
 #include "internfile.h"
 #include "wipedir.h"
 #include "circache.h"
+#include "indexer.h"
+#include "readfile.h"
+#include "conftree.h"
+#include "transcode.h"
 
 #include <vector>
 #include <fstream>
+#include <sstream>
 using namespace std;
 
 #include <sys/stat.h>
@@ -42,9 +51,7 @@ class BeagleDotFile {
 public:
     BeagleDotFile(RclConfig *conf, const string& fn)
         : m_conf(conf), m_fn(fn)
-    {
-
-    }
+    { }
 
     bool readLine(string& line)
     {
@@ -92,13 +99,20 @@ public:
             return false;
         doc.mimetype = line;
 
-        if (doc.mimetype.empty() && 
-            !stringlowercmp("bookmark", doc.meta[keybght]))
-            doc.mimetype = "text/plain";
+        // We set the bookmarks mtype as html, the text is empty
+        // anyway, so that the html viewer will be called on 'Open'
+        bool isbookmark = false;
+        if (!stringlowercmp("bookmark", doc.meta[keybght])) {
+            isbookmark = true;
+            doc.mimetype = "text/html";
+        }
 
         string confstr;
         string ss(" ");
-        // Read the rest: fields and keywords
+        // Read the rest: fields and keywords. We do a little
+        // massaging of the input lines, then use a ConfSimple to
+        // parse, and finally insert the key/value pairs into the doc
+        // meta[] array
         for (;;) {
             if (!readLine(line)) {
                 // Eof hopefully
@@ -109,7 +123,6 @@ public:
             line = line.substr(2);
             confstr += line + "\n";
         }
-
         ConfSimple fields(confstr, 1);
         list<string> names = fields.getNames("");
         for (list<string>::iterator it = names.begin();
@@ -118,23 +131,50 @@ public:
             fields.get(*it, value, "");
             if (!value.compare("undefined") || !value.compare("null"))
                 continue;
+
+            string *valuep = &value;
+            string cvalue;
+            if (isbookmark) {
+                // It appears that bookmarks are stored in the users'
+                // locale charset (not too sure). No idea what to do
+                // for other types, would have to check the plugin.
+                string charset = m_conf->getDefCharset(true);
+                transcode(value, cvalue, charset,  "UTF-8"); 
+                valuep = &cvalue;
+            }
+                
             string caname = m_conf->fieldCanon(*it);
-            doc.meta[caname].append(ss + value);
+            doc.meta[caname].append(ss + *valuep);
         }
+
+        // Finally build the confsimple that we will save to the
+        // cache, out of document fields. This could also be done in
+        // parallel with the doc.meta build above, but simpler this way.
+        for (map<string,string>::const_iterator it = doc.meta.begin();
+             it != doc.meta.end(); it++) {
+            m_fields.set((*it).first, (*it).second, "");
+        }
+        m_fields.set("url", doc.url, "");
+        m_fields.set("mimetype", doc.mimetype, "");
+
         return true;
     }    
 
     RclConfig *m_conf;
+    ConfSimple m_fields;
     string m_fn;
     ifstream m_input;
 };
 
 const string badtmpdirname = "/no/such/dir/really/can/exist";
-BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf)
-    : m_config(cnf), m_db(cnf)
+BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
+                                       DbIxStatusUpdater *updfunc)
+    : m_config(cnf), m_db(db), m_cache(0), m_updater(updfunc)
 {
+
     if (!m_config->getConfParam("beaglequeuedir", m_queuedir))
         m_queuedir = path_tildexpand("~/.beagle/ToIndex");
+
     if (m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0) {
 	string reason;
         if (!maketmpdir(m_tmpdir, reason)) {
@@ -143,12 +183,20 @@ BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf)
             m_tmpdir = badtmpdirname;
 	}
     }
-    Rcl::Db::OpenMode mode = Rcl::Db::DbUpd;
-    if (!m_db.open(mode)) {
-	LOGERR(("BeagleQueueIndexer: error opening database %s\n", 
-                m_config->getDbDir().c_str()));
-	return;
-    }
+
+    string ccdir;
+    m_config->getConfParam("webcachedir", ccdir);
+    if (ccdir.empty())
+        ccdir = "webcache";
+    ccdir = path_tildexpand(ccdir);
+    // If not an absolute path, compute relative to config dir
+    if (ccdir.at(0) != '/')
+        ccdir = path_cat(m_config->getConfDir(), ccdir);
+
+    int maxmbs = 20;
+    m_config->getConfParam("webcachemaxmbs", &maxmbs);
+    m_cache = new CirCache(ccdir);
+    m_cache->create(off_t(maxmbs)*1000*1024, true);
 }
 
 BeagleQueueIndexer::~BeagleQueueIndexer()
@@ -161,13 +209,106 @@ BeagleQueueIndexer::~BeagleQueueIndexer()
 		    m_tmpdir.c_str()));
 	}
     }
-    m_db.close();
+    deleteZ(m_cache);
 }
 
-bool BeagleQueueIndexer::processqueue()
+bool BeagleQueueIndexer::indexFromCache(const string& udi)
+{
+    // Index the cache entry stored under 'udi'. Returns false on lookup,
+    // hit type or internfile failure, else true or the db update status.
+    string dict, data;
+    // FIXME: horribly inefficient: either save the offsets during the
+    // forward scan, or use an auxiliary isam map.
+    if (!m_cache->get(udi, dict, data))
+        return false;
+
+    ConfSimple cf(dict, 1); // parse the metadata dictionary of the entry
+
+    string hittype;
+    if (!cf.get(keybght, hittype, "")) {
+        LOGERR(("BeagleQueueIndexer::indexFromCache: cc entry has no hit type\n"));
+        return false;
+    }
+
+    // Build a doc from saved metadata: the fixed attributes first
+    Rcl::Doc dotdoc;
+    cf.get("url", dotdoc.url, "");
+    cf.get("mimetype", dotdoc.mimetype, "");
+    cf.get("fmtime", dotdoc.fmtime, "");
+    cf.get("fbytes", dotdoc.fbytes, "");
+    dotdoc.sig = "";
+    list<string> names = cf.getNames(""); // every key also goes to meta[]
+    for (list<string>::const_iterator it = names.begin();
+         it != names.end(); it++) {
+        cf.get(*it, dotdoc.meta[*it], "");
+    }
+
+    if (!stringlowercmp("bookmark", hittype)) {
+        // Bookmark: just index the doc built from the metadata
+        return m_db->addOrUpdate(udi, "", dotdoc);
+    } else if (stringlowercmp("webhistory", dotdoc.meta[keybght]) ||
+               (dotdoc.mimetype.compare("text/html") &&
+                dotdoc.mimetype.compare("text/plain"))) {
+        LOGDEB(("BeagleQueueIndexer: skipping: hittype %s mimetype %s\n",
+                dotdoc.meta[keybght].c_str(), dotdoc.mimetype.c_str()));
+        return true; // not an error: this kind of entry is not indexed
+    } else {
+        Rcl::Doc doc;
+        FileInterner interner(data, m_config, m_tmpdir, 
+                              FileInterner::FIF_doUseInputMimetype,
+                              dotdoc.mimetype);
+        string ipath;
+        FileInterner::Status fis = interner.internfile(doc, ipath);
+        if (fis != FileInterner::FIDone) {
+            LOGERR(("BeagleQueueIndexer: bad status from internfile\n"));
+            return false;
+        }
+        // Override the interned values with the saved attributes
+        doc.mimetype = dotdoc.mimetype;
+        doc.fmtime = dotdoc.fmtime;
+        doc.url = dotdoc.url;
+        doc.fbytes = dotdoc.fbytes;
+        doc.sig = ""; // no signature for up to date checks
+
+        return m_db->addOrUpdate(udi, "", doc);
+    }
+}
+
+bool BeagleQueueIndexer::index()
 {
     LOGDEB(("BeagleQueueIndexer::processqueue: dir: [%s]\n", 
             m_queuedir.c_str()));
+    m_config->setKeyDir(m_queuedir);
+
+    // First walk the cache to set the existence flags. We do not
+    // actually check uptodateness because all files in the cache are
+    // supposedly already indexed.
+    //TBD: change this as the cache needs reindexing after an index reset!
+    // Also, we need to read the cache backwards so that the newest
+    // version of each file gets indexed? Or find a way to index
+    // multiple versions ?
+    bool eof;
+    if (!m_cache->rewind(eof)) {
+        if (!eof)
+            return false;
+    }
+    vector<string> alludis;
+    alludis.reserve(20000);
+    while (m_cache->next(eof)) {
+        string dict;
+        m_cache->getcurrentdict(dict);
+        ConfSimple cf(dict, 1);
+        string udi;
+        if (!cf.get("udi", udi, ""))
+            continue;
+        alludis.push_back(udi);
+    }
+    for (vector<string>::reverse_iterator it = alludis.rbegin();
+         it != alludis.rend(); it++) {
+        if (m_db->needUpdate(*it, "")) {
+            indexFromCache(*it);
+        }
+    }
 
     FsTreeWalker walker(FsTreeWalker::FtwNoRecurse);
     walker.addSkippedName(".*");
@@ -181,12 +322,15 @@ BeagleQueueIndexer::processone(const string &path,
                                const struct stat *stp,
                                FsTreeWalker::CbFlag flg)
 {
+    bool dounlink = false;
+
     if (flg != FsTreeWalker::FtwRegular) 
         return FsTreeWalker::FtwOk;
 
     string dotpath = path_cat(path_getfather(path), 
                               string(".") + path_getsimple(path));
     LOGDEB(("BeagleQueueIndexer: prc1: [%s]\n", path.c_str()));
+
     BeagleDotFile dotfile(m_config, dotpath);
     Rcl::Doc dotdoc;
     string udi, udipath;
@@ -205,12 +349,32 @@ BeagleQueueIndexer::processone(const string &path,
 
     // We only process bookmarks or text/html and text/plain files.
     if (!stringlowercmp("bookmark", dotdoc.meta[keybght])) {
+        // For bookmarks, we just index the doc that was built from the
+        // metadata.
+        if (dotdoc.fmtime.empty())
+            dotdoc.fmtime = ascdate;
+
+        char cbuf[100]; 
+        sprintf(cbuf, "%ld", (long)stp->st_size);
+        dotdoc.fbytes = cbuf;
+
+        // Document signature for up to date checks: none. 
+        dotdoc.sig = "";
+        
+        // doc fields not in meta, needing saving to the cache
+        dotfile.m_fields.set("fmtime", dotdoc.fmtime, "");
+        dotfile.m_fields.set("fbytes", dotdoc.fbytes, "");
+
+        if (!m_db->addOrUpdate(udi, "", dotdoc)) 
+            return FsTreeWalker::FtwError;
 
     } else if (stringlowercmp("webhistory", dotdoc.meta[keybght]) ||
                (dotdoc.mimetype.compare("text/html") &&
                 dotdoc.mimetype.compare("text/plain"))) {
         LOGDEB(("BeagleQueueIndexer: skipping: hittype %s mimetype %s\n",
                 dotdoc.meta[keybght].c_str(), dotdoc.mimetype.c_str()));
+        // Unlink them anyway
+        dounlink = true;
         goto out;
     } else {
         Rcl::Doc doc;
@@ -230,17 +394,34 @@ BeagleQueueIndexer::processone(const string &path,
         char cbuf[100]; 
         sprintf(cbuf, "%ld", (long)stp->st_size);
         doc.fbytes = cbuf;
-        // Document signature for up to date checks: none. The file is
-        // going to be deleted anyway. We always reindex what comes in
-        // the queue.  It would probably be possible to extract some
-        // http data to avoid this.
+        // Document signature for up to date checks: none. 
         doc.sig = "";
         doc.url = dotdoc.url;
-        if (!m_db.addOrUpdate(udi, "", doc)) 
+        // doc fields not in meta, needing saving to the cache. The size
+        // lives in doc.fbytes here: dotdoc.fbytes is not set in this branch.
+        dotfile.m_fields.set("fmtime", dotdoc.fmtime, "");
+        dotfile.m_fields.set("fbytes", doc.fbytes, "");
+
+        if (!m_db->addOrUpdate(udi, "", doc)) 
             return FsTreeWalker::FtwError;
+
     }
+
+    // Copy to cache
+    {
+        stringstream o;
+        dotfile.m_fields.write(o);
+        string fdata;
+        file_to_string(path, fdata);
+        if (!m_cache->put(udi, o.str(), fdata))
+            goto out;
+    }
+
+    dounlink = true;
 out:
-//    unlink(path.c_str());
-//    unlink(dotpath.c_str());
+    if (dounlink) {
+        unlink(path.c_str());
+        unlink(dotpath.c_str());
+    }
     return FsTreeWalker::FtwOk;
 }
diff --git a/src/index/beaglequeue.h b/src/index/beaglequeue.h
index 55640eb5..31447344 100644
--- a/src/index/beaglequeue.h
+++ b/src/index/beaglequeue.h
@@ -28,21 +28,30 @@
 #include "fstreewalk.h"
 #include "rcldb.h"
 
+class DbIxStatusUpdater;
+class CirCache;
+
 class BeagleQueueIndexer : public FsTreeWalkerCB {
 public:
-    BeagleQueueIndexer(RclConfig *cnf);
+    BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db, 
+                       DbIxStatusUpdater *updfunc = 0);
     ~BeagleQueueIndexer();
     
-    bool processqueue();
+    bool index(); // index cache entries needing an update, then the queue
 
     FsTreeWalker::Status 
     processone(const string &, const struct stat *, FsTreeWalker::CbFlag);
 
 private:
     RclConfig *m_config;
-    Rcl::Db m_db;
-    string  m_queuedir;
-    string  m_tmpdir;
+    Rcl::Db   *m_db;      // not owned: supplied to the constructor
+    CirCache  *m_cache;   // owned: created in the ctor, deleted in the dtor
+    string     m_queuedir;
+    string     m_tmpdir;
+    DbIxStatusUpdater *m_updater; // status object from the caller, may be 0
+
+    bool indexFromCache(const string& udi); // index one cache entry by udi
+
 };
 
 #endif /* _beaglequeue_h_included_ */
diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp
index f4e97e63..bdffe09f 100644
--- a/src/index/fsindexer.cpp
+++ b/src/index/fsindexer.cpp
@@ -90,7 +90,7 @@ bool FsIndexer::init()
 }
 
 // Recursively index each directory in the topdirs:
-bool FsIndexer::index(bool resetbefore)
+bool FsIndexer::index()
 {
     list<string> topdirs = m_config->getTopdirs();
     if (topdirs.empty()) {
@@ -376,7 +376,6 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
         // We'll change the signature to ensure that the indexing will
         // be retried every time.
 
-
 	// Internal access path for multi-document files
 	if (ipath.empty())
 	    hadNullIpath = true;
diff --git a/src/index/fsindexer.h b/src/index/fsindexer.h
index c9d94688..b72fd3e4 100644
--- a/src/index/fsindexer.h
+++ b/src/index/fsindexer.h
@@ -55,7 +55,7 @@ class FsIndexer : public FsTreeWalkerCB {
      * We create the temporary directory, open the database,
      * then call a file system walk for each top-level directory.
      */
-    bool index(bool resetbefore);
+    bool index();
 
     /** Index a list of files. No db cleaning or stemdb updating */
     bool indexFiles(const std::list<string> &files);
diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp
index 2fb42b5a..281aa781 100644
--- a/src/index/indexer.cpp
+++ b/src/index/indexer.cpp
@@ -28,37 +28,63 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.71 2008-12-17 08:01:40 dockes Exp
 
 #include "debuglog.h"
 #include "indexer.h"
+#include "fsindexer.h"
+#include "beaglequeue.h"
+
 #ifdef RCL_USE_ASPELL
 #include "rclaspell.h"
 #endif
 
+ConfIndexer::ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc)
+    : m_config(cnf), m_db(cnf), m_fsindexer(0), 
+      m_dobeagle(false), m_beagler(0),
+      m_updater(updfunc)
+{   // Sub-indexers start out null; only the beagle switch is read here.
+    m_config->getConfParam("processbeaglequeue", &m_dobeagle);
+}
+
 ConfIndexer::~ConfIndexer()
 {
      deleteZ(m_fsindexer);
+     deleteZ(m_beagler);
 }
 
-bool ConfIndexer::index(bool resetbefore)
+bool ConfIndexer::index(bool resetbefore, ixType typestorun)
 {
     Rcl::Db::OpenMode mode = resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd;
     if (!m_db.open(mode)) {
-	LOGERR(("ConfIndexer: error opening database %s\n", 
-                m_config->getDbDir().c_str()));
+	LOGERR(("ConfIndexer: error opening database %s : %s\n", 
+                m_config->getDbDir().c_str(), m_db.getReason().c_str()));
 	return false;
     }
 
     m_config->setKeyDir("");
-    m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
-    bool ret = m_fsindexer->index(resetbefore);
-    deleteZ(m_fsindexer);
-
-    if (m_updater) {
-	m_updater->status.fn.erase();
-	m_updater->status.phase = DbIxStatus::DBIXS_PURGE;
-	m_updater->update();
+    if (typestorun & IxTFs) {
+        deleteZ(m_fsindexer);
+        m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
+        if (!m_fsindexer || !m_fsindexer->index()) {
+            return false;
+        }
+    }
+
+    if (m_dobeagle && (typestorun & IxTBeagleQueue)) {
+        deleteZ(m_beagler);
+        m_beagler = new BeagleQueueIndexer(m_config, &m_db, m_updater);
+        if (!m_beagler || !m_beagler->index()) {
+            return false;
+        }
+    }
+
+    if (typestorun == IxTAll) {
+        // Get rid of all database entries that don't exist in the
+        // filesystem anymore. Only if all *configured* indexers ran.
+        if (m_updater) {
+            m_updater->status.fn.erase();
+            m_updater->status.phase = DbIxStatus::DBIXS_PURGE;
+            m_updater->update();
+        }
+        m_db.purge();
     }
-    // Get rid of all database entries that don't exist in the
-    // filesystem anymore.
-    m_db.purge();
 
     if (m_updater) {
 	m_updater->status.phase = DbIxStatus::DBIXS_CLOSING;
@@ -78,17 +104,55 @@ bool ConfIndexer::index(bool resetbefore)
     return true;
 }
 
+bool ConfIndexer::initTopDirs()
+{   // Make sure m_tdl holds the configured top directories.
+    if (m_tdl.empty()) { // not loaded yet: fetch the list from the config
+	m_tdl = m_config->getTopdirs();
+	if (m_tdl.empty()) {
+	    m_reason = "Top directory list (topdirs param.) "
+		    "not found in config or Directory list parse error";
+	    return false; // m_reason now holds the explanation
+	}
+    }
+    return true;
+}
+
 bool ConfIndexer::indexFiles(const std::list<string> &files)
 {
+    if (!initTopDirs())
+        return false;
+
+    list<string> myfiles;
+    for (list<string>::const_iterator it = files.begin(); 
+	 it != files.end(); it++) {
+	string fn = path_canon(*it);
+	bool ok = false;
+	// Check that this file name belongs to one of our subtrees
+	for (list<string>::iterator dit = m_tdl.begin(); 
+	     dit != m_tdl.end(); dit++) {
+	    if (fn.find(*dit) == 0) {
+		myfiles.push_back(fn);
+		ok = true;
+		break;
+	    }
+	}
+	if (!ok) {
+	    m_reason += string("File ") + fn + string(" not in indexed area\n");
+	}
+    }
+    if (myfiles.empty())
+	return true;
+
     if (!m_db.open(Rcl::Db::DbUpd)) {
 	LOGERR(("ConfIndexer: indexFiles error opening database %s\n", 
                 m_config->getDbDir().c_str()));
 	return false;
     }
     m_config->setKeyDir("");
-    m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
+    if (!m_fsindexer)
+        m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
-    bool ret = m_fsindexer->indexFiles(files);
+    bool ret = m_fsindexer->indexFiles(myfiles); // canonic, filtered list
-    deleteZ(m_fsindexer);
+
     // The close would be done in our destructor, but we want status here
     if (!m_db.close()) {
 	LOGERR(("ConfIndexer::index: error closing database in %s\n", 
@@ -100,15 +164,26 @@ bool ConfIndexer::indexFiles(const std::list<string> &files)
 
 bool ConfIndexer::purgeFiles(const std::list<string> &files)
 {
+    if (!initTopDirs())
+        return false;
+
+    list<string> myfiles;
+    for (list<string>::const_iterator it = files.begin(); 
+	 it != files.end(); it++) {
+	myfiles.push_back(path_canon(*it));
+    }
+
     if (!m_db.open(Rcl::Db::DbUpd)) {
 	LOGERR(("ConfIndexer: purgeFiles error opening database %s\n", 
                 m_config->getDbDir().c_str()));
 	return false;
     }
+
     m_config->setKeyDir("");
-    m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
+    if (!m_fsindexer)
+        m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
-    bool ret = m_fsindexer->purgeFiles(files);
+    bool ret = m_fsindexer->purgeFiles(myfiles); // canonicalized paths
-    deleteZ(m_fsindexer);
+
     // The close would be done in our destructor, but we want status here
     if (!m_db.close()) {
 	LOGERR(("ConfIndexer::index: error closing database in %s\n", 
@@ -159,7 +234,7 @@ bool ConfIndexer::createStemDb(const string &lang)
 // module, either from a configuration variable or the NLS environment.
 bool ConfIndexer::createAspellDict()
 {
-    LOGDEB2(("FsIndexer::createAspellDict()\n"));
+    LOGDEB2(("ConfIndexer::createAspellDict()\n"));
 #ifdef RCL_USE_ASPELL
     // For the benefit of the real-time indexer, we only initialize
     // noaspell from the configuration once. It can then be set to
@@ -180,14 +255,14 @@ bool ConfIndexer::createAspellDict()
     Aspell aspell(m_config);
     string reason;
     if (!aspell.init(reason)) {
-	LOGERR(("FsIndexer::createAspellDict: aspell init failed: %s\n", 
+	LOGERR(("ConfIndexer::createAspellDict: aspell init failed: %s\n", 
 		reason.c_str()));
 	noaspell = true;
 	return false;
     }
-    LOGDEB(("FsIndexer::createAspellDict: creating dictionary\n"));
+    LOGDEB(("ConfIndexer::createAspellDict: creating dictionary\n"));
     if (!aspell.buildDict(m_db, reason)) {
-	LOGERR(("FsIndexer::createAspellDict: aspell buildDict failed: %s\n", 
+	LOGERR(("ConfIndexer::createAspellDict: aspell buildDict failed: %s\n", 
 		reason.c_str()));
 	noaspell = true;
 	return false;
diff --git a/src/index/indexer.h b/src/index/indexer.h
index 2f09abfe..fc37036b 100644
--- a/src/index/indexer.h
+++ b/src/index/indexer.h
@@ -29,10 +29,10 @@ using std::map;
 #endif
 
 #include "rclconfig.h"
-#include "fsindexer.h"
+#include "rcldb.h"
 
-/* Forward decl for lower level indexing object */
-class DbIndexer;
+class FsIndexer;
+class BeagleQueueIndexer;
 
 class DbIxStatus {
  public:
@@ -55,27 +55,20 @@ class DbIxStatusUpdater {
 };
 
 /**
-   The top level indexing object. Processes the configuration, then invokes
-   file system walking to populate/update the database(s).
-
-   Fiction:
-      Multiple top-level directories can be listed in the
-      configuration. Each can be indexed to a different
-      database. Directories are first grouped by database, then an
-      internal class (DbIndexer) is used to process each group.
-   Fact: we've had one db per config forever. The multidb/config code has been 
-   kept around for no good reason, this fiction only affects indexer.cpp
+ * The top level indexing object. Processes the configuration, then invokes
+ * file system walking or other indexers to populate/update the database(s).
 */
 class ConfIndexer {
  public:
     enum runStatus {IndexerOk, IndexerError};
-    ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0)
-	: m_config(cnf), m_db(cnf), m_fsindexer(0), m_updater(updfunc)
-	{}
+    ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0);
     virtual ~ConfIndexer();
 
-    /** Worker function: doe the actual indexing */
-    bool index(bool resetbefore = false);
+    // Indexer types. Maybe we'll have something more dynamic one day
+    enum ixType {IxTNone, IxTFs=1, IxTBeagleQueue=2, 
+                 IxTAll = IxTFs | IxTBeagleQueue};
+    /** Run indexers */
+    bool index(bool resetbefore, ixType typestorun);
 
     const string &getReason() {return m_reason;}
 
@@ -101,8 +94,13 @@ class ConfIndexer {
     RclConfig *m_config;
     Rcl::Db    m_db;
     FsIndexer *m_fsindexer; 
-    DbIxStatusUpdater *m_updater;
+    bool                m_dobeagle;
+    BeagleQueueIndexer *m_beagler; 
+    DbIxStatusUpdater  *m_updater;
     string m_reason;
+    list<string> m_tdl;
+
+    bool initTopDirs();
 };
 
 #endif /* _INDEXER_H_INCLUDED_ */
diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp
index 950b5a27..3ffae840 100644
--- a/src/index/recollindex.cpp
+++ b/src/index/recollindex.cpp
@@ -83,66 +83,27 @@ static bool makeIndexer(RclConfig *config)
 {
     if (!confindexer)
 	confindexer = new ConfIndexer(config, &updater);
-    return confindexer ? true : false;
+    if (!confindexer) {
+        cerr << "Cannot create indexer" << endl;
+        exit(1);
+    }
+    return true;
 }
 
-// The list of top directories/files wont change during program run,
-// let's cache it:
-static list<string> o_tdl;
-
 // Index a list of files. We just check that they belong to one of the
 // topdirs subtrees, and call the indexer method. 
 //
 // This is called either from the command line or from the monitor. In
 // this case we're called repeatedly in the same process, and the
-// confindexer is only created once by makeIndexer (but the db is
-// flushed anyway)
+// confindexer is only created once by makeIndexer (but the db is closed
+// and flushed every time)
 bool indexfiles(RclConfig *config, const list<string> &filenames)
 {
     if (filenames.empty())
 	return true;
-    
-    if (o_tdl.empty()) {
-	o_tdl = config->getTopdirs();
-	if (o_tdl.empty()) {
-	    fprintf(stderr, "Top directory list (topdirs param.) "
-		    "not found in config or Directory list parse error");
-	    return false;
-	}
-    }
-
-    list<string> myfiles;
-    for (list<string>::const_iterator it = filenames.begin(); 
-	 it != filenames.end(); it++) {
-	string fn = path_canon(*it);
-	bool ok = false;
-	// Check that this file name belongs to one of our subtrees
-	for (list<string>::iterator dit = o_tdl.begin(); 
-	     dit != o_tdl.end(); dit++) {
-	    if (fn.find(*dit) == 0) {
-		myfiles.push_back(fn);
-		ok = true;
-		break;
-	    }
-	}
-	if (!ok) {
-	    fprintf(stderr, "File %s not in indexed area\n", fn.c_str());
-	}
-    }
-    if (myfiles.empty())
-	return true;
-
-    // Note: we should sort the file names against the topdirs here
-    // and check for different databases. But we can for now only have
-    // one database per config, so we set the keydir from the first
-    // file (which is not really needed...), create the indexer/db and
-    // go:
-    config->setKeyDir(path_getfather(*myfiles.begin()));
-
     if (!makeIndexer(config))
 	return false;
-
-    return confindexer->indexFiles(myfiles);
+    return confindexer->indexFiles(filenames);
 }
 
 // Delete a list of files. Same comments about call contexts as indexfiles.
@@ -150,32 +111,9 @@ bool purgefiles(RclConfig *config, const list<string> &filenames)
 {
     if (filenames.empty())
 	return true;
-    
-    if (o_tdl.empty()) {
-	o_tdl = config->getTopdirs();
-	if (o_tdl.empty()) {
-	    fprintf(stderr, "Top directory list (topdirs param.) "
-		    "not found in config or Directory list parse error");
-	    return false;
-	}
-    }
-
-    list<string> myfiles;
-    for (list<string>::const_iterator it = filenames.begin(); 
-	 it != filenames.end(); it++) {
-	myfiles.push_back(path_canon(*it));
-    }
-
-    // Note: we should sort the file names against the topdirs here
-    // and check for different databases. But we can for now only have
-    // one database per config, so we set the keydir from the first
-    // file (which is not really needed...), create the indexer/db and
-    // go:
-    config->setKeyDir(path_getfather(*myfiles.begin()));
-
     if (!makeIndexer(config))
 	return false;
-    return confindexer->purgeFiles(myfiles);
+    return confindexer->purgeFiles(filenames);
 }
 
 // Create stemming and spelling databases
@@ -343,12 +281,14 @@ int main(int argc, const char **argv)
 		filenames.push_back(*argv++);
 	    }
 	}
-
+        bool status; // NB: confindexer stays null if filenames was empty
 	if (op_flags & OPT_i)
-	    exit(!indexfiles(config, filenames));
+	    status = indexfiles(config, filenames);
 	else 
-	    exit(!purgefiles(config, filenames));
-
+	    status = purgefiles(config, filenames);
+        if (confindexer && !confindexer->getReason().empty())
+            cerr << confindexer->getReason() << endl;
+        exit(status ? 0 : 1);
     } else if (op_flags & OPT_l) {
 	if (argc != 0) 
 	    Usage();
@@ -400,14 +340,15 @@ int main(int argc, const char **argv)
         exit(!confindexer->createAspellDict());
 #endif // ASPELL
     } else if (op_flags & OPT_b) {
-        BeagleQueueIndexer beagler(config);
-        bool status = beagler.processqueue();
-        return !status;
+        cerr << "Not yet" << endl;
+        return 1;
     } else {
 	confindexer = new ConfIndexer(config, &updater);
-	bool status = confindexer->index(rezero);
+	bool status = confindexer->index(rezero, ConfIndexer::IxTAll);
 	if (!status) 
 	    cerr << "Indexing failed" << endl;
+        if (!confindexer->getReason().empty())
+            cerr << confindexer->getReason() << endl;
 	return !status;
     }
 }