From 5ca462cdff19d971cd454a2d2bbaf508a3751cc3 Mon Sep 17 00:00:00 2001
From: dockes <none@none>
Date: Tue, 14 Dec 2004 17:54:16 +0000
Subject: [PATCH] *** empty log message ***

---
 src/common/rclconfig.cpp  |  64 +++++++++++++++
 src/common/rclconfig.h    |  45 +++++++++++
 src/common/textsplit.cpp  | 114 +++++++++++++++-----------
 src/common/textsplit.h    |  31 +++++++
 src/index/indexer.h       |  23 ++++++
 src/index/mimetype.cpp    |   7 +-
 src/index/mimetype.h      |   3 +-
 src/index/recollindex.cpp | 166 +++++++++++++++++++++++++++++++++-----
 src/rcldb/rcldb.cpp       | 121 +++++++++++++++++++++++++++
 src/rcldb/rcldb.h         |  32 ++++++++
 src/utils/execmd.cpp      |   7 +-
 src/utils/pathut.cpp      |  25 +++++-
 src/utils/pathut.h        |   3 +-
 src/utils/readfile.cpp    |  49 +++++++++++
 src/utils/readfile.h      |  13 +++
 15 files changed, 626 insertions(+), 77 deletions(-)
 create mode 100644 src/common/rclconfig.cpp
 create mode 100644 src/common/rclconfig.h
 create mode 100644 src/common/textsplit.h
 create mode 100644 src/index/indexer.h
 create mode 100644 src/rcldb/rcldb.cpp
 create mode 100644 src/rcldb/rcldb.h
 create mode 100644 src/utils/readfile.cpp
 create mode 100644 src/utils/readfile.h
diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp
new file mode 100644
index 00000000..bd2f1cdc
--- /dev/null
+++ b/src/common/rclconfig.cpp
@@ -0,0 +1,64 @@
+#ifndef lint
+static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes";
+#endif
+
+#include <iostream>
+
+#include "rclconfig.h"
+#include "pathut.h"
+#include "conftree.h"
+
+using namespace std;
+
+// Stub: no global configuration yet. Returns null instead of falling off the end (UB).
+ConfTree *getConfig()
+{ return 0; }
+
+// Locate the configuration directory, then load the main configuration
+// and the two mime files. m_ok is only set when all three were created.
+RclConfig::RclConfig()
+    : m_ok(false), conf(0), mimemap(0), mimeconf(0)
+{
+    // RECOLL_CONFDIR, when set, replaces the default under the home directory
+    const char *envdir = getenv("RECOLL_CONFDIR");
+    if (envdir == 0) {
+	confdir = path_home();
+	confdir += ".recoll/";
+    } else {
+	confdir = envdir;
+    }
+
+    // Maybe we should try to open readonly here as, else, this will 
+    // casually create a configuration file
+    string mainpath = confdir;
+    path_cat(mainpath, "recoll.conf");
+    conf = new ConfTree(mainpath.c_str(), 0);
+    if (!conf) {
+	cerr << "No configuration" << endl;
+	return;
+    }
+    // Mime map: file name comes from the main config, default "mimemap"
+    string mapname;
+    if (!conf->get("mimemapfile", mapname, ""))
+	mapname = "mimemap";
+    string mappath = confdir;
+    path_cat(mappath, mapname);
+    mimemap = new ConfTree(mappath.c_str());
+    if (!mimemap) {
+	cerr << "No mime map file" << endl;
+	return;
+    }
+    // Mime conf: same scheme, default name "mimeconf"
+    string mconfname;
+    if (!conf->get("mimeconffile", mconfname, ""))
+	mconfname = "mimeconf";
+    string mconfpath = confdir;
+    path_cat(mconfpath, mconfname);
+    mimeconf = new ConfTree(mconfpath.c_str());
+    if (!mimeconf) {
+	cerr << "No mime conf file" << endl;
+	return;
+    }
+    mimeconf->list();
+    m_ok = true;
+}
diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h
new file mode 100644
index 00000000..32aacdd0
--- /dev/null
+++ b/src/common/rclconfig.h
@@ -0,0 +1,45 @@
+#ifndef _RCLCONFIG_H_INCLUDED_
+#define _RCLCONFIG_H_INCLUDED_
+/* @(#$Id: rclconfig.h,v 1.1 2004-12-14 17:50:28 dockes Exp $  (C) 2004 J.F.Dockes */
+
+#include "conftree.h"
+
+// Parsed recoll configuration; owns the main, mime map and mime conf trees.
+class RclConfig {
+    bool m_ok;         // True once the constructor has loaded everything
+    string confdir;    // Directory where the files are stored
+    ConfTree *conf;    // Parsed main configuration
+    string keydir;     // Current directory used for parameter fetches.
+    string defcharset; // These are stored locally to avoid a config lookup
+    string deflang;    // each time.
+    // Note: this will have to change if/when we support per directory maps
+    ConfTree *mimemap;
+    ConfTree *mimeconf;
+    // Not copyable: a copy would delete the same trees a second time.
+    RclConfig(const RclConfig &);
+    RclConfig &operator=(const RclConfig &);
+ public:
+    RclConfig();
+    ~RclConfig() {delete conf; delete mimemap; delete mimeconf;}
+    bool ok() {return m_ok;}
+    // The tree accessors return 0 unless construction fully succeeded
+    ConfTree *getConfig() {return m_ok ? conf : 0;}
+    ConfTree *getMimeMap() {return m_ok ? mimemap : 0;}
+    ConfTree *getMimeConf() {return m_ok ? mimeconf : 0;}
+    // Fetch a parameter of the main configuration, using the current keydir
+    bool getConfParam(const string &name, string &value) 
+    {
+	return conf ? conf->get(name, value, keydir) : false;
+    }
+    const string &getDefCharset() {return defcharset;}
+    const string &getDefLang() {return deflang;}
+    // Change the current directory and refresh the cached defaults for it
+    void setKeyDir(const string &dir) 
+    {
+	keydir = dir;
+	if (conf == 0) return;
+	conf->get("defaultcharset", defcharset, keydir);
+	conf->get("defaultlanguage", deflang, keydir);
+    }
+};
+#endif /* _RCLCONFIG_H_INCLUDED_ */
diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp
index e71f11d0..d6895857 100644
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@@ -1,14 +1,33 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.2 2004-12-14 17:49:11 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
+#ifndef TEST_TEXTSPLIT
 
 #include <iostream>
 #include <string>
 
+#include "textsplit.h"
+
 using namespace std;
 
+/**
+ * Splitting a text into words. The code in this file will work with any 
+ * charset where the basic separators (.,- etc.) have their ascii values 
+ * (ok for UTF-8, ascii, iso8859* and quite a few others).
+ *
+ * We work in a way which would make it quite difficult to handle non-ascii
+ * separator chars (en-dash,etc.). We would then need to actually parse the 
+ * utf-8 stream, and use a different way to classify the characters (instead 
+ * of a 256 slot array).
+ *
+ * We are also not using capitalization information.
+ */
+
 // Character classes: we have three main groups, and then some chars
 // are their own class because they want special handling.
+// We have an array with 256 slots where we keep the character states. 
+// The array could be fully static, but we use a small function to fill it 
+// once.
 enum CharClass {LETTER=256, SPACE=257, DIGIT=258};
 static int charclasses[256];
 static void setcharclasses()
@@ -38,7 +57,7 @@ static void setcharclasses()
     init = 1;
 }
 
-static void emitterm(string &w, int *posp, bool doerase = true)
+void TextSplit::emitterm(string &w, int pos, bool doerase = true)
 {
     // Maybe trim end of word. These are chars that we would keep inside 
     // a word or span, but not at the end
@@ -55,22 +74,27 @@ static void emitterm(string &w, int *posp, bool doerase = true)
     }
  breakloop:
     if (w.length()) {
-	if (posp)
-	    *posp++;
-	cout << w << endl;
+	if (termsink)
+	    termsink(cdata, w, pos);
     }
     if (doerase)
 	w.erase();
 }
 
-void text_to_words(const string &in)
+/* 
+ * We basically emit a word every time we see a separator, but some chars are
+ * handled specially so that special cases, e.g. c++ and dockes@okyz.com, 
+ * are handled properly.
+ */
+void TextSplit::text_to_words(const string &in)
 {
     setcharclasses();
     string span;
     string word;
     bool number = false;
-    int pos = 0;
+    int wordpos = 0;
     int spanpos = 0;
+
     for (int i = 0; i < in.length(); i++) {
 	int c = in[i];
 	int cc = charclasses[c]; 
@@ -78,11 +102,13 @@ void text_to_words(const string &in)
 	case SPACE:
 	SPACE:
 	    if (word.length()) {
-		if (span.length() != word.length())
-		    emitterm(span, &spanpos);
-		emitterm(word, &pos);
+		if (span.length() != word.length()) {
+		    emitterm(span, spanpos);
+		}
+		emitterm(word, wordpos++);
 		number = false;
 	    }
+	    spanpos = wordpos;
 	    span.erase();
 	    break;
 	case '-':
@@ -94,9 +120,10 @@ void text_to_words(const string &in)
 		    span += c;
 		}
 	    } else {
-		if (span.length() != word.length())
-		    emitterm(span, &spanpos, false);
-		emitterm(word, &pos);
+		if (span.length() != word.length()) {
+		    emitterm(span, spanpos, false);
+		}
+		emitterm(word, wordpos++);
 		number = false;
 		span += c;
 	    }
@@ -104,9 +131,10 @@ void text_to_words(const string &in)
 	case '\'':
 	case '@':
 	    if (word.length()) {
-		if (span.length() != word.length())
-		    emitterm(span, &spanpos, false);
-		emitterm(word, &pos);
+		if (span.length() != word.length()) {
+		    emitterm(span, spanpos, false);
+		}
+		emitterm(word, wordpos++);
 		number = false;
 	    } else
 		word += c;
@@ -117,7 +145,7 @@ void text_to_words(const string &in)
 		word += c;
 	    } else {
 		if (word.length()) {
-		    emitterm(word, &pos);
+		    emitterm(word, wordpos++);
 		    number = false;
 		} else 
 		    word += c;
@@ -139,8 +167,8 @@ void text_to_words(const string &in)
 		// if '-' is the last char before end of line, just
 		// ignore the line change. This is the right thing to
 		// do almost always. We'd then need a way to check if
-		// the - was added as part of the sleep or was really there, 
-		// but this would need a dictionary.
+		// the - was added as part of the word hyphenation, or was 
+		// there in the first place, but this would need a dictionary.
 	    } else {
 		// Handle like a normal separator
 		goto SPACE;
@@ -162,42 +190,35 @@ void text_to_words(const string &in)
     }
     if (word.length()) {
 	if (span.length() != word.length())
-	    emitterm(span, &spanpos);
-	emitterm(word, &pos);
+	    emitterm(span, spanpos);
+	emitterm(word, wordpos);
     }
 }
 
-#if 1 || TEST_TEXTSPLIT
+#else  // TEST driver ->
+
 #include <unistd.h>
 #include <errno.h>
 #include <fcntl.h>
-int
-file_to_string(const string &fn, string &data)
+
+#include <iostream>
+
+#include "textsplit.h"
+#include "readfile.h"
+
+using namespace std;
+
+int termsink(void *, const string &term, int pos)
 {
-    int fd = open(fn.c_str(), 0);
-    if (fd < 0) {
-	perror("open");
-	return -1;
-    }
-    char buf[4096];
-    for (;;) {
-	int n = read(fd, buf, 4096);
-	if (n < 0) {
-	    perror("read");
-	    close(fd);
-	    return -1;
-	}
-	if (n == 0)
-	    break;
-	data.append(buf, n);
-    }
-    close(fd);
+    cout << pos << " " << term << endl;
     return 0;
 }
 
+
 static string teststring = 
     "jfd@okyz.com "
     "Ceci. Est;Oui 1.24 n@d @net .net t@v@c c# c++ -10 o'brien l'ami "
+    "a 134 +134 -14 -1.5 +1.5 1.54e10 a"
     "@^#$(#$(*)"
     "one\n\rtwo\nthree-\nfour"
     "[olala][ululu]"
@@ -206,15 +227,16 @@ static string teststring =
 
 int main(int argc, char **argv)
 {
+    TextSplit splitter(termsink, 0);
     if (argc == 2) {
 	string data;
-	if (file_to_string(argv[1], data) < 0) 
+	if (!file_to_string(argv[1], data)) 
 	    exit(1);
-	text_to_words(data);
+	splitter.text_to_words(data);
     } else {
-	cout << teststring << endl;  text_to_words(teststring);
+	cout << teststring << endl;  
+	splitter.text_to_words(teststring);
     }
     
 }
 #endif // TEST
-
diff --git a/src/common/textsplit.h b/src/common/textsplit.h
new file mode 100644
index 00000000..edd9d79b
--- /dev/null
+++ b/src/common/textsplit.h
@@ -0,0 +1,31 @@
+#ifndef _TEXTSPLIT_H_INCLUDED_
+#define _TEXTSPLIT_H_INCLUDED_
+/* @(#$Id: textsplit.h,v 1.1 2004-12-14 17:49:11 dockes Exp $  (C) 2004 J.F.Dockes */
+
+#include <string>
+
+/** 
+ * Split text into words. 
+ * See comments at top of .cpp for more explanations.
+ * This uses a callback function. It could be done with an iterator instead,
+ * but it's much simpler this way...
+ */
+class TextSplit {
+ public:
+    typedef int (*TermSink)(void *cdata, const std::string & term, int pos);
+ private:
+    TermSink termsink; // Called for every emitted term; may be null (then nothing is emitted)
+    void *cdata;       // Client data, handed back as first argument of the callback
+    void emitterm(std::string &term, int pos, bool doerase); // Trim term, pass it to termsink, erase it if doerase
+ public:
+    /**
+     * Constructor: just store callback and client data
+     */
+    TextSplit(TermSink t, void *c) : termsink(t), cdata(c) {}
+    /**
+     * Split text, emit words and positions through the callback.
+     */
+    void text_to_words(const std::string &in);
+};
+
+#endif /* _TEXTSPLIT_H_INCLUDED_ */
diff --git a/src/index/indexer.h b/src/index/indexer.h
new file mode 100644
index 00000000..b38b624f
--- /dev/null
+++ b/src/index/indexer.h
@@ -0,0 +1,23 @@
+#ifndef _INDEXER_H_INCLUDED_
+#define _INDEXER_H_INCLUDED_
+/* @(#$Id: indexer.h,v 1.1 2004-12-14 17:53:51 dockes Exp $  (C) 2004 J.F.Dockes */
+
+#include "rclconfig.h"
+#include "rcldb.h" // For Rcl::Doc, so that this header can be included on its own
+/* Document interner function: takes (config, file name, mime type), returns an Rcl::Doc pointer, possibly 0 */
+typedef Rcl::Doc* (*MimeHandlerFunc)(RclConfig *, const string &, 
+				    const string &);
+
+
+#if 0
+class FsIndexer {
+    const ConfTree &conf;
+ public:
+    enum runStatus {IndexerOk, IndexerError};
+    Indexer(const ConfTree &cnf): conf(cnf) {}
+    virtual ~Indexer() {}
+    runStatus run() = 0;
+};
+#endif
+
+#endif /* _INDEXER_H_INCLUDED_ */
diff --git a/src/index/mimetype.cpp b/src/index/mimetype.cpp
index 3eb1b9cc..92144760 100644
--- a/src/index/mimetype.cpp
+++ b/src/index/mimetype.cpp
@@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 
 #include <ctype.h>
@@ -11,6 +11,9 @@ using std::string;
 
 string mimetype(const string &filename, ConfTree *mtypes)
 {
+    if (mtypes == 0)
+	return "";
+
     // If filename has a suffix and we find it in the map, we're done
     string::size_type dot = filename.find_last_of(".");
     if (dot != string::npos) {
@@ -26,6 +29,8 @@ string mimetype(const string &filename, ConfTree *mtypes)
     return "";
 }
 
+
+
 #ifdef _TEST_MIMETYPE_
 #include <iostream>
 const char *tvec[] = {
diff --git a/src/index/mimetype.h b/src/index/mimetype.h
index d3165aa6..da22e165 100644
--- a/src/index/mimetype.h
+++ b/src/index/mimetype.h
@@ -1,6 +1,6 @@
 #ifndef _MIMETYPE_H_INCLUDED_
 #define _MIMETYPE_H_INCLUDED_
-/* @(#$Id: mimetype.h,v 1.1 2004-12-13 15:42:16 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: mimetype.h,v 1.2 2004-12-14 17:54:16 dockes Exp $  (C) 2004 J.F.Dockes */
 
 #include <string>
 #include "conftree.h"
@@ -13,4 +13,5 @@
  */
 string mimetype(const std::string &filename, ConfTree *mtypes);
 
+
 #endif /* _MIMETYPE_H_INCLUDED_ */
diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp
index 0e96bd28..780709ee 100644
--- a/src/index/recollindex.cpp
+++ b/src/index/recollindex.cpp
@@ -1,7 +1,9 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 
+#include <strings.h>
+
 #include <iostream>
 
 #include "pathut.h"
@@ -9,43 +11,156 @@ static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.1 2004-12-13 15:42:16 dockes
 #include "rclconfig.h"
 #include "fstreewalk.h"
 #include "mimetype.h"
+#include "rcldb.h"
+#include "readfile.h"
+#include "indexer.h"
 
 using namespace std;
 
+
+Rcl::Doc* textPlainToDoc(RclConfig *conf, const string &fn, 
+			 const string &mtype) // Internal handler registered for "text/plain"
+{
+    return 0; // Stub: no document is built yet, none of the arguments is used
+}
+
+static map<string, MimeHandlerFunc> ihandlers; // Internal handler functions, indexed by mime type
+class IHandler_Init { // Helper whose constructor fills the ihandlers table
+ public:
+    IHandler_Init() {
+	ihandlers["text/plain"] = textPlainToDoc;
+    }
+};
+static IHandler_Init ihandleriniter; // File-scope instance: its constructor runs during static initialization
+
+/**
+ * Return handler function for given mime type, or 0 if there is none
+ * (no handler configuration, type not configured, or bad definition).
+ */
+MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
+{
+    // The caller may have no mime configuration at all
+    if (mhandlers == 0)
+	return 0;
+
+    // Return handler definition for mime type
+    string hs;
+    if (!mhandlers->get(mtype, hs, "")) 
+	return 0;
+
+    // Break definition into type and name 
+    vector<string> toks;
+    ConfTree::stringToStrings(hs, toks);
+    if (toks.empty()) {
+	cerr << "Bad mimeconf line for " << mtype << endl;
+	return 0;
+    }
+
+    // Retrieve handler function according to type
+    const char *htype = toks[0].c_str();
+    if (!strcasecmp(htype, "internal")) {
+	cerr << "Internal Handler" << endl;
+	map<string, MimeHandlerFunc>::const_iterator it = 
+	    ihandlers.find(mtype);
+	if (it == ihandlers.end()) {
+	    cerr << "Internal handler not found for " << mtype << endl;
+	    return 0;
+	}
+	cerr << "Got handler" << endl;
+	return it->second;
+    }
+
+    // "dll" and "exec" definitions are not implemented yet and any other
+    // type is unknown: there is no handler in all these cases.
+    return 0;
+}
+
 class DirIndexer {
     FsTreeWalker walker;
     RclConfig *config;
     string topdir;
+    string dbdir;
+    Rcl::Db db;
  public:
-    DirIndexer(RclConfig *cnf, const string &top) 
-	: config(cnf), topdir(top)
-    {
-    }
+    DirIndexer(RclConfig *cnf, const string &dbd, const string &top) 
+	: config(cnf), topdir(top), dbdir(dbd)
+    { }
+
     friend FsTreeWalker::Status 
       indexfile(void *, const std::string &, const struct stat *, 
 		FsTreeWalker::CbFlag);
-    void index()
-    {
-	walker.walk(topdir, indexfile, this);
-    }
+
+    void index();
 };
 
+void DirIndexer::index() // Walk the tree under topdir with indexfile() as callback
+{
+#if 0 // Database opening is compiled out for now
+    if (!db.open(dbdir, Rcl::Db::DbUpd)) {
+	cerr << "Error opening database in " << dbdir << " for " <<
+	    topdir << endl;
+	return;
+    }
+#endif
+    walker.walk(topdir, indexfile, this); // 'this' comes back as the callback's client data
+#if 0 // Database closing is compiled out as well
+    if (!db.close()) {
+	cerr << "Error closing database in " << dbdir << " for " <<
+	    topdir << endl;
+	return;
+    }
+#endif
+}
+
 FsTreeWalker::Status 
-indexfile(void *cdata, const std::string &fn, 
-	  const struct stat *stp, FsTreeWalker::CbFlag flg)
+indexfile(void *cdata, const std::string &fn, const struct stat *stp, 
+	  FsTreeWalker::CbFlag flg)
 {
     DirIndexer *me = (DirIndexer *)cdata;
-    if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwDirReturn) {
-	// Possibly adjust defaults
+
+    if (flg == FsTreeWalker::FtwDirEnter || 
+	flg == FsTreeWalker::FtwDirReturn) {
+	me->config->setKeyDir(fn);
 	cout << "indexfile: [" << fn << "]" << endl;
+	cout << "   defcharset: " << me->config->getDefCharset()
+	     << " deflang: " << me->config->getDefLang() << endl;
+
 	return FsTreeWalker::FtwOk;
     }
-    string mtype = mimetype(fn, me->config->getMimeMap());
-    if (mtype.length() > 0) 
-	cout << "indexfile: " << mtype << " " << fn << endl;
-    else
-	cout << "indexfile: " << "(nomime)" << " " << fn << endl;
 
+    string mime = mimetype(fn, me->config->getMimeMap());
+    if (mime.length() == 0) {
+	cout << "indexfile: " << "(no mime)" << " " << fn << endl;
+	// No mime type ?? pass on.
+	return FsTreeWalker::FtwOk;
+    }
+
+    cout << "indexfile: " << mime << " " << fn << endl;
+
+    // Look for appropriate handler
+    MimeHandlerFunc fun = getMimeHandler(mime, me->config->getMimeConf());
+    if (!fun) {
+	// No handler for this type, for now :(
+	return FsTreeWalker::FtwOk;
+    }
+
+    // Check if file has already been indexed, and has changed since
+    // - Make path term, 
+    // - query db: postlist_begin->docid
+    // - fetch doc (get_document(docid)
+    // - check date field, maybe skip
+
+    // Turn file into a document. The document has fields for title, body 
+    // etc.,  all text converted to utf8
+    Rcl::Doc *doc = fun(me->config, fn,  mime);
+
+#if 0
+    // Set up xapian document, add postings and misc fields, 
+    // add to or update database.
+    dbadd(doc);
+#endif
+
+    return FsTreeWalker::FtwOk;
 }
 
 
@@ -63,11 +178,18 @@ int main(int argc, const char **argv)
 	cerr << "No top directories in configuration" << endl;
 	exit(1);
     }
-    list<string> tdl;
+    vector<string> tdl;
     if (ConfTree::stringToStrings(topdirs, tdl)) {
-	for (list<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
-	    cout << *it << endl;
-	    DirIndexer indexer(config, *it);
+	for (int i = 0; i < tdl.size(); i++) {
+	    string topdir = tdl[i];
+	    cout << topdir << endl;
+	    string dbdir;
+	    if (conf->get("dbdir", dbdir, topdir) == 0) {
+		cerr << "No database directory in configuration for " 
+		     << topdir << endl;
+		exit(1);
+	    }
+	    DirIndexer indexer(config, dbdir, topdir);
 	    indexer.index();
 	}
     }
diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
new file mode 100644
index 00000000..3e1e134c
--- /dev/null
+++ b/src/rcldb/rcldb.cpp
@@ -0,0 +1,121 @@
+#ifndef lint
+static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes";
+#endif
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+#include "rcldb.h"
+
+#include "xapian.h"
+
+// Data for a xapian database
+class Native {
+ public:
+    bool isopen;      // Set by Db::open() once a database has been opened
+    bool iswritable;  // Set by Db::open() for the update and truncate modes
+    class Xapian::Database db;          // Not used by the code in this file yet
+    class Xapian::WritableDatabase wdb; // Handle used by the writable open modes
+    vector<bool> updated; // Sized to last docid + 1 on DbUpd open; presumably per-document update flags (TODO confirm)
+
+    Native() : isopen(false), iswritable(false) {}
+
+};
+
+// Allocate the private data; no database is opened at this point.
+Rcl::Db::Db() 
+    : pdata(new Native)
+{}
+
+// Flush pending changes if the database is open for writing, then free the
+// private data. Exceptions are reported and swallowed: a destructor must
+// not throw. The private data is released even when the flush fails.
+Rcl::Db::~Db()
+{
+    Native *ndb = (Native *)pdata;
+    if (ndb == 0)
+	return;
+    try {
+	// There is nothing to do for an ro db.
+	if (ndb->isopen && ndb->iswritable)
+	    ndb->wdb.flush();
+    } catch (const Xapian::Error &e) {
+	cout << "Exception: " << e.get_msg() << endl;
+    } catch (const string &s) {
+	cout << "Exception: " << s << endl;
+    } catch (const char *s) {
+	cout << "Exception: " << s << endl;
+    } catch (...) {
+	cout << "Caught unknown exception" << endl;
+    }
+    delete ndb;
+}
+
+// Open the database in 'dir' for update (DbUpd) or after truncation (DbTrunc).
+// Read-only mode is not implemented: it is refused. Returns false on error.
+bool Rcl::Db::open(const string& dir, OpenMode mode)
+{
+    Native *ndb = (Native *)pdata;
+    if (ndb == 0)
+	return false;
+    try {
+	switch (mode) {
+	case DbUpd:
+	    ndb->wdb = Xapian::Auto::open(dir, Xapian::DB_CREATE_OR_OPEN);
+	    ndb->updated.resize(ndb->wdb.get_lastdocid() + 1);
+	    break;
+	case DbTrunc:
+	    ndb->wdb = Xapian::Auto::open(dir, Xapian::DB_CREATE_OR_OVERWRITE);
+	    break;
+	case DbRO:
+	default:
+	    cerr << "Not ready to open RO yet" << endl;
+	    return false; // Fail the call; do not exit the process from here
+	}
+	ndb->iswritable = true;
+	ndb->isopen = true;
+	return true;
+    } catch (const Xapian::Error &e) {
+	cout << "Exception: " << e.get_msg() << endl;
+    } catch (const string &s) {
+	cout << "Exception: " << s << endl;
+    } catch (const char *s) {
+	cout << "Exception: " << s << endl;
+    } catch (...) {
+	cout << "Caught unknown exception" << endl;
+    }
+    return false;
+}
+// Flush (if writable) and release the open database, leaving a fresh closed
+// state. True if not open or closed cleanly, false if the flush failed.
+bool Rcl::Db::close()
+{
+    Native *ndb = (Native *)pdata;
+    if (ndb == 0)
+	return false;
+    if (!ndb->isopen)
+	return true;
+    try {
+	if (ndb->iswritable)
+	    ndb->wdb.flush();
+    } catch (const Xapian::Error &e) {
+	cout << "Exception: " << e.get_msg() << endl;
+	return false;
+    } catch (const string &s) {
+	cout << "Exception: " << s << endl;
+	return false;
+    } catch (const char *s) {
+	cout << "Exception: " << s << endl;
+	return false;
+    } catch (...) {
+	cout << "Caught unknown exception" << endl;
+	return false;
+    }
+    pdata = 0; // Never leave pdata pointing at freed memory if 'new' throws
+    delete ndb;
+    pdata = new Native;
+    return true;
+}
diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h
new file mode 100644
index 00000000..fb586eed
--- /dev/null
+++ b/src/rcldb/rcldb.h
@@ -0,0 +1,32 @@
+#ifndef _DB_H_INCLUDED_
+#define _DB_H_INCLUDED_
+/* @(#$Id: rcldb.h,v 1.1 2004-12-14 17:50:28 dockes Exp $  (C) 2004 J.F.Dockes */
+
+#include <string>
+
+namespace Rcl {
+
+/** Wrapper class for the native database. Owns pdata, hence not copyable. */
+class Db {
+    void *pdata;               // Opaque implementation data
+    Db(const Db &);            // Copying is disabled (declared, not defined)
+    Db &operator=(const Db &);
+ public:
+    Db();
+    ~Db();
+    enum OpenMode {DbRO, DbUpd, DbTrunc};
+    bool open(const std::string &dbdir, OpenMode mode); // false on error
+    bool close();                                       // false on error
+};
+
+class Doc { // Text fields of a document, as returned by the mime handler functions
+ public:
+    std::string title;    // std:: qualified: this header has no using-declaration
+    std::string abstract;
+    std::string keywords;
+    std::string text;
+};
+
+}
+
+#endif /* _DB_H_INCLUDED_ */
diff --git a/src/utils/execmd.cpp b/src/utils/execmd.cpp
index 1ef3d6ac..961749df 100644
--- a/src/utils/execmd.cpp
+++ b/src/utils/execmd.cpp
@@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: execmd.cpp,v 1.1 2004-12-12 08:58:12 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: execmd.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 
 #include <unistd.h>
@@ -14,6 +14,7 @@ static char rcsid[] = "@(#$Id: execmd.cpp,v 1.1 2004-12-12 08:58:12 dockes Exp $
 #include <iostream>
 
 #include "execmd.h"
+#include "pathut.h"
 
 using namespace std;
 #define MAX(A,B) (A>B?A:B)
@@ -152,7 +153,7 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
 	}
 	
 	// Fill up argv
-	argv[0] = cmd.c_str();
+	string argv0 = path_getsimple(cmd); argv[0] = argv0.c_str(); // named copy: c_str() of a temporary would dangle
 	i = 1;
 	for (it = args.begin(); it != args.end(); it++) {
 	    argv[i++] = it->c_str();
@@ -160,7 +161,7 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
 	argv[i] = 0;
 
 #if 0
-	{int i = 0;cerr << "cmd: " << cmd << endl << "ARGS:" << endl; 
+	{int i = 0;cerr << "cmd: " << cmd << endl << "ARGS: " << endl; 
 	    while (argv[i]) cerr << argv[i++] << endl;}
 #endif
 
diff --git a/src/utils/pathut.cpp b/src/utils/pathut.cpp
index 4881c732..a8d90c2b 100644
--- a/src/utils/pathut.cpp
+++ b/src/utils/pathut.cpp
@@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: pathut.cpp,v 1.1 2004-12-10 18:13:14 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: pathut.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 
 #ifndef TEST_PATHUT
@@ -31,6 +31,20 @@ std::string path_getfather(const std::string &s) {
     return father;
 }
 
+// Return the simple file name of path 's': what follows the last '/'.
+// A path with no '/' (the empty string included) is returned unchanged,
+// and a path ending with '/' yields an empty string.
+std::string path_getsimple(const std::string &s) {
+    const std::string::size_type lastslash = s.rfind('/');
+
+    // No separator: there is nothing to strip
+    if (lastslash == std::string::npos)
+	return s;
+
+    // Keep only what comes after the separator
+    return s.substr(lastslash + 1);
+}
+
 std::string path_home()
 {
     uid_t uid = getuid();
@@ -53,13 +67,18 @@ using namespace std;
 
 const char *tstvec[] = {"", "/", "/dir", "/dir/", "/dir1/dir2",
 			 "/dir1/dir2",
-			 "./dir", "./dir1/", "dir", "../dir"};
+			"./dir", "./dir1/", "dir", "../dir", "/dir/toto.c",
+			"/dir/.c",
+};
 
 int main(int argc, const char **argv)
 {
 
     for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
-	cout << tstvec[i] << " -> " << path_getfather(tstvec[i]) << endl;
+	cout << tstvec[i] << " FATHER " << path_getfather(tstvec[i]) << endl;
+    }
+    for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
+	cout << tstvec[i] << " SIMPLE " << path_getsimple(tstvec[i]) << endl;
     }
     return 0;
 }
diff --git a/src/utils/pathut.h b/src/utils/pathut.h
index a4e253b4..c191bb61 100644
--- a/src/utils/pathut.h
+++ b/src/utils/pathut.h
@@ -1,6 +1,6 @@
 #ifndef _PATHUT_H_INCLUDED_
 #define _PATHUT_H_INCLUDED_
-/* @(#$Id: pathut.h,v 1.1 2004-12-10 18:13:14 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: pathut.h,v 1.2 2004-12-14 17:54:16 dockes Exp $  (C) 2004 J.F.Dockes */
 
 #include <string>
 
@@ -13,6 +13,7 @@ inline void path_cat(std::string &s1, const std::string &s2) {
     s1 += s2;
 }
 		     
+extern std::string path_getsimple(const std::string &s);
 extern std::string path_getfather(const std::string &s);
 extern std::string path_home();
 
diff --git a/src/utils/readfile.cpp b/src/utils/readfile.cpp
new file mode 100644
index 00000000..83dbaa09
--- /dev/null
+++ b/src/utils/readfile.cpp
@@ -0,0 +1,49 @@
+#ifndef lint
+static char rcsid[] = "@(#$Id: readfile.cpp,v 1.1 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
+#endif
+
+#include <unistd.h>
+#include <fcntl.h>
+#ifndef O_STREAMING
+#define O_STREAMING 0
+#endif
+#include <errno.h>
+
+#include <string>
+using std::string;
+
+#include "readfile.h"
+
+// Read the whole of file 'fn', appending its contents to 'data'.
+// Returns true on success; false if the file cannot be opened or read, or
+// if appending fails ('data' may then hold a partial copy of the file).
+bool file_to_string(const string &fn, string &data)
+{
+    int fd = open(fn.c_str(), O_RDONLY|O_STREAMING);
+    if (fd < 0)
+	return false;
+
+    bool ret = false;
+    char buf[4096];
+    for (;;) {
+	ssize_t n = read(fd, buf, sizeof(buf));
+	if (n < 0) {
+	    // Interrupted by a signal before any data was read: just retry
+	    if (errno == EINTR)
+		continue;
+	    break;
+	}
+	if (n == 0) {
+	    // End of file: everything was read
+	    ret = true;
+	    break;
+	}
+	try {
+	    data.append(buf, n);
+	} catch (...) {
+	    break;
+	}
+    }
+    close(fd);
+    return ret;
+}
diff --git a/src/utils/readfile.h b/src/utils/readfile.h
new file mode 100644
index 00000000..bb668ff6
--- /dev/null
+++ b/src/utils/readfile.h
@@ -0,0 +1,13 @@
+#ifndef _READFILE_H_INCLUDED_
+#define _READFILE_H_INCLUDED_
+/* @(#$Id: readfile.h,v 1.1 2004-12-14 17:54:16 dockes Exp $  (C) 2004 J.F.Dockes */
+
+#include <string>
+
+/**
+ * Read whole file into string. 
+ * @return true for ok, false else
+ */
+bool file_to_string(const std::string &filename, std::string &data);
+
+#endif /* _READFILE_H_INCLUDED_ */