From 9b273d94e864e221e86389e9266c630e90af0e66 Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes <jfd@recoll.org>
Date: Sat, 15 Sep 2012 15:16:20 +0200
Subject: [PATCH] ensure that recoll configured with indexStripChars=1 runs as
 compiled with -DRCL_INDEX_STRIPCHARS

--HG--
branch : CASEDIACSENS
---
 src/aspell/rclaspell.cpp   |  59 +++++--
 src/aspell/rclaspell.h     |  21 +--
 src/common/rclconfig.cpp   |  23 +--
 src/common/rclconfig.h     |  10 +-
 src/qtgui/reslist.cpp      |   7 +-
 src/query/plaintorich.cpp  |  24 ++-
 src/query/reslistpager.cpp |  13 +-
 src/query/xadump.cpp       |  32 +++-
 src/rcldb/expansiondbs.cpp |  35 ++--
 src/rcldb/expansiondbs.h   |  11 +-
 src/rcldb/rcldb.cpp        |  58 +++++--
 src/rcldb/rcldb.h          |  31 ++--
 src/rcldb/searchdata.cpp   | 316 ++++++++++++++++++++-----------------
 src/rcldb/stemdb.cpp       |  29 ++--
 src/utils/smallut.cpp      |   7 +-
 src/utils/smallut.h        |   7 +
 tests/config/recoll.conf   |   2 +
 17 files changed, 425 insertions(+), 260 deletions(-)
diff --git a/src/aspell/rclaspell.cpp b/src/aspell/rclaspell.cpp
index 4381bf04..67029bdf 100644
--- a/src/aspell/rclaspell.cpp
+++ b/src/aspell/rclaspell.cpp
@@ -23,9 +23,9 @@
 
 #include <unistd.h>
 #include <dlfcn.h>
-#include <iostream>
 #include <stdlib.h>
-#include <vector>
+
+using namespace std;
 
 #include ASPELL_INCLUDE
 
@@ -33,7 +33,7 @@
 #include "execmd.h"
 #include "rclaspell.h"
 #include "debuglog.h"
-
+#include "unacpp.h"
 #include "ptmutex.h"
 
 // Just a place where we keep the Aspell library entry points together
@@ -260,6 +260,14 @@ public:
 	while (m_db.termWalkNext(m_tit, *m_input)) {
 	    if (!Rcl::Db::isSpellingCandidate(*m_input))
 		continue;
+#ifndef RCL_INDEX_STRIPCHARS
+	    if (!o_index_stripchars) {
+		string lower;
+		if (!unacmaybefold(*m_input, lower, "UTF-8", UNACOP_FOLD))
+		    continue;
+		m_input->swap(lower);
+	    }
+#endif
 	    // Got a non-empty sort-of appropriate term, let's send it to
 	    // aspell
 	    m_input->append("\n");
@@ -335,17 +343,29 @@ bool Aspell::make_speller(string& reason)
     return true;
 }
 
-bool Aspell::check(Rcl::Db &db, const string &term, string& reason)
+bool Aspell::check(const string &iterm, string& reason)
 {
-    LOGDEB2(("Aspell::check [%s]\n", term.c_str()));
+    LOGDEB2(("Aspell::check [%s]\n", iterm.c_str()));
+    string mterm(iterm);
 
     if (!ok() || !make_speller(reason))
 	return false;
-    if (term.empty())
+    if (iterm.empty())
         return true; //??
 
+#ifndef RCL_INDEX_STRIPCHARS
+    if (!o_index_stripchars) {
+	string lower;
+	if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
+	    LOGERR(("Aspell::check : cant lowercase input\n"));
+	    return false;
+	}
+	mterm.swap(lower);
+    }
+#endif
+
     int ret = aapi.aspell_speller_check(m_data->m_speller, 
-                                        term.c_str(), term.length());
+                                        mterm.c_str(), mterm.length());
     reason.clear();
     switch (ret) {
     case 0: return false;
@@ -358,19 +378,31 @@ bool Aspell::check(Rcl::Db &db, const string &term, string& reason)
     }
 }
 
-bool Aspell::suggest(Rcl::Db &db, const string &term, 
+bool Aspell::suggest(Rcl::Db &db, const string &_term, 
                      list<string>& suggestions, string& reason)
 {
     if (!ok() || !make_speller(reason))
 	return false;
-    if (term.empty())
+    string mterm(_term);
+    if (mterm.empty())
         return true; //??
 
+#ifndef RCL_INDEX_STRIPCHARS
+    if (!o_index_stripchars) {
+	string lower;
+	if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
+	    LOGERR(("Aspell::check : cant lowercase input\n"));
+	    return false;
+	}
+	mterm.swap(lower);
+    }
+#endif
+
     AspellCanHaveError *ret;
 
     const AspellWordList *wl = 
 	aapi.aspell_speller_suggest(m_data->m_speller, 
-                                    term.c_str(), term.length());
+                                    mterm.c_str(), mterm.length());
     if (wl == 0) {
 	reason = aapi.aspell_speller_error_message(m_data->m_speller);
 	return false;
@@ -385,7 +417,7 @@ bool Aspell::suggest(Rcl::Db &db, const string &term,
         // ******** This should depend if
 	// stemming is turned on or not for querying  *******
 	string sw(word);
-	if (db.termExists(sw) && db.stemDiffers("english", sw, term))
+	if (db.termExists(sw) && db.stemDiffers("english", sw, mterm))
 	    suggestions.push_back(word);
     }
     aapi.delete_aspell_string_enumeration(els);
@@ -418,7 +450,6 @@ using namespace std;
 
 static char *thisprog;
 RclConfig *rclconfig;
-Rcl::Db rcldb;
 
 static char usage [] =
 " -b : build dictionary\n"
@@ -477,7 +508,9 @@ int main(int argc, char **argv)
 	exit(1);
     }
 
-    if (!rcldb.open(dbdir, Rcl::Db::DbRO, 0)) {
+    Rcl::Db rcldb(rclconfig);
+
+    if (!rcldb.open(Rcl::Db::DbRO, 0)) {
 	fprintf(stderr, "Could not open database in %s\n", dbdir.c_str());
 	exit(1);
     }
diff --git a/src/aspell/rclaspell.h b/src/aspell/rclaspell.h
index 06032495..b969aa75 100644
--- a/src/aspell/rclaspell.h
+++ b/src/aspell/rclaspell.h
@@ -37,11 +37,6 @@
 #include "rclconfig.h"
 #include "rcldb.h"
 
-#ifndef NO_NAMESPACES
-using std::string;
-using std::list;
-#endif // NO_NAMESPACES
-
 class AspellData;
 
 class Aspell {
@@ -53,26 +48,26 @@ class Aspell {
     bool ok() const;
 
     /** Find the aspell command and shared library, init function pointers */
-    bool init(string &reason); 
+    bool init(std::string &reason); 
 
     /**  Build dictionary out of index term list. This is done at the end
      * of an indexing pass. */
-    bool buildDict(Rcl::Db &db, string &reason);
+    bool buildDict(Rcl::Db &db, std::string &reason);
 
     /** Check that word is in dictionary. ret==false && !reason.empty() => err*/
-    bool check(Rcl::Db &db, const string& term, string& reason);
+    bool check(const std::string& term, std::string& reason);
 
     /** Return a list of possible expansions for a given word */
-    bool suggest(Rcl::Db &db, const string& term, list<string> &suggestions, 
-		 string &reason);
+    bool suggest(Rcl::Db &db, const std::string& term, 
+		 std::list<std::string> &suggestions, std::string &reason);
 
  private:
-    string dicPath();
+    std::string dicPath();
     RclConfig  *m_config;
-    string      m_lang;
+    std::string      m_lang;
     AspellData *m_data;
 
-    bool make_speller(string& reason);
+    bool make_speller(std::string& reason);
 };
 
 #endif /* RCL_USE_ASPELL */
diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp
index b303fce9..a3c5245f 100644
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@@ -15,6 +15,8 @@
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 #ifndef TEST_RCLCONFIG
+#include "autoconfig.h"
+
 #include <unistd.h>
 #include <fcntl.h>
 #include <stdio.h>
@@ -34,6 +36,7 @@
 #include <iostream>
 #include <cstdlib>
 #include <cstring>
+using namespace std;
 
 #include "cstr.h"
 #include "pathut.h"
@@ -45,15 +48,8 @@
 #include "readfile.h"
 #include "fstreewalk.h"
 
-#ifndef NO_NAMESPACES
-using namespace std;
-#endif /* NO_NAMESPACES */
-
-#ifndef MIN
-#define MIN(A,B) (((A)<(B)) ? (A) : (B))
-#endif
-#ifndef MAX
-#define MAX(A,B) (((A)>(B)) ? (A) : (B))
+#ifndef RCL_INDEX_STRIPCHARS
+bool o_index_stripchars;
 #endif
 
 bool ParamStale::needrecompute()
@@ -77,6 +73,7 @@ bool ParamStale::needrecompute()
     }
     return false;
 }
+
 void ParamStale::init(RclConfig *rconf, ConfNull *cnf, const string& nm)
 {
     parent = rconf;
@@ -239,6 +236,14 @@ bool RclConfig::updateMainConfig()
 	FsTreeWalker::setNoFnmPathname();
     }
 
+#ifndef RCL_INDEX_STRIPCHARS
+    static int m_index_stripchars_init = 0;
+    if (!m_index_stripchars_init) {
+	getConfParam("indexStripChars", &o_index_stripchars);
+	m_index_stripchars_init = 1;
+    }
+#endif
+
     return true;
 }
 
diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h
index 64ce44c6..3fd29a52 100644
--- a/src/common/rclconfig.h
+++ b/src/common/rclconfig.h
@@ -303,5 +303,13 @@ class RclConfig {
     bool readFieldsConfig(const string& errloc);
 };
 
-
+// This global variable defines if we are running with an index
+// stripped of accents and case or a raw one. Ideally, it should be
+// constant, but it needs to be initialized from the configuration, so
+// there is no way to do this. It never changes after initialization
+// of course. When set, it is supposed to get all of recoll to behave as
+// if it was compiled with RCL_INDEX_STRIPCHARS
+#ifndef  RCL_INDEX_STRIPCHARS
+extern bool o_index_stripchars;
+#endif
 #endif /* _RCLCONFIG_H_INCLUDED_ */
diff --git a/src/qtgui/reslist.cpp b/src/qtgui/reslist.cpp
index 491d93cf..9cbf037d 100644
--- a/src/qtgui/reslist.cpp
+++ b/src/qtgui/reslist.cpp
@@ -197,10 +197,14 @@ void QtGuiResListPager::suggest(const vector<string>uterms,
 	// If the term is in the index, we don't suggest alternatives. 
 	// Actually, we may want to check the frequencies and propose something
 	// anyway if a possible variation is much more common (as google does)
-        if (aspell->check(*rcldb, *uit, reason))
+#warning need to take case and diacs sensibility into account somehow	
+	// Maybe use the xapian index instead ? How to retrieve the sensitivity flags ?
+	if (0) {
+        if (aspell->check(*uit, reason))
             continue;
         else if (!reason.empty())
             return;
+	}
         if (!aspell->suggest(*rcldb, *uit, asuggs, reason)) {
             LOGERR(("QtGuiResListPager::suggest: aspell failed: %s\n", 
                     reason.c_str()));
@@ -336,6 +340,7 @@ ResList::~ResList()
 	QT_TR_NOOP("Open"),
 	QT_TR_NOOP("(show query)"),
         QT_TR_NOOP("<p><i>Alternate spellings (accents suppressed): </i>"),
+        QT_TR_NOOP("<p><i>Alternate spellings: </i>"),
     };
 }
 
diff --git a/src/query/plaintorich.cpp b/src/query/plaintorich.cpp
index b7c461d5..3dab6f8d 100644
--- a/src/query/plaintorich.cpp
+++ b/src/query/plaintorich.cpp
@@ -79,22 +79,30 @@ class TextSplitPTR : public TextSplit {
 	for (vector<vector<string> >::const_iterator vit = hdata.groups.begin();
 	     vit != hdata.groups.end(); vit++) {
 	    if (vit->size() == 1) {
-#ifdef RCL_INDEX_STRIPCHARS
-		m_terms[vit->front()] = vit - hdata.groups.begin();
-#else
-		string dumb = vit->front();
-		unacmaybefold(vit->front(), dumb, "UTF-8", UNACOP_UNACFOLD);
-		m_terms[dumb] = vit - hdata.groups.begin();
+#ifndef RCL_INDEX_STRIPCHARS
+		if (o_index_stripchars) {
+#endif
+		    m_terms[vit->front()] = vit - hdata.groups.begin();
+#ifndef RCL_INDEX_STRIPCHARS
+		} else {
+		    string dumb = vit->front();
+		    unacmaybefold(vit->front(), dumb, "UTF-8", UNACOP_UNACFOLD);
+		    m_terms[dumb] = vit - hdata.groups.begin();
+		}
 #endif
 	    } else if (vit->size() > 1) {
 		for (vector<string>::const_iterator it = vit->begin(); 
 		     it != vit->end(); it++) {
-#ifdef RCL_INDEX_STRIPCHARS
+#ifndef RCL_INDEX_STRIPCHARS
+		if (o_index_stripchars) {
+#endif
 		    m_gterms.insert(*it);
-#else
+#ifndef RCL_INDEX_STRIPCHARS
+		} else {
 		    string dumb = *it;
 		    unacmaybefold(*it, dumb, "UTF-8", UNACOP_UNACFOLD);
 		    m_gterms.insert(dumb);
+		}
 #endif
 		}
 	    }
diff --git a/src/query/reslistpager.cpp b/src/query/reslistpager.cpp
index 9718709f..d7c7b1d0 100644
--- a/src/query/reslistpager.cpp
+++ b/src/query/reslistpager.cpp
@@ -320,9 +320,16 @@ void ResListPager::displayPage(RclConfig *config)
             map<string, vector<string> > spellings;
             suggest(uterms, spellings);
             if (!spellings.empty()) {
-                chunk << 
-                 trans("<p><i>Alternate spellings (accents suppressed): </i>")
-		      << "<br /><blockquote>";
+		if (o_index_stripchars) {
+		    chunk << 
+		trans("<p><i>Alternate spellings (accents suppressed): </i>")
+			  << "<br /><blockquote>";
+		} else {
+		    chunk << 
+			trans("<p><i>Alternate spellings: </i>")
+			  << "<br /><blockquote>";
+		    
+		}
 
 		for (map<string, vector<string> >::const_iterator it0 =
 			 spellings.begin(); it0 != spellings.end(); it0++) {
diff --git a/src/query/xadump.cpp b/src/query/xadump.cpp
index fddc9853..dd64a9ef 100644
--- a/src/query/xadump.cpp
+++ b/src/query/xadump.cpp
@@ -116,12 +116,20 @@ static void sigcleanup(int sig)
     exit(1);
 }
 
+#ifndef RCL_INDEX_STRIPCHARS
+bool o_index_stripchars;
+#endif
+
 inline bool has_prefix(const string& trm)
 {
-#ifdef RCL_INDEX_STRIPCHARS
-    return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z';
-#else
-    return trm.size() > 0 && trm[0] == ':';
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars) {
+#endif
+	return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z';
+#ifndef RCL_INDEX_STRIPCHARS
+    } else {
+	return trm.size() > 0 && trm[0] == ':';
+    }
 #endif
 }
 
@@ -201,10 +209,22 @@ int main(int argc, char **argv)
 
     try {
 	db = new Xapian::Database(dbdir);
-
 	cout << "DB: ndocs " << db->get_doccount() << " lastdocid " <<
 	    db->get_lastdocid() << " avglength " << db->get_avlength() << endl;
-	    
+
+#ifndef RCL_INDEX_STRIPCHARS
+	// If we have terms with a leading ':' it's a new style,
+	// unstripped index
+	{
+	    Xapian::TermIterator term = db->allterms_begin(":");
+	    if (term == db->allterms_end())
+		o_index_stripchars = true;
+	    else
+		o_index_stripchars = false;
+	    cout<<"DB: terms are "<<(o_index_stripchars?"stripped":"raw")<<endl;
+	}
+#endif
+    
 	if (op_flags & OPT_T) {
 	    Xapian::TermIterator term;
 	    string printable;
diff --git a/src/rcldb/expansiondbs.cpp b/src/rcldb/expansiondbs.cpp
index da53f77e..b3ad3e88 100644
--- a/src/rcldb/expansiondbs.cpp
+++ b/src/rcldb/expansiondbs.cpp
@@ -63,17 +63,19 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
     // Unaccented stem dbs
     vector<XapWritableComputableSynFamMember> unacstemdbs;
     // We can reuse the same stemmer pointers, the objects are stateless.
-    for (unsigned int i = 0; i < langs.size(); i++) {
-	unacstemdbs.push_back(
-	    XapWritableComputableSynFamMember(wdb, synFamStemUnac, langs[i], 
-					      stemmers.back().getptr()));
-	unacstemdbs.back().recreate();
+    if (!o_index_stripchars) {
+	for (unsigned int i = 0; i < langs.size(); i++) {
+	    unacstemdbs.push_back(
+		XapWritableComputableSynFamMember(wdb, synFamStemUnac, langs[i], 
+						  stemmers.back().getptr()));
+	    unacstemdbs.back().recreate();
+	}
     }
-
     SynTermTransUnac transunac(UNACOP_UNACFOLD);
     XapWritableComputableSynFamMember 
 	diacasedb(wdb, synFamDiCa, "all", &transunac);
-    diacasedb.recreate();
+    if (!o_index_stripchars)
+	diacasedb.recreate();
 #endif
 
     // Walk the list of all terms, and stem/unac each.
@@ -109,8 +111,10 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
 	    // is the input to the stem db, and add a synonym from the
 	    // stripped term to the cased and accented one, for accent
 	    // and case expansion at query time
-	    unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
-	    diacasedb.addSynonym(*it);
+	    if (!o_index_stripchars) {
+		unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
+		diacasedb.addSynonym(*it);
+	    }
 #endif
 
 	    // Create stemming synonym for every language. The input is the 
@@ -124,12 +128,15 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
 	    // the unaccented term. While this may be incorrect, it is
 	    // also necessary for searching in a diacritic-unsensitive
 	    // way on a raw index
-	    string unac;
-	    unacmaybefold(lower, unac, "UTF-8", UNACOP_UNAC);
-	    if (unac != lower)
-		for (unsigned int i = 0; i < langs.size(); i++) {
-		    unacstemdbs[i].addSynonym(unac);
+	    if (!o_index_stripchars) {
+		string unac;
+		unacmaybefold(lower, unac, "UTF-8", UNACOP_UNAC);
+		if (unac != lower) {
+		    for (unsigned int i = 0; i < langs.size(); i++) {
+			unacstemdbs[i].addSynonym(unac);
+		    }
 		}
+	    }
 #endif
         }
     } XCATCHERROR(ermsg);
diff --git a/src/rcldb/expansiondbs.h b/src/rcldb/expansiondbs.h
index 97846870..dd819826 100644
--- a/src/rcldb/expansiondbs.h
+++ b/src/rcldb/expansiondbs.h
@@ -24,10 +24,13 @@
 
 namespace Rcl {
 
-/* A Capitals/Diacritics removal functor for using with
-   XapComputableSynFamMember */
+/** A Capitals/Diacritics removal functor for using with
+ *  XapComputableSynFamMember */
 class SynTermTransUnac : public SynTermTrans {
 public:
+    /** Constructor
+     * @param op defines if we remove diacritics, case or both 
+     */
     SynTermTransUnac(UnacOp op)
     : m_op(op)
     {
@@ -43,7 +46,9 @@ public:
     UnacOp m_op;
 };
 
-/** Walk the Xapian term list and create all the expansion dbs in one go */
+/** Walk the Xapian term list and create all the expansion dbs in one go.
+ * 
+ */
 extern bool createExpansionDbs(Xapian::WritableDatabase& wdb, 
 			       const std::vector<std::string>& langs);
 }
diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
index 671e63c1..581436fe 100644
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@@ -92,10 +92,11 @@ const string start_of_field_term = "XXST";
 const string end_of_field_term = "XXND";
 static const string page_break_term = "XXPG";
 #else
-const string start_of_field_term = "XXST/";
-const string end_of_field_term = "XXND/";
-static const string page_break_term = "XXPG/";
+string start_of_field_term;
+string end_of_field_term;
+const string page_break_term = "XXPG/";
 #endif
+
 // Field name for the unsplit file name. Has to exist in the field file 
 // because of usage in termmatch()
 static const string unsplitFilenameFieldName = "rclUnsplitFN";
@@ -683,6 +684,18 @@ Db::Db(RclConfig *cfp)
       m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0), m_occFirstCheck(1),
       m_maxFsOccupPc(0), m_mode(Db::DbRO)
 {
+#ifndef RCL_INDEX_STRIPCHARS
+    if (start_of_field_term.empty()) {
+	if (o_index_stripchars) {
+	    start_of_field_term = "XXST";
+	    end_of_field_term = "XXND";
+	} else {
+	    start_of_field_term = "XXST/";
+	    end_of_field_term = "XXND/";
+	}
+    }
+#endif
+
     m_ndb = new Native(this);
     if (m_config) {
 	m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
@@ -886,12 +899,13 @@ int Db::termDocCnt(const string& _term)
         return -1;
 
     string term = _term;
-#ifdef RCL_INDEX_STRIPCHARS
-    if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
-	LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
-	return 0;
-    }
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars)
 #endif
+	if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
+	    LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
+	    return 0;
+	}
 
     if (m_stops.isStop(term)) {
 	LOGDEB1(("Db::termDocCnt [%s] in stop list\n", term.c_str()));
@@ -1151,13 +1165,17 @@ string Db::getSpellingSuggestion(const string& word)
 {
     if (m_ndb == 0)
 	return string();
+
     string term = word;
-#ifdef RCL_INDEX_STRIPCHARS
+
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars)
+#endif
     if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
 	LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
 	return string();
     }
-#endif
+
     if (!isSpellingCandidate(term))
 	return string();
     return m_ndb->xrdb.get_spelling_suggestion(term);
@@ -1266,9 +1284,12 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
     TermProc *nxt = &tpidx;
     TermProcStop tpstop(nxt, m_stops);nxt = &tpstop;
     //TermProcCommongrams tpcommon(nxt, m_stops); nxt = &tpcommon;
-#ifdef RCL_INDEX_STRIPCHARS
-    TermProcPrep tpprep(nxt); nxt = &tpprep;
+
+    TermProcPrep tpprep(nxt);
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars)
 #endif
+	nxt = &tpprep;
 
     TextSplitDb splitter(newdocument, nxt);
     tpidx.setTSD(&splitter);
@@ -1951,12 +1972,15 @@ bool Db::termMatch(MatchType typ, const string &lang,
     // Get rid of capitals and accents
 
     string droot = root;
-#ifdef RCL_INDEX_STRIPCHARS
-    if (!unacmaybefold(root, droot, "UTF-8", UNACOP_UNACFOLD)) {
-	LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str()));
-	return false;
-    }
+
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars)
 #endif
+	if (!unacmaybefold(root, droot, "UTF-8", UNACOP_UNACFOLD)) {
+	    LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str()));
+	    return false;
+	}
+
     string nochars = typ == ET_WILD ? cstr_wildSpecChars : cstr_regSpecChars;
 
     string prefix;
diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h
index 445e63a2..2c81b354 100644
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@@ -129,18 +129,27 @@ extern  void *DbUpdWorker(void*);
 
 inline bool has_prefix(const string& trm)
 {
-#ifdef RCL_INDEX_STRIPCHARS
-    return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z';
-#else
-    return !trm.empty() && trm[0] == ':';
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars) {
+#endif
+	return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z';
+#ifndef RCL_INDEX_STRIPCHARS
+    } else {
+	return !trm.empty() && trm[0] == ':';
+    }
 #endif
 }
+
 inline string wrap_prefix(const string& pfx) 
 {
-#ifdef RCL_INDEX_STRIPCHARS
-    return pfx;
-#else
-    return cstr_colon + pfx + cstr_colon;
+#ifndef RCL_INDEX_STRIPCHARS
+    if (o_index_stripchars) {
+#endif
+	return pfx;
+#ifndef RCL_INDEX_STRIPCHARS
+    } else {
+	return cstr_colon + pfx + cstr_colon;
+    }
 #endif
 }
 
@@ -384,9 +393,13 @@ private:
 string version_string();
 
 extern const string pathelt_prefix;
+#ifdef RCL_INDEX_STRIPCHARS
 extern const string start_of_field_term;
 extern const string end_of_field_term;
-
+#else
+extern string start_of_field_term;
+extern string end_of_field_term;
+#endif
 }
 
 #endif /* _DB_H_INCLUDED_ */
diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp
index bddacf3f..c28ea8cc 100644
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@@ -79,10 +79,22 @@ static const int original_term_wqf_booster = 10;
 
 #ifdef RCL_INDEX_STRIPCHARS
 #define bufprefix(BUF, L) {(BUF)[0] = L;}
-#define bpoffs 1
+#define bpoffs() 1
 #else
-#define bufprefix(BUF, L) {(BUF)[0] = ':'; (BUF)[1] = L; (BUF)[2] = ':';}
-#define bpoffs 3
+static inline void bufprefix(char *buf, char c)
+{
+    if (o_index_stripchars) {
+	buf[0] = c;
+    } else {
+	buf[0] = ':'; 
+	buf[1] = c; 
+	buf[2] = ':';
+    }
+}
+static inline int bpoffs() 
+{
+    return o_index_stripchars ? 1 : 3;
+}
 #endif
 
 static Xapian::Query
@@ -92,7 +104,7 @@ date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
     // only doing %d's !
     char buf[200];
     bufprefix(buf, 'D');
-    sprintf(buf+bpoffs, "%04d%02d", y1, m1);
+    sprintf(buf+bpoffs(), "%04d%02d", y1, m1);
     vector<Xapian::Query> v;
 
     int d_last = monthdays(m1, y1);
@@ -103,7 +115,7 @@ date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
     // Deal with any initial partial month
     if (d1 > 1 || d_end < d_last) {
     	for ( ; d1 <= d_end ; d1++) {
-	    sprintf(buf + 6 + bpoffs, "%02d", d1);
+	    sprintf(buf + 6 + bpoffs(), "%02d", d1);
 	    v.push_back(Xapian::Query(buf));
 	}
     } else {
@@ -117,32 +129,32 @@ date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
 
     int m_last = (y1 < y2) ? 12 : m2 - 1;
     while (++m1 <= m_last) {
-	sprintf(buf + 4 + bpoffs, "%02d", m1);
+	sprintf(buf + 4 + bpoffs(), "%02d", m1);
 	bufprefix(buf, 'M');
 	v.push_back(Xapian::Query(buf));
     }
 	
     if (y1 < y2) {
 	while (++y1 < y2) {
-	    sprintf(buf + bpoffs, "%04d", y1);
+	    sprintf(buf + bpoffs(), "%04d", y1);
 	    bufprefix(buf, 'Y');
 	    v.push_back(Xapian::Query(buf));
 	}
-	sprintf(buf + bpoffs, "%04d", y2);
+	sprintf(buf + bpoffs(), "%04d", y2);
 	bufprefix(buf, 'M');
 	for (m1 = 1; m1 < m2; m1++) {
-	    sprintf(buf + 4 + bpoffs, "%02d", m1);
+	    sprintf(buf + 4 + bpoffs(), "%02d", m1);
 	    v.push_back(Xapian::Query(buf));
 	}
     }
 	
-    sprintf(buf + 2 + bpoffs, "%02d", m2);
+    sprintf(buf + 2 + bpoffs(), "%02d", m2);
 
     // Deal with any final partial month
     if (d2 < monthdays(m2, y2)) {
 	bufprefix(buf, 'D');
     	for (d1 = 1 ; d1 <= d2; d1++) {
-	    sprintf(buf + 6 + bpoffs, "%02d", d1);
+	    sprintf(buf + 6 + bpoffs(), "%02d", d1);
 	    v.push_back(Xapian::Query(buf));
 	}
     } else {
@@ -663,13 +675,13 @@ static void listVector(const string& what, const vector<string>&l)
  */
 void StringToXapianQ::expandTerm(int mods, 
 				 const string& term, 
-                                 vector<string>& exp, string &sterm,
+                                 vector<string>& oexp, string &sterm,
 				 const string& prefix)
 {
     LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",
 	     mods, m_field.c_str(), term.c_str(), m_stemlang.c_str()));
     sterm.clear();
-    exp.clear();
+    oexp.clear();
     if (term.empty())
 	return;
 
@@ -693,145 +705,161 @@ void StringToXapianQ::expandTerm(int mods,
     bool diac_sensitive = (mods & SearchDataClause::SDCM_DIACSENS) != 0;
     bool case_sensitive = (mods & SearchDataClause::SDCM_CASESENS) != 0;
 
-    // If we are working with a raw index, apply the rules for case and 
-    // diacritics sensitivity.
+    if (o_index_stripchars) {
+	diac_sensitive = case_sensitive = false;
+    } else {
+	// If we are working with a raw index, apply the rules for case and 
+	// diacritics sensitivity.
 
-    // If any character has a diacritic, we become
-    // diacritic-sensitive. Note that the way that the test is
-    // performed (conversion+comparison) will automatically ignore
-    // accented characters which are actually a separate letter
-    if (unachasaccents(term))
-	diac_sensitive = true;
+	// If any character has a diacritic, we become
+	// diacritic-sensitive. Note that the way that the test is
+	// performed (conversion+comparison) will automatically ignore
+	// accented characters which are actually a separate letter
+	if (unachasaccents(term))
+	    diac_sensitive = true;
 
-    // If any character apart the first is uppercase, we become case-sensitive. 
-    // The first character is reserved for turning off stemming. You need to
-    // use a query language modifier to search for Floor in a case-sensitive
-    // way.
-    Utf8Iter it(term);
-    it++;
-    if (unachasuppercase(term.substr(it.getBpos())))
-	case_sensitive = true;
+	// If any character apart from the first is uppercase, we become
+	// case-sensitive.  The first character is reserved for
+	// turning off stemming. You need to use a query language
+	// modifier to search for Floor in a case-sensitive way.
+	Utf8Iter it(term);
+	it++;
+	if (unachasuppercase(term.substr(it.getBpos())))
+	    case_sensitive = true;
 
-    // If we are sensitive to case or diacritics turn stemming off
-    if (diac_sensitive || case_sensitive)
-	nostemexp = true;
+	// If we are sensitive to case or diacritics turn stemming off
+	if (diac_sensitive || case_sensitive)
+	    nostemexp = true;
 
-    if (!case_sensitive || !diac_sensitive)
-	noexpansion = false;
+	if (!case_sensitive || !diac_sensitive)
+	    noexpansion = false;
+    }
 #endif
 
     if (noexpansion) {
 	sterm = term;
-	exp.push_back(prefix + term);
-    } else {
-	TermMatchResult res;
-	if (haswild) {
-	    // Note that if there are wildcards, we do a direct from-index
-	    // expansion, which means that we are casediac-sensitive. There
-	    // would be nothing to prevent us to expand from the casediac
-	    // synonyms first. To be done later
-	    m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, 
-                           m_field);
-	} else {
-	    sterm = term;
-#ifdef RCL_INDEX_STRIPCHARS
-	    m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, 
-                           m_field);
-#else
-	    // No stem expansion when diacritic or case sensitivity is
-	    // set, it makes no sense (it would mess with the
-	    // diacritics anyway if they are not in the stem part).
-	    // In these 3 cases, perform appropriate expansion from
-	    // the charstripping db, and do a bogus wildcard expansion
-	    // (there is no wild card) to generate the result:
-	    if (diac_sensitive && case_sensitive) {
-		// No expansion whatsoever
-		m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, 
-			       m_field);
-	    } else {
-		// Access case and diacritics expansion:
-		vector<string> exp;
-		SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
-		XapComputableSynFamMember synac(m_db.m_ndb->xrdb, synFamDiCa,
-						"all", &unacfoldtrans);
+	oexp.push_back(prefix + term);
+	LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
+	return;
+    } 
 
-		if (diac_sensitive) {
-		    // Expand for accents and case, filtering for same accents,
-		    // then bogus wildcard expansion for generating result
-		    SynTermTransUnac foldtrans(UNACOP_FOLD);
-		    synac.synExpand(term, exp, &foldtrans);
-		    for (vector<string>::const_iterator it = exp.begin();
-			 it != exp.end(); it++) {
-			m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res, 
-				       -1, m_field);
-		    }
-		} else if (case_sensitive) {
-		    // Expand for accents and case, filtering for same case,
-		    // then bogus wildcard expansion for generating result
-		    SynTermTransUnac unactrans(UNACOP_UNAC);
-		    synac.synExpand(term, exp, &unactrans);
-		    for (vector<string>::const_iterator it = exp.begin();
-			 it != exp.end(); it++) {
-			m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res, 
-				       -1, m_field);
-		    }
-		} else {
-		    // Expand for accents and case, then lowercase
-		    // result for input to stemdb.
-		    synac.synExpand(term, exp);
-		    for (unsigned int i = 0; i < exp.size(); i++) {
-			string lower;
-			unacmaybefold(exp[i], lower, "UTF-8", UNACOP_FOLD);
-			exp[i] = lower;
-		    }
-		    sort(exp.begin(), exp.end());
-		    vector<string>::iterator uit = 
-			unique(exp.begin(), exp.end());
-		    exp.resize(uit - exp.begin());
-		    LOGDEB(("ExpandTerm: after casediac: %s\n", 
-			    stringsToString(exp).c_str()));
+    SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
+    XapComputableSynFamMember synac(m_db.m_ndb->xrdb, synFamDiCa, "all", 
+				    &unacfoldtrans);
+    vector<string> lexp;
 
-		    StemDb db(m_db.m_ndb->xrdb);
-		    vector<string> exp1;
-		    for (vector<string>::const_iterator it = exp.begin();
-			 it != exp.end(); it++) {
-			db.stemExpand(m_stemlang, *it, exp1);
-		    }
-		    LOGDEB(("ExpandTerm: after stem: %s\n", 
-			    stringsToString(exp1).c_str()));
-
-		    // Expand the resulting list for case (all stemdb content
-		    // is lowercase)
-		    exp.clear();
-		    for (vector<string>::const_iterator it = exp1.begin();
-			 it != exp1.end(); it++) {
-			synac.synExpand(*it, exp);
-		    }
-		    sort(exp.begin(), exp.end());
-		    uit = unique(exp.begin(), exp.end());
-		    exp.resize(uit - exp.begin());
-
-		    LOGDEB(("ExpandTerm: after case exp of stem: %s\n", 
-			    stringsToString(exp).c_str()));
-
-                    // Bogus wildcard expand to generate the result
-		    for (vector<string>::const_iterator it = exp.begin();
-			 it != exp.end(); it++) {
-			m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res, 
-				       -1, m_field);
-		    }
-
-		}
-	    }
-#endif
-	}
-
-	for (vector<TermMatchEntry>::const_iterator it = res.entries.begin(); 
-	     it != res.entries.end(); it++) {
-	    exp.push_back(it->term);
-	}
-	LOGDEB(("ExpandTerm: final: %s\n", stringsToString(exp).c_str()));
+    TermMatchResult res;
+    if (haswild) {
+	// Note that if there are wildcards, we do a direct from-index
+	// expansion, which means that we are casediac-sensitive. There
+	// would be nothing to prevent us from expanding from the casediac
+	// synonyms first. To be done later
+	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, 
+		       m_field);
+	goto termmatchtoresult;
     }
+
+    sterm = term;
+
+#ifdef RCL_INDEX_STRIPCHARS
+
+    m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
+
+#else
+
+    if (o_index_stripchars) {
+	// If the index is stripped, we can only come here if nostemexp is
+	// unset and we just need stem expansion.
+	m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
+	goto termmatchtoresult;
+    } 
+
+    // No stem expansion when diacritic or case sensitivity is set, it
+    // makes no sense (it would mess with the diacritics anyway if
+    // they are not in the stem part).  In these 3 cases, perform
+    // appropriate expansion from the charstripping db, and do a bogus
+    // wildcard expansion (there is no wild card) to generate the
+    // result:
+
+    if (diac_sensitive && case_sensitive) {
+	// No expansion whatsoever
+	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, m_field);
+	goto termmatchtoresult;
+    }
+
+    if (diac_sensitive) {
+	// Expand for accents and case, filtering for same accents,
+	// then bogus wildcard expansion for generating result
+	SynTermTransUnac foldtrans(UNACOP_FOLD);
+	synac.synExpand(term, lexp, &foldtrans);
+	goto exptotermatch;
+    } 
+
+    if (case_sensitive) {
+	// Expand for accents and case, filtering for same case, then
+	// bogus wildcard expansion for generating result
+	SynTermTransUnac unactrans(UNACOP_UNAC);
+	synac.synExpand(term, lexp, &unactrans);
+	goto exptotermatch;
+    }
+
+    // We are neither accent- nor case- sensitive and may need stem
+    // expansion or not.
+
+    // Expand for accents and case
+    synac.synExpand(term, lexp);
+    LOGDEB(("ExpTerm: casediac: %s\n", stringsToString(lexp).c_str()));
+    if (nostemexp)
+	goto exptotermatch;
+
+    // Need stem expansion. Lowercase the result of accent and case
+    // expansion for input to stemdb.
+    for (unsigned int i = 0; i < lexp.size(); i++) {
+	string lower;
+	unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
+	lexp[i] = lower;
+    }
+    sort(lexp.begin(), lexp.end());
+    {
+	vector<string>::iterator uit = unique(lexp.begin(), lexp.end());
+	lexp.resize(uit - lexp.begin());
+	StemDb db(m_db.m_ndb->xrdb);
+	vector<string> exp1;
+	for (vector<string>::const_iterator it = lexp.begin(); 
+	     it != lexp.end(); it++) {
+	    db.stemExpand(m_stemlang, *it, exp1);
+	}
+	LOGDEB(("ExpTerm: stem: %s\n", stringsToString(exp1).c_str()));
+
+	// Expand the resulting list for case (all stemdb content
+	// is lowercase)
+	lexp.clear();
+	for (vector<string>::const_iterator it = exp1.begin(); 
+	     it != exp1.end(); it++) {
+	    synac.synExpand(*it, lexp);
+	}
+	sort(lexp.begin(), lexp.end());
+	uit = unique(lexp.begin(), lexp.end());
+	lexp.resize(uit - lexp.begin());
+    }
+    LOGDEB(("ExpTerm: case exp of stem: %s\n", stringsToString(lexp).c_str()));
+
+    // Bogus wildcard expand to generate the result
+exptotermatch:
+    for (vector<string>::const_iterator it = lexp.begin();
+	 it != lexp.end(); it++) {
+	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, 
+		       res, -1, m_field);
+    }
+#endif
+
+    // Term match entries to vector of terms
+termmatchtoresult:
+    for (vector<TermMatchEntry>::const_iterator it = res.entries.begin(); 
+	 it != res.entries.end(); it++) {
+	oexp.push_back(it->term);
+    }
+    LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
 }
 
 // Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
@@ -1097,9 +1125,11 @@ bool StringToXapianQ::processUserString(const string &iq,
             TermProcStop tpstop(nxt, stops); nxt = &tpstop;
             //TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
             //tpcommon.onlygrams(true);
-#ifdef RCL_INDEX_STRIPCHARS
-	    TermProcPrep tpprep(nxt); nxt = &tpprep;
+	    TermProcPrep tpprep(nxt);
+#ifndef RCL_INDEX_STRIPCHARS
+	    if (o_index_stripchars)
 #endif
+		nxt = &tpprep;
 
 	    TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS | 
 						 TextSplit::TXTS_KEEPWILD), 
diff --git a/src/rcldb/stemdb.cpp b/src/rcldb/stemdb.cpp
index ad20553d..c3d435e8 100644
--- a/src/rcldb/stemdb.cpp
+++ b/src/rcldb/stemdb.cpp
@@ -26,6 +26,8 @@
 
 #include <algorithm>
 #include <map>
+#include <iostream>
+using namespace std;
 
 #include <xapian.h>
 
@@ -34,18 +36,14 @@
 #include "smallut.h"
 #include "synfamily.h"
 #include "unacpp.h"
-
-#include <iostream>
-
-using namespace std;
+#include "rclconfig.h"
 
 namespace Rcl {
 
 /**
  * Expand for one or several languages
  */
-bool StemDb::stemExpand(const std::string& langs,
-			const std::string& term,
+bool StemDb::stemExpand(const std::string& langs, const std::string& term,
 			vector<string>& result)
 {
     vector<string> llangs;
@@ -59,14 +57,17 @@ bool StemDb::stemExpand(const std::string& langs,
     }
 
 #ifndef RCL_INDEX_STRIPCHARS
-    for (vector<string>::const_iterator it = llangs.begin();
-	 it != llangs.end(); it++) {
-	SynTermTransStem stemmer(*it);
-	XapComputableSynFamMember expander(getdb(), synFamStemUnac, 
-					   *it, &stemmer);
-	string unac;
-	unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC);
-	(void)expander.synExpand(unac, result);
+    // Expand the unaccented stem
+    if (!o_index_stripchars) {
+	for (vector<string>::const_iterator it = llangs.begin();
+	     it != llangs.end(); it++) {
+	    SynTermTransStem stemmer(*it);
+	    XapComputableSynFamMember expander(getdb(), synFamStemUnac, 
+					       *it, &stemmer);
+	    string unac;
+	    unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC);
+	    (void)expander.synExpand(unac, result);
+	}
     }
 #endif 
 
diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp
index 701d7b2d..a8a784c1 100644
--- a/src/utils/smallut.cpp
+++ b/src/utils/smallut.cpp
@@ -33,17 +33,12 @@
 #include <string>
 #include <iostream>
 #include <list>
+using namespace std;
 
 #include "smallut.h"
 #include "utf8iter.h"
 #include "hldata.h"
 
-#ifndef NO_NAMESPACES
-using namespace std;
-#endif /* NO_NAMESPACES */
-
-#define MIN(A,B) ((A)<(B)?(A):(B))
-
 int stringicmp(const string & s1, const string& s2) 
 {
     string::const_iterator it1 = s1.begin();
diff --git a/src/utils/smallut.h b/src/utils/smallut.h
index 424953ae..c0f698d3 100644
--- a/src/utils/smallut.h
+++ b/src/utils/smallut.h
@@ -224,4 +224,11 @@ public:
     }
 };
 
+#ifndef MIN /* Smaller of A and B; only defined if no MIN macro exists yet */
+#define MIN(A,B) (((A)<(B)) ? (A) : (B)) /* Caution: the selected argument is evaluated twice */
+#endif
+#ifndef MAX /* Larger of A and B; only defined if no MAX macro exists yet */
+#define MAX(A,B) (((A)>(B)) ? (A) : (B)) /* Caution: the selected argument is evaluated twice */
+#endif
+
 #endif /* _SMALLUT_H_INCLUDED_ */
diff --git a/tests/config/recoll.conf b/tests/config/recoll.conf
index df13a81b..1408117d 100644
--- a/tests/config/recoll.conf
+++ b/tests/config/recoll.conf
@@ -4,6 +4,8 @@ logfilename = /tmp/logrcltst
 daemloglevel = 6
 daemlogfilename = /tmp/rclmontrace
 
+indexStripChars = 1
+
 topdirs = /home/dockes/projets/fulltext/testrecoll/
 
 skippedPaths = \