Converted query language parser from the old regexp jungle to bison. Allow using parentheses for clearer syntax.
This commit is contained in:
parent
88bccb47b3
commit
3fb7183eae
@ -15,6 +15,7 @@ QTGUI = @QTGUI@
|
||||
RCLLIBVERSION=@RCLLIBVERSION@
|
||||
|
||||
all: configure mk/sysconf
|
||||
${MAKE} -C query wasaparse.tab.cpp
|
||||
(cd lib; sh mkMake)
|
||||
${MAKE} -C lib
|
||||
${MAKE} -C index depend recollindex
|
||||
@ -59,6 +60,7 @@ clean:
|
||||
# Note: we don't remove the top Makefile, to keep the "clean" targets
|
||||
# available but a "Make" won't work without a configure anyway
|
||||
distclean: clean
|
||||
${MAKE} -C query distclean
|
||||
-${MAKE} -C desktop/unity-lens-recoll distclean
|
||||
-${MAKE} -C python/recoll distclean
|
||||
rm -f mk/sysconf mk/localdefs sampleconf/recoll.conf \
|
||||
|
||||
@ -34,7 +34,6 @@ using namespace std;
|
||||
#include "pathut.h"
|
||||
#include "searchdata.h"
|
||||
#include "rclquery.h"
|
||||
#include "wasastringtoquery.h"
|
||||
#include "wasatorcl.h"
|
||||
#include "kio_recoll.h"
|
||||
#include "docseqdb.h"
|
||||
|
||||
@ -38,7 +38,6 @@ using namespace std;
|
||||
#include "pathut.h"
|
||||
#include "searchdata.h"
|
||||
#include "rclquery.h"
|
||||
#include "wasastringtoquery.h"
|
||||
#include "wasatorcl.h"
|
||||
#include "kio_recoll.h"
|
||||
#include "docseqdb.h"
|
||||
|
||||
@ -42,8 +42,8 @@ ${depth}/query/plaintorich.cpp \
|
||||
${depth}/query/recollq.cpp \
|
||||
${depth}/query/reslistpager.cpp \
|
||||
${depth}/query/sortseq.cpp \
|
||||
${depth}/query/wasastringtoquery.cpp \
|
||||
${depth}/query/wasatorcl.cpp \
|
||||
${depth}/query/wasaparse.cpp \
|
||||
${depth}/query/wasaparse.tab.cpp \
|
||||
${depth}/rcldb/daterange.cpp \
|
||||
${depth}/rcldb/expansiondbs.cpp \
|
||||
${depth}/rcldb/rclabstract.cpp \
|
||||
@ -53,6 +53,7 @@ ${depth}/rcldb/rcldups.cpp \
|
||||
${depth}/rcldb/rclquery.cpp \
|
||||
${depth}/rcldb/rclterms.cpp \
|
||||
${depth}/rcldb/searchdata.cpp \
|
||||
${depth}/rcldb/searchdatatox.cpp \
|
||||
${depth}/rcldb/searchdataxml.cpp \
|
||||
${depth}/rcldb/stemdb.cpp \
|
||||
${depth}/rcldb/stoplist.cpp \
|
||||
|
||||
@ -37,7 +37,6 @@
|
||||
#include "pathut.h"
|
||||
#include "rclinit.h"
|
||||
#include "debuglog.h"
|
||||
#include "wasastringtoquery.h"
|
||||
#include "wasatorcl.h"
|
||||
#include "internfile.h"
|
||||
#include "wipedir.h"
|
||||
|
||||
@ -32,7 +32,6 @@ using namespace std;
|
||||
#include "searchdata.h"
|
||||
#include "rclquery.h"
|
||||
#include "pathut.h"
|
||||
#include "wasastringtoquery.h"
|
||||
#include "wasatorcl.h"
|
||||
#include "debuglog.h"
|
||||
#include "pathut.h"
|
||||
|
||||
@ -4,8 +4,12 @@ include $(depth)/mk/sysconf
|
||||
PROGS = xadump recollq #trhist qtry qxtry
|
||||
SRCS = xadump.cpp
|
||||
|
||||
all: depend librecoll $(PROGS)
|
||||
all: wasaparse.tab.cpp depend librecoll $(PROGS)
|
||||
|
||||
wasaparse.tab.cpp : wasaparse.y
|
||||
bison wasaparse.y
|
||||
mv -f wasaparse.tab.c wasaparse.tab.cpp
|
||||
|
||||
XADUMP_OBJS= xadump.o
|
||||
xadump : $(XADUMP_OBJS)
|
||||
$(CXX) $(ALL_CXXFLAGS) -o xadump $(XADUMP_OBJS) \
|
||||
@ -39,3 +43,7 @@ trwasastrtoq.o : wasastringtoquery.cpp wasastringtoquery.h
|
||||
include $(depth)/mk/commontargets
|
||||
|
||||
include alldeps
|
||||
|
||||
distclean::
|
||||
-rm -f location.hh position.hh stack.hh \
|
||||
wasaparse.tab.c wasaparse.tab.cpp wasaparse.tab.h
|
||||
|
||||
@ -36,7 +36,6 @@ using namespace std;
|
||||
#include "pathut.h"
|
||||
#include "rclinit.h"
|
||||
#include "debuglog.h"
|
||||
#include "wasastringtoquery.h"
|
||||
#include "wasatorcl.h"
|
||||
#include "internfile.h"
|
||||
#include "wipedir.h"
|
||||
|
||||
235
src/query/wasaparse.cpp
Normal file
235
src/query/wasaparse.cpp
Normal file
@ -0,0 +1,235 @@
|
||||
/* Copyright (C) 2006 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "wasatorcl.h"
|
||||
#include "wasaparserdriver.h"
|
||||
#include "searchdata.h"
|
||||
#include "debuglog.h"
|
||||
|
||||
#define YYDEBUG 1
|
||||
|
||||
#include "wasaparse.tab.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace Rcl;
|
||||
|
||||
|
||||
void
|
||||
yy::parser::error (const location_type& l, const std::string& m)
|
||||
{
|
||||
d->setreason(m);
|
||||
}
|
||||
|
||||
|
||||
SearchData *wasaStringToRcl(const RclConfig *config,
|
||||
const std::string& stemlang,
|
||||
const std::string& query, string &reason,
|
||||
const std::string& autosuffs)
|
||||
{
|
||||
WasaParserDriver d(config, stemlang, autosuffs);
|
||||
SearchData *sd = d.parse(query);
|
||||
if (!sd)
|
||||
reason = d.getreason();
|
||||
return sd;
|
||||
}
|
||||
|
||||
SearchData *WasaParserDriver::parse(const std::string& in)
|
||||
{
|
||||
m_input = in;
|
||||
m_index = 0;
|
||||
delete m_result;
|
||||
m_result = 0;
|
||||
m_returns = stack<int>();
|
||||
|
||||
yy::parser parser(this);
|
||||
parser.set_debug_level(0);
|
||||
|
||||
if (parser.parse() != 0) {
|
||||
delete m_result;
|
||||
m_result = 0;
|
||||
}
|
||||
|
||||
return m_result;
|
||||
}
|
||||
|
||||
int WasaParserDriver::GETCHAR()
|
||||
{
|
||||
if (!m_returns.empty()) {
|
||||
int c = m_returns.top();
|
||||
m_returns.pop();
|
||||
return c;
|
||||
}
|
||||
if (m_index < m_input.size())
|
||||
return m_input[m_index++];
|
||||
return 0;
|
||||
}
|
||||
void WasaParserDriver::UNGETCHAR(int c)
|
||||
{
|
||||
m_returns.push(c);
|
||||
}
|
||||
|
||||
// Add clause to query, handling special pseudo-clauses for size/date
|
||||
// etc. (mostly determined on field name).
|
||||
bool WasaParserDriver::addClause(SearchData *sd,
|
||||
SearchDataClauseSimple* cl)
|
||||
{
|
||||
if (cl->getfield().empty()) {
|
||||
// Simple clause with empty field spec.
|
||||
// Possibly change terms found in the "autosuffs" list into "ext"
|
||||
// field queries
|
||||
if (!m_autosuffs.empty()) {
|
||||
vector<string> asfv;
|
||||
if (stringToStrings(m_autosuffs, asfv)) {
|
||||
if (find_if(asfv.begin(), asfv.end(),
|
||||
StringIcmpPred(cl->gettext())) != asfv.end()) {
|
||||
cl->setfield("ext");
|
||||
cl->addModifier(SearchDataClause::SDCM_NOSTEMMING);
|
||||
}
|
||||
}
|
||||
}
|
||||
return sd->addClause(cl);
|
||||
}
|
||||
|
||||
|
||||
const string& fld = cl->getfield();
|
||||
|
||||
// MIME types and categories
|
||||
if (!stringicmp("mime", fld) ||!stringicmp("format", fld)) {
|
||||
if (cl->getexclude()) {
|
||||
sd->remFiletype(cl->gettext());
|
||||
} else {
|
||||
sd->addFiletype(cl->gettext());
|
||||
}
|
||||
delete cl;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!stringicmp("rclcat", fld) || !stringicmp("type", fld)) {
|
||||
vector<string> mtypes;
|
||||
if (m_config && m_config->getMimeCatTypes(cl->gettext(), mtypes)) {
|
||||
for (vector<string>::iterator mit = mtypes.begin();
|
||||
mit != mtypes.end(); mit++) {
|
||||
if (cl->getexclude()) {
|
||||
sd->remFiletype(*mit);
|
||||
} else {
|
||||
sd->addFiletype(*mit);
|
||||
}
|
||||
}
|
||||
}
|
||||
delete cl;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Handle "date" spec
|
||||
if (!stringicmp("date", fld)) {
|
||||
DateInterval di;
|
||||
if (!parsedateinterval(cl->gettext(), &di)) {
|
||||
LOGERR(("Bad date interval format: %s\n",
|
||||
cl->gettext().c_str()));
|
||||
m_reason = "Bad date interval format";
|
||||
delete cl;
|
||||
return false;
|
||||
}
|
||||
LOGDEB(("addClause:: date span: %d-%d-%d/%d-%d-%d\n",
|
||||
di.y1,di.m1,di.d1, di.y2,di.m2,di.d2));
|
||||
sd->setDateSpan(&di);
|
||||
delete cl;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Handle "size" spec
|
||||
if (!stringicmp("size", fld)) {
|
||||
char *cp;
|
||||
size_t size = strtoll(cl->gettext().c_str(), &cp, 10);
|
||||
if (*cp != 0) {
|
||||
switch (*cp) {
|
||||
case 'k': case 'K': size *= 1E3;break;
|
||||
case 'm': case 'M': size *= 1E6;break;
|
||||
case 'g': case 'G': size *= 1E9;break;
|
||||
case 't': case 'T': size *= 1E12;break;
|
||||
default:
|
||||
m_reason = string("Bad multiplier suffix: ") + *cp;
|
||||
delete cl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
SearchDataClause::Relation rel = cl->getrel();
|
||||
|
||||
delete cl;
|
||||
|
||||
switch (rel) {
|
||||
case SearchDataClause::REL_EQUALS:
|
||||
sd->setMaxSize(size);
|
||||
sd->setMinSize(size);
|
||||
break;
|
||||
case SearchDataClause::REL_LT:
|
||||
case SearchDataClause::REL_LTE:
|
||||
sd->setMaxSize(size);
|
||||
break;
|
||||
case SearchDataClause::REL_GT:
|
||||
case SearchDataClause::REL_GTE:
|
||||
sd->setMinSize(size);
|
||||
break;
|
||||
default:
|
||||
m_reason = "Bad relation operator with size query. Use > < or =";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!stringicmp("dir", fld)) {
|
||||
// dir filtering special case
|
||||
SearchDataClausePath *nclause =
|
||||
new SearchDataClausePath(cl->gettext(), cl->getexclude());
|
||||
delete cl;
|
||||
sd->addClause(nclause);
|
||||
}
|
||||
|
||||
if (cl->getTp() == SCLT_OR || cl->getTp() == SCLT_AND) {
|
||||
// If this is a normal clause and the term has commas or
|
||||
// slashes inside, take it as a list, turn the slashes/commas
|
||||
// to spaces, leave unquoted. Otherwise, this would end up as
|
||||
// a phrase query. This is a handy way to enter multiple terms
|
||||
// to be searched inside a field. We interpret ',' as AND, and
|
||||
// '/' as OR. No mixes allowed and ',' wins.
|
||||
SClType tp = SCLT_FILENAME;// impossible value
|
||||
string ns = neutchars(cl->gettext(), ",");
|
||||
if (ns.compare(cl->gettext())) {
|
||||
// had ','
|
||||
tp = SCLT_AND;
|
||||
} else {
|
||||
ns = neutchars(cl->gettext(), "/");
|
||||
if (ns.compare(cl->gettext())) {
|
||||
// had not ',' but has '/'
|
||||
tp = SCLT_OR;
|
||||
}
|
||||
}
|
||||
|
||||
if (tp != SCLT_FILENAME) {
|
||||
SearchDataClauseSimple *ncl =
|
||||
new SearchDataClauseSimple(tp, ns, fld);
|
||||
delete cl;
|
||||
return sd->addClause(ncl);
|
||||
}
|
||||
}
|
||||
return sd->addClause(cl);
|
||||
}
|
||||
|
||||
415
src/query/wasaparse.y
Normal file
415
src/query/wasaparse.y
Normal file
@ -0,0 +1,415 @@
|
||||
%{
|
||||
#define YYDEBUG 1
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "searchdata.h"
|
||||
#include "wasaparserdriver.h"
|
||||
#include "wasaparse.tab.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int yylex(yy::parser::semantic_type *, WasaParserDriver *);
|
||||
void yyerror(char const *);
|
||||
static void qualify(Rcl::SearchDataClauseDist *, const string &);
|
||||
|
||||
static void addSubQuery(WasaParserDriver *d,
|
||||
Rcl::SearchData *sd, Rcl::SearchData *sq)
|
||||
{
|
||||
sd->addClause(new Rcl::SearchDataClauseSub(RefCntr<Rcl::SearchData>(sq)));
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
%skeleton "lalr1.cc"
|
||||
%defines
|
||||
%error-verbose
|
||||
|
||||
%parse-param {WasaParserDriver* d}
|
||||
%lex-param {WasaParserDriver* d}
|
||||
|
||||
%union {
|
||||
std::string *str;
|
||||
Rcl::SearchDataClauseSimple *cl;
|
||||
Rcl::SearchData *sd;
|
||||
}
|
||||
%destructor {delete $$;} <str>
|
||||
|
||||
%type <cl> qualquote
|
||||
%type <cl> fieldexpr
|
||||
%type <cl> term
|
||||
%type <sd> query
|
||||
%type <str> complexfieldname
|
||||
|
||||
/* Non operator tokens need precedence because of the possibility of
|
||||
concatenation which needs to have lower prec than OR */
|
||||
%left <str> WORD
|
||||
%left <str> QUOTED
|
||||
%left <str> QUALIFIERS
|
||||
%left AND UCONCAT
|
||||
%left OR
|
||||
|
||||
%token EQUALS CONTAINS SMALLEREQ SMALLER GREATEREQ GREATER
|
||||
|
||||
%%
|
||||
|
||||
topquery: query
|
||||
{
|
||||
d->m_result = $1;
|
||||
}
|
||||
|
||||
query:
|
||||
query query %prec UCONCAT
|
||||
{
|
||||
//cerr << "q: query query" << endl;
|
||||
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
|
||||
addSubQuery(d, sd, $1);
|
||||
addSubQuery(d, sd, $2);
|
||||
$$ = sd;
|
||||
}
|
||||
| query AND query
|
||||
{
|
||||
//cerr << "q: query AND query" << endl;
|
||||
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
|
||||
addSubQuery(d, sd, $1);
|
||||
addSubQuery(d, sd, $3);
|
||||
$$ = sd;
|
||||
}
|
||||
| query OR query
|
||||
{
|
||||
//cerr << "q: query OR query" << endl;
|
||||
Rcl::SearchData *top = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
|
||||
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_OR, d->m_stemlang);
|
||||
addSubQuery(d, sd, $1);
|
||||
addSubQuery(d, sd, $3);
|
||||
addSubQuery(d, top, sd);
|
||||
$$ = top;
|
||||
}
|
||||
| '(' query ')'
|
||||
{
|
||||
//cerr << "q: ( query )" << endl;
|
||||
$$ = $2;
|
||||
}
|
||||
|
|
||||
fieldexpr %prec UCONCAT
|
||||
{
|
||||
//cerr << "q: fieldexpr" << endl;
|
||||
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
|
||||
d->addClause(sd, $1);
|
||||
$$ = sd;
|
||||
}
|
||||
;
|
||||
|
||||
fieldexpr: term
|
||||
{
|
||||
// cerr << "fe: simple fieldexpr: " << $1->gettext() << endl;
|
||||
$$ = $1;
|
||||
}
|
||||
| complexfieldname EQUALS term
|
||||
{
|
||||
// cerr << "fe: " << *$1 << " = " << $3->gettext() << endl;
|
||||
$3->setfield(*$1);
|
||||
$3->setrel(Rcl::SearchDataClause::REL_EQUALS);
|
||||
$$ = $3;
|
||||
delete $1;
|
||||
}
|
||||
| complexfieldname CONTAINS term
|
||||
{
|
||||
// cerr << "fe: " << *$1 << " : " << $3->gettext() << endl;
|
||||
$3->setfield(*$1);
|
||||
$3->setrel(Rcl::SearchDataClause::REL_CONTAINS);
|
||||
$$ = $3;
|
||||
delete $1;
|
||||
}
|
||||
| complexfieldname SMALLER term
|
||||
{
|
||||
// cerr << "fe: " << *$1 << " < " << $3->gettext() << endl;
|
||||
$3->setfield(*$1);
|
||||
$3->setrel(Rcl::SearchDataClause::REL_LT);
|
||||
$$ = $3;
|
||||
delete $1;
|
||||
}
|
||||
| complexfieldname SMALLEREQ term
|
||||
{
|
||||
// cerr << "fe: " << *$1 << " <= " << $3->gettext() << endl;
|
||||
$3->setfield(*$1);
|
||||
$3->setrel(Rcl::SearchDataClause::REL_LTE);
|
||||
$$ = $3;
|
||||
delete $1;
|
||||
}
|
||||
| complexfieldname GREATER term
|
||||
{
|
||||
// cerr << "fe: " << *$1 << " > " << $3->gettext() << endl;
|
||||
$3->setfield(*$1);
|
||||
$3->setrel(Rcl::SearchDataClause::REL_GT);
|
||||
$$ = $3;
|
||||
delete $1;
|
||||
}
|
||||
| complexfieldname GREATEREQ term
|
||||
{
|
||||
// cerr << "fe: " << *$1 << " >= " << $3->gettext() << endl;
|
||||
$3->setfield(*$1);
|
||||
$3->setrel(Rcl::SearchDataClause::REL_GTE);
|
||||
$$ = $3;
|
||||
delete $1;
|
||||
}
|
||||
| '-' fieldexpr
|
||||
{
|
||||
// cerr << "fe: - fieldexpr[" << $2->gettext() << "]" << endl;
|
||||
$2->setexclude(true);
|
||||
$$ = $2;
|
||||
}
|
||||
;
|
||||
|
||||
/* Deal with field names like dc:title */
|
||||
complexfieldname:
|
||||
WORD
|
||||
{
|
||||
// cerr << "cfn: WORD" << endl;
|
||||
$$ = $1;
|
||||
}
|
||||
|
|
||||
complexfieldname CONTAINS WORD
|
||||
{
|
||||
// cerr << "cfn: complexfieldname ':' WORD" << endl;
|
||||
$$ = new string(*$1 + string(":") + *$3);
|
||||
delete $1;
|
||||
delete $3;
|
||||
}
|
||||
|
||||
term:
|
||||
WORD
|
||||
{
|
||||
//cerr << "term[" << *$1 << "]" << endl;
|
||||
$$ = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, *$1);
|
||||
delete $1;
|
||||
}
|
||||
| qualquote
|
||||
{
|
||||
$$ = $1;
|
||||
}
|
||||
|
||||
qualquote:
|
||||
QUOTED
|
||||
{
|
||||
// cerr << "QUOTED[" << *$1 << "]" << endl;
|
||||
$$ = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0);
|
||||
delete $1;
|
||||
}
|
||||
| QUOTED QUALIFIERS
|
||||
{
|
||||
// cerr << "QUOTED[" << *$1 << "] QUALIFIERS[" << *$2 << "]" << endl;
|
||||
Rcl::SearchDataClauseDist *cl =
|
||||
new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0);
|
||||
qualify(cl, *$2);
|
||||
$$ = cl;
|
||||
delete $1;
|
||||
delete $2;
|
||||
}
|
||||
|
||||
|
||||
%%
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
// Look for int at index, skip and return new index found? value.
|
||||
static unsigned int qualGetInt(const string& q, unsigned int cur, int *pval)
|
||||
{
|
||||
unsigned int ncur = cur;
|
||||
if (cur < q.size() - 1) {
|
||||
char *endptr;
|
||||
int val = strtol(&q[cur + 1], &endptr, 10);
|
||||
if (endptr != &q[cur + 1]) {
|
||||
ncur += endptr - &q[cur + 1];
|
||||
*pval = val;
|
||||
}
|
||||
}
|
||||
return ncur;
|
||||
}
|
||||
|
||||
static void qualify(Rcl::SearchDataClauseDist *cl, const string& quals)
|
||||
{
|
||||
// cerr << "qualify(" << cl << ", " << quals << ")" << endl;
|
||||
for (unsigned int i = 0; i < quals.length(); i++) {
|
||||
//fprintf(stderr, "qual char %c\n", quals[i]);
|
||||
switch (quals[i]) {
|
||||
case 'b':
|
||||
cl->setWeight(10.0);
|
||||
break;
|
||||
case 'c': break;
|
||||
case 'C':
|
||||
cl->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
|
||||
break;
|
||||
case 'd': break;
|
||||
case 'D':
|
||||
cl->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
|
||||
break;
|
||||
case 'e':
|
||||
cl->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
|
||||
cl->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
|
||||
cl->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
||||
break;
|
||||
case 'l':
|
||||
cl->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
||||
break;
|
||||
case 'L': break;
|
||||
case 'o':
|
||||
{
|
||||
int slack = 10;
|
||||
i = qualGetInt(quals, i, &slack);
|
||||
cl->setslack(slack);
|
||||
//cerr << "set slack " << cl->getslack() << " done" << endl;
|
||||
}
|
||||
break;
|
||||
case 'p':
|
||||
cl->setTp(Rcl::SCLT_NEAR);
|
||||
if (cl->getslack() == 0) {
|
||||
cl->setslack(10);
|
||||
//cerr << "set slack " << cl->getslack() << " done" << endl;
|
||||
}
|
||||
break;
|
||||
case '.':case '0':case '1':case '2':case '3':case '4':
|
||||
case '5':case '6':case '7':case '8':case '9':
|
||||
{
|
||||
int n = 0;
|
||||
float factor = 1.0;
|
||||
if (sscanf(&(quals[i]), "%f %n", &factor, &n)) {
|
||||
if (factor != 1.0) {
|
||||
cl->setWeight(factor);
|
||||
}
|
||||
}
|
||||
if (n > 0)
|
||||
i += n - 1;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// specialstartchars are special only at the beginning of a token
|
||||
// (e.g. doctor-who is a term, not 2 terms separated by '-')
|
||||
static const string specialstartchars("-");
|
||||
// specialinchars are special everywhere except inside a quoted string
|
||||
static const string specialinchars(":=<>()");
|
||||
|
||||
// Called with the first dquote already read
|
||||
static int parseString(WasaParserDriver *d, yy::parser::semantic_type *yylval)
|
||||
{
|
||||
string* value = new string();
|
||||
d->qualifiers().clear();
|
||||
int c;
|
||||
while ((c = d->GETCHAR())) {
|
||||
switch (c) {
|
||||
case '\\':
|
||||
/* Escape: get next char */
|
||||
c = d->GETCHAR();
|
||||
if (c == 0) {
|
||||
value->push_back(c);
|
||||
goto out;
|
||||
}
|
||||
value->push_back(c);
|
||||
break;
|
||||
case '"':
|
||||
/* End of string. Look for qualifiers */
|
||||
while ((c = d->GETCHAR()) && !isspace(c))
|
||||
d->qualifiers().push_back(c);
|
||||
goto out;
|
||||
default:
|
||||
value->push_back(c);
|
||||
}
|
||||
}
|
||||
out:
|
||||
//cerr << "GOT QUOTED ["<<value<<"] quals [" << d->qualifiers() << "]" << endl;
|
||||
yylval->str = value;
|
||||
return yy::parser::token::QUOTED;
|
||||
}
|
||||
|
||||
|
||||
int yylex(yy::parser::semantic_type *yylval, WasaParserDriver *d)
|
||||
{
|
||||
if (!d->qualifiers().empty()) {
|
||||
yylval->str = new string();
|
||||
yylval->str->swap(d->qualifiers());
|
||||
return yy::parser::token::QUALIFIERS;
|
||||
}
|
||||
|
||||
int c;
|
||||
|
||||
/* Skip white space. */
|
||||
while ((c = d->GETCHAR()) && isspace(c))
|
||||
continue;
|
||||
|
||||
if (c == 0)
|
||||
return 0;
|
||||
|
||||
if (specialstartchars.find_first_of(c) != string::npos) {
|
||||
//cerr << "yylex: return " << c << endl;
|
||||
return c;
|
||||
}
|
||||
|
||||
// field-term relations
|
||||
switch (c) {
|
||||
case '=': return yy::parser::token::EQUALS;
|
||||
case ':': return yy::parser::token::CONTAINS;
|
||||
case '<': {
|
||||
int c1 = d->GETCHAR();
|
||||
if (c1 == '=') {
|
||||
return yy::parser::token::SMALLEREQ;
|
||||
} else {
|
||||
d->UNGETCHAR(c1);
|
||||
return yy::parser::token::SMALLER;
|
||||
}
|
||||
}
|
||||
case '>': {
|
||||
int c1 = d->GETCHAR();
|
||||
if (c1 == '=') {
|
||||
return yy::parser::token::GREATEREQ;
|
||||
} else {
|
||||
d->UNGETCHAR(c1);
|
||||
return yy::parser::token::GREATER;
|
||||
}
|
||||
}
|
||||
case '(': case ')':
|
||||
return c;
|
||||
}
|
||||
|
||||
if (c == '"')
|
||||
return parseString(d, yylval);
|
||||
|
||||
d->UNGETCHAR(c);
|
||||
|
||||
// Other chars start a term or field name or reserved word
|
||||
string* word = new string();
|
||||
while ((c = d->GETCHAR())) {
|
||||
if (isspace(c)) {
|
||||
//cerr << "Word broken by whitespace" << endl;
|
||||
break;
|
||||
} else if (specialinchars.find_first_of(c) != string::npos) {
|
||||
//cerr << "Word broken by special char" << endl;
|
||||
d->UNGETCHAR(c);
|
||||
break;
|
||||
} else if (c == 0) {
|
||||
//cerr << "Word broken by EOF" << endl;
|
||||
break;
|
||||
} else {
|
||||
word->push_back(c);
|
||||
}
|
||||
}
|
||||
|
||||
if (!word->compare("AND") || !word->compare("&&")) {
|
||||
delete word;
|
||||
return yy::parser::token::AND;
|
||||
} else if (!word->compare("OR") || !word->compare("||")) {
|
||||
delete word;
|
||||
return yy::parser::token::OR;
|
||||
}
|
||||
|
||||
// cerr << "Got word [" << word << "]" << endl;
|
||||
yylval->str = word;
|
||||
return yy::parser::token::WORD;
|
||||
}
|
||||
81
src/query/wasaparserdriver.h
Normal file
81
src/query/wasaparserdriver.h
Normal file
@ -0,0 +1,81 @@
|
||||
/* Copyright (C) 2006 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifndef _WASAPARSERDRIVER_H_INCLUDED_
|
||||
#define _WASAPARSERDRIVER_H_INCLUDED_
|
||||
|
||||
#include <string>
|
||||
#include <stack>
|
||||
|
||||
class WasaParserDriver;
|
||||
namespace Rcl {
|
||||
class SearchData;
|
||||
class SearchDataClauseSimple;
|
||||
}
|
||||
namespace yy {
|
||||
class parser;
|
||||
}
|
||||
|
||||
class RclConfig;
|
||||
|
||||
class WasaParserDriver {
|
||||
public:
|
||||
|
||||
WasaParserDriver(const RclConfig *c, const std::string sl,
|
||||
const std::string& as)
|
||||
: m_stemlang(sl), m_autosuffs(as), m_config(c),
|
||||
m_index(0), m_result(0) {}
|
||||
|
||||
Rcl::SearchData *parse(const std::string&);
|
||||
bool addClause(Rcl::SearchData *sd, Rcl::SearchDataClauseSimple* cl);
|
||||
|
||||
int GETCHAR();
|
||||
void UNGETCHAR(int c);
|
||||
|
||||
std::string& qualifiers() {
|
||||
return m_qualifiers;
|
||||
}
|
||||
void setreason(const std::string& reason) {
|
||||
m_reason = reason;
|
||||
}
|
||||
const std::string& getreason() const {
|
||||
return m_reason;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class yy::parser;
|
||||
|
||||
std::string m_stemlang;
|
||||
std::string m_autosuffs;
|
||||
const RclConfig *m_config;
|
||||
|
||||
std::string m_input;
|
||||
unsigned int m_index;
|
||||
std::stack<int> m_returns;
|
||||
Rcl::SearchData *m_result;
|
||||
|
||||
std::string m_reason;
|
||||
|
||||
// Let the quoted string reader store qualifiers in there, simpler
|
||||
// than handling this in the parser, because their nature is
|
||||
// determined by the absence of white space after the closing
|
||||
// dquote. e.g "some term"abc. We could avoid this by making white
|
||||
// space a token.
|
||||
std::string m_qualifiers;
|
||||
};
|
||||
|
||||
|
||||
#endif /* _WASAPARSERDRIVER_H_INCLUDED_ */
|
||||
@ -1,515 +0,0 @@
|
||||
/* Copyright (C) 2006 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifndef TEST_WASASTRINGTOQUERY
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "smallut.h"
|
||||
#include "wasastringtoquery.h"
|
||||
|
||||
#undef DEB_WASASTRINGTOQ
|
||||
#ifdef DEB_WASASTRINGTOQ
|
||||
#define DPRINT(X) fprintf X
|
||||
#define DUMPQ(Q) {string D;Q->describe(D);fprintf(stderr, "%s\n", D.c_str());}
|
||||
#else
|
||||
#define DPRINT(X)
|
||||
#define DUMPQ(Q)
|
||||
#endif
|
||||
|
||||
WasaQuery::~WasaQuery()
|
||||
{
|
||||
for (vector<WasaQuery*>::iterator it = m_subs.begin();
|
||||
it != m_subs.end(); it++) {
|
||||
delete *it;
|
||||
}
|
||||
m_subs.clear();
|
||||
}
|
||||
|
||||
static const char* reltosrel(WasaQuery::Rel rel)
|
||||
{
|
||||
switch (rel) {
|
||||
case WasaQuery::REL_EQUALS: return "=";
|
||||
case WasaQuery::REL_CONTAINS: return ":";
|
||||
case WasaQuery::REL_LT: return "<";
|
||||
case WasaQuery::REL_LTE: return "<=";
|
||||
case WasaQuery::REL_GT: return ">";
|
||||
case WasaQuery::REL_GTE: return ">=";
|
||||
default: return "?";
|
||||
}
|
||||
}
|
||||
|
||||
void WasaQuery::describe(string &desc) const
|
||||
{
|
||||
desc += "(";
|
||||
string fieldspec = m_fieldspec.empty() ? string() : m_fieldspec +
|
||||
reltosrel(m_rel);
|
||||
switch (m_op) {
|
||||
case OP_NULL:
|
||||
desc += "NULL";
|
||||
break;
|
||||
case OP_LEAF:
|
||||
if (m_exclude)
|
||||
desc += "NOT (";
|
||||
desc += fieldspec + m_value;
|
||||
if (m_exclude)
|
||||
desc += ")";
|
||||
break;
|
||||
case OP_OR:
|
||||
case OP_AND:
|
||||
for (vector<WasaQuery *>::const_iterator it = m_subs.begin();
|
||||
it != m_subs.end(); it++) {
|
||||
(*it)->describe(desc);
|
||||
vector<WasaQuery *>::const_iterator it1 = it;
|
||||
it1++;
|
||||
if (it1 != m_subs.end())
|
||||
desc += m_op == OP_OR ? "OR ": "AND ";
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (desc[desc.length() - 1] == ' ')
|
||||
desc.erase(desc.length() - 1);
|
||||
desc += ")";
|
||||
if (m_modifiers != 0) {
|
||||
if (m_modifiers & WQM_BOOST) desc += "BOOST|";
|
||||
if (m_modifiers & WQM_CASESENS) desc += "CASESENS|";
|
||||
if (m_modifiers & WQM_DIACSENS) desc += "DIACSENS|";
|
||||
if (m_modifiers & WQM_FUZZY) desc += "FUZZY|";
|
||||
if (m_modifiers & WQM_NOSTEM) desc += "NOSTEM|";
|
||||
if (m_modifiers & WQM_PHRASESLACK) {
|
||||
char buf[100];
|
||||
sprintf(buf, "%d", m_slack);
|
||||
desc += "PHRASESLACK(" + string(buf) + string(")|");
|
||||
}
|
||||
if (m_modifiers & WQM_PROX) desc += "PROX|";
|
||||
if (m_modifiers & WQM_REGEX) desc += "REGEX|";
|
||||
if (m_modifiers & WQM_SLOPPY) desc += "SLOPPY|";
|
||||
if (m_modifiers & WQM_WORDS) desc += "WORDS|";
|
||||
|
||||
if (desc.length() > 0 && desc[desc.length()-1] == '|')
|
||||
desc.erase(desc.length()-1);
|
||||
}
|
||||
desc += " ";
|
||||
}
|
||||
|
||||
// The string query parser code:
|
||||
|
||||
/* Shamelessly lifted from Beagle:
|
||||
* This is our regular Expression Pattern:
|
||||
* we expect something like this:
|
||||
* -key:"Value String"modifiers
|
||||
* key:Value
|
||||
* or
|
||||
* Value
|
||||
*/
|
||||
|
||||
/* The master regular expression used to parse a query string
|
||||
* Sub-expressions in parenthesis are numbered from 1. Each opening
|
||||
* parenthesis increases the index, but we're not interested in all
|
||||
* Deviations from standard:
|
||||
* Relation: the standard-conformant line read as (release<1.16):
|
||||
"(:|=|<|>|<=|>=)" //7 Relation
|
||||
but we are not actually making use of the relation type
|
||||
(interpreting all as ":"), and this can product unexpected results
|
||||
as a (ie pasted) search for nonexfield=value will silently drop
|
||||
the nonexfield part, while the user probably was not aware of
|
||||
triggering a field search (expecting just ':' to do this).
|
||||
*/
|
||||
static const char * parserExpr =
|
||||
"(OR|\\|\\|)[[:space:]]*" //1 OR,||
|
||||
"|"
|
||||
"(AND|&&)[[:space:]]*" // 2 AND,&& (ignored, default)
|
||||
"|"
|
||||
"(" //3
|
||||
"([+-])?" //4 Force or exclude indicator
|
||||
"(" //5
|
||||
"([[:alpha:]][[:alnum:]:]*)" //6 Field spec: ie: "dc:title:letitre"
|
||||
"[[:space:]]*"
|
||||
"(:|=|>|<)" //7 Relation
|
||||
"[[:space:]]*)?"
|
||||
"(" //8
|
||||
"(\"" //9
|
||||
"([^\"]+)" //10 "A quoted term"
|
||||
"\")"
|
||||
"([bcCdDeflLoprsw.0-9]*)" //11 modifiers
|
||||
"|"
|
||||
"([^[:space:]\"]+)" //12 ANormalTerm
|
||||
")"
|
||||
")[[:space:]]*"
|
||||
;
|
||||
|
||||
// For debugging the parser. But see also NMATCH
|
||||
static const char *matchNames[] = {
|
||||
/* 0*/ "",
|
||||
/* 1*/ "OR",
|
||||
/* 2*/ "AND",
|
||||
/* 3*/ "",
|
||||
/* 4*/ "+-",
|
||||
/* 5*/ "",
|
||||
/* 6*/ "FIELD",
|
||||
/* 7*/ "RELATION",
|
||||
/* 8*/ "",
|
||||
/* 9*/ "",
|
||||
/*10*/ "QUOTEDTERM",
|
||||
/*11*/ "MODIFIERS",
|
||||
/*12*/ "TERM",
|
||||
};
|
||||
#define NMATCH (sizeof(matchNames) / sizeof(char *))
|
||||
|
||||
// Symbolic names for the interesting submatch indices
|
||||
enum SbMatchIdx {SMI_OR=1, SMI_AND=2, SMI_PM=4, SMI_FIELD=6, SMI_REL=7,
|
||||
SMI_QUOTED=10, SMI_MODIF=11, SMI_TERM=12};
|
||||
|
||||
static const int maxmatchlen = 1024;
|
||||
static const int errbuflen = 300;
|
||||
|
||||
class StringToWasaQuery::Internal {
|
||||
public:
|
||||
Internal()
|
||||
: m_rxneedsfree(false)
|
||||
{}
|
||||
~Internal()
|
||||
{
|
||||
if (m_rxneedsfree)
|
||||
regfree(&m_rx);
|
||||
}
|
||||
bool checkSubMatch(int i, char *match, string& reason)
|
||||
{
|
||||
if (i < 0 || i >= int(NMATCH) || m_pmatch[i].rm_so == -1) {
|
||||
//DPRINT((stderr, "checkSubMatch: no match: i %d rm_so %d\n",
|
||||
//i, m_pmatch[i].rm_so));
|
||||
return false;
|
||||
}
|
||||
if (m_pmatch[i].rm_eo - m_pmatch[i].rm_so <= 0) {
|
||||
// weird and fatal
|
||||
reason = "Internal regular expression handling error";
|
||||
return false;
|
||||
}
|
||||
//DPRINT((stderr, "checkSubMatch: so %d eo %d\n", m_pmatch[i].rm_so,
|
||||
//m_pmatch[i].rm_eo));
|
||||
memcpy(match, m_cp + m_pmatch[i].rm_so,
|
||||
m_pmatch[i].rm_eo - m_pmatch[i].rm_so);
|
||||
match[m_pmatch[i].rm_eo - m_pmatch[i].rm_so] = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
WasaQuery *stringToQuery(const string& str, string& reason);
|
||||
|
||||
friend class StringToWasaQuery;
|
||||
private:
|
||||
const char *m_cp;
|
||||
regex_t m_rx;
|
||||
bool m_rxneedsfree;
|
||||
regmatch_t m_pmatch[NMATCH];
|
||||
};
|
||||
|
||||
StringToWasaQuery::StringToWasaQuery()
|
||||
: internal(new Internal)
|
||||
{
|
||||
}
|
||||
|
||||
StringToWasaQuery::~StringToWasaQuery()
|
||||
{
|
||||
delete internal;
|
||||
}
|
||||
|
||||
WasaQuery *
|
||||
StringToWasaQuery::stringToQuery(const string& str, string& reason)
|
||||
{
|
||||
if (internal == 0)
|
||||
return 0;
|
||||
WasaQuery *wq = internal->stringToQuery(str, reason);
|
||||
DUMPQ(wq);
|
||||
return wq;
|
||||
}
|
||||
|
||||
WasaQuery *
|
||||
StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
|
||||
{
|
||||
if (m_rxneedsfree)
|
||||
regfree(&m_rx);
|
||||
|
||||
char errbuf[errbuflen+1];
|
||||
int errcode;
|
||||
if ((errcode = regcomp(&m_rx, parserExpr, REG_EXTENDED)) != 0) {
|
||||
regerror(errcode, &m_rx, errbuf, errbuflen);
|
||||
reason = errbuf;
|
||||
return 0;
|
||||
}
|
||||
m_rxneedsfree = true;
|
||||
|
||||
const char *cpe;
|
||||
m_cp = str.c_str();
|
||||
cpe = str.c_str() + str.length();
|
||||
|
||||
WasaQuery *query = new WasaQuery;
|
||||
query->m_op = WasaQuery::OP_AND;
|
||||
WasaQuery *orChain = 0;
|
||||
bool prev_or = false;
|
||||
|
||||
// Loop on repeated regexp matches on the main string.
|
||||
for (int loop = 0;;loop++) {
|
||||
if ((errcode = regexec(&m_rx, m_cp, NMATCH, m_pmatch, 0))) {
|
||||
regerror(errcode, &m_rx, errbuf, errbuflen);
|
||||
reason = errbuf;
|
||||
return 0;
|
||||
}
|
||||
if (m_pmatch[0].rm_eo <= 0) {
|
||||
// weird and fatal
|
||||
reason = "Internal regular expression handling error";
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef DEB_WASASTRINGTOQ
|
||||
DPRINT((stderr, "Next part:\n"));
|
||||
for (unsigned int i = 0; i < NMATCH; i++) {
|
||||
if (m_pmatch[i].rm_so == -1) continue;
|
||||
char match[maxmatchlen+1];
|
||||
memcpy(match, m_cp + m_pmatch[i].rm_so,
|
||||
m_pmatch[i].rm_eo - m_pmatch[i].rm_so);
|
||||
match[m_pmatch[i].rm_eo - m_pmatch[i].rm_so] = 0;
|
||||
if (matchNames[i][0])
|
||||
DPRINT((stderr, "%10s: [%s] (%d->%d)\n", matchNames[i], match,
|
||||
(int)m_pmatch[i].rm_so, (int)m_pmatch[i].rm_eo));
|
||||
}
|
||||
#endif
|
||||
|
||||
char match[maxmatchlen+1];
|
||||
if (checkSubMatch(SMI_OR, match, reason)) {
|
||||
if (prev_or) {
|
||||
// Bad syntax
|
||||
reason = "Bad syntax: consecutive OR";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (orChain == 0) {
|
||||
// Fist OR seen: start OR subclause.
|
||||
if ((orChain = new WasaQuery()) == 0) {
|
||||
reason = "Out of memory";
|
||||
return 0;
|
||||
}
|
||||
orChain->m_op = WasaQuery::OP_OR;
|
||||
}
|
||||
|
||||
// For the first OR, we need to transfer the previous
|
||||
// query from the main vector to the OR subquery
|
||||
if (orChain->m_subs.empty() && !query->m_subs.empty()) {
|
||||
orChain->m_subs.push_back(query->m_subs.back());
|
||||
query->m_subs.pop_back();
|
||||
}
|
||||
prev_or = true;
|
||||
|
||||
} else if (checkSubMatch(SMI_AND, match, reason)) {
|
||||
// Do nothing, AND is the default. We might want to check for
|
||||
// errors like consecutive ANDs, or OR AND
|
||||
|
||||
} else {
|
||||
|
||||
WasaQuery *nclause = new WasaQuery;
|
||||
if (nclause == 0) {
|
||||
reason = "Out of memory";
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Check for quoted or unquoted value
|
||||
unsigned int mods = 0;
|
||||
if (checkSubMatch(SMI_QUOTED, match, reason)) {
|
||||
nclause->m_value = match;
|
||||
mods |= WasaQuery::WQM_QUOTED;
|
||||
} else if (checkSubMatch(SMI_TERM, match, reason)) {
|
||||
nclause->m_value = match;
|
||||
}
|
||||
|
||||
if (nclause->m_value.empty()) {
|
||||
// Isolated +- or fieldname: without a value. Ignore until
|
||||
// told otherwise.
|
||||
DPRINT((stderr, "Clause with empty value, skipping\n"));
|
||||
delete nclause;
|
||||
goto nextfield;
|
||||
}
|
||||
|
||||
if (checkSubMatch(SMI_MODIF, match, reason)) {
|
||||
DPRINT((stderr, "Got modifiers: [%s]\n", match));
|
||||
for (unsigned int i = 0; i < strlen(match); i++) {
|
||||
switch (match[i]) {
|
||||
case 'b':
|
||||
mods |= WasaQuery::WQM_BOOST;
|
||||
nclause->m_weight = 10.0;
|
||||
break;
|
||||
case 'c': break;
|
||||
case 'C': mods |= WasaQuery::WQM_CASESENS; break;
|
||||
case 'd': break;
|
||||
case 'D': mods |= WasaQuery::WQM_DIACSENS; break;
|
||||
case 'e': mods |= WasaQuery::WQM_CASESENS |
|
||||
WasaQuery::WQM_DIACSENS |
|
||||
WasaQuery::WQM_NOSTEM;
|
||||
break;
|
||||
case 'f': mods |= WasaQuery::WQM_FUZZY; break;
|
||||
case 'l': mods |= WasaQuery::WQM_NOSTEM; break;
|
||||
case 'L': break;
|
||||
case 'o':
|
||||
mods |= WasaQuery::WQM_PHRASESLACK;
|
||||
// Default slack if specified only by 'o' is 10.
|
||||
nclause->m_slack = 10;
|
||||
if (i < strlen(match) - 1) {
|
||||
char *endptr;
|
||||
int slack = strtol(match+i+1, &endptr, 10);
|
||||
if (endptr != match+i+1) {
|
||||
i += endptr - (match+i+1);
|
||||
nclause->m_slack = slack;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'p':
|
||||
mods |= WasaQuery::WQM_PROX;
|
||||
nclause->m_slack = 10;
|
||||
break;
|
||||
case 'r': mods |= WasaQuery::WQM_REGEX; break;
|
||||
case 's': mods |= WasaQuery::WQM_SLOPPY; break;
|
||||
case 'w': mods |= WasaQuery::WQM_WORDS; break;
|
||||
case '.':case '0':case '1':case '2':case '3':case '4':
|
||||
case '5':case '6':case '7':case '8':case '9':
|
||||
{
|
||||
int n;
|
||||
float factor;
|
||||
if (sscanf(match+i, "%f %n", &factor, &n)) {
|
||||
nclause->m_weight = factor;
|
||||
DPRINT((stderr, "Got factor %.2f len %d\n",
|
||||
factor, n));
|
||||
}
|
||||
if (n)
|
||||
i += n-1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
nclause->m_modifiers = WasaQuery::Modifier(mods);
|
||||
|
||||
// Field indicator ?
|
||||
if (checkSubMatch(SMI_FIELD, match, reason)) {
|
||||
// We used Check for special fields indicating sorting
|
||||
// etc. here but this went away from the spec. See 1.4
|
||||
// if it comes back
|
||||
nclause->m_fieldspec = match;
|
||||
if (checkSubMatch(SMI_REL, match, reason)) {
|
||||
switch (match[0]) {
|
||||
case '=':nclause->m_rel = WasaQuery::REL_EQUALS;break;
|
||||
case ':':nclause->m_rel = WasaQuery::REL_CONTAINS;break;
|
||||
case '<':
|
||||
if (match[1] == '=')
|
||||
nclause->m_rel = WasaQuery::REL_LTE;
|
||||
else
|
||||
nclause->m_rel = WasaQuery::REL_LT;
|
||||
break;
|
||||
case '>':
|
||||
if (match[1] == '=')
|
||||
nclause->m_rel = WasaQuery::REL_GTE;
|
||||
else
|
||||
nclause->m_rel = WasaQuery::REL_GT;
|
||||
break;
|
||||
default:
|
||||
nclause->m_rel = WasaQuery::REL_CONTAINS;
|
||||
}
|
||||
} else {
|
||||
// ?? If field matched we should have a relation
|
||||
nclause->m_rel = WasaQuery::REL_CONTAINS;
|
||||
}
|
||||
}
|
||||
|
||||
nclause->m_op = WasaQuery::OP_LEAF;
|
||||
// +- indicator ?
|
||||
if (checkSubMatch(SMI_PM, match, reason) && match[0] == '-') {
|
||||
nclause->m_exclude = true;
|
||||
} else {
|
||||
nclause->m_exclude = false;
|
||||
}
|
||||
|
||||
if (prev_or) {
|
||||
// The precedent token was an OR, add new clause to or chain
|
||||
//DPRINT((stderr, "Adding to OR chain\n"));
|
||||
orChain->m_subs.push_back(nclause);
|
||||
} else {
|
||||
if (orChain) {
|
||||
// Getting out of OR. Add the OR subquery to the main one
|
||||
//DPRINT((stderr, "Adding OR chain to main\n"));
|
||||
query->m_subs.push_back(orChain);
|
||||
orChain = 0;
|
||||
}
|
||||
//DPRINT((stderr, "Adding to main chain\n"));
|
||||
// Add new clause to main query
|
||||
query->m_subs.push_back(nclause);
|
||||
}
|
||||
|
||||
prev_or = false;
|
||||
}
|
||||
|
||||
nextfield:
|
||||
// Advance current string position. We checked earlier that
|
||||
// the increment is strictly positive, so we won't loop
|
||||
// forever
|
||||
m_cp += m_pmatch[0].rm_eo;
|
||||
if (m_cp >= cpe)
|
||||
break;
|
||||
}
|
||||
|
||||
if (orChain) {
|
||||
// Getting out of OR. Add the OR subquery to the main one
|
||||
DPRINT((stderr, "Adding OR chain to main.Before: \n"));
|
||||
DUMPQ(query);
|
||||
DUMPQ(orChain);
|
||||
query->m_subs.push_back(orChain);
|
||||
}
|
||||
|
||||
regfree(&m_rx);
|
||||
m_rxneedsfree = false;
|
||||
return query;
|
||||
}
|
||||
|
||||
#else // TEST
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "wasastringtoquery.h"
|
||||
|
||||
static char *thisprog;
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
thisprog = argv[0];
|
||||
argc--; argv++;
|
||||
|
||||
if (argc != 1) {
|
||||
fprintf(stderr, "need one arg\n");
|
||||
exit(1);
|
||||
}
|
||||
const string str = *argv++;argc--;
|
||||
string reason;
|
||||
StringToWasaQuery qparser;
|
||||
WasaQuery *q = qparser.stringToQuery(str, reason);
|
||||
if (q == 0) {
|
||||
fprintf(stderr, "stringToQuery failed: %s\n", reason.c_str());
|
||||
exit(1);
|
||||
}
|
||||
string desc;
|
||||
q->describe(desc);
|
||||
fprintf(stderr, "Finally: %s\n", desc.c_str());
|
||||
exit(0);
|
||||
}
|
||||
|
||||
#endif // TEST_WASASTRINGTOQUERY
|
||||
@ -1,112 +0,0 @@
|
||||
/* Copyright (C) 2006 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifndef _WASASTRINGTOQUERY_H_INCLUDED_
|
||||
#define _WASASTRINGTOQUERY_H_INCLUDED_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
/* Note: Xesam used to be named wasabi. We changed the references to wasabi in
|
||||
the comments, but not the code */
|
||||
|
||||
/**
|
||||
* A simple class to represent a parsed Xesam user language element.
|
||||
* Can hold one leaf element or an array of subqueries to be joined by AND/OR
|
||||
*
|
||||
* The complete query is represented by a top WasaQuery holding a
|
||||
* chain of ANDed subclauses. Some of the subclauses may be themselves
|
||||
* OR'ed lists (it doesn't go deeper). Entries in the AND list may be
|
||||
* negated (AND NOT).
|
||||
*
|
||||
* For LEAF elements, the value can hold one or several words. In the
|
||||
* latter case, it should be interpreted as a phrase (comes from a
|
||||
* user-entered "quoted string"), except if the modifier flags say otherwise.
|
||||
*
|
||||
* Some fields only make sense either for compound or LEAF queries. This
|
||||
* is commented for each. We should subclass really.
|
||||
*
|
||||
* Note that wasaStringToQuery supposedly parses the whole Xesam
|
||||
* User Search Language v 0.95, but that some elements are dropped or
|
||||
* ignored during the translation to a native Recoll query in wasaToRcl
|
||||
*/
|
||||
class WasaQuery {
|
||||
public:
|
||||
/** Type of this element: leaf or AND/OR chain */
|
||||
enum Op {OP_NULL, OP_LEAF, OP_OR, OP_AND};
|
||||
/** Relation to be searched between field and value. Recoll actually only
|
||||
supports "contain" except for a size field */
|
||||
enum Rel {REL_NULL, REL_EQUALS, REL_CONTAINS, REL_LT, REL_LTE,
|
||||
REL_GT, REL_GTE};
|
||||
/** Modifiers for terms: case/diacritics handling,
|
||||
stemming control... */
|
||||
enum Modifier {WQM_CASESENS = 1, WQM_DIACSENS = 2, WQM_NOSTEM = 4,
|
||||
WQM_BOOST = 8, WQM_PROX = 0x10, WQM_SLOPPY = 0x20,
|
||||
WQM_WORDS = 0x40, WQM_PHRASESLACK = 0x80, WQM_REGEX = 0x100,
|
||||
WQM_FUZZY = 0x200, WQM_QUOTED = 0x400};
|
||||
|
||||
typedef vector<WasaQuery*> subqlist_t;
|
||||
|
||||
WasaQuery()
|
||||
: m_op(OP_NULL), m_rel(REL_NULL), m_exclude(false),
|
||||
m_modifiers(0), m_slack(0), m_weight(1.0)
|
||||
{}
|
||||
|
||||
~WasaQuery();
|
||||
|
||||
/** Get string describing the query tree from this point */
|
||||
void describe(string &desc) const;
|
||||
|
||||
/** Op to be performed on either value (may be LEAF or EXCL, or subqs */
|
||||
WasaQuery::Op m_op;
|
||||
|
||||
/** Field specification if any (ie: title, author ...) Only OPT_LEAF */
|
||||
string m_fieldspec;
|
||||
/** Relation between field and value: =, :, <,>,<=, >= */
|
||||
WasaQuery::Rel m_rel;
|
||||
|
||||
/* Negating flag */
|
||||
bool m_exclude;
|
||||
|
||||
/* String value. Valid for op == OP_LEAF or EXCL */
|
||||
string m_value;
|
||||
|
||||
/** Subqueries. Valid for conjunctions */
|
||||
vector<WasaQuery*> m_subs;
|
||||
|
||||
unsigned int m_modifiers;
|
||||
int m_slack;
|
||||
float m_weight;
|
||||
};
|
||||
|
||||
/**
|
||||
* Wasabi query string parser class. Could be a simple function
|
||||
* really, but there might be some parser initialization work done in
|
||||
* the constructor.
|
||||
*/
|
||||
class StringToWasaQuery {
|
||||
public:
|
||||
StringToWasaQuery();
|
||||
~StringToWasaQuery();
|
||||
WasaQuery *stringToQuery(const string& str, string& reason);
|
||||
class Internal;
|
||||
private:
|
||||
Internal *internal;
|
||||
};
|
||||
|
||||
#endif /* _WASASTRINGTOQUERY_H_INCLUDED_ */
|
||||
@ -1,286 +0,0 @@
|
||||
/* Copyright (C) 2006 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <algorithm>
|
||||
using std::string;
|
||||
using std::list;
|
||||
|
||||
#include "rclconfig.h"
|
||||
#include "wasastringtoquery.h"
|
||||
#include "rcldb.h"
|
||||
#include "searchdata.h"
|
||||
#include "wasatorcl.h"
|
||||
#include "debuglog.h"
|
||||
#include "smallut.h"
|
||||
#include "rclconfig.h"
|
||||
#include "refcntr.h"
|
||||
#include "textsplit.h"
|
||||
|
||||
static Rcl::SearchData *wasaQueryToRcl(const RclConfig *config,
|
||||
const string& stemlang,
|
||||
WasaQuery *wasa,
|
||||
const string& autosuffs, string& reason)
|
||||
{
|
||||
if (wasa == 0) {
|
||||
reason = "NULL query";
|
||||
return 0;
|
||||
}
|
||||
if (wasa->m_op != WasaQuery::OP_AND && wasa->m_op != WasaQuery::OP_OR) {
|
||||
reason = "Top query neither AND nor OR ?";
|
||||
LOGERR(("wasaQueryToRcl: top query neither AND nor OR!\n"));
|
||||
return 0;
|
||||
}
|
||||
|
||||
Rcl::SearchData *sdata = new
|
||||
Rcl::SearchData(wasa->m_op == WasaQuery::OP_AND ? Rcl::SCLT_AND :
|
||||
Rcl::SCLT_OR, stemlang);
|
||||
LOGDEB2(("wasaQueryToRcl: %s chain\n", wasa->m_op == WasaQuery::OP_AND ?
|
||||
"AND" : "OR"));
|
||||
|
||||
WasaQuery::subqlist_t::iterator it;
|
||||
Rcl::SearchDataClause *nclause;
|
||||
|
||||
// Walk the list of clauses. Some pseudo-field types need special
|
||||
// processing, which results in setting data in the top struct
|
||||
// instead of adding a clause. We check for these first
|
||||
for (it = wasa->m_subs.begin(); it != wasa->m_subs.end(); it++) {
|
||||
|
||||
if (!stringicmp("mime", (*it)->m_fieldspec) ||
|
||||
!stringicmp("format", (*it)->m_fieldspec)) {
|
||||
if ((*it)->m_op == WasaQuery::OP_LEAF) {
|
||||
if ((*it)->m_exclude) {
|
||||
sdata->remFiletype((*it)->m_value);
|
||||
} else {
|
||||
sdata->addFiletype((*it)->m_value);
|
||||
}
|
||||
} else {
|
||||
reason = "internal error: mime clause not leaf??";
|
||||
return 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Xesam uses "type", we also support "rclcat", for broad
|
||||
// categories like "audio", "presentation", etc.
|
||||
if (!stringicmp("rclcat", (*it)->m_fieldspec) ||
|
||||
!stringicmp("type", (*it)->m_fieldspec)) {
|
||||
if ((*it)->m_op != WasaQuery::OP_LEAF) {
|
||||
reason = "internal error: rclcat/type clause not leaf??";
|
||||
return 0;
|
||||
}
|
||||
vector<string> mtypes;
|
||||
if (config && config->getMimeCatTypes((*it)->m_value, mtypes)
|
||||
&& !mtypes.empty()) {
|
||||
for (vector<string>::iterator mit = mtypes.begin();
|
||||
mit != mtypes.end(); mit++) {
|
||||
if ((*it)->m_exclude) {
|
||||
sdata->remFiletype(*mit);
|
||||
} else {
|
||||
sdata->addFiletype(*mit);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
reason = "Unknown rclcat/type value: no mime types found";
|
||||
return 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle "date" spec
|
||||
if (!stringicmp("date", (*it)->m_fieldspec)) {
|
||||
if ((*it)->m_op != WasaQuery::OP_LEAF) {
|
||||
reason = "Negative date filtering not supported";
|
||||
return 0;
|
||||
}
|
||||
DateInterval di;
|
||||
if (!parsedateinterval((*it)->m_value, &di)) {
|
||||
LOGERR(("wasaQueryToRcl: bad date interval format\n"));
|
||||
reason = "Bad date interval format";
|
||||
return 0;
|
||||
}
|
||||
LOGDEB(("wasaQueryToRcl:: date span: %d-%d-%d/%d-%d-%d\n",
|
||||
di.y1,di.m1,di.d1, di.y2,di.m2,di.d2));
|
||||
sdata->setDateSpan(&di);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle "size" spec
|
||||
if (!stringicmp("size", (*it)->m_fieldspec)) {
|
||||
if ((*it)->m_op != WasaQuery::OP_LEAF) {
|
||||
reason = "Negative size filtering not supported";
|
||||
return 0;
|
||||
}
|
||||
char *cp;
|
||||
size_t size = strtoll((*it)->m_value.c_str(), &cp, 10);
|
||||
if (*cp != 0) {
|
||||
switch (*cp) {
|
||||
case 'k': case 'K': size *= 1E3;break;
|
||||
case 'm': case 'M': size *= 1E6;break;
|
||||
case 'g': case 'G': size *= 1E9;break;
|
||||
case 't': case 'T': size *= 1E12;break;
|
||||
default:
|
||||
reason = string("Bad multiplier suffix: ") + *cp;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
switch ((*it)->m_rel) {
|
||||
case WasaQuery::REL_EQUALS:
|
||||
sdata->setMaxSize(size);
|
||||
sdata->setMinSize(size);
|
||||
break;
|
||||
case WasaQuery::REL_LT:
|
||||
case WasaQuery::REL_LTE:
|
||||
sdata->setMaxSize(size);
|
||||
break;
|
||||
case WasaQuery::REL_GT:
|
||||
case WasaQuery::REL_GTE:
|
||||
sdata->setMinSize(size);
|
||||
break;
|
||||
default:
|
||||
reason = "Bad relation operator with size query. Use > < or =";
|
||||
return 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// "Regular" processing follows:
|
||||
unsigned int mods = (unsigned int)(*it)->m_modifiers;
|
||||
LOGDEB0(("wasaQueryToRcl: clause modifiers 0x%x\n", mods));
|
||||
nclause = 0;
|
||||
|
||||
switch ((*it)->m_op) {
|
||||
case WasaQuery::OP_NULL:
|
||||
case WasaQuery::OP_AND:
|
||||
default:
|
||||
reason = "Found bad NULL or AND query type in list";
|
||||
LOGERR(("wasaQueryToRcl: found bad NULL or AND q type in list\n"));
|
||||
continue;
|
||||
|
||||
case WasaQuery::OP_LEAF: {
|
||||
LOGDEB0(("wasaQueryToRcl: leaf clause [%s:%s] slack %d excl %d\n",
|
||||
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str(),
|
||||
(*it)->m_slack, (*it)->m_exclude));
|
||||
|
||||
// Change terms found in the "autosuffs" list into "ext"
|
||||
// field queries
|
||||
if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) {
|
||||
vector<string> asfv;
|
||||
if (stringToStrings(autosuffs, asfv)) {
|
||||
if (find_if(asfv.begin(), asfv.end(),
|
||||
StringIcmpPred((*it)->m_value)) != asfv.end()) {
|
||||
(*it)->m_fieldspec = "ext";
|
||||
(*it)->m_modifiers |= WasaQuery::WQM_NOSTEM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!stringicmp("dir", (*it)->m_fieldspec)) {
|
||||
// dir filtering special case
|
||||
nclause = new Rcl::SearchDataClausePath((*it)->m_value,
|
||||
(*it)->m_exclude);
|
||||
} else {
|
||||
if ((*it)->m_exclude && wasa->m_op != WasaQuery::OP_AND) {
|
||||
LOGERR(("wasaQueryToRcl: excl clause inside OR list!\n"));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (mods & WasaQuery::WQM_QUOTED) {
|
||||
Rcl::SClType tp = (mods & WasaQuery::WQM_PROX) ?
|
||||
Rcl::SCLT_NEAR :
|
||||
Rcl::SCLT_PHRASE;
|
||||
nclause = new Rcl::SearchDataClauseDist(tp, (*it)->m_value,
|
||||
(*it)->m_slack,
|
||||
(*it)->m_fieldspec);
|
||||
} else {
|
||||
// If term has commas or slashes inside, take it
|
||||
// as a list, turn the slashes/commas to spaces,
|
||||
// leave unquoted. Otherwise, this would end up as
|
||||
// a phrase query. This is a handy way to enter
|
||||
// multiple terms to be searched inside a
|
||||
// field. We interpret ',' as AND, and '/' as
|
||||
// OR. No mixes allowed and ',' wins.
|
||||
Rcl::SClType tp = (*it)->m_exclude ? Rcl::SCLT_OR:
|
||||
Rcl::SCLT_AND;
|
||||
string ns = neutchars((*it)->m_value, ",");
|
||||
if (ns.compare((*it)->m_value)) {
|
||||
// had ','
|
||||
tp = Rcl::SCLT_AND;
|
||||
} else {
|
||||
ns = neutchars((*it)->m_value, "/");
|
||||
if (ns.compare((*it)->m_value)) {
|
||||
tp = Rcl::SCLT_OR;
|
||||
}
|
||||
}
|
||||
nclause = new Rcl::SearchDataClauseSimple(tp, ns,
|
||||
(*it)->m_fieldspec);
|
||||
}
|
||||
nclause->setexclude((*it)->m_exclude);
|
||||
}
|
||||
|
||||
if (nclause == 0) {
|
||||
reason = "Out of memory";
|
||||
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case WasaQuery::OP_OR:
|
||||
LOGDEB2(("wasaQueryToRcl: OR clause [%s]:[%s]\n",
|
||||
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
|
||||
// Create a subquery.
|
||||
Rcl::SearchData *sub =
|
||||
wasaQueryToRcl(config, stemlang, *it, autosuffs, reason);
|
||||
if (sub == 0) {
|
||||
continue;
|
||||
}
|
||||
nclause =
|
||||
new Rcl::SearchDataClauseSub(RefCntr<Rcl::SearchData>(sub));
|
||||
if (nclause == 0) {
|
||||
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
||||
reason = "Out of memory";
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (mods & WasaQuery::WQM_NOSTEM)
|
||||
nclause->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
||||
if (mods & WasaQuery::WQM_DIACSENS)
|
||||
nclause->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
|
||||
if (mods & WasaQuery::WQM_CASESENS)
|
||||
nclause->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
|
||||
if ((*it)->m_weight != 1.0)
|
||||
nclause->setWeight((*it)->m_weight);
|
||||
sdata->addClause(nclause);
|
||||
}
|
||||
|
||||
return sdata;
|
||||
}
|
||||
|
||||
Rcl::SearchData *wasaStringToRcl(const RclConfig *config, const string& stemlang,
|
||||
const string &qs, string &reason,
|
||||
const string& autosuffs)
|
||||
{
|
||||
StringToWasaQuery parser;
|
||||
WasaQuery *wq = parser.stringToQuery(qs, reason);
|
||||
if (wq == 0)
|
||||
return 0;
|
||||
return wasaQueryToRcl(config, stemlang, wq, autosuffs, reason);
|
||||
}
|
||||
@ -17,15 +17,18 @@
|
||||
|
||||
#ifndef _WASATORCL_H_INCLUDED_
|
||||
#define _WASATORCL_H_INCLUDED_
|
||||
|
||||
#include <string>
|
||||
using std::string;
|
||||
|
||||
#include "rcldb.h"
|
||||
#include "searchdata.h"
|
||||
|
||||
namespace Rcl {
|
||||
class SearchData;
|
||||
}
|
||||
class RclConfig;
|
||||
|
||||
extern Rcl::SearchData *wasaStringToRcl(const RclConfig *, const string& stemlang,
|
||||
const string& query, string &reason,
|
||||
const string& autosuffs = string());
|
||||
extern Rcl::SearchData *wasaStringToRcl(const RclConfig *,
|
||||
const std::string& stemlang,
|
||||
const std::string& query,
|
||||
std::string &reason,
|
||||
const std::string& autosuffs = "");
|
||||
|
||||
#endif /* _WASATORCL_H_INCLUDED_ */
|
||||
|
||||
@ -52,8 +52,6 @@ namespace Rcl {
|
||||
typedef vector<SearchDataClause *>::iterator qlist_it_t;
|
||||
typedef vector<SearchDataClause *>::const_iterator qlist_cit_t;
|
||||
|
||||
static const int original_term_wqf_booster = 10;
|
||||
|
||||
void SearchData::commoninit()
|
||||
{
|
||||
m_haveDates = false;
|
||||
@ -74,241 +72,6 @@ SearchData::~SearchData()
|
||||
delete *it;
|
||||
}
|
||||
|
||||
// Expand categories and mime type wild card exps Categories are
|
||||
// expanded against the configuration, mimetypes against the index
|
||||
// (for wildcards).
|
||||
bool SearchData::expandFileTypes(Db &db, vector<string>& tps)
|
||||
{
|
||||
const RclConfig *cfg = db.getConf();
|
||||
if (!cfg) {
|
||||
LOGFATAL(("Db::expandFileTypes: null configuration!!\n"));
|
||||
return false;
|
||||
}
|
||||
vector<string> exptps;
|
||||
|
||||
for (vector<string>::iterator it = tps.begin(); it != tps.end(); it++) {
|
||||
if (cfg->isMimeCategory(*it)) {
|
||||
vector<string>tps;
|
||||
cfg->getMimeCatTypes(*it, tps);
|
||||
exptps.insert(exptps.end(), tps.begin(), tps.end());
|
||||
} else {
|
||||
TermMatchResult res;
|
||||
string mt = stringtolower((const string&)*it);
|
||||
// We set casesens|diacsens to get an equivalent of ixTermMatch()
|
||||
db.termMatch(Db::ET_WILD|Db::ET_CASESENS|Db::ET_DIACSENS, string(),
|
||||
mt, res, -1, "mtype");
|
||||
if (res.entries.empty()) {
|
||||
exptps.push_back(it->c_str());
|
||||
} else {
|
||||
for (vector<TermMatchEntry>::const_iterator rit =
|
||||
res.entries.begin(); rit != res.entries.end(); rit++) {
|
||||
exptps.push_back(strip_prefix(rit->term));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
sort(exptps.begin(), exptps.end());
|
||||
exptps.erase(unique(exptps.begin(), exptps.end()), exptps.end());
|
||||
|
||||
tps = exptps;
|
||||
return true;
|
||||
}
|
||||
|
||||
static const char *maxXapClauseMsg =
|
||||
"Maximum Xapian query size exceeded. Increase maxXapianClauses "
|
||||
"in the configuration. ";
|
||||
static const char *maxXapClauseCaseDiacMsg =
|
||||
"Or try to use case (C) or diacritics (D) sensitivity qualifiers, or less "
|
||||
"wildcards ?"
|
||||
;
|
||||
|
||||
bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
|
||||
vector<SearchDataClause*>& query,
|
||||
string& reason, void *d)
|
||||
{
|
||||
Xapian::Query xq;
|
||||
for (qlist_it_t it = query.begin(); it != query.end(); it++) {
|
||||
Xapian::Query nq;
|
||||
if (!(*it)->toNativeQuery(db, &nq)) {
|
||||
LOGERR(("SearchData::clausesToQuery: toNativeQuery failed: %s\n",
|
||||
(*it)->getReason().c_str()));
|
||||
reason += (*it)->getReason() + " ";
|
||||
return false;
|
||||
}
|
||||
if (nq.empty()) {
|
||||
LOGDEB(("SearchData::clausesToQuery: skipping empty clause\n"));
|
||||
continue;
|
||||
}
|
||||
// If this structure is an AND list, must use AND_NOT for excl clauses.
|
||||
// Else this is an OR list, and there can't be excl clauses (checked by
|
||||
// addClause())
|
||||
Xapian::Query::op op;
|
||||
if (tp == SCLT_AND) {
|
||||
if ((*it)->getexclude()) {
|
||||
op = Xapian::Query::OP_AND_NOT;
|
||||
} else {
|
||||
op = Xapian::Query::OP_AND;
|
||||
}
|
||||
} else {
|
||||
op = Xapian::Query::OP_OR;
|
||||
}
|
||||
if (xq.empty()) {
|
||||
if (op == Xapian::Query::OP_AND_NOT)
|
||||
xq = Xapian::Query(op, Xapian::Query::MatchAll, nq);
|
||||
else
|
||||
xq = nq;
|
||||
} else {
|
||||
xq = Xapian::Query(op, xq, nq);
|
||||
}
|
||||
if (int(xq.get_length()) >= getMaxCl()) {
|
||||
LOGERR(("%s\n", maxXapClauseMsg));
|
||||
m_reason += maxXapClauseMsg;
|
||||
if (!o_index_stripchars)
|
||||
m_reason += maxXapClauseCaseDiacMsg;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
LOGDEB0(("SearchData::clausesToQuery: got %d clauses\n", xq.get_length()));
|
||||
|
||||
if (xq.empty())
|
||||
xq = Xapian::Query::MatchAll;
|
||||
|
||||
*((Xapian::Query *)d) = xq;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
||||
{
|
||||
LOGDEB(("SearchData::toNativeQuery: stemlang [%s]\n", m_stemlang.c_str()));
|
||||
m_reason.erase();
|
||||
|
||||
db.getConf()->getConfParam("maxTermExpand", &m_maxexp);
|
||||
db.getConf()->getConfParam("maxXapianClauses", &m_maxcl);
|
||||
|
||||
// Walk the clause list translating each in turn and building the
|
||||
// Xapian query tree
|
||||
Xapian::Query xq;
|
||||
if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
|
||||
LOGERR(("SearchData::toNativeQuery: clausesToQuery failed. reason: %s\n",
|
||||
m_reason.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_haveDates) {
|
||||
// If one of the extremities is unset, compute db extremas
|
||||
if (m_dates.y1 == 0 || m_dates.y2 == 0) {
|
||||
int minyear = 1970, maxyear = 2100;
|
||||
if (!db.maxYearSpan(&minyear, &maxyear)) {
|
||||
LOGERR(("Can't retrieve index min/max dates\n"));
|
||||
//whatever, go on.
|
||||
}
|
||||
|
||||
if (m_dates.y1 == 0) {
|
||||
m_dates.y1 = minyear;
|
||||
m_dates.m1 = 1;
|
||||
m_dates.d1 = 1;
|
||||
}
|
||||
if (m_dates.y2 == 0) {
|
||||
m_dates.y2 = maxyear;
|
||||
m_dates.m2 = 12;
|
||||
m_dates.d2 = 31;
|
||||
}
|
||||
}
|
||||
LOGDEB(("Db::toNativeQuery: date interval: %d-%d-%d/%d-%d-%d\n",
|
||||
m_dates.y1, m_dates.m1, m_dates.d1,
|
||||
m_dates.y2, m_dates.m2, m_dates.d2));
|
||||
Xapian::Query dq = date_range_filter(m_dates.y1, m_dates.m1, m_dates.d1,
|
||||
m_dates.y2, m_dates.m2, m_dates.d2);
|
||||
if (dq.empty()) {
|
||||
LOGINFO(("Db::toNativeQuery: date filter is empty\n"));
|
||||
}
|
||||
// If no probabilistic query is provided then promote the daterange
|
||||
// filter to be THE query instead of filtering an empty query.
|
||||
if (xq.empty()) {
|
||||
LOGINFO(("Db::toNativeQuery: proba query is empty\n"));
|
||||
xq = dq;
|
||||
} else {
|
||||
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, dq);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (m_minSize != size_t(-1) || m_maxSize != size_t(-1)) {
|
||||
Xapian::Query sq;
|
||||
char min[50], max[50];
|
||||
sprintf(min, "%lld", (long long)m_minSize);
|
||||
sprintf(max, "%lld", (long long)m_maxSize);
|
||||
if (m_minSize == size_t(-1)) {
|
||||
string value(max);
|
||||
leftzeropad(value, 12);
|
||||
sq = Xapian::Query(Xapian::Query::OP_VALUE_LE, VALUE_SIZE, value);
|
||||
} else if (m_maxSize == size_t(-1)) {
|
||||
string value(min);
|
||||
leftzeropad(value, 12);
|
||||
sq = Xapian::Query(Xapian::Query::OP_VALUE_GE, VALUE_SIZE, value);
|
||||
} else {
|
||||
string minvalue(min);
|
||||
leftzeropad(minvalue, 12);
|
||||
string maxvalue(max);
|
||||
leftzeropad(maxvalue, 12);
|
||||
sq = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, VALUE_SIZE,
|
||||
minvalue, maxvalue);
|
||||
}
|
||||
|
||||
// If no probabilistic query is provided then promote the
|
||||
// filter to be THE query instead of filtering an empty query.
|
||||
if (xq.empty()) {
|
||||
LOGINFO(("Db::toNativeQuery: proba query is empty\n"));
|
||||
xq = sq;
|
||||
} else {
|
||||
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq);
|
||||
}
|
||||
}
|
||||
|
||||
// Add the autophrase if any
|
||||
if (m_autophrase.isNotNull()) {
|
||||
Xapian::Query apq;
|
||||
if (m_autophrase->toNativeQuery(db, &apq)) {
|
||||
xq = xq.empty() ? apq :
|
||||
Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, apq);
|
||||
}
|
||||
}
|
||||
|
||||
// Add the file type filtering clause if any
|
||||
if (!m_filetypes.empty()) {
|
||||
expandFileTypes(db, m_filetypes);
|
||||
|
||||
Xapian::Query tq;
|
||||
for (vector<string>::iterator it = m_filetypes.begin();
|
||||
it != m_filetypes.end(); it++) {
|
||||
string term = wrap_prefix(mimetype_prefix) + *it;
|
||||
LOGDEB0(("Adding file type term: [%s]\n", term.c_str()));
|
||||
tq = tq.empty() ? Xapian::Query(term) :
|
||||
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
|
||||
}
|
||||
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
|
||||
}
|
||||
|
||||
// Add the neg file type filtering clause if any
|
||||
if (!m_nfiletypes.empty()) {
|
||||
expandFileTypes(db, m_nfiletypes);
|
||||
|
||||
Xapian::Query tq;
|
||||
for (vector<string>::iterator it = m_nfiletypes.begin();
|
||||
it != m_nfiletypes.end(); it++) {
|
||||
string term = wrap_prefix(mimetype_prefix) + *it;
|
||||
LOGDEB0(("Adding negative file type term: [%s]\n", term.c_str()));
|
||||
tq = tq.empty() ? Xapian::Query(term) :
|
||||
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
|
||||
}
|
||||
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq);
|
||||
}
|
||||
|
||||
*((Xapian::Query *)d) = xq;
|
||||
return true;
|
||||
}
|
||||
|
||||
// This is called by the GUI simple search if the option is set: add
|
||||
// (OR) phrase to a query (if it is simple enough) so that results
|
||||
// where the search terms are close and in order will come up on top.
|
||||
@ -428,695 +191,4 @@ void SearchData::getTerms(HighlightData &hld) const
|
||||
return;
|
||||
}
|
||||
|
||||
// Splitter callback for breaking a user string into simple terms and
|
||||
// phrases. This is for parts of the user entry which would appear as
|
||||
// a single word because there is no white space inside, but are
|
||||
// actually multiple terms to rcldb (ie term1,term2)
|
||||
class TextSplitQ : public TextSplitP {
public:
    TextSplitQ(Flags flags, const StopList &_stops, TermProc *prc)
        : TextSplitP(prc, flags),
          curnostemexp(false), stops(_stops), alltermcount(0), lastpos(0)
    {}

    // Called by the splitter for each word. We only note whether the
    // word starts with an uppercase letter (which will turn off stem
    // expansion for it), then forward to the base class / term
    // processor chain.
    bool takeword(const std::string &term, int pos, int bs, int be)
    {
        // Check if the first letter is a majuscule in which
        // case we do not want to do stem expansion. Need to do this
        // before unac of course...
        curnostemexp = unaciscapital(term);

        return TextSplitP::takeword(term, pos, bs, be);
    }

    // Capitalization flag for the word currently going through the
    // pipeline.
    bool curnostemexp;
    // Output: terms and their matching no-stem-expansion flags,
    // filled in by TermProcQ::flush() at the end of the pipeline.
    vector<string> terms;
    vector<bool> nostemexps;
    const StopList &stops;
    // Count of terms including stopwords: this is for adjusting
    // phrase/near slack
    int alltermcount;
    // Highest word position seen (maintained by TermProcQ::takeword).
    int lastpos;
};
|
||||
|
||||
// Final element of the term processing chain: accumulates terms by
// position (keeping the longest term seen at each position), then
// flush() copies them, in position order, to the parent TextSplitQ's
// terms/nostemexps vectors.
class TermProcQ : public TermProc {
public:
    TermProcQ() : TermProc(0), m_ts(0) {}
    // Set the TextSplitQ which receives the final term lists.
    void setTSQ(TextSplitQ *ts) {m_ts = ts;}

    bool takeword(const std::string &term, int pos, int bs, int be)
    {
        // Keep the splitter's bookkeeping (total term count and
        // highest position) up to date: used for slack adjustment.
        m_ts->alltermcount++;
        if (m_ts->lastpos < pos)
            m_ts->lastpos = pos;
        // be == 0 marks a term with no byte extent: never stem-expand
        // those; otherwise use the capitalization flag from TextSplitQ.
        bool noexpand = be ? m_ts->curnostemexp : true;
        LOGDEB1(("TermProcQ::takeword: pushing [%s] pos %d noexp %d\n",
                 term.c_str(), pos, noexpand));
        // At a given position, keep only the longest term produced.
        if (m_terms[pos].size() < term.size()) {
            m_terms[pos] = term;
            m_nste[pos] = noexpand;
        }
        return true;
    }
    // Copy accumulated terms and flags to the TextSplitQ, in
    // ascending position order (map iteration order).
    bool flush()
    {
        for (map<int, string>::const_iterator it = m_terms.begin();
             it != m_terms.end(); it++) {
            m_ts->terms.push_back(it->second);
            m_ts->nostemexps.push_back(m_nste[it->first]);
        }
        return true;
    }
private:
    TextSplitQ *m_ts;
    // Position -> term / no-stem-expansion flag.
    map<int, string> m_terms;
    map<int, bool> m_nste;
};
|
||||
|
||||
|
||||
#if 1
|
||||
static void listVector(const string& what, const vector<string>&l)
|
||||
{
|
||||
string a;
|
||||
for (vector<string>::const_iterator it = l.begin(); it != l.end(); it++) {
|
||||
a = a + *it + " ";
|
||||
}
|
||||
LOGDEB0(("%s: %s\n", what.c_str(), a.c_str()));
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Expand term into term list, using appropriate mode: stem, wildcards,
|
||||
* diacritics...
|
||||
*
|
||||
* @param mods stem expansion, case and diacritics sensitivity control.
|
||||
* @param term input single word
|
||||
* @param oexp output expansion list
|
||||
* @param sterm output original input term if there were no wildcards
|
||||
* @param prefix field prefix in index. We could recompute it, but the caller
|
||||
* has it already. Used in the simple case where there is nothing to expand,
|
||||
* and we just return the prefixed term (else Db::termMatch deals with it).
|
||||
*/
|
||||
bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
                                        string& ermsg, int mods,
                                        const string& term,
                                        vector<string>& oexp, string &sterm,
                                        const string& prefix)
{
    LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",
             mods, m_field.c_str(), term.c_str(), getStemLang().c_str()));
    sterm.clear();
    oexp.clear();
    if (term.empty())
        return true;

    // Choose the expansion limit: the soft limit if set (expansion is
    // then silently truncated), else the hard one (exceeding it is an
    // error, see below).
    bool maxexpissoft = false;
    int maxexpand = getSoftMaxExp();
    if (maxexpand != -1) {
        maxexpissoft = true;
    } else {
        maxexpand = getMaxExp();
    }

    bool haswild = term.find_first_of(cstr_minwilds) != string::npos;

    // If there are no wildcards, add term to the list of user-entered terms
    if (!haswild) {
        m_hldata.uterms.insert(term);
        sterm = term;
    }
    // No stem expansion if there are wildcards or if prevented by caller
    bool nostemexp = (mods & SDCM_NOSTEMMING) != 0;
    if (haswild || getStemLang().empty()) {
        LOGDEB2(("expandTerm: found wildcards or stemlang empty: no exp\n"));
        nostemexp = true;
    }

    // noexpansion can be modified further down by possible case/diac expansion
    bool noexpansion = nostemexp && !haswild;

    int termmatchsens = 0;

    bool diac_sensitive = (mods & SDCM_DIACSENS) != 0;
    bool case_sensitive = (mods & SDCM_CASESENS) != 0;

    if (o_index_stripchars) {
        // Stripped index: sensitivity flags are meaningless there.
        diac_sensitive = case_sensitive = false;
    } else {
        // If we are working with a raw index, apply the rules for case and
        // diacritics sensitivity.

        // If any character has a diacritic, we become
        // diacritic-sensitive. Note that the way that the test is
        // performed (conversion+comparison) will automatically ignore
        // accented characters which are actually a separate letter
        if (getAutoDiac() && unachasaccents(term)) {
            LOGDEB0(("expandTerm: term has accents -> diac-sensitive\n"));
            diac_sensitive = true;
        }

        // If any character apart the first is uppercase, we become
        // case-sensitive. The first character is reserved for
        // turning off stemming. You need to use a query language
        // modifier to search for Floor in a case-sensitive way.
        Utf8Iter it(term);
        it++;
        if (getAutoCase() && unachasuppercase(term.substr(it.getBpos()))) {
            LOGDEB0(("expandTerm: term has uppercase -> case-sensitive\n"));
            case_sensitive = true;
        }

        // If we are sensitive to case or diacritics turn stemming off
        if (diac_sensitive || case_sensitive) {
            LOGDEB0(("expandTerm: diac or case sens set -> stemexpand off\n"));
            nostemexp = true;
        }

        // Any remaining insensitivity means the term list must still
        // be expanded (case and/or diacritics variants).
        if (!case_sensitive || !diac_sensitive)
            noexpansion = false;
    }

    // Translate sensitivity flags for Db::termMatch().
    if (case_sensitive)
        termmatchsens |= Db::ET_CASESENS;
    if (diac_sensitive)
        termmatchsens |= Db::ET_DIACSENS;

    // Fast path: nothing to expand, just output the prefixed term.
    if (noexpansion) {
        oexp.push_back(prefix + term);
        m_hldata.terms[term] = term;
        LOGDEB(("ExpandTerm: noexpansion: final: %s\n", stringsToString(oexp).c_str()));
        return true;
    }

    Db::MatchType mtyp = haswild ? Db::ET_WILD :
        nostemexp ? Db::ET_NONE : Db::ET_STEM;
    TermMatchResult res;
    if (!db.termMatch(mtyp | termmatchsens, getStemLang(), term, res, maxexpand,
                      m_field)) {
        // Let it go through
    }

    // Term match entries to vector of terms
    if (int(res.entries.size()) >= maxexpand && !maxexpissoft) {
        ermsg = "Maximum term expansion size exceeded."
            " Maybe use case/diacritics sensitivity or increase maxTermExpand.";
        return false;
    }
    for (vector<TermMatchEntry>::const_iterator it = res.entries.begin();
         it != res.entries.end(); it++) {
        oexp.push_back(it->term);
    }
    // If the term does not exist at all in the db, the return from
    // termMatch() is going to be empty, which is not what we want (we
    // would then compute an empty Xapian query)
    if (oexp.empty())
        oexp.push_back(prefix + term);

    // Remember the uterm-to-expansion links
    for (vector<string>::const_iterator it = oexp.begin();
         it != oexp.end(); it++) {
        m_hldata.terms[strip_prefix(*it)] = term;
    }
    LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
    return true;
}
|
||||
|
||||
// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
|
||||
void multiply_groups(vector<vector<string> >::const_iterator vvit,
                     vector<vector<string> >::const_iterator vvend,
                     vector<string>& comb,
                     vector<vector<string> >&allcombs)
{
    // The group to expand at this recursion level. After this, vvit
    // designates the next group, for the recursive calls below.
    vector<vector<string> >::const_iterator curgroup = vvit++;

    // For each element of the current group: tentatively append it to
    // the combination under construction, then either record the
    // complete combination (last group) or recurse on the remaining
    // groups, and finally undo the tentative append.
    for (vector<string>::const_iterator elt = curgroup->begin();
         elt != curgroup->end(); elt++) {

        comb.push_back(*elt);

        if (vvit == vvend) {
            // No more groups: the combination is complete, store it.
            allcombs.push_back(comb);
        } else {
            // Expand the following groups with the current prefix.
            multiply_groups(vvit, vvend, comb, allcombs);
        }

        // Make room for the next element of this group.
        comb.pop_back();
    }
}
|
||||
|
||||
// Process a single-word user entry element: expand it (stem,
// wildcards, case/diacritics) and push the resulting OR query onto
// the output vector. Errors are reported through ermsg.
void SearchDataClauseSimple::processSimpleSpan(Rcl::Db &db, string& ermsg,
                                               const string& span,
                                               int mods, void * pq)
{
    vector<Xapian::Query>& pqueries(*(vector<Xapian::Query>*)pq);
    LOGDEB0(("StringToXapianQ::processSimpleSpan: [%s] mods 0x%x\n",
             span.c_str(), (unsigned int)mods));
    vector<string> exp;
    string sterm; // dumb version of user term

    // Compute the index term prefix for the field, if any.
    string prefix;
    const FieldTraits *ftp;
    if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
        prefix = wrap_prefix(ftp->pfx);
    }

    if (!expandTerm(db, ermsg, mods, span, exp, sterm, prefix))
        return;

    // Set up the highlight data. No prefix should go in there
    for (vector<string>::const_iterator it = exp.begin();
         it != exp.end(); it++) {
        m_hldata.groups.push_back(vector<string>(1, it->substr(prefix.size())));
        m_hldata.slacks.push_back(0);
        m_hldata.grpsugidx.push_back(m_hldata.ugroups.size() - 1);
    }

    // Push either term or OR of stem-expanded set
    Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());
    m_curcl += exp.size();

    // If sterm (simplified original user term) is not null, give it a
    // relevance boost. We do this even if no expansion occurred (else
    // the non-expanded terms in a term list would end-up with even
    // less wqf). This does not happen if there are wildcards anywhere
    // in the search.
    // We normally boost the original term in the stem expansion list. Don't
    // do it if there are wildcards anywhere, this would skew the results.
    bool doBoostUserTerm =
        (m_parentSearch && !m_parentSearch->haveWildCards()) ||
        (m_parentSearch == 0 && !m_haveWildCards);
    if (doBoostUserTerm && !sterm.empty()) {
        xq = Xapian::Query(Xapian::Query::OP_OR, xq,
                           Xapian::Query(prefix+sterm,
                                         original_term_wqf_booster));
    }
    pqueries.push_back(xq);
}
|
||||
|
||||
// User entry element had several terms: transform into a PHRASE or
|
||||
// NEAR xapian query, the elements of which can themselves be OR
|
||||
// queries if the terms get expanded by stemming or wildcards (we
|
||||
// don't do stemming for PHRASE though)
|
||||
void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg,
                                                 TextSplitQ *splitData,
                                                 int mods, void *pq,
                                                 bool useNear, int slack)
{
    vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
    Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR :
        Xapian::Query::OP_PHRASE;
    // One OR subquery (expansion set) per input term; these become
    // the children of the final PHRASE/NEAR query.
    vector<Xapian::Query> orqueries;
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
    bool hadmultiple = false;
#endif
    // Per-term expansion lists, unprefixed, kept for highlighting.
    vector<vector<string> >groups;

    // Compute the index term prefix for the field, if any.
    string prefix;
    const FieldTraits *ftp;
    if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
        prefix = wrap_prefix(ftp->pfx);
    }

    // A start anchor adds the start-of-field marker term as the first
    // phrase element (and one slack unit to compensate).
    if (mods & Rcl::SearchDataClause::SDCM_ANCHORSTART) {
        orqueries.push_back(Xapian::Query(prefix + start_of_field_term));
        slack++;
    }

    // Go through the list and perform stem/wildcard expansion for each element
    vector<bool>::iterator nxit = splitData->nostemexps.begin();
    for (vector<string>::iterator it = splitData->terms.begin();
         it != splitData->terms.end(); it++, nxit++) {
        LOGDEB0(("ProcessPhrase: processing [%s]\n", it->c_str()));
        // Adjust when we do stem expansion. Not if disabled by
        // caller, not inside phrases, and some versions of xapian
        // will accept only one OR clause inside NEAR.
        bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE)
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
            || hadmultiple
#endif // single OR inside NEAR
            ;
        int lmods = mods;
        if (nostemexp)
            lmods |= SearchDataClause::SDCM_NOSTEMMING;
        string sterm;
        vector<string> exp;
        if (!expandTerm(db, ermsg, lmods, *it, exp, sterm, prefix))
            return;
        LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
        listVector("", exp);
        // groups is used for highlighting, we don't want prefixes in there.
        vector<string> noprefs;
        for (vector<string>::const_iterator it = exp.begin();
             it != exp.end(); it++) {
            noprefs.push_back(it->substr(prefix.size()));
        }
        groups.push_back(noprefs);
        orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
                                          exp.begin(), exp.end()));
        m_curcl += exp.size();
        // Bail out if the Xapian clause budget is exhausted.
        if (m_curcl >= getMaxCl())
            return;
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
        if (exp.size() > 1)
            hadmultiple = true;
#endif
    }

    // An end anchor adds the end-of-field marker term as the last
    // phrase element (and one slack unit).
    if (mods & Rcl::SearchDataClause::SDCM_ANCHOREND) {
        orqueries.push_back(Xapian::Query(prefix + end_of_field_term));
        slack++;
    }

    // Generate an appropriate PHRASE/NEAR query with adjusted slack
    // For phrases, give a relevance boost like we do for original terms
    LOGDEB2(("PHRASE/NEAR: alltermcount %d lastpos %d\n",
             splitData->alltermcount, splitData->lastpos));
    Xapian::Query xq(op, orqueries.begin(), orqueries.end(),
                     splitData->lastpos + 1 + slack);
    if (op == Xapian::Query::OP_PHRASE)
        xq = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, xq,
                           original_term_wqf_booster);
    pqueries.push_back(xq);

    // Add all combinations of NEAR/PHRASE groups to the highlighting data.
    vector<vector<string> > allcombs;
    vector<string> comb;
    multiply_groups(groups.begin(), groups.end(), comb, allcombs);

    // Insert the search groups and slacks in the highlight data, with
    // a reference to the user entry that generated them:
    m_hldata.groups.insert(m_hldata.groups.end(),
                           allcombs.begin(), allcombs.end());
    m_hldata.slacks.insert(m_hldata.slacks.end(), allcombs.size(), slack);
    m_hldata.grpsugidx.insert(m_hldata.grpsugidx.end(), allcombs.size(),
                              m_hldata.ugroups.size() - 1);
}
|
||||
|
||||
// Trim string beginning with ^ or ending with $ and convert to flags
|
||||
static int stringToMods(string& s)
|
||||
{
|
||||
int mods = 0;
|
||||
// Check for an anchored search
|
||||
trimstring(s);
|
||||
if (s.length() > 0 && s[0] == '^') {
|
||||
mods |= Rcl::SearchDataClause::SDCM_ANCHORSTART;
|
||||
s.erase(0, 1);
|
||||
}
|
||||
if (s.length() > 0 && s[s.length()-1] == '$') {
|
||||
mods |= Rcl::SearchDataClause::SDCM_ANCHOREND;
|
||||
s.erase(s.length()-1);
|
||||
}
|
||||
return mods;
|
||||
}
|
||||
|
||||
/**
|
||||
* Turn user entry string (NOT query language) into a list of xapian queries.
|
||||
* We just separate words and phrases, and do wildcard and stem expansion,
|
||||
*
|
||||
* This is used to process data entered into an OR/AND/NEAR/PHRASE field of
|
||||
* the GUI (in the case of NEAR/PHRASE, clausedist adds dquotes to the user
|
||||
* entry).
|
||||
*
|
||||
* This appears awful, and it would seem that the split into
|
||||
* terms/phrases should be performed in the upper layer so that we
|
||||
* only receive pure term or near/phrase pure elements here, but in
|
||||
* fact there are things that would appear like terms to naive code,
|
||||
* and which will actually may be turned into phrases (ie: tom:jerry),
|
||||
* in a manner which intimately depends on the index implementation,
|
||||
* so that it makes sense to process this here.
|
||||
*
|
||||
* The final list contains one query for each term or phrase
|
||||
* - Elements corresponding to a stem-expanded part are an OP_OR
|
||||
* composition of the stem-expanded terms (or a single term query).
|
||||
* - Elements corresponding to phrase/near are an OP_PHRASE/NEAR
|
||||
* composition of the phrase terms (no stem expansion in this case)
|
||||
* @return the subquery count (either or'd stem-expanded terms or phrase word
|
||||
* count)
|
||||
*/
|
||||
bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
                                               string &ermsg, void *pq,
                                               int slack, bool useNear)
{
    vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
    int mods = m_modifiers;

    LOGDEB(("StringToXapianQ:pUS:: qstr [%s] fld [%s] mods 0x%x "
            "slack %d near %d\n",
            iq.c_str(), m_field.c_str(), mods, slack, useNear));
    ermsg.erase();
    m_curcl = 0;
    const StopList stops = db.getStopList();

    // Simple whitespace-split input into user-level words and
    // double-quoted phrases: word1 word2 "this is a phrase".
    //
    // The text splitter may further still decide that the resulting
    // "words" are really phrases, this depends on separators:
    // [paul@dom.net] would still be a word (span), but [about:me]
    // will probably be handled as a phrase.
    vector<string> phrases;
    TextSplit::stringToStrings(iq, phrases);

    // Process each element: textsplit into terms, handle stem/wildcard
    // expansion and transform into an appropriate Xapian::Query
    try {
        for (vector<string>::iterator it = phrases.begin();
             it != phrases.end(); it++) {
            LOGDEB0(("strToXapianQ: phrase/word: [%s]\n", it->c_str()));
            // Anchoring modifiers
            int amods = stringToMods(*it);
            // Anchors count as one additional term when deciding
            // between span and phrase processing below.
            int terminc = amods != 0 ? 1 : 0;
            mods |= amods;
            // If there are multiple spans in this element, including
            // at least one composite, we have to increase the slack
            // else a phrase query including a span would fail.
            // Ex: "term0@term1 term2" is onlyspans-split as:
            // 0 term0@term1 0 12
            // 2 term2 13 18
            // The position of term2 is 2, not 1, so a phrase search
            // would fail.
            // We used to do word split, searching for
            // "term0 term1 term2" instead, which may have worse
            // performance, but will succeed.
            // We now adjust the phrase/near slack by comparing the term count
            // and the last position

            // The term processing pipeline:
            TermProcQ tpq;
            TermProc *nxt = &tpq;
            TermProcStop tpstop(nxt, stops); nxt = &tpstop;
            //TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
            //tpcommon.onlygrams(true);
            TermProcPrep tpprep(nxt);
            // Lowercasing/unaccenting step only for stripped indexes.
            if (o_index_stripchars)
                nxt = &tpprep;

            TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |
                                                 TextSplit::TXTS_KEEPWILD),
                                stops, nxt);
            tpq.setTSQ(&splitter);
            splitter.text_to_words(*it);

            // Slack adjustment for composite spans (see comment above).
            slack += splitter.lastpos - splitter.terms.size() + 1;

            LOGDEB0(("strToXapianQ: termcount: %d\n", splitter.terms.size()));
            switch (splitter.terms.size() + terminc) {
            case 0:
                continue;// ??
            case 1: {
                // Single term: simple span processing.
                int lmods = mods;
                if (splitter.nostemexps.front())
                    lmods |= SearchDataClause::SDCM_NOSTEMMING;
                m_hldata.ugroups.push_back(splitter.terms);
                processSimpleSpan(db, ermsg, splitter.terms.front(),
                                  lmods, &pqueries);
            }
                break;
            default:
                // Several terms (or anchored single term): phrase/near.
                m_hldata.ugroups.push_back(splitter.terms);
                processPhraseOrNear(db, ermsg, &splitter, mods, &pqueries,
                                    useNear, slack);
            }
            if (m_curcl >= getMaxCl()) {
                ermsg = maxXapClauseMsg;
                if (!o_index_stripchars)
                    ermsg += maxXapClauseCaseDiacMsg;
                break;
            }
        }
    } catch (const Xapian::Error &e) {
        ermsg = e.get_msg();
    } catch (const string &s) {
        ermsg = s;
    } catch (const char *s) {
        ermsg = s;
    } catch (...) {
        ermsg = "Caught unknown exception";
    }
    if (!ermsg.empty()) {
        LOGERR(("stringToXapianQueries: %s\n", ermsg.c_str()));
        return false;
    }
    return true;
}
|
||||
|
||||
// Translate a simple OR or AND search clause.
|
||||
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
{
    LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
             getStemLang().c_str()));

    Xapian::Query *qp = (Xapian::Query *)p;
    *qp = Xapian::Query();

    // Map the clause type onto the Xapian combining operator. Only
    // AND and OR are valid for this clause kind.
    Xapian::Query::op op;
    if (m_tp == SCLT_AND) {
        op = Xapian::Query::OP_AND;
    } else if (m_tp == SCLT_OR) {
        op = Xapian::Query::OP_OR;
    } else {
        LOGERR(("SearchDataClauseSimple: bad m_tp %d\n", m_tp));
        return false;
    }

    // Split/expand the user text into one subquery per word or phrase.
    vector<Xapian::Query> subqueries;
    if (!processUserString(db, m_text, m_reason, &subqueries))
        return false;
    if (subqueries.empty()) {
        LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
        return true;
    }

    // Combine the subqueries, then apply the clause weight if it is
    // not neutral.
    *qp = Xapian::Query(op, subqueries.begin(), subqueries.end());
    if (m_weight != 1.0) {
        *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
    }
    return true;
}
|
||||
|
||||
// Translate a FILENAME search clause. This always comes
|
||||
// from a "filename" search from the gui or recollq. A query language
|
||||
// "filename:"-prefixed field will not go through here, but through
|
||||
// the generic field-processing code.
|
||||
//
|
||||
// We do not split the entry any more (used to do some crazy thing
|
||||
// about expanding multiple fragments in the past). We just take the
|
||||
// value blanks and all and expand this against the indexed unsplit
|
||||
// file names
|
||||
bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p)
{
    Xapian::Query& q(*(Xapian::Query *)p);
    q = Xapian::Query();

    // Use the soft expansion limit if one is set, else the hard one.
    int expandlimit = getSoftMaxExp();
    if (expandlimit == -1)
        expandlimit = getMaxExp();

    // Expand the (possibly wildcard) pattern against the indexed
    // unsplit file names and OR the resulting terms together.
    vector<string> fnterms;
    db.filenameWildExp(m_text, fnterms, expandlimit);
    q = Xapian::Query(Xapian::Query::OP_OR, fnterms.begin(), fnterms.end());

    // Apply the clause weight if it is not neutral.
    if (m_weight != 1.0) {
        q = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, q, m_weight);
    }
    return true;
}
|
||||
|
||||
// Translate a dir: path filtering clause. See comments in .h
|
||||
bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
{
    LOGDEB(("SearchDataClausePath::toNativeQuery: [%s]\n", m_text.c_str()));
    Xapian::Query *qp = (Xapian::Query *)p;
    *qp = Xapian::Query();

    if (m_text.empty()) {
        LOGERR(("SearchDataClausePath: empty path??\n"));
        m_reason = "Empty path ?";
        return false;
    }

    // Each path element becomes one phrase component (possibly an OR
    // of expansions); the final query is a PHRASE over all of them.
    vector<Xapian::Query> orqueries;

    // An absolute path is anchored with the bare path-element prefix
    // term; a relative one gets tilde-expanded first.
    if (m_text[0] == '/')
        orqueries.push_back(Xapian::Query(wrap_prefix(pathelt_prefix)));
    else
        m_text = path_tildexpand(m_text);

    vector<string> vpath;
    stringToTokens(m_text, vpath, "/");

    for (vector<string>::const_iterator pit = vpath.begin();
         pit != vpath.end(); pit++){

        string sterm;
        vector<string> exp;
        // Expand wildcards only: no stemming, exact case/diacritics
        // for path elements.
        if (!expandTerm(db, m_reason,
                        SDCM_NOSTEMMING|SDCM_CASESENS|SDCM_DIACSENS,
                        *pit, exp, sterm, wrap_prefix(pathelt_prefix))) {
            return false;
        }
        LOGDEB0(("SDataPath::toNative: exp size %d\n", exp.size()));
        listVector("", exp);
        if (exp.size() == 1)
            orqueries.push_back(Xapian::Query(exp[0]));
        else
            orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
                                              exp.begin(), exp.end()));
        m_curcl += exp.size();
        // Give up if the Xapian clause budget is exhausted.
        if (m_curcl >= getMaxCl())
            return false;
    }

    *qp = Xapian::Query(Xapian::Query::OP_PHRASE,
                        orqueries.begin(), orqueries.end());

    // Apply the clause weight if it is not neutral.
    if (m_weight != 1.0) {
        *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
    }
    return true;
}
|
||||
|
||||
// Translate NEAR or PHRASE clause.
|
||||
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
{
    LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));

    Xapian::Query *qp = (Xapian::Query *)p;
    *qp = Xapian::Query();

    vector<Xapian::Query> pqueries;

    // We produce a single phrase out of the user entry then use
    // stringToXapianQueries() to lowercase and simplify the phrase
    // terms etc. This will result into a single (complex)
    // Xapian::Query.
    // Neutralize any double quotes already present in the entry
    // before adding our own.
    if (m_text.find('\"') != string::npos) {
        m_text = neutchars(m_text, "\"");
    }
    string s = cstr_dquote + m_text + cstr_dquote;
    bool useNear = (m_tp == SCLT_NEAR);
    if (!processUserString(db, s, m_reason, &pqueries, m_slack, useNear))
        return false;
    if (pqueries.empty()) {
        LOGERR(("SearchDataClauseDist: resolved to null query\n"));
        return true;
    }

    // Single phrase input -> single output query.
    *qp = *pqueries.begin();
    // Apply the clause weight if it is not neutral.
    if (m_weight != 1.0) {
        *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
    }
    return true;
}
|
||||
|
||||
} // Namespace Rcl
|
||||
|
||||
@ -102,7 +102,7 @@ public:
|
||||
bool toNativeQuery(Rcl::Db &db, void *);
|
||||
|
||||
/** We become the owner of cl and will delete it */
|
||||
bool addClause(SearchDataClause *cl);
|
||||
bool addClause(SearchDataClause* cl);
|
||||
|
||||
/** If this is a simple query (one field only, no distance clauses),
|
||||
* add phrase made of query terms to query, so that docs containing the
|
||||
@ -164,7 +164,7 @@ public:
|
||||
private:
|
||||
// Combine type. Only SCLT_AND or SCLT_OR here
|
||||
SClType m_tp;
|
||||
// Complex query descriptor
|
||||
// The clauses
|
||||
std::vector<SearchDataClause*> m_query;
|
||||
// Restricted set of filetypes if not empty.
|
||||
std::vector<std::string> m_filetypes;
|
||||
@ -173,14 +173,18 @@ private:
|
||||
// Autophrase if set. Can't be part of the normal chain because
|
||||
// it uses OP_AND_MAYBE
|
||||
RefCntr<SearchDataClauseDist> m_autophrase;
|
||||
//
|
||||
|
||||
// Special stuff produced by input which looks like a clause but means
|
||||
// something else (date and size specs)
|
||||
bool m_haveDates;
|
||||
DateInterval m_dates; // Restrict to date interval
|
||||
size_t m_maxSize;
|
||||
size_t m_minSize;
|
||||
|
||||
// Printable expanded version of the complete query, retrieved/set
|
||||
// from rcldb after the Xapian::setQuery() call
|
||||
std::string m_description;
|
||||
// Error diag
|
||||
std::string m_reason;
|
||||
bool m_haveWildCards;
|
||||
std::string m_stemlang;
|
||||
@ -215,10 +219,12 @@ class SearchDataClause {
|
||||
public:
|
||||
enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=1, SDCM_ANCHORSTART=2,
|
||||
SDCM_ANCHOREND=4, SDCM_CASESENS=8, SDCM_DIACSENS=16};
|
||||
enum Relation {REL_CONTAINS, REL_EQUALS, REL_LT, REL_LTE, REL_GT, REL_GTE};
|
||||
|
||||
SearchDataClause(SClType tp)
|
||||
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
|
||||
m_modifiers(SDCM_NONE), m_weight(1.0), m_exclude(false)
|
||||
m_modifiers(SDCM_NONE), m_weight(1.0), m_exclude(false),
|
||||
m_rel(REL_CONTAINS)
|
||||
{}
|
||||
virtual ~SearchDataClause() {}
|
||||
virtual bool toNativeQuery(Rcl::Db &db, void *) = 0;
|
||||
@ -230,6 +236,9 @@ public:
|
||||
{
|
||||
return m_tp;
|
||||
}
|
||||
void setTp(SClType tp) {
|
||||
m_tp = tp;
|
||||
}
|
||||
void setParent(SearchData *p)
|
||||
{
|
||||
m_parentSearch = p;
|
||||
@ -279,7 +288,12 @@ public:
|
||||
{
|
||||
m_exclude = onoff;
|
||||
}
|
||||
|
||||
virtual void setrel(Relation rel) {
|
||||
m_rel = rel;
|
||||
}
|
||||
virtual Relation getrel() {
|
||||
return m_rel;
|
||||
}
|
||||
friend class SearchData;
|
||||
protected:
|
||||
std::string m_reason;
|
||||
@ -289,6 +303,8 @@ protected:
|
||||
Modifier m_modifiers;
|
||||
float m_weight;
|
||||
bool m_exclude;
|
||||
Relation m_rel;
|
||||
|
||||
private:
|
||||
SearchDataClause(const SearchDataClause&)
|
||||
{
|
||||
@ -339,13 +355,15 @@ public:
|
||||
{
|
||||
return m_field;
|
||||
}
|
||||
virtual void setfield(const string& field) {
|
||||
m_field = field;
|
||||
}
|
||||
protected:
|
||||
std::string m_text; // Raw user entry text.
|
||||
std::string m_field; // Field specification if any
|
||||
HighlightData m_hldata;
|
||||
// Current count of Xapian clauses, to check against expansion limit
|
||||
int m_curcl;
|
||||
|
||||
bool processUserString(Rcl::Db &db, const string &iq,
|
||||
std::string &ermsg,
|
||||
void* pq, int slack = 0, bool useNear = false);
|
||||
@ -444,6 +462,9 @@ public:
|
||||
{
|
||||
return m_slack;
|
||||
}
|
||||
virtual void setslack(int slack) {
|
||||
m_slack = slack;
|
||||
}
|
||||
private:
|
||||
int m_slack;
|
||||
};
|
||||
|
||||
983
src/rcldb/searchdatatox.cpp
Normal file
983
src/rcldb/searchdatatox.cpp
Normal file
@ -0,0 +1,983 @@
|
||||
/* Copyright (C) 2006 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
// Handle translation from rcl's SearchData structures to Xapian Queries
|
||||
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
using namespace std;
|
||||
|
||||
#include "xapian.h"
|
||||
|
||||
#include "cstr.h"
|
||||
#include "rcldb.h"
|
||||
#include "rcldb_p.h"
|
||||
#include "searchdata.h"
|
||||
#include "debuglog.h"
|
||||
#include "smallut.h"
|
||||
#include "textsplit.h"
|
||||
#include "unacpp.h"
|
||||
#include "utf8iter.h"
|
||||
#include "stoplist.h"
|
||||
#include "rclconfig.h"
|
||||
#include "termproc.h"
|
||||
#include "synfamily.h"
|
||||
#include "stemdb.h"
|
||||
#include "expansiondbs.h"
|
||||
#include "base64.h"
|
||||
#include "daterange.h"
|
||||
|
||||
namespace Rcl {
|
||||
|
||||
// Convenience iterator type over the clause list held by SearchData.
typedef vector<SearchDataClause *>::iterator qlist_it_t;

// Relevance (wqf) boost applied to the exact term the user typed so
// that it ranks above its stem/wildcard expansions.
static const int original_term_wqf_booster = 10;
|
||||
|
||||
// Expand categories and mime type wild card exps Categories are
|
||||
// expanded against the configuration, mimetypes against the index
|
||||
// (for wildcards).
|
||||
bool SearchData::expandFileTypes(Db &db, vector<string>& tps)
|
||||
{
|
||||
const RclConfig *cfg = db.getConf();
|
||||
if (!cfg) {
|
||||
LOGFATAL(("Db::expandFileTypes: null configuration!!\n"));
|
||||
return false;
|
||||
}
|
||||
vector<string> exptps;
|
||||
|
||||
for (vector<string>::iterator it = tps.begin(); it != tps.end(); it++) {
|
||||
if (cfg->isMimeCategory(*it)) {
|
||||
vector<string>tps;
|
||||
cfg->getMimeCatTypes(*it, tps);
|
||||
exptps.insert(exptps.end(), tps.begin(), tps.end());
|
||||
} else {
|
||||
TermMatchResult res;
|
||||
string mt = stringtolower((const string&)*it);
|
||||
// We set casesens|diacsens to get an equivalent of ixTermMatch()
|
||||
db.termMatch(Db::ET_WILD|Db::ET_CASESENS|Db::ET_DIACSENS, string(),
|
||||
mt, res, -1, "mtype");
|
||||
if (res.entries.empty()) {
|
||||
exptps.push_back(it->c_str());
|
||||
} else {
|
||||
for (vector<TermMatchEntry>::const_iterator rit =
|
||||
res.entries.begin(); rit != res.entries.end(); rit++) {
|
||||
exptps.push_back(strip_prefix(rit->term));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
sort(exptps.begin(), exptps.end());
|
||||
exptps.erase(unique(exptps.begin(), exptps.end()), exptps.end());
|
||||
|
||||
tps = exptps;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Error message returned when the Xapian query tree grows beyond the
// configured limit (maxXapianClauses).
static const char *maxXapClauseMsg = 
    "Maximum Xapian query size exceeded. Increase maxXapianClauses "
    "in the configuration. ";
// Additional hint appended for raw (non-stripped) indexes, where case
// and diacritics expansions can inflate the clause count considerably.
static const char *maxXapClauseCaseDiacMsg = 
    "Or try to use case (C) or diacritics (D) sensitivity qualifiers, or less "
    "wildcards ?"
    ;
|
||||
|
||||
// Translate a list of clauses into a single Xapian query, combining
// the subqueries with AND or OR depending on 'tp'. 'd' actually
// points to a Xapian::Query (void* keeps Xapian types out of the
// public interface). Returns false on translation failure or if the
// query grows beyond the configured clause limit.
bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp, 
                                vector<SearchDataClause*>& query, 
                                string& reason, void *d)
{
    Xapian::Query xq;
    for (qlist_it_t it = query.begin(); it != query.end(); it++) {
        Xapian::Query nq;
        if (!(*it)->toNativeQuery(db, &nq)) {
            LOGERR(("SearchData::clausesToQuery: toNativeQuery failed: %s\n",
                    (*it)->getReason().c_str()));
            reason += (*it)->getReason() + " ";
            return false;
        }
        if (nq.empty()) {
            LOGDEB(("SearchData::clausesToQuery: skipping empty clause\n"));
            continue;
        }
        // If this structure is an AND list, must use AND_NOT for excl clauses.
        // Else this is an OR list, and there can't be excl clauses (checked by
        // addClause())
        Xapian::Query::op op;
        if (tp == SCLT_AND) {
            if ((*it)->getexclude()) {
                op = Xapian::Query::OP_AND_NOT;
            } else {
                op = Xapian::Query::OP_AND;
            }
        } else {
            op = Xapian::Query::OP_OR;
        }
        if (xq.empty()) {
            // First non-empty clause. AND_NOT needs a left-hand side:
            // use MatchAll so the exclusion applies to everything.
            if (op == Xapian::Query::OP_AND_NOT)
                xq = Xapian::Query(op, Xapian::Query::MatchAll, nq);
            else 
                xq = nq;
        } else {
            xq = Xapian::Query(op, xq, nq);
        }
        // Bail out before the query tree exceeds the configured limit.
        if (int(xq.get_length()) >= getMaxCl()) {
            LOGERR(("%s\n", maxXapClauseMsg));
            m_reason += maxXapClauseMsg;
            if (!o_index_stripchars)
                m_reason += maxXapClauseCaseDiacMsg;
            return false;
        }
    }

    LOGDEB0(("SearchData::clausesToQuery: got %d clauses\n", xq.get_length()));

    // An empty combined query matches everything (e.g. the caller is
    // going to apply pure filters on top of it).
    if (xq.empty())
        xq = Xapian::Query::MatchAll;

    *((Xapian::Query *)d) = xq;
    return true;
}
|
||||
|
||||
// Top-level translation of this SearchData object to a Xapian query.
// 'd' points to a Xapian::Query. Builds the query from the clause
// list, then composes the date, size, autophrase and (neg) file type
// filters on top of it.
bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
{
    LOGDEB(("SearchData::toNativeQuery: stemlang [%s]\n", m_stemlang.c_str()));
    m_reason.erase();

    // Refresh the expansion/clause limits from the configuration.
    db.getConf()->getConfParam("maxTermExpand", &m_maxexp);
    db.getConf()->getConfParam("maxXapianClauses", &m_maxcl);

    // Walk the clause list translating each in turn and building the
    // Xapian query tree
    Xapian::Query xq;
    if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
        LOGERR(("SearchData::toNativeQuery: clausesToQuery failed. reason: %s\n", 
                m_reason.c_str()));
        return false;
    }

    if (m_haveDates) {
        // If one of the extremities is unset, compute db extremas
        if (m_dates.y1 == 0 || m_dates.y2 == 0) {
            int minyear = 1970, maxyear = 2100;
            if (!db.maxYearSpan(&minyear, &maxyear)) {
                LOGERR(("Can't retrieve index min/max dates\n"));
                //whatever, go on.
            }

            if (m_dates.y1 == 0) {
                m_dates.y1 = minyear;
                m_dates.m1 = 1;
                m_dates.d1 = 1;
            }
            if (m_dates.y2 == 0) {
                m_dates.y2 = maxyear;
                m_dates.m2 = 12;
                m_dates.d2 = 31;
            }
        }
        LOGDEB(("Db::toNativeQuery: date interval: %d-%d-%d/%d-%d-%d\n",
                m_dates.y1, m_dates.m1, m_dates.d1,
                m_dates.y2, m_dates.m2, m_dates.d2));
        Xapian::Query dq = date_range_filter(m_dates.y1, m_dates.m1, m_dates.d1,
                m_dates.y2, m_dates.m2, m_dates.d2);
        if (dq.empty()) {
            LOGINFO(("Db::toNativeQuery: date filter is empty\n"));
        }
        // If no probabilistic query is provided then promote the daterange
        // filter to be THE query instead of filtering an empty query.
        if (xq.empty()) {
            LOGINFO(("Db::toNativeQuery: proba query is empty\n"));
            xq = dq;
        } else {
            xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, dq);
        }
    }

    // Size range filter. Either bound may be unset (encoded as
    // size_t(-1)). Sizes are stored as left-zero-padded decimal
    // strings so that lexicographic value comparison orders numerically.
    if (m_minSize != size_t(-1) || m_maxSize != size_t(-1)) {
        Xapian::Query sq;
        char min[50], max[50];
        sprintf(min, "%lld", (long long)m_minSize);
        sprintf(max, "%lld", (long long)m_maxSize);
        if (m_minSize == size_t(-1)) {
            string value(max);
            leftzeropad(value, 12);
            sq = Xapian::Query(Xapian::Query::OP_VALUE_LE, VALUE_SIZE, value);
        } else if (m_maxSize == size_t(-1)) {
            string value(min);
            leftzeropad(value, 12);
            sq = Xapian::Query(Xapian::Query::OP_VALUE_GE, VALUE_SIZE, value);
        } else {
            string minvalue(min);
            leftzeropad(minvalue, 12);
            string maxvalue(max);
            leftzeropad(maxvalue, 12);
            sq = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, VALUE_SIZE, 
                               minvalue, maxvalue);
        }

        // If no probabilistic query is provided then promote the 
        // filter to be THE query instead of filtering an empty query.
        if (xq.empty()) {
            LOGINFO(("Db::toNativeQuery: proba query is empty\n"));
            xq = sq;
        } else {
            xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq);
        }
    }

    // Add the autophrase if any. AND_MAYBE: the phrase only boosts
    // relevance, it does not restrict the result set.
    if (m_autophrase.isNotNull()) {
        Xapian::Query apq;
        if (m_autophrase->toNativeQuery(db, &apq)) {
            xq = xq.empty() ? apq : 
                Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, apq);
        }
    }

    // Add the file type filtering clause if any
    if (!m_filetypes.empty()) {
        expandFileTypes(db, m_filetypes);

        Xapian::Query tq;
        for (vector<string>::iterator it = m_filetypes.begin(); 
             it != m_filetypes.end(); it++) {
            string term = wrap_prefix(mimetype_prefix) + *it;
            LOGDEB0(("Adding file type term: [%s]\n", term.c_str()));
            tq = tq.empty() ? Xapian::Query(term) : 
                Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
        }
        xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
    }

    // Add the neg file type filtering clause if any
    if (!m_nfiletypes.empty()) {
        expandFileTypes(db, m_nfiletypes);

        Xapian::Query tq;
        for (vector<string>::iterator it = m_nfiletypes.begin(); 
             it != m_nfiletypes.end(); it++) {
            string term = wrap_prefix(mimetype_prefix) + *it;
            LOGDEB0(("Adding negative file type term: [%s]\n", term.c_str()));
            tq = tq.empty() ? Xapian::Query(term) : 
                Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
        }
        xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq);
    }

    *((Xapian::Query *)d) = xq;
    return true;
}
|
||||
|
||||
// Splitter callback for breaking a user string into simple terms and
// phrases. This is for parts of the user entry which would appear as
// a single word because there is no white space inside, but are
// actually multiple terms to rcldb (ie term1,term2)
class TextSplitQ : public TextSplitP {
 public:
    TextSplitQ(Flags flags, const StopList &_stops, TermProc *prc)
        : TextSplitP(prc, flags), 
          curnostemexp(false), stops(_stops), alltermcount(0), lastpos(0)
    {}

    bool takeword(const std::string &term, int pos, int bs, int be) 
    {
        // Check if the first letter is a majuscule in which
        // case we do not want to do stem expansion. Need to do this
        // before unac of course...
        curnostemexp = unaciscapital(term);

        return TextSplitP::takeword(term, pos, bs, be);
    }

    // True while processing a capitalized term (no stem expansion wanted).
    bool curnostemexp;
    // Resulting terms, in position order (filled by TermProcQ::flush()).
    vector<string> terms;
    // Per-term no-stem-expansion flags, parallel to 'terms'.
    vector<bool> nostemexps;
    const StopList &stops;
    // Count of terms including stopwords: this is for adjusting
    // phrase/near slack
    int alltermcount; 
    // Highest term position seen so far.
    int lastpos;
};
|
||||
|
||||
// Sink element of the term processing pipeline: records the terms
// produced at each position, then hands them back in order to the
// TextSplitQ object via flush().
class TermProcQ : public TermProc {
public:
    TermProcQ() : TermProc(0), m_ts(0) {}
    // Set the splitter whose terms/nostemexps/lastpos fields we fill.
    void setTSQ(TextSplitQ *ts) {m_ts = ts;}
    
    bool takeword(const std::string &term, int pos, int bs, int be) 
    {
        m_ts->alltermcount++;
        if (m_ts->lastpos < pos)
            m_ts->lastpos = pos;
        // be == 0: presumably a pipeline-generated term with no byte
        // span — never stem-expand it (TODO confirm against TermProc).
        bool noexpand = be ? m_ts->curnostemexp : true;
        LOGDEB1(("TermProcQ::takeword: pushing [%s] pos %d noexp %d\n", 
                 term.c_str(), pos, noexpand));
        // Keep only the longest term seen at each position.
        if (m_terms[pos].size() < term.size()) {
            m_terms[pos] = term;
            m_nste[pos] = noexpand;
        }
        return true;
    }
    // Transfer the per-position maps to the splitter's ordered vectors.
    bool flush()
    {
        for (map<int, string>::const_iterator it = m_terms.begin();
             it != m_terms.end(); it++) {
            m_ts->terms.push_back(it->second);
            m_ts->nostemexps.push_back(m_nste[it->first]);
        }
        return true;
    }
private:
    TextSplitQ *m_ts;
    // Position -> longest term seen there (map keeps positions sorted).
    map<int, string> m_terms;
    // Position -> no-stem-expansion flag for the retained term.
    map<int, bool> m_nste;
};
|
||||
|
||||
|
||||
#if 1
|
||||
static void listVector(const string& what, const vector<string>&l)
|
||||
{
|
||||
string a;
|
||||
for (vector<string>::const_iterator it = l.begin(); it != l.end(); it++) {
|
||||
a = a + *it + " ";
|
||||
}
|
||||
LOGDEB0(("%s: %s\n", what.c_str(), a.c_str()));
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Expand term into term list, using appropriate mode: stem, wildcards, 
 * diacritics... 
 *
 * @param mods stem expansion, case and diacritics sensitivity control.
 * @param term input single word
 * @param oexp output expansion list
 * @param sterm output original input term if there were no wildcards
 * @param prefix field prefix in index. We could recompute it, but the caller
 *  has it already. Used in the simple case where there is nothing to expand, 
 *  and we just return the prefixed term (else Db::termMatch deals with it).
 * @return false only if the (hard) expansion limit was exceeded.
 */
bool SearchDataClauseSimple::expandTerm(Rcl::Db &db, 
                                        string& ermsg, int mods, 
                                        const string& term, 
                                        vector<string>& oexp, string &sterm,
                                        const string& prefix)
{
    LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",
             mods, m_field.c_str(), term.c_str(), getStemLang().c_str()));
    sterm.clear();
    oexp.clear();
    if (term.empty())
        return true;

    // The soft limit (silently truncate) takes precedence over the
    // hard one (fail with an error) when set.
    bool maxexpissoft = false;
    int maxexpand = getSoftMaxExp();
    if (maxexpand != -1) {
        maxexpissoft = true;
    } else {
        maxexpand = getMaxExp();
    }

    bool haswild = term.find_first_of(cstr_minwilds) != string::npos;

    // If there are no wildcards, add term to the list of user-entered terms
    if (!haswild) {
        m_hldata.uterms.insert(term);
        sterm = term;
    }

    // No stem expansion if there are wildcards or if prevented by caller
    bool nostemexp = (mods & SDCM_NOSTEMMING) != 0;
    if (haswild || getStemLang().empty()) {
        LOGDEB2(("expandTerm: found wildcards or stemlang empty: no exp\n"));
        nostemexp = true;
    }

    // noexpansion can be modified further down by possible case/diac expansion
    bool noexpansion = nostemexp && !haswild;

    int termmatchsens = 0;

    bool diac_sensitive = (mods & SDCM_DIACSENS) != 0;
    bool case_sensitive = (mods & SDCM_CASESENS) != 0;

    if (o_index_stripchars) {
        // Stripped index: case/diacritics do not exist in the index,
        // sensitivity flags are meaningless there.
        diac_sensitive = case_sensitive = false;
    } else {
        // If we are working with a raw index, apply the rules for case and 
        // diacritics sensitivity.

        // If any character has a diacritic, we become
        // diacritic-sensitive. Note that the way that the test is
        // performed (conversion+comparison) will automatically ignore
        // accented characters which are actually a separate letter
        if (getAutoDiac() && unachasaccents(term)) {
            LOGDEB0(("expandTerm: term has accents -> diac-sensitive\n"));
            diac_sensitive = true;
        }

        // If any character apart the first is uppercase, we become
        // case-sensitive. The first character is reserved for
        // turning off stemming. You need to use a query language
        // modifier to search for Floor in a case-sensitive way.
        Utf8Iter it(term);
        it++;
        if (getAutoCase() && unachasuppercase(term.substr(it.getBpos()))) {
            LOGDEB0(("expandTerm: term has uppercase -> case-sensitive\n"));
            case_sensitive = true;
        }

        // If we are sensitive to case or diacritics turn stemming off
        if (diac_sensitive || case_sensitive) {
            LOGDEB0(("expandTerm: diac or case sens set -> stemexpand off\n"));
            nostemexp = true;
        }

        if (!case_sensitive || !diac_sensitive)
            noexpansion = false;
    }

    if (case_sensitive)
        termmatchsens |= Db::ET_CASESENS;
    if (diac_sensitive)
        termmatchsens |= Db::ET_DIACSENS;

    if (noexpansion) {
        // Nothing to expand at all: return the prefixed input term.
        oexp.push_back(prefix + term);
        m_hldata.terms[term] = term;
        LOGDEB(("ExpandTerm: noexpansion: final: %s\n", stringsToString(oexp).c_str()));
        return true;
    } 

    Db::MatchType mtyp = haswild ? Db::ET_WILD : 
        nostemexp ? Db::ET_NONE : Db::ET_STEM;
    TermMatchResult res;
    if (!db.termMatch(mtyp | termmatchsens, getStemLang(), term, res, maxexpand,
                      m_field)) {
        // Let it go through
    }

    // Term match entries to vector of terms
    if (int(res.entries.size()) >= maxexpand && !maxexpissoft) {
        ermsg = "Maximum term expansion size exceeded."
            " Maybe use case/diacritics sensitivity or increase maxTermExpand.";
        return false;
    }
    for (vector<TermMatchEntry>::const_iterator it = res.entries.begin(); 
         it != res.entries.end(); it++) {
        oexp.push_back(it->term);
    }
    // If the term does not exist at all in the db, the return from
    // termMatch() is going to be empty, which is not what we want (we
    // would then compute an empty Xapian query)
    if (oexp.empty())
        oexp.push_back(prefix + term);

    // Remember the uterm-to-expansion links
    for (vector<string>::const_iterator it = oexp.begin(); 
         it != oexp.end(); it++) {
        m_hldata.terms[strip_prefix(*it)] = term;
    }
    LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
    return true;
}
|
||||
|
||||
// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
//
// Walks the first string vector of the range; for each element it
// pushes the string onto the current partial combination and recurses
// on the remaining vectors. The deepest call (last vector) stores the
// completed combination into the output.
//
// @param vvit     start of the range of string vectors to distribute.
// @param vvend    end of the range.
// @param comb     current partial combination (recursion accumulator,
//                 left unchanged on return).
// @param allcombs output: every combination, one element per vector.
void multiply_groups(vector<vector<string> >::const_iterator vvit,
                     vector<vector<string> >::const_iterator vvend,
                     vector<string>& comb,
                     vector<vector<string> >&allcombs)
{
    // Guard: with an empty input range the original code dereferenced
    // the end iterator (undefined behavior). No combinations to emit.
    if (vvit == vvend)
        return;

    // Remember my string vector and compute next, for recursive calls.
    vector<vector<string> >::const_iterator myvit = vvit++;

    // Walk my string vector
    for (vector<string>::const_iterator strit = (*myvit).begin();
         strit != (*myvit).end(); strit++) {

        // Add my current value to the string vector we're building
        comb.push_back(*strit);

        if (vvit == vvend) {
            // Last call: store current result
            allcombs.push_back(comb);
        } else {
            // Call recursively on next string vector
            multiply_groups(vvit, vvend, comb, allcombs);
        }
        // Pop the value I just added (make room for the next element in my
        // vector)
        comb.pop_back();
    }
}
|
||||
|
||||
// Translate a single user span (one word for the splitter) into a
// Xapian query, performing stem/wildcard/case/diacritics expansion as
// controlled by 'mods'. Appends the result to *pq, which points to a
// vector<Xapian::Query>. On expansion failure, ermsg is set and
// nothing is appended.
void SearchDataClauseSimple::processSimpleSpan(Rcl::Db &db, string& ermsg,
                                               const string& span, 
                                               int mods, void * pq)
{
    vector<Xapian::Query>& pqueries(*(vector<Xapian::Query>*)pq);
    LOGDEB0(("StringToXapianQ::processSimpleSpan: [%s] mods 0x%x\n",
             span.c_str(), (unsigned int)mods));
    vector<string> exp;  
    string sterm; // dumb version of user term

    // Compute the index term prefix for the field, if any.
    string prefix;
    const FieldTraits *ftp;
    if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
        prefix = wrap_prefix(ftp->pfx);
    }

    if (!expandTerm(db, ermsg, mods, span, exp, sterm, prefix))
        return;
    
    // Set up the highlight data. No prefix should go in there
    for (vector<string>::const_iterator it = exp.begin(); 
         it != exp.end(); it++) {
        m_hldata.groups.push_back(vector<string>(1, it->substr(prefix.size())));
        m_hldata.slacks.push_back(0);
        m_hldata.grpsugidx.push_back(m_hldata.ugroups.size() - 1);
    }

    // Push either term or OR of stem-expanded set
    Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());
    m_curcl += exp.size();

    // If sterm (simplified original user term) is not null, give it a 
    // relevance boost. We do this even if no expansion occurred (else
    // the non-expanded terms in a term list would end-up with even
    // less wqf). This does not happen if there are wildcards anywhere
    // in the search.
    // We normally boost the original term in the stem expansion list. Don't
    // do it if there are wildcards anywhere, this would skew the results.
    bool doBoostUserTerm = 
        (m_parentSearch && !m_parentSearch->haveWildCards()) || 
        (m_parentSearch == 0 && !m_haveWildCards);
    if (doBoostUserTerm && !sterm.empty()) {
        xq = Xapian::Query(Xapian::Query::OP_OR, xq, 
                           Xapian::Query(prefix+sterm, 
                                         original_term_wqf_booster));
    }
    pqueries.push_back(xq);
}
|
||||
|
||||
// User entry element had several terms: transform into a PHRASE or
// NEAR xapian query, the elements of which can themselves be OR
// queries if the terms get expanded by stemming or wildcards (we
// don't do stemming for PHRASE though)
void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg,
                                                 TextSplitQ *splitData, 
                                                 int mods, void *pq,
                                                 bool useNear, int slack)
{
    vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
    Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR : 
        Xapian::Query::OP_PHRASE;
    vector<Xapian::Query> orqueries;
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
    bool hadmultiple = false;
#endif
    // Expansion groups kept for highlight data (prefix-less).
    vector<vector<string> >groups;

    // Compute the index term prefix for the field, if any.
    string prefix;
    const FieldTraits *ftp;
    if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
        prefix = wrap_prefix(ftp->pfx);
    }

    // Anchored at start: prepend the start-of-field marker term and
    // widen the slack to account for the extra element.
    if (mods & Rcl::SearchDataClause::SDCM_ANCHORSTART) {
        orqueries.push_back(Xapian::Query(prefix + start_of_field_term));
        slack++;
    }

    // Go through the list and perform stem/wildcard expansion for each element
    vector<bool>::iterator nxit = splitData->nostemexps.begin();
    for (vector<string>::iterator it = splitData->terms.begin();
         it != splitData->terms.end(); it++, nxit++) {
        LOGDEB0(("ProcessPhrase: processing [%s]\n", it->c_str()));
        // Adjust when we do stem expansion. Not if disabled by
        // caller, not inside phrases, and some versions of xapian
        // will accept only one OR clause inside NEAR.
        bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE)
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
            || hadmultiple
#endif // single OR inside NEAR
            ;
        int lmods = mods;
        if (nostemexp)
            lmods |= SearchDataClause::SDCM_NOSTEMMING;
        string sterm;
        vector<string> exp;
        if (!expandTerm(db, ermsg, lmods, *it, exp, sterm, prefix))
            return;
        LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
        listVector("", exp);
        // groups is used for highlighting, we don't want prefixes in there.
        vector<string> noprefs;
        for (vector<string>::const_iterator it = exp.begin(); 
             it != exp.end(); it++) {
            noprefs.push_back(it->substr(prefix.size()));
        }
        groups.push_back(noprefs);
        orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 
                                          exp.begin(), exp.end()));
        m_curcl += exp.size();
        if (m_curcl >= getMaxCl())
            return;
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
        if (exp.size() > 1) 
            hadmultiple = true;
#endif
    }

    // Anchored at end: append the end-of-field marker term.
    if (mods & Rcl::SearchDataClause::SDCM_ANCHOREND) {
        orqueries.push_back(Xapian::Query(prefix + end_of_field_term));
        slack++;
    }

    // Generate an appropriate PHRASE/NEAR query with adjusted slack
    // For phrases, give a relevance boost like we do for original terms
    LOGDEB2(("PHRASE/NEAR: alltermcount %d lastpos %d\n", 
             splitData->alltermcount, splitData->lastpos));
    Xapian::Query xq(op, orqueries.begin(), orqueries.end(),
                     splitData->lastpos + 1 + slack);
    if (op == Xapian::Query::OP_PHRASE)
        xq = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, xq, 
                           original_term_wqf_booster);
    pqueries.push_back(xq);

    // Add all combinations of NEAR/PHRASE groups to the highlighting data. 
    vector<vector<string> > allcombs;
    vector<string> comb;
    multiply_groups(groups.begin(), groups.end(), comb, allcombs);

    // Insert the search groups and slacks in the highlight data, with
    // a reference to the user entry that generated them:
    m_hldata.groups.insert(m_hldata.groups.end(), 
                           allcombs.begin(), allcombs.end());
    m_hldata.slacks.insert(m_hldata.slacks.end(), allcombs.size(), slack);
    m_hldata.grpsugidx.insert(m_hldata.grpsugidx.end(), allcombs.size(), 
                              m_hldata.ugroups.size() - 1);
}
|
||||
|
||||
// Trim string beginning with ^ or ending with $ and convert to flags
|
||||
static int stringToMods(string& s)
|
||||
{
|
||||
int mods = 0;
|
||||
// Check for an anchored search
|
||||
trimstring(s);
|
||||
if (s.length() > 0 && s[0] == '^') {
|
||||
mods |= Rcl::SearchDataClause::SDCM_ANCHORSTART;
|
||||
s.erase(0, 1);
|
||||
}
|
||||
if (s.length() > 0 && s[s.length()-1] == '$') {
|
||||
mods |= Rcl::SearchDataClause::SDCM_ANCHOREND;
|
||||
s.erase(s.length()-1);
|
||||
}
|
||||
return mods;
|
||||
}
|
||||
|
||||
/** 
 * Turn user entry string (NOT query language) into a list of xapian queries.
 * We just separate words and phrases, and do wildcard and stem expansion,
 *
 * This is used to process data entered into an OR/AND/NEAR/PHRASE field of
 * the GUI (in the case of NEAR/PHRASE, clausedist adds dquotes to the user
 * entry).
 *
 * This appears awful, and it would seem that the split into
 * terms/phrases should be performed in the upper layer so that we
 * only receive pure term or near/phrase pure elements here, but in
 * fact there are things that would appear like terms to naive code,
 * and which will actually may be turned into phrases (ie: tom:jerry),
 * in a manner which intimately depends on the index implementation,
 * so that it makes sense to process this here.
 *
 * The final list contains one query for each term or phrase
 *   - Elements corresponding to a stem-expanded part are an OP_OR
 *     composition of the stem-expanded terms (or a single term query).
 *   - Elements corresponding to phrase/near are an OP_PHRASE/NEAR
 *     composition of the phrase terms (no stem expansion in this case)
 * @return false if an error message was set (expansion/clause limit
 *   exceeded or a Xapian exception was caught), true otherwise.
 */
bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
                                               string &ermsg, void *pq,
                                               int slack, bool useNear)
{
    vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
    int mods = m_modifiers;

    LOGDEB(("StringToXapianQ:pUS:: qstr [%s] fld [%s] mods 0x%x "
            "slack %d near %d\n", 
            iq.c_str(), m_field.c_str(), mods, slack, useNear));
    ermsg.erase();
    m_curcl = 0;
    const StopList stops = db.getStopList();

    // Simple whitespace-split input into user-level words and
    // double-quoted phrases: word1 word2 "this is a phrase". 
    //
    // The text splitter may further still decide that the resulting
    // "words" are really phrases, this depends on separators:
    // [paul@dom.net] would still be a word (span), but [about:me]
    // will probably be handled as a phrase.
    vector<string> phrases;
    TextSplit::stringToStrings(iq, phrases);

    // Process each element: textsplit into terms, handle stem/wildcard 
    // expansion and transform into an appropriate Xapian::Query
    try {
        for (vector<string>::iterator it = phrases.begin(); 
             it != phrases.end(); it++) {
            LOGDEB0(("strToXapianQ: phrase/word: [%s]\n", it->c_str()));
            // Anchoring modifiers
            int amods = stringToMods(*it);
            int terminc = amods != 0 ? 1 : 0;
            mods |= amods;
            // If there are multiple spans in this element, including
            // at least one composite, we have to increase the slack
            // else a phrase query including a span would fail. 
            // Ex: "term0@term1 term2" is onlyspans-split as:
            //   0 term0@term1  0   12
            //   2 term2       13   18
            // The position of term2 is 2, not 1, so a phrase search
            // would fail.
            // We used to do word split, searching for 
            // "term0 term1 term2" instead, which may have worse 
            // performance, but will succeed.
            // We now adjust the phrase/near slack by comparing the term count
            // and the last position

            // The term processing pipeline:
            TermProcQ tpq;
            TermProc *nxt = &tpq;
            TermProcStop tpstop(nxt, stops); nxt = &tpstop;
            //TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
            //tpcommon.onlygrams(true);
            TermProcPrep tpprep(nxt);
            // Only prepend the unac/lowercase step on stripped indexes.
            if (o_index_stripchars)
                nxt = &tpprep;

            TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS | 
                                                 TextSplit::TXTS_KEEPWILD), 
                                stops, nxt);
            tpq.setTSQ(&splitter);
            splitter.text_to_words(*it);

            // Gap between term count and last position: stopwords were
            // removed in between, widen the slack accordingly.
            slack += splitter.lastpos - splitter.terms.size() + 1;

            LOGDEB0(("strToXapianQ: termcount: %d\n", splitter.terms.size()));
            switch (splitter.terms.size() + terminc) {
            case 0: 
                continue;// ??
            case 1: {
                int lmods = mods;
                if (splitter.nostemexps.front())
                    lmods |= SearchDataClause::SDCM_NOSTEMMING;
                m_hldata.ugroups.push_back(splitter.terms);
                processSimpleSpan(db, ermsg, splitter.terms.front(),
                                  lmods, &pqueries);
            }
                break;
            default:
                m_hldata.ugroups.push_back(splitter.terms);
                processPhraseOrNear(db, ermsg, &splitter, mods, &pqueries,
                                    useNear, slack);
            }
            if (m_curcl >= getMaxCl()) {
                ermsg = maxXapClauseMsg;
                if (!o_index_stripchars)
                    ermsg += maxXapClauseCaseDiacMsg;
                break;
            }
        }
    } catch (const Xapian::Error &e) {
        ermsg = e.get_msg();
    } catch (const string &s) {
        ermsg = s;
    } catch (const char *s) {
        ermsg = s;
    } catch (...) {
        ermsg = "Caught unknown exception";
    }
    if (!ermsg.empty()) {
        LOGERR(("stringToXapianQueries: %s\n", ermsg.c_str()));
        return false;
    }
    return true;
}
|
||||
|
||||
// Translate a simple OR or AND search clause. 'p' points to a
// Xapian::Query that receives the result. Returns true with an empty
// query when the clause resolves to nothing.
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
{
    LOGDEB(("SearchDataClauseSimple::toNativeQuery: fld [%s] val [%s] "
            "stemlang [%s]\n", m_field.c_str(), m_text.c_str(),
            getStemLang().c_str()));

    Xapian::Query *qp = (Xapian::Query *)p;
    *qp = Xapian::Query();

    // Map the clause type to the corresponding Xapian operator.
    Xapian::Query::op op;
    switch (m_tp) {
    case SCLT_AND: op = Xapian::Query::OP_AND; break;
    case SCLT_OR: op = Xapian::Query::OP_OR; break;
    default:
        LOGERR(("SearchDataClauseSimple: bad m_tp %d\n", m_tp));
        return false;
    }

    // Split and expand the user entry into a list of subqueries.
    vector<Xapian::Query> pqueries;
    if (!processUserString(db, m_text, m_reason, &pqueries))
        return false;
    if (pqueries.empty()) {
        LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
        return true;
    }

    *qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
    if (m_weight != 1.0) {
        *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
    }
    return true;
}
|
||||
|
||||
// Translate a FILENAME search clause. This always comes
// from a "filename" search from the gui or recollq. A query language
// "filename:"-prefixed field will not go through here, but through
// the generic field-processing code.
//
// We do not split the entry any more (used to do some crazy thing
// about expanding multiple fragments in the past). We just take the
// value blanks and all and expand this against the indexed unsplit
// file names
bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p)
{
    Xapian::Query *qp = (Xapian::Query *)p;
    *qp = Xapian::Query();

    // Prefer the soft expansion limit (truncate) if set, else use the
    // hard one.
    int maxexp = getSoftMaxExp();
    if (maxexp == -1)
        maxexp = getMaxExp();

    // Expand the pattern against the indexed unsplit file names and
    // OR the matches together.
    vector<string> names;
    db.filenameWildExp(m_text, names, maxexp);
    *qp = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());

    if (m_weight != 1.0) {
        *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
    }
    return true;
}
|
||||
|
||||
// Translate a dir: path filtering clause. See comments in .h
|
||||
bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
|
||||
{
|
||||
LOGDEB(("SearchDataClausePath::toNativeQuery: [%s]\n", m_text.c_str()));
|
||||
Xapian::Query *qp = (Xapian::Query *)p;
|
||||
*qp = Xapian::Query();
|
||||
|
||||
if (m_text.empty()) {
|
||||
LOGERR(("SearchDataClausePath: empty path??\n"));
|
||||
m_reason = "Empty path ?";
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<Xapian::Query> orqueries;
|
||||
|
||||
if (m_text[0] == '/')
|
||||
orqueries.push_back(Xapian::Query(wrap_prefix(pathelt_prefix)));
|
||||
else
|
||||
m_text = path_tildexpand(m_text);
|
||||
|
||||
vector<string> vpath;
|
||||
stringToTokens(m_text, vpath, "/");
|
||||
|
||||
for (vector<string>::const_iterator pit = vpath.begin();
|
||||
pit != vpath.end(); pit++){
|
||||
|
||||
string sterm;
|
||||
vector<string> exp;
|
||||
if (!expandTerm(db, m_reason,
|
||||
SDCM_NOSTEMMING|SDCM_CASESENS|SDCM_DIACSENS,
|
||||
*pit, exp, sterm, wrap_prefix(pathelt_prefix))) {
|
||||
return false;
|
||||
}
|
||||
LOGDEB0(("SDataPath::toNative: exp size %d\n", exp.size()));
|
||||
listVector("", exp);
|
||||
if (exp.size() == 1)
|
||||
orqueries.push_back(Xapian::Query(exp[0]));
|
||||
else
|
||||
orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
|
||||
exp.begin(), exp.end()));
|
||||
m_curcl += exp.size();
|
||||
if (m_curcl >= getMaxCl())
|
||||
return false;
|
||||
}
|
||||
|
||||
*qp = Xapian::Query(Xapian::Query::OP_PHRASE,
|
||||
orqueries.begin(), orqueries.end());
|
||||
|
||||
if (m_weight != 1.0) {
|
||||
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Translate NEAR or PHRASE clause.
|
||||
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
|
||||
{
|
||||
LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
|
||||
|
||||
Xapian::Query *qp = (Xapian::Query *)p;
|
||||
*qp = Xapian::Query();
|
||||
|
||||
vector<Xapian::Query> pqueries;
|
||||
Xapian::Query nq;
|
||||
|
||||
// We produce a single phrase out of the user entry then use
|
||||
// stringToXapianQueries() to lowercase and simplify the phrase
|
||||
// terms etc. This will result into a single (complex)
|
||||
// Xapian::Query.
|
||||
if (m_text.find('\"') != string::npos) {
|
||||
m_text = neutchars(m_text, "\"");
|
||||
}
|
||||
string s = cstr_dquote + m_text + cstr_dquote;
|
||||
bool useNear = (m_tp == SCLT_NEAR);
|
||||
if (!processUserString(db, s, m_reason, &pqueries, m_slack, useNear))
|
||||
return false;
|
||||
if (pqueries.empty()) {
|
||||
LOGERR(("SearchDataClauseDist: resolved to null query\n"));
|
||||
return true;
|
||||
}
|
||||
|
||||
*qp = *pqueries.begin();
|
||||
if (m_weight != 1.0) {
|
||||
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // Namespace Rcl
|
||||
Loading…
x
Reference in New Issue
Block a user