most of adv search working. Still need subtree/filename filters

2005-10-19 10:21:48 +00:00 · 2005-10-19 10:21:48 +00:00 · ce740a26ad
commit ce740a26ad
parent 04a926456a
15 changed files with 410 additions and 151 deletions
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.12 2005-09-22 14:09:04 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.13 2005-10-19 10:21:48 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 #ifndef TEST_TEXTSPLIT

@ -82,11 +82,6 @@ bool TextSplit::emitterm(bool isspan, string &w, int pos,
 {
    LOGDEB2(("TextSplit::emitterm: '%s' pos %d\n", w.c_str(), pos));

-    // It may happen that our cleanup would result in emitting the
-    // same term twice. We try to avoid this
-    static string prevterm;
-    static int prevpos = -1;
-
    if (!cb)
 	return false;

--- a/src/common/textsplit.h
+++ b/src/common/textsplit.h
@ -1,6 +1,6 @@
 #ifndef _TEXTSPLIT_H_INCLUDED_
 #define _TEXTSPLIT_H_INCLUDED_
-/* @(#$Id: textsplit.h,v 1.7 2005-10-10 13:25:23 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: textsplit.h,v 1.8 2005-10-19 10:21:48 dockes Exp $  (C) 2004 J.F.Dockes */

 #include <string>
 #ifndef NO_NAMESPACES
@ -28,6 +28,10 @@ class TextSplitCB {
 */
 class TextSplit {
    bool fq;        // for query:  Are we splitting for query or index ?
+    // It may happen that our cleanup would result in emitting the
+    // same term twice. We try to avoid this
+    string prevterm;
+    int prevpos;
    TextSplitCB *cb;
    int maxWordLength;
    bool emitterm(bool isspan, std::string &term, int pos, int bs, int be);
@ -38,7 +42,7 @@ class TextSplit {
     * Constructor: just store callback object
     */
    TextSplit(TextSplitCB *t, bool forquery = false) 
-	: fq(forquery), cb(t), maxWordLength(40) {}
+	: fq(forquery), prevpos(-1), cb(t), maxWordLength(40) {}
    /**
     * Split text, emit words and positions.
     */
--- a/src/filters/rcldoc
+++ b/src/filters/rcldoc
@ -1,5 +1,5 @@
 #!/bin/sh
-# @(#$Id: rcldoc,v 1.1 2005-02-09 12:07:30 dockes Exp $  (C) 2004 J.F.Dockes
+# @(#$Id: rcldoc,v 1.2 2005-10-19 10:21:48 dockes Exp $  (C) 2004 J.F.Dockes
 # Parts taken from Estraier:
 #================================================================
 # Estraier: a personal full-text search system
@ -35,6 +35,31 @@ fi

 infile="$1"

+# Check that the command named by $1 is available. A name containing a
+# slash is tested as is, any other name is searched for in the $PATH
+# directories. Returns 0 if an executable regular file is found, else 1.
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+	if test -f "$cmd" && test -x "$cmd" ; then return 0; else return 1; fi ;;
+    *)
+      # Split PATH on colons into the positional parameters
+      IFS=: ; set -- $PATH; unset IFS
+      # "$@" keeps directory names containing white space in one piece
+      for d in "$@" ; do
+        # Empty PATH elements are skipped
+        test -n "$d" || continue
+        test -f "$d/$cmd" && test -x "$d/$cmd" && return 0
+      done
+      return 1 ;;
+    esac
+}
+# Check that all the commands named in the arguments are available
+# (see iscmd). At the first missing one, print a message on stderr and
+# exit the script with status 1.
+checkcmds()
+{
+    for cmd in "$@";do
+      if iscmd "$cmd"
+      then
+        # An empty 'then' branch is a syntax error for POSIX shells
+        :
+      else
+        echo "$cmd not found" 1>&2
+	exit 1
+      fi
+    done
+}
+checkcmds awk antiword iconv
+
 # check the input file existence
 if test ! -f "$infile"
 then
--- a/src/filters/rclpdf
+++ b/src/filters/rclpdf
@ -1,5 +1,5 @@
 #!/bin/sh
-# @(#$Id: rclpdf,v 1.1 2005-02-01 17:20:05 dockes Exp $  (C) 2004 J.F.Dockes
+# @(#$Id: rclpdf,v 1.2 2005-10-19 10:21:48 dockes Exp $  (C) 2004 J.F.Dockes
 # This is copied almost verbatim from Estraier:
 #================================================================
 # Estraier: a personal full-text search system
@ -33,6 +33,31 @@ then
  exit 1
 fi

+# Check that the command named by $1 is available. A name containing a
+# slash is tested as is, any other name is searched for in the $PATH
+# directories. Returns 0 if an executable regular file is found, else 1.
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+	if test -f "$cmd" && test -x "$cmd" ; then return 0; else return 1; fi ;;
+    *)
+      # Split PATH on colons into the positional parameters
+      IFS=: ; set -- $PATH; unset IFS
+      # "$@" keeps directory names containing white space in one piece
+      for d in "$@" ; do
+        # Empty PATH elements are skipped
+        test -n "$d" || continue
+        test -f "$d/$cmd" && test -x "$d/$cmd" && return 0
+      done
+      return 1 ;;
+    esac
+}
+# Check that all the commands named in the arguments are available
+# (see iscmd). At the first missing one, print a message on stderr and
+# exit the script with status 1.
+checkcmds()
+{
+    for cmd in "$@";do
+      if iscmd "$cmd"
+      then
+        # An empty 'then' branch is a syntax error for POSIX shells
+        :
+      else
+        echo "$cmd not found" 1>&2
+	exit 1
+      fi
+    done
+}
+checkcmds pdftotext iconv awk
+
 # output the result
 pdftotext -raw -htmlmeta -enc UTF-8 -eol unix -q "$infile" - |
 iconv -f UTF-8 -t UTF-8 -c -s |
@ -80,5 +105,3 @@ BEGIN {
 # didn't really understand its use as iconv -c is supposed to fix the
 # encoding anyway

-# exit normally
-exit 0
--- a/src/filters/rclps
+++ b/src/filters/rclps
@ -1,5 +1,5 @@
 #!/bin/sh
-# @(#$Id: rclps,v 1.1 2005-02-02 17:57:08 dockes Exp $  (C) 2004 J.F.Dockes
+# @(#$Id: rclps,v 1.2 2005-10-19 10:21:48 dockes Exp $  (C) 2004 J.F.Dockes
 # Parts taken from Estraier:
 #================================================================
 # Estraier: a personal full-text search system
@ -34,6 +34,31 @@ fi

 infile="$1"

+# Check that the command named by $1 is available. A name containing a
+# slash is tested as is, any other name is searched for in the $PATH
+# directories. Returns 0 if an executable regular file is found, else 1.
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+	if test -f "$cmd" && test -x "$cmd" ; then return 0; else return 1; fi ;;
+    *)
+      # Split PATH on colons into the positional parameters
+      IFS=: ; set -- $PATH; unset IFS
+      # "$@" keeps directory names containing white space in one piece
+      for d in "$@" ; do
+        # Empty PATH elements are skipped
+        test -n "$d" || continue
+        test -f "$d/$cmd" && test -x "$d/$cmd" && return 0
+      done
+      return 1 ;;
+    esac
+}
+# Check that all the commands named in the arguments are available
+# (see iscmd). At the first missing one, print a message on stderr and
+# exit the script with status 1.
+checkcmds()
+{
+    for cmd in "$@";do
+      if iscmd "$cmd"
+      then
+        # An empty 'then' branch is a syntax error for POSIX shells
+        :
+      else
+        echo "$cmd not found" 1>&2
+	exit 1
+      fi
+    done
+}
+checkcmds $decoder iconv awk
+
 # check the input file existence
 if test ! -f "$infile"
 then
@ -69,5 +94,3 @@ END {
    printf("</p></body></html>\n");
 }' | iconv -f iso-8859-1 -t UTF-8 -c -s 

-# exit normally
-exit 0
--- a/src/filters/rclsoff
+++ b/src/filters/rclsoff
@ -1,5 +1,5 @@
 #!/bin/sh
-# @(#$Id: rclsoff,v 1.1 2005-02-09 12:07:30 dockes Exp $  (C) 2004 J.F.Dockes
+# @(#$Id: rclsoff,v 1.2 2005-10-19 10:21:48 dockes Exp $  (C) 2004 J.F.Dockes
 # Parts taken from Estraier:
 #================================================================
 # Estraier: a personal full-text search system
@ -28,6 +28,31 @@ fi

 infile="$1"

+# Check that the command named by $1 is available. A name containing a
+# slash is tested as is, any other name is searched for in the $PATH
+# directories. Returns 0 if an executable regular file is found, else 1.
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+	if test -f "$cmd" && test -x "$cmd" ; then return 0; else return 1; fi ;;
+    *)
+      # Split PATH on colons into the positional parameters
+      IFS=: ; set -- $PATH; unset IFS
+      # "$@" keeps directory names containing white space in one piece
+      for d in "$@" ; do
+        # Empty PATH elements are skipped
+        test -n "$d" || continue
+        test -f "$d/$cmd" && test -x "$d/$cmd" && return 0
+      done
+      return 1 ;;
+    esac
+}
+# Check that all the commands named in the arguments are available
+# (see iscmd). At the first missing one, print a message on stderr and
+# exit the script with status 1.
+checkcmds()
+{
+    for cmd in "$@";do
+      if iscmd "$cmd"
+      then
+        # An empty 'then' branch is a syntax error for POSIX shells
+        :
+      else
+        echo "$cmd not found" 1>&2
+	exit 1
+      fi
+    done
+}
+checkcmds awk iconv unzip
+
 # check the input file existence
 if test ! -f "$infile"
 then
--- a/src/qtgui/advsearch.ui
+++ b/src/qtgui/advsearch.ui
@ -413,7 +413,7 @@
    <include location="local" impldecl="in implementation">advsearch.ui.h</include>
 </includes>
 <signals>
-    <signal>startSearch(AdvSearchData)</signal>
+    <signal>startSearch(Rcl::AdvSearchData)</signal>
 </signals>
 <slots>
    <slot>delFiltypPB_clicked()</slot>
--- a/src/qtgui/advsearch.ui.h
+++ b/src/qtgui/advsearch.ui.h
@ -91,14 +91,14 @@ void advsearch::restrictFtCB_toggled(bool on)

 void advsearch::searchPB_clicked()
 {
-    AdvSearchData mydata;
+    Rcl::AdvSearchData mydata;
    mydata.allwords = string((const char*)(andWordsLE->text().utf8()));
    mydata.phrase  = string((const char*)(phraseLE->text().utf8()));
    mydata.orwords = string((const char*)(orWordsLE->text().utf8()));
    mydata.nowords = string((const char*)(noWordsLE->text().utf8()));
    if (restrictFtCB->isOn() && noFiltypsLB->count() > 0) {
 	for (unsigned int i = 0; i < yesFiltypsLB->count(); i++) {
-	    QCString ctext = noFiltypsLB->item(i)->text().utf8();
+	    QCString ctext = yesFiltypsLB->item(i)->text().utf8();
 	    mydata.filetypes.push_back(string((const char *)ctext));
 	}
    }
--- a/src/qtgui/main.cpp
+++ b/src/qtgui/main.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: main.cpp,v 1.9 2005-10-10 12:29:42 dockes Exp $ (C) 2005 J.F.Dockes";
+static char rcsid[] = "@(#$Id: main.cpp,v 1.10 2005-10-19 10:21:48 dockes Exp $ (C) 2005 J.F.Dockes";
 #endif

 #include <unistd.h>
@ -10,8 +10,9 @@ static char rcsid[] = "@(#$Id: main.cpp,v 1.9 2005-10-10 12:29:42 dockes Exp $ (

 #include <qmessagebox.h>

-#include "recollmain.h"
 #include "rcldb.h"
+using Rcl::AdvSearchData;
+
 #include "rclconfig.h"
 #include "pathut.h"
 #include "recoll.h"
@ -19,11 +20,43 @@ static char rcsid[] = "@(#$Id: main.cpp,v 1.9 2005-10-10 12:29:42 dockes Exp $ (
 #include "wipedir.h"
 #include "rclinit.h"

+#include "recollmain.h"
+
 RclConfig *rclconfig;
 Rcl::Db *rcldb;
 int recollNeedsExit;
 string tmpdir;

+// Read the query stemming settings from the configuration: dostem
+// receives the boolean value of "querystemming" (false if the lookup
+// fails), stemlang the value of "querystemminglanguage" ("english" if
+// the lookup fails).
+void getQueryStemming(bool &dostem, std::string &stemlang)
+{
+    string value;
+    dostem = rclconfig->getConfParam("querystemming", value) ?
+	ConfTree::stringToBool(value) : false;
+
+    if (rclconfig->getConfParam("querystemminglanguage", stemlang))
+	return;
+    // No language configured: use the default
+    stemlang = "english";
+}
+
+// Make sure that the database is open for querying, opening it
+// read-only from the configured "dbdir" if it is not open already.
+// Returns true if the db is open on exit. Else returns false, and
+// reason holds an explanation which the callers display to the user.
+bool maybeOpenDb(string &reason)
+{
+    if (!rcldb) {
+	// Never leave reason empty: it ends up in a message box
+	reason = "No database object";
+	return false;
+    }
+    if (rcldb->isopen())
+	return true;
+
+    string dbdir;
+    if (rclconfig->getConfParam(string("dbdir"), dbdir) == 0) {
+	reason = "No db directory in configuration";
+	return false;
+    }
+    dbdir = path_tildexpand(dbdir);
+    if (!rcldb->open(dbdir, Rcl::Db::DbRO)) {
+	reason = "Could not open database in " + 
+	    dbdir + " wait for indexing to complete?";
+	return false;
+    }
+    return true;
+}

 void recollCleanup()
 {
@ -86,7 +119,7 @@ int main( int argc, char ** argv )

    rcldb = new Rcl::Db;

-    if (!rcldb->open(dbdir, Rcl::Db::DbRO)) {
+    if (!rcldb || !rcldb->open(dbdir, Rcl::Db::DbRO)) {
 	startindexing = 1;
 	QMessageBox::information(0, "Recoll",
 				 QString("Could not open database in ") + 
--- a/src/qtgui/plaintorich.cpp
+++ b/src/qtgui/plaintorich.cpp
@ -1,3 +1,6 @@
+#ifndef lint
+static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.3 2005-10-19 10:21:48 dockes Exp $ (C) 2005 J.F.Dockes";
+#endif


 #include <string>
--- a/src/qtgui/recoll.h
+++ b/src/qtgui/recoll.h
@ -1,29 +1,22 @@
 #ifndef _RECOLL_H_INCLUDED_
 #define _RECOLL_H_INCLUDED_
-/* @(#$Id: recoll.h,v 1.3 2005-10-17 13:36:53 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: recoll.h,v 1.4 2005-10-19 10:21:48 dockes Exp $  (C) 2004 J.F.Dockes */
 #include <string>
 #include <list>
 #include "rclconfig.h"
 #include "rcldb.h"
 #include "idxthread.h"

-extern void recollCleanup();
-
 // Misc declarations in need of sharing between the UI files
+
+extern void recollCleanup();
+extern bool maybeOpenDb(std::string &reason);
+extern void getQueryStemming(bool &dostem, std::string &stemlang);
+
 extern RclConfig *rclconfig;
 extern Rcl::Db *rcldb;
-extern string tmpdir;
+extern std::string tmpdir;

 extern int recollNeedsExit;

-// Holder for data collected by the advanced search dialog
-struct AdvSearchData {
-    std::string allwords;
-    std::string phrase;
-    std::string orwords;
-    std::string nowords;
-    std::list<std::string> filetypes; // restrict to types. Empty if inactive
-    std::string topdir; // restrict to subtree. Empty if inactive
-};
-
 #endif /* _RECOLL_H_INCLUDED_ */
--- a/src/qtgui/recollmain.ui
+++ b/src/qtgui/recollmain.ui
@ -282,6 +282,7 @@
 <includes>
    <include location="local" impldecl="in declaration">preview/.ui/preview.h</include>
    <include location="local" impldecl="in declaration">recoll.h</include>
+    <include location="local" impldecl="in declaration">advsearch.h</include>
    <include location="local" impldecl="in implementation">recollmain.ui.h</include>
 </includes>
 <variables>
@ -290,6 +291,7 @@
    <variable>bool dostem;</variable>
    <variable>std::string stemlang;</variable>
    <variable>Preview *curPreview;</variable>
+    <variable>advsearch *asearchform;</variable>
 </variables>
 <slots>
    <slot>fileExit()</slot>
@ -304,7 +306,7 @@
    <slot>listNextPB_clicked()</slot>
    <slot>advSearchPB_clicked()</slot>
    <slot>previewClosed( Preview * w )</slot>
-    <slot>startAdvSearch( AdvSearchData sdata )</slot>
+    <slot>startAdvSearch( Rcl::AdvSearchData sdata )</slot>
 </slots>
 <functions>
    <function access="private">init()</function>
--- a/src/qtgui/recollmain.ui.h
+++ b/src/qtgui/recollmain.ui.h
@ -32,6 +32,8 @@ using std::pair;
 #include "smallut.h"
 #include "plaintorich.h"
 #include "unacpp.h"
+#include "advsearch.h"
+

 #ifndef MIN
 #define MIN(A,B) ((A) < (B) ? (A) : (B))
@ -45,6 +47,7 @@ static const int respagesize = 8;
 void RecollMain::init()
 {
    curPreview = 0;
+    asearchform = 0;
 }

 // We want to catch ^Q everywhere to mean quit.
@ -266,31 +269,13 @@ void RecollMain::reslistTE_clicked(int par, int car)
 void RecollMain::queryText_returnPressed()
 {
    LOGDEB(("RecollMain::queryText_returnPressed()\n"));
-    if (!rcldb->isopen()) {
-	string dbdir;
-	if (rclconfig->getConfParam(string("dbdir"), dbdir) == 0) {
-	    QMessageBox::critical(0, "Recoll",
-				  QString("No db directory in configuration"));
-	    exit(1);
-	}
-	dbdir = path_tildexpand(dbdir);
-	if (!rcldb->open(dbdir, Rcl::Db::DbRO)) {
-	    QMessageBox::information(0, "Recoll",
-				     QString("Could not open database in ") + 
-				     QString(dbdir) + " wait for indexing " +
-				     "to complete?");
-	    return;
-	}
-    }
-    if (stemlang.empty()) {
-	string param;
-	if (rclconfig->getConfParam("querystemming", param))
-	    dostem = ConfTree::stringToBool(param);
-	else
-	    dostem = false;
-	if (!rclconfig->getConfParam("querystemminglanguage", stemlang))
-	    stemlang = "english";
+    string reason;
+    if (!maybeOpenDb(reason)) {
+	QMessageBox::critical(0, "Recoll", QString(reason.c_str()));
+	return;
    }
+    if (stemlang.empty())
+	getQueryStemming(dostem, stemlang);

    reslist_current = -1;
    reslist_winfirst = -1;
@ -388,7 +373,7 @@ void RecollMain::listNextPB_clicked()
 	    strftime(datebuf, 99, "<i>Modified:</i>&nbsp;%F&nbsp;%T", tm);
 	}
 	string abst = stripMarkup(doc.abstract);
-	LOGDEB(("Abstract: {%s}\n", abst.c_str()));
+	LOGDEB1(("Abstract: {%s}\n", abst.c_str()));
 	string result = "<p>" + 
 	    string(perbuf) + " <b>" + doc.title + "</b><br>" +
 	    doc.mimetype + "&nbsp;" +
@ -433,46 +418,40 @@ void RecollMain::previewClosed(Preview *w)
    delete w;
 }

-
-
-#include "advsearch.h"
-
-advsearch *asearchform;
-
+// Open advanced search dialog.
 void RecollMain::advSearchPB_clicked()
 {
    if (asearchform == 0) {
-	// Couldn't find way to have a normal wm frame
 	asearchform = new advsearch(this, "Advanced search", FALSE,
 				    WStyle_Customize | WStyle_NormalBorder | 
 				    WStyle_Title | WStyle_SysMenu);
 	asearchform->setSizeGripEnabled(FALSE);
-	connect(asearchform, SIGNAL(startSearch(AdvSearchData)), 
-		this, SLOT(startAdvSearch(AdvSearchData)));
+	connect(asearchform, SIGNAL(startSearch(Rcl::AdvSearchData)), 
+		this, SLOT(startAdvSearch(Rcl::AdvSearchData)));
 	asearchform->show();
    } else {
 	asearchform->show();
    }
 }

-void RecollMain::startAdvSearch(AdvSearchData sdata)
+// Execute an advanced search query
+void RecollMain::startAdvSearch(Rcl::AdvSearchData sdata)
 {
    LOGDEB(("RecollMain::startAdvSearch\n"));
-    LOGDEB((" allwords: %s\n", sdata.allwords.c_str()));
-    LOGDEB((" phrase: %s\n", sdata.phrase.c_str()));
-    LOGDEB((" orwords: %s\n", sdata.orwords.c_str()));
-    LOGDEB((" nowords: %s\n", sdata.nowords.c_str()));
-    string ft;
-    for (list<string>::iterator it = sdata.filetypes.begin(); 
-	 it != sdata.filetypes.end(); it++) {
-	ft += *it + " ";
+    string reason;
+    if (!maybeOpenDb(reason)) {
+	QMessageBox::critical(0, "Recoll", QString(reason.c_str()));
+	return;
    }
-    if (!ft.empty()) 
-	LOGDEB(("Searched file types: %s\n", ft.c_str()));
-    if (!sdata.topdir.empty())
-	LOGDEB(("Restricted to: %s\n", sdata.topdir.c_str()));

+    if (stemlang.empty())
+	getQueryStemming(dostem, stemlang);
+
+    reslist_current = -1;
+    reslist_winfirst = -1;
+
+    if (!rcldb->setQuery(sdata,  stemlang))
+	return;
+    curPreview = 0;
+    listNextPB_clicked();
 }
-
-
-
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.28 2005-04-06 10:20:11 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.29 2005-10-19 10:21:47 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 #include <stdio.h>
 #include <sys/stat.h>
@ -37,9 +37,10 @@ class Native {

    // Querying
    Xapian::Database db;
-    Xapian::Query query;
+    Xapian::Query    query; // query descriptor: terms and subqueries
+			    // joined by operators (or/and etc...)
    Xapian::Enquire *enquire;
-    Xapian::MSet mset;
+    Xapian::MSet     mset;

    Native() : isopen(false), iswritable(false), enquire(0) {
    }
@ -206,8 +207,8 @@ bool mySplitterCB::takeword(const std::string &term, int pos, int, int)
 }

 // Unaccent and lowercase data: use unac 
-// for accents, and do it by hand for upper / lower. Note lowercasing is
-// only for ascii letters anyway, so it's just A-Z -> a-z
+// for accents, and do it by hand for upper / lower. 
+// TOBEDONE: lowercasing is done only for ascii letters, just A-Z -> a-z 
 // Removing crlfs is so that we can use the text in the document data fields.
 bool Rcl::dumb_string(const string &in, string &out)
 {
@ -404,15 +405,15 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
    // If the db is up to date, set the update flags for all documents
    Xapian::PostingIterator doc;
    try {
-	Xapian::PostingIterator did0 = ndb->wdb.postlist_begin(pathterm);
-	for (Xapian::PostingIterator did = did0;
-	     did != ndb->wdb.postlist_end(pathterm); did++) {
+	Xapian::PostingIterator docid0 = ndb->wdb.postlist_begin(pathterm);
+	for (Xapian::PostingIterator docid = docid0;
+	     docid != ndb->wdb.postlist_end(pathterm); docid++) {

-	    Xapian::Document doc = ndb->wdb.get_document(*did);
+	    Xapian::Document doc = ndb->wdb.get_document(*docid);

 	    // Check the date once. no need to look at the others if the
 	    // db needs updating.
-	    if (did == did0) {
+	    if (docid == docid0) {
 		string data = doc.get_data();
 		const char *cp = strstr(data.c_str(), "mtime=");
 		cp += 6;
@ -424,8 +425,8 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
 	    }

 	    // Db is up to date. Make a note that this document exists.
-	    if (*did < ndb->updated.size())
-		ndb->updated[*did] = true;
+	    if (*docid < ndb->updated.size())
+		ndb->updated[*docid] = true;
 	}
    } catch (...) {
 	return true;
@ -596,13 +597,13 @@ bool Rcl::Db::purge()
    // trying to delete a nonexistent document ?
    // Flushing before trying the deletes seems to work around the problem
    ndb->wdb.flush();
-    for (Xapian::docid did = 1; did < ndb->updated.size(); ++did) {
-	if (!ndb->updated[did]) {
+    for (Xapian::docid docid = 1; docid < ndb->updated.size(); ++docid) {
+	if (!ndb->updated[docid]) {
 	    try {
-		ndb->wdb.delete_document(did);
-		LOGDEB(("Rcl::Db::purge: deleted document #%d\n", did));
+		ndb->wdb.delete_document(docid);
+		LOGDEB(("Rcl::Db::purge: deleted document #%d\n", docid));
 	    } catch (const Xapian::DocNotFoundError &) {
-		LOGDEB2(("Rcl::Db::purge: document #%d not found\n", did));
+		LOGDEB2(("Rcl::Db::purge: document #%d not found\n", docid));
 	    }
 	}
    }
@ -628,6 +629,13 @@ class wsQData : public TextSplitCB {
 	terms.push_back(term);
 	return true;
    }
+    // Replace each stored term with the output of Rcl::dumb_string()
+    // (unaccented and lowercased form)
+    void dumball() {
+	for (vector<string>::size_type i = 0; i < terms.size(); i++) {
+	    string folded;
+	    Rcl::dumb_string(terms[i], folded);
+	    terms[i] = folded;
+	}
+    }
 };


@ -638,11 +646,11 @@ static list<string> stemexpand(Native *ndb, string term, const string& lang)
    try {
 	Xapian::Stem stemmer(lang);
 	string stem = stemmer.stem_word(term);
-	LOGDEB(("stemexpand: '%s' -> '%s'\n", term.c_str(), stem.c_str()));
+	LOGDEB(("stemexpand: '%s' stem-> '%s'\n", term.c_str(), stem.c_str()));
 	// Try to fetch the doc from the stem db
 	string stemdbdir = stemdbname(ndb->basedir, lang);
 	Xapian::Database sdb(stemdbdir);
-	LOGDEB1(("Rcl::Db::stemexpand: %s lastdocid: %d\n", 
+	LOGDEB1(("stemexpand: %s lastdocid: %d\n", 
 		stemdbdir.c_str(), sdb.get_lastdocid()));
 	if (!sdb.term_exists(stem)) {
 	    LOGDEB1(("Rcl::Db::stemexpand: no term for %s\n", stem.c_str()));
@ -651,7 +659,7 @@ static list<string> stemexpand(Native *ndb, string term, const string& lang)
 	}
 	Xapian::PostingIterator did = sdb.postlist_begin(stem);
 	if (did == sdb.postlist_end(stem)) {
-	    LOGDEB1(("Rcl::Db::stemexpand: no term(1) for %s\n",stem.c_str()));
+	    LOGDEB1(("stemexpand: no term(1) for %s\n",stem.c_str()));
 	    explist.push_back(term);
 	    return explist;
 	}
@ -669,7 +677,7 @@ static list<string> stemexpand(Native *ndb, string term, const string& lang)
 	if (find(explist.begin(), explist.end(), term) == explist.end()) {
 	    explist.push_back(term);
 	}
-	LOGDEB(("Rcl::Db::stemexpand: %s ->  %s\n", stem.c_str(),
+	LOGDEB(("stemexpand: %s ->  %s\n", stem.c_str(),
 		stringlistdisp(explist).c_str()));
    } catch (...) {
 	LOGERR(("stemexpand: error accessing stem db\n"));
@ -679,6 +687,81 @@ static list<string> stemexpand(Native *ndb, string term, const string& lang)
    return explist;
 }

+// Turn string into possibly complex xapian query. There is little
+// interpretation done on the string (no +term -term or filename:term
+// stuff). We just separate words and phrases, and interpret
+// capitalized terms as wanting no stem expansion.
+//
+//  iq:       input string, split into words and phrases by
+//            ConfTree::stringToStrings()
+//  stemlang: language name passed to stemexpand()
+//  ndb:      native db data, passed to stemexpand()
+//  pqueries: output. One query is appended for each word or phrase
+//  opts:     stem expansion is only attempted if QO_STEM is set
+static void stringToXapianQueries(const string &iq,
+				  const string& stemlang,
+				  Native *ndb,
+				  list<Xapian::Query> &pqueries,
+				  Rcl::Db::QueryOpts opts = Rcl::Db::QO_NONE)
+{
+    // We do not unaccent and lowercase the whole input here: this
+    // would forbid using capitalized words to indicate that a term
+    // should not use stem expansion. The terms are processed with
+    // dumb_string() one by one further down instead.
+    string qstring = iq;
+
+    // Split into (possibly single word) phrases ("this is a phrase"):
+    list<string> phrases;
+    ConfTree::stringToStrings(qstring, phrases);
+
+    // Then process each phrase: split into terms and transform into
+    // appropriate Xapian Query
+
+    for (list<string>::iterator it=phrases.begin(); it !=phrases.end(); it++) {
+	LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str()));
+
+	wsQData splitData;
+	TextSplit splitter(&splitData, true);
+	splitter.text_to_words(*it);
+	// size() is not an int: cast it to match the %d format
+	LOGDEB(("strToXapianQ: splitter term count: %d\n", 
+		(int)splitData.terms.size()));
+	switch(splitData.terms.size()) {
+	case 0: continue;// ??
+	case 1: // Not a real phrase: one term
+	    {
+		string term = splitData.terms.front();
+		bool nostemexp = false;
+		// Yes this doesnt work with accented or non-european
+		// majuscules. TOBEDONE: something :)
+		if (term.length() > 0 && term[0] >= 'A' && term[0] <= 'Z')
+		    nostemexp = true;
+
+		LOGDEB(("Term: %s\n", term.c_str()));
+
+		// Possibly perform stem compression/expansion
+		list<string> exp;  
+		string term1;
+		Rcl::dumb_string(term, term1);
+		if (!nostemexp && (opts & Rcl::Db::QO_STEM)) {
+		    exp = stemexpand(ndb, term1, stemlang);
+		} else {
+		    exp.push_back(term1);
+		}
+
+		// Push either term or stem-expanded set
+		pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 
+						 exp.begin(), exp.end()));
+	    }
+	    break;
+
+	default:
+	    // Phrase: no stem expansion
+	    splitData.dumball();
+	    LOGDEB(("Pushing phrase: [%s]\n", splitData.catterms().c_str()));
+	    pqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE,
+					     splitData.terms.begin(),
+					     splitData.terms.end()));
+	}
+    }
+}
+
 bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts, 
 		       const string& stemlang)
 {
@ -688,48 +771,8 @@ bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
    if (!ndb)
 	return false;

-    string qstring;;
-    if (!dumb_string(iqstring, qstring)) {
-	return false;
-    }
-
-    // First split into (possibly single word) phrases ("this is a phrase"):
-    list<string> phrases;
-    ConfTree::stringToStrings(qstring, phrases);
-    for (list<string>::const_iterator i=phrases.begin();
-	 i != phrases.end();i++) {
-	LOGDEB(("Rcl::Db::setQuery: phrase: '%s'\n", i->c_str()));
-    }
-
    list<Xapian::Query> pqueries;
-    for (list<string>::const_iterator it = phrases.begin(); 
-	 it != phrases.end(); it++) {
-
-	wsQData splitData;
-	TextSplit splitter(&splitData, true);
-	splitter.text_to_words(*it);
-	LOGDEB1(("Rcl::Db::setquery: splitter term count: %d\n", 
-		splitData.terms.size()));
-	switch(splitData.terms.size()) {
-	case 0: continue;// ??
-	case 1: {
-	    list<string> exp;  
-	    if (opts & QO_STEM) 
-		exp = stemexpand(ndb, splitData.terms.front(), stemlang);
-	    else
-		exp.push_back(splitData.terms.front());
-	    pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 
-					     exp.begin(), 
-					     exp.end()));
-	}
-	    break;
-	default:
-	    LOGDEB(("Pushing phrase: %s\n", splitData.catterms().c_str()));
-	    pqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE,
-					     splitData.terms.begin(),
-					     splitData.terms.end()));
-	}
-    }
+    stringToXapianQueries(iqstring, stemlang, ndb, pqueries, opts);
    ndb->query = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(), 
 			       pqueries.end());
    delete ndb->enquire;
@ -739,6 +782,93 @@ bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
    return true;
 }

+// Build the query described by the advanced search data and make it
+// the current one. The clauses for the non-empty fields are ANDed
+// together:
+//  - allwords:  AND of the words/phrases
+//  - orwords:   OR of the words/phrases
+//  - phrase:    the whole field taken as one phrase
+//  - filetypes: OR of the "T"-prefixed type terms
+// The nowords clause (OR of the words/phrases) is applied last, with
+// AND_NOT, to the combination of the other ones. 
+// Returns false if there is no native db data, or if there are words
+// to exclude but no other clause to exclude them from. Returns true
+// otherwise, with the query installed in a new Enquire object and the
+// result set reset.
+bool Rcl::Db::setQuery(AdvSearchData &sdata, const string& stemlang)
+{
+    LOGDEB(("Rcl::Db::setQuery: adv:\n"));
+    LOGDEB((" allwords: %s\n", sdata.allwords.c_str()));
+    LOGDEB((" phrase:   %s\n", sdata.phrase.c_str()));
+    LOGDEB((" orwords:  %s\n", sdata.orwords.c_str()));
+    LOGDEB((" nowords:  %s\n", sdata.nowords.c_str()));
+    string ft;
+    for (list<string>::iterator it = sdata.filetypes.begin(); 
+    	 it != sdata.filetypes.end(); it++) {ft += *it + " ";}
+    if (!ft.empty()) 
+	LOGDEB((" searched file types: %s\n", ft.c_str()));
+    if (!sdata.topdir.empty())
+	LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
+
+    Native *ndb = (Native *)pdata;
+    if (!ndb)
+	return false;
+
+    list<Xapian::Query> pqueries;
+    Xapian::Query xq;
+    
+    if (!sdata.allwords.empty()) {
+	stringToXapianQueries(sdata.allwords, stemlang, ndb, pqueries);
+	if (!pqueries.empty()) {
+	    xq = Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(), 
+			       pqueries.end());
+	    pqueries.clear();
+	}
+    }
+
+    if (!sdata.orwords.empty()) {
+	stringToXapianQueries(sdata.orwords, stemlang, ndb, pqueries);
+	if (!pqueries.empty()) {
+	    Xapian::Query nq;
+	    nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
+			       pqueries.end());
+	    xq = xq.empty() ? nq :
+		Xapian::Query(Xapian::Query::OP_AND, xq, nq);
+	    pqueries.clear();
+	}
+    }
+
+    if (!sdata.phrase.empty()) {
+	// Quote the field so that it is taken as a single phrase
+	string s = string("\"") + sdata.phrase + string("\"");
+	stringToXapianQueries(s, stemlang, ndb, pqueries);
+	if (!pqueries.empty()) {
+	    // There should be a single list element phrase query.
+	    xq = xq.empty() ? *pqueries.begin() : 
+		Xapian::Query(Xapian::Query::OP_AND, xq, *pqueries.begin());
+	    pqueries.clear();
+	}
+    }
+
+    if (!sdata.filetypes.empty()) {
+	Xapian::Query tq;
+	for (list<string>::iterator it = sdata.filetypes.begin(); 
+	     it != sdata.filetypes.end(); it++) {
+	    string term = "T" + *it;
+	    LOGDEB(("Adding file type term: [%s]\n", term.c_str()));
+	    tq = tq.empty() ? Xapian::Query(term) : 
+		Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
+	}
+	xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND, xq, tq);
+    }
+
+    // The exclusion is processed last, so that it always has the
+    // other clauses as left operand: using the excluded words as the
+    // query when nothing precedes them would select exactly the
+    // documents that the user does not want.
+    if (!sdata.nowords.empty()) {
+	stringToXapianQueries(sdata.nowords, stemlang, ndb, pqueries);
+	if (!pqueries.empty()) {
+	    if (xq.empty()) {
+		LOGERR(("Rcl::Db::setQuery: excluded words need another search clause\n"));
+		return false;
+	    }
+	    Xapian::Query nq;
+	    nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
+			       pqueries.end());
+	    xq = Xapian::Query(Xapian::Query::OP_AND_NOT, xq, nq);
+	    pqueries.clear();
+	}
+    }
+
+    ndb->query = xq;
+    delete ndb->enquire;
+    ndb->enquire = new Xapian::Enquire(ndb->db);
+    ndb->enquire->set_query(ndb->query);
+    ndb->mset = Xapian::MSet();
+    return true;
+}
+
 bool Rcl::Db::getQueryTerms(list<string>& terms)
 {
    Native *ndb = (Native *)pdata;
@ -766,6 +896,10 @@ int Rcl::Db::getResCnt()
    return ndb->mset.get_matches_lower_bound();
 }

+// Get document at rank i in query (i is the index in the whole result
+// set, as in the enquire class). We check if the current mset has the
+// doc, else ask for another one. We use msets of 10 documents. Don't
+// know if the whole thing makes sense at all but it seems to work.
 bool Rcl::Db::getDoc(int i, Doc &doc, int *percent)
 {
    LOGDEB1(("Rcl::Db::getDoc: %d\n", i));
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@ -1,12 +1,13 @@
 #ifndef _DB_H_INCLUDED_
 #define _DB_H_INCLUDED_
-/* @(#$Id: rcldb.h,v 1.13 2005-03-25 09:40:27 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: rcldb.h,v 1.14 2005-10-19 10:21:47 dockes Exp $  (C) 2004 J.F.Dockes */

 #include <string>
 #include <list>

 #ifndef NO_NAMESPACES
 using std::string;
+using std::list;
 #endif

 // rcldb defines an interface for a 'real' text database. The current 
@ -24,7 +25,9 @@ using std::string;

 struct stat;

+#ifndef NO_NAMESPACES
 namespace Rcl {
+#endif

 /**
 * Dumb bunch holder for document attributes and data
@ -57,6 +60,19 @@ class Doc {
    }
 };

+/**
+ * Holder for the advanced query data 
+ *
+ * Filled in by the advanced search dialog and consumed by
+ * Db::setQuery(AdvSearchData&, ...), which skips the empty fields.
+ */
+class AdvSearchData {
+    public:
+    // Words/phrases which must all match (combined with OP_AND)
+    string allwords;
+    // Text searched as one phrase (setQuery wraps it in double quotes)
+    string phrase;
+    // Words/phrases of which any may match (combined with OP_OR)
+    string orwords;
+    // Words/phrases to exclude (setQuery uses OP_AND_NOT)
+    string nowords;
+    // Each value is prefixed with "T" to build the type term
+    list<string> filetypes; // restrict to types. Empty if inactive
+    // NOTE(review): only logged by setQuery for now, the subtree
+    // filter does not appear to be implemented yet -- confirm
+    string topdir; // restrict to subtree. Empty if inactive
+};
+
 /**
 * Wrapper class for the native database.
 */
@ -83,7 +99,8 @@ class Db {
    enum QueryOpts {QO_NONE=0, QO_STEM = 1};
    bool setQuery(const string &q, QueryOpts opts = QO_NONE, 
 		  const string& stemlang = "english");
-    bool getQueryTerms(std::list<string>& terms);
+    bool setQuery(AdvSearchData &q, const string& stemlang = "english");
+    bool getQueryTerms(list<string>& terms);

    // Get document at rank i. This is probably vastly inferior to the type
    // of interface in Xapian, but we have to start with something simple
@ -96,6 +113,9 @@ class Db {
 // Unaccent and lowercase data.
 extern bool dumb_string(const string &in, string &out);

+#ifndef NO_NAMESPACES
 }
+#endif // NO_NAMESPACES
+

 #endif /* _DB_H_INCLUDED_ */