make searchdata a more flexible struct

2006-11-13 08:50:07 +00:00 · 2006-11-13 08:50:07 +00:00 · cdbf026738
commit cdbf026738
parent 1d7f103fe7
6 changed files with 696 additions and 522 deletions
--- a/src/lib/Makefile
+++ b/src/lib/Makefile
@ -8,8 +8,8 @@ LIBS = librcl.a
 all: $(LIBS)
-OBJS =  conftree.o csguess.o debuglog.o execmd.o idfile.o md5.o wipedir.o fstreewalk.o mh_html.o mh_mail.o mh_exec.o mh_text.o htmlparse.o indexer.o internfile.o mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathhash.o pathut.o rclconfig.o rcldb.o rclinit.o stemdb.o base64.o readfile.o smallut.o textsplit.o transcode.o unacpp.o history.o docseq.o sortseq.o copyfile.o rclaspell.o
+OBJS =  conftree.o csguess.o debuglog.o execmd.o idfile.o md5.o wipedir.o fstreewalk.o mh_html.o mh_mail.o searchdata.o mh_exec.o mh_text.o htmlparse.o indexer.o internfile.o mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathhash.o pathut.o rclconfig.o rcldb.o rclinit.o stemdb.o base64.o readfile.o smallut.o textsplit.o transcode.o unacpp.o history.o docseq.o sortseq.o copyfile.o rclaspell.o
-DEPS =  conftree.dep.stamp csguess.dep.stamp debuglog.dep.stamp execmd.dep.stamp idfile.dep.stamp md5.dep.stamp wipedir.dep.stamp fstreewalk.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_exec.dep.stamp mh_text.dep.stamp htmlparse.dep.stamp indexer.dep.stamp internfile.dep.stamp mimehandler.dep.stamp mimeparse.dep.stamp mimetype.dep.stamp myhtmlparse.dep.stamp pathhash.dep.stamp pathut.dep.stamp rclconfig.dep.stamp rcldb.dep.stamp rclinit.dep.stamp stemdb.dep.stamp base64.dep.stamp readfile.dep.stamp smallut.dep.stamp textsplit.dep.stamp transcode.dep.stamp unacpp.dep.stamp history.dep.stamp docseq.dep.stamp sortseq.dep.stamp copyfile.dep.stamp rclaspell.dep.stamp
+DEPS =  conftree.dep.stamp csguess.dep.stamp debuglog.dep.stamp execmd.dep.stamp idfile.dep.stamp md5.dep.stamp wipedir.dep.stamp fstreewalk.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp searchdata.dep.stamp mh_exec.dep.stamp mh_text.dep.stamp htmlparse.dep.stamp indexer.dep.stamp internfile.dep.stamp mimehandler.dep.stamp mimeparse.dep.stamp mimetype.dep.stamp myhtmlparse.dep.stamp pathhash.dep.stamp pathut.dep.stamp rclconfig.dep.stamp rcldb.dep.stamp rclinit.dep.stamp stemdb.dep.stamp base64.dep.stamp readfile.dep.stamp smallut.dep.stamp textsplit.dep.stamp transcode.dep.stamp unacpp.dep.stamp history.dep.stamp docseq.dep.stamp sortseq.dep.stamp copyfile.dep.stamp rclaspell.dep.stamp
 librcl.a : $(DEPS) $(OBJS) unac.o
 	ar ru librcl.a $(OBJS) unac.o
@ -37,6 +37,8 @@ mh_html.o : ../common/mh_html.cpp
 	$(CXX) $(ALL_CXXFLAGS) -c ../common/mh_html.cpp
 mh_mail.o : ../common/mh_mail.cpp
 	$(CXX) $(ALL_CXXFLAGS) -c ../common/mh_mail.cpp
 searchdata.o : ../common/searchdata.cpp
 	$(CXX) $(ALL_CXXFLAGS) -c ../common/searchdata.cpp
 mh_exec.o : ../common/mh_exec.cpp
 	$(CXX) $(ALL_CXXFLAGS) -c ../common/mh_exec.cpp
 mh_text.o : ../common/mh_text.cpp
@ -125,6 +127,9 @@ mh_html.dep.stamp : ../common/mh_html.cpp
 mh_mail.dep.stamp : ../common/mh_mail.cpp
 	$(CXX) -M $(ALL_CXXFLAGS) ../common/mh_mail.cpp > mh_mail.dep
 	touch mh_mail.dep.stamp
 searchdata.dep.stamp : ../common/searchdata.cpp
 	$(CXX) -M $(ALL_CXXFLAGS) ../common/searchdata.cpp > searchdata.dep
 	touch searchdata.dep.stamp
 mh_exec.dep.stamp : ../common/mh_exec.cpp
 	$(CXX) -M $(ALL_CXXFLAGS) ../common/mh_exec.cpp > mh_exec.dep
 	touch mh_exec.dep.stamp
@ -213,6 +218,7 @@ include wipedir.dep
 include fstreewalk.dep
 include mh_html.dep
 include mh_mail.dep
 include searchdata.dep
 include mh_exec.dep
 include mh_text.dep
 include htmlparse.dep
--- a/src/lib/mkMake
+++ b/src/lib/mkMake
@ -8,6 +8,7 @@ SRCS="${depth}/utils/conftree.cpp ${depth}/index/csguess.cpp \
     ${depth}/utils/idfile.cpp ${depth}/utils/md5.cpp \
     ${depth}/utils/wipedir.cpp ${depth}/utils/fstreewalk.cpp \
     ${depth}/common/mh_html.cpp ${depth}/common/mh_mail.cpp \
     ${depth}/common/searchdata.cpp \
     ${depth}/common/mh_exec.cpp ${depth}/common/mh_text.cpp \
     ${depth}/common/htmlparse.cpp ${depth}/index/indexer.cpp \
     ${depth}/common/internfile.cpp ${depth}/common/mimehandler.cpp \
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.90 2006-11-12 08:35:11 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.91 2006-11-13 08:49:44 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -174,6 +174,229 @@ bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
    return false;
 }
 bool Native::dbDataToRclDoc(std::string &data, Doc &doc, 
 			    int qopts,
 			    Xapian::docid docid, const list<string>& terms)
 {
    LOGDEB1(("Db::dbDataToRclDoc: opts %x data: %s\n", qopts, data.c_str()));
    ConfSimple parms(&data);
    if (!parms.ok())
 	return false;
    parms.get(string("url"), doc.url);
    parms.get(string("mtype"), doc.mimetype);
    parms.get(string("fmtime"), doc.fmtime);
    parms.get(string("dmtime"), doc.dmtime);
    parms.get(string("origcharset"), doc.origcharset);
    parms.get(string("caption"), doc.title);
    parms.get(string("keywords"), doc.keywords);
    parms.get(string("abstract"), doc.abstract);
    // Possibly remove synthetic abstract indicator (if it's there, we
    // used to index the beginning of the text as abstract).
    bool syntabs = false;
    if (doc.abstract.find(rclSyntAbs) == 0) {
 	doc.abstract = doc.abstract.substr(rclSyntAbs.length());
 	syntabs = true;
    }
    // If the option is set and the abstract is synthetic or empty , build 
    // abstract from position data. 
    if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) {
 	LOGDEB(("dbDataToRclDoc:: building abstract from position data\n"));
 	if (doc.abstract.empty() || syntabs || 
 	    (qopts & Db::QO_REPLACE_ABSTRACT))
 	    doc.abstract = makeAbstract(docid, terms);
    } 
    parms.get(string("ipath"), doc.ipath);
    parms.get(string("fbytes"), doc.fbytes);
    parms.get(string("dbytes"), doc.dbytes);
    doc.xdocid = docid;
    return true;
 }
 // Build a synthetic abstract for a document from the positions of the
 // query terms inside it.
 //
 // We build a possibly full size but sparsely populated (only around
 // the search term occurrences) reconstruction of the document. It
 // would be possible to compress the array, by having only multiple
 // chunks around the terms, but this would seriously complicate the
 // data structure.
 //
 // @param docid id of the document to summarize
 // @param terms query terms, the occurrences of which anchor the chunks
 // @return the selected words in position order, each followed by a
 //    space, with a "..." marker after each chunk when the slot is free
 string Native::makeAbstract(Xapian::docid docid, const list<string>& terms)
 {
    LOGDEB(("Native::makeAbstract: maxlen %d wWidth %d\n",
 	    m_db->m_synthAbsLen, m_db->m_synthAbsWordCtxLen));
    Chrono chron;
    // Context width as an unsigned count (a negative setting is treated
    // as 0). We need this to clamp the start of a window explicitly:
    // subtracting the width from an unsigned position wraps around for
    // terms closer to the document start than the width, which yields an
    // empty window and loses the term.
    const unsigned int ctxlen = m_db->m_synthAbsWordCtxLen > 0 ?
 	static_cast<unsigned int>(m_db->m_synthAbsWordCtxLen) : 0;
    // For each of the query terms, query xapian for its positions
    // list in the document. For each position entry, remember it in qtermposs
    // and insert it and its neighbours in the set of 'interesting' positions
    // The terms 'array' that we partially populate with the document
    // terms, at their positions around the search terms positions:
    map<unsigned int, string> sparseDoc;
    // All the query term positions. We remember this mainly because we are
    // going to random-shuffle it for selecting the chunks that we actually 
    // print.
    vector<unsigned int> qtermposs; 
    // Limit the total number of slots we populate.
    const unsigned int maxtotaloccs = 300;
    // Max occurrences per term. We initially know nothing about the
    // occurrences repartition (it would be possible that only one
    // term in the list occurs, or that all do). So this is a rather
    // arbitrary choice.
    const unsigned int maxoccperterm = maxtotaloccs / 10;
    unsigned int totaloccs = 0;
    for (list<string>::const_iterator qit = terms.begin(); qit != terms.end();
 	 qit++) {
 	Xapian::PositionIterator pos;
 	// There may be query terms not in this doc. This raises an
 	// exception when requesting the position list, we catch it.
 	string emptys;
 	try {
 	    unsigned int occurrences = 0;
 	    for (pos = db.positionlist_begin(docid, *qit); 
 		 pos != db.positionlist_end(docid, *qit); pos++) {
 		unsigned int ipos = *pos;
 		LOGDEB2(("Abstract: [%s] at %d\n", qit->c_str(), ipos));
 		// Remember the term position
 		qtermposs.push_back(ipos);
 		// Add adjacent slots to the set to populate at next step.
 		// The window start is clamped at position 0.
 		unsigned int sta = ipos > ctxlen ? ipos - ctxlen : 0;
 		unsigned int sto = ipos + ctxlen;
 		for (unsigned int ii = sta; ii <= sto;  ii++) {
 		    if (ii == ipos)
 			sparseDoc[ii] = *qit;
 		    else
 			sparseDoc[ii] = emptys;
 		}
 		// Limit the number of occurrences we keep for each
 		// term. The abstract has a finite length anyway !
 		if (occurrences++ > maxoccperterm)
 		    break;
 	    }
 	} catch (...) {
 	    // Term does not occur. No problem.
 	}
 	// Limit total size
 	// NOTE(review): this counter advances once per query term, not
 	// once per occurrence, so the limit can only trigger for very
 	// long term lists. Confirm that this is the intent.
 	if (totaloccs++ > maxtotaloccs)
 	    break;
    }
    LOGDEB(("Abstract:%d:chosen number of positions %d. Populating\n", 
 	    chron.millis(), int(qtermposs.size())));
    // Walk the full document position list (for each term walk
    // position list) and populate slots around the query terms. We
    // arbitrarily truncate the list to avoid taking forever. If we do
    // cutoff, the abstract may be inconsistent, which is bad...
    { 
 	Xapian::TermIterator term;
 	int cutoff = 500 * 1000;
 	for (term = db.termlist_begin(docid);
 	     term != db.termlist_end(docid); term++) {
 	    if (cutoff-- < 0) {
 		LOGDEB(("Abstract: max term count cutoff\n"));
 		break;
 	    }
 	    Xapian::PositionIterator pos;
 	    for (pos = db.positionlist_begin(docid, *term); 
 		 pos != db.positionlist_end(docid, *term); pos++) {
 		if (cutoff-- < 0) {
 		    LOGDEB(("Abstract: max term count cutoff\n"));
 		    break;
 		}
 		map<unsigned int, string>::iterator vit;
 		if ((vit=sparseDoc.find(*pos)) != sparseDoc.end()) {
 		    // Don't replace a term: the terms list is in
 		    // alphabetic order, and we may have several terms
 		    // at the same position, we want to keep only the
 		    // first one (ie: dockes and dockes@wanadoo.fr)
 		    if (vit->second.empty()) {
 			LOGDEB2(("Abstract: populating: [%s] at %d\n", 
 				(*term).c_str(), *pos));
 			sparseDoc[*pos] = *term;
 		    }
 		}
 	    }
 	}
    }
 #if 0
    // Debug only: output the full term[position] vector
    bool epty = false;
    int ipos = 0;
    for (map<unsigned int, string>::iterator it = sparseDoc.begin(); 
 	 it != sparseDoc.end();
 	 it++, ipos++) {
 	if (it->second.empty()) {
 	    if (!epty)
 		LOGDEB(("Abstract:vec[%d]: [%s]\n", ipos, it->second.c_str()));
 	    epty=true;
 	} else {
 	    epty = false;
 	    LOGDEB(("Abstract:vec[%d]: [%s]\n", ipos, it->second.c_str()));
 	}
    }
 #endif
    LOGDEB(("Abstract:%d: randomizing and extracting\n", chron.millis()));
    // We randomize the selection of term positions, from which we
    // shall pull, starting at the beginning, until the abstract is
    // big enough. The abstract is finally built in correct position
    // order, thanks to the position map.
    random_shuffle(qtermposs.begin(), qtermposs.end());
    map<unsigned int, string> mabs;
    unsigned int abslen = 0;
    // Extract data around the N first (in random order) query term
    // positions, and store the terms in the map. Don't concatenate
    // immediately into chunks because there might be overlaps
    for (vector<unsigned int>::const_iterator pos = qtermposs.begin();
 	 pos != qtermposs.end(); pos++) {
 	if (int(abslen) > m_db->m_synthAbsLen)
 	    break;
 	// Same window as in the first pass, start clamped at 0
 	unsigned int sta = *pos > ctxlen ? *pos - ctxlen : 0;
 	unsigned int sto = *pos + ctxlen;
 	LOGDEB2(("Abstract: %d<-%d->%d\n", sta, *pos, sto));
 	for (unsigned int ii = sta; ii <= sto; ii++) {
 	    if (int(abslen) > m_db->m_synthAbsLen)
 		break;
 	    map<unsigned int, string>::const_iterator vit = 
 		sparseDoc.find(ii);
 	    if (vit != sparseDoc.end() && !vit->second.empty()) {
 		LOGDEB2(("Abstract: position %d -> [%s]\n", 
 			 ii, vit->second.c_str()));
 		mabs[ii] = vit->second;
 		abslen += vit->second.length();
 	    } else {
 		LOGDEB2(("Abstract: empty position at %d\n", ii));
 	    }
 	}
 	// Possibly add a ... at the end of chunk if it's not
 	// overlapping
 	if (mabs.find(sto+1) == mabs.end())
 	    mabs[sto+1] = "...";
    }
    // Build the abstract by walking the map (in order of position)
    string abstract;
    for (map<unsigned int, string>::const_iterator it = mabs.begin();
 	 it != mabs.end(); it++) {
 	LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str()));
 	abstract += it->second + " ";
    }
    LOGDEB(("Abtract: done in %d mS\n", chron.millis()));
    return abstract;
 }
 /* Rcl::Db methods ///////////////////////////////// */
@ -909,279 +1132,67 @@ bool Db::purgeFile(const string &fn)
    return false;
 }
-// Splitter callback for breaking query into terms
+bool Db::filenameWildExp(const string& fnexp, list<string>& names)
 class wsQData : public TextSplitCB {
 public:
    vector<string> terms;
    string catterms() {
 	string s;
 	for (unsigned int i=0;i<terms.size();i++) {
 	    s += "[" + terms[i] + "] ";
 	}
 	return s;
    }
    bool takeword(const std::string &term, int , int, int) {
 	LOGDEB1(("wsQData::takeword: %s\n", term.c_str()));
 	terms.push_back(term);
 	return true;
    }
    void dumball() {
 	for (vector<string>::iterator it=terms.begin(); it !=terms.end();it++){
 	    string dumb;
 	    dumb_string(*it, dumb);
 	    *it = dumb;
 	}
    }
 };
 // Turn string into list of xapian queries. There is little
 // interpretation done on the string (no +term -term or filename:term
 // stuff). We just separate words and phrases, and interpret
 // capitalized terms as wanting no stem expansion. 
 // The final list contains one query for each term or phrase
 //   - Elements corresponding to a stem-expanded part are an OP_OR
 //     composition of the stem-expanded terms (or a single term query).
 //   - Elements corresponding to a phrase are an OP_PHRASE composition of the
 //     phrase terms (no stem expansion in this case)
 static void stringToXapianQueries(const string &iq,
 				  const string& stemlang,
 				  Db *db,
 				  list<Xapian::Query> &pqueries,
 				  unsigned int opts = Db::QO_NONE)
 {
-    string qstring = iq;
+    // File name search, with possible wildcards. 
    // We expand wildcards by scanning the filename terms (prefixed 
    // with XSFN) from the database. 
    // We build an OR query with the expanded values if any.
    string pattern;
    dumb_string(fnexp, pattern);
-    // Split into (possibly single word) phrases ("this is a phrase"):
+    // If pattern is not quoted, and has no wildcards, we add * at
-    list<string> phrases;
+    // each end: match any substring
-    stringToStrings(qstring, phrases);
+    if (pattern[0] == '"' && pattern[pattern.size()-1] == '"') {
 	pattern = pattern.substr(1, pattern.size() -2);
    } else if (pattern.find_first_of("*?[") == string::npos) {
 	pattern = "*" + pattern + "*";
    } // else let it be
-    // Then process each phrase: split into terms and transform into
+    LOGDEB((" pattern: [%s]\n", pattern.c_str()));
    // appropriate Xapian Query
-    for (list<string>::iterator it=phrases.begin(); it !=phrases.end(); it++) {
+    // Match pattern against all file names in the db
-	LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str()));
+    Xapian::TermIterator it = m_ndb->db.allterms_begin(); 
-
+    it.skip_to("XSFN");
-	// If there are both spans and single words in this element,
+    for (;it != m_ndb->db.allterms_end(); it++) {
-	// we need to use a word split, else a phrase query including
+	if ((*it).find("XSFN") != 0)
-	// a span would fail if we didn't adjust the proximity to
+	    break;
-	// account for the additional span term which is complicated.
+	string fn = (*it).substr(4);
-	wsQData splitDataS, splitDataW;
+	LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
-	TextSplit splitterS(&splitDataS, TextSplit::TXTS_ONLYSPANS);
+	if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
-	splitterS.text_to_words(*it);
+	    names.push_back((*it).c_str());
-	TextSplit splitterW(&splitDataW, TextSplit::TXTS_NOSPANS);
+	}
-	splitterW.text_to_words(*it);
+	// Limit the match count
-	wsQData& splitData = splitDataS;
+	if (names.size() > 1000) {
-	if (splitDataS.terms.size() > 1 && splitDataS.terms.size() != 
+	    LOGERR(("Db::SetQuery: too many matched file names\n"));
 	    splitDataW.terms.size())
 	    splitData = splitDataW;
 	LOGDEB1(("strToXapianQ: splitter term count: %d\n", 
 		splitData.terms.size()));
 	switch(splitData.terms.size()) {
 	case 0: continue;// ??
 	case 1: // Not a real phrase: one term
 	    {
 		string term = splitData.terms.front();
 		bool nostemexp = false;
 		// Check if the first letter is a majuscule in which
 		// case we do not want to do stem expansion. Note that
 		// the test is convoluted and possibly problematic
 		if (term.length() > 0) {
 		    string noacterm,noaclowterm;
 		    if (unacmaybefold(term, noacterm, "UTF-8", false) &&
 			unacmaybefold(noacterm, noaclowterm, "UTF-8", true)) {
 			Utf8Iter it1(noacterm);
 			Utf8Iter it2(noaclowterm);
 			if (*it1 != *it2)
 			    nostemexp = true;
 		    }
 		}
 		LOGDEB1(("Term: %s stem expansion: %s\n", 
 			term.c_str(), nostemexp?"no":"yes"));
 		list<string> exp;  
 		string term1;
 		dumb_string(term, term1);
 		// Possibly perform stem compression/expansion
 		if (!nostemexp && (opts & Db::QO_STEM)) {
 		    exp = db->stemExpand(stemlang, term1);
 		} else {
 		    exp.push_back(term1);
 		}
 		// Push either term or OR of stem-expanded set
 		pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 
 						 exp.begin(), exp.end()));
 	    }
 	    break;
 	default:
 	    // Phrase: no stem expansion
 	    splitData.dumball();
 	    LOGDEB(("Pushing phrase: [%s]\n", splitData.catterms().c_str()));
 	    pqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE,
 					     splitData.terms.begin(),
 					     splitData.terms.end()));
 	}
    }
    if (names.empty()) {
 	// Build an impossible query: we know its impossible because we
 	// control the prefixes!
 	names.push_back("XIMPOSSIBLE");
    }
    return true;
 }
 // Prepare query out of "advanced search" data
-bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
+bool Db::setQuery(RefCntr<SearchData> sdata, int opts, 
 		  const string& stemlang)
 {
-    LOGDEB(("Db::setQuery: adv:\n"));
+    if (!m_ndb) {
-    LOGDEB((" allwords: %s\n", sdata.allwords.c_str()));
+	LOGERR(("Db::setQuery: no db!\n"));
    LOGDEB((" phrase:   %s\n", sdata.phrase.c_str()));
    LOGDEB((" orwords:  %s\n", sdata.orwords.c_str()));
    LOGDEB((" orwords1:  %s\n", sdata.orwords1.c_str()));
    LOGDEB((" nowords:  %s\n", sdata.nowords.c_str()));
    LOGDEB((" filename:  %s\n", sdata.filename.c_str()));
    string ft;
    for (list<string>::iterator it = sdata.filetypes.begin(); 
    	 it != sdata.filetypes.end(); it++) {ft += *it + " ";}
    if (!ft.empty()) 
 	LOGDEB((" searched file types: %s\n", ft.c_str()));
    if (!sdata.topdir.empty())
 	LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
    LOGDEB((" Options: 0x%x\n", opts));
    m_filterTopDir = sdata.topdir;
    m_dbindices.clear();
    if (!m_ndb)
 	return false;
-    list<Xapian::Query> pqueries;
+    }
    Xapian::Query xq;
    LOGDEB(("Db::setQuery:\n"));
    m_filterTopDir = sdata->m_topdir;
    m_dbindices.clear();
    m_qOpts = opts;
-    if (!sdata.filename.empty()) {
+    Xapian::Query xq;
-	LOGDEB((" filename search\n"));
+    sdata->toNativeQuery(*this, &xq, (opts & Db::QO_STEM) ? stemlang : "");
 	// File name search, with possible wildcards. 
 	// We expand wildcards by scanning the filename terms (prefixed 
        // with XSFN) from the database. 
 	// We build an OR query with the expanded values if any.
 	string pattern;
 	dumb_string(sdata.filename, pattern);
 	// If pattern is not quoted, and has no wildcards, we add * at
 	// each end: match any substring
 	if (pattern[0] == '"' && pattern[pattern.size()-1] == '"') {
 	    pattern = pattern.substr(1, pattern.size() -2);
 	} else if (pattern.find_first_of("*?[") == string::npos) {
 	    pattern = "*" + pattern + "*";
 	} // else let it be
 	LOGDEB((" pattern: [%s]\n", pattern.c_str()));
 	// Match pattern against all file names in the db
 	Xapian::TermIterator it = m_ndb->db.allterms_begin(); 
 	it.skip_to("XSFN");
 	list<string> names;
 	for (;it != m_ndb->db.allterms_end(); it++) {
 	    if ((*it).find("XSFN") != 0)
 		break;
 	    string fn = (*it).substr(4);
 	    LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
 	    if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
 		names.push_back((*it).c_str());
 	    }
 	    // Limit the match count
 	    if (names.size() > 1000) {
 		LOGERR(("Db::SetQuery: too many matched file names\n"));
 		break;
 	    }
 	}
 	if (names.empty()) {
 	    // Build an impossible query: we know its impossible because we
 	    // control the prefixes!
 	    names.push_back("XIMPOSSIBLE");
 	}
 	// Build a query out of the matching file name terms.
 	xq = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
    }
    if (!sdata.allwords.empty()) {
 	stringToXapianQueries(sdata.allwords, stemlang, this,pqueries,m_qOpts);
 	if (!pqueries.empty()) {
 	    Xapian::Query nq = 
 		Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
 			      pqueries.end());
 	    xq = xq.empty() ? nq :
 		Xapian::Query(Xapian::Query::OP_AND, xq, nq);
 	    pqueries.clear();
 	}
    }
    if (!sdata.orwords.empty()) {
 	stringToXapianQueries(sdata.orwords, stemlang, this,pqueries,m_qOpts);
 	if (!pqueries.empty()) {
 	    Xapian::Query nq = 
 		Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
 			       pqueries.end());
 	    xq = xq.empty() ? nq :
 		Xapian::Query(Xapian::Query::OP_AND, xq, nq);
 	    pqueries.clear();
 	}
    }
    if (!sdata.orwords1.empty()) {
 	stringToXapianQueries(sdata.orwords1, stemlang, this,pqueries,m_qOpts);
 	if (!pqueries.empty()) {
 	    Xapian::Query nq = 
 		Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
 			       pqueries.end());
 	    xq = xq.empty() ? nq :
 		Xapian::Query(Xapian::Query::OP_AND, xq, nq);
 	    pqueries.clear();
 	}
    }
    if (!sdata.phrase.empty()) {
 	Xapian::Query nq;
 	string s = string("\"") + sdata.phrase + string("\"");
 	stringToXapianQueries(s, stemlang, this, pqueries);
 	if (!pqueries.empty()) {
 	    // There should be a single list element phrase query.
 	    xq = xq.empty() ? *pqueries.begin() : 
 		Xapian::Query(Xapian::Query::OP_AND, xq, *pqueries.begin());
 	    pqueries.clear();
 	}
    }
    if (!sdata.filetypes.empty()) {
 	Xapian::Query tq;
 	for (list<string>::iterator it = sdata.filetypes.begin(); 
 	     it != sdata.filetypes.end(); it++) {
 	    string term = "T" + *it;
 	    LOGDEB(("Adding file type term: [%s]\n", term.c_str()));
 	    tq = tq.empty() ? Xapian::Query(term) : 
 		Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
 	}
 	xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
    }
    // "And not" part. Must come last, as we have to check it's not
    // the only term in the query.  We do no stem expansion on 'No'
    // words. Should we ?
    if (!sdata.nowords.empty()) {
 	stringToXapianQueries(sdata.nowords, stemlang, this, pqueries);
 	if (!pqueries.empty()) {
 	    Xapian::Query nq;
 	    nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
 			       pqueries.end());
 	    if (xq.empty()) {
 		// Xapian cant do this currently. Have to have a positive 
 		// part!
 		sdata.description = "Error: pure negative query\n";
 		LOGERR(("Rcl::Db::setQuery: error: pure negative query\n"));
 		return false;
 	    }
 	    xq = Xapian::Query(Xapian::Query::OP_AND_NOT, xq, nq);
 	    pqueries.clear();
 	}
    }
    m_ndb->query = xq;
    delete m_ndb->enquire;
@ -1189,10 +1200,11 @@ bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
    m_ndb->enquire->set_query(m_ndb->query);
    m_ndb->mset = Xapian::MSet();
    // Get the query description and trim the "Xapian::Query"
-    sdata.description = m_ndb->query.get_description();
+    sdata->m_description = m_ndb->query.get_description();
-    if (sdata.description.find("Xapian::Query") == 0)
+    if (sdata->m_description.find("Xapian::Query") == 0)
-	sdata.description = sdata.description.substr(strlen("Xapian::Query"));
+	sdata->m_description = 
-    LOGDEB(("Db::SetQuery: Q: %s\n", sdata.description.c_str()));
+	    sdata->m_description.substr(strlen("Xapian::Query"));
    LOGDEB(("Db::SetQuery: Q: %s\n", sdata->m_description.c_str()));
    return true;
 }
@ -1422,43 +1434,6 @@ int Db::getResCnt()
    return m_ndb->mset.get_matches_lower_bound();
 }
 bool Native::dbDataToRclDoc(std::string &data, Doc &doc, 
 			    int qopts,
 			    Xapian::docid docid, const list<string>& terms)
 {
    LOGDEB1(("Db::dbDataToRclDoc: opts %x data: %s\n", qopts, data.c_str()));
    ConfSimple parms(&data);
    if (!parms.ok())
 	return false;
    parms.get(string("url"), doc.url);
    parms.get(string("mtype"), doc.mimetype);
    parms.get(string("fmtime"), doc.fmtime);
    parms.get(string("dmtime"), doc.dmtime);
    parms.get(string("origcharset"), doc.origcharset);
    parms.get(string("caption"), doc.title);
    parms.get(string("keywords"), doc.keywords);
    parms.get(string("abstract"), doc.abstract);
    // Possibly remove synthetic abstract indicator (if it's there, we
    // used to index the beginning of the text as abstract).
    bool syntabs = false;
    if (doc.abstract.find(rclSyntAbs) == 0) {
 	doc.abstract = doc.abstract.substr(rclSyntAbs.length());
 	syntabs = true;
    }
    // If the option is set and the abstract is synthetic or empty , build 
    // abstract from position data. 
    if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) {
 	LOGDEB(("dbDataToRclDoc:: building abstract from position data\n"));
 	if (doc.abstract.empty() || syntabs || 
 	    (qopts & Db::QO_REPLACE_ABSTRACT))
 	    doc.abstract = makeAbstract(docid, terms);
    } 
    parms.get(string("ipath"), doc.ipath);
    parms.get(string("fbytes"), doc.fbytes);
    parms.get(string("dbytes"), doc.dbytes);
    doc.xdocid = docid;
    return true;
 }
 // Get document at rank i in query (i is the index in the whole result
 // set, as in the enquire class. We check if the current mset has the
@ -1641,191 +1616,6 @@ list<string> Db::expand(const Doc &doc)
 }
 // Build a synthetic abstract for a document from the positions of the
 // query terms inside it.
 //
 // We build a possibly full size but sparsely populated (only around
 // the search term occurrences) reconstruction of the document. It
 // would be possible to compress the array, by having only multiple
 // chunks around the terms, but this would seriously complicate the
 // data structure.
 //
 // @param docid id of the document to summarize
 // @param terms query terms, the occurrences of which anchor the chunks
 // @return the selected words in position order, each followed by a
 //    space, with a "..." marker after each chunk when the slot is free
 string Native::makeAbstract(Xapian::docid docid, const list<string>& terms)
 {
    LOGDEB(("Native::makeAbstract: maxlen %d wWidth %d\n",
 	    m_db->m_synthAbsLen, m_db->m_synthAbsWordCtxLen));
    Chrono chron;
    // Context width as an unsigned count (a negative setting is treated
    // as 0). We need this to clamp the start of a window explicitly:
    // subtracting the width from an unsigned position wraps around for
    // terms closer to the document start than the width, which yields an
    // empty window and loses the term.
    const unsigned int ctxlen = m_db->m_synthAbsWordCtxLen > 0 ?
 	static_cast<unsigned int>(m_db->m_synthAbsWordCtxLen) : 0;
    // For each of the query terms, query xapian for its positions
    // list in the document. For each position entry, remember it in qtermposs
    // and insert it and its neighbours in the set of 'interesting' positions
    // The terms 'array' that we partially populate with the document
    // terms, at their positions around the search terms positions:
    map<unsigned int, string> sparseDoc;
    // All the query term positions. We remember this mainly because we are
    // going to random-shuffle it for selecting the chunks that we actually 
    // print.
    vector<unsigned int> qtermposs; 
    // Limit the total number of slots we populate.
    const unsigned int maxtotaloccs = 300;
    // Max occurrences per term. We initially know nothing about the
    // occurrences repartition (it would be possible that only one
    // term in the list occurs, or that all do). So this is a rather
    // arbitrary choice.
    const unsigned int maxoccperterm = maxtotaloccs / 10;
    unsigned int totaloccs = 0;
    for (list<string>::const_iterator qit = terms.begin(); qit != terms.end();
 	 qit++) {
 	Xapian::PositionIterator pos;
 	// There may be query terms not in this doc. This raises an
 	// exception when requesting the position list, we catch it.
 	string emptys;
 	try {
 	    unsigned int occurrences = 0;
 	    for (pos = db.positionlist_begin(docid, *qit); 
 		 pos != db.positionlist_end(docid, *qit); pos++) {
 		unsigned int ipos = *pos;
 		LOGDEB2(("Abstract: [%s] at %d\n", qit->c_str(), ipos));
 		// Remember the term position
 		qtermposs.push_back(ipos);
 		// Add adjacent slots to the set to populate at next step.
 		// The window start is clamped at position 0.
 		unsigned int sta = ipos > ctxlen ? ipos - ctxlen : 0;
 		unsigned int sto = ipos + ctxlen;
 		for (unsigned int ii = sta; ii <= sto;  ii++) {
 		    if (ii == ipos)
 			sparseDoc[ii] = *qit;
 		    else
 			sparseDoc[ii] = emptys;
 		}
 		// Limit the number of occurrences we keep for each
 		// term. The abstract has a finite length anyway !
 		if (occurrences++ > maxoccperterm)
 		    break;
 	    }
 	} catch (...) {
 	    // Term does not occur. No problem.
 	}
 	// Limit total size
 	// NOTE(review): this counter advances once per query term, not
 	// once per occurrence, so the limit can only trigger for very
 	// long term lists. Confirm that this is the intent.
 	if (totaloccs++ > maxtotaloccs)
 	    break;
    }
    LOGDEB(("Abstract:%d:chosen number of positions %d. Populating\n", 
 	    chron.millis(), int(qtermposs.size())));
    // Walk the full document position list (for each term walk
    // position list) and populate slots around the query terms. We
    // arbitrarily truncate the list to avoid taking forever. If we do
    // cutoff, the abstract may be inconsistent, which is bad...
    { 
 	Xapian::TermIterator term;
 	int cutoff = 500 * 1000;
 	for (term = db.termlist_begin(docid);
 	     term != db.termlist_end(docid); term++) {
 	    if (cutoff-- < 0) {
 		LOGDEB(("Abstract: max term count cutoff\n"));
 		break;
 	    }
 	    Xapian::PositionIterator pos;
 	    for (pos = db.positionlist_begin(docid, *term); 
 		 pos != db.positionlist_end(docid, *term); pos++) {
 		if (cutoff-- < 0) {
 		    LOGDEB(("Abstract: max term count cutoff\n"));
 		    break;
 		}
 		map<unsigned int, string>::iterator vit;
 		if ((vit=sparseDoc.find(*pos)) != sparseDoc.end()) {
 		    // Don't replace a term: the terms list is in
 		    // alphabetic order, and we may have several terms
 		    // at the same position, we want to keep only the
 		    // first one (ie: dockes and dockes@wanadoo.fr)
 		    if (vit->second.empty()) {
 			LOGDEB2(("Abstract: populating: [%s] at %d\n", 
 				(*term).c_str(), *pos));
 			sparseDoc[*pos] = *term;
 		    }
 		}
 	    }
 	}
    }
 #if 0
    // Debug only: output the full term[position] vector
    bool epty = false;
    int ipos = 0;
    for (map<unsigned int, string>::iterator it = sparseDoc.begin(); 
 	 it != sparseDoc.end();
 	 it++, ipos++) {
 	if (it->second.empty()) {
 	    if (!epty)
 		LOGDEB(("Abstract:vec[%d]: [%s]\n", ipos, it->second.c_str()));
 	    epty=true;
 	} else {
 	    epty = false;
 	    LOGDEB(("Abstract:vec[%d]: [%s]\n", ipos, it->second.c_str()));
 	}
    }
 #endif
    LOGDEB(("Abstract:%d: randomizing and extracting\n", chron.millis()));
    // We randomize the selection of term positions, from which we
    // shall pull, starting at the beginning, until the abstract is
    // big enough. The abstract is finally built in correct position
    // order, thanks to the position map.
    random_shuffle(qtermposs.begin(), qtermposs.end());
    map<unsigned int, string> mabs;
    unsigned int abslen = 0;
    // Extract data around the N first (in random order) query term
    // positions, and store the terms in the map. Don't concatenate
    // immediately into chunks because there might be overlaps
    for (vector<unsigned int>::const_iterator pos = qtermposs.begin();
 	 pos != qtermposs.end(); pos++) {
 	if (int(abslen) > m_db->m_synthAbsLen)
 	    break;
 	// Same window as in the first pass, start clamped at 0
 	unsigned int sta = *pos > ctxlen ? *pos - ctxlen : 0;
 	unsigned int sto = *pos + ctxlen;
 	LOGDEB2(("Abstract: %d<-%d->%d\n", sta, *pos, sto));
 	for (unsigned int ii = sta; ii <= sto; ii++) {
 	    if (int(abslen) > m_db->m_synthAbsLen)
 		break;
 	    map<unsigned int, string>::const_iterator vit = 
 		sparseDoc.find(ii);
 	    if (vit != sparseDoc.end() && !vit->second.empty()) {
 		LOGDEB2(("Abstract: position %d -> [%s]\n", 
 			 ii, vit->second.c_str()));
 		mabs[ii] = vit->second;
 		abslen += vit->second.length();
 	    } else {
 		LOGDEB2(("Abstract: empty position at %d\n", ii));
 	    }
 	}
 	// Possibly add a ... at the end of chunk if it's not
 	// overlapping
 	if (mabs.find(sto+1) == mabs.end())
 	    mabs[sto+1] = "...";
    }
    // Build the abstract by walking the map (in order of position)
    string abstract;
    for (map<unsigned int, string>::const_iterator it = mabs.begin();
 	 it != mabs.end(); it++) {
 	LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str()));
 	abstract += it->second + " ";
    }
    LOGDEB(("Abtract: done in %d mS\n", chron.millis()));
    return abstract;
 }
 #ifndef NO_NAMESPACES
 }
 #endif
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@ -16,12 +16,14 @@
 */
 #ifndef _DB_H_INCLUDED_
 #define _DB_H_INCLUDED_
-/* @(#$Id: rcldb.h,v 1.40 2006-10-30 12:59:44 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: rcldb.h,v 1.41 2006-11-13 08:49:44 dockes Exp $  (C) 2004 J.F.Dockes */
 #include <string>
 #include <list>
 #include <vector>
 #include "refcntr.h"
 #ifndef NO_NAMESPACES
 using std::string;
 using std::list;
@ -103,7 +105,7 @@ class Doc {
    }
 };
-class AdvSearchData;
+class SearchData;
 class Native;
 class TermIter;
@ -155,7 +157,7 @@ class Db {
    /* Query-related functions */
    // Parse query string and initialize query
-    bool setQuery(AdvSearchData &q, int opts = QO_NONE,
+    bool setQuery(RefCntr<SearchData> q, int opts = QO_NONE,
 		  const string& stemlang = "english");
    bool getQueryTerms(list<string>& terms);
    bool getMatchTerms(const Doc& doc, list<string>& terms);
@ -213,6 +215,9 @@ class Db {
    /** Perform stem expansion across all dbs configured for searching */
    list<string> stemExpand(const string& lang, const string& term);
    /** Filename wildcard expansion */
    bool filenameWildExp(const string& exp, list<string>& names);
 private:
    string m_filterTopDir; // Current query filter on subtree top directory 
@ -248,6 +253,7 @@ private:
    vector<bool> updated;
    bool reOpen(); // Close/open, same mode/opts
    /* Copy constructor and assignment are private and forbidden */
    Db(const Db &) {}
    Db & operator=(const Db &) {return *this;};
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@ -0,0 +1,299 @@
 #ifndef lint
 static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.1 2006-11-13 08:49:44 dockes Exp $ (C) 2006 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
 // Handle translation from rcl's SearchData structures to Xapian Queries
 #include <string>
 #include <list>
 #ifndef NO_NAMESPACES
 using namespace std;
 #endif
 #include "xapian.h"
 #include "rcldb.h"
 #include "searchdata.h"
 #include "debuglog.h"
 #include "smallut.h"
 #include "textsplit.h"
 #include "unacpp.h"
 #include "utf8iter.h"
 namespace Rcl {
 typedef  list<SearchDataClause *>::iterator qlist_it_t;
 /**
  * Translate the whole clause list into one Xapian query.
  *
  * Each clause is translated in turn and chained onto the query built so
  * far; the optional file type restriction is then applied as a filter.
  *
  * @param db       database handle handed to each clause translation
  * @param d        points to the Xapian::Query which receives the result
  * @param stemlang stemming language handed to each clause translation
  * @return false if a clause fails to translate (*d is then left untouched),
  *         else true.
  */
 bool SearchData::toNativeQuery(Rcl::Db &db, void *d, const string& stemlang)
 {
    Xapian::Query xq;

    // Walk the clause list translating each in turn and building the
    // Xapian query tree
    for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) {
        Xapian::Query nq;
        // Do not build on top of a clause which could not be translated
        if (!(*it)->toNativeQuery(db, &nq, stemlang)) {
            LOGERR(("SearchData::toNativeQuery: clause translation failed\n"));
            return false;
        }

        // If this structure is an AND list, must use AND_NOT for excl
        // clauses. Else this is an OR list, and there can't be excl
        // clauses (see addClause()).
        Xapian::Query::op op = Xapian::Query::OP_OR;
        if (m_tp == SCLT_AND) {
            op = (*it)->m_tp == SCLT_EXCL ?
                Xapian::Query::OP_AND_NOT : Xapian::Query::OP_AND;
        }

        // NOTE(review): while xq is still empty the clause is taken as is,
        // whatever its type, so a leading EXCL clause would become the
        // positive part of the query. Confirm that callers always add a
        // positive clause first.
        xq = xq.empty() ? nq : Xapian::Query(op, xq, nq);
    }

    // Add the file type filtering clause if any: an OR of all the
    // "T"-prefixed type terms, applied as a filter to the main query
    if (!m_filetypes.empty()) {
        Xapian::Query tq;
        for (list<string>::iterator it = m_filetypes.begin();
             it != m_filetypes.end(); it++) {
            string term = "T" + *it;
            LOGDEB(("Adding file type term: [%s]\n", term.c_str()));
            tq = tq.empty() ? Xapian::Query(term) :
                Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
        }
        xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
    }

    *static_cast<Xapian::Query *>(d) = xq;
    return true;
 }
 // Add clause to current list. OR lists can't have EXCL clauses.
 // Returns true if the clause was stored in m_query (erase() will then
 // delete it). Returns false, without storing anything, for a null
 // pointer or for an EXCL clause added to an OR list.
 bool SearchData::addClause(SearchDataClause* cl)
 {
    // A null entry would be dereferenced later when walking m_query
    if (cl == 0) {
        LOGERR(("SearchData::addClause: null clause\n"));
        return false;
    }
    if (m_tp == SCLT_OR && (cl->m_tp == SCLT_EXCL)) {
        LOGERR(("SearchData::addClause: cant add EXCL to OR list\n"));
        return false;
    }
    m_query.push_back(cl);
    return true;
 }
 // Back to the pristine state: delete every clause held in the list,
 // then empty the list and all the restriction / description fields.
 void SearchData::erase()
 {
    while (!m_query.empty()) {
        delete m_query.front();
        m_query.pop_front();
    }
    m_filetypes.clear();
    m_topdir.erase();
    m_description.erase();
 }
 // Am I a file name only search ? This is to turn off term highlighting
 bool SearchData::fileNameOnly() {
    for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)
 	if (!(*it)->isFileName())
 	    return false;
    return true;
 }
 // Splitter callback for breaking a user query string into simple
 // terms and phrases
 class wsQData : public TextSplitCB {
 public:
    vector<string> terms;
    // Debug
    string catterms() {
 	string s;
 	for (unsigned int i = 0; i < terms.size(); i++) {
 	    s += "[" + terms[i] + "] ";
 	}
 	return s;
    }
    bool takeword(const std::string &term, int , int, int) {
 	LOGDEB1(("wsQData::takeword: %s\n", term.c_str()));
 	terms.push_back(term);
 	return true;
    }
    // Decapital + deaccent all terms 
    void dumball() {
 	for (vector<string>::iterator it=terms.begin(); it !=terms.end();it++){
 	    string dumb;
 	    dumb_string(*it, dumb);
 	    *it = dumb;
 	}
    }
 };
 // Turn string into list of xapian queries. There is little
 // interpretation done on the string (no +term -term or filename:term
 // stuff). We just separate words and phrases, and interpret
 // capitalized terms as wanting no stem expansion.
 // The final list contains one query for each term or phrase
 //   - Elements corresponding to a stem-expanded part are an OP_OR
 //     composition of the stem-expanded terms (or a single term query).
 //   - Elements corresponding to a phrase are an OP_PHRASE composition of the
 //     phrase terms (no stem expansion in this case)
 //
 // Parameters:
 //   iq        the user query string
 //   stemlang  stemming language; no stem expansion is done if empty
 //   db        used for performing the stem expansion
 //   pqueries  output: one query is appended for each term or phrase
 static void stringToXapianQueries(const string &iq,
                                  const string& stemlang,
                                  Db& db,
                                  list<Xapian::Query> &pqueries)
 {
    string qstring = iq;
    bool opt_stemexp = !stemlang.empty();

    // Split into (possibly single word) phrases ("this is a phrase"):
    list<string> phrases;
    stringToStrings(qstring, phrases);

    // Then process each phrase: split into terms and transform into
    // appropriate Xapian Query
    for (list<string>::iterator it=phrases.begin(); it !=phrases.end(); it++) {
        LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str()));

        // If there are both spans and single words in this element,
        // we need to use a word split, else a phrase query including
        // a span would fail if we didn't adjust the proximity to
        // account for the additional span term which is complicated.
        wsQData splitDataS, splitDataW;
        TextSplit splitterS(&splitDataS, TextSplit::TXTS_ONLYSPANS);
        splitterS.text_to_words(*it);
        TextSplit splitterW(&splitDataW, TextSplit::TXTS_NOSPANS);
        splitterW.text_to_words(*it);

        // Choose the split to use once and for all: a reference can't be
        // made to designate another object later, assigning through it
        // would copy the whole splitter data instead.
        const bool useWordSplit = splitDataS.terms.size() > 1 &&
            splitDataS.terms.size() != splitDataW.terms.size();
        wsQData& splitData = useWordSplit ? splitDataW : splitDataS;

        // size() is not an int: convert explicitly for the %d format
        LOGDEB1(("strToXapianQ: splitter term count: %d\n",
                 int(splitData.terms.size())));
        switch(splitData.terms.size()) {
        case 0: continue;// ??
        case 1: // Not a real phrase: one term
            {
                string term = splitData.terms.front();
                bool nostemexp = false;
                // Check if the first letter is a majuscule in which
                // case we do not want to do stem expansion. Note that
                // the test is convoluted and possibly problematic:
                // the unaccented term is compared to its unaccented and
                // case-folded version, on the first character only.
                if (term.length() > 0) {
                    string noacterm,noaclowterm;
                    if (unacmaybefold(term, noacterm, "UTF-8", false) &&
                        unacmaybefold(noacterm, noaclowterm, "UTF-8", true)) {
                        Utf8Iter it1(noacterm);
                        Utf8Iter it2(noaclowterm);
                        if (*it1 != *it2)
                            nostemexp = true;
                    }
                }
                LOGDEB1(("Term: %s stem expansion: %s\n",
                         term.c_str(), nostemexp?"no":"yes"));

                list<string> exp;
                string term1;
                dumb_string(term, term1);
                // Possibly perform stem compression/expansion
                if (!nostemexp && opt_stemexp) {
                    exp = db.stemExpand(stemlang, term1);
                } else {
                    exp.push_back(term1);
                }

                // Push either term or OR of stem-expanded set
                pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
                                                 exp.begin(), exp.end()));
            }
            break;

        default:
            // Phrase: no stem expansion
            splitData.dumball();
            LOGDEB(("Pushing phrase: [%s]\n", splitData.catterms().c_str()));
            pqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE,
                                             splitData.terms.begin(),
                                             splitData.terms.end()));
        }
    }
 }
 // Translate a simple AND, OR or EXCL search clause.
 // p must point to a Xapian::Query. It is first reset to an empty query,
 // then set to the composition of the queries built from the clause
 // text. AND clauses compose with OP_AND, OR and EXCL ones with OP_OR.
 // Returns false only for an unexpected clause type. A text resolving to
 // nothing is logged, leaves the empty query in place and returns true.
 bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
                                           const string& stemlang)
 {
    Xapian::Query *result = static_cast<Xapian::Query *>(p);
    *result = Xapian::Query();

    Xapian::Query::op combiner;
    if (m_tp == SCLT_AND) {
        combiner = Xapian::Query::OP_AND;
    } else if (m_tp == SCLT_OR || m_tp == SCLT_EXCL) {
        combiner = Xapian::Query::OP_OR;
    } else {
        LOGERR(("SearchDataClauseSimple: bad m_tp %d\n", m_tp));
        return false;
    }

    list<Xapian::Query> parts;
    stringToXapianQueries(m_text, stemlang, db, parts);
    if (!parts.empty()) {
        *result = Xapian::Query(combiner, parts.begin(), parts.end());
        return true;
    }
    LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
    return true;
 }
 // Translate a FILENAME search clause.
 // p must point to a Xapian::Query, which receives an OP_OR of the names
 // which db.filenameWildExp() yields for the clause text. The stemming
 // language is not used here. Always returns true.
 bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
                                             const string& stemlang)
 {
    Xapian::Query *result = static_cast<Xapian::Query *>(p);
    // Start from an empty query, as the other clause translators do
    *result = Xapian::Query();

    list<string> matches;
    db.filenameWildExp(m_text, matches);

    // Build a query out of the matching file name terms.
    Xapian::Query namesq(Xapian::Query::OP_OR,
                         matches.begin(), matches.end());
    *result = namesq;
    return true;
 }
 // Translate NEAR or PHRASE clause. We're not handling the distance parameter
 // yet.
 // p must point to a Xapian::Query. It is first reset to an empty query,
 // then set to the first query built from the double-quoted clause text.
 // Always returns true; a text resolving to nothing is logged and leaves
 // the empty query in place.
 // NOTE(review): nothing here depends on m_tp, so NEAR and PHRASE clauses
 // currently produce the same query. The operator which used to be
 // computed from m_tp was never used and is gone, to be reintroduced along
 // with the handling of m_distance.
 bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
                                         const string& stemlang)
 {
    Xapian::Query *qp = static_cast<Xapian::Query *>(p);
    *qp = Xapian::Query();

    // Use stringToXapianQueries anyway to lowercase and simplify the
    // phrase terms etc. The result should be a single element list
    list<Xapian::Query> pqueries;
    string s = string("\"") + m_text + string("\"");
    stringToXapianQueries(s, stemlang, db, pqueries);
    if (pqueries.empty()) {
        LOGERR(("SearchDataClauseDist: resolved to null query\n"));
        return true;
    }
    *qp = *pqueries.begin();
    return true;
 }
 } // Namespace Rcl
--- a/src/rcldb/searchdata.h
+++ b/src/rcldb/searchdata.h
@ -1,40 +1,112 @@
 #ifndef _SEARCHDATA_H_INCLUDED_
 #define _SEARCHDATA_H_INCLUDED_
-/* @(#$Id: searchdata.h,v 1.2 2006-04-22 06:27:37 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: searchdata.h,v 1.3 2006-11-13 08:49:45 dockes Exp $  (C) 2004 J.F.Dockes */
 #include <string>
 #include <list>
 #include "rcldb.h"
 #ifndef NO_NAMESPACES
 using std::list;
 using std::string;
 #endif
 namespace Rcl {
-/**
+
- * Holder for query data 
+/** Search clause types */
- */
+enum SClType {
-class AdvSearchData {
+    SCLT_AND, 
-    public:
+    SCLT_OR, SCLT_EXCL, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR,
-    string allwords;
+    SCLT_SUB
    string phrase;
    string orwords;
    string orwords1; // Have two instances of orwords for and'ing them
    string nowords;
    string filename; 
    list<string> filetypes; // restrict to types. Empty if inactive
    string topdir; // restrict to subtree. Empty if inactive
    string description; // Printable expanded version of the complete query
                        // returned after setQuery.
    void erase() {
 	allwords.erase();
 	phrase.erase();
 	orwords.erase();
 	orwords1.erase();
 	nowords.erase();
 	filetypes.clear(); 
 	topdir.erase();
 	filename.erase();
 	description.erase();
    }
    bool fileNameOnly() {
 	return allwords.empty() && phrase.empty() && orwords.empty() && 
 	    orwords1.empty() && nowords.empty();
    }
 };
-}
+class SearchDataClause;
 /**
  * Holder for a list of search clauses. Some of the clauses can be complex
  * subqueries.
  *
  * The object deletes the clauses it holds, when erased or destroyed.
  */
 class SearchData {
 public:
    /** List type: only SCLT_AND or SCLT_OR here */
    SClType                  m_tp;
    /** The clauses, in the order they were added */
    list<SearchDataClause *> m_query;
    /** Restrict to filetypes if set. */
    list<string>             m_filetypes;
    /** Restrict to subtree. */
    string                   m_topdir;
    /** Printable expanded version of the complete query, obtained from
     *  Xapian, valid after setQuery() call */
    string                   m_description;

    SearchData(SClType tp)
        : m_tp(tp)
    {
    }
    ~SearchData()
    {
        erase();
    }

    /** Make pristine */
    void erase();

    /** Is there anything but a file name search in here ? */
    bool fileNameOnly();

    /** Translate to Xapian query. rcldb knows about the void*  */
    bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);

    /** We become the owner of cl and will delete it */
    bool addClause(SearchDataClause *cl);

 private:
    /* Copy constructor and assignment: private and forbidden */
    SearchData(const SearchData &)
    {
    }
    SearchData& operator=(const SearchData&)
    {
        return *this;
    }
 };
 /**
  * Base class for the search clauses: holds the clause type and declares
  * the translation interface which each kind of clause implements.
  */
 class SearchDataClause {
 public:
    SClType m_tp;

    SearchDataClause(SClType tp)
        : m_tp(tp)
    {
    }
    virtual ~SearchDataClause()
    {
    }

    /** Translate the clause, the void* designates the native query */
    virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0;

    /** True if the clause type is SCLT_FILENAME */
    virtual bool isFileName()
    {
        return m_tp == SCLT_FILENAME;
    }
 };
 /**
  * Search clause made of a type and of a piece of user-entered text.
  */
 class SearchDataClauseSimple : public SearchDataClause {
 public:
    SearchDataClauseSimple(SClType tp, string txt)
        : SearchDataClause(tp),
          m_text(txt)
    {
    }
    virtual ~SearchDataClauseSimple()
    {
    }

    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);

 protected:
    /** The clause text, as given to the constructor */
    string  m_text;
 };
 /**
  * File name search clause: a text clause of type SCLT_FILENAME, with its
  * own translation to a native query.
  */
 class SearchDataClauseFilename : public SearchDataClauseSimple {
 public:
    // The constructor parameter must be handed to the base class, not
    // m_text: m_text is the very member which the base class constructor
    // is about to initialize, it holds nothing usable at this point.
    SearchDataClauseFilename(string txt)
        : SearchDataClauseSimple(SCLT_FILENAME, txt) {}
    virtual ~SearchDataClauseFilename() {}
    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
 };
 /**
  * Text clause which also carries a distance value. The constructor
  * stores the type, text and distance as given.
  */
 class SearchDataClauseDist : public SearchDataClauseSimple {
 public:
    SearchDataClauseDist(SClType tp, string txt, int dist)
        : SearchDataClauseSimple(tp, txt),
          m_distance(dist)
    {
    }
    virtual ~SearchDataClauseDist()
    {
    }

    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);

 protected:
    /** Distance value, as given to the constructor */
    int     m_distance;
 };
 /**
  * Clause holding a whole clause list of its own (subquery).
  * tp is the type of the clause itself, stp the type of the embedded list.
  * NOTE(review): no definition of toNativeQuery() for this class is
  * visible next to the other clause translators - confirm that one exists
  * before instantiating the class.
  */
 class SearchDataClauseSub : public SearchDataClause {
 public:
    SearchDataClauseSub(SClType tp, SClType stp)
        : SearchDataClause(tp),
          m_sub(stp)
    {
    }
    virtual ~SearchDataClauseSub()
    {
    }

    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);

 protected:
    /** The embedded clause list */
    SearchData m_sub;
 };
 } // Namespace Rcl
 #endif /* _SEARCHDATA_H_INCLUDED_ */