defined data structure to pass around the search term description used for highlighting and other

2012-08-17 10:45:00 +02:00 · 2012-08-17 10:45:00 +02:00 · dc7b3420a0
commit dc7b3420a0
parent ebdd6faaf5
16 changed files with 409 additions and 351 deletions
--- a/src/qtgui/preview_w.cpp
+++ b/src/qtgui/preview_w.cpp
@ -686,12 +686,12 @@ class LoadThread : public QThread {
 /* A thread to convert to rich text (mark search terms) */
 class ToRichThread : public QThread {
    string &in;
-    const HiliteData &hdata;
+    const HighlightData &hdata;
    list<string> &out;
    int loglevel;
    PlainToRichQtPreview *ptr;
 public:
-    ToRichThread(string &i, const HiliteData& hd, list<string> &o, 
+    ToRichThread(string &i, const HighlightData& hd, list<string> &o, 
 		 PlainToRichQtPreview *_ptr)
 	: in(i), hdata(hd), out(o), ptr(_ptr)
    {
--- a/src/qtgui/preview_w.h
+++ b/src/qtgui/preview_w.h
@ -91,7 +91,7 @@ class Preview : public QWidget {
    public:

    Preview(int sid, // Search Id
-	    const HiliteData& hdata) // Search terms etc. for highlighting
+	    const HighlightData& hdata) // Search terms etc. for highlighting
 	: QWidget(0), m_searchId(sid), m_hData(hdata)
    {
 	init();
@ -141,7 +141,7 @@ private:
    bool          m_canBeep;
    bool          m_loading;
    QWidget      *m_currentW;
-    HiliteData    m_hData;
+    HighlightData m_hData;
    bool          m_justCreated; // First tab create is different
    bool          m_haveAnchors; // Search terms are marked in text
    int           m_lastAnchor; // Number of last anchor. Then rewind to 1
--- a/src/qtgui/rclmain_w.cpp
+++ b/src/qtgui/rclmain_w.cpp
@ -1220,8 +1220,8 @@ void RclMain::startPreview(int docnum, Rcl::Doc doc, int mod)
 	curPreview = 0;
    }
    if (curPreview == 0) {
-	HiliteData hdata;
-	m_source->getTerms(hdata.terms, hdata.groups, hdata.gslks);
+	HighlightData hdata;
+	m_source->getTerms(hdata);
 	curPreview = new Preview(reslist->listId(), hdata);

 	if (curPreview == 0) {
@ -1284,7 +1284,7 @@ void RclMain::updateIdxForDocs(vector<Rcl::Doc>& docs)
 */
 void RclMain::startPreview(Rcl::Doc doc)
 {
-    Preview *preview = new Preview(0, HiliteData());
+    Preview *preview = new Preview(0, HighlightData());
    if (preview == 0) {
 	QMessageBox::warning(0, tr("Warning"), 
 			     tr("Can't create preview window"),
--- a/src/qtgui/restable.cpp
+++ b/src/qtgui/restable.cpp
@ -299,8 +299,8 @@ void RecollModel::setDocSource(RefCntr<DocSequence> nsource)
 	m_source = RefCntr<DocSequence>();
    } else {
 	m_source = RefCntr<DocSequence>(new DocSource(theconfig, nsource));
-	m_hdata.reset();
-	m_source->getTerms(m_hdata.terms, m_hdata.groups, m_hdata.gslks);
+	m_hdata.clear();
+	m_source->getTerms(m_hdata);
    }
 }

--- a/src/qtgui/restable.h
+++ b/src/qtgui/restable.h
@ -19,6 +19,10 @@

 #include <Qt>

+#include <string>
+#include <map>
+#include <vector>
+
 #include "ui_restable.h"
 #include "refcntr.h"
 #include "docseq.h"
@ -26,7 +30,7 @@

 class ResTable;

-typedef string (FieldGetter)(const string& fldname, const Rcl::Doc& doc);
+typedef std::string (FieldGetter)(const std::string& fldname, const Rcl::Doc& doc);

 class RecollModel : public QAbstractTableModel {

@ -49,15 +53,15 @@ public:
    virtual void setDocSource(RefCntr<DocSequence> nsource);
    virtual RefCntr<DocSequence> getDocSource() {return m_source;}
    virtual void deleteColumn(int);
-    virtual const vector<string>& getFields() {return m_fields;}
-    virtual const map<string, QString>& getAllFields() 
+    virtual const std::vector<std::string>& getFields() {return m_fields;}
+    virtual const std::map<std::string, QString>& getAllFields() 
    { 
 	return o_displayableFields;
    }
-    virtual void addColumn(int, const string&);
+    virtual void addColumn(int, const std::string&);
    // Some column name are aliases/translator for base document field 
    // (ie: date, datetime->mtime). Help deal with this:
-    virtual string baseField(const string&);
+    virtual std::string baseField(const std::string&);

    // Ignore sort() call because 
    virtual void setIgnoreSort(bool onoff) {m_ignoreSort = onoff;}
@ -69,12 +73,12 @@ signals:

 private:
    mutable RefCntr<DocSequence> m_source;
-    vector<string> m_fields;
-    vector<FieldGetter*> m_getters;
-    static map<string, QString> o_displayableFields;
+    std::vector<std::string> m_fields;
+    std::vector<FieldGetter*> m_getters;
+    static std::map<std::string, QString> o_displayableFields;
    bool m_ignoreSort;
-    FieldGetter* chooseGetter(const string&);
-    HiliteData m_hdata;
+    FieldGetter* chooseGetter(const std::string&);
+    HighlightData m_hdata;
 };

 class ResTable;
--- a/src/query/docseq.h
+++ b/src/query/docseq.h
@ -19,19 +19,16 @@
 #include <string>
 #include <list>
 #include <vector>
-#ifndef NO_NAMESPACES
-using std::string;
-using std::list;
-using std::vector;
-#endif
+

 #include "rcldoc.h"
 #include "refcntr.h"
+#include "hldata.h"

 // A result list entry. 
 struct ResListEntry {
    Rcl::Doc doc;
-    string subHeader;
+    std::string subHeader;
 };

 /** Sort specification. */
@ -40,7 +37,7 @@ class DocSeqSortSpec {
    DocSeqSortSpec() : desc(false) {}
    bool isNotNull() const {return !field.empty();}
    void reset() {field.erase();}
-    string field;
+    std::string field;
    bool   desc;
 };

@ -50,12 +47,12 @@ class DocSeqFiltSpec {
 public:
    DocSeqFiltSpec() {}
    enum Crit {DSFS_MIMETYPE, DSFS_QLANG, DSFS_PASSALL};
-    void orCrit(Crit crit, const string& value) {
+    void orCrit(Crit crit, const std::string& value) {
 	crits.push_back(crit);
 	values.push_back(value);
    }
    std::vector<Crit> crits;
-    std::vector<string> values;
+    std::vector<std::string> values;
    void reset() {crits.clear(); values.clear();}
    bool isNotNull() const {return crits.size() != 0;}
 };
@ -73,7 +70,7 @@ class DocSeqFiltSpec {
 */
 class DocSequence {
 public:
-    DocSequence(const string &t) : m_title(t) {}
+    DocSequence(const std::string &t) : m_title(t) {}
    virtual ~DocSequence() {}

    /** Get document at given rank. 
@ -84,16 +81,17 @@ class DocSequence {
     *           inside history)
     * @return true if ok, false for error or end of data
     */
-    virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0) = 0;
+    virtual bool getDoc(int num, Rcl::Doc &doc, std::string *sh = 0) = 0;

    /** Get next page of documents. This accumulates entries into the result
     *  list parameter (doesn't reset it). */
-    virtual int getSeqSlice(int offs, int cnt, vector<ResListEntry>& result);
+    virtual int getSeqSlice(int offs, int cnt, 
+			    std::vector<ResListEntry>& result);

    /** Get abstract for document. This is special because it may take time.
     *  The default is to return the input doc's abstract fields, but some 
     *  sequences can compute a better value (ie: docseqdb) */
-    virtual bool getAbstract(Rcl::Doc& doc, vector<string>& abs) {
+    virtual bool getAbstract(Rcl::Doc& doc, std::vector<std::string>& abs) {
 	abs.push_back(doc.meta[Rcl::Doc::keyabs]);
 	return true;
    }
@ -103,25 +101,21 @@ class DocSequence {
    virtual int getResCnt() = 0;

    /** Get title for result list */
-    virtual string title() {return m_title;}
+    virtual std::string title() {return m_title;}

    /** Get description for underlying query */
-    virtual string getDescription() = 0;
+    virtual std::string getDescription() = 0;

    /** Get search terms (for highlighting abstracts). Some sequences
     * may have no associated search terms. Implement this for them. */
-    virtual bool getTerms(vector<string>& terms, 
-			  vector<vector<string> >& groups, 
-			  vector<int>& gslks) 
+    virtual void getTerms(HighlightData& hld)			  
    {
-	terms.clear(); groups.clear(); gslks.clear(); return true;
+	hld.clear();
    }
-    /** Get user-input terms (before stemming etc.) */
-    virtual void getUTerms(vector<string>& terms)
+    virtual std::list<std::string> expand(Rcl::Doc &) 
    {
-	terms.clear(); 
+	return std::list<std::string>();
    }
-    virtual list<string> expand(Rcl::Doc &) {return list<string>();}

    /** Optional functionality. */
    virtual bool canFilter() {return false;}
@ -130,16 +124,16 @@ class DocSequence {
    virtual bool setSortSpec(const DocSeqSortSpec &) {return false;}
    virtual RefCntr<DocSequence> getSourceSeq() {return RefCntr<DocSequence>();}

-    static void set_translations(const string& sort, const string& filt)
+    static void set_translations(const std::string& sort, const std::string& filt)
    {
 	o_sort_trans = sort;
 	o_filt_trans = filt;
    }
 protected:
-    static string o_sort_trans;
-    static string o_filt_trans;
+    static std::string o_sort_trans;
+    static std::string o_filt_trans;
 private:
-    string          m_title;
+    std::string          m_title;
 };

 /** A modifier has a child sequence which does the real work and does
@ -152,25 +146,23 @@ public:
    {}
    virtual ~DocSeqModifier() {}

-    virtual bool getAbstract(Rcl::Doc& doc, vector<string>& abs) 
+    virtual bool getAbstract(Rcl::Doc& doc, std::vector<std::string>& abs) 
    {
 	if (m_seq.isNull())
 	    return false;
 	return m_seq->getAbstract(doc, abs);
    }
-    virtual string getDescription() 
+    virtual std::string getDescription() 
    {
 	if (m_seq.isNull())
 	    return "";
 	return m_seq->getDescription();
    }
-    virtual bool getTerms(vector<string>& terms, 
-			  vector<vector<string> >& groups, 
-			  vector<int>& gslks) 
+    virtual void getTerms(HighlightData& hld)
    {
 	if (m_seq.isNull())
-	    return false;
-	return m_seq->getTerms(terms, groups, gslks);
+	    return;
+	m_seq->getTerms(hld);
    }
    virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc) 
    {
@ -178,13 +170,7 @@ public:
 	    return false;
 	return m_seq->getEnclosing(doc, pdoc);
    }
-    virtual void getUTerms(vector<string>& terms)
-    {
-	if (m_seq.isNull())
-	    return;
-	m_seq->getUTerms(terms);
-    }
-    virtual string title() {return m_seq->title();}
+    virtual std::string title() {return m_seq->title();}
    virtual RefCntr<DocSequence> getSourceSeq() {return m_seq;}

 protected:
@ -203,7 +189,7 @@ public:
    virtual bool canSort() {return true;}
    virtual bool setFiltSpec(const DocSeqFiltSpec &);
    virtual bool setSortSpec(const DocSeqSortSpec &);
-    virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0)
+    virtual bool getDoc(int num, Rcl::Doc &doc, std::string *sh = 0)
    {
 	if (m_seq.isNull())
 	    return false;
@ -215,7 +201,7 @@ public:
 	    return 0;
 	return m_seq->getResCnt();
    }
-    virtual string title();
+    virtual std::string title();
 private:
    bool buildStack();
    void stripStack();
--- a/src/query/docseqdb.cpp
+++ b/src/query/docseqdb.cpp
@ -39,16 +39,9 @@ DocSequenceDb::~DocSequenceDb()
 {
 }

-bool DocSequenceDb::getTerms(vector<string>& terms, 
-			     vector<vector<string> >& groups, 
-			     vector<int>& gslks)
+void DocSequenceDb::getTerms(HighlightData& hld)
 {
-    return m_fsdata->getTerms(terms, groups, gslks);
-}
-
-void DocSequenceDb::getUTerms(vector<string>& terms)
-{
-    m_sdata->getUTerms(terms);
+    m_fsdata->getTerms(hld);
 }

 string DocSequenceDb::getDescription() 
@ -180,5 +173,13 @@ bool DocSequenceDb::setQuery()
 	return true;
    m_rescnt = -1;
    m_needSetQuery = !m_q->setQuery(m_fsdata);
+
+    if (0) {
+	HighlightData hld;
+	m_fsdata->getTerms(hld);
+	string str; 
+	hld.toString(str);
+	fprintf(stderr, "DocSequenceDb::setQuery: terms: %s\n", str.c_str());
+    }
    return !m_needSetQuery;
 }
--- a/src/query/docseqdb.h
+++ b/src/query/docseqdb.h
@ -30,10 +30,7 @@ class DocSequenceDb : public DocSequence {
    virtual ~DocSequenceDb();
    virtual bool getDoc(int num, Rcl::Doc &doc, string * = 0);
    virtual int getResCnt();
-    virtual bool getTerms(vector<string>& terms, 
-			  vector<vector<string> >& groups, 
-			  vector<int>& gslks);
-    virtual void getUTerms(vector<string>& terms);
+    virtual void getTerms(HighlightData& hld);
    virtual bool getAbstract(Rcl::Doc &doc, vector<string>&);
    virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc);
    virtual string getDescription();
--- a/src/query/plaintorich.cpp
+++ b/src/query/plaintorich.cpp
@ -24,12 +24,10 @@
 #include <map>
 #include <algorithm>

-#ifndef NO_NAMESPACES
 using std::vector;
 using std::list;
 using std::pair;
 using std::set;
-#endif /* NO_NAMESPACES */

 #include "rcldb.h"
 #include "rclconfig.h"
@ -51,28 +49,30 @@ static string vecStringToString(const vector<string>& t)
    return sterms;
 }

-// Text splitter callback used to take note of the position of query terms 
-// inside the result text. This is then used to insert highlight tags. 
+// Text splitter used to take note of the position of query terms
+// inside the result text. This is then used to insert highlight tags.
 class TextSplitPTR : public TextSplit {
 public:

    // Out: begin and end byte positions of query terms/groups in text
    vector<pair<int, int> > tboffs;  

-    TextSplitPTR(const vector<string>& its, 
-                 const vector<vector<string> >&groups, 
-                 const vector<int>& slacks) 
-	:  m_wcount(0), m_groups(groups), m_slacks(slacks)
+    TextSplitPTR(const HighlightData& hdata)
+    :  m_wcount(0), m_hdata(hdata)
    {
-	for (vector<string>::const_iterator it = its.begin(); 
-	     it != its.end(); it++) {
-	    m_terms.insert(*it);
-	}
-	for (vector<vector<string> >::const_iterator vit = m_groups.begin(); 
-	     vit != m_groups.end(); vit++) {
-	    for (vector<string>::const_iterator it = (*vit).begin(); 
-		 it != (*vit).end(); it++) {
-		m_gterms.insert(*it);
+	// We separate single terms and groups and extract the group
+	// terms for computing positions list before looking for group
+	// matches
+
+	for (vector<vector<string> >::const_iterator vit = hdata.groups.begin();
+	     vit != hdata.groups.end(); vit++) {
+	    if (vit->size() == 1) {
+		m_terms.insert(vit->front());
+	    } else if (vit->size() > 1) {
+		for (vector<string>::const_iterator it = vit->begin(); 
+		     it != vit->end(); it++) {
+		    m_gterms.insert(*it);
+		}
 	    }
 	}
    }
@ -116,15 +116,16 @@ class TextSplitPTR : public TextSplit {
 private:
    virtual bool matchGroup(const vector<string>& terms, int dist);

+    // Word count. Used to call checkCancel from time to time.
    int m_wcount;

    // In: user query terms
    set<string>    m_terms; 

-    // In: user query groups, for near/phrase searches.
-    const vector<vector<string> >& m_groups;
-    const vector<int>&             m_slacks;
-    set<string>                    m_gterms;
+    // m_gterms holds the terms of multi-term groups, as a set for quick lookup
+    set<string>    m_gterms;
+
+    const HighlightData& m_hdata;

    // group/near terms word positions.
    map<string, vector<int> > m_plists;
@ -294,10 +295,11 @@ public:
 // handle all groups as NEAR (ignore order).
 bool TextSplitPTR::matchGroups()
 {
-    vector<vector<string> >::const_iterator vit = m_groups.begin();
-    vector<int>::const_iterator sit = m_slacks.begin();
-    for (; vit != m_groups.end() && sit != m_slacks.end(); vit++, sit++) {
-	matchGroup(*vit, *sit + (*vit).size());
+    for (unsigned int i = 0; i < m_hdata.groups.size(); i++) {
+	if (m_hdata.groups[i].size() <= 1)
+	    continue;
+	matchGroup(m_hdata.groups[i], 
+		   m_hdata.groups[i].size() + m_hdata.slacks[i]);
    }

    // Sort regions by increasing start and decreasing width.  
@ -317,39 +319,22 @@ bool TextSplitPTR::matchGroups()
 // the input is html, the body is always a single output chunk.
 bool PlainToRich::plaintorich(const string& in, 
 			      list<string>& out, // Output chunk list
-			      const HiliteData& hdata,
+			      const HighlightData& hdata,
 			      int chunksize)
 {
    Chrono chron;
-    const vector<string>& terms(hdata.terms);
-    const vector<vector<string> >& groups(hdata.groups);
-    const vector<int>& slacks(hdata.gslks);
-
-    if (0 && DebugLog::getdbl()->getlevel() >= DEBDEB0) {
-	string sterms = vecStringToString(terms);
-	LOGDEB0(("plaintorich: terms: %s\n", sterms.c_str()));
-	sterms.clear();
-	for (vector<vector<string> >::const_iterator vit = groups.begin(); 
-	     vit != groups.end(); vit++) {
-	    sterms += "GROUP: ";
-	    sterms += vecStringToString(*vit);
-	    sterms += "\n";
-	}
-	LOGDEB0(("plaintorich: groups:\n %s", sterms.c_str()));
-        LOGDEB2(("  TEXT:[%s]\n", in.c_str()));
-    }

    // Compute the positions for the query terms.  We use the text
    // splitter to break the text into words, and compare the words to
    // the search terms,
-    TextSplitPTR splitter(terms, groups, slacks);
+    TextSplitPTR splitter(hdata);
    // Note: the splitter returns the term locations in byte, not
    // character, offsets.
    splitter.text_to_words(in);
    LOGDEB2(("plaintorich: split done %d mS\n", chron.millis()));
-
    // Compute the positions for NEAR and PHRASE groups.
    splitter.matchGroups();
+    LOGDEB2(("plaintorich: group match done %d mS\n", chron.millis()));

    out.clear();
    out.push_back("");
--- a/src/query/plaintorich.h
+++ b/src/query/plaintorich.h
@ -19,26 +19,8 @@

 #include <string>
 #include <list>
-using std::list;
-using std::string;

-/// Holder for plaintorich() input data: words and groups of words to
-/// be highlighted
-struct HiliteData {
-    // Single terms
-    vector<string> terms;
-    // NEAR and PHRASE elements
-    vector<vector<string> > groups;
-    // Group slacks (number of permitted non-matched words). 
-    // Parallel vector to the above 'groups'
-    vector<int> gslks; 
-    void reset() 
-    {
-	terms.clear();
-	groups.clear();
-	gslks.clear();
-    }
-};
+#include "hldata.h"

 /** 
 * A class for highlighting search results. Overridable methods allow
@ -83,21 +65,21 @@ public:
     *   lowercase and unaccented.
     * @param chunksize max size of chunks in output list
     */
-    virtual bool plaintorich(const string &in, list<string> &out,
-			     const HiliteData& hdata,
+    virtual bool plaintorich(const std::string &in, std::list<std::string> &out,
+			     const HighlightData& hdata,
 			     int chunksize = 50000
 			     );

    /* Overridable output methods for headers, highlighting and marking tags */
-    virtual string header() {return snull;}
-    virtual string startMatch() {return snull;}
-    virtual string endMatch() {return snull;}
-    virtual string startAnchor(int) {return snull;}
-    virtual string endAnchor() {return snull;}
-    virtual string startChunk() {return snull;}
+    virtual std::string header() {return snull;}
+    virtual std::string startMatch() {return snull;}
+    virtual std::string endMatch() {return snull;}
+    virtual std::string startAnchor(int) {return snull;}
+    virtual std::string endAnchor() {return snull;}
+    virtual std::string startChunk() {return snull;}

 protected:
-    const string snull;
+    const std::string snull;
    bool m_inputhtml;
    // Use <br> to break plain text lines (else caller has used a <pre> tag)
    bool m_eolbr; 
--- a/src/query/reslistpager.cpp
+++ b/src/query/reslistpager.cpp
@ -132,9 +132,8 @@ void ResListPager::resultPageFor(int docnum)
    m_respage = npage;
 }

-void ResListPager::displayDoc(RclConfig *config,
-			      int i, Rcl::Doc& doc, const HiliteData& hdata,
-			      const string& sh)
+void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc, 
+			      const HighlightData& hdata, const string& sh)
 {
    ostringstream chunk;
    int percent;
@ -309,8 +308,9 @@ void ResListPager::displayPage(RclConfig *config)

    if (pageEmpty()) {
 	chunk << trans("<p><b>No results found</b><br>");
-        vector<string>uterms;
-        m_docSource->getUTerms(uterms);
+	HighlightData hldata;
+        m_docSource->getTerms(hldata);
+        vector<string> uterms(hldata.uterms.begin(), hldata.uterms.end());
        if (!uterms.empty()) {
            map<string, vector<string> > spellings;
            suggest(uterms, spellings);
@ -366,8 +366,8 @@ void ResListPager::displayPage(RclConfig *config)
    if (pageEmpty())
 	return;

-    HiliteData hdata;
-    m_docSource->getTerms(hdata.terms, hdata.groups, hdata.gslks);
+    HighlightData hdata;
+    m_docSource->getTerms(hdata);

    // Emit data for result entry paragraph. Do it in chunks that make sense
    // html-wise, else our client may get confused
--- a/src/query/reslistpager.h
+++ b/src/query/reslistpager.h
@ -19,14 +19,13 @@
 #define _reslistpager_h_included_

 #include <vector>
-using std::vector;

 #include "refcntr.h"
 #include "docseq.h"
+#include "hldata.h"

 class RclConfig;
 class PlainToRich;
-struct HiliteData;

 /**
 * Manage a paged HTML result list. 
@ -85,7 +84,7 @@ public:
    void resultPageFor(int docnum);
    void displayPage(RclConfig *);
    void displayDoc(RclConfig *, int idx, Rcl::Doc& doc, 
-		    const HiliteData& hdata, const string& sh = "");
+		    const HighlightData& hdata, const string& sh = "");
    bool pageEmpty() {return m_respage.size() == 0;}

    string queryDescription() {return m_docSource.isNull() ? "" :
@ -112,8 +111,9 @@ public:
    virtual string pageTop() {return string();}
    virtual string headerContent() {return string();}
    virtual string iconUrl(RclConfig *, Rcl::Doc& doc);
-    virtual void suggest(const vector<string>, 
-			 map<string, vector<string> >& sugg) {
+    virtual void suggest(const std::vector<std::string>, 
+			std::map<std::string, std::vector<std::string> >& sugg) 
+    {
        sugg.clear();
    }
    virtual string absSep() {return "&hellip;";}
@ -126,7 +126,7 @@ private:
    bool                 m_hasNext;
    PlainToRich         *m_hiliter;
    RefCntr<DocSequence> m_docSource;
-    vector<ResListEntry> m_respage;
+    std::vector<ResListEntry> m_respage;
 };

 #endif /* _reslistpager_h_included_ */
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@ -498,23 +498,12 @@ bool SearchData::fileNameOnly()
    return true;
 }

-// Extract all terms and term groups
-bool SearchData::getTerms(vector<string>& terms, 
-			  vector<vector<string> >& groups,
-			  vector<int>& gslks) const
+// Extract all term data
+void SearchData::getTerms(HighlightData &hld) const
 {
    for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++)
-	(*it)->getTerms(terms, groups, gslks);
-    return true;
-}
-// Extract user terms
-void SearchData::getUTerms(vector<string>& terms) const
-{
-    for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++)
-	(*it)->getUTerms(terms);
-    sort(terms.begin(), terms.end());
-    vector<string>::iterator it = unique(terms.begin(), terms.end());
-    terms.erase(it, terms.end());
+	(*it)->getTerms(hld);
+    return;
 }

 // Splitter callback for breaking a user string into simple terms and
@ -590,10 +579,10 @@ private:
 // translating.
 class StringToXapianQ {
 public:
-    StringToXapianQ(Db& db, const string& field, 
+    StringToXapianQ(Db& db, HighlightData& hld, const string& field, 
 		    const string &stmlng, bool boostUser)
-	: m_db(db), m_field(field), m_stemlang(stmlng), 
-	  m_doBoostUserTerms(boostUser)
+	: m_db(db), m_field(field), m_stemlang(stmlng),
+	  m_doBoostUserTerms(boostUser), m_hld(hld)
    { }

    bool processUserString(const string &iq,
@ -601,20 +590,6 @@ public:
 			   vector<Xapian::Query> &pqueries, 
 			   const StopList &stops,
 			   int slack = 0, bool useNear = false);
-    // After processing the string: return search terms and term
-    // groups (ie: for highlighting)
-    bool getTerms(vector<string>& terms, vector<vector<string> >& groups) 
-    {
-	terms.insert(terms.end(), m_terms.begin(), m_terms.end());
-	groups.insert(groups.end(), m_groups.begin(), m_groups.end());
-	return true;
-    }
-    bool getUTerms(vector<string>& terms) 
-    {
-	terms.insert(terms.end(), m_uterms.begin(), m_uterms.end());
-	return true;
-    }
-
 private:
    void expandTerm(bool dont, const string& term, vector<string>& exp, 
                    string& sterm, const string& prefix);
@ -630,10 +605,7 @@ private:
    const string& m_field;
    const string& m_stemlang;
    bool          m_doBoostUserTerms;
-    // Single terms and phrases resulting from breaking up text;
-    vector<string>          m_uterms;
-    vector<string>          m_terms;
-    vector<vector<string> > m_groups; 
+    HighlightData& m_hld;
 };

 #if 1
@ -647,7 +619,7 @@ static void listVector(const string& what, const vector<string>&l)
 }
 #endif

-/** Expand stem and wildcards
+/** Take simple term and expand stem and wildcards
 *
 * @param nostemexp don't perform stem expansion. This is mainly used to
 *   prevent stem expansion inside phrases (because the user probably
@ -680,9 +652,11 @@ void StringToXapianQ::expandTerm(bool nostemexp,
 	nostemexp = true;
    }

+    if (!haswild)
+	m_hld.uterms.insert(term);
+
    if (nostemexp && !haswild) {
 	sterm = term;
-        m_uterms.push_back(sterm);
 	exp.resize(1);
 	exp[0] = prefix + term;
    } else {
@ -692,7 +666,6 @@ void StringToXapianQ::expandTerm(bool nostemexp,
                           m_field);
 	} else {
 	    sterm = term;
-            m_uterms.push_back(sterm);
 	    m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, 
 			   m_field);
 	}
@ -701,7 +674,6 @@ void StringToXapianQ::expandTerm(bool nostemexp,
 	    exp.push_back(it->term);
 	}
    }
-    //listVector("ExpandTerm:uterms now: ", m_uterms);
 }

 // Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
@ -753,12 +725,15 @@ void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,
    }

    expandTerm(nostemexp, span, exp, sterm, prefix);
-
-    // m_terms is used for highlighting, we don't want prefixes in there.
+    
+    // Set up the highlight data. No prefix should go in there
    for (vector<string>::const_iterator it = exp.begin(); 
 	 it != exp.end(); it++) {
-	m_terms.push_back(it->substr(prefix.size()));
+	m_hld.groups.push_back(vector<string>(1, it->substr(prefix.size())));
+	m_hld.slacks.push_back(0);
+	m_hld.grpsugidx.push_back(m_hld.ugroups.size() - 1);
    }
+
    // Push either term or OR of stem-expanded set
    Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());

@ -786,7 +761,9 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
    Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR : 
 	Xapian::Query::OP_PHRASE;
    vector<Xapian::Query> orqueries;
+#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
    bool hadmultiple = false;
+#endif
    vector<vector<string> >groups;

    string prefix;
@ -805,15 +782,19 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
    for (vector<string>::iterator it = splitData->terms.begin();
 	 it != splitData->terms.end(); it++, nxit++) {
 	LOGDEB0(("ProcessPhrase: processing [%s]\n", it->c_str()));
-	// Adjust when we do stem expansion. Not inside phrases, and
-	// some versions of xapian will accept only one OR clause
-	// inside NEAR, all others must be leafs.
-	bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE) || hadmultiple;
+	// Adjust when we do stem expansion. Not if disabled by
+	// caller, not inside phrases, and some versions of xapian
+	// will accept only one OR clause inside NEAR.
+	bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE) 
+#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
+	    || hadmultiple
+#endif // single OR inside NEAR
+	    ;

 	string sterm;
 	vector<string> exp;
 	expandTerm(nostemexp, *it, exp, sterm, prefix);
-	LOGDEB0(("ProcessPhrase: exp size %d\n", exp.size()));
+	LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
 	listVector("", exp);
 	// groups is used for highlighting, we don't want prefixes in there.
 	vector<string> noprefs;
@ -850,7 +831,13 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
    vector<vector<string> > allcombs;
    vector<string> comb;
    multiply_groups(groups.begin(), groups.end(), comb, allcombs);
-    m_groups.insert(m_groups.end(), allcombs.begin(), allcombs.end());
+    
+    // Insert the search groups and slacks in the highlight data, with
+    // a reference to the user entry that generated them:
+    m_hld.groups.insert(m_hld.groups.end(), allcombs.begin(), allcombs.end());
+    m_hld.slacks.insert(m_hld.slacks.end(), allcombs.size(), slack);
+    m_hld.grpsugidx.insert(m_hld.grpsugidx.end(), allcombs.size(), 
+			   m_hld.ugroups.size() - 1);
 }

 // Trim string beginning with ^ or ending with $ and convert to flags
@ -875,7 +862,16 @@ static int stringToMods(string& s)
 * We just separate words and phrases, and do wildcard and stem expansion,
 *
 * This is used to process data entered into an OR/AND/NEAR/PHRASE field of
- * the GUI.
+ * the GUI (in the case of NEAR/PHRASE, clausedist adds dquotes to the user
+ * entry).
+ *
+ * This appears awful, and it would seem that the split into
+ * terms/phrases should be performed in the upper layer so that we
+ * only receive pure term or near/phrase pure elements here, but in
+ * fact there are things that would appear like terms to naive code,
+ * and which may actually be turned into phrases (ie: tom:jerry),
+ * in a manner which intimately depends on the index implementation,
+ * so that it makes sense to process this here.
 *
 * The final list contains one query for each term or phrase
 *   - Elements corresponding to a stem-expanded part are an OP_OR
@ -895,9 +891,6 @@ bool StringToXapianQ::processUserString(const string &iq,
 {
    LOGDEB(("StringToXapianQ:: query string: [%s], slack %d, near %d\n", iq.c_str(), slack, useNear));
    ermsg.erase();
-    m_uterms.clear();
-    m_terms.clear();
-    m_groups.clear();

    // Simple whitespace-split input into user-level words and
    // double-quoted phrases: word1 word2 "this is a phrase". 
@ -952,10 +945,12 @@ bool StringToXapianQ::processUserString(const string &iq,
 	    case 0: 
 		continue;// ??
 	    case 1: 
+		m_hld.ugroups.push_back(vector<string>(1, *it));
 		processSimpleSpan(splitter.terms.front(), 
                                  splitter.nostemexps.front(), pqueries);
 		break;
 	    default:
+		m_hld.ugroups.push_back(vector<string>(1, *it));
 		processPhraseOrNear(&splitter, pqueries, useNear, slack, mods);
 	    }
 	}
@ -984,8 +979,6 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
    LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
 	     stemlang.c_str()));

-    m_terms.clear();
-    m_groups.clear();
    Xapian::Query *qp = (Xapian::Query *)p;
    *qp = Xapian::Query();

@ -1007,16 +1000,14 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
 	(m_parentSearch && !m_parentSearch->haveWildCards()) || 
 	(m_parentSearch == 0 && !m_haveWildCards);

-    StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm);
+    StringToXapianQ tr(db, m_hldata, m_field, l_stemlang, doBoostUserTerm);
    if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList()))
 	return false;
    if (pqueries.empty()) {
 	LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
 	return true;
    }
-    tr.getTerms(m_terms, m_groups);
-    tr.getUTerms(m_uterms);
-    //listVector("SearchDataClauseSimple: Uterms: ", m_uterms);
+
    *qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
    if  (m_weight != 1.0) {
 	*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
@ -1056,8 +1047,6 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
    const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
 	stemlang;
    LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
-    m_terms.clear();
-    m_groups.clear();

    Xapian::Query *qp = (Xapian::Query *)p;
    *qp = Xapian::Query();
@ -1080,7 +1069,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
    }
    string s = cstr_dquote + m_text + cstr_dquote;
    bool useNear = (m_tp == SCLT_NEAR);
-    StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm);
+    StringToXapianQ tr(db, m_hldata, m_field, l_stemlang, doBoostUserTerm);
    if (!tr.processUserString(s, m_reason, pqueries, db.getStopList(),
 			      m_slack, useNear))
 	return false;
@ -1088,8 +1077,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
 	LOGERR(("SearchDataClauseDist: resolved to null query\n"));
 	return true;
    }
-    tr.getTerms(m_terms, m_groups);
-    tr.getUTerms(m_uterms);
+
    *qp = *pqueries.begin();
    if (m_weight != 1.0) {
 	*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
@ -1097,21 +1085,4 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
    return true;
 }

-// Translate subquery
-bool SearchDataClauseSub::toNativeQuery(Rcl::Db &db, void *p, const string&)
-{
-    return m_sub->toNativeQuery(db, p);
-}
-
-bool SearchDataClauseSub::getTerms(vector<string>& terms, 
-				   vector<vector<string> >& groups,
-				   vector<int>& gslks) const
-{
-    return m_sub.getconstptr()->getTerms(terms, groups, gslks);
-}
-void SearchDataClauseSub::getUTerms(vector<string>& terms) const
-{
-    m_sub.getconstptr()->getUTerms(terms);
-}
-
 } // Namespace Rcl
--- a/src/rcldb/searchdata.h
+++ b/src/rcldb/searchdata.h
@ -31,14 +31,11 @@
 #include "refcntr.h"
 #include "smallut.h"
 #include "cstr.h"
+#include "hldata.h"

 class RclConfig;

-#ifndef NO_NAMESPACES
-using std::vector;
-using std::string;
 namespace Rcl {
-#endif // NO_NAMESPACES

 /** Search clause types */
 enum SClType {
@ -50,33 +47,33 @@ enum SClType {
 class SearchDataClause;

 /** 
-  Data structure representing a Recoll user query, for translation
-  into a Xapian query tree. This could probably better called a 'question'.
+    Data structure representing a Recoll user query, for translation
+    into a Xapian query tree. This could probably be better called a 'question'.

-  This is a list of search clauses combined through either OR or AND.
+    This is a list of search clauses combined through either OR or AND.

-  Clauses either reflect user entry in a query field: some text, a
-  clause type (AND/OR/NEAR etc.), possibly a distance, or points to
-  another SearchData representing a subquery.
+    Clauses either reflect user entry in a query field: some text, a
+    clause type (AND/OR/NEAR etc.), possibly a distance, or points to
+    another SearchData representing a subquery.

-  The content of each clause when added may not be fully parsed yet
-  (may come directly from a gui field). It will be parsed and may be
-  translated to several queries in the Xapian sense, for exemple
-  several terms and phrases as would result from 
-  ["this is a phrase"  term1 term2] . 
+    The content of each clause when added may not be fully parsed yet
+    (may come directly from a gui field). It will be parsed and may be
+    translated to several queries in the Xapian sense, for example
+    several terms and phrases as would result from 
+    ["this is a phrase"  term1 term2] . 

-  This is why the clauses also have an AND/OR/... type. 
+    This is why the clauses also have an AND/OR/... type. 

-  A phrase clause could be added either explicitly or using double quotes:
-  {SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]}
+    A phrase clause could be added either explicitly or using double quotes:
+    {SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]}

- */
+*/
 class SearchData {
 public:
    SearchData(SClType tp) 
-        : m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0), 
-	  m_haveDates(false), m_maxSize(size_t(-1)),
-	  m_minSize(size_t(-1)), m_haveWildCards(false) 
+    : m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0), 
+      m_haveDates(false), m_maxSize(size_t(-1)),
+      m_minSize(size_t(-1)), m_haveWildCards(false) 
    {
 	if (m_tp != SCLT_OR && m_tp != SCLT_AND) 
 	    m_tp = SCLT_OR;
@ -108,7 +105,7 @@ public:
    bool maybeAddAutoPhrase(Rcl::Db &db, double threshold);

    /** Set/get top subdirectory for filtering results */
-    void setTopdir(const string& t, bool excl = false, float w = 1.0) 
+    void setTopdir(const std::string& t, bool excl = false, float w = 1.0) 
    {
 	m_topdir = t;
 	m_topdirexcl = excl;
@ -122,38 +119,37 @@ public:
    void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}

    /** Add file type for filtering results */
-    void addFiletype(const string& ft) {m_filetypes.push_back(ft);}
+    void addFiletype(const std::string& ft) {m_filetypes.push_back(ft);}
    /** Add file type to not wanted list */
-    void remFiletype(const string& ft) {m_nfiletypes.push_back(ft);}
+    void remFiletype(const std::string& ft) {m_nfiletypes.push_back(ft);}

-    void setStemlang(const string& lang = "english") {m_stemlang = lang;}
+    void setStemlang(const std::string& lang = "english") {m_stemlang = lang;}

    /** Retrieve error description */
-    string getReason() {return m_reason;}
+    std::string getReason() {return m_reason;}

-    /** Get terms and phrase/near groups. Used in the GUI for highlighting 
-     * The groups and gslks vectors are parallel and hold the phrases/near
-     * string groups and their associated slacks (distance in excess of group
-     * size)
+    /** Return term expansion data. Mostly used by caller for highlighting
     */
-    bool getTerms(vector<string>& terms, 
-		  vector<vector<string> >& groups, vector<int>& gslks) const;
-    /** Get user-input terms (before expansion etc.) */
-    void getUTerms(vector<string>& terms) const;
+    void getTerms(HighlightData& hldata) const;

    /** 
     * Get/set the description field which is retrieved from xapian after
     * initializing the query. It is stored here for usage in the GUI.
     */
-    string getDescription() {return m_description;}
-    void setDescription(const string& d) {m_description = d;}
+    std::string getDescription() {return m_description;}
+    void setDescription(const std::string& d) {m_description = d;}

 private:
-    SClType                   m_tp; // Only SCLT_AND or SCLT_OR here
-    vector<SearchDataClause*> m_query;
-    vector<string>            m_filetypes; // Restrict to filetypes if set.
-    vector<string>            m_nfiletypes; // Unwanted file types
-    string                    m_topdir; // Restrict to subtree.
+    // Combine type. Only SCLT_AND or SCLT_OR here
+    SClType                   m_tp; 
+    // Complex query descriptor
+    std::vector<SearchDataClause*> m_query;
+    // Restricted set of filetypes if not empty.
+    std::vector<std::string>            m_filetypes; 
+    // Excluded set of file types if not empty
+    std::vector<std::string>            m_nfiletypes;
+    // Restrict to subtree.
+    std::string                    m_topdir; 
    bool                      m_topdirexcl; // Invert meaning
    float                     m_topdirweight; // affect weight instead of filter
    bool                      m_haveDates;
@ -162,11 +158,11 @@ private:
    size_t                    m_minSize;
    // Printable expanded version of the complete query, retrieved/set
    // from rcldb after the Xapian::setQuery() call
-    string m_description; 
-    string m_reason;
+    std::string m_description; 
+    std::string m_reason;
    bool   m_haveWildCards;
-    string m_stemlang;
-    bool expandFileTypes(RclConfig *cfg, vector<string>& exptps);
+    std::string m_stemlang;
+    bool expandFileTypes(RclConfig *cfg, std::vector<std::string>& exptps);
    /* Copyconst and assignment private and forbidden */
    SearchData(const SearchData &) {}
    SearchData& operator=(const SearchData&) {return *this;};
@ -178,39 +174,56 @@ public:
 		   SDCM_ANCHOREND=4};

    SearchDataClause(SClType tp) 
-	: m_tp(tp), m_parentSearch(0), m_haveWildCards(0), 
-	  m_modifiers(SDCM_NONE), m_weight(1.0)
+    : m_tp(tp), m_parentSearch(0), m_haveWildCards(0), 
+      m_modifiers(SDCM_NONE), m_weight(1.0)
    {}
    virtual ~SearchDataClause() {}
-    virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0;
+    virtual bool toNativeQuery(Rcl::Db &db, void *, const std::string&) = 0;
    bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
-    virtual string getReason() const {return m_reason;}
-    virtual bool getTerms(vector<string>&, vector<vector<string> >&,
-			  vector<int>&) const = 0;
-    virtual void getUTerms(vector<string>&) const = 0;
+    virtual std::string getReason() const {return m_reason;}
+    virtual void getTerms(HighlightData & hldata) const = 0;

-    SClType getTp() {return m_tp;}
-    void setParent(SearchData *p) {m_parentSearch = p;}
-    virtual void setModifiers(Modifier mod) {m_modifiers = mod;}
-    virtual int getModifiers() {return m_modifiers;}
-    virtual void addModifier(Modifier mod) {
+    SClType getTp() 
+    {
+	return m_tp;
+    }
+    void setParent(SearchData *p) 
+    {
+	m_parentSearch = p;
+    }
+    virtual void setModifiers(Modifier mod) 
+    {
+	m_modifiers = mod;
+    }
+    virtual int getModifiers() 
+    {
+	return m_modifiers;
+    }
+    virtual void addModifier(Modifier mod) 
+    {
 	int imod = getModifiers();
 	imod |= mod;
 	setModifiers(Modifier(imod));
    }
-    virtual void setWeight(float w) {m_weight = w;}
+    virtual void setWeight(float w) 
+    {
+	m_weight = w;
+    }
    friend class SearchData;

 protected:
-    string      m_reason;
+    std::string      m_reason;
    SClType     m_tp;
    SearchData *m_parentSearch;
    bool        m_haveWildCards;
    Modifier    m_modifiers;
    float       m_weight;
 private:
-    SearchDataClause(const SearchDataClause&) {}
-    SearchDataClause& operator=(const SearchDataClause&) {
+    SearchDataClause(const SearchDataClause&) 
+    {
+    }
+    SearchDataClause& operator=(const SearchDataClause&) 
+    {
 	return *this;
    }
 };
@ -221,45 +234,37 @@ private:
 */
 class SearchDataClauseSimple : public SearchDataClause {
 public:
-    SearchDataClauseSimple(SClType tp, const string& txt, 
-			   const string& fld = string())
-	: SearchDataClause(tp), m_text(txt), m_field(fld), m_slack(0) {
-	m_haveWildCards = (txt.find_first_of(cstr_minwilds) != string::npos);
+    SearchDataClauseSimple(SClType tp, const std::string& txt, 
+			   const std::string& fld = std::string())
+	: SearchDataClause(tp), m_text(txt), m_field(fld)
+    {
+	m_haveWildCards = 
+	    (txt.find_first_of(cstr_minwilds) != std::string::npos);
    }

-    virtual ~SearchDataClauseSimple() {}
+    virtual ~SearchDataClauseSimple() 
+    {
+    }

    /** Translate to Xapian query */
-    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
+    virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);

-    /** Retrieve query terms and term groups. This is used for highlighting */
-    virtual bool getTerms(vector<string>& terms, /* Single terms */
-			  vector<vector<string> >& groups, /* Prox grps */
-			  vector<int>& gslks) const        /* Prox slacks */
+    virtual void getTerms(HighlightData& hldata) const
    {
-	terms.insert(terms.end(), m_terms.begin(), m_terms.end());
-	groups.insert(groups.end(), m_groups.begin(), m_groups.end());
-	gslks.insert(gslks.end(), m_groups.size(), m_slack);
-	return true;
+	hldata.append(m_hldata);
    }
-    virtual void getUTerms(vector<string>& terms) const
+    virtual const std::string& gettext() 
    {
-	terms.insert(terms.end(), m_uterms.begin(), m_uterms.end());
+	return m_text;
+    }
+    virtual const std::string& getfield() 
+    {
+	return m_field;
    }
-    virtual const string& gettext() {return m_text;}
-    virtual const string& getfield() {return m_field;}
 protected:
-    string  m_text;  // Raw user entry text.
-    string  m_field; // Field specification if any
-    // Single terms and phrases resulting from breaking up m_text;
-    // valid after toNativeQuery() call
-    vector<string>          m_terms;
-    vector<vector<string> > m_groups;
-    // User terms before expansion
-    vector<string>          m_uterms;
-    // Declare m_slack here. Always 0, but allows getTerms to work for
-    // SearchDataClauseDist
-    int m_slack;
+    std::string  m_text;  // Raw user entry text.
+    std::string  m_field; // Field specification if any
+    HighlightData m_hldata;
 };

 /** 
@ -272,29 +277,39 @@ protected:
 */
 class SearchDataClauseFilename : public SearchDataClauseSimple {
 public:
-    SearchDataClauseFilename(const string& txt)
-	: SearchDataClauseSimple(SCLT_FILENAME, txt) {
+    SearchDataClauseFilename(const std::string& txt)
+	: SearchDataClauseSimple(SCLT_FILENAME, txt) 
+    {
 	// File name searches don't count when looking for wild cards.
 	m_haveWildCards = false;
    }
-    virtual ~SearchDataClauseFilename() {}
-    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
+
+    virtual ~SearchDataClauseFilename() 
+    {
+    }
+
+    virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
 };

 /** 
 * A clause coming from a NEAR or PHRASE entry field. There is only one 
- * string group, and a specified distance, which applies to it.
+ * string group, and a specified distance, which applies to it.
 */
 class SearchDataClauseDist : public SearchDataClauseSimple {
 public:
-    SearchDataClauseDist(SClType tp, const string& txt, int slack, 
-			 const string& fld = string())
-	: SearchDataClauseSimple(tp, txt, fld) {m_slack = slack;}
-    virtual ~SearchDataClauseDist() {}
+    SearchDataClauseDist(SClType tp, const std::string& txt, int slack, 
+			 const std::string& fld = std::string())
+	: SearchDataClauseSimple(tp, txt, fld), m_slack(slack)
+    {
+    }

-    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
+    virtual ~SearchDataClauseDist() 
+    {
+    }

-    // m_slack is declared in SearchDataClauseSimple
+    virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
+private:
+    int m_slack;
 };

 /** Subquery */
@ -302,15 +317,28 @@ class SearchDataClauseSub : public SearchDataClause {
 public:
    // We take charge of the SearchData * and will delete it.
    SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub) 
-	: SearchDataClause(tp), m_sub(sub) {}
-    virtual ~SearchDataClauseSub() {}
-    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
-    virtual bool getTerms(vector<string>&, vector<vector<string> >&,
-			  vector<int>&) const;
-    virtual void getUTerms(vector<string>&) const;
+	: SearchDataClause(tp), m_sub(sub) 
+    {
+    }
+
+    virtual ~SearchDataClauseSub() 
+    {
+    }
+
+    virtual bool toNativeQuery(Rcl::Db &db, void *p, const std::string&)
+    {
+	return m_sub->toNativeQuery(db, p);
+    }
+
+    virtual void getTerms(HighlightData& hldata) const
+    {
+	m_sub.getconstptr()->getTerms(hldata);
+    }
+
 protected:
    RefCntr<SearchData> m_sub;
 };

 } // Namespace Rcl
+
 #endif /* _SEARCHDATA_H_INCLUDED_ */
--- a/src/utils/hldata.h
+++ b/src/utils/hldata.h
@ -0,0 +1,54 @@
+#ifndef _hldata_h_included_
+#define _hldata_h_included_
+
+#include <vector>
+#include <string>
+#include <set>
+
+/** Data about user search terms and their expansions. This is used mostly
+ *  for highlighting result text and walking the matches.
+ */
+struct HighlightData {
+    /** The user terms, excluding those with wildcards.
+     * This list is intended for orthographic suggestions but the terms are
+     * unaccented and lowercased anyway because they are compared to the
+     * dictionary generated from the index term list (which is unaccented).
+     */
+    std::set<std::string> uterms;
+
+    /** The original user terms-or-groups. This is for displaying the matched
+     * terms or groups, i.e. in relation with highlighting or skipping to the
+     * next match. These are raw, diacritics and case preserved.
+     */
+    std::vector<std::vector<std::string> > ugroups;
+
+    /** Processed/expanded terms and groups. Used for looking for
+     * regions to highlight. Terms are just groups with 1 entry. All
+     * terms in there are unaccented, and the list may include values
+     * expanded from the original terms by stem or wildcard expansion.
+     */
+    std::vector<std::vector<std::string> > groups;
+    /** Group slacks. Parallel to groups */
+    std::vector<int> slacks;
+
+    /** Index into ugroups for each group. Parallel to groups. As a
+     * user term or group may generate many processed/expanded terms
+     * or groups, this is how we relate them.
+     */
+    std::vector<unsigned int> grpsugidx;
+
+    /** Empty all five containers. */
+    void clear()
+    {
+	uterms.clear();
+	ugroups.clear();
+	groups.clear();
+	slacks.clear();
+	grpsugidx.clear();
+    }
+    /** Merge the content of another HighlightData into this one. */
+    void append(const HighlightData&);
+
+    /** Append a printable dump of the content to out (debugging aid). */
+    void toString(std::string& out);
+};
+
+#endif /* _hldata_h_included_ */
--- a/src/utils/smallut.cpp
+++ b/src/utils/smallut.cpp
@ -36,6 +36,7 @@

 #include "smallut.h"
 #include "utf8iter.h"
+#include "hldata.h"

 #ifndef NO_NAMESPACES
 using namespace std;
@ -1038,8 +1039,57 @@ void catstrerror(string *reason, const char *what, int _errno)
 #endif
 }

+/**
+ * Append a printable description of the highlight data to a string.
+ * This is a debugging aid.
+ *
+ * The user terms come first, then the sizes of the groups, grpsugidx and
+ * ugroups vectors, then each expanded group between braces followed by its
+ * slack. Every time the user group index changes, the corresponding user
+ * group is printed between parentheses in front of its expansions.
+ *
+ * @param out the string to append to. Existing content is preserved.
+ */
+void HighlightData::toString(std::string& out)
+{
+    out.append("\nUser terms (orthograph): ");
+    for (std::set<std::string>::const_iterator it = uterms.begin();
+	 it != uterms.end(); ++it) {
+	out.append(" [").append(*it).append("]");
+    }
 
-#else
+    out.append("\nGroups: ");
+    // Three decimal ints plus a short fixed text: cbuf is amply sized.
+    char cbuf[200];
+    sprintf(cbuf, "Groups size %d grpsugidx size %d ugroups size %d",
+	    int(groups.size()), int(grpsugidx.size()), int(ugroups.size()));
+    out.append(cbuf);
+
+    // slacks and grpsugidx are documented as parallel to groups, but
+    // nothing enforces it: never index past the end of a vector.
+    unsigned int ugidx = (unsigned int)-1;
+    for (unsigned int i = 0; i < groups.size(); i++) {
+	if (i < grpsugidx.size() && ugidx != grpsugidx[i]) {
+	    // First expansion of a new user group: print the user group
+	    ugidx = grpsugidx[i];
+	    out.append("\n(");
+	    if (ugidx < ugroups.size()) {
+		for (unsigned int j = 0; j < ugroups[ugidx].size(); j++) {
+		    out.append("[").append(ugroups[ugidx][j]).append("] ");
+		}
+	    }
+	    out.append(") ->");
+	}
+	out.append(" {");
+	for (unsigned int j = 0; j < groups[i].size(); j++) {
+	    out.append("[").append(groups[i][j]).append("]");
+	}
+	out.append("}");
+	if (i < slacks.size()) {
+	    sprintf(cbuf, "%d", slacks[i]);
+	    out.append(cbuf);
+	}
+    }
+    out.append("\n");
+}
+
+/**
+ * Merge the content of another HighlightData into this one.
+ *
+ * User terms are added to the set; user groups, expanded groups and slacks
+ * are appended in order. The appended grpsugidx values are shifted by the
+ * number of user groups we held before the call, so that they keep
+ * designating the same user groups inside the merged ugroups vector.
+ *
+ * @param hl the data to merge in.
+ */
+void HighlightData::append(const HighlightData& hl)
+{
+    // Position at which the appended user groups will start in our own
+    // ugroups: must be captured before ugroups grows.
+    const size_t ugbase = ugroups.size();
+
+    uterms.insert(hl.uterms.begin(), hl.uterms.end());
+    ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end());
+    groups.insert(groups.end(), hl.groups.begin(), hl.groups.end());
+    slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end());
+
+    // The incoming indices are relative to hl.ugroups: rebase them.
+    for (size_t i = 0; i < hl.grpsugidx.size(); i++) {
+	grpsugidx.push_back(hl.grpsugidx[i] + ugbase);
+    }
+}
+
+#else // TEST_SMALLUT

 #include <string>
 using namespace std;