merge stemExpand into termExpand. return term frequencies from there and display in spellW

2006-12-19 12:11:21 +00:00 · 2006-12-19 12:11:21 +00:00 · be05eaa6e0
commit be05eaa6e0
parent 50b01c6ea4
13 changed files with 301 additions and 201 deletions
--- a/src/qtgui/spell.ui
+++ b/src/qtgui/spell.ui
@ -106,30 +106,40 @@
                        </widget>
                    </grid>
                </widget>
-                <widget class="QTextEdit">
+                <widget class="QListView">
+                    <column>
+                        <property name="text">
+                            <string>Term</string>
+                        </property>
+                        <property name="clickable">
+                            <bool>true</bool>
+                        </property>
+                        <property name="resizable">
+                            <bool>true</bool>
+                        </property>
+                    </column>
+                    <column>
+                        <property name="text">
+                            <string>Count</string>
+                        </property>
+                        <property name="clickable">
+                            <bool>true</bool>
+                        </property>
+                        <property name="resizable">
+                            <bool>true</bool>
+                        </property>
+                    </column>
                    <property name="name">
-                        <cstring>suggsTE</cstring>
+                        <cstring>suggsLV</cstring>
                    </property>
-                    <property name="minimumSize">
-                        <size>
-                            <width>0</width>
-                            <height>200</height>
-                        </size>
+                    <property name="selectionMode">
+                        <enum>Extended</enum>
                    </property>
-                    <property name="focusPolicy">
-                        <enum>TabFocus</enum>
-                    </property>
-                    <property name="textFormat">
-                        <enum>PlainText</enum>
-                    </property>
-                    <property name="readOnly">
+                    <property name="showSortIndicator">
                        <bool>true</bool>
                    </property>
-                    <property name="undoRedoEnabled">
-                        <bool>false</bool>
-                    </property>
-                    <property name="tabChangesFocus">
-                        <bool>true</bool>
+                    <property name="resizeMode">
+                        <enum>NoColumn</enum>
                    </property>
                </widget>
            </vbox>
@ -139,7 +149,6 @@
 <tabstops>
    <tabstop>baseWordLE</tabstop>
    <tabstop>expandPB</tabstop>
-    <tabstop>suggsTE</tabstop>
    <tabstop>dismissPB</tabstop>
    <tabstop>expTypeCMB</tabstop>
    <tabstop>stemLangCMB</tabstop>
--- a/src/qtgui/spell_w.cpp
+++ b/src/qtgui/spell_w.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: spell_w.cpp,v 1.7 2006-11-30 13:38:44 dockes Exp $ (C) 2005 J.F.Dockes";
+static char rcsid[] = "@(#$Id: spell_w.cpp,v 1.8 2006-12-19 12:11:21 dockes Exp $ (C) 2005 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -25,17 +25,22 @@ static char rcsid[] = "@(#$Id: spell_w.cpp,v 1.7 2006-11-30 13:38:44 dockes Exp

 #include <qmessagebox.h>
 #include <qpushbutton.h>
-#include <qtextedit.h>
 #include <qlabel.h>
 #include <qlineedit.h>
 #include <qlayout.h>
 #include <qtooltip.h>
 #include <qcombobox.h>
+#if (QT_VERSION < 0x040000)
+#include <qlistview.h>
+#else
+#include <q3listview.h>
+#endif

 #include "debuglog.h"
 #include "recoll.h"
 #include "spell_w.h"
 #include "guiutils.h"
+#include "rcldb.h"

 #ifdef RCL_USE_ASPELL
 #include "rclaspell.h"
@ -79,16 +84,47 @@ void SpellW::init()
    connect(baseWordLE, SIGNAL(returnPressed()), this, SLOT(doExpand()));
    connect(expandPB, SIGNAL(clicked()), this, SLOT(doExpand()));
    connect(dismissPB, SIGNAL(clicked()), this, SLOT(close()));
-    connect(suggsTE, SIGNAL(doubleClicked(int, int)), 
-	    this, SLOT(textDoubleClicked(int, int)));
+
+    connect(suggsLV,
+#if (QT_VERSION < 0x040000)
+	   SIGNAL(doubleClicked(QListViewItem *, const QPoint &, int)),
+#else
+	   SIGNAL(doubleClicked(Q3ListViewItem *, const QPoint &, int)),
+#endif
+	   this, SLOT(textDoubleClicked()));
+
    connect(expTypeCMB, SIGNAL(activated(int)), 
 	    this, SLOT(modeSet(int)));
+
+    suggsLV->setColumnWidth(0, 200);
+    suggsLV->setColumnWidth(1, 100);
+    // No initial sorting: user can choose to establish one
+    suggsLV->setSorting(100, false);
 }

+// Suggestion list row: column 1 (Count) is compared as a number, not as text.
+// NOTE(review): both columns compare i against this item (reversed) - confirm.
+class MyListViewItem : public QListViewItem
+{
+public:
+    MyListViewItem(QListView *listView, const QString& s1, const QString& s2)
+        : QListViewItem(listView, s1, s2)
+    { }
+
+    int compare(QListViewItem * i, int col, bool ascending) const {
+	switch (col) {
+	case 0: return i->text(0).compare(text(0));
+	case 1: return i->text(1).toInt() - text(1).toInt();
+	default: return 0; // No ordering defined for any other column
+	}
+    }
+};
+
+
 /* Expand term according to current mode */
 void SpellW::doExpand()
 {
-    suggsTE->clear();
+    suggsLV->clear();
    if (baseWordLE->text().isEmpty()) 
 	return;

@ -100,26 +136,27 @@ void SpellW::doExpand()

    string expr = string((const char *)baseWordLE->text().utf8());
    list<string> suggs;
+
    prefs.termMatchType = expTypeCMB->currentItem();

    Rcl::Db::MatchType mt = Rcl::Db::ET_WILD;
+    switch(expTypeCMB->currentItem()) {
+    case 0: mt = Rcl::Db::ET_WILD; break;
+    case 1:mt = Rcl::Db::ET_REGEXP; break;
+    case 2:mt = Rcl::Db::ET_STEM; break;
+    }
+
+    list<Rcl::TermMatchEntry> entries;
    switch (expTypeCMB->currentItem()) {
-    case 1: mt = Rcl::Db::ET_REGEXP;
-	/* FALLTHROUGH */
    case 0: 
-	if (!rcldb->termMatch(mt, expr, suggs, prefs.queryStemLang.ascii(),
+    case 1:
+    case 2: {
+	if (!rcldb->termMatch(mt, prefs.queryStemLang.ascii(), expr, entries, 
 			      200)) {
 	    LOGERR(("SpellW::doExpand:rcldb::termMatch failed\n"));
 	    return;
 	}
-	break;
-
-
-    case 2: 
-	{
-	    string stemlang = (const char *)stemLangCMB->currentText().utf8();
-	    suggs = rcldb->stemExpand(stemlang,expr);
-	}
+    }
 	break;

 #ifdef RCL_USE_ASPELL
@ -132,24 +169,37 @@ void SpellW::doExpand()
 	    LOGDEB(("SpellW::doExpand: aspell init error\n"));
 	    return;
 	}
+	list<string> suggs;
 	if (!aspell->suggest(*rcldb, expr, suggs, reason)) {
 	    QMessageBox::warning(0, "Recoll",
 				 tr("Aspell expansion error. "));
 	    LOGERR(("SpellW::doExpand:suggest failed: %s\n", reason.c_str()));
 	}
+	for (list<string>::const_iterator it = suggs.begin(); 
+	     it != suggs.end(); it++) 
+	    entries.push_back(Rcl::TermMatchEntry(*it));
    }
 #endif
    }

-    if (suggs.empty()) {
-	suggsTE->append(tr("No expansion found"));
+
+    if (entries.empty()) {
+	new MyListViewItem(suggsLV, tr("No expansion found"), "");
    } else {
-	for (list<string>::iterator it = suggs.begin(); 
-	     it != suggs.end(); it++) {
-	    suggsTE->append(QString::fromUtf8(it->c_str()));
+	// Seems that need to use a reverse iterator to get same order in 
+	// listview and input list ??
+	for (list<Rcl::TermMatchEntry>::reverse_iterator it = entries.rbegin(); 
+	     it != entries.rend(); it++) {
+	    LOGDEB(("SpellW::expand: %6d [%s]\n", it->wcf, it->term.c_str()));
+	    char num[20];
+	    if (it->wcf)
+		sprintf(num, "%d", it->wcf);
+	    else
+		num[0] = 0;
+	    new MyListViewItem(suggsLV, 
+			      QString::fromUtf8(it->term.c_str()),
+			      QString::fromAscii(num));
 	}
-	suggsTE->setCursorPosition(0,0);
-	suggsTE->ensureCursorVisible();
    }
 }

@ -157,17 +207,24 @@ void SpellW::wordChanged(const QString &text)
 {
    if (text.isEmpty()) {
 	expandPB->setEnabled(false);
-	suggsTE->clear();
+	suggsLV->clear();
    } else {
 	expandPB->setEnabled(true);
    }
 }

-void SpellW::textDoubleClicked(int para, int)
+// Double-click slot for the suggestions list: emits wordSelect() once for
+// each selected row, passing along the term displayed in column 0.
+void SpellW::textDoubleClicked()
 {
-    suggsTE->setSelection(para, 0, para, 1000);
-    if (suggsTE->hasSelectedText())
-	emit(wordSelect(suggsTE->selectedText()));
+    for (QListViewItemIterator it(suggsLV); it.current(); ++it) {
+	QListViewItem *item = it.current();
+	// Hand the QString over as is: the previous (const char *) cast sent
+	// the term through an 8-bit conversion, which can damage non-ASCII
+	// terms (they are built with QString::fromUtf8() in doExpand()).
+	if (item->isSelected())
+	    emit(wordSelect(item->text(0)));
+    }
 }

 void SpellW::modeSet(int mode)
--- a/src/qtgui/spell_w.h
+++ b/src/qtgui/spell_w.h
@ -1,6 +1,6 @@
 #ifndef _ASPELL_W_H_INCLUDED_
 #define _ASPELL_W_H_INCLUDED_
-/* @(#$Id: spell_w.h,v 1.5 2006-12-04 09:56:26 dockes Exp $  (C) 2006 J.F.Dockes */
+/* @(#$Id: spell_w.h,v 1.6 2006-12-19 12:11:21 dockes Exp $  (C) 2006 J.F.Dockes */
 /*
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
@ -54,7 +54,7 @@ public:
 public slots:
    virtual void doExpand();
    virtual void wordChanged(const QString&);
-    virtual void textDoubleClicked(int, int);
+    virtual void textDoubleClicked();
    virtual void modeSet(int);

 signals:
--- a/src/qtgui/ssearch_w.cpp
+++ b/src/qtgui/ssearch_w.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.16 2006-12-14 13:53:43 dockes Exp $ (C) 2006 J.F.Dockes";
+static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.17 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -169,11 +169,9 @@ void SSearch::completion()

    // Query database
    const int max = 100;
-    list<string> strs;
-    
-    if (!rcldb->termMatch(Rcl::Db::ET_WILD, s, strs, 
-			    prefs.queryStemLang.ascii(),max)
-	|| strs.size() == 0) {
+    list<Rcl::TermMatchEntry> strs;
+    if (!rcldb->termMatch(Rcl::Db::ET_WILD, prefs.queryStemLang.ascii(),
+			  s, strs, max) || strs.size() == 0) {
 	QApplication::beep();
 	return;
    }
@ -186,12 +184,14 @@ void SSearch::completion()
    QString res;
    bool ok = false;
    if (strs.size() == 1) {
-	res = QString::fromUtf8(strs.begin()->c_str());
+	res = QString::fromUtf8(strs.begin()->term.c_str());
 	ok = true;
    } else {
 	QStringList lst;
-	for (list<string>::iterator it=strs.begin(); it != strs.end(); it++) 
-	    lst.push_back(QString::fromUtf8(it->c_str()));
+	for (list<Rcl::TermMatchEntry>::iterator it=strs.begin(); 
+	     it != strs.end(); it++) {
+	    lst.push_back(QString::fromUtf8(it->term.c_str()));
+	}
 	res = QInputDialog::getItem(tr("Completions"),
 				    tr("Select an item:"), lst, 0, 
 				    FALSE, &ok, this);
--- a/src/qtgui/viewaction.ui
+++ b/src/qtgui/viewaction.ui
@ -59,9 +59,6 @@
                    <property name="frameShadow">
                        <enum>Sunken</enum>
                    </property>
-                    <property name="resizePolicy">
-                        <enum>Manual</enum>
-                    </property>
                    <property name="selectionMode">
                        <enum>Extended</enum>
                    </property>
@ -71,9 +68,6 @@
                    <property name="showSortIndicator">
                        <bool>true</bool>
                    </property>
-                    <property name="resizeMode">
-                        <enum>LastColumn</enum>
-                    </property>
                    <property name="toolTip" stdset="0">
                        <string>Select one or several file types, then click Change Action to modify the program used to open them</string>
                    </property>
--- a/src/qtgui/viewaction_w.cpp
+++ b/src/qtgui/viewaction_w.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: viewaction_w.cpp,v 1.3 2006-12-18 16:45:52 dockes Exp $ (C) 2006 J.F.Dockes";
+static char rcsid[] = "@(#$Id: viewaction_w.cpp,v 1.4 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -37,6 +37,7 @@ using namespace std;

 #include <qmessagebox.h>
 #include <qinputdialog.h>
+#include <qlayout.h>

 #include "recoll.h"
 #include "debuglog.h"
@ -55,13 +56,6 @@ void ViewAction::init()
 	   SIGNAL(doubleClicked(Q3ListViewItem *, const QPoint &, int)),
 #endif
 	   this, SLOT(editAction()));
-
-    // Note: could get the column width setting to work in qt4
-    actionsLV->setColumnWidthMode(0, QListView::Manual);
-    actionsLV->setColumnWidth(0, 300);
-    actionsLV->setColumnWidthMode(1, QListView::Manual);
-    actionsLV->setColumnWidth(1, 120);
-
    fillLists();
    resize(QSize(450, 250).expandedTo(minimumSizeHint()) );
 }
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.100 2006-12-07 13:24:19 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.101 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -1285,6 +1285,42 @@ bool Db::setQuery(RefCntr<SearchData> sdata, int opts,
    return true;
 }

+// list::sort() ordering: highest wcf first. Compares directly instead of
+// testing the sign of a difference, which can overflow for extreme values.
+struct TermMatchCmpByWcf {
+    bool operator()(const TermMatchEntry& l, const TermMatchEntry& r) const
+    { return l.wcf > r.wcf; }
+};
+// list::sort() predicate on the term text: true when l compares above r
+class TermMatchCmpByTerm {
+public:
+    int operator()(const TermMatchEntry& l, const TermMatchEntry& r)
+    { return r.term < l.term; }
+};
+// list::unique() predicate: two entries are duplicates when terms match
+class TermMatchTermEqual {
+public:
+    int operator()(const TermMatchEntry& l, const TermMatchEntry& r)
+    { return l.term == r.term; }
+};
+
+// Collect stem expansions of term from m_basedir and every m_extraDbs entry
+bool Db::stemExpand(const string &lang, const string &term, 
+		    list<TermMatchEntry>& result, int max)
+{
+    list<string> dbs = m_extraDbs;
+    dbs.push_front(m_basedir);
+    for (list<string>::iterator db = dbs.begin(); db != dbs.end(); ++db) {
+	list<string> found;
+	StemDb::stemExpand(*db, lang, term, found);
+	LOGDEB1(("Db::stemExpand: Got %d from %s\n", 
+		 found.size(), db->c_str()));
+	result.insert(result.end(), found.begin(), found.end());
+    }
+    LOGDEB1(("Db:::stemExpand: final count %d \n", result.size()));
+    return true; // Always: per-db failures and max are not looked at here
+}
+
 // Characters that can begin a wildcard or regexp expression. We use skipto
 // to begin the allterms search with terms that begin with the portion of
 // the input string prior to these chars.
@ -1292,85 +1328,97 @@ const string wildSpecChars = "*?[";
 const string regSpecChars = "(.[{";

 // Find all index terms that match a wildcard or regular expression
-bool Db::termMatch(MatchType typ, const string &root, list<string>& res,
-		     const string &lang, int max)
+bool Db::termMatch(MatchType typ, const string &lang, // lang: only read in ET_STEM mode
+		   const string &root, // root: the expression to expand
+		   list<TermMatchEntry>& res, // res: cleared, then filled with the matches
+		   int max) // max: when > 0, at most max entries are kept
 {
    if (!m_ndb || !m_ndb->m_isopen)
 	return false;
+
    Xapian::Database db = m_ndb->m_iswritable ? m_ndb->wdb: m_ndb->db;
+
    res.clear();
+
    // Get rid of capitals and accents
    string droot;
    dumb_string(root, droot);
    string nochars = typ == ET_WILD ? wildSpecChars : regSpecChars;

-    regex_t reg;
-    int errcode;
-    // Compile regexp. We anchor the input by enclosing it in ^ and $
-    if (typ == ET_REGEXP) {
-	string mroot = droot;
-	if (mroot.at(0) != '^')
-	    mroot = string("^") + mroot;
-	if (mroot.at(mroot.length()-1) != '$')
-	    mroot += "$";
-	if ((errcode = regcomp(&reg, mroot.c_str(), REG_EXTENDED|REG_NOSUB))) {
-	    char errbuf[200];
-	    regerror(errcode, &reg, errbuf, 199);
-	    LOGERR(("termMatch: regcomp failed: %s\n", errbuf));
-	    res.push_back(errbuf);
-	    regfree(&reg);
+    if (typ == ET_STEM) { // Stem mode: expand root through stemExpand(), then look up frequencies
+	if (!stemExpand(lang, root, res, max))
 	    return false;
+	for (list<TermMatchEntry>::iterator it = res.begin(); 
+	     it != res.end(); it++) {
+	    it->wcf = db.get_collection_freq(it->term);
+	    LOGDEB(("termMatch: %d [%s]\n", it->wcf, it->term.c_str()));
 	}
-    }
-
-    // Find the initial section before any special char
-    string::size_type es = droot.find_first_of(nochars);
-    string is;
-    switch (es) {
-    case string::npos: is = droot;break;
-    case 0: break;
-    default: is = droot.substr(0, es);break;
-    }
-    LOGDEB(("termMatch: initsec: [%s]\n", is.c_str()));
-
-    Xapian::TermIterator it = db.allterms_begin(); 
-    if (!is.empty())
-	it.skip_to(is.c_str());
-    for (int n = 0;it != db.allterms_end(); it++) {
-        // If we're beyond the terms matching the initial string, end
-	if (!is.empty() && (*it).find(is) != 0)
-	    break;
-	// Don't match special internal terms beginning with uppercase ascii
-	if ((*it).at(0) >= 'A' && (*it).at(0) <= 'Z')
-	    continue;
-	if (typ == ET_WILD) {
-	    if (fnmatch(droot.c_str(), (*it).c_str(), 0) == FNM_NOMATCH)
-		continue;
-	} else {
-	    if (regexec(&reg, (*it).c_str(), 0, 0, 0))
-		continue;
-	}
-	// Do we want stem expansion here? We don't do it for now
-	if (1 || lang.empty()) {
-	    res.push_back(*it);
-	    ++n;
-	} else {
-	    list<string> stemexps = stemExpand(lang, *it);
-	    unsigned int cnt = 
-		(int)stemexps.size() > max - n ? max - n : stemexps.size();
-	    list<string>::iterator sit = stemexps.begin();
-	    while (cnt--) {
-		res.push_back(*sit++);
-		n++;
+    } else { // Wildcard or regexp mode: walk the index term list
+	regex_t reg;
+	int errcode;
+	if (typ == ET_REGEXP) {
+	    // Compile regexp. We anchor the input by enclosing it in ^ and $
+	    string mroot = droot;
+	    if (mroot.at(0) != '^') // NOTE(review): at(0) throws if droot is empty - confirm callers never pass ""
+		mroot = string("^") + mroot;
+	    if (mroot.at(mroot.length()-1) != '$')
+		mroot += "$";
+	    if ((errcode = regcomp(&reg, mroot.c_str(), 
+				   REG_EXTENDED|REG_NOSUB))) {
+		char errbuf[200];
+		regerror(errcode, &reg, errbuf, 199);
+		LOGERR(("termMatch: regcomp failed: %s\n", errbuf));
+		res.push_back(string(errbuf)); // The error text is returned as the only entry
+		regfree(&reg);
+		return false;
 	    }
 	}
-	if (n >= max)
-	    break;
+
+	// Find the initial section before any special char
+	string::size_type es = droot.find_first_of(nochars);
+	string is;
+	switch (es) {
+	case string::npos: is = droot;break;
+	case 0: break;
+	default: is = droot.substr(0, es);break;
+	}
+	LOGDEB(("termMatch: initsec: [%s]\n", is.c_str()));
+
+	Xapian::TermIterator it = db.allterms_begin(); 
+	if (!is.empty())
+	    it.skip_to(is.c_str());
+	for (int n = 0;it != db.allterms_end(); it++) { // n only counts matches: max is applied after sorting
+	    // If we're beyond the terms matching the initial string, end
+	    if (!is.empty() && (*it).find(is) != 0)
+		break;
+	    // Don't match special internal terms beginning with uppercase ascii
+	    if ((*it).at(0) >= 'A' && (*it).at(0) <= 'Z')
+		continue;
+	    if (typ == ET_WILD) {
+		if (fnmatch(droot.c_str(), (*it).c_str(), 0) == FNM_NOMATCH)
+		    continue;
+	    } else {
+		if (regexec(&reg, (*it).c_str(), 0, 0, 0))
+		    continue;
+	    }
+	    // Do we want stem expansion here? We don't do it for now
+	    res.push_back(TermMatchEntry(*it, it.get_termfreq())); // NOTE(review): stem mode fills wcf from get_collection_freq() - confirm both are meant
+	    ++n;
+	}
+	if (typ == ET_REGEXP) {
+	    regfree(&reg);
+	}
+
    }
-    res.sort();
-    res.unique();
-    if (typ == ET_REGEXP) {
-	regfree(&reg);
+
+    TermMatchCmpByTerm tcmp;
+    res.sort(tcmp); // Bring equal terms together so that unique() can drop duplicates
+    TermMatchTermEqual teq;
+    res.unique(teq);
+    TermMatchCmpByWcf wcmp;
+    res.sort(wcmp); // Final order is by wcf, as defined by TermMatchCmpByWcf
+    if (max > 0) { // max <= 0: the list is not truncated
+	res.resize(MIN(res.size(), (unsigned int)max));
    }
    return true;
 }
@ -1417,23 +1465,6 @@ bool Db::termExists(const string& word)
    return true;
 }

-list<string> Db::stemExpand(const string& lang, const string& term) 
-{
-    list<string> dirs = m_extraDbs;
-    dirs.push_front(m_basedir);
-    list<string> exp;
-    for (list<string>::iterator it = dirs.begin();
-	 it != dirs.end(); it++) {
-	list<string> more = StemDb::stemExpand(*it, lang, term);
-	LOGDEB1(("Db::stemExpand: Got %d from %s\n", 
-		 more.size(), it->c_str()));
-	exp.splice(exp.end(), more);
-    }
-    exp.sort();
-    exp.unique();
-    LOGDEB1(("Db:::stemExpand: final count %d \n", exp.size()));
-    return exp;
-}

 bool Db::stemDiffers(const string& lang, const string& word, 
 		     const string& base)
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@ -16,7 +16,7 @@
 */
 #ifndef _DB_H_INCLUDED_
 #define _DB_H_INCLUDED_
-/* @(#$Id: rcldb.h,v 1.44 2006-12-14 14:54:13 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: rcldb.h,v 1.45 2006-12-19 12:11:21 dockes Exp $  (C) 2004 J.F.Dockes */

 #include <string>
 #include <list>
@ -53,7 +53,16 @@ namespace Rcl {
 class SearchData;
 class Native;
 class TermIter;
- 
+
+class TermMatchEntry { // One term returned by Db::termMatch()
+public:
+    TermMatchEntry() : wcf(0) {}
+    // Usable as an implicit string conversion too: wcf then defaults to 0
+    TermMatchEntry(const string& t, int f = 0) : term(t), wcf(f) {}
+    string term;
+    int    wcf; // Within collection frequency
+};
+
 /**
 * Wrapper class for the native database.
 */
@ -109,9 +118,9 @@ class Db {
    /** Return a list of index terms that match the input string
     * Expansion is performed either with either wildcard or regexp processing
     * Stem expansion is performed if lang is not empty */
-    enum MatchType {ET_WILD, ET_REGEXP};
-    bool termMatch(MatchType typ, const string &s, list<string>& result,
-		   const string &lang, int max=20);
+    enum MatchType {ET_WILD, ET_REGEXP, ET_STEM};
+    bool termMatch(MatchType typ, const string &lang, const string &s, 
+		   list<TermMatchEntry>& result, int max = -1);

    /** Add extra database for querying */
    bool addQueryDb(const string &dir);
@ -159,12 +168,11 @@ class Db {
    bool stemDiffers(const string& lang, const string& term, 
 		     const string& base);
    
-    /** Perform stem expansion across all dbs configured for searching */
-    list<string> stemExpand(const string& lang, const string& term);
-
    /** Filename wildcard expansion */
    bool filenameWildExp(const string& exp, list<string>& names);
    string getReason(){return m_reason;}
+
+
 private:

    string m_filterTopDir; // Current query filter on subtree top directory 
@ -201,6 +209,8 @@ private:
    vector<bool> updated;

    bool reOpen(); // Close/open, same mode/opts
+    bool stemExpand(const string &lang, const string &s, 
+		    list<TermMatchEntry>& result, int max = -1);

    /* Copyconst and assignemt private and forbidden */
    Db(const Db &) {}
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.6 2006-11-30 13:38:44 dockes Exp $ (C) 2006 J.F.Dockes";
+static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.7 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -214,7 +214,12 @@ void StringToXapianQ::maybeStemExp(bool nostemexp,
    if (nostemexp) {
 	exp = list<string>(1, term1);
    } else {
-	exp = m_db.stemExpand(m_stemlang, term1);
+	list<TermMatchEntry> l;
+	m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term1, l);
+	for (list<TermMatchEntry>::const_iterator it = l.begin(); 
+	     it != l.end(); it++) {
+	    exp.push_back(it->term);
+	}
    }
 }

--- a/src/rcldb/stemdb.cpp
+++ b/src/rcldb/stemdb.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.5 2006-10-09 16:37:08 dockes Exp $ (C) 2005 J.F.Dockes";
+static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.6 2006-12-19 12:11:21 dockes Exp $ (C) 2005 J.F.Dockes";
 #endif

 /**
@ -206,13 +206,24 @@ bool createDb(Xapian::Database& xdb, const string& dbdir, const string& lang)
    return true;
 }

+// Render a string list as "[a] [b] [c]" for log output; an empty list
+// yields an empty string.
+static string stringlistdisp(const list<string>& sl)
+{
+    string out;
+    for (list<string>::const_iterator e = sl.begin(); e != sl.end(); ++e)
+	out.append(out.empty() ? "[" : " [").append(*e).append("]");
+    return out;
+}
+
 /**
 * Expand term to list of all terms which stem to the same term.
 */
-list<string> stemExpand(const string& dbdir, const string& lang,
-			const string& term)
+bool stemExpand(const std::string& dbdir, 
+		const std::string& lang,
+		const std::string& term,
+		list<string>& result)
 {
-    list<string> explist;
    try {
 	Xapian::Stem stemmer(lang);
 	string stem = stemmer.stem_word(term);
@ -224,14 +235,14 @@ list<string> stemExpand(const string& dbdir, const string& lang,
 		stemdbdir.c_str(), sdb.get_lastdocid()));
 	if (!sdb.term_exists(stem)) {
 	    LOGDEB1(("Db::stemExpand: no term for %s\n", stem.c_str()));
-	    explist.push_back(term);
-	    return explist;
+	    result.push_back(term);
+	    return true;
 	}
 	Xapian::PostingIterator did = sdb.postlist_begin(stem);
 	if (did == sdb.postlist_end(stem)) {
 	    LOGDEB1(("stemExpand: no term(1) for %s\n",stem.c_str()));
-	    explist.push_back(term);
-	    return explist;
+	    result.push_back(term);
+	    return true;
 	}
 	Xapian::Document doc = sdb.get_document(*did);
 	string data = doc.get_data();
@ -242,24 +253,24 @@ list<string> stemExpand(const string& dbdir, const string& lang,
 	++pos;
 	string::size_type pos1 = data.find_last_of("\n");
 	if (pos == string::npos || pos1 == string::npos ||pos1 <= pos) { // ??
-	    explist.push_back(term);
-	    return explist;
+	    result.push_back(term);
+	    return true;
 	}
-	stringToStrings(data.substr(pos, pos1-pos), explist);
+	stringToStrings(data.substr(pos, pos1-pos), result);

 	// If the user term itself is not in the list, add it.
-	if (find(explist.begin(), explist.end(), term) == explist.end()) {
-	    explist.push_back(term);
+	if (find(result.begin(), result.end(), term) == result.end()) {
+	    result.push_back(term);
 	}
 	LOGDEB(("stemExpand: %s ->  %s\n", stem.c_str(),
-		stringlistdisp(explist).c_str()));
+		stringlistdisp(result).c_str()));
    } catch (...) {
 	LOGERR(("stemExpand: error accessing stem db. dbdir [%s] lang [%s]\n",
 		dbdir.c_str(), lang.c_str()));
-	explist.push_back(term);
-	return explist;
+	result.push_back(term);
+	return false;
    }
-    return explist;
+    return true;
 }

 }
--- a/src/rcldb/stemdb.h
+++ b/src/rcldb/stemdb.h
@ -1,6 +1,6 @@
 #ifndef _STEMDB_H_INCLUDED_
 #define _STEMDB_H_INCLUDED_
-/* @(#$Id: stemdb.h,v 1.2 2006-11-15 14:57:53 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: stemdb.h,v 1.3 2006-12-19 12:11:21 dockes Exp $  (C) 2004 J.F.Dockes */
 /// Stem database code
 /// 
 /// Stem databases list stems and the set of index terms they expand to. They 
@ -13,6 +13,7 @@
 #include <string>

 #include <xapian.h>
+
 #ifndef NO_NAMESPACES
 using std::string;
 using std::list;
@ -28,10 +29,10 @@ extern bool deleteDb(const std::string& dbdir, const std::string& lang);
 extern bool createDb(Xapian::Database& xdb, 
 		     const std::string& dbdir, const std::string& lang);
 /// Expand term to stem siblings
-extern std::list<std::string> stemExpand(const std::string& dbdir, 
-					 const std::string& lang,
-					 const std::string& term);
-
+extern bool stemExpand(const std::string& dbdir, 
+		       const std::string& lang,
+		       const std::string& term,
+		       list<string>& result);
 #ifndef NO_NAMESPACES
 }
 }
--- a/src/utils/smallut.cpp
+++ b/src/utils/smallut.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: smallut.cpp,v 1.24 2006-12-18 12:06:11 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: smallut.cpp,v 1.25 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -38,16 +38,6 @@ using namespace std;

 #define MIN(A,B) ((A)<(B)?(A):(B))

-string stringlistdisp(const list<string>& sl)
-{
-    string s;
-    for (list<string>::const_iterator it = sl.begin(); it!= sl.end(); it++)
-	s += "[" + *it + "] ";
-    if (!s.empty())
-	s.erase(s.length()-1);
-    return s;
-}
-
 int stringicmp(const string & s1, const string& s2) 
 {
    string::const_iterator it1 = s1.begin();
--- a/src/utils/smallut.h
+++ b/src/utils/smallut.h
@ -16,7 +16,7 @@
 */
 #ifndef _SMALLUT_H_INCLUDED_
 #define _SMALLUT_H_INCLUDED_
-/* @(#$Id: smallut.h,v 1.24 2006-12-18 12:06:11 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: smallut.h,v 1.25 2006-12-19 12:11:21 dockes Exp $  (C) 2004 J.F.Dockes */
 #include <string>
 #include <list>
 #include <map>
@ -38,8 +38,6 @@ extern int stringisuffcmp(const string& s1, const string& s2);
 // Compare charset names, removing the more common spelling variations
 extern bool samecharset(const string &cs1, const string &cs2);

-extern string stringlistdisp(const list<string>& strs);
-
 /**
 * Parse input string into list of strings. 
 *