From be05eaa6e0e80c5ae80ce650630c5fad3c447ad9 Mon Sep 17 00:00:00 2001 From: dockes Date: Tue, 19 Dec 2006 12:11:21 +0000 Subject: [PATCH] merge stemExpand into termExpand. return term frequencies from there and display in spellW --- src/qtgui/spell.ui | 49 ++++++---- src/qtgui/spell_w.cpp | 113 +++++++++++++++------ src/qtgui/spell_w.h | 4 +- src/qtgui/ssearch_w.cpp | 18 ++-- src/qtgui/viewaction.ui | 6 -- src/qtgui/viewaction_w.cpp | 10 +- src/rcldb/rcldb.cpp | 195 +++++++++++++++++++++---------------- src/rcldb/rcldb.h | 26 +++-- src/rcldb/searchdata.cpp | 9 +- src/rcldb/stemdb.cpp | 45 +++++---- src/rcldb/stemdb.h | 11 ++- src/utils/smallut.cpp | 12 +-- src/utils/smallut.h | 4 +- 13 files changed, 301 insertions(+), 201 deletions(-) diff --git a/src/qtgui/spell.ui b/src/qtgui/spell.ui index 8f35fc7d..f768f024 100644 --- a/src/qtgui/spell.ui +++ b/src/qtgui/spell.ui @@ -106,30 +106,40 @@ - + + + + Term + + + true + + + true + + + + + Count + + + true + + + true + + - suggsTE + suggsLV - - - 0 - 200 - + + Extended - - TabFocus - - - PlainText - - + true - - false - - - true + + NoColumn @@ -139,7 +149,6 @@ baseWordLE expandPB - suggsTE dismissPB expTypeCMB stemLangCMB diff --git a/src/qtgui/spell_w.cpp b/src/qtgui/spell_w.cpp index e63703c5..3df01d59 100644 --- a/src/qtgui/spell_w.cpp +++ b/src/qtgui/spell_w.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: spell_w.cpp,v 1.7 2006-11-30 13:38:44 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: spell_w.cpp,v 1.8 2006-12-19 12:11:21 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -25,17 +25,22 @@ static char rcsid[] = "@(#$Id: spell_w.cpp,v 1.7 2006-11-30 13:38:44 dockes Exp #include #include -#include #include #include #include #include #include +#if (QT_VERSION < 0x040000) +#include +#else +#include +#endif #include "debuglog.h" #include "recoll.h" #include "spell_w.h" #include "guiutils.h" +#include "rcldb.h" #ifdef RCL_USE_ASPELL #include "rclaspell.h" @@ -79,16 +84,47 @@ void SpellW::init() connect(baseWordLE, SIGNAL(returnPressed()), this, SLOT(doExpand())); connect(expandPB, SIGNAL(clicked()), this, SLOT(doExpand())); connect(dismissPB, SIGNAL(clicked()), this, SLOT(close())); - connect(suggsTE, SIGNAL(doubleClicked(int, int)), - this, SLOT(textDoubleClicked(int, int))); + + connect(suggsLV, +#if (QT_VERSION < 0x040000) + SIGNAL(doubleClicked(QListViewItem *, const QPoint &, int)), +#else + SIGNAL(doubleClicked(Q3ListViewItem *, const QPoint &, int)), +#endif + this, SLOT(textDoubleClicked())); + connect(expTypeCMB, SIGNAL(activated(int)), this, SLOT(modeSet(int))); + + suggsLV->setColumnWidth(0, 200); + suggsLV->setColumnWidth(1, 100); + // No initial sorting: user can choose to establish one + suggsLV->setSorting(100, false); } +// Subclass qlistviewitem for numeric sorting on column 1 +class MyListViewItem : public QListViewItem +{ +public: + MyListViewItem(QListView *listView, const QString& s1, const QString& s2) + : QListViewItem(listView, s1, s2) + { } + + int compare(QListViewItem * i, int col, bool ascending) const { + if (col == 0) + return i->text(0).compare(text(0)); + if (col == 1) + return i->text(1).toInt() - text(1).toInt(); + // ?? + return 0; + } +}; + + /* Expand term according to current mode */ void SpellW::doExpand() { - suggsTE->clear(); + suggsLV->clear(); if (baseWordLE->text().isEmpty()) return; @@ -100,26 +136,27 @@ void SpellW::doExpand() string expr = string((const char *)baseWordLE->text().utf8()); list suggs; + prefs.termMatchType = expTypeCMB->currentItem(); Rcl::Db::MatchType mt = Rcl::Db::ET_WILD; + switch(expTypeCMB->currentItem()) { + case 0: mt = Rcl::Db::ET_WILD; break; + case 1:mt = Rcl::Db::ET_REGEXP; break; + case 2:mt = Rcl::Db::ET_STEM; break; + } + + list entries; switch (expTypeCMB->currentItem()) { - case 1: mt = Rcl::Db::ET_REGEXP; - /* FALLTHROUGH */ case 0: - if (!rcldb->termMatch(mt, expr, suggs, prefs.queryStemLang.ascii(), + case 1: + case 2: { + if (!rcldb->termMatch(mt, prefs.queryStemLang.ascii(), expr, entries, 200)) { LOGERR(("SpellW::doExpand:rcldb::termMatch failed\n")); return; } - break; - - - case 2: - { - string stemlang = (const char *)stemLangCMB->currentText().utf8(); - suggs = rcldb->stemExpand(stemlang,expr); - } + } break; #ifdef RCL_USE_ASPELL @@ -132,24 +169,37 @@ void SpellW::doExpand() LOGDEB(("SpellW::doExpand: aspell init error\n")); return; } + list suggs; if (!aspell->suggest(*rcldb, expr, suggs, reason)) { QMessageBox::warning(0, "Recoll", tr("Aspell expansion error. ")); LOGERR(("SpellW::doExpand:suggest failed: %s\n", reason.c_str())); } + for (list::const_iterator it = suggs.begin(); + it != suggs.end(); it++) + entries.push_back(Rcl::TermMatchEntry(*it)); } #endif } - if (suggs.empty()) { - suggsTE->append(tr("No expansion found")); + + if (entries.empty()) { + new MyListViewItem(suggsLV, tr("No expansion found"), ""); } else { - for (list::iterator it = suggs.begin(); - it != suggs.end(); it++) { - suggsTE->append(QString::fromUtf8(it->c_str())); + // Seems that need to use a reverse iterator to get same order in + // listview and input list ?? + for (list::reverse_iterator it = entries.rbegin(); + it != entries.rend(); it++) { + LOGDEB(("SpellW::expand: %6d [%s]\n", it->wcf, it->term.c_str())); + char num[20]; + if (it->wcf) + sprintf(num, "%d", it->wcf); + else + num[0] = 0; + new MyListViewItem(suggsLV, + QString::fromUtf8(it->term.c_str()), + QString::fromAscii(num)); } - suggsTE->setCursorPosition(0,0); - suggsTE->ensureCursorVisible(); } } @@ -157,17 +207,24 @@ void SpellW::wordChanged(const QString &text) { if (text.isEmpty()) { expandPB->setEnabled(false); - suggsTE->clear(); + suggsLV->clear(); } else { expandPB->setEnabled(true); } } -void SpellW::textDoubleClicked(int para, int) +void SpellW::textDoubleClicked() { - suggsTE->setSelection(para, 0, para, 1000); - if (suggsTE->hasSelectedText()) - emit(wordSelect(suggsTE->selectedText())); + QListViewItemIterator it(suggsLV); + while (it.current()) { + QListViewItem *item = it.current(); + if (!item->isSelected()) { + ++it; + continue; + } + emit(wordSelect((const char *)item->text(0))); + ++it; + } } void SpellW::modeSet(int mode) diff --git a/src/qtgui/spell_w.h b/src/qtgui/spell_w.h index 5f3a895d..c25acb06 100644 --- a/src/qtgui/spell_w.h +++ b/src/qtgui/spell_w.h @@ -1,6 +1,6 @@ #ifndef _ASPELL_W_H_INCLUDED_ #define _ASPELL_W_H_INCLUDED_ -/* @(#$Id: spell_w.h,v 1.5 2006-12-04 09:56:26 dockes Exp $ (C) 2006 J.F.Dockes */ +/* @(#$Id: spell_w.h,v 1.6 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes */ /* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -54,7 +54,7 @@ public: public slots: virtual void doExpand(); virtual void wordChanged(const QString&); - virtual void textDoubleClicked(int, int); + virtual void textDoubleClicked(); virtual void modeSet(int); signals: diff --git a/src/qtgui/ssearch_w.cpp b/src/qtgui/ssearch_w.cpp index 7d5a3218..e44eb3fb 100644 --- a/src/qtgui/ssearch_w.cpp +++ b/src/qtgui/ssearch_w.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.16 2006-12-14 13:53:43 dockes Exp $ (C) 2006 J.F.Dockes"; +static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.17 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -169,11 +169,9 @@ void SSearch::completion() // Query database const int max = 100; - list strs; - - if (!rcldb->termMatch(Rcl::Db::ET_WILD, s, strs, - prefs.queryStemLang.ascii(),max) - || strs.size() == 0) { + list strs; + if (!rcldb->termMatch(Rcl::Db::ET_WILD, prefs.queryStemLang.ascii(), + s, strs, max) || strs.size() == 0) { QApplication::beep(); return; } @@ -186,12 +184,14 @@ void SSearch::completion() QString res; bool ok = false; if (strs.size() == 1) { - res = QString::fromUtf8(strs.begin()->c_str()); + res = QString::fromUtf8(strs.begin()->term.c_str()); ok = true; } else { QStringList lst; - for (list::iterator it=strs.begin(); it != strs.end(); it++) - lst.push_back(QString::fromUtf8(it->c_str())); + for (list::iterator it=strs.begin(); + it != strs.end(); it++) { + lst.push_back(QString::fromUtf8(it->term.c_str())); + } res = QInputDialog::getItem(tr("Completions"), tr("Select an item:"), lst, 0, FALSE, &ok, this); diff --git a/src/qtgui/viewaction.ui b/src/qtgui/viewaction.ui index a1e556bd..09911d4e 100644 --- a/src/qtgui/viewaction.ui +++ b/src/qtgui/viewaction.ui @@ -59,9 +59,6 @@ Sunken - - Manual - Extended @@ -71,9 +68,6 @@ true - - LastColumn - Select one or several file types, then click Change Action to modify the program used to open them diff --git a/src/qtgui/viewaction_w.cpp b/src/qtgui/viewaction_w.cpp index 49eb9baf..93f71edd 100644 --- a/src/qtgui/viewaction_w.cpp +++ b/src/qtgui/viewaction_w.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: viewaction_w.cpp,v 1.3 2006-12-18 16:45:52 dockes Exp $ (C) 2006 J.F.Dockes"; +static char rcsid[] = "@(#$Id: viewaction_w.cpp,v 1.4 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -37,6 +37,7 @@ using namespace std; #include #include +#include #include "recoll.h" #include "debuglog.h" @@ -55,13 +56,6 @@ void ViewAction::init() SIGNAL(doubleClicked(Q3ListViewItem *, const QPoint &, int)), #endif this, SLOT(editAction())); - - // Note: could get the column width setting to work in qt4 - actionsLV->setColumnWidthMode(0, QListView::Manual); - actionsLV->setColumnWidth(0, 300); - actionsLV->setColumnWidthMode(1, QListView::Manual); - actionsLV->setColumnWidth(1, 120); - fillLists(); resize(QSize(450, 250).expandedTo(minimumSizeHint()) ); } diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index a8698b02..37eb9a95 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.100 2006-12-07 13:24:19 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.101 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -1285,6 +1285,42 @@ bool Db::setQuery(RefCntr sdata, int opts, return true; } +class TermMatchCmpByWcf { +public: + int operator()(const TermMatchEntry& l, const TermMatchEntry& r) { + return r.wcf - l.wcf < 0; + } +}; +class TermMatchCmpByTerm { +public: + int operator()(const TermMatchEntry& l, const TermMatchEntry& r) { + return l.term.compare(r.term) > 0; + } +}; +class TermMatchTermEqual { +public: + int operator()(const TermMatchEntry& l, const TermMatchEntry& r) { + return !l.term.compare(r.term); + } +}; + +bool Db::stemExpand(const string &lang, const string &term, + list& result, int max) +{ + list dirs = m_extraDbs; + dirs.push_front(m_basedir); + for (list::iterator it = dirs.begin(); + it != dirs.end(); it++) { + list more; + StemDb::stemExpand(*it, lang, term, more); + LOGDEB1(("Db::stemExpand: Got %d from %s\n", + more.size(), it->c_str())); + result.insert(result.end(), more.begin(), more.end()); + } + LOGDEB1(("Db:::stemExpand: final count %d \n", result.size())); + return true; +} + // Characters that can begin a wildcard or regexp expression. We use skipto // to begin the allterms search with terms that begin with the portion of // the input string prior to these chars. @@ -1292,85 +1328,97 @@ const string wildSpecChars = "*?["; const string regSpecChars = "(.[{"; // Find all index terms that match a wildcard or regular expression -bool Db::termMatch(MatchType typ, const string &root, list& res, - const string &lang, int max) +bool Db::termMatch(MatchType typ, const string &lang, + const string &root, + list& res, + int max) { if (!m_ndb || !m_ndb->m_isopen) return false; + Xapian::Database db = m_ndb->m_iswritable ? m_ndb->wdb: m_ndb->db; + res.clear(); + // Get rid of capitals and accents string droot; dumb_string(root, droot); string nochars = typ == ET_WILD ? wildSpecChars : regSpecChars; - regex_t reg; - int errcode; - // Compile regexp. We anchor the input by enclosing it in ^ and $ - if (typ == ET_REGEXP) { - string mroot = droot; - if (mroot.at(0) != '^') - mroot = string("^") + mroot; - if (mroot.at(mroot.length()-1) != '$') - mroot += "$"; - if ((errcode = regcomp(®, mroot.c_str(), REG_EXTENDED|REG_NOSUB))) { - char errbuf[200]; - regerror(errcode, ®, errbuf, 199); - LOGERR(("termMatch: regcomp failed: %s\n", errbuf)); - res.push_back(errbuf); - regfree(®); + if (typ == ET_STEM) { + if (!stemExpand(lang, root, res, max)) return false; + for (list::iterator it = res.begin(); + it != res.end(); it++) { + it->wcf = db.get_collection_freq(it->term); + LOGDEB(("termMatch: %d [%s]\n", it->wcf, it->term.c_str())); } - } - - // Find the initial section before any special char - string::size_type es = droot.find_first_of(nochars); - string is; - switch (es) { - case string::npos: is = droot;break; - case 0: break; - default: is = droot.substr(0, es);break; - } - LOGDEB(("termMatch: initsec: [%s]\n", is.c_str())); - - Xapian::TermIterator it = db.allterms_begin(); - if (!is.empty()) - it.skip_to(is.c_str()); - for (int n = 0;it != db.allterms_end(); it++) { - // If we're beyond the terms matching the initial string, end - if (!is.empty() && (*it).find(is) != 0) - break; - // Don't match special internal terms beginning with uppercase ascii - if ((*it).at(0) >= 'A' && (*it).at(0) <= 'Z') - continue; - if (typ == ET_WILD) { - if (fnmatch(droot.c_str(), (*it).c_str(), 0) == FNM_NOMATCH) - continue; - } else { - if (regexec(®, (*it).c_str(), 0, 0, 0)) - continue; - } - // Do we want stem expansion here? We don't do it for now - if (1 || lang.empty()) { - res.push_back(*it); - ++n; - } else { - list stemexps = stemExpand(lang, *it); - unsigned int cnt = - (int)stemexps.size() > max - n ? max - n : stemexps.size(); - list::iterator sit = stemexps.begin(); - while (cnt--) { - res.push_back(*sit++); - n++; + } else { + regex_t reg; + int errcode; + if (typ == ET_REGEXP) { + // Compile regexp. We anchor the input by enclosing it in ^ and $ + string mroot = droot; + if (mroot.at(0) != '^') + mroot = string("^") + mroot; + if (mroot.at(mroot.length()-1) != '$') + mroot += "$"; + if ((errcode = regcomp(®, mroot.c_str(), + REG_EXTENDED|REG_NOSUB))) { + char errbuf[200]; + regerror(errcode, ®, errbuf, 199); + LOGERR(("termMatch: regcomp failed: %s\n", errbuf)); + res.push_back(string(errbuf)); + regfree(®); + return false; } } - if (n >= max) - break; + + // Find the initial section before any special char + string::size_type es = droot.find_first_of(nochars); + string is; + switch (es) { + case string::npos: is = droot;break; + case 0: break; + default: is = droot.substr(0, es);break; + } + LOGDEB(("termMatch: initsec: [%s]\n", is.c_str())); + + Xapian::TermIterator it = db.allterms_begin(); + if (!is.empty()) + it.skip_to(is.c_str()); + for (int n = 0;it != db.allterms_end(); it++) { + // If we're beyond the terms matching the initial string, end + if (!is.empty() && (*it).find(is) != 0) + break; + // Don't match special internal terms beginning with uppercase ascii + if ((*it).at(0) >= 'A' && (*it).at(0) <= 'Z') + continue; + if (typ == ET_WILD) { + if (fnmatch(droot.c_str(), (*it).c_str(), 0) == FNM_NOMATCH) + continue; + } else { + if (regexec(®, (*it).c_str(), 0, 0, 0)) + continue; + } + // Do we want stem expansion here? We don't do it for now + res.push_back(TermMatchEntry(*it, it.get_termfreq())); + ++n; + } + if (typ == ET_REGEXP) { + regfree(®); + } + } - res.sort(); - res.unique(); - if (typ == ET_REGEXP) { - regfree(®); + + TermMatchCmpByTerm tcmp; + res.sort(tcmp); + TermMatchTermEqual teq; + res.unique(teq); + TermMatchCmpByWcf wcmp; + res.sort(wcmp); + if (max > 0) { + res.resize(MIN(res.size(), (unsigned int)max)); } return true; } @@ -1417,23 +1465,6 @@ bool Db::termExists(const string& word) return true; } -list Db::stemExpand(const string& lang, const string& term) -{ - list dirs = m_extraDbs; - dirs.push_front(m_basedir); - list exp; - for (list::iterator it = dirs.begin(); - it != dirs.end(); it++) { - list more = StemDb::stemExpand(*it, lang, term); - LOGDEB1(("Db::stemExpand: Got %d from %s\n", - more.size(), it->c_str())); - exp.splice(exp.end(), more); - } - exp.sort(); - exp.unique(); - LOGDEB1(("Db:::stemExpand: final count %d \n", exp.size())); - return exp; -} bool Db::stemDiffers(const string& lang, const string& word, const string& base) diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 73487883..8b1562e5 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -16,7 +16,7 @@ */ #ifndef _DB_H_INCLUDED_ #define _DB_H_INCLUDED_ -/* @(#$Id: rcldb.h,v 1.44 2006-12-14 14:54:13 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rcldb.h,v 1.45 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -53,7 +53,16 @@ namespace Rcl { class SearchData; class Native; class TermIter; - + +class TermMatchEntry { +public: + TermMatchEntry() : wcf(0) {} + TermMatchEntry(const string&t, int f) : term(t), wcf(f) {} + TermMatchEntry(const string&t) : term(t), wcf(0) {} + string term; + int wcf; // Within collection frequency +}; + /** * Wrapper class for the native database. */ @@ -109,9 +118,9 @@ class Db { /** Return a list of index terms that match the input string * Expansion is performed either with either wildcard or regexp processing * Stem expansion is performed if lang is not empty */ - enum MatchType {ET_WILD, ET_REGEXP}; - bool termMatch(MatchType typ, const string &s, list& result, - const string &lang, int max=20); + enum MatchType {ET_WILD, ET_REGEXP, ET_STEM}; + bool termMatch(MatchType typ, const string &lang, const string &s, + list& result, int max = -1); /** Add extra database for querying */ bool addQueryDb(const string &dir); @@ -159,12 +168,11 @@ class Db { bool stemDiffers(const string& lang, const string& term, const string& base); - /** Perform stem expansion across all dbs configured for searching */ - list stemExpand(const string& lang, const string& term); - /** Filename wildcard expansion */ bool filenameWildExp(const string& exp, list& names); string getReason(){return m_reason;} + + private: string m_filterTopDir; // Current query filter on subtree top directory @@ -201,6 +209,8 @@ private: vector updated; bool reOpen(); // Close/open, same mode/opts + bool stemExpand(const string &lang, const string &s, + list& result, int max = -1); /* Copyconst and assignemt private and forbidden */ Db(const Db &) {} diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index 360a09d7..da7c033c 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.6 2006-11-30 13:38:44 dockes Exp $ (C) 2006 J.F.Dockes"; +static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.7 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -214,7 +214,12 @@ void StringToXapianQ::maybeStemExp(bool nostemexp, if (nostemexp) { exp = list(1, term1); } else { - exp = m_db.stemExpand(m_stemlang, term1); + list l; + m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term1, l); + for (list::const_iterator it = l.begin(); + it != l.end(); it++) { + exp.push_back(it->term); + } } } diff --git a/src/rcldb/stemdb.cpp b/src/rcldb/stemdb.cpp index 02496a5a..767dac90 100644 --- a/src/rcldb/stemdb.cpp +++ b/src/rcldb/stemdb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.5 2006-10-09 16:37:08 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.6 2006-12-19 12:11:21 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /** @@ -206,13 +206,24 @@ bool createDb(Xapian::Database& xdb, const string& dbdir, const string& lang) return true; } +static string stringlistdisp(const list& sl) +{ + string s; + for (list::const_iterator it = sl.begin(); it!= sl.end(); it++) + s += "[" + *it + "] "; + if (!s.empty()) + s.erase(s.length()-1); + return s; +} + /** * Expand term to list of all terms which stem to the same term. */ -list stemExpand(const string& dbdir, const string& lang, - const string& term) +bool stemExpand(const std::string& dbdir, + const std::string& lang, + const std::string& term, + list& result) { - list explist; try { Xapian::Stem stemmer(lang); string stem = stemmer.stem_word(term); @@ -224,14 +235,14 @@ list stemExpand(const string& dbdir, const string& lang, stemdbdir.c_str(), sdb.get_lastdocid())); if (!sdb.term_exists(stem)) { LOGDEB1(("Db::stemExpand: no term for %s\n", stem.c_str())); - explist.push_back(term); - return explist; + result.push_back(term); + return true; } Xapian::PostingIterator did = sdb.postlist_begin(stem); if (did == sdb.postlist_end(stem)) { LOGDEB1(("stemExpand: no term(1) for %s\n",stem.c_str())); - explist.push_back(term); - return explist; + result.push_back(term); + return true; } Xapian::Document doc = sdb.get_document(*did); string data = doc.get_data(); @@ -242,24 +253,24 @@ list stemExpand(const string& dbdir, const string& lang, ++pos; string::size_type pos1 = data.find_last_of("\n"); if (pos == string::npos || pos1 == string::npos ||pos1 <= pos) { // ?? - explist.push_back(term); - return explist; + result.push_back(term); + return true; } - stringToStrings(data.substr(pos, pos1-pos), explist); + stringToStrings(data.substr(pos, pos1-pos), result); // If the user term itself is not in the list, add it. - if (find(explist.begin(), explist.end(), term) == explist.end()) { - explist.push_back(term); + if (find(result.begin(), result.end(), term) == result.end()) { + result.push_back(term); } LOGDEB(("stemExpand: %s -> %s\n", stem.c_str(), - stringlistdisp(explist).c_str())); + stringlistdisp(result).c_str())); } catch (...) { LOGERR(("stemExpand: error accessing stem db. dbdir [%s] lang [%s]\n", dbdir.c_str(), lang.c_str())); - explist.push_back(term); - return explist; + result.push_back(term); + return false; } - return explist; + return true; } } diff --git a/src/rcldb/stemdb.h b/src/rcldb/stemdb.h index b6e1308a..98ae6030 100644 --- a/src/rcldb/stemdb.h +++ b/src/rcldb/stemdb.h @@ -1,6 +1,6 @@ #ifndef _STEMDB_H_INCLUDED_ #define _STEMDB_H_INCLUDED_ -/* @(#$Id: stemdb.h,v 1.2 2006-11-15 14:57:53 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: stemdb.h,v 1.3 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes */ /// Stem database code /// /// Stem databases list stems and the set of index terms they expand to. They @@ -13,6 +13,7 @@ #include #include + #ifndef NO_NAMESPACES using std::string; using std::list; @@ -28,10 +29,10 @@ extern bool deleteDb(const std::string& dbdir, const std::string& lang); extern bool createDb(Xapian::Database& xdb, const std::string& dbdir, const std::string& lang); /// Expand term to stem siblings -extern std::list stemExpand(const std::string& dbdir, - const std::string& lang, - const std::string& term); - +extern bool stemExpand(const std::string& dbdir, + const std::string& lang, + const std::string& term, + list& result); #ifndef NO_NAMESPACES } } diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp index 488188aa..84b7ecef 100644 --- a/src/utils/smallut.cpp +++ b/src/utils/smallut.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: smallut.cpp,v 1.24 2006-12-18 12:06:11 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: smallut.cpp,v 1.25 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -38,16 +38,6 @@ using namespace std; #define MIN(A,B) ((A)<(B)?(A):(B)) -string stringlistdisp(const list& sl) -{ - string s; - for (list::const_iterator it = sl.begin(); it!= sl.end(); it++) - s += "[" + *it + "] "; - if (!s.empty()) - s.erase(s.length()-1); - return s; -} - int stringicmp(const string & s1, const string& s2) { string::const_iterator it1 = s1.begin(); diff --git a/src/utils/smallut.h b/src/utils/smallut.h index 0fb98169..3a405868 100644 --- a/src/utils/smallut.h +++ b/src/utils/smallut.h @@ -16,7 +16,7 @@ */ #ifndef _SMALLUT_H_INCLUDED_ #define _SMALLUT_H_INCLUDED_ -/* @(#$Id: smallut.h,v 1.24 2006-12-18 12:06:11 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: smallut.h,v 1.25 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include #include @@ -38,8 +38,6 @@ extern int stringisuffcmp(const string& s1, const string& s2); // Compare charset names, removing the more common spelling variations extern bool samecharset(const string &cs1, const string &cs2); -extern string stringlistdisp(const list& strs); - /** * Parse input string into list of strings. *