Merge stemExpand into termMatch. Return term frequencies from there and display them in SpellW

This commit is contained in:
dockes 2006-12-19 12:11:21 +00:00
parent 50b01c6ea4
commit be05eaa6e0
13 changed files with 301 additions and 201 deletions

View File

@ -106,30 +106,40 @@
</widget>
</grid>
</widget>
<widget class="QTextEdit">
<widget class="QListView">
<column>
<property name="text">
<string>Term</string>
</property>
<property name="clickable">
<bool>true</bool>
</property>
<property name="resizable">
<bool>true</bool>
</property>
</column>
<column>
<property name="text">
<string>Count</string>
</property>
<property name="clickable">
<bool>true</bool>
</property>
<property name="resizable">
<bool>true</bool>
</property>
</column>
<property name="name">
<cstring>suggsTE</cstring>
<cstring>suggsLV</cstring>
</property>
<property name="minimumSize">
<size>
<width>0</width>
<height>200</height>
</size>
<property name="selectionMode">
<enum>Extended</enum>
</property>
<property name="focusPolicy">
<enum>TabFocus</enum>
</property>
<property name="textFormat">
<enum>PlainText</enum>
</property>
<property name="readOnly">
<property name="showSortIndicator">
<bool>true</bool>
</property>
<property name="undoRedoEnabled">
<bool>false</bool>
</property>
<property name="tabChangesFocus">
<bool>true</bool>
<property name="resizeMode">
<enum>NoColumn</enum>
</property>
</widget>
</vbox>
@ -139,7 +149,6 @@
<tabstops>
<tabstop>baseWordLE</tabstop>
<tabstop>expandPB</tabstop>
<tabstop>suggsTE</tabstop>
<tabstop>dismissPB</tabstop>
<tabstop>expTypeCMB</tabstop>
<tabstop>stemLangCMB</tabstop>

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: spell_w.cpp,v 1.7 2006-11-30 13:38:44 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: spell_w.cpp,v 1.8 2006-12-19 12:11:21 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -25,17 +25,22 @@ static char rcsid[] = "@(#$Id: spell_w.cpp,v 1.7 2006-11-30 13:38:44 dockes Exp
#include <qmessagebox.h>
#include <qpushbutton.h>
#include <qtextedit.h>
#include <qlabel.h>
#include <qlineedit.h>
#include <qlayout.h>
#include <qtooltip.h>
#include <qcombobox.h>
#if (QT_VERSION < 0x040000)
#include <qlistview.h>
#else
#include <q3listview.h>
#endif
#include "debuglog.h"
#include "recoll.h"
#include "spell_w.h"
#include "guiutils.h"
#include "rcldb.h"
#ifdef RCL_USE_ASPELL
#include "rclaspell.h"
@ -79,16 +84,47 @@ void SpellW::init()
connect(baseWordLE, SIGNAL(returnPressed()), this, SLOT(doExpand()));
connect(expandPB, SIGNAL(clicked()), this, SLOT(doExpand()));
connect(dismissPB, SIGNAL(clicked()), this, SLOT(close()));
connect(suggsTE, SIGNAL(doubleClicked(int, int)),
this, SLOT(textDoubleClicked(int, int)));
connect(suggsLV,
#if (QT_VERSION < 0x040000)
SIGNAL(doubleClicked(QListViewItem *, const QPoint &, int)),
#else
SIGNAL(doubleClicked(Q3ListViewItem *, const QPoint &, int)),
#endif
this, SLOT(textDoubleClicked()));
connect(expTypeCMB, SIGNAL(activated(int)),
this, SLOT(modeSet(int)));
suggsLV->setColumnWidth(0, 200);
suggsLV->setColumnWidth(1, 100);
// No initial sorting: user can choose to establish one
suggsLV->setSorting(100, false);
}
// List view row with two text columns, overriding compare() so that
// column 1 is ordered by the integer value of its text rather than by
// the text itself.
class MyListViewItem : public QListViewItem
{
public:
    // Create a row holding s1 in column 0 and s2 in column 1, attached
    // to listView.
    MyListViewItem(QListView *listView, const QString& s1, const QString& s2)
        : QListViewItem(listView, s1, s2)
    {
    }

    // Sort comparison against item i for column col. The ascending
    // argument is not used.
    // NOTE(review): in both handled columns the operands are taken as
    // (other item, this item), which looks reversed relative to the usual
    // compare() convention -- confirm that this ordering is intended.
    int compare(QListViewItem * i, int col, bool ascending) const
    {
        switch (col) {
        case 0:
            // Plain string comparison of the column 0 texts
            return i->text(0).compare(text(0));
        case 1:
            // Difference of the integer values of the column 1 texts
            return i->text(1).toInt() - text(1).toInt();
        default:
            // Any other column: report the items as equal
            return 0;
        }
    }
};
/* Expand term according to current mode */
void SpellW::doExpand()
{
suggsTE->clear();
suggsLV->clear();
if (baseWordLE->text().isEmpty())
return;
@ -100,26 +136,27 @@ void SpellW::doExpand()
string expr = string((const char *)baseWordLE->text().utf8());
list<string> suggs;
prefs.termMatchType = expTypeCMB->currentItem();
Rcl::Db::MatchType mt = Rcl::Db::ET_WILD;
switch(expTypeCMB->currentItem()) {
case 0: mt = Rcl::Db::ET_WILD; break;
case 1:mt = Rcl::Db::ET_REGEXP; break;
case 2:mt = Rcl::Db::ET_STEM; break;
}
list<Rcl::TermMatchEntry> entries;
switch (expTypeCMB->currentItem()) {
case 1: mt = Rcl::Db::ET_REGEXP;
/* FALLTHROUGH */
case 0:
if (!rcldb->termMatch(mt, expr, suggs, prefs.queryStemLang.ascii(),
case 1:
case 2: {
if (!rcldb->termMatch(mt, prefs.queryStemLang.ascii(), expr, entries,
200)) {
LOGERR(("SpellW::doExpand:rcldb::termMatch failed\n"));
return;
}
break;
case 2:
{
string stemlang = (const char *)stemLangCMB->currentText().utf8();
suggs = rcldb->stemExpand(stemlang,expr);
}
}
break;
#ifdef RCL_USE_ASPELL
@ -132,24 +169,37 @@ void SpellW::doExpand()
LOGDEB(("SpellW::doExpand: aspell init error\n"));
return;
}
list<string> suggs;
if (!aspell->suggest(*rcldb, expr, suggs, reason)) {
QMessageBox::warning(0, "Recoll",
tr("Aspell expansion error. "));
LOGERR(("SpellW::doExpand:suggest failed: %s\n", reason.c_str()));
}
for (list<string>::const_iterator it = suggs.begin();
it != suggs.end(); it++)
entries.push_back(Rcl::TermMatchEntry(*it));
}
#endif
}
if (suggs.empty()) {
suggsTE->append(tr("No expansion found"));
if (entries.empty()) {
new MyListViewItem(suggsLV, tr("No expansion found"), "");
} else {
for (list<string>::iterator it = suggs.begin();
it != suggs.end(); it++) {
suggsTE->append(QString::fromUtf8(it->c_str()));
// Seems that need to use a reverse iterator to get same order in
// listview and input list ??
for (list<Rcl::TermMatchEntry>::reverse_iterator it = entries.rbegin();
it != entries.rend(); it++) {
LOGDEB(("SpellW::expand: %6d [%s]\n", it->wcf, it->term.c_str()));
char num[20];
if (it->wcf)
sprintf(num, "%d", it->wcf);
else
num[0] = 0;
new MyListViewItem(suggsLV,
QString::fromUtf8(it->term.c_str()),
QString::fromAscii(num));
}
suggsTE->setCursorPosition(0,0);
suggsTE->ensureCursorVisible();
}
}
@ -157,17 +207,24 @@ void SpellW::wordChanged(const QString &text)
{
if (text.isEmpty()) {
expandPB->setEnabled(false);
suggsTE->clear();
suggsLV->clear();
} else {
expandPB->setEnabled(true);
}
}
void SpellW::textDoubleClicked(int para, int)
void SpellW::textDoubleClicked()
{
suggsTE->setSelection(para, 0, para, 1000);
if (suggsTE->hasSelectedText())
emit(wordSelect(suggsTE->selectedText()));
QListViewItemIterator it(suggsLV);
while (it.current()) {
QListViewItem *item = it.current();
if (!item->isSelected()) {
++it;
continue;
}
emit(wordSelect((const char *)item->text(0)));
++it;
}
}
void SpellW::modeSet(int mode)

View File

@ -1,6 +1,6 @@
#ifndef _ASPELL_W_H_INCLUDED_
#define _ASPELL_W_H_INCLUDED_
/* @(#$Id: spell_w.h,v 1.5 2006-12-04 09:56:26 dockes Exp $ (C) 2006 J.F.Dockes */
/* @(#$Id: spell_w.h,v 1.6 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes */
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -54,7 +54,7 @@ public:
public slots:
virtual void doExpand();
virtual void wordChanged(const QString&);
virtual void textDoubleClicked(int, int);
virtual void textDoubleClicked();
virtual void modeSet(int);
signals:

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.16 2006-12-14 13:53:43 dockes Exp $ (C) 2006 J.F.Dockes";
static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.17 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -169,11 +169,9 @@ void SSearch::completion()
// Query database
const int max = 100;
list<string> strs;
if (!rcldb->termMatch(Rcl::Db::ET_WILD, s, strs,
prefs.queryStemLang.ascii(),max)
|| strs.size() == 0) {
list<Rcl::TermMatchEntry> strs;
if (!rcldb->termMatch(Rcl::Db::ET_WILD, prefs.queryStemLang.ascii(),
s, strs, max) || strs.size() == 0) {
QApplication::beep();
return;
}
@ -186,12 +184,14 @@ void SSearch::completion()
QString res;
bool ok = false;
if (strs.size() == 1) {
res = QString::fromUtf8(strs.begin()->c_str());
res = QString::fromUtf8(strs.begin()->term.c_str());
ok = true;
} else {
QStringList lst;
for (list<string>::iterator it=strs.begin(); it != strs.end(); it++)
lst.push_back(QString::fromUtf8(it->c_str()));
for (list<Rcl::TermMatchEntry>::iterator it=strs.begin();
it != strs.end(); it++) {
lst.push_back(QString::fromUtf8(it->term.c_str()));
}
res = QInputDialog::getItem(tr("Completions"),
tr("Select an item:"), lst, 0,
FALSE, &ok, this);

View File

@ -59,9 +59,6 @@
<property name="frameShadow">
<enum>Sunken</enum>
</property>
<property name="resizePolicy">
<enum>Manual</enum>
</property>
<property name="selectionMode">
<enum>Extended</enum>
</property>
@ -71,9 +68,6 @@
<property name="showSortIndicator">
<bool>true</bool>
</property>
<property name="resizeMode">
<enum>LastColumn</enum>
</property>
<property name="toolTip" stdset="0">
<string>Select one or several file types, then click Change Action to modify the program used to open them</string>
</property>

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: viewaction_w.cpp,v 1.3 2006-12-18 16:45:52 dockes Exp $ (C) 2006 J.F.Dockes";
static char rcsid[] = "@(#$Id: viewaction_w.cpp,v 1.4 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -37,6 +37,7 @@ using namespace std;
#include <qmessagebox.h>
#include <qinputdialog.h>
#include <qlayout.h>
#include "recoll.h"
#include "debuglog.h"
@ -55,13 +56,6 @@ void ViewAction::init()
SIGNAL(doubleClicked(Q3ListViewItem *, const QPoint &, int)),
#endif
this, SLOT(editAction()));
// Note: could get the column width setting to work in qt4
actionsLV->setColumnWidthMode(0, QListView::Manual);
actionsLV->setColumnWidth(0, 300);
actionsLV->setColumnWidthMode(1, QListView::Manual);
actionsLV->setColumnWidth(1, 120);
fillLists();
resize(QSize(450, 250).expandedTo(minimumSizeHint()) );
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.100 2006-12-07 13:24:19 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.101 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -1285,6 +1285,42 @@ bool Db::setQuery(RefCntr<SearchData> sdata, int opts,
return true;
}
// Ordering predicate for sorting TermMatchEntry lists by frequency:
// entries with the larger wcf value are placed first.
class TermMatchCmpByWcf {
public:
    // Return true when l must be ordered before r, that is when l has the
    // strictly larger wcf. The values are compared directly instead of
    // testing the sign of a subtraction, which could overflow for extreme
    // operands.
    bool operator()(const TermMatchEntry& l, const TermMatchEntry& r) const {
        return l.wcf > r.wcf;
    }
};
// Ordering predicate for sorting TermMatchEntry lists by term text:
// the entry whose term compares greater is placed first.
class TermMatchCmpByTerm {
public:
    // Return true when l must be ordered before r, that is when l.term
    // compares strictly greater than r.term.
    bool operator()(const TermMatchEntry& l, const TermMatchEntry& r) const {
        return l.term.compare(r.term) > 0;
    }
};
// Equality predicate on TermMatchEntry: two entries are considered equal
// when their term strings are identical; wcf is not looked at.
class TermMatchTermEqual {
public:
    // Return true when both entries hold the same term.
    bool operator()(const TermMatchEntry& l, const TermMatchEntry& r) const {
        return l.term.compare(r.term) == 0;
    }
};
bool Db::stemExpand(const string &lang, const string &term,
list<TermMatchEntry>& result, int max)
{
list<string> dirs = m_extraDbs;
dirs.push_front(m_basedir);
for (list<string>::iterator it = dirs.begin();
it != dirs.end(); it++) {
list<string> more;
StemDb::stemExpand(*it, lang, term, more);
LOGDEB1(("Db::stemExpand: Got %d from %s\n",
more.size(), it->c_str()));
result.insert(result.end(), more.begin(), more.end());
}
LOGDEB1(("Db:::stemExpand: final count %d \n", result.size()));
return true;
}
// Characters that can begin a wildcard or regexp expression. We use skipto
// to begin the allterms search with terms that begin with the portion of
// the input string prior to these chars.
@ -1292,85 +1328,97 @@ const string wildSpecChars = "*?[";
const string regSpecChars = "(.[{";
// Find all index terms that match a wildcard or regular expression
bool Db::termMatch(MatchType typ, const string &root, list<string>& res,
const string &lang, int max)
bool Db::termMatch(MatchType typ, const string &lang,
const string &root,
list<TermMatchEntry>& res,
int max)
{
if (!m_ndb || !m_ndb->m_isopen)
return false;
Xapian::Database db = m_ndb->m_iswritable ? m_ndb->wdb: m_ndb->db;
res.clear();
// Get rid of capitals and accents
string droot;
dumb_string(root, droot);
string nochars = typ == ET_WILD ? wildSpecChars : regSpecChars;
regex_t reg;
int errcode;
// Compile regexp. We anchor the input by enclosing it in ^ and $
if (typ == ET_REGEXP) {
string mroot = droot;
if (mroot.at(0) != '^')
mroot = string("^") + mroot;
if (mroot.at(mroot.length()-1) != '$')
mroot += "$";
if ((errcode = regcomp(&reg, mroot.c_str(), REG_EXTENDED|REG_NOSUB))) {
char errbuf[200];
regerror(errcode, &reg, errbuf, 199);
LOGERR(("termMatch: regcomp failed: %s\n", errbuf));
res.push_back(errbuf);
regfree(&reg);
if (typ == ET_STEM) {
if (!stemExpand(lang, root, res, max))
return false;
for (list<TermMatchEntry>::iterator it = res.begin();
it != res.end(); it++) {
it->wcf = db.get_collection_freq(it->term);
LOGDEB(("termMatch: %d [%s]\n", it->wcf, it->term.c_str()));
}
}
// Find the initial section before any special char
string::size_type es = droot.find_first_of(nochars);
string is;
switch (es) {
case string::npos: is = droot;break;
case 0: break;
default: is = droot.substr(0, es);break;
}
LOGDEB(("termMatch: initsec: [%s]\n", is.c_str()));
Xapian::TermIterator it = db.allterms_begin();
if (!is.empty())
it.skip_to(is.c_str());
for (int n = 0;it != db.allterms_end(); it++) {
// If we're beyond the terms matching the initial string, end
if (!is.empty() && (*it).find(is) != 0)
break;
// Don't match special internal terms beginning with uppercase ascii
if ((*it).at(0) >= 'A' && (*it).at(0) <= 'Z')
continue;
if (typ == ET_WILD) {
if (fnmatch(droot.c_str(), (*it).c_str(), 0) == FNM_NOMATCH)
continue;
} else {
if (regexec(&reg, (*it).c_str(), 0, 0, 0))
continue;
}
// Do we want stem expansion here? We don't do it for now
if (1 || lang.empty()) {
res.push_back(*it);
++n;
} else {
list<string> stemexps = stemExpand(lang, *it);
unsigned int cnt =
(int)stemexps.size() > max - n ? max - n : stemexps.size();
list<string>::iterator sit = stemexps.begin();
while (cnt--) {
res.push_back(*sit++);
n++;
} else {
regex_t reg;
int errcode;
if (typ == ET_REGEXP) {
// Compile regexp. We anchor the input by enclosing it in ^ and $
string mroot = droot;
if (mroot.at(0) != '^')
mroot = string("^") + mroot;
if (mroot.at(mroot.length()-1) != '$')
mroot += "$";
if ((errcode = regcomp(&reg, mroot.c_str(),
REG_EXTENDED|REG_NOSUB))) {
char errbuf[200];
regerror(errcode, &reg, errbuf, 199);
LOGERR(("termMatch: regcomp failed: %s\n", errbuf));
res.push_back(string(errbuf));
regfree(&reg);
return false;
}
}
if (n >= max)
break;
// Find the initial section before any special char
string::size_type es = droot.find_first_of(nochars);
string is;
switch (es) {
case string::npos: is = droot;break;
case 0: break;
default: is = droot.substr(0, es);break;
}
LOGDEB(("termMatch: initsec: [%s]\n", is.c_str()));
Xapian::TermIterator it = db.allterms_begin();
if (!is.empty())
it.skip_to(is.c_str());
for (int n = 0;it != db.allterms_end(); it++) {
// If we're beyond the terms matching the initial string, end
if (!is.empty() && (*it).find(is) != 0)
break;
// Don't match special internal terms beginning with uppercase ascii
if ((*it).at(0) >= 'A' && (*it).at(0) <= 'Z')
continue;
if (typ == ET_WILD) {
if (fnmatch(droot.c_str(), (*it).c_str(), 0) == FNM_NOMATCH)
continue;
} else {
if (regexec(&reg, (*it).c_str(), 0, 0, 0))
continue;
}
// Do we want stem expansion here? We don't do it for now
res.push_back(TermMatchEntry(*it, it.get_termfreq()));
++n;
}
if (typ == ET_REGEXP) {
regfree(&reg);
}
}
res.sort();
res.unique();
if (typ == ET_REGEXP) {
regfree(&reg);
TermMatchCmpByTerm tcmp;
res.sort(tcmp);
TermMatchTermEqual teq;
res.unique(teq);
TermMatchCmpByWcf wcmp;
res.sort(wcmp);
if (max > 0) {
res.resize(MIN(res.size(), (unsigned int)max));
}
return true;
}
@ -1417,23 +1465,6 @@ bool Db::termExists(const string& word)
return true;
}
list<string> Db::stemExpand(const string& lang, const string& term)
{
list<string> dirs = m_extraDbs;
dirs.push_front(m_basedir);
list<string> exp;
for (list<string>::iterator it = dirs.begin();
it != dirs.end(); it++) {
list<string> more = StemDb::stemExpand(*it, lang, term);
LOGDEB1(("Db::stemExpand: Got %d from %s\n",
more.size(), it->c_str()));
exp.splice(exp.end(), more);
}
exp.sort();
exp.unique();
LOGDEB1(("Db:::stemExpand: final count %d \n", exp.size()));
return exp;
}
bool Db::stemDiffers(const string& lang, const string& word,
const string& base)

View File

@ -16,7 +16,7 @@
*/
#ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.44 2006-12-14 14:54:13 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: rcldb.h,v 1.45 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
@ -53,7 +53,16 @@ namespace Rcl {
class SearchData;
class Native;
class TermIter;
/** A term string together with a frequency count. */
class TermMatchEntry {
public:
    /** Empty term, frequency 0 */
    TermMatchEntry()
        : wcf(0)
    {
    }

    /**
     * Build from a term and an optional frequency (0 when omitted).
     * Intentionally usable as an implicit conversion from a string, so
     * that plain strings can be stored into TermMatchEntry containers.
     */
    TermMatchEntry(const string& t, int f = 0)
        : term(t), wcf(f)
    {
    }

    string term;
    int wcf; // Within collection frequency
};
/**
* Wrapper class for the native database.
*/
@ -109,9 +118,9 @@ class Db {
/** Return a list of index terms that match the input string
* Expansion is performed with either wildcard, regexp, or stem processing
* Stem expansion is performed if lang is not empty */
enum MatchType {ET_WILD, ET_REGEXP};
bool termMatch(MatchType typ, const string &s, list<string>& result,
const string &lang, int max=20);
enum MatchType {ET_WILD, ET_REGEXP, ET_STEM};
bool termMatch(MatchType typ, const string &lang, const string &s,
list<TermMatchEntry>& result, int max = -1);
/** Add extra database for querying */
bool addQueryDb(const string &dir);
@ -159,12 +168,11 @@ class Db {
bool stemDiffers(const string& lang, const string& term,
const string& base);
/** Perform stem expansion across all dbs configured for searching */
list<string> stemExpand(const string& lang, const string& term);
/** Filename wildcard expansion */
bool filenameWildExp(const string& exp, list<string>& names);
string getReason(){return m_reason;}
private:
string m_filterTopDir; // Current query filter on subtree top directory
@ -201,6 +209,8 @@ private:
vector<bool> updated;
bool reOpen(); // Close/open, same mode/opts
bool stemExpand(const string &lang, const string &s,
list<TermMatchEntry>& result, int max = -1);
/* Copy constructor and assignment are private and forbidden */
Db(const Db &) {}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.6 2006-11-30 13:38:44 dockes Exp $ (C) 2006 J.F.Dockes";
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.7 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -214,7 +214,12 @@ void StringToXapianQ::maybeStemExp(bool nostemexp,
if (nostemexp) {
exp = list<string>(1, term1);
} else {
exp = m_db.stemExpand(m_stemlang, term1);
list<TermMatchEntry> l;
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term1, l);
for (list<TermMatchEntry>::const_iterator it = l.begin();
it != l.end(); it++) {
exp.push_back(it->term);
}
}
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.5 2006-10-09 16:37:08 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.6 2006-12-19 12:11:21 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/**
@ -206,13 +206,24 @@ bool createDb(Xapian::Database& xdb, const string& dbdir, const string& lang)
return true;
}
// Format a list of strings for display: every element is enclosed in
// square brackets and the elements are separated by single spaces, as in
// "[a] [b]". An empty list yields an empty string.
static string stringlistdisp(const list<string>& sl)
{
    string out;
    list<string>::const_iterator cur = sl.begin();
    while (cur != sl.end()) {
        out.append("[");
        out.append(*cur);
        out.append("] ");
        ++cur;
    }
    // Remove the separator left after the last element
    if (out.length() > 0)
        out.erase(out.length() - 1);
    return out;
}
/**
* Expand term to list of all terms which stem to the same term.
*/
list<string> stemExpand(const string& dbdir, const string& lang,
const string& term)
bool stemExpand(const std::string& dbdir,
const std::string& lang,
const std::string& term,
list<string>& result)
{
list<string> explist;
try {
Xapian::Stem stemmer(lang);
string stem = stemmer.stem_word(term);
@ -224,14 +235,14 @@ list<string> stemExpand(const string& dbdir, const string& lang,
stemdbdir.c_str(), sdb.get_lastdocid()));
if (!sdb.term_exists(stem)) {
LOGDEB1(("Db::stemExpand: no term for %s\n", stem.c_str()));
explist.push_back(term);
return explist;
result.push_back(term);
return true;
}
Xapian::PostingIterator did = sdb.postlist_begin(stem);
if (did == sdb.postlist_end(stem)) {
LOGDEB1(("stemExpand: no term(1) for %s\n",stem.c_str()));
explist.push_back(term);
return explist;
result.push_back(term);
return true;
}
Xapian::Document doc = sdb.get_document(*did);
string data = doc.get_data();
@ -242,24 +253,24 @@ list<string> stemExpand(const string& dbdir, const string& lang,
++pos;
string::size_type pos1 = data.find_last_of("\n");
if (pos == string::npos || pos1 == string::npos ||pos1 <= pos) { // ??
explist.push_back(term);
return explist;
result.push_back(term);
return true;
}
stringToStrings(data.substr(pos, pos1-pos), explist);
stringToStrings(data.substr(pos, pos1-pos), result);
// If the user term itself is not in the list, add it.
if (find(explist.begin(), explist.end(), term) == explist.end()) {
explist.push_back(term);
if (find(result.begin(), result.end(), term) == result.end()) {
result.push_back(term);
}
LOGDEB(("stemExpand: %s -> %s\n", stem.c_str(),
stringlistdisp(explist).c_str()));
stringlistdisp(result).c_str()));
} catch (...) {
LOGERR(("stemExpand: error accessing stem db. dbdir [%s] lang [%s]\n",
dbdir.c_str(), lang.c_str()));
explist.push_back(term);
return explist;
result.push_back(term);
return false;
}
return explist;
return true;
}
}

View File

@ -1,6 +1,6 @@
#ifndef _STEMDB_H_INCLUDED_
#define _STEMDB_H_INCLUDED_
/* @(#$Id: stemdb.h,v 1.2 2006-11-15 14:57:53 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: stemdb.h,v 1.3 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes */
/// Stem database code
///
/// Stem databases list stems and the set of index terms they expand to. They
@ -13,6 +13,7 @@
#include <string>
#include <xapian.h>
#ifndef NO_NAMESPACES
using std::string;
using std::list;
@ -28,10 +29,10 @@ extern bool deleteDb(const std::string& dbdir, const std::string& lang);
extern bool createDb(Xapian::Database& xdb,
const std::string& dbdir, const std::string& lang);
/// Expand term to stem siblings
extern std::list<std::string> stemExpand(const std::string& dbdir,
const std::string& lang,
const std::string& term);
extern bool stemExpand(const std::string& dbdir,
const std::string& lang,
const std::string& term,
list<string>& result);
#ifndef NO_NAMESPACES
}
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.24 2006-12-18 12:06:11 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.25 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -38,16 +38,6 @@ using namespace std;
#define MIN(A,B) ((A)<(B)?(A):(B))
// Build a printable form of a string list: each element is written between
// square brackets, with one space between consecutive elements. Returns
// an empty string for an empty list.
string stringlistdisp(const list<string>& sl)
{
    string res;
    for (list<string>::const_iterator p = sl.begin(); p != sl.end(); ++p) {
        res += '[';
        res += *p;
        res += "] ";
    }
    // Nothing was added: nothing to trim
    if (res.empty())
        return res;
    // Drop the trailing space that follows the last element
    res.erase(res.length() - 1);
    return res;
}
int stringicmp(const string & s1, const string& s2)
{
string::const_iterator it1 = s1.begin();

View File

@ -16,7 +16,7 @@
*/
#ifndef _SMALLUT_H_INCLUDED_
#define _SMALLUT_H_INCLUDED_
/* @(#$Id: smallut.h,v 1.24 2006-12-18 12:06:11 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: smallut.h,v 1.25 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
#include <map>
@ -38,8 +38,6 @@ extern int stringisuffcmp(const string& s1, const string& s2);
// Compare charset names, removing the more common spelling variations
extern bool samecharset(const string &cs1, const string &cs2);
extern string stringlistdisp(const list<string>& strs);
/**
* Parse input string into list of strings.
*