Merge stemExpand into termMatch. Return term frequencies from there and display them in SpellW
This commit is contained in:
parent
50b01c6ea4
commit
be05eaa6e0
@ -106,30 +106,40 @@
|
||||
</widget>
|
||||
</grid>
|
||||
</widget>
|
||||
<widget class="QTextEdit">
|
||||
<widget class="QListView">
|
||||
<column>
|
||||
<property name="text">
|
||||
<string>Term</string>
|
||||
</property>
|
||||
<property name="clickable">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="resizable">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
</column>
|
||||
<column>
|
||||
<property name="text">
|
||||
<string>Count</string>
|
||||
</property>
|
||||
<property name="clickable">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="resizable">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
</column>
|
||||
<property name="name">
|
||||
<cstring>suggsTE</cstring>
|
||||
<cstring>suggsLV</cstring>
|
||||
</property>
|
||||
<property name="minimumSize">
|
||||
<size>
|
||||
<width>0</width>
|
||||
<height>200</height>
|
||||
</size>
|
||||
<property name="selectionMode">
|
||||
<enum>Extended</enum>
|
||||
</property>
|
||||
<property name="focusPolicy">
|
||||
<enum>TabFocus</enum>
|
||||
</property>
|
||||
<property name="textFormat">
|
||||
<enum>PlainText</enum>
|
||||
</property>
|
||||
<property name="readOnly">
|
||||
<property name="showSortIndicator">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="undoRedoEnabled">
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
<property name="tabChangesFocus">
|
||||
<bool>true</bool>
|
||||
<property name="resizeMode">
|
||||
<enum>NoColumn</enum>
|
||||
</property>
|
||||
</widget>
|
||||
</vbox>
|
||||
@ -139,7 +149,6 @@
|
||||
<tabstops>
|
||||
<tabstop>baseWordLE</tabstop>
|
||||
<tabstop>expandPB</tabstop>
|
||||
<tabstop>suggsTE</tabstop>
|
||||
<tabstop>dismissPB</tabstop>
|
||||
<tabstop>expTypeCMB</tabstop>
|
||||
<tabstop>stemLangCMB</tabstop>
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: spell_w.cpp,v 1.7 2006-11-30 13:38:44 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: spell_w.cpp,v 1.8 2006-12-19 12:11:21 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -25,17 +25,22 @@ static char rcsid[] = "@(#$Id: spell_w.cpp,v 1.7 2006-11-30 13:38:44 dockes Exp
|
||||
|
||||
#include <qmessagebox.h>
|
||||
#include <qpushbutton.h>
|
||||
#include <qtextedit.h>
|
||||
#include <qlabel.h>
|
||||
#include <qlineedit.h>
|
||||
#include <qlayout.h>
|
||||
#include <qtooltip.h>
|
||||
#include <qcombobox.h>
|
||||
#if (QT_VERSION < 0x040000)
|
||||
#include <qlistview.h>
|
||||
#else
|
||||
#include <q3listview.h>
|
||||
#endif
|
||||
|
||||
#include "debuglog.h"
|
||||
#include "recoll.h"
|
||||
#include "spell_w.h"
|
||||
#include "guiutils.h"
|
||||
#include "rcldb.h"
|
||||
|
||||
#ifdef RCL_USE_ASPELL
|
||||
#include "rclaspell.h"
|
||||
@ -79,16 +84,47 @@ void SpellW::init()
|
||||
connect(baseWordLE, SIGNAL(returnPressed()), this, SLOT(doExpand()));
|
||||
connect(expandPB, SIGNAL(clicked()), this, SLOT(doExpand()));
|
||||
connect(dismissPB, SIGNAL(clicked()), this, SLOT(close()));
|
||||
connect(suggsTE, SIGNAL(doubleClicked(int, int)),
|
||||
this, SLOT(textDoubleClicked(int, int)));
|
||||
|
||||
connect(suggsLV,
|
||||
#if (QT_VERSION < 0x040000)
|
||||
SIGNAL(doubleClicked(QListViewItem *, const QPoint &, int)),
|
||||
#else
|
||||
SIGNAL(doubleClicked(Q3ListViewItem *, const QPoint &, int)),
|
||||
#endif
|
||||
this, SLOT(textDoubleClicked()));
|
||||
|
||||
connect(expTypeCMB, SIGNAL(activated(int)),
|
||||
this, SLOT(modeSet(int)));
|
||||
|
||||
suggsLV->setColumnWidth(0, 200);
|
||||
suggsLV->setColumnWidth(1, 100);
|
||||
// No initial sorting: user can choose to establish one
|
||||
suggsLV->setSorting(100, false);
|
||||
}
|
||||
|
||||
// Subclass qlistviewitem for numeric sorting on column 1
|
||||
class MyListViewItem : public QListViewItem
|
||||
{
|
||||
public:
|
||||
MyListViewItem(QListView *listView, const QString& s1, const QString& s2)
|
||||
: QListViewItem(listView, s1, s2)
|
||||
{ }
|
||||
|
||||
int compare(QListViewItem * i, int col, bool ascending) const {
|
||||
if (col == 0)
|
||||
return i->text(0).compare(text(0));
|
||||
if (col == 1)
|
||||
return i->text(1).toInt() - text(1).toInt();
|
||||
// ??
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/* Expand term according to current mode */
|
||||
void SpellW::doExpand()
|
||||
{
|
||||
suggsTE->clear();
|
||||
suggsLV->clear();
|
||||
if (baseWordLE->text().isEmpty())
|
||||
return;
|
||||
|
||||
@ -100,26 +136,27 @@ void SpellW::doExpand()
|
||||
|
||||
string expr = string((const char *)baseWordLE->text().utf8());
|
||||
list<string> suggs;
|
||||
|
||||
prefs.termMatchType = expTypeCMB->currentItem();
|
||||
|
||||
Rcl::Db::MatchType mt = Rcl::Db::ET_WILD;
|
||||
switch(expTypeCMB->currentItem()) {
|
||||
case 0: mt = Rcl::Db::ET_WILD; break;
|
||||
case 1:mt = Rcl::Db::ET_REGEXP; break;
|
||||
case 2:mt = Rcl::Db::ET_STEM; break;
|
||||
}
|
||||
|
||||
list<Rcl::TermMatchEntry> entries;
|
||||
switch (expTypeCMB->currentItem()) {
|
||||
case 1: mt = Rcl::Db::ET_REGEXP;
|
||||
/* FALLTHROUGH */
|
||||
case 0:
|
||||
if (!rcldb->termMatch(mt, expr, suggs, prefs.queryStemLang.ascii(),
|
||||
case 1:
|
||||
case 2: {
|
||||
if (!rcldb->termMatch(mt, prefs.queryStemLang.ascii(), expr, entries,
|
||||
200)) {
|
||||
LOGERR(("SpellW::doExpand:rcldb::termMatch failed\n"));
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case 2:
|
||||
{
|
||||
string stemlang = (const char *)stemLangCMB->currentText().utf8();
|
||||
suggs = rcldb->stemExpand(stemlang,expr);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
#ifdef RCL_USE_ASPELL
|
||||
@ -132,24 +169,37 @@ void SpellW::doExpand()
|
||||
LOGDEB(("SpellW::doExpand: aspell init error\n"));
|
||||
return;
|
||||
}
|
||||
list<string> suggs;
|
||||
if (!aspell->suggest(*rcldb, expr, suggs, reason)) {
|
||||
QMessageBox::warning(0, "Recoll",
|
||||
tr("Aspell expansion error. "));
|
||||
LOGERR(("SpellW::doExpand:suggest failed: %s\n", reason.c_str()));
|
||||
}
|
||||
for (list<string>::const_iterator it = suggs.begin();
|
||||
it != suggs.end(); it++)
|
||||
entries.push_back(Rcl::TermMatchEntry(*it));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (suggs.empty()) {
|
||||
suggsTE->append(tr("No expansion found"));
|
||||
|
||||
if (entries.empty()) {
|
||||
new MyListViewItem(suggsLV, tr("No expansion found"), "");
|
||||
} else {
|
||||
for (list<string>::iterator it = suggs.begin();
|
||||
it != suggs.end(); it++) {
|
||||
suggsTE->append(QString::fromUtf8(it->c_str()));
|
||||
// Seems that need to use a reverse iterator to get same order in
|
||||
// listview and input list ??
|
||||
for (list<Rcl::TermMatchEntry>::reverse_iterator it = entries.rbegin();
|
||||
it != entries.rend(); it++) {
|
||||
LOGDEB(("SpellW::expand: %6d [%s]\n", it->wcf, it->term.c_str()));
|
||||
char num[20];
|
||||
if (it->wcf)
|
||||
sprintf(num, "%d", it->wcf);
|
||||
else
|
||||
num[0] = 0;
|
||||
new MyListViewItem(suggsLV,
|
||||
QString::fromUtf8(it->term.c_str()),
|
||||
QString::fromAscii(num));
|
||||
}
|
||||
suggsTE->setCursorPosition(0,0);
|
||||
suggsTE->ensureCursorVisible();
|
||||
}
|
||||
}
|
||||
|
||||
@ -157,17 +207,24 @@ void SpellW::wordChanged(const QString &text)
|
||||
{
|
||||
if (text.isEmpty()) {
|
||||
expandPB->setEnabled(false);
|
||||
suggsTE->clear();
|
||||
suggsLV->clear();
|
||||
} else {
|
||||
expandPB->setEnabled(true);
|
||||
}
|
||||
}
|
||||
|
||||
void SpellW::textDoubleClicked(int para, int)
|
||||
void SpellW::textDoubleClicked()
|
||||
{
|
||||
suggsTE->setSelection(para, 0, para, 1000);
|
||||
if (suggsTE->hasSelectedText())
|
||||
emit(wordSelect(suggsTE->selectedText()));
|
||||
QListViewItemIterator it(suggsLV);
|
||||
while (it.current()) {
|
||||
QListViewItem *item = it.current();
|
||||
if (!item->isSelected()) {
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
emit(wordSelect((const char *)item->text(0)));
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
void SpellW::modeSet(int mode)
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#ifndef _ASPELL_W_H_INCLUDED_
|
||||
#define _ASPELL_W_H_INCLUDED_
|
||||
/* @(#$Id: spell_w.h,v 1.5 2006-12-04 09:56:26 dockes Exp $ (C) 2006 J.F.Dockes */
|
||||
/* @(#$Id: spell_w.h,v 1.6 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes */
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@ -54,7 +54,7 @@ public:
|
||||
public slots:
|
||||
virtual void doExpand();
|
||||
virtual void wordChanged(const QString&);
|
||||
virtual void textDoubleClicked(int, int);
|
||||
virtual void textDoubleClicked();
|
||||
virtual void modeSet(int);
|
||||
|
||||
signals:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.16 2006-12-14 13:53:43 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.17 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -169,11 +169,9 @@ void SSearch::completion()
|
||||
|
||||
// Query database
|
||||
const int max = 100;
|
||||
list<string> strs;
|
||||
|
||||
if (!rcldb->termMatch(Rcl::Db::ET_WILD, s, strs,
|
||||
prefs.queryStemLang.ascii(),max)
|
||||
|| strs.size() == 0) {
|
||||
list<Rcl::TermMatchEntry> strs;
|
||||
if (!rcldb->termMatch(Rcl::Db::ET_WILD, prefs.queryStemLang.ascii(),
|
||||
s, strs, max) || strs.size() == 0) {
|
||||
QApplication::beep();
|
||||
return;
|
||||
}
|
||||
@ -186,12 +184,14 @@ void SSearch::completion()
|
||||
QString res;
|
||||
bool ok = false;
|
||||
if (strs.size() == 1) {
|
||||
res = QString::fromUtf8(strs.begin()->c_str());
|
||||
res = QString::fromUtf8(strs.begin()->term.c_str());
|
||||
ok = true;
|
||||
} else {
|
||||
QStringList lst;
|
||||
for (list<string>::iterator it=strs.begin(); it != strs.end(); it++)
|
||||
lst.push_back(QString::fromUtf8(it->c_str()));
|
||||
for (list<Rcl::TermMatchEntry>::iterator it=strs.begin();
|
||||
it != strs.end(); it++) {
|
||||
lst.push_back(QString::fromUtf8(it->term.c_str()));
|
||||
}
|
||||
res = QInputDialog::getItem(tr("Completions"),
|
||||
tr("Select an item:"), lst, 0,
|
||||
FALSE, &ok, this);
|
||||
|
||||
@ -59,9 +59,6 @@
|
||||
<property name="frameShadow">
|
||||
<enum>Sunken</enum>
|
||||
</property>
|
||||
<property name="resizePolicy">
|
||||
<enum>Manual</enum>
|
||||
</property>
|
||||
<property name="selectionMode">
|
||||
<enum>Extended</enum>
|
||||
</property>
|
||||
@ -71,9 +68,6 @@
|
||||
<property name="showSortIndicator">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="resizeMode">
|
||||
<enum>LastColumn</enum>
|
||||
</property>
|
||||
<property name="toolTip" stdset="0">
|
||||
<string>Select one or several file types, then click Change Action to modify the program used to open them</string>
|
||||
</property>
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: viewaction_w.cpp,v 1.3 2006-12-18 16:45:52 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: viewaction_w.cpp,v 1.4 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -37,6 +37,7 @@ using namespace std;
|
||||
|
||||
#include <qmessagebox.h>
|
||||
#include <qinputdialog.h>
|
||||
#include <qlayout.h>
|
||||
|
||||
#include "recoll.h"
|
||||
#include "debuglog.h"
|
||||
@ -55,13 +56,6 @@ void ViewAction::init()
|
||||
SIGNAL(doubleClicked(Q3ListViewItem *, const QPoint &, int)),
|
||||
#endif
|
||||
this, SLOT(editAction()));
|
||||
|
||||
// Note: could get the column width setting to work in qt4
|
||||
actionsLV->setColumnWidthMode(0, QListView::Manual);
|
||||
actionsLV->setColumnWidth(0, 300);
|
||||
actionsLV->setColumnWidthMode(1, QListView::Manual);
|
||||
actionsLV->setColumnWidth(1, 120);
|
||||
|
||||
fillLists();
|
||||
resize(QSize(450, 250).expandedTo(minimumSizeHint()) );
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.100 2006-12-07 13:24:19 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.101 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -1285,6 +1285,42 @@ bool Db::setQuery(RefCntr<SearchData> sdata, int opts,
|
||||
return true;
|
||||
}
|
||||
|
||||
class TermMatchCmpByWcf {
|
||||
public:
|
||||
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
||||
return r.wcf - l.wcf < 0;
|
||||
}
|
||||
};
|
||||
class TermMatchCmpByTerm {
|
||||
public:
|
||||
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
||||
return l.term.compare(r.term) > 0;
|
||||
}
|
||||
};
|
||||
class TermMatchTermEqual {
|
||||
public:
|
||||
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
||||
return !l.term.compare(r.term);
|
||||
}
|
||||
};
|
||||
|
||||
bool Db::stemExpand(const string &lang, const string &term,
|
||||
list<TermMatchEntry>& result, int max)
|
||||
{
|
||||
list<string> dirs = m_extraDbs;
|
||||
dirs.push_front(m_basedir);
|
||||
for (list<string>::iterator it = dirs.begin();
|
||||
it != dirs.end(); it++) {
|
||||
list<string> more;
|
||||
StemDb::stemExpand(*it, lang, term, more);
|
||||
LOGDEB1(("Db::stemExpand: Got %d from %s\n",
|
||||
more.size(), it->c_str()));
|
||||
result.insert(result.end(), more.begin(), more.end());
|
||||
}
|
||||
LOGDEB1(("Db:::stemExpand: final count %d \n", result.size()));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Characters that can begin a wildcard or regexp expression. We use skipto
|
||||
// to begin the allterms search with terms that begin with the portion of
|
||||
// the input string prior to these chars.
|
||||
@ -1292,85 +1328,97 @@ const string wildSpecChars = "*?[";
|
||||
const string regSpecChars = "(.[{";
|
||||
|
||||
// Find all index terms that match a wildcard or regular expression
|
||||
bool Db::termMatch(MatchType typ, const string &root, list<string>& res,
|
||||
const string &lang, int max)
|
||||
bool Db::termMatch(MatchType typ, const string &lang,
|
||||
const string &root,
|
||||
list<TermMatchEntry>& res,
|
||||
int max)
|
||||
{
|
||||
if (!m_ndb || !m_ndb->m_isopen)
|
||||
return false;
|
||||
|
||||
Xapian::Database db = m_ndb->m_iswritable ? m_ndb->wdb: m_ndb->db;
|
||||
|
||||
res.clear();
|
||||
|
||||
// Get rid of capitals and accents
|
||||
string droot;
|
||||
dumb_string(root, droot);
|
||||
string nochars = typ == ET_WILD ? wildSpecChars : regSpecChars;
|
||||
|
||||
regex_t reg;
|
||||
int errcode;
|
||||
// Compile regexp. We anchor the input by enclosing it in ^ and $
|
||||
if (typ == ET_REGEXP) {
|
||||
string mroot = droot;
|
||||
if (mroot.at(0) != '^')
|
||||
mroot = string("^") + mroot;
|
||||
if (mroot.at(mroot.length()-1) != '$')
|
||||
mroot += "$";
|
||||
if ((errcode = regcomp(&reg, mroot.c_str(), REG_EXTENDED|REG_NOSUB))) {
|
||||
char errbuf[200];
|
||||
regerror(errcode, &reg, errbuf, 199);
|
||||
LOGERR(("termMatch: regcomp failed: %s\n", errbuf));
|
||||
res.push_back(errbuf);
|
||||
regfree(&reg);
|
||||
if (typ == ET_STEM) {
|
||||
if (!stemExpand(lang, root, res, max))
|
||||
return false;
|
||||
for (list<TermMatchEntry>::iterator it = res.begin();
|
||||
it != res.end(); it++) {
|
||||
it->wcf = db.get_collection_freq(it->term);
|
||||
LOGDEB(("termMatch: %d [%s]\n", it->wcf, it->term.c_str()));
|
||||
}
|
||||
}
|
||||
|
||||
// Find the initial section before any special char
|
||||
string::size_type es = droot.find_first_of(nochars);
|
||||
string is;
|
||||
switch (es) {
|
||||
case string::npos: is = droot;break;
|
||||
case 0: break;
|
||||
default: is = droot.substr(0, es);break;
|
||||
}
|
||||
LOGDEB(("termMatch: initsec: [%s]\n", is.c_str()));
|
||||
|
||||
Xapian::TermIterator it = db.allterms_begin();
|
||||
if (!is.empty())
|
||||
it.skip_to(is.c_str());
|
||||
for (int n = 0;it != db.allterms_end(); it++) {
|
||||
// If we're beyond the terms matching the initial string, end
|
||||
if (!is.empty() && (*it).find(is) != 0)
|
||||
break;
|
||||
// Don't match special internal terms beginning with uppercase ascii
|
||||
if ((*it).at(0) >= 'A' && (*it).at(0) <= 'Z')
|
||||
continue;
|
||||
if (typ == ET_WILD) {
|
||||
if (fnmatch(droot.c_str(), (*it).c_str(), 0) == FNM_NOMATCH)
|
||||
continue;
|
||||
} else {
|
||||
if (regexec(&reg, (*it).c_str(), 0, 0, 0))
|
||||
continue;
|
||||
}
|
||||
// Do we want stem expansion here? We don't do it for now
|
||||
if (1 || lang.empty()) {
|
||||
res.push_back(*it);
|
||||
++n;
|
||||
} else {
|
||||
list<string> stemexps = stemExpand(lang, *it);
|
||||
unsigned int cnt =
|
||||
(int)stemexps.size() > max - n ? max - n : stemexps.size();
|
||||
list<string>::iterator sit = stemexps.begin();
|
||||
while (cnt--) {
|
||||
res.push_back(*sit++);
|
||||
n++;
|
||||
} else {
|
||||
regex_t reg;
|
||||
int errcode;
|
||||
if (typ == ET_REGEXP) {
|
||||
// Compile regexp. We anchor the input by enclosing it in ^ and $
|
||||
string mroot = droot;
|
||||
if (mroot.at(0) != '^')
|
||||
mroot = string("^") + mroot;
|
||||
if (mroot.at(mroot.length()-1) != '$')
|
||||
mroot += "$";
|
||||
if ((errcode = regcomp(&reg, mroot.c_str(),
|
||||
REG_EXTENDED|REG_NOSUB))) {
|
||||
char errbuf[200];
|
||||
regerror(errcode, &reg, errbuf, 199);
|
||||
LOGERR(("termMatch: regcomp failed: %s\n", errbuf));
|
||||
res.push_back(string(errbuf));
|
||||
regfree(&reg);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (n >= max)
|
||||
break;
|
||||
|
||||
// Find the initial section before any special char
|
||||
string::size_type es = droot.find_first_of(nochars);
|
||||
string is;
|
||||
switch (es) {
|
||||
case string::npos: is = droot;break;
|
||||
case 0: break;
|
||||
default: is = droot.substr(0, es);break;
|
||||
}
|
||||
LOGDEB(("termMatch: initsec: [%s]\n", is.c_str()));
|
||||
|
||||
Xapian::TermIterator it = db.allterms_begin();
|
||||
if (!is.empty())
|
||||
it.skip_to(is.c_str());
|
||||
for (int n = 0;it != db.allterms_end(); it++) {
|
||||
// If we're beyond the terms matching the initial string, end
|
||||
if (!is.empty() && (*it).find(is) != 0)
|
||||
break;
|
||||
// Don't match special internal terms beginning with uppercase ascii
|
||||
if ((*it).at(0) >= 'A' && (*it).at(0) <= 'Z')
|
||||
continue;
|
||||
if (typ == ET_WILD) {
|
||||
if (fnmatch(droot.c_str(), (*it).c_str(), 0) == FNM_NOMATCH)
|
||||
continue;
|
||||
} else {
|
||||
if (regexec(&reg, (*it).c_str(), 0, 0, 0))
|
||||
continue;
|
||||
}
|
||||
// Do we want stem expansion here? We don't do it for now
|
||||
res.push_back(TermMatchEntry(*it, it.get_termfreq()));
|
||||
++n;
|
||||
}
|
||||
if (typ == ET_REGEXP) {
|
||||
regfree(&reg);
|
||||
}
|
||||
|
||||
}
|
||||
res.sort();
|
||||
res.unique();
|
||||
if (typ == ET_REGEXP) {
|
||||
regfree(&reg);
|
||||
|
||||
TermMatchCmpByTerm tcmp;
|
||||
res.sort(tcmp);
|
||||
TermMatchTermEqual teq;
|
||||
res.unique(teq);
|
||||
TermMatchCmpByWcf wcmp;
|
||||
res.sort(wcmp);
|
||||
if (max > 0) {
|
||||
res.resize(MIN(res.size(), (unsigned int)max));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -1417,23 +1465,6 @@ bool Db::termExists(const string& word)
|
||||
return true;
|
||||
}
|
||||
|
||||
list<string> Db::stemExpand(const string& lang, const string& term)
|
||||
{
|
||||
list<string> dirs = m_extraDbs;
|
||||
dirs.push_front(m_basedir);
|
||||
list<string> exp;
|
||||
for (list<string>::iterator it = dirs.begin();
|
||||
it != dirs.end(); it++) {
|
||||
list<string> more = StemDb::stemExpand(*it, lang, term);
|
||||
LOGDEB1(("Db::stemExpand: Got %d from %s\n",
|
||||
more.size(), it->c_str()));
|
||||
exp.splice(exp.end(), more);
|
||||
}
|
||||
exp.sort();
|
||||
exp.unique();
|
||||
LOGDEB1(("Db:::stemExpand: final count %d \n", exp.size()));
|
||||
return exp;
|
||||
}
|
||||
|
||||
bool Db::stemDiffers(const string& lang, const string& word,
|
||||
const string& base)
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.44 2006-12-14 14:54:13 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rcldb.h,v 1.45 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
@ -53,7 +53,16 @@ namespace Rcl {
|
||||
class SearchData;
|
||||
class Native;
|
||||
class TermIter;
|
||||
|
||||
|
||||
// One term match result: the term text and its frequency.
class TermMatchEntry {
public:
    // Empty term, zero frequency.
    TermMatchEntry() : wcf(0) {}
    // Term t with frequency f.
    TermMatchEntry(const std::string& t, int f) : term(t), wcf(f) {}
    // Term t with zero frequency. Deliberately not explicit, so that a
    // string converts implicitly to an entry.
    TermMatchEntry(const std::string& t) : term(t), wcf(0) {}
    std::string term;
    int wcf; // Within collection frequency
};
|
||||
|
||||
/**
|
||||
* Wrapper class for the native database.
|
||||
*/
|
||||
@ -109,9 +118,9 @@ class Db {
|
||||
/** Return a list of index terms that match the input string
|
||||
* Expansion is performed with either wildcard or regexp processing
|
||||
* Stem expansion (using lang) is performed when typ is ET_STEM */
|
||||
enum MatchType {ET_WILD, ET_REGEXP};
|
||||
bool termMatch(MatchType typ, const string &s, list<string>& result,
|
||||
const string &lang, int max=20);
|
||||
enum MatchType {ET_WILD, ET_REGEXP, ET_STEM};
|
||||
bool termMatch(MatchType typ, const string &lang, const string &s,
|
||||
list<TermMatchEntry>& result, int max = -1);
|
||||
|
||||
/** Add extra database for querying */
|
||||
bool addQueryDb(const string &dir);
|
||||
@ -159,12 +168,11 @@ class Db {
|
||||
bool stemDiffers(const string& lang, const string& term,
|
||||
const string& base);
|
||||
|
||||
/** Perform stem expansion across all dbs configured for searching */
|
||||
list<string> stemExpand(const string& lang, const string& term);
|
||||
|
||||
/** Filename wildcard expansion */
|
||||
bool filenameWildExp(const string& exp, list<string>& names);
|
||||
string getReason(){return m_reason;}
|
||||
|
||||
|
||||
private:
|
||||
|
||||
string m_filterTopDir; // Current query filter on subtree top directory
|
||||
@ -201,6 +209,8 @@ private:
|
||||
vector<bool> updated;
|
||||
|
||||
bool reOpen(); // Close/open, same mode/opts
|
||||
bool stemExpand(const string &lang, const string &s,
|
||||
list<TermMatchEntry>& result, int max = -1);
|
||||
|
||||
/* Copy constructor and assignment are private and forbidden */
|
||||
Db(const Db &) {}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.6 2006-11-30 13:38:44 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.7 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -214,7 +214,12 @@ void StringToXapianQ::maybeStemExp(bool nostemexp,
|
||||
if (nostemexp) {
|
||||
exp = list<string>(1, term1);
|
||||
} else {
|
||||
exp = m_db.stemExpand(m_stemlang, term1);
|
||||
list<TermMatchEntry> l;
|
||||
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term1, l);
|
||||
for (list<TermMatchEntry>::const_iterator it = l.begin();
|
||||
it != l.end(); it++) {
|
||||
exp.push_back(it->term);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.5 2006-10-09 16:37:08 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.6 2006-12-19 12:11:21 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
/**
|
||||
@ -206,13 +206,24 @@ bool createDb(Xapian::Database& xdb, const string& dbdir, const string& lang)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Format a list of strings for display: each element is enclosed in square
// brackets and the elements are separated by single spaces, e.g.
// "[a] [b]". An empty list yields an empty string.
static std::string stringlistdisp(const std::list<std::string>& sl)
{
    std::string s;
    for (std::list<std::string>::const_iterator it = sl.begin();
         it != sl.end(); ++it) {
        // Separator goes before every element except the first, so no
        // trailing space needs to be removed afterwards
        if (!s.empty())
            s += ' ';
        s += '[';
        s += *it;
        s += ']';
    }
    return s;
}
|
||||
|
||||
/**
|
||||
* Expand term to list of all terms which stem to the same term.
|
||||
*/
|
||||
list<string> stemExpand(const string& dbdir, const string& lang,
|
||||
const string& term)
|
||||
bool stemExpand(const std::string& dbdir,
|
||||
const std::string& lang,
|
||||
const std::string& term,
|
||||
list<string>& result)
|
||||
{
|
||||
list<string> explist;
|
||||
try {
|
||||
Xapian::Stem stemmer(lang);
|
||||
string stem = stemmer.stem_word(term);
|
||||
@ -224,14 +235,14 @@ list<string> stemExpand(const string& dbdir, const string& lang,
|
||||
stemdbdir.c_str(), sdb.get_lastdocid()));
|
||||
if (!sdb.term_exists(stem)) {
|
||||
LOGDEB1(("Db::stemExpand: no term for %s\n", stem.c_str()));
|
||||
explist.push_back(term);
|
||||
return explist;
|
||||
result.push_back(term);
|
||||
return true;
|
||||
}
|
||||
Xapian::PostingIterator did = sdb.postlist_begin(stem);
|
||||
if (did == sdb.postlist_end(stem)) {
|
||||
LOGDEB1(("stemExpand: no term(1) for %s\n",stem.c_str()));
|
||||
explist.push_back(term);
|
||||
return explist;
|
||||
result.push_back(term);
|
||||
return true;
|
||||
}
|
||||
Xapian::Document doc = sdb.get_document(*did);
|
||||
string data = doc.get_data();
|
||||
@ -242,24 +253,24 @@ list<string> stemExpand(const string& dbdir, const string& lang,
|
||||
++pos;
|
||||
string::size_type pos1 = data.find_last_of("\n");
|
||||
if (pos == string::npos || pos1 == string::npos ||pos1 <= pos) { // ??
|
||||
explist.push_back(term);
|
||||
return explist;
|
||||
result.push_back(term);
|
||||
return true;
|
||||
}
|
||||
stringToStrings(data.substr(pos, pos1-pos), explist);
|
||||
stringToStrings(data.substr(pos, pos1-pos), result);
|
||||
|
||||
// If the user term itself is not in the list, add it.
|
||||
if (find(explist.begin(), explist.end(), term) == explist.end()) {
|
||||
explist.push_back(term);
|
||||
if (find(result.begin(), result.end(), term) == result.end()) {
|
||||
result.push_back(term);
|
||||
}
|
||||
LOGDEB(("stemExpand: %s -> %s\n", stem.c_str(),
|
||||
stringlistdisp(explist).c_str()));
|
||||
stringlistdisp(result).c_str()));
|
||||
} catch (...) {
|
||||
LOGERR(("stemExpand: error accessing stem db. dbdir [%s] lang [%s]\n",
|
||||
dbdir.c_str(), lang.c_str()));
|
||||
explist.push_back(term);
|
||||
return explist;
|
||||
result.push_back(term);
|
||||
return false;
|
||||
}
|
||||
return explist;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#ifndef _STEMDB_H_INCLUDED_
|
||||
#define _STEMDB_H_INCLUDED_
|
||||
/* @(#$Id: stemdb.h,v 1.2 2006-11-15 14:57:53 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: stemdb.h,v 1.3 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/// Stem database code
|
||||
///
|
||||
/// Stem databases list stems and the set of index terms they expand to. They
|
||||
@ -13,6 +13,7 @@
|
||||
#include <string>
|
||||
|
||||
#include <xapian.h>
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
using std::string;
|
||||
using std::list;
|
||||
@ -28,10 +29,10 @@ extern bool deleteDb(const std::string& dbdir, const std::string& lang);
|
||||
extern bool createDb(Xapian::Database& xdb,
|
||||
const std::string& dbdir, const std::string& lang);
|
||||
/// Expand term to stem siblings
|
||||
extern std::list<std::string> stemExpand(const std::string& dbdir,
|
||||
const std::string& lang,
|
||||
const std::string& term);
|
||||
|
||||
extern bool stemExpand(const std::string& dbdir,
|
||||
const std::string& lang,
|
||||
const std::string& term,
|
||||
list<string>& result);
|
||||
#ifndef NO_NAMESPACES
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.24 2006-12-18 12:06:11 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.25 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -38,16 +38,6 @@ using namespace std;
|
||||
|
||||
#define MIN(A,B) ((A)<(B)?(A):(B))
|
||||
|
||||
string stringlistdisp(const list<string>& sl)
|
||||
{
|
||||
string s;
|
||||
for (list<string>::const_iterator it = sl.begin(); it!= sl.end(); it++)
|
||||
s += "[" + *it + "] ";
|
||||
if (!s.empty())
|
||||
s.erase(s.length()-1);
|
||||
return s;
|
||||
}
|
||||
|
||||
int stringicmp(const string & s1, const string& s2)
|
||||
{
|
||||
string::const_iterator it1 = s1.begin();
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _SMALLUT_H_INCLUDED_
|
||||
#define _SMALLUT_H_INCLUDED_
|
||||
/* @(#$Id: smallut.h,v 1.24 2006-12-18 12:06:11 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: smallut.h,v 1.25 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <map>
|
||||
@ -38,8 +38,6 @@ extern int stringisuffcmp(const string& s1, const string& s2);
|
||||
// Compare charset names, removing the more common spelling variations
|
||||
extern bool samecharset(const string &cs1, const string &cs2);
|
||||
|
||||
extern string stringlistdisp(const list<string>& strs);
|
||||
|
||||
/**
|
||||
* Parse input string into list of strings.
|
||||
*
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user