display more complete stats in spell window

This commit is contained in:
Jean-Francois Dockes 2010-05-08 10:38:13 +02:00
parent 48358c8252
commit 4006825961
7 changed files with 182 additions and 132 deletions

View File

@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>400</width>
<height>300</height>
<height>450</height>
</rect>
</property>
<property name="sizePolicy">
@ -25,89 +25,105 @@
<property name="windowTitle">
<string>Term Explorer</string>
</property>
<layout class="QVBoxLayout">
<layout class="QVBoxLayout" name="verticalLayout_2">
<item>
<layout class="QVBoxLayout">
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<layout class="QGridLayout">
<item row="0" column="1" colspan="2">
<widget class="QLineEdit" name="baseWordLE">
<property name="minimumSize">
<size>
<width>100</width>
<height>0</height>
</size>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QPushButton" name="expandPB">
<property name="enabled">
<bool>false</bool>
</property>
<property name="focusPolicy">
<enum>Qt::NoFocus</enum>
</property>
<property name="text">
<string>&amp;Expand </string>
</property>
<property name="shortcut">
<string>Alt+E</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QComboBox" name="stemLangCMB"/>
</item>
<item row="0" column="0">
<widget class="QComboBox" name="expTypeCMB"/>
</item>
<item row="1" column="2">
<widget class="QPushButton" name="dismissPB">
<property name="enabled">
<bool>true</bool>
</property>
<property name="focusPolicy">
<enum>Qt::NoFocus</enum>
</property>
<property name="text">
<string>&amp;Close</string>
</property>
<property name="shortcut">
<string>Alt+C</string>
</property>
</widget>
<layout class="QVBoxLayout">
<item>
<layout class="QGridLayout">
<item row="0" column="1" colspan="2">
<widget class="QLineEdit" name="baseWordLE">
<property name="minimumSize">
<size>
<width>100</width>
<height>0</height>
</size>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QPushButton" name="expandPB">
<property name="enabled">
<bool>false</bool>
</property>
<property name="focusPolicy">
<enum>Qt::NoFocus</enum>
</property>
<property name="text">
<string>&amp;Expand </string>
</property>
<property name="shortcut">
<string>Alt+E</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QComboBox" name="stemLangCMB"/>
</item>
<item row="0" column="0">
<widget class="QComboBox" name="expTypeCMB"/>
</item>
<item row="1" column="2">
<widget class="QPushButton" name="dismissPB">
<property name="enabled">
<bool>true</bool>
</property>
<property name="focusPolicy">
<enum>Qt::NoFocus</enum>
</property>
<property name="text">
<string>&amp;Close</string>
</property>
<property name="shortcut">
<string>Alt+C</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</item>
<item>
<widget class="QLabel" name="statsLBL">
<property name="font">
<font>
<pointsize>7</pointsize>
</font>
</property>
<property name="text">
<string>No db info.</string>
</property>
</widget>
</item>
<item>
<widget class="QTableWidget" name="suggsLV">
<property name="rowCount">
<number>2</number>
</property>
<property name="columnCount">
<number>2</number>
</property>
<attribute name="horizontalHeaderStretchLastSection">
<bool>false</bool>
</attribute>
<attribute name="verticalHeaderVisible">
<bool>false</bool>
</attribute>
<attribute name="verticalHeaderVisible">
<bool>false</bool>
</attribute>
<attribute name="horizontalHeaderStretchLastSection">
<bool>false</bool>
</attribute>
<row/>
<row/>
<column/>
<column/>
</widget>
</item>
</layout>
</item>
<item>
<widget class="QTableWidget" name="suggsLV">
<property name="rowCount">
<number>2</number>
</property>
<property name="columnCount">
<number>2</number>
</property>
<attribute name="horizontalHeaderStretchLastSection">
<bool>false</bool>
</attribute>
<attribute name="verticalHeaderVisible">
<bool>false</bool>
</attribute>
<attribute name="verticalHeaderVisible">
<bool>false</bool>
</attribute>
<attribute name="horizontalHeaderStretchLastSection">
<bool>false</bool>
</attribute>
<row/>
<row/>
<column/>
<column/>
</widget>
</item>
</layout>
</widget>
<layoutdefault spacing="6" margin="11"/>

View File

@ -8,8 +8,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>298</width>
<height>295</height>
<width>400</width>
<height>450</height>
</rect>
</property>
<property name="sizePolicy">
@ -106,6 +106,19 @@
</widget>
</grid>
</widget>
<widget class="QLabel">
<property name="name">
<cstring>statsLBL</cstring>
</property>
<property name="font">
<font>
<pointsize>7</pointsize>
</font>
</property>
<property name="text">
<string>No db info.</string>
</property>
</widget>
<widget class="QListView">
<column>
<property name="text">
@ -120,7 +133,7 @@
</column>
<column>
<property name="text">
<string>Count</string>
<string>Doc. / Tot.</string>
</property>
<property name="clickable">
<bool>true</bool>

View File

@ -99,7 +99,7 @@ void SpellW::init()
suggsLV->setSorting(100, false);
#else
QStringList labels(tr("Term"));
labels.push_back(tr("Count"));
labels.push_back(tr("Doc. / Tot."));
suggsLV->setHorizontalHeaderLabels(labels);
suggsLV->setShowGrid(0);
suggsLV->horizontalHeader()->setResizeMode(0, QHeaderView::Stretch);
@ -109,7 +109,7 @@ void SpellW::init()
#endif
suggsLV->setColumnWidth(0, 200);
suggsLV->setColumnWidth(1, 100);
suggsLV->setColumnWidth(1, 150);
}
#if (QT_VERSION < 0x040000)
@ -165,18 +165,22 @@ void SpellW::doExpand()
case 2:mt = Rcl::Db::ET_STEM; break;
}
list<Rcl::TermMatchEntry> entries;
Rcl::TermMatchResult res;
switch (expTypeCMB->currentItem()) {
case 0:
case 1:
case 2: {
case 2:
{
string l_stemlang = stemLangCMB->currentText().ascii();
if (!rcldb->termMatch(mt, l_stemlang, expr, entries, 200)) {
if (!rcldb->termMatch(mt, l_stemlang, expr, res, 200)) {
LOGERR(("SpellW::doExpand:rcldb::termMatch failed\n"));
return;
}
statsLBL->setText(tr("Index: %1 documents, average length %2 terms")
.arg(res.dbdoccount).arg(res.dbavgdoclen, 0, 'f', 1));
}
break;
#ifdef RCL_USE_ASPELL
@ -197,13 +201,13 @@ void SpellW::doExpand()
}
for (list<string>::const_iterator it = suggs.begin();
it != suggs.end(); it++)
entries.push_back(Rcl::TermMatchEntry(*it));
res.entries.push_back(Rcl::TermMatchEntry(*it));
}
#endif
}
if (entries.empty()) {
if (res.entries.empty()) {
#if (QT_VERSION < 0x040000)
new MyListViewItem(suggsLV, tr("No expansion found"), "");
#else
@ -211,17 +215,18 @@ void SpellW::doExpand()
#endif
} else {
#if (QT_VERSION < 0x040000)
for (list<Rcl::TermMatchEntry>::reverse_iterator it = entries.rbegin();
it != entries.rend(); it++) {
for (list<Rcl::TermMatchEntry>::reverse_iterator it =
res.entries.rbegin();
it != res.entries.rend(); it++) {
#else
int row = 0;
for (list<Rcl::TermMatchEntry>::iterator it = entries.begin();
it != entries.end(); it++) {
for (list<Rcl::TermMatchEntry>::iterator it = res.entries.begin();
it != res.entries.end(); it++) {
#endif
LOGDEB(("SpellW::expand: %6d [%s]\n", it->wcf, it->term.c_str()));
char num[20];
if (it->wcf)
sprintf(num, "%d", it->wcf);
sprintf(num, "%d / %d", it->docs, it->wcf);
else
num[0] = 0;
#if (QT_VERSION < 0x040000)

View File

@ -233,17 +233,17 @@ void SSearch::completion()
// Query database
const int max = 100;
list<Rcl::TermMatchEntry> strs;
Rcl::TermMatchResult tmres;
string stemLang = (const char *)prefs.queryStemLang.ascii();
if (stemLang == "ALL") {
rclconfig->getConfParam("indexstemminglanguages", stemLang);
}
if (!rcldb->termMatch(Rcl::Db::ET_WILD, stemLang, s, strs, max) ||
strs.size() == 0) {
if (!rcldb->termMatch(Rcl::Db::ET_WILD, stemLang, s, tmres, max) ||
tmres.entries.size() == 0) {
QApplication::beep();
return;
}
if (strs.size() == (unsigned int)max) {
if (tmres.entries.size() == (unsigned int)max) {
QMessageBox::warning(0, "Recoll", tr("Too many completions"));
return;
}
@ -251,13 +251,13 @@ void SSearch::completion()
// If list from db is single word, insert it, else ask user to select
QString res;
bool ok = false;
if (strs.size() == 1) {
res = QString::fromUtf8(strs.begin()->term.c_str());
if (tmres.entries.size() == 1) {
res = QString::fromUtf8(tmres.entries.begin()->term.c_str());
ok = true;
} else {
QStringList lst;
for (list<Rcl::TermMatchEntry>::iterator it=strs.begin();
it != strs.end(); it++) {
for (list<Rcl::TermMatchEntry>::iterator it = tmres.entries.begin();
it != tmres.entries.end(); it++) {
lst.push_back(QString::fromUtf8(it->term.c_str()));
}
res = QInputDialog::getItem(tr("Completions"),

View File

@ -1340,11 +1340,11 @@ bool Db::filenameWildExp(const string& fnexp, list<string>& names)
} // else let it be
LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str()));
list<TermMatchEntry> entries;
if (!termMatch(ET_WILD, string(), pattern, entries, 1000, Doc::keyfn))
TermMatchResult result;
if (!termMatch(ET_WILD, string(), pattern, result, 1000, Doc::keyfn))
return false;
for (list<TermMatchEntry>::const_iterator it = entries.begin();
it != entries.end(); it++)
for (list<TermMatchEntry>::const_iterator it = result.entries.begin();
it != result.entries.end(); it++)
names.push_back(it->term);
if (names.empty()) {
@ -1375,18 +1375,17 @@ public:
};
bool Db::stemExpand(const string &lang, const string &term,
list<TermMatchEntry>& result, int max)
TermMatchResult& result, int max)
{
list<string> dirs = m_extraDbs;
dirs.push_front(m_basedir);
for (list<string>::iterator it = dirs.begin();
it != dirs.end(); it++) {
for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
list<string> more;
StemDb::stemExpand(*it, lang, term, more);
LOGDEB1(("Db::stemExpand: Got %d from %s\n",
more.size(), it->c_str()));
result.insert(result.end(), more.begin(), more.end());
if (result.size() >= (unsigned int)max)
result.entries.insert(result.entries.end(), more.begin(), more.end());
if (result.entries.size() >= (unsigned int)max)
break;
}
LOGDEB1(("Db:::stemExpand: final count %d \n", result.size()));
@ -1412,7 +1411,7 @@ const string regSpecChars = "(.[{";
// Find all index terms that match a wildcard or regular expression
bool Db::termMatch(MatchType typ, const string &lang,
const string &root,
list<TermMatchEntry>& res,
TermMatchResult& res,
int max,
const string& field,
string *prefixp
@ -1423,6 +1422,10 @@ bool Db::termMatch(MatchType typ, const string &lang,
Xapian::Database xdb = m_ndb->xdb();
res.clear();
XAPTRY(res.dbdoccount = xdb.get_doccount();
res.dbavgdoclen = xdb.get_avlength(), xdb, m_reason);
if (!m_reason.empty())
return false;
// Get rid of capitals and accents
string droot;
@ -1446,18 +1449,19 @@ bool Db::termMatch(MatchType typ, const string &lang,
if (typ == ET_STEM) {
if (!stemExpand(lang, root, res, max))
return false;
res.sort();
res.unique();
for (list<TermMatchEntry>::iterator it = res.begin();
it != res.end(); it++) {
XAPTRY(it->wcf = xdb.get_collection_freq(it->term),
res.entries.sort();
res.entries.unique();
for (list<TermMatchEntry>::iterator it = res.entries.begin();
it != res.entries.end(); it++) {
XAPTRY(it->wcf = xdb.get_collection_freq(it->term);
it->docs = xdb.get_termfreq(it->term),
xdb, m_reason);
if (!m_reason.empty())
return false;
LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str()));
}
if (!prefix.empty())
addPrefix(res, prefix);
addPrefix(res.entries, prefix);
} else {
regex_t reg;
int errcode;
@ -1468,7 +1472,7 @@ bool Db::termMatch(MatchType typ, const string &lang,
char errbuf[200];
regerror(errcode, &reg, errbuf, 199);
LOGERR(("termMatch: regcomp failed: %s\n", errbuf));
res.push_back(string(errbuf));
res.entries.push_back(string(errbuf));
regfree(&reg);
return false;
}
@ -1508,7 +1512,9 @@ bool Db::termMatch(MatchType typ, const string &lang,
continue;
}
// Do we want stem expansion here? We don't do it for now
res.push_back(TermMatchEntry(*it, it.get_termfreq()));
res.entries.push_back(TermMatchEntry(*it,
xdb.get_collection_freq(*it),
it.get_termfreq()));
++n;
}
m_reason.erase();
@ -1532,13 +1538,13 @@ bool Db::termMatch(MatchType typ, const string &lang,
}
TermMatchCmpByTerm tcmp;
res.sort(tcmp);
res.entries.sort(tcmp);
TermMatchTermEqual teq;
res.unique(teq);
res.entries.unique(teq);
TermMatchCmpByWcf wcmp;
res.sort(wcmp);
res.entries.sort(wcmp);
if (max > 0) {
res.resize(MIN(res.size(), (unsigned int)max));
res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
}
return true;
}

View File

@ -64,12 +64,22 @@ class Query;
/**
 * One index term produced by a term match / expansion, together with its
 * frequency counters.
 *
 * Equality and ordering look at the term text only; the counters are
 * ignored, so sorting and de-duplicating a list of entries works on terms.
 */
class TermMatchEntry {
public:
    // Every constructor sets both counters so that no entry is ever
    // created with an indeterminate value.
    TermMatchEntry() : wcf(0), docs(0) {}
    // Kept for callers that only supply the collection frequency.
    TermMatchEntry(const string& t, int f) : term(t), wcf(f), docs(0) {}
    TermMatchEntry(const string& t, int f, int d) : term(t), wcf(f), docs(d) {}
    // Intentionally not explicit: plain strings are pushed directly into
    // lists of entries and rely on this conversion.
    TermMatchEntry(const string& t) : term(t), wcf(0), docs(0) {}

    // Const-qualified so that const entries can be compared too.
    bool operator==(const TermMatchEntry& o) const { return term == o.term; }
    bool operator<(const TermMatchEntry& o) const { return term < o.term; }

    string term;
    int wcf;  // Total count of occurrences within collection.
    int docs; // Number of documents containing term.
};
/**
 * Result of a term match: the list of matching entries plus two
 * index-wide figures (document count and average document length).
 *
 * A freshly constructed object and one on which clear() was called are in
 * the same state: no entries, both figures zero.
 */
class TermMatchResult {
public:
    list<TermMatchEntry> entries; // Matching terms
    unsigned int dbdoccount;      // Document count for the index
    double dbavgdoclen;           // Average document length for the index

    TermMatchResult() : dbdoccount(0), dbavgdoclen(0.0) {}

    // Drop all entries and zero the index-wide figures.
    void clear()
    {
        dbavgdoclen = 0.0;
        dbdoccount = 0;
        entries.clear();
    }
};
/**
@ -157,7 +167,7 @@ class Db {
* Stem expansion is performed if lang is not empty */
enum MatchType {ET_WILD, ET_REGEXP, ET_STEM};
bool termMatch(MatchType typ, const string &lang, const string &s,
list<TermMatchEntry>& result, int max = -1,
TermMatchResult& result, int max = -1,
const string& field = "",
string *prefix = 0
);
@ -239,7 +249,7 @@ private:
// Reinitialize when adding/removing additional dbs
bool adjustdbs();
bool stemExpand(const string &lang, const string &s,
list<TermMatchEntry>& result, int max = -1);
TermMatchResult& result, int max = -1);
/* Copy constructor and assignment are private and forbidden */
Db(const Db &) {}

View File

@ -347,18 +347,18 @@ void StringToXapianQ::expandTerm(bool nostemexp,
if (prefix)
*prefix = pfx;
} else {
list<TermMatchEntry> l;
TermMatchResult res;
if (haswild) {
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, l, -1, m_field,
prefix);
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1,
m_field, prefix);
} else {
sterm = term;
m_uterms.push_back(sterm);
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, l, -1, m_field,
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field,
prefix);
}
for (list<TermMatchEntry>::const_iterator it = l.begin();
it != l.end(); it++) {
for (list<TermMatchEntry>::const_iterator it = res.entries.begin();
it != res.entries.end(); it++) {
exp.push_back(it->term);
}
}