display more complete stats in spell window

This commit is contained in:
Jean-Francois Dockes 2010-05-08 10:38:13 +02:00
parent 48358c8252
commit 4006825961
7 changed files with 182 additions and 132 deletions

View File

@ -7,7 +7,7 @@
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>400</width> <width>400</width>
<height>300</height> <height>450</height>
</rect> </rect>
</property> </property>
<property name="sizePolicy"> <property name="sizePolicy">
@ -25,89 +25,105 @@
<property name="windowTitle"> <property name="windowTitle">
<string>Term Explorer</string> <string>Term Explorer</string>
</property> </property>
<layout class="QVBoxLayout"> <layout class="QVBoxLayout" name="verticalLayout_2">
<item> <item>
<layout class="QVBoxLayout"> <layout class="QVBoxLayout" name="verticalLayout">
<item> <item>
<layout class="QGridLayout"> <layout class="QVBoxLayout">
<item row="0" column="1" colspan="2"> <item>
<widget class="QLineEdit" name="baseWordLE"> <layout class="QGridLayout">
<property name="minimumSize"> <item row="0" column="1" colspan="2">
<size> <widget class="QLineEdit" name="baseWordLE">
<width>100</width> <property name="minimumSize">
<height>0</height> <size>
</size> <width>100</width>
</property> <height>0</height>
</widget> </size>
</item> </property>
<item row="1" column="1"> </widget>
<widget class="QPushButton" name="expandPB"> </item>
<property name="enabled"> <item row="1" column="1">
<bool>false</bool> <widget class="QPushButton" name="expandPB">
</property> <property name="enabled">
<property name="focusPolicy"> <bool>false</bool>
<enum>Qt::NoFocus</enum> </property>
</property> <property name="focusPolicy">
<property name="text"> <enum>Qt::NoFocus</enum>
<string>&amp;Expand </string> </property>
</property> <property name="text">
<property name="shortcut"> <string>&amp;Expand </string>
<string>Alt+E</string> </property>
</property> <property name="shortcut">
</widget> <string>Alt+E</string>
</item> </property>
<item row="1" column="0"> </widget>
<widget class="QComboBox" name="stemLangCMB"/> </item>
</item> <item row="1" column="0">
<item row="0" column="0"> <widget class="QComboBox" name="stemLangCMB"/>
<widget class="QComboBox" name="expTypeCMB"/> </item>
</item> <item row="0" column="0">
<item row="1" column="2"> <widget class="QComboBox" name="expTypeCMB"/>
<widget class="QPushButton" name="dismissPB"> </item>
<property name="enabled"> <item row="1" column="2">
<bool>true</bool> <widget class="QPushButton" name="dismissPB">
</property> <property name="enabled">
<property name="focusPolicy"> <bool>true</bool>
<enum>Qt::NoFocus</enum> </property>
</property> <property name="focusPolicy">
<property name="text"> <enum>Qt::NoFocus</enum>
<string>&amp;Close</string> </property>
</property> <property name="text">
<property name="shortcut"> <string>&amp;Close</string>
<string>Alt+C</string> </property>
</property> <property name="shortcut">
</widget> <string>Alt+C</string>
</property>
</widget>
</item>
</layout>
</item> </item>
</layout> </layout>
</item> </item>
<item>
<widget class="QLabel" name="statsLBL">
<property name="font">
<font>
<pointsize>7</pointsize>
</font>
</property>
<property name="text">
<string>No db info.</string>
</property>
</widget>
</item>
<item>
<widget class="QTableWidget" name="suggsLV">
<property name="rowCount">
<number>2</number>
</property>
<property name="columnCount">
<number>2</number>
</property>
<attribute name="horizontalHeaderStretchLastSection">
<bool>false</bool>
</attribute>
<attribute name="verticalHeaderVisible">
<bool>false</bool>
</attribute>
<attribute name="verticalHeaderVisible">
<bool>false</bool>
</attribute>
<attribute name="horizontalHeaderStretchLastSection">
<bool>false</bool>
</attribute>
<row/>
<row/>
<column/>
<column/>
</widget>
</item>
</layout> </layout>
</item> </item>
<item>
<widget class="QTableWidget" name="suggsLV">
<property name="rowCount">
<number>2</number>
</property>
<property name="columnCount">
<number>2</number>
</property>
<attribute name="horizontalHeaderStretchLastSection">
<bool>false</bool>
</attribute>
<attribute name="verticalHeaderVisible">
<bool>false</bool>
</attribute>
<attribute name="verticalHeaderVisible">
<bool>false</bool>
</attribute>
<attribute name="horizontalHeaderStretchLastSection">
<bool>false</bool>
</attribute>
<row/>
<row/>
<column/>
<column/>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<layoutdefault spacing="6" margin="11"/> <layoutdefault spacing="6" margin="11"/>

View File

@ -8,8 +8,8 @@
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>298</width> <width>400</width>
<height>295</height> <height>450</height>
</rect> </rect>
</property> </property>
<property name="sizePolicy"> <property name="sizePolicy">
@ -106,6 +106,19 @@
</widget> </widget>
</grid> </grid>
</widget> </widget>
<widget class="QLabel">
<property name="name">
<cstring>statsLBL</cstring>
</property>
<property name="font">
<font>
<pointsize>7</pointsize>
</font>
</property>
<property name="text">
<string>No db info.</string>
</property>
</widget>
<widget class="QListView"> <widget class="QListView">
<column> <column>
<property name="text"> <property name="text">
@ -120,7 +133,7 @@
</column> </column>
<column> <column>
<property name="text"> <property name="text">
<string>Count</string> <string>Doc. / Tot.</string>
</property> </property>
<property name="clickable"> <property name="clickable">
<bool>true</bool> <bool>true</bool>

View File

@ -99,7 +99,7 @@ void SpellW::init()
suggsLV->setSorting(100, false); suggsLV->setSorting(100, false);
#else #else
QStringList labels(tr("Term")); QStringList labels(tr("Term"));
labels.push_back(tr("Count")); labels.push_back(tr("Doc. / Tot."));
suggsLV->setHorizontalHeaderLabels(labels); suggsLV->setHorizontalHeaderLabels(labels);
suggsLV->setShowGrid(0); suggsLV->setShowGrid(0);
suggsLV->horizontalHeader()->setResizeMode(0, QHeaderView::Stretch); suggsLV->horizontalHeader()->setResizeMode(0, QHeaderView::Stretch);
@ -109,7 +109,7 @@ void SpellW::init()
#endif #endif
suggsLV->setColumnWidth(0, 200); suggsLV->setColumnWidth(0, 200);
suggsLV->setColumnWidth(1, 100); suggsLV->setColumnWidth(1, 150);
} }
#if (QT_VERSION < 0x040000) #if (QT_VERSION < 0x040000)
@ -165,18 +165,22 @@ void SpellW::doExpand()
case 2:mt = Rcl::Db::ET_STEM; break; case 2:mt = Rcl::Db::ET_STEM; break;
} }
list<Rcl::TermMatchEntry> entries; Rcl::TermMatchResult res;
switch (expTypeCMB->currentItem()) { switch (expTypeCMB->currentItem()) {
case 0: case 0:
case 1: case 1:
case 2: { case 2:
{
string l_stemlang = stemLangCMB->currentText().ascii(); string l_stemlang = stemLangCMB->currentText().ascii();
if (!rcldb->termMatch(mt, l_stemlang, expr, entries, 200)) { if (!rcldb->termMatch(mt, l_stemlang, expr, res, 200)) {
LOGERR(("SpellW::doExpand:rcldb::termMatch failed\n")); LOGERR(("SpellW::doExpand:rcldb::termMatch failed\n"));
return; return;
} }
statsLBL->setText(tr("Index: %1 documents, average length %2 terms")
.arg(res.dbdoccount).arg(res.dbavgdoclen, 0, 'f', 1));
} }
break; break;
#ifdef RCL_USE_ASPELL #ifdef RCL_USE_ASPELL
@ -197,13 +201,13 @@ void SpellW::doExpand()
} }
for (list<string>::const_iterator it = suggs.begin(); for (list<string>::const_iterator it = suggs.begin();
it != suggs.end(); it++) it != suggs.end(); it++)
entries.push_back(Rcl::TermMatchEntry(*it)); res.entries.push_back(Rcl::TermMatchEntry(*it));
} }
#endif #endif
} }
if (entries.empty()) { if (res.entries.empty()) {
#if (QT_VERSION < 0x040000) #if (QT_VERSION < 0x040000)
new MyListViewItem(suggsLV, tr("No expansion found"), ""); new MyListViewItem(suggsLV, tr("No expansion found"), "");
#else #else
@ -211,17 +215,18 @@ void SpellW::doExpand()
#endif #endif
} else { } else {
#if (QT_VERSION < 0x040000) #if (QT_VERSION < 0x040000)
for (list<Rcl::TermMatchEntry>::reverse_iterator it = entries.rbegin(); for (list<Rcl::TermMatchEntry>::reverse_iterator it =
it != entries.rend(); it++) { res.entries.rbegin();
it != res.entries.rend(); it++) {
#else #else
int row = 0; int row = 0;
for (list<Rcl::TermMatchEntry>::iterator it = entries.begin(); for (list<Rcl::TermMatchEntry>::iterator it = res.entries.begin();
it != entries.end(); it++) { it != res.entries.end(); it++) {
#endif #endif
LOGDEB(("SpellW::expand: %6d [%s]\n", it->wcf, it->term.c_str())); LOGDEB(("SpellW::expand: %6d [%s]\n", it->wcf, it->term.c_str()));
char num[20]; char num[20];
if (it->wcf) if (it->wcf)
sprintf(num, "%d", it->wcf); sprintf(num, "%d / %d", it->docs, it->wcf);
else else
num[0] = 0; num[0] = 0;
#if (QT_VERSION < 0x040000) #if (QT_VERSION < 0x040000)

View File

@ -233,17 +233,17 @@ void SSearch::completion()
// Query database // Query database
const int max = 100; const int max = 100;
list<Rcl::TermMatchEntry> strs; Rcl::TermMatchResult tmres;
string stemLang = (const char *)prefs.queryStemLang.ascii(); string stemLang = (const char *)prefs.queryStemLang.ascii();
if (stemLang == "ALL") { if (stemLang == "ALL") {
rclconfig->getConfParam("indexstemminglanguages", stemLang); rclconfig->getConfParam("indexstemminglanguages", stemLang);
} }
if (!rcldb->termMatch(Rcl::Db::ET_WILD, stemLang, s, strs, max) || if (!rcldb->termMatch(Rcl::Db::ET_WILD, stemLang, s, tmres, max) ||
strs.size() == 0) { tmres.entries.size() == 0) {
QApplication::beep(); QApplication::beep();
return; return;
} }
if (strs.size() == (unsigned int)max) { if (tmres.entries.size() == (unsigned int)max) {
QMessageBox::warning(0, "Recoll", tr("Too many completions")); QMessageBox::warning(0, "Recoll", tr("Too many completions"));
return; return;
} }
@ -251,13 +251,13 @@ void SSearch::completion()
// If list from db is single word, insert it, else ask user to select // If list from db is single word, insert it, else ask user to select
QString res; QString res;
bool ok = false; bool ok = false;
if (strs.size() == 1) { if (tmres.entries.size() == 1) {
res = QString::fromUtf8(strs.begin()->term.c_str()); res = QString::fromUtf8(tmres.entries.begin()->term.c_str());
ok = true; ok = true;
} else { } else {
QStringList lst; QStringList lst;
for (list<Rcl::TermMatchEntry>::iterator it=strs.begin(); for (list<Rcl::TermMatchEntry>::iterator it = tmres.entries.begin();
it != strs.end(); it++) { it != tmres.entries.end(); it++) {
lst.push_back(QString::fromUtf8(it->term.c_str())); lst.push_back(QString::fromUtf8(it->term.c_str()));
} }
res = QInputDialog::getItem(tr("Completions"), res = QInputDialog::getItem(tr("Completions"),

View File

@ -1340,11 +1340,11 @@ bool Db::filenameWildExp(const string& fnexp, list<string>& names)
} // else let it be } // else let it be
LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str())); LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str()));
list<TermMatchEntry> entries; TermMatchResult result;
if (!termMatch(ET_WILD, string(), pattern, entries, 1000, Doc::keyfn)) if (!termMatch(ET_WILD, string(), pattern, result, 1000, Doc::keyfn))
return false; return false;
for (list<TermMatchEntry>::const_iterator it = entries.begin(); for (list<TermMatchEntry>::const_iterator it = result.entries.begin();
it != entries.end(); it++) it != result.entries.end(); it++)
names.push_back(it->term); names.push_back(it->term);
if (names.empty()) { if (names.empty()) {
@ -1375,18 +1375,17 @@ public:
}; };
bool Db::stemExpand(const string &lang, const string &term, bool Db::stemExpand(const string &lang, const string &term,
list<TermMatchEntry>& result, int max) TermMatchResult& result, int max)
{ {
list<string> dirs = m_extraDbs; list<string> dirs = m_extraDbs;
dirs.push_front(m_basedir); dirs.push_front(m_basedir);
for (list<string>::iterator it = dirs.begin(); for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
it != dirs.end(); it++) {
list<string> more; list<string> more;
StemDb::stemExpand(*it, lang, term, more); StemDb::stemExpand(*it, lang, term, more);
LOGDEB1(("Db::stemExpand: Got %d from %s\n", LOGDEB1(("Db::stemExpand: Got %d from %s\n",
more.size(), it->c_str())); more.size(), it->c_str()));
result.insert(result.end(), more.begin(), more.end()); result.entries.insert(result.entries.end(), more.begin(), more.end());
if (result.size() >= (unsigned int)max) if (result.entries.size() >= (unsigned int)max)
break; break;
} }
LOGDEB1(("Db:::stemExpand: final count %d \n", result.size())); LOGDEB1(("Db:::stemExpand: final count %d \n", result.size()));
@ -1412,7 +1411,7 @@ const string regSpecChars = "(.[{";
// Find all index terms that match a wildcard or regular expression // Find all index terms that match a wildcard or regular expression
bool Db::termMatch(MatchType typ, const string &lang, bool Db::termMatch(MatchType typ, const string &lang,
const string &root, const string &root,
list<TermMatchEntry>& res, TermMatchResult& res,
int max, int max,
const string& field, const string& field,
string *prefixp string *prefixp
@ -1423,6 +1422,10 @@ bool Db::termMatch(MatchType typ, const string &lang,
Xapian::Database xdb = m_ndb->xdb(); Xapian::Database xdb = m_ndb->xdb();
res.clear(); res.clear();
XAPTRY(res.dbdoccount = xdb.get_doccount();
res.dbavgdoclen = xdb.get_avlength(), xdb, m_reason);
if (!m_reason.empty())
return false;
// Get rid of capitals and accents // Get rid of capitals and accents
string droot; string droot;
@ -1446,18 +1449,19 @@ bool Db::termMatch(MatchType typ, const string &lang,
if (typ == ET_STEM) { if (typ == ET_STEM) {
if (!stemExpand(lang, root, res, max)) if (!stemExpand(lang, root, res, max))
return false; return false;
res.sort(); res.entries.sort();
res.unique(); res.entries.unique();
for (list<TermMatchEntry>::iterator it = res.begin(); for (list<TermMatchEntry>::iterator it = res.entries.begin();
it != res.end(); it++) { it != res.entries.end(); it++) {
XAPTRY(it->wcf = xdb.get_collection_freq(it->term), XAPTRY(it->wcf = xdb.get_collection_freq(it->term);
it->docs = xdb.get_termfreq(it->term),
xdb, m_reason); xdb, m_reason);
if (!m_reason.empty()) if (!m_reason.empty())
return false; return false;
LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str())); LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str()));
} }
if (!prefix.empty()) if (!prefix.empty())
addPrefix(res, prefix); addPrefix(res.entries, prefix);
} else { } else {
regex_t reg; regex_t reg;
int errcode; int errcode;
@ -1468,7 +1472,7 @@ bool Db::termMatch(MatchType typ, const string &lang,
char errbuf[200]; char errbuf[200];
regerror(errcode, &reg, errbuf, 199); regerror(errcode, &reg, errbuf, 199);
LOGERR(("termMatch: regcomp failed: %s\n", errbuf)); LOGERR(("termMatch: regcomp failed: %s\n", errbuf));
res.push_back(string(errbuf)); res.entries.push_back(string(errbuf));
regfree(&reg); regfree(&reg);
return false; return false;
} }
@ -1508,7 +1512,9 @@ bool Db::termMatch(MatchType typ, const string &lang,
continue; continue;
} }
// Do we want stem expansion here? We don't do it for now // Do we want stem expansion here? We don't do it for now
res.push_back(TermMatchEntry(*it, it.get_termfreq())); res.entries.push_back(TermMatchEntry(*it,
xdb.get_collection_freq(*it),
it.get_termfreq()));
++n; ++n;
} }
m_reason.erase(); m_reason.erase();
@ -1532,13 +1538,13 @@ bool Db::termMatch(MatchType typ, const string &lang,
} }
TermMatchCmpByTerm tcmp; TermMatchCmpByTerm tcmp;
res.sort(tcmp); res.entries.sort(tcmp);
TermMatchTermEqual teq; TermMatchTermEqual teq;
res.unique(teq); res.entries.unique(teq);
TermMatchCmpByWcf wcmp; TermMatchCmpByWcf wcmp;
res.sort(wcmp); res.entries.sort(wcmp);
if (max > 0) { if (max > 0) {
res.resize(MIN(res.size(), (unsigned int)max)); res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
} }
return true; return true;
} }

View File

@ -64,12 +64,22 @@ class Query;
/**
 * One index term produced by a term expansion, with its frequency
 * statistics.
 *
 * Entries compare and sort on the term text only; the counters take no part
 * in operator== / operator<.
 */
class TermMatchEntry {
public:
    /** Empty term, both counters zero. */
    TermMatchEntry() : wcf(0), docs(0) {}
    /**
     * @param t term text
     * @param f total number of occurrences of the term in the collection
     * @param d number of documents containing the term
     */
    TermMatchEntry(const string& t, int f, int d) : term(t), wcf(f), docs(d) {}
    /**
     * Term with no statistics yet: both counters start at zero so that
     * code printing them never reads an indeterminate value.
     * Deliberately not explicit: callers push plain strings into lists of
     * entries and rely on the implicit conversion.
     */
    TermMatchEntry(const string& t) : term(t), wcf(0), docs(0) {}
    bool operator==(const TermMatchEntry& o) const { return term == o.term; }
    bool operator<(const TermMatchEntry& o) const { return term < o.term; }
    string term;
    int wcf;  // Total count of occurrences within collection.
    int docs; // Number of documents containing term.
};

/**
 * Outcome of a term expansion: the matching entries plus two index-wide
 * figures (document count and average document length).
 */
class TermMatchResult {
public:
    TermMatchResult() : dbdoccount(0), dbavgdoclen(0) {}
    /** Drop all entries and reset the index-wide figures to zero. */
    void clear() { entries.clear(); dbdoccount = 0; dbavgdoclen = 0; }
    list<TermMatchEntry> entries;
    unsigned int dbdoccount; // Number of documents in the index.
    double dbavgdoclen;      // Average document length.
};
/** /**
@ -157,7 +167,7 @@ class Db {
* Stem expansion is performed if lang is not empty */ * Stem expansion is performed if lang is not empty */
enum MatchType {ET_WILD, ET_REGEXP, ET_STEM}; enum MatchType {ET_WILD, ET_REGEXP, ET_STEM};
bool termMatch(MatchType typ, const string &lang, const string &s, bool termMatch(MatchType typ, const string &lang, const string &s,
list<TermMatchEntry>& result, int max = -1, TermMatchResult& result, int max = -1,
const string& field = "", const string& field = "",
string *prefix = 0 string *prefix = 0
); );
@ -239,7 +249,7 @@ private:
// Reinitialize when adding/removing additional dbs // Reinitialize when adding/removing additional dbs
bool adjustdbs(); bool adjustdbs();
bool stemExpand(const string &lang, const string &s, bool stemExpand(const string &lang, const string &s,
list<TermMatchEntry>& result, int max = -1); TermMatchResult& result, int max = -1);
/* Copy constructor and assignment private and forbidden */ /* Copy constructor and assignment private and forbidden */
Db(const Db &) {} Db(const Db &) {}

View File

@ -347,18 +347,18 @@ void StringToXapianQ::expandTerm(bool nostemexp,
if (prefix) if (prefix)
*prefix = pfx; *prefix = pfx;
} else { } else {
list<TermMatchEntry> l; TermMatchResult res;
if (haswild) { if (haswild) {
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, l, -1, m_field, m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1,
prefix); m_field, prefix);
} else { } else {
sterm = term; sterm = term;
m_uterms.push_back(sterm); m_uterms.push_back(sterm);
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, l, -1, m_field, m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field,
prefix); prefix);
} }
for (list<TermMatchEntry>::const_iterator it = l.begin(); for (list<TermMatchEntry>::const_iterator it = res.entries.begin();
it != l.end(); it++) { it != res.entries.end(); it++) {
exp.push_back(it->term); exp.push_back(it->term);
} }
} }