diff --git a/src/qt4gui/spell.ui b/src/qt4gui/spell.ui
index 06d9256f..6c0b7826 100644
--- a/src/qt4gui/spell.ui
+++ b/src/qt4gui/spell.ui
@@ -7,7 +7,7 @@
0
0
400
- 300
+ 450
@@ -25,89 +25,105 @@
Term Explorer
-
+
-
-
+
-
-
-
-
-
-
-
- 100
- 0
-
-
-
-
- -
-
-
- false
-
-
- Qt::NoFocus
-
-
- &Expand
-
-
- Alt+E
-
-
-
- -
-
-
- -
-
-
- -
-
-
- true
-
-
- Qt::NoFocus
-
-
- &Close
-
-
- Alt+C
-
-
+
+
-
+
+
-
+
+
+
+ 100
+ 0
+
+
+
+
+ -
+
+
+ false
+
+
+ Qt::NoFocus
+
+
+ &Expand
+
+
+ Alt+E
+
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ true
+
+
+ Qt::NoFocus
+
+
+ &Close
+
+
+ Alt+C
+
+
+
+
+ -
+
+
+
+ 7
+
+
+
+ No db info.
+
+
+
+ -
+
+
+ 2
+
+
+ 2
+
+
+ false
+
+
+ false
+
+
+ false
+
+
+ false
+
+
+
+
+
+
+
- -
-
-
- 2
-
-
- 2
-
-
- false
-
-
- false
-
-
- false
-
-
- false
-
-
-
-
-
-
-
diff --git a/src/qtgui/spell.ui b/src/qtgui/spell.ui
index f0dccbcf..bb2eff23 100644
--- a/src/qtgui/spell.ui
+++ b/src/qtgui/spell.ui
@@ -8,8 +8,8 @@
0
0
- 298
- 295
+ 400
+ 450
@@ -106,6 +106,19 @@
+
+
+ statsLBL
+
+
+
+ 7
+
+
+
+ No db info.
+
+
@@ -120,7 +133,7 @@
- Count
+ Doc. / Tot.
true
diff --git a/src/qtgui/spell_w.cpp b/src/qtgui/spell_w.cpp
index 59ff6245..75d47f5d 100644
--- a/src/qtgui/spell_w.cpp
+++ b/src/qtgui/spell_w.cpp
@@ -99,7 +99,7 @@ void SpellW::init()
suggsLV->setSorting(100, false);
#else
QStringList labels(tr("Term"));
- labels.push_back(tr("Count"));
+ labels.push_back(tr("Doc. / Tot."));
suggsLV->setHorizontalHeaderLabels(labels);
suggsLV->setShowGrid(0);
suggsLV->horizontalHeader()->setResizeMode(0, QHeaderView::Stretch);
@@ -109,7 +109,7 @@ void SpellW::init()
#endif
suggsLV->setColumnWidth(0, 200);
- suggsLV->setColumnWidth(1, 100);
+ suggsLV->setColumnWidth(1, 150);
}
#if (QT_VERSION < 0x040000)
@@ -165,18 +165,22 @@ void SpellW::doExpand()
case 2:mt = Rcl::Db::ET_STEM; break;
}
- list entries;
+ Rcl::TermMatchResult res;
switch (expTypeCMB->currentItem()) {
case 0:
case 1:
- case 2: {
+ case 2:
+ {
string l_stemlang = stemLangCMB->currentText().ascii();
- if (!rcldb->termMatch(mt, l_stemlang, expr, entries, 200)) {
+ if (!rcldb->termMatch(mt, l_stemlang, expr, res, 200)) {
LOGERR(("SpellW::doExpand:rcldb::termMatch failed\n"));
return;
}
+ statsLBL->setText(tr("Index: %1 documents, average length %2 terms")
+ .arg(res.dbdoccount).arg(res.dbavgdoclen, 0, 'f', 1));
}
+
break;
#ifdef RCL_USE_ASPELL
@@ -197,13 +201,13 @@ void SpellW::doExpand()
}
for (list::const_iterator it = suggs.begin();
it != suggs.end(); it++)
- entries.push_back(Rcl::TermMatchEntry(*it));
+ res.entries.push_back(Rcl::TermMatchEntry(*it));
}
#endif
}
- if (entries.empty()) {
+ if (res.entries.empty()) {
#if (QT_VERSION < 0x040000)
new MyListViewItem(suggsLV, tr("No expansion found"), "");
#else
@@ -211,17 +215,18 @@ void SpellW::doExpand()
#endif
} else {
#if (QT_VERSION < 0x040000)
- for (list::reverse_iterator it = entries.rbegin();
- it != entries.rend(); it++) {
+ for (list::reverse_iterator it =
+ res.entries.rbegin();
+ it != res.entries.rend(); it++) {
#else
int row = 0;
- for (list::iterator it = entries.begin();
- it != entries.end(); it++) {
+ for (list::iterator it = res.entries.begin();
+ it != res.entries.end(); it++) {
#endif
LOGDEB(("SpellW::expand: %6d [%s]\n", it->wcf, it->term.c_str()));
char num[20];
if (it->wcf)
- sprintf(num, "%d", it->wcf);
+ sprintf(num, "%d / %d", it->docs, it->wcf);
else
num[0] = 0;
#if (QT_VERSION < 0x040000)
diff --git a/src/qtgui/ssearch_w.cpp b/src/qtgui/ssearch_w.cpp
index a0f24011..5711aac4 100644
--- a/src/qtgui/ssearch_w.cpp
+++ b/src/qtgui/ssearch_w.cpp
@@ -233,17 +233,17 @@ void SSearch::completion()
// Query database
const int max = 100;
- list strs;
+ Rcl::TermMatchResult tmres;
string stemLang = (const char *)prefs.queryStemLang.ascii();
if (stemLang == "ALL") {
rclconfig->getConfParam("indexstemminglanguages", stemLang);
}
- if (!rcldb->termMatch(Rcl::Db::ET_WILD, stemLang, s, strs, max) ||
- strs.size() == 0) {
+ if (!rcldb->termMatch(Rcl::Db::ET_WILD, stemLang, s, tmres, max) ||
+ tmres.entries.size() == 0) {
QApplication::beep();
return;
}
- if (strs.size() == (unsigned int)max) {
+ if (tmres.entries.size() == (unsigned int)max) {
QMessageBox::warning(0, "Recoll", tr("Too many completions"));
return;
}
@@ -251,13 +251,13 @@ void SSearch::completion()
// If list from db is single word, insert it, else ask user to select
QString res;
bool ok = false;
- if (strs.size() == 1) {
- res = QString::fromUtf8(strs.begin()->term.c_str());
+ if (tmres.entries.size() == 1) {
+ res = QString::fromUtf8(tmres.entries.begin()->term.c_str());
ok = true;
} else {
QStringList lst;
- for (list::iterator it=strs.begin();
- it != strs.end(); it++) {
+ for (list::iterator it = tmres.entries.begin();
+ it != tmres.entries.end(); it++) {
lst.push_back(QString::fromUtf8(it->term.c_str()));
}
res = QInputDialog::getItem(tr("Completions"),
diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
index 1f9f33a5..bcea623a 100644
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@@ -1340,11 +1340,11 @@ bool Db::filenameWildExp(const string& fnexp, list& names)
} // else let it be
LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str()));
- list entries;
- if (!termMatch(ET_WILD, string(), pattern, entries, 1000, Doc::keyfn))
+ TermMatchResult result;
+ if (!termMatch(ET_WILD, string(), pattern, result, 1000, Doc::keyfn))
return false;
- for (list::const_iterator it = entries.begin();
- it != entries.end(); it++)
+ for (list::const_iterator it = result.entries.begin();
+ it != result.entries.end(); it++)
names.push_back(it->term);
if (names.empty()) {
@@ -1375,18 +1375,17 @@ public:
};
bool Db::stemExpand(const string &lang, const string &term,
- list& result, int max)
+ TermMatchResult& result, int max)
{
list dirs = m_extraDbs;
dirs.push_front(m_basedir);
- for (list::iterator it = dirs.begin();
- it != dirs.end(); it++) {
+ for (list::iterator it = dirs.begin(); it != dirs.end(); it++) {
list more;
StemDb::stemExpand(*it, lang, term, more);
LOGDEB1(("Db::stemExpand: Got %d from %s\n",
more.size(), it->c_str()));
- result.insert(result.end(), more.begin(), more.end());
- if (result.size() >= (unsigned int)max)
+ result.entries.insert(result.entries.end(), more.begin(), more.end());
+ if (result.entries.size() >= (unsigned int)max)
break;
}
LOGDEB1(("Db:::stemExpand: final count %d \n", result.size()));
@@ -1412,7 +1411,7 @@ const string regSpecChars = "(.[{";
// Find all index terms that match a wildcard or regular expression
bool Db::termMatch(MatchType typ, const string &lang,
const string &root,
- list& res,
+ TermMatchResult& res,
int max,
const string& field,
string *prefixp
@@ -1423,6 +1422,10 @@ bool Db::termMatch(MatchType typ, const string &lang,
Xapian::Database xdb = m_ndb->xdb();
res.clear();
+ XAPTRY(res.dbdoccount = xdb.get_doccount();
+ res.dbavgdoclen = xdb.get_avlength(), xdb, m_reason);
+ if (!m_reason.empty())
+ return false;
// Get rid of capitals and accents
string droot;
@@ -1446,18 +1449,19 @@ bool Db::termMatch(MatchType typ, const string &lang,
if (typ == ET_STEM) {
if (!stemExpand(lang, root, res, max))
return false;
- res.sort();
- res.unique();
- for (list::iterator it = res.begin();
- it != res.end(); it++) {
- XAPTRY(it->wcf = xdb.get_collection_freq(it->term),
+ res.entries.sort();
+ res.entries.unique();
+ for (list::iterator it = res.entries.begin();
+ it != res.entries.end(); it++) {
+ XAPTRY(it->wcf = xdb.get_collection_freq(it->term);
+ it->docs = xdb.get_termfreq(it->term),
xdb, m_reason);
if (!m_reason.empty())
return false;
LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str()));
}
if (!prefix.empty())
- addPrefix(res, prefix);
+ addPrefix(res.entries, prefix);
} else {
regex_t reg;
int errcode;
@@ -1468,7 +1472,7 @@ bool Db::termMatch(MatchType typ, const string &lang,
char errbuf[200];
regerror(errcode, ®, errbuf, 199);
LOGERR(("termMatch: regcomp failed: %s\n", errbuf));
- res.push_back(string(errbuf));
+ res.entries.push_back(string(errbuf));
regfree(®);
return false;
}
@@ -1508,7 +1512,9 @@ bool Db::termMatch(MatchType typ, const string &lang,
continue;
}
// Do we want stem expansion here? We don't do it for now
- res.push_back(TermMatchEntry(*it, it.get_termfreq()));
+ res.entries.push_back(TermMatchEntry(*it,
+ xdb.get_collection_freq(*it),
+ it.get_termfreq()));
++n;
}
m_reason.erase();
@@ -1532,13 +1538,13 @@ bool Db::termMatch(MatchType typ, const string &lang,
}
TermMatchCmpByTerm tcmp;
- res.sort(tcmp);
+ res.entries.sort(tcmp);
TermMatchTermEqual teq;
- res.unique(teq);
+ res.entries.unique(teq);
TermMatchCmpByWcf wcmp;
- res.sort(wcmp);
+ res.entries.sort(wcmp);
if (max > 0) {
- res.resize(MIN(res.size(), (unsigned int)max));
+ res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
}
return true;
}
diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h
index fb6792ea..3520396e 100644
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@@ -64,12 +64,22 @@ class Query;
class TermMatchEntry {
public:
- TermMatchEntry() : wcf(0) {}
- TermMatchEntry(const string&t, int f) : term(t), wcf(f) {}
- TermMatchEntry(const string&t) : term(t), wcf(0) {}
+ TermMatchEntry() : wcf(0), docs(0) {} // Empty term, both counters zeroed
+ TermMatchEntry(const string&t, int f, int d) : term(t), wcf(f), docs(d) {}
+ TermMatchEntry(const string&t) : term(t), wcf(0), docs(0) {} // Counters not known yet
bool operator==(const TermMatchEntry &o) { return term == o.term;}
bool operator<(const TermMatchEntry &o) { return term < o.term;}
string term;
- int wcf; // Within collection frequency
+ int wcf; // Total count of occurrences within collection.
+ int docs; // Number of documents containing term.
+};
+
+class TermMatchResult { // Output of a term match: the matching entries plus index-wide statistics
+public:
+ TermMatchResult() {clear();} // Starts out empty, with zeroed statistics
+ void clear() {entries.clear(); dbdoccount = 0; dbavgdoclen = 0;} // Drops all entries and resets both statistics
+ list entries; // The matching terms, one TermMatchEntry each
+ unsigned int dbdoccount; // Document count of the index the match ran against
+ double dbavgdoclen; // Average document length in that index
};
/**
@@ -157,7 +167,7 @@ class Db {
* Stem expansion is performed if lang is not empty */
enum MatchType {ET_WILD, ET_REGEXP, ET_STEM};
bool termMatch(MatchType typ, const string &lang, const string &s,
- list& result, int max = -1,
+ TermMatchResult& result, int max = -1,
const string& field = "",
string *prefix = 0
);
@@ -239,7 +249,7 @@ private:
// Reinitialize when adding/removing additional dbs
bool adjustdbs();
bool stemExpand(const string &lang, const string &s,
- list& result, int max = -1);
+ TermMatchResult& result, int max = -1);
/* Copyconst and assignemt private and forbidden */
Db(const Db &) {}
diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp
index d3f8cca1..9e754f82 100644
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@@ -347,18 +347,18 @@ void StringToXapianQ::expandTerm(bool nostemexp,
if (prefix)
*prefix = pfx;
} else {
- list l;
+ TermMatchResult res;
if (haswild) {
- m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, l, -1, m_field,
- prefix);
+ m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1,
+ m_field, prefix);
} else {
sterm = term;
m_uterms.push_back(sterm);
- m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, l, -1, m_field,
+ m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field,
prefix);
}
- for (list::const_iterator it = l.begin();
- it != l.end(); it++) {
+ for (list::const_iterator it = res.entries.begin();
+ it != res.entries.end(); it++) {
exp.push_back(it->term);
}
}