diff --git a/src/qt4gui/spell.ui b/src/qt4gui/spell.ui index 06d9256f..6c0b7826 100644 --- a/src/qt4gui/spell.ui +++ b/src/qt4gui/spell.ui @@ -7,7 +7,7 @@ 0 0 400 - 300 + 450 @@ -25,89 +25,105 @@ Term Explorer - + - + - - - - - - 100 - 0 - - - - - - - - false - - - Qt::NoFocus - - - &Expand - - - Alt+E - - - - - - - - - - - - - true - - - Qt::NoFocus - - - &Close - - - Alt+C - - + + + + + + + + 100 + 0 + + + + + + + + false + + + Qt::NoFocus + + + &Expand + + + Alt+E + + + + + + + + + + + + + true + + + Qt::NoFocus + + + &Close + + + Alt+C + + + + + + + + + 7 + + + + No db info. + + + + + + + 2 + + + 2 + + + false + + + false + + + false + + + false + + + + + + + - - - - 2 - - - 2 - - - false - - - false - - - false - - - false - - - - - - - diff --git a/src/qtgui/spell.ui b/src/qtgui/spell.ui index f0dccbcf..bb2eff23 100644 --- a/src/qtgui/spell.ui +++ b/src/qtgui/spell.ui @@ -8,8 +8,8 @@ 0 0 - 298 - 295 + 400 + 450 @@ -106,6 +106,19 @@ + + + statsLBL + + + + 7 + + + + No db info. + + @@ -120,7 +133,7 @@ - Count + Doc. / Tot. true diff --git a/src/qtgui/spell_w.cpp b/src/qtgui/spell_w.cpp index 59ff6245..75d47f5d 100644 --- a/src/qtgui/spell_w.cpp +++ b/src/qtgui/spell_w.cpp @@ -99,7 +99,7 @@ void SpellW::init() suggsLV->setSorting(100, false); #else QStringList labels(tr("Term")); - labels.push_back(tr("Count")); + labels.push_back(tr("Doc. 
/ Tot.")); suggsLV->setHorizontalHeaderLabels(labels); suggsLV->setShowGrid(0); suggsLV->horizontalHeader()->setResizeMode(0, QHeaderView::Stretch); @@ -109,7 +109,7 @@ void SpellW::init() #endif suggsLV->setColumnWidth(0, 200); - suggsLV->setColumnWidth(1, 100); + suggsLV->setColumnWidth(1, 150); } #if (QT_VERSION < 0x040000) @@ -165,18 +165,22 @@ void SpellW::doExpand() case 2:mt = Rcl::Db::ET_STEM; break; } - list entries; + Rcl::TermMatchResult res; switch (expTypeCMB->currentItem()) { case 0: case 1: - case 2: { + case 2: + { string l_stemlang = stemLangCMB->currentText().ascii(); - if (!rcldb->termMatch(mt, l_stemlang, expr, entries, 200)) { + if (!rcldb->termMatch(mt, l_stemlang, expr, res, 200)) { LOGERR(("SpellW::doExpand:rcldb::termMatch failed\n")); return; } + statsLBL->setText(tr("Index: %1 documents, average length %2 terms") + .arg(res.dbdoccount).arg(res.dbavgdoclen, 0, 'f', 1)); } + break; #ifdef RCL_USE_ASPELL @@ -197,13 +201,13 @@ void SpellW::doExpand() } for (list::const_iterator it = suggs.begin(); it != suggs.end(); it++) - entries.push_back(Rcl::TermMatchEntry(*it)); + res.entries.push_back(Rcl::TermMatchEntry(*it)); } #endif } - if (entries.empty()) { + if (res.entries.empty()) { #if (QT_VERSION < 0x040000) new MyListViewItem(suggsLV, tr("No expansion found"), ""); #else @@ -211,17 +215,18 @@ void SpellW::doExpand() #endif } else { #if (QT_VERSION < 0x040000) - for (list::reverse_iterator it = entries.rbegin(); - it != entries.rend(); it++) { + for (list::reverse_iterator it = + res.entries.rbegin(); + it != res.entries.rend(); it++) { #else int row = 0; - for (list::iterator it = entries.begin(); - it != entries.end(); it++) { + for (list::iterator it = res.entries.begin(); + it != res.entries.end(); it++) { #endif LOGDEB(("SpellW::expand: %6d [%s]\n", it->wcf, it->term.c_str())); char num[20]; if (it->wcf) - sprintf(num, "%d", it->wcf); + sprintf(num, "%d / %d", it->docs, it->wcf); else num[0] = 0; #if (QT_VERSION < 0x040000) diff --git 
a/src/qtgui/ssearch_w.cpp b/src/qtgui/ssearch_w.cpp index a0f24011..5711aac4 100644 --- a/src/qtgui/ssearch_w.cpp +++ b/src/qtgui/ssearch_w.cpp @@ -233,17 +233,17 @@ void SSearch::completion() // Query database const int max = 100; - list strs; + Rcl::TermMatchResult tmres; string stemLang = (const char *)prefs.queryStemLang.ascii(); if (stemLang == "ALL") { rclconfig->getConfParam("indexstemminglanguages", stemLang); } - if (!rcldb->termMatch(Rcl::Db::ET_WILD, stemLang, s, strs, max) || - strs.size() == 0) { + if (!rcldb->termMatch(Rcl::Db::ET_WILD, stemLang, s, tmres, max) || + tmres.entries.size() == 0) { QApplication::beep(); return; } - if (strs.size() == (unsigned int)max) { + if (tmres.entries.size() == (unsigned int)max) { QMessageBox::warning(0, "Recoll", tr("Too many completions")); return; } @@ -251,13 +251,13 @@ void SSearch::completion() // If list from db is single word, insert it, else ask user to select QString res; bool ok = false; - if (strs.size() == 1) { - res = QString::fromUtf8(strs.begin()->term.c_str()); + if (tmres.entries.size() == 1) { + res = QString::fromUtf8(tmres.entries.begin()->term.c_str()); ok = true; } else { QStringList lst; - for (list::iterator it=strs.begin(); - it != strs.end(); it++) { + for (list::iterator it = tmres.entries.begin(); + it != tmres.entries.end(); it++) { lst.push_back(QString::fromUtf8(it->term.c_str())); } res = QInputDialog::getItem(tr("Completions"), diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 1f9f33a5..bcea623a 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1340,11 +1340,11 @@ bool Db::filenameWildExp(const string& fnexp, list& names) } // else let it be LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str())); - list entries; - if (!termMatch(ET_WILD, string(), pattern, entries, 1000, Doc::keyfn)) + TermMatchResult result; + if (!termMatch(ET_WILD, string(), pattern, result, 1000, Doc::keyfn)) return false; - for (list::const_iterator it = entries.begin(); 
- it != entries.end(); it++) + for (list::const_iterator it = result.entries.begin(); + it != result.entries.end(); it++) names.push_back(it->term); if (names.empty()) { @@ -1375,18 +1375,17 @@ public: }; bool Db::stemExpand(const string &lang, const string &term, - list& result, int max) + TermMatchResult& result, int max) { list dirs = m_extraDbs; dirs.push_front(m_basedir); - for (list::iterator it = dirs.begin(); - it != dirs.end(); it++) { + for (list::iterator it = dirs.begin(); it != dirs.end(); it++) { list more; StemDb::stemExpand(*it, lang, term, more); LOGDEB1(("Db::stemExpand: Got %d from %s\n", more.size(), it->c_str())); - result.insert(result.end(), more.begin(), more.end()); - if (result.size() >= (unsigned int)max) + result.entries.insert(result.entries.end(), more.begin(), more.end()); + if (result.entries.size() >= (unsigned int)max) break; } LOGDEB1(("Db:::stemExpand: final count %d \n", result.size())); @@ -1412,7 +1411,7 @@ const string regSpecChars = "(.[{"; // Find all index terms that match a wildcard or regular expression bool Db::termMatch(MatchType typ, const string &lang, const string &root, - list& res, + TermMatchResult& res, int max, const string& field, string *prefixp @@ -1423,6 +1422,10 @@ bool Db::termMatch(MatchType typ, const string &lang, Xapian::Database xdb = m_ndb->xdb(); res.clear(); + XAPTRY(res.dbdoccount = xdb.get_doccount(); + res.dbavgdoclen = xdb.get_avlength(), xdb, m_reason); + if (!m_reason.empty()) + return false; // Get rid of capitals and accents string droot; @@ -1446,18 +1449,19 @@ bool Db::termMatch(MatchType typ, const string &lang, if (typ == ET_STEM) { if (!stemExpand(lang, root, res, max)) return false; - res.sort(); - res.unique(); - for (list::iterator it = res.begin(); - it != res.end(); it++) { - XAPTRY(it->wcf = xdb.get_collection_freq(it->term), + res.entries.sort(); + res.entries.unique(); + for (list::iterator it = res.entries.begin(); + it != res.entries.end(); it++) { + XAPTRY(it->wcf = 
xdb.get_collection_freq(it->term); + it->docs = xdb.get_termfreq(it->term), xdb, m_reason); if (!m_reason.empty()) return false; LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str())); } if (!prefix.empty()) - addPrefix(res, prefix); + addPrefix(res.entries, prefix); } else { regex_t reg; int errcode; @@ -1468,7 +1472,7 @@ bool Db::termMatch(MatchType typ, const string &lang, char errbuf[200]; regerror(errcode, ®, errbuf, 199); LOGERR(("termMatch: regcomp failed: %s\n", errbuf)); - res.push_back(string(errbuf)); + res.entries.push_back(string(errbuf)); regfree(®); return false; } @@ -1508,7 +1512,9 @@ bool Db::termMatch(MatchType typ, const string &lang, continue; } // Do we want stem expansion here? We don't do it for now - res.push_back(TermMatchEntry(*it, it.get_termfreq())); + res.entries.push_back(TermMatchEntry(*it, + xdb.get_collection_freq(*it), + it.get_termfreq())); ++n; } m_reason.erase(); @@ -1532,13 +1538,13 @@ bool Db::termMatch(MatchType typ, const string &lang, } TermMatchCmpByTerm tcmp; - res.sort(tcmp); + res.entries.sort(tcmp); TermMatchTermEqual teq; - res.unique(teq); + res.entries.unique(teq); TermMatchCmpByWcf wcmp; - res.sort(wcmp); + res.entries.sort(wcmp); if (max > 0) { - res.resize(MIN(res.size(), (unsigned int)max)); + res.entries.resize(MIN(res.entries.size(), (unsigned int)max)); } return true; } diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index fb6792ea..3520396e 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -64,12 +64,22 @@ class Query; class TermMatchEntry { public: TermMatchEntry() : wcf(0) {} - TermMatchEntry(const string&t, int f) : term(t), wcf(f) {} + TermMatchEntry(const string&t, int f, int d) : term(t), wcf(f), docs(d) {} TermMatchEntry(const string&t) : term(t), wcf(0) {} bool operator==(const TermMatchEntry &o) { return term == o.term;} bool operator<(const TermMatchEntry &o) { return term < o.term;} string term; - int wcf; // Within collection frequency + int wcf; // Total count of occurrences 
within collection. + int docs; // Number of documents containing term. +}; + +class TermMatchResult { +public: + TermMatchResult() {clear();} + void clear() {entries.clear(); dbdoccount = 0; dbavgdoclen = 0;} + list entries; + unsigned int dbdoccount; + double dbavgdoclen; }; /** @@ -157,7 +167,7 @@ class Db { * Stem expansion is performed if lang is not empty */ enum MatchType {ET_WILD, ET_REGEXP, ET_STEM}; bool termMatch(MatchType typ, const string &lang, const string &s, - list& result, int max = -1, + TermMatchResult& result, int max = -1, const string& field = "", string *prefix = 0 ); @@ -239,7 +249,7 @@ private: // Reinitialize when adding/removing additional dbs bool adjustdbs(); bool stemExpand(const string &lang, const string &s, - list& result, int max = -1); + TermMatchResult& result, int max = -1); /* Copyconst and assignemt private and forbidden */ Db(const Db &) {} diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index d3f8cca1..9e754f82 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -347,18 +347,18 @@ void StringToXapianQ::expandTerm(bool nostemexp, if (prefix) *prefix = pfx; } else { - list l; + TermMatchResult res; if (haswild) { - m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, l, -1, m_field, - prefix); + m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, + m_field, prefix); } else { sterm = term; m_uterms.push_back(sterm); - m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, l, -1, m_field, + m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field, prefix); } - for (list::const_iterator it = l.begin(); - it != l.end(); it++) { + for (list::const_iterator it = res.entries.begin(); + it != res.entries.end(); it++) { exp.push_back(it->term); } }