diff --git a/src/qtgui/spell_w.cpp b/src/qtgui/spell_w.cpp
index 7cbe5b7d..44bf6dc9 100644
--- a/src/qtgui/spell_w.cpp
+++ b/src/qtgui/spell_w.cpp
@@ -51,6 +51,12 @@ using std::list;
 using std::multimap;
 using std::string;
 
+// True for the modes which need no input word: index statistics, failed files
+static inline bool wordlessMode(SpellW::comboboxchoice v)
+{
+    return (v == SpellW::TYPECMB_STATS || v == SpellW::TYPECMB_FAILED);
+}
+
 void SpellW::init()
 {
     m_c2t.clear();
@@ -64,6 +70,8 @@ void SpellW::init()
     m_c2t.push_back(TYPECMB_SPELL);
     expTypeCMB->addItem(tr("Show index statistics"));
     m_c2t.push_back(TYPECMB_STATS);
+    expTypeCMB->addItem(tr("List files which could not be indexed (slow)"));
+    m_c2t.push_back(TYPECMB_FAILED);
 
     // Stemming language combobox
     stemLangCMB->clear();
@@ -74,8 +82,7 @@ void SpellW::init()
     }
     for (vector<string>::const_iterator it = langs.begin();
          it != langs.end(); it++) {
-        stemLangCMB->
-            addItem(QString::fromUtf8(it->c_str(), it->length()));
+        stemLangCMB->addItem(u8s2qs(*it));
     }
 
     (void)new HelpClient(this);
@@ -131,7 +138,7 @@ void SpellW::doExpand()
 
     // Can't clear qt4 table widget: resets column headers too
     resTW->setRowCount(0);
-    if (baseWordLE->text().isEmpty() && mode != TYPECMB_STATS)
+    if (baseWordLE->text().isEmpty() && !wordlessMode(mode))
         return;
 
     string reason;
@@ -157,7 +164,7 @@ void SpellW::doExpand()
     Rcl::TermMatchResult res;
     string expr = string((const char *)baseWordLE->text().toUtf8());
     Rcl::DbStats dbs;
-    rcldb->dbStats(dbs);
+    rcldb->dbStats(dbs, false);
 
     switch (mode) {
     case TYPECMB_WILD:
@@ -199,6 +206,12 @@ void SpellW::doExpand()
         return;
     }
     break;
+    case TYPECMB_FAILED:
+    {
+        showFailed();
+        return;
+    }
+    break;
     }
 
     if (res.entries.empty()) {
@@ -224,15 +237,14 @@ void SpellW::doExpand()
 
         for (vector<Rcl::TermMatchEntry>::iterator it = res.entries.begin();
              it != res.entries.end(); it++) {
-            LOGDEB2("SpellW::expand: " << (it->wcf) << " [" << (it->term) << "]\n" );
+            LOGDEB2("SpellW::expand: " << it->wcf << " [" << it->term << "]\n");
             char num[30];
             if (it->wcf)
                 sprintf(num, "%d / %d", it->docs, it->wcf);
             else
                 num[0] = 0;
             resTW->setRowCount(row+1);
-            resTW->setItem(row, 0,
-                           new QTableWidgetItem(QString::fromUtf8(it->term.c_str())));
+            resTW->setItem(row, 0, new QTableWidgetItem(u8s2qs(it->term)));
             resTW->setItem(row++, 1,
                            new QTableWidgetItem(QString::fromUtf8(num)));
         }
@@ -245,7 +257,7 @@ void SpellW::showStats()
     int row = 0;
 
     Rcl::DbStats res;
-    if (!rcldb->dbStats(res)) {
+    if (!rcldb->dbStats(res, false)) {
         LOGERR("SpellW::doExpand:rcldb::dbStats failed\n" );
         return;
     }
@@ -323,8 +335,7 @@ void SpellW::showStats()
     resTW->setItem(row, 0,
                    new QTableWidgetItem(tr("Database directory size")));
     resTW->setItem(row++, 1, new QTableWidgetItem(
-                       QString::fromUtf8(
-                           displayableBytes(dbkbytes*1024).c_str())));
+                       u8s2qs(displayableBytes(dbkbytes*1024))));
 
     vector<string> allmimetypes = theconfig->getAllMimeTypes();
     multimap<int, string> mtbycnt;
@@ -350,12 +361,30 @@ void SpellW::showStats()
          it != mtbycnt.rend(); it++) {
         resTW->setRowCount(row+1);
         resTW->setItem(row, 0, new QTableWidgetItem(QString(" ") +
-                           QString::fromUtf8(it->second.c_str())));
+                           u8s2qs(it->second)));
         resTW->setItem(row++, 1, new QTableWidgetItem(
                            QString::number(it->first)));
     }
 }
 
+// Fill the results table with one row per entry of DbStats::failedurls
+void SpellW::showFailed()
+{
+    statsLBL->setText("");
+    int row = 0;
+
+    Rcl::DbStats res;
+    if (!rcldb->dbStats(res, true)) {
+        LOGERR("SpellW::showFailed: rcldb::dbStats failed\n");
+        return;
+    }
+    for (const auto& entry : res.failedurls) {
+        resTW->setRowCount(row+1);
+        resTW->setItem(row, 0, new QTableWidgetItem(u8s2qs(entry)));
+        resTW->setItem(row++, 1, new QTableWidgetItem(""));
+    }
+}
+
 void SpellW::wordChanged(const QString &text)
 {
     if (text.isEmpty()) {
@@ -390,7 +419,7 @@ void SpellW::setMode(comboboxchoice mode)
 
 void SpellW::setModeCommon(comboboxchoice mode)
 {
-    if (m_prevmode == TYPECMB_STATS) {
+    if (wordlessMode(m_prevmode)) {
         baseWordLE->setText("");
     }
     m_prevmode = mode;
@@ -412,13 +441,9 @@ void SpellW::setModeCommon(comboboxchoice mode)
     } else {
         stemLangCMB->setEnabled(false);
     }
-    if (mode == TYPECMB_STATS)
+
+    if (wordlessMode(mode)) {
         baseWordLE->setEnabled(false);
-    else
-        baseWordLE->setEnabled(true);
-
-
-    if (mode == TYPECMB_STATS) {
         QStringList labels(tr("Item"));
         labels.push_back(tr("Value"));
         resTW->setHorizontalHeaderLabels(labels);
@@ -426,6 +451,7 @@ void SpellW::setModeCommon(comboboxchoice mode)
         caseSensCB->setEnabled(false);
         doExpand();
     } else {
+        baseWordLE->setEnabled(true);
         QStringList labels(tr("Term"));
         labels.push_back(tr("Doc. / Tot."));
         resTW->setHorizontalHeaderLabels(labels);
diff --git a/src/qtgui/spell_w.h b/src/qtgui/spell_w.h
index ac0a5dd4..dd90d83b 100644
--- a/src/qtgui/spell_w.h
+++ b/src/qtgui/spell_w.h
@@ -36,7 +36,7 @@ public:
     virtual bool eventFilter(QObject *target, QEvent *event );
 
     enum comboboxchoice {TYPECMB_NONE, TYPECMB_WILD, TYPECMB_REG, TYPECMB_STEM,
-                         TYPECMB_SPELL, TYPECMB_STATS};
+                         TYPECMB_SPELL, TYPECMB_STATS, TYPECMB_FAILED};
 public slots:
     virtual void doExpand();
     virtual void wordChanged(const QString&);
@@ -58,6 +58,7 @@ private:
     void init();
     void copy();
     void showStats();
+    void showFailed();
     int cmbIdx(comboboxchoice mode);
     void setModeCommon(comboboxchoice mode);
 };
diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
index 651abe07..5921dea0 100644
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@@ -2140,7 +2140,7 @@ bool Db::purgeOrphans(const string &udi)
     return m_ndb->purgeFileWrite(true, udi, uniterm);
 }
 
-bool Db::dbStats(DbStats& res)
+bool Db::dbStats(DbStats& res, bool listfailed)
 {
     if (!m_ndb || !m_ndb->m_isopen)
         return false;
@@ -2153,6 +2153,46 @@ bool Db::dbStats(DbStats& res)
            , xdb, m_reason);
     if (!m_reason.empty())
         return false;
+    if (!listfailed) {
+        return true;
+    }
+
+    // listfailed is set : look for failed docs
+    string ermsg;
+    try {
+        // Xapian docids are 1-based and get_lastdocid() is the highest id used: inclusive bound
+        for (unsigned int docid = 1; docid <= xdb.get_lastdocid(); docid++) {
+            try {
+                Xapian::Document doc = xdb.get_document(docid);
+                string sig = doc.get_value(VALUE_SIG);
+                if (sig.empty() || sig[sig.size()-1] != '+') {
+                    continue;
+                }
+                string data = doc.get_data();
+                ConfSimple parms(data);
+                // Skip documents whose data record cannot be parsed
+                if (parms.ok()) {
+                    string url, ipath;
+                    parms.get(Doc::keyipt, ipath);
+                    parms.get(Doc::keyurl, url);
+                    // Turn to local url or not? It seems to make more
+                    // sense to keep the original urls as seen by the
+                    // indexer.
+                    // m_config->urlrewrite(dbdir, url);
+                    if (!ipath.empty()) {
+                        url += " | " + ipath;
+                    }
+                    res.failedurls.push_back(url);
+                }
+            } catch (const Xapian::DocNotFoundError&) {
+                continue;
+            }
+        }
+    } XCATCHERROR(ermsg);
+    if (!ermsg.empty()) {
+        LOGERR("Db::dbStats: " << ermsg << "\n");
+        return false;
+    }
     return true;
 }
 
diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h
index ebfcfc74..e7485ad1 100644
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@@ -124,14 +124,13 @@ public:
 class DbStats {
 public:
     DbStats()
-    :dbdoccount(0), dbavgdoclen(0), mindoclen(0), maxdoclen(0)
-    {
-    }
+        :dbdoccount(0), dbavgdoclen(0), mindoclen(0), maxdoclen(0) { }
     // Index-wide stats
     unsigned int dbdoccount;
     double dbavgdoclen;
     size_t mindoclen;
     size_t maxdoclen;
+    vector<string> failedurls; /* Only set if requested */
 };
 
 inline bool has_prefix(const string& trm)
@@ -385,7 +384,8 @@ class Db {
     bool termMatch(int typ_sens, const string &lang, const string &term,
                    TermMatchResult& result, int max = -1,
                    const string& field = "", vector<string> *multiwords = 0);
-    bool dbStats(DbStats& stats);
+    /** Index statistics. If listFailed is set, also fill stats.failedurls (walks every docid) */
+    bool dbStats(DbStats& stats, bool listFailed = false);
     /** Return min and max years for doc mod times in db */
     bool maxYearSpan(int *minyear, int *maxyear);
     /** Return all mime types in index. This can be different from the