Add function to list failed files to the term explorer

This commit is contained in:
Jean-Francois Dockes 2017-09-03 10:46:11 +02:00
parent 51ac5e8440
commit 503edd5a92
4 changed files with 87 additions and 24 deletions

View File

@ -51,6 +51,11 @@ using std::list;
using std::multimap;
using std::string;
// Tell whether the given explorer mode works without an input word.
// Callers use this to skip the "empty input" check and to disable the
// word entry field for these modes.
inline bool wordlessMode(SpellW::comboboxchoice v)
{
    switch (v) {
    case SpellW::TYPECMB_STATS:
    case SpellW::TYPECMB_FAILED:
        return true;
    default:
        return false;
    }
}
void SpellW::init()
{
m_c2t.clear();
@ -64,6 +69,8 @@ void SpellW::init()
m_c2t.push_back(TYPECMB_SPELL);
expTypeCMB->addItem(tr("Show index statistics"));
m_c2t.push_back(TYPECMB_STATS);
expTypeCMB->addItem(tr("List files which could not be indexed (slow)"));
m_c2t.push_back(TYPECMB_FAILED);
// Stemming language combobox
stemLangCMB->clear();
@ -74,8 +81,7 @@ void SpellW::init()
}
for (vector<string>::const_iterator it = langs.begin();
it != langs.end(); it++) {
stemLangCMB->
addItem(QString::fromUtf8(it->c_str(), it->length()));
stemLangCMB->addItem(u8s2qs(*it));
}
(void)new HelpClient(this);
@ -131,7 +137,7 @@ void SpellW::doExpand()
// Can't clear qt4 table widget: resets column headers too
resTW->setRowCount(0);
if (baseWordLE->text().isEmpty() && mode != TYPECMB_STATS)
if (baseWordLE->text().isEmpty() && !wordlessMode(mode))
return;
string reason;
@ -157,7 +163,7 @@ void SpellW::doExpand()
Rcl::TermMatchResult res;
string expr = string((const char *)baseWordLE->text().toUtf8());
Rcl::DbStats dbs;
rcldb->dbStats(dbs);
rcldb->dbStats(dbs, false);
switch (mode) {
case TYPECMB_WILD:
@ -199,6 +205,12 @@ void SpellW::doExpand()
return;
}
break;
case TYPECMB_FAILED:
{
showFailed();
return;
}
break;
}
if (res.entries.empty()) {
@ -224,15 +236,14 @@ void SpellW::doExpand()
for (vector<Rcl::TermMatchEntry>::iterator it = res.entries.begin();
it != res.entries.end(); it++) {
LOGDEB2("SpellW::expand: " << (it->wcf) << " [" << (it->term) << "]\n" );
LOGDEB2("SpellW::expand: " << it->wcf << " [" << it->term << "]\n");
char num[30];
if (it->wcf)
sprintf(num, "%d / %d", it->docs, it->wcf);
else
num[0] = 0;
resTW->setRowCount(row+1);
resTW->setItem(row, 0,
new QTableWidgetItem(QString::fromUtf8(it->term.c_str())));
resTW->setItem(row, 0, new QTableWidgetItem(u8s2qs(it->term)));
resTW->setItem(row++, 1,
new QTableWidgetItem(QString::fromUtf8(num)));
}
@ -245,7 +256,7 @@ void SpellW::showStats()
int row = 0;
Rcl::DbStats res;
if (!rcldb->dbStats(res)) {
if (!rcldb->dbStats(res, false)) {
LOGERR("SpellW::doExpand:rcldb::dbStats failed\n" );
return;
}
@ -323,8 +334,7 @@ void SpellW::showStats()
resTW->setItem(row, 0,
new QTableWidgetItem(tr("Database directory size")));
resTW->setItem(row++, 1, new QTableWidgetItem(
QString::fromUtf8(
displayableBytes(dbkbytes*1024).c_str())));
u8s2qs(displayableBytes(dbkbytes*1024))));
vector<string> allmimetypes = theconfig->getAllMimeTypes();
multimap<int, string> mtbycnt;
@ -350,12 +360,29 @@ void SpellW::showStats()
it != mtbycnt.rend(); it++) {
resTW->setRowCount(row+1);
resTW->setItem(row, 0, new QTableWidgetItem(QString(" ") +
QString::fromUtf8(it->second.c_str())));
u8s2qs(it->second)));
resTW->setItem(row++, 1, new QTableWidgetItem(
QString::number(it->first)));
}
}
// Fill the result table with the list of failed documents returned by
// Db::dbStats(), one row per entry. The entry text goes into the first
// column, the second column is left empty. The stats label is cleared.
void SpellW::showFailed()
{
    statsLBL->setText("");

    Rcl::DbStats res;
    // The second argument asks dbStats() to also build the failed list
    if (!rcldb->dbStats(res, true)) {
        LOGERR("SpellW::showFailed: rcldb::dbStats failed\n");
        return;
    }

    int row = 0;
    // Iterate by reference: no need to copy each url string
    for (const auto& entry : res.failedurls) {
        resTW->setRowCount(row + 1);
        resTW->setItem(row, 0, new QTableWidgetItem(u8s2qs(entry)));
        resTW->setItem(row, 1, new QTableWidgetItem(""));
        ++row;
    }
}
void SpellW::wordChanged(const QString &text)
{
if (text.isEmpty()) {
@ -390,7 +417,7 @@ void SpellW::setMode(comboboxchoice mode)
void SpellW::setModeCommon(comboboxchoice mode)
{
if (m_prevmode == TYPECMB_STATS) {
if (wordlessMode(m_prevmode)) {
baseWordLE->setText("");
}
m_prevmode = mode;
@ -412,13 +439,9 @@ void SpellW::setModeCommon(comboboxchoice mode)
} else {
stemLangCMB->setEnabled(false);
}
if (mode == TYPECMB_STATS)
if (wordlessMode(mode)) {
baseWordLE->setEnabled(false);
else
baseWordLE->setEnabled(true);
if (mode == TYPECMB_STATS) {
QStringList labels(tr("Item"));
labels.push_back(tr("Value"));
resTW->setHorizontalHeaderLabels(labels);
@ -426,6 +449,7 @@ void SpellW::setModeCommon(comboboxchoice mode)
caseSensCB->setEnabled(false);
doExpand();
} else {
baseWordLE->setEnabled(true);
QStringList labels(tr("Term"));
labels.push_back(tr("Doc. / Tot."));
resTW->setHorizontalHeaderLabels(labels);

View File

@ -36,7 +36,7 @@ public:
virtual bool eventFilter(QObject *target, QEvent *event );
enum comboboxchoice {TYPECMB_NONE, TYPECMB_WILD, TYPECMB_REG, TYPECMB_STEM,
TYPECMB_SPELL, TYPECMB_STATS};
TYPECMB_SPELL, TYPECMB_STATS, TYPECMB_FAILED};
public slots:
virtual void doExpand();
virtual void wordChanged(const QString&);
@ -58,6 +58,7 @@ private:
void init();
void copy();
void showStats();
void showFailed();
int cmbIdx(comboboxchoice mode);
void setModeCommon(comboboxchoice mode);
};

View File

@ -2140,7 +2140,7 @@ bool Db::purgeOrphans(const string &udi)
return m_ndb->purgeFileWrite(true, udi, uniterm);
}
bool Db::dbStats(DbStats& res)
bool Db::dbStats(DbStats& res, bool listfailed)
{
if (!m_ndb || !m_ndb->m_isopen)
return false;
@ -2153,6 +2153,45 @@ bool Db::dbStats(DbStats& res)
, xdb, m_reason);
if (!m_reason.empty())
return false;
if (!listfailed) {
return true;
}
// listfailed is set: walk all the documents and collect those whose
// signature value ends with '+' (the failed docs).
string ermsg;
try {
    // get_lastdocid() returns the highest docid in use, so the bound
    // has to be inclusive, else the last document is never examined.
    const Xapian::docid lastdocid = xdb.get_lastdocid();
    for (Xapian::docid docid = 1; docid <= lastdocid; docid++) {
        try {
            Xapian::Document doc = xdb.get_document(docid);
            string sig = doc.get_value(VALUE_SIG);
            if (sig.empty() || sig[sig.size()-1] != '+') {
                continue;
            }
            string data = doc.get_data();
            ConfSimple parms(data);
            // Documents whose data record can't be parsed are
            // silently skipped.
            if (parms.ok()) {
                string url, ipath;
                parms.get(Doc::keyipt, ipath);
                parms.get(Doc::keyurl, url);
                // Turn to local url or not? It seems to make more
                // sense to keep the original urls as seen by the
                // indexer.
                // m_config->urlrewrite(dbdir, url);
                if (!ipath.empty()) {
                    url += " | " + ipath;
                }
                res.failedurls.push_back(url);
            }
        } catch (const Xapian::DocNotFoundError&) {
            // No document for this docid: just go on to the next one
            continue;
        }
    }
} XCATCHERROR(ermsg);
if (!ermsg.empty()) {
    LOGERR("Db::dbStats: " << ermsg << "\n");
    return false;
}
return true;
}

View File

@ -124,14 +124,13 @@ public:
// Index statistics holder, filled in by Db::dbStats().
class DbStats {
public:
// All numeric members start at zero; failedurls starts empty.
DbStats()
:dbdoccount(0), dbavgdoclen(0), mindoclen(0), maxdoclen(0)
{
}
:dbdoccount(0), dbavgdoclen(0), mindoclen(0), maxdoclen(0) { }
// Index-wide stats
unsigned int dbdoccount;
double dbavgdoclen;
size_t mindoclen;
size_t maxdoclen;
// Only filled when Db::dbStats() is called with listfailed set. Each
// entry is a document url, followed by " | " and the ipath when the
// document has a non-empty one.
vector<string> failedurls; /* Only set if requested */
};
inline bool has_prefix(const string& trm)
@ -385,7 +384,7 @@ class Db {
bool termMatch(int typ_sens, const string &lang, const string &term,
TermMatchResult& result, int max = -1,
const string& field = "", vector<string> *multiwords = 0);
bool dbStats(DbStats& stats);
bool dbStats(DbStats& stats, bool listFailed);
/** Return min and max years for doc mod times in db */
bool maxYearSpan(int *minyear, int *maxyear);
/** Return all mime types in index. This can be different from the