Add function to list failed files to the term explorer
This commit is contained in:
parent
51ac5e8440
commit
503edd5a92
@ -51,6 +51,11 @@ using std::list;
|
|||||||
using std::multimap;
|
using std::multimap;
|
||||||
using std::string;
|
using std::string;
|
||||||
|
|
||||||
|
// Tell if the given explorer mode operates without an input word: this is
// the case for the index statistics display and for the failed files list.
inline bool wordlessMode(SpellW::comboboxchoice v)
{
    switch (v) {
    case SpellW::TYPECMB_STATS:
    case SpellW::TYPECMB_FAILED:
        return true;
    default:
        return false;
    }
}
|
||||||
|
|
||||||
void SpellW::init()
|
void SpellW::init()
|
||||||
{
|
{
|
||||||
m_c2t.clear();
|
m_c2t.clear();
|
||||||
@ -64,6 +69,8 @@ void SpellW::init()
|
|||||||
m_c2t.push_back(TYPECMB_SPELL);
|
m_c2t.push_back(TYPECMB_SPELL);
|
||||||
expTypeCMB->addItem(tr("Show index statistics"));
|
expTypeCMB->addItem(tr("Show index statistics"));
|
||||||
m_c2t.push_back(TYPECMB_STATS);
|
m_c2t.push_back(TYPECMB_STATS);
|
||||||
|
expTypeCMB->addItem(tr("List files which could not be indexed (slow)"));
|
||||||
|
m_c2t.push_back(TYPECMB_FAILED);
|
||||||
|
|
||||||
// Stemming language combobox
|
// Stemming language combobox
|
||||||
stemLangCMB->clear();
|
stemLangCMB->clear();
|
||||||
@ -74,8 +81,7 @@ void SpellW::init()
|
|||||||
}
|
}
|
||||||
for (vector<string>::const_iterator it = langs.begin();
|
for (vector<string>::const_iterator it = langs.begin();
|
||||||
it != langs.end(); it++) {
|
it != langs.end(); it++) {
|
||||||
stemLangCMB->
|
stemLangCMB->addItem(u8s2qs(*it));
|
||||||
addItem(QString::fromUtf8(it->c_str(), it->length()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
(void)new HelpClient(this);
|
(void)new HelpClient(this);
|
||||||
@ -131,7 +137,7 @@ void SpellW::doExpand()
|
|||||||
|
|
||||||
// Can't clear qt4 table widget: resets column headers too
|
// Can't clear qt4 table widget: resets column headers too
|
||||||
resTW->setRowCount(0);
|
resTW->setRowCount(0);
|
||||||
if (baseWordLE->text().isEmpty() && mode != TYPECMB_STATS)
|
if (baseWordLE->text().isEmpty() && !wordlessMode(mode))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
string reason;
|
string reason;
|
||||||
@ -157,7 +163,7 @@ void SpellW::doExpand()
|
|||||||
Rcl::TermMatchResult res;
|
Rcl::TermMatchResult res;
|
||||||
string expr = string((const char *)baseWordLE->text().toUtf8());
|
string expr = string((const char *)baseWordLE->text().toUtf8());
|
||||||
Rcl::DbStats dbs;
|
Rcl::DbStats dbs;
|
||||||
rcldb->dbStats(dbs);
|
rcldb->dbStats(dbs, false);
|
||||||
|
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
case TYPECMB_WILD:
|
case TYPECMB_WILD:
|
||||||
@ -199,6 +205,12 @@ void SpellW::doExpand()
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case TYPECMB_FAILED:
|
||||||
|
{
|
||||||
|
showFailed();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (res.entries.empty()) {
|
if (res.entries.empty()) {
|
||||||
@ -224,15 +236,14 @@ void SpellW::doExpand()
|
|||||||
|
|
||||||
for (vector<Rcl::TermMatchEntry>::iterator it = res.entries.begin();
|
for (vector<Rcl::TermMatchEntry>::iterator it = res.entries.begin();
|
||||||
it != res.entries.end(); it++) {
|
it != res.entries.end(); it++) {
|
||||||
LOGDEB2("SpellW::expand: " << (it->wcf) << " [" << (it->term) << "]\n" );
|
LOGDEB2("SpellW::expand: " << it->wcf << " [" << it->term << "]\n");
|
||||||
char num[30];
|
char num[30];
|
||||||
if (it->wcf)
|
if (it->wcf)
|
||||||
sprintf(num, "%d / %d", it->docs, it->wcf);
|
sprintf(num, "%d / %d", it->docs, it->wcf);
|
||||||
else
|
else
|
||||||
num[0] = 0;
|
num[0] = 0;
|
||||||
resTW->setRowCount(row+1);
|
resTW->setRowCount(row+1);
|
||||||
resTW->setItem(row, 0,
|
resTW->setItem(row, 0, new QTableWidgetItem(u8s2qs(it->term)));
|
||||||
new QTableWidgetItem(QString::fromUtf8(it->term.c_str())));
|
|
||||||
resTW->setItem(row++, 1,
|
resTW->setItem(row++, 1,
|
||||||
new QTableWidgetItem(QString::fromUtf8(num)));
|
new QTableWidgetItem(QString::fromUtf8(num)));
|
||||||
}
|
}
|
||||||
@ -245,7 +256,7 @@ void SpellW::showStats()
|
|||||||
int row = 0;
|
int row = 0;
|
||||||
|
|
||||||
Rcl::DbStats res;
|
Rcl::DbStats res;
|
||||||
if (!rcldb->dbStats(res)) {
|
if (!rcldb->dbStats(res, false)) {
|
||||||
LOGERR("SpellW::doExpand:rcldb::dbStats failed\n" );
|
LOGERR("SpellW::doExpand:rcldb::dbStats failed\n" );
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -323,8 +334,7 @@ void SpellW::showStats()
|
|||||||
resTW->setItem(row, 0,
|
resTW->setItem(row, 0,
|
||||||
new QTableWidgetItem(tr("Database directory size")));
|
new QTableWidgetItem(tr("Database directory size")));
|
||||||
resTW->setItem(row++, 1, new QTableWidgetItem(
|
resTW->setItem(row++, 1, new QTableWidgetItem(
|
||||||
QString::fromUtf8(
|
u8s2qs(displayableBytes(dbkbytes*1024))));
|
||||||
displayableBytes(dbkbytes*1024).c_str())));
|
|
||||||
|
|
||||||
vector<string> allmimetypes = theconfig->getAllMimeTypes();
|
vector<string> allmimetypes = theconfig->getAllMimeTypes();
|
||||||
multimap<int, string> mtbycnt;
|
multimap<int, string> mtbycnt;
|
||||||
@ -350,12 +360,29 @@ void SpellW::showStats()
|
|||||||
it != mtbycnt.rend(); it++) {
|
it != mtbycnt.rend(); it++) {
|
||||||
resTW->setRowCount(row+1);
|
resTW->setRowCount(row+1);
|
||||||
resTW->setItem(row, 0, new QTableWidgetItem(QString(" ") +
|
resTW->setItem(row, 0, new QTableWidgetItem(QString(" ") +
|
||||||
QString::fromUtf8(it->second.c_str())));
|
u8s2qs(it->second)));
|
||||||
resTW->setItem(row++, 1, new QTableWidgetItem(
|
resTW->setItem(row++, 1, new QTableWidgetItem(
|
||||||
QString::number(it->first)));
|
QString::number(it->first)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SpellW::showFailed()
|
||||||
|
{
|
||||||
|
statsLBL->setText("");
|
||||||
|
int row = 0;
|
||||||
|
|
||||||
|
Rcl::DbStats res;
|
||||||
|
if (!rcldb->dbStats(res, true)) {
|
||||||
|
LOGERR("SpellW::doExpand:rcldb::dbStats failed\n" );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (auto entry : res.failedurls) {
|
||||||
|
resTW->setRowCount(row+1);
|
||||||
|
resTW->setItem(row, 0, new QTableWidgetItem(u8s2qs(entry)));
|
||||||
|
resTW->setItem(row++, 1, new QTableWidgetItem(""));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void SpellW::wordChanged(const QString &text)
|
void SpellW::wordChanged(const QString &text)
|
||||||
{
|
{
|
||||||
if (text.isEmpty()) {
|
if (text.isEmpty()) {
|
||||||
@ -390,7 +417,7 @@ void SpellW::setMode(comboboxchoice mode)
|
|||||||
|
|
||||||
void SpellW::setModeCommon(comboboxchoice mode)
|
void SpellW::setModeCommon(comboboxchoice mode)
|
||||||
{
|
{
|
||||||
if (m_prevmode == TYPECMB_STATS) {
|
if (wordlessMode(m_prevmode)) {
|
||||||
baseWordLE->setText("");
|
baseWordLE->setText("");
|
||||||
}
|
}
|
||||||
m_prevmode = mode;
|
m_prevmode = mode;
|
||||||
@ -412,13 +439,9 @@ void SpellW::setModeCommon(comboboxchoice mode)
|
|||||||
} else {
|
} else {
|
||||||
stemLangCMB->setEnabled(false);
|
stemLangCMB->setEnabled(false);
|
||||||
}
|
}
|
||||||
if (mode == TYPECMB_STATS)
|
|
||||||
|
if (wordlessMode(mode)) {
|
||||||
baseWordLE->setEnabled(false);
|
baseWordLE->setEnabled(false);
|
||||||
else
|
|
||||||
baseWordLE->setEnabled(true);
|
|
||||||
|
|
||||||
|
|
||||||
if (mode == TYPECMB_STATS) {
|
|
||||||
QStringList labels(tr("Item"));
|
QStringList labels(tr("Item"));
|
||||||
labels.push_back(tr("Value"));
|
labels.push_back(tr("Value"));
|
||||||
resTW->setHorizontalHeaderLabels(labels);
|
resTW->setHorizontalHeaderLabels(labels);
|
||||||
@ -426,6 +449,7 @@ void SpellW::setModeCommon(comboboxchoice mode)
|
|||||||
caseSensCB->setEnabled(false);
|
caseSensCB->setEnabled(false);
|
||||||
doExpand();
|
doExpand();
|
||||||
} else {
|
} else {
|
||||||
|
baseWordLE->setEnabled(true);
|
||||||
QStringList labels(tr("Term"));
|
QStringList labels(tr("Term"));
|
||||||
labels.push_back(tr("Doc. / Tot."));
|
labels.push_back(tr("Doc. / Tot."));
|
||||||
resTW->setHorizontalHeaderLabels(labels);
|
resTW->setHorizontalHeaderLabels(labels);
|
||||||
|
|||||||
@ -36,7 +36,7 @@ public:
|
|||||||
virtual bool eventFilter(QObject *target, QEvent *event );
|
virtual bool eventFilter(QObject *target, QEvent *event );
|
||||||
|
|
||||||
enum comboboxchoice {TYPECMB_NONE, TYPECMB_WILD, TYPECMB_REG, TYPECMB_STEM,
|
enum comboboxchoice {TYPECMB_NONE, TYPECMB_WILD, TYPECMB_REG, TYPECMB_STEM,
|
||||||
TYPECMB_SPELL, TYPECMB_STATS};
|
TYPECMB_SPELL, TYPECMB_STATS, TYPECMB_FAILED};
|
||||||
public slots:
|
public slots:
|
||||||
virtual void doExpand();
|
virtual void doExpand();
|
||||||
virtual void wordChanged(const QString&);
|
virtual void wordChanged(const QString&);
|
||||||
@ -58,6 +58,7 @@ private:
|
|||||||
void init();
|
void init();
|
||||||
void copy();
|
void copy();
|
||||||
void showStats();
|
void showStats();
|
||||||
|
void showFailed();
|
||||||
int cmbIdx(comboboxchoice mode);
|
int cmbIdx(comboboxchoice mode);
|
||||||
void setModeCommon(comboboxchoice mode);
|
void setModeCommon(comboboxchoice mode);
|
||||||
};
|
};
|
||||||
|
|||||||
@ -2140,7 +2140,7 @@ bool Db::purgeOrphans(const string &udi)
|
|||||||
return m_ndb->purgeFileWrite(true, udi, uniterm);
|
return m_ndb->purgeFileWrite(true, udi, uniterm);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Db::dbStats(DbStats& res)
|
bool Db::dbStats(DbStats& res, bool listfailed)
|
||||||
{
|
{
|
||||||
if (!m_ndb || !m_ndb->m_isopen)
|
if (!m_ndb || !m_ndb->m_isopen)
|
||||||
return false;
|
return false;
|
||||||
@ -2153,6 +2153,45 @@ bool Db::dbStats(DbStats& res)
|
|||||||
, xdb, m_reason);
|
, xdb, m_reason);
|
||||||
if (!m_reason.empty())
|
if (!m_reason.empty())
|
||||||
return false;
|
return false;
|
||||||
|
if (!listfailed) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// listfailed is set : look for failed docs
|
||||||
|
string ermsg;
|
||||||
|
try {
|
||||||
|
for (unsigned int docid = 1; docid < xdb.get_lastdocid(); docid++) {
|
||||||
|
try {
|
||||||
|
Xapian::Document doc = xdb.get_document(docid);
|
||||||
|
string sig = doc.get_value(VALUE_SIG);
|
||||||
|
if (sig.empty() || sig[sig.size()-1] != '+') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
string data = doc.get_data();
|
||||||
|
ConfSimple parms(data);
|
||||||
|
if (!parms.ok()) {
|
||||||
|
} else {
|
||||||
|
string url, ipath;
|
||||||
|
parms.get(Doc::keyipt, ipath);
|
||||||
|
parms.get(Doc::keyurl, url);
|
||||||
|
// Turn to local url or not? It seems to make more
|
||||||
|
// sense to keep the original urls as seen by the
|
||||||
|
// indexer.
|
||||||
|
// m_config->urlrewrite(dbdir, url);
|
||||||
|
if (!ipath.empty()) {
|
||||||
|
url += " | " + ipath;
|
||||||
|
}
|
||||||
|
res.failedurls.push_back(url);
|
||||||
|
}
|
||||||
|
} catch (Xapian::DocNotFoundError) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} XCATCHERROR(ermsg);
|
||||||
|
if (!ermsg.empty()) {
|
||||||
|
LOGERR("Db::dbStats: " << ermsg << "\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -124,14 +124,13 @@ public:
|
|||||||
class DbStats {
|
class DbStats {
|
||||||
public:
|
public:
|
||||||
DbStats()
|
DbStats()
|
||||||
:dbdoccount(0), dbavgdoclen(0), mindoclen(0), maxdoclen(0)
|
:dbdoccount(0), dbavgdoclen(0), mindoclen(0), maxdoclen(0) { }
|
||||||
{
|
|
||||||
}
|
|
||||||
// Index-wide stats
|
// Index-wide stats
|
||||||
unsigned int dbdoccount;
|
unsigned int dbdoccount;
|
||||||
double dbavgdoclen;
|
double dbavgdoclen;
|
||||||
size_t mindoclen;
|
size_t mindoclen;
|
||||||
size_t maxdoclen;
|
size_t maxdoclen;
|
||||||
|
vector<string> failedurls; /* Only set if requested */
|
||||||
};
|
};
|
||||||
|
|
||||||
inline bool has_prefix(const string& trm)
|
inline bool has_prefix(const string& trm)
|
||||||
@ -385,7 +384,7 @@ class Db {
|
|||||||
bool termMatch(int typ_sens, const string &lang, const string &term,
|
bool termMatch(int typ_sens, const string &lang, const string &term,
|
||||||
TermMatchResult& result, int max = -1,
|
TermMatchResult& result, int max = -1,
|
||||||
const string& field = "", vector<string> *multiwords = 0);
|
const string& field = "", vector<string> *multiwords = 0);
|
||||||
bool dbStats(DbStats& stats);
|
bool dbStats(DbStats& stats, bool listFailed);
|
||||||
/** Return min and max years for doc mod times in db */
|
/** Return min and max years for doc mod times in db */
|
||||||
bool maxYearSpan(int *minyear, int *maxyear);
|
bool maxYearSpan(int *minyear, int *maxyear);
|
||||||
/** Return all mime types in index. This can be different from the
|
/** Return all mime types in index. This can be different from the
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user