improve indexing status reporting

This commit is contained in:
dockes 2006-04-12 10:41:39 +00:00
parent c9cf0c9460
commit 9086c6e531
8 changed files with 123 additions and 44 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.30 2006-04-04 13:49:54 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.31 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -68,6 +68,11 @@ bool DbIndexer::indexDb(bool resetbefore, list<string> *topdirs)
if (!init(resetbefore))
return false;
if (m_updater) {
m_updater->status.reset();
m_updater->status.dbtotdocs = m_db.docCnt();
}
for (list<string>::const_iterator it = topdirs->begin();
it != topdirs->end(); it++) {
LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(),
@ -94,6 +99,11 @@ bool DbIndexer::indexDb(bool resetbefore, list<string> *topdirs)
return false;
}
}
if (m_updater) {
m_updater->status.fn.clear();
m_updater->status.phase = DbIxStatus::DBIXS_PURGE;
m_updater->update();
}
// Get rid of all database entries that don't exist in the
// filesystem anymore.
@ -115,11 +125,21 @@ bool DbIndexer::indexDb(bool resetbefore, list<string> *topdirs)
m_db.deleteStemDb(*it);
}
for (it = langs.begin(); it != langs.end(); it++) {
if (m_updater) {
m_updater->status.phase = DbIxStatus::DBIXS_STEMDB;
m_updater->status.fn = *it;
m_updater->update();
}
m_db.createStemDb(*it);
}
}
// The close would be done in our destructor, but we want status here
if (m_updater) {
m_updater->status.phase = DbIxStatus::DBIXS_CLOSING;
m_updater->status.fn.clear();
m_updater->update();
}
if (!m_db.close()) {
LOGERR(("DbIndexer::index: error closing database in %s\n",
m_dbdir.c_str()));
@ -200,10 +220,8 @@ FsTreeWalker::Status
DbIndexer::processone(const std::string &fn, const struct stat *stp,
FsTreeWalker::CbFlag flg)
{
if (m_updfunc) {
if (!m_updfunc->update(fn)) {
if (m_updater && !m_updater->update()) {
return FsTreeWalker::FtwStop;
}
}
// If we're changing directories, possibly adjust parameters (set
// the current directory in configuration object)
@ -217,15 +235,20 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
// identification means that, if usesystemfilecommand is switched
// from on to off it may happen that some files which are now
// without mime type will not be purged from the db, resulting
// into possible 'cannot intern file' messages at query time...
// in possible 'cannot intern file' messages at query time...
if (!m_db.needUpdate(fn, stp)) {
LOGDEB(("indexfile: up to date: %s\n", fn.c_str()));
if (m_updater) {
m_updater->status.fn = fn;
if (!m_updater->update()) {
return FsTreeWalker::FtwStop;
}
}
return FsTreeWalker::FtwOk;
}
FileInterner interner(fn, m_config, m_tmpdir);
FileInterner::Status fis = FileInterner::FIAgain;
int i = 0;
while (fis == FileInterner::FIAgain) {
Rcl::Doc doc;
string ipath;
@ -233,12 +256,6 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
if (fis == FileInterner::FIError)
break;
if (m_updfunc) {
if ((++i % 100) == 0 && !m_updfunc->update(fn+"|"+ipath)) {
return FsTreeWalker::FtwStop;
}
}
// Set the date if this was not done in the document handler
if (doc.fmtime.empty()) {
char ascdate[20];
@ -257,6 +274,17 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
// Do database-specific work to update document data
if (!m_db.add(fn, doc, stp))
return FsTreeWalker::FtwError;
if (m_updater) {
if ((++(m_updater->status.docsdone) % 10) == 0) {
m_updater->status.fn = fn;
if (!ipath.empty())
m_updater->status.fn += "|" + ipath;
if (!m_updater->update()) {
return FsTreeWalker::FtwStop;
}
}
}
}
return FsTreeWalker::FtwOk;
@ -340,7 +368,7 @@ bool ConfIndexer::index(bool resetbefore)
// cout << *dit << " ";
//}
//cout << endl;
m_dbindexer = new DbIndexer(m_config, dbit->first, m_updfunc);
m_dbindexer = new DbIndexer(m_config, dbit->first, m_updater);
if (!m_dbindexer->indexDb(resetbefore, &dbit->second)) {
deleteZ(m_dbindexer);
m_reason = "Failed indexing in " + dbit->first;

View File

@ -16,11 +16,16 @@
*/
#ifndef _INDEXER_H_INCLUDED_
#define _INDEXER_H_INCLUDED_
/* @(#$Id: indexer.h,v 1.14 2006-04-04 13:49:54 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: indexer.h,v 1.15 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
#ifndef NO_NAMESPACES
using std::string;
using std::list;
#endif
#include "rclconfig.h"
#include "fstreewalk.h"
#include "rcldb.h"
@ -28,12 +33,24 @@
/* Forward decl for lower level indexing object */
class DbIndexer;
/* Callback to say what we're doing. If the update func returns false, we
* stop */
// Progress information for an indexing run: current phase, last file
// seen and document counters.
class DbIxStatus {
public:
    // Stage the indexing run is currently in.
    enum Phase {DBIXS_FILES, DBIXS_PURGE, DBIXS_STEMDB, DBIXS_CLOSING};

    Phase phase;
    string fn;      // Last file processed
    int docsdone;   // Documents processed
    int dbtotdocs;  // Doc count in index at start

    // A new object starts in the same state that reset() produces.
    DbIxStatus()
        : phase(DBIXS_FILES), docsdone(0), dbtotdocs(0)
    {
    }

    // Go back to the initial state: file phase, empty file name and
    // both counters at zero.
    void reset()
    {
        phase = DBIXS_FILES;
        fn.clear();
        dbtotdocs = 0;
        docsdone = 0;
    }
};
/** Callback to say what we're doing. If the update func returns false, we
* stop as soon as possible without corrupting state */
class DbIxStatusUpdater {
public:
// Current indexing state. The indexer fills in the fields (phase, file
// name, document counts) before it calls update().
DbIxStatus status;
virtual ~DbIxStatusUpdater(){}
virtual bool update(const std::string &) = 0;
// Progress notification from the indexer; implementations can look at
// 'status' to see what is going on. Returning false asks the indexer
// to stop.
virtual bool update() = 0;
};
/**
@ -49,7 +66,7 @@ class ConfIndexer {
public:
enum runStatus {IndexerOk, IndexerError};
ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0)
: m_config(cnf), m_dbindexer(0), m_updfunc(updfunc)
: m_config(cnf), m_dbindexer(0), m_updater(updfunc)
{}
virtual ~ConfIndexer();
/** Worker function: does the actual indexing */
@ -58,7 +75,7 @@ class ConfIndexer {
private:
RclConfig *m_config;
DbIndexer *m_dbindexer; // Object to process directories for a given db
DbIxStatusUpdater *m_updfunc;
DbIxStatusUpdater *m_updater;
string m_reason;
};
@ -76,10 +93,10 @@ class DbIndexer : public FsTreeWalkerCB {
public:
/** Constructor does nothing but store parameters */
DbIndexer(RclConfig *cnf, // Configuration data
const std::string &dbd, // Place where the db lives
const string &dbd, // Place where the db lives
DbIxStatusUpdater *updfunc = 0 // status updater callback
)
: m_config(cnf), m_dbdir(dbd), m_updfunc(updfunc) {
: m_config(cnf), m_dbdir(dbd), m_updater(updfunc) {
}
virtual ~DbIndexer();
@ -94,26 +111,26 @@ class DbIndexer : public FsTreeWalkerCB {
When walking is done, we create the stem databases and close
the main db.
*/
bool indexDb(bool resetbefore, std::list<std::string> *topdirs);
bool indexDb(bool resetbefore, std::list<string> *topdirs);
/** Index a list of files. No db cleaning or stemdb updating */
bool indexFiles(const std::list<std::string> &files);
bool indexFiles(const std::list<string> &files);
/** Create stem database for given language */
bool createStemDb(const string &lang);
/** Tree walker callback method */
FsTreeWalker::Status
processone(const std::string &, const struct stat *,
processone(const string &, const struct stat *,
FsTreeWalker::CbFlag);
private:
FsTreeWalker m_walker;
RclConfig *m_config;
std::string m_dbdir;
string m_dbdir;
Rcl::Db m_db;
std::string m_tmpdir;
DbIxStatusUpdater *m_updfunc;
string m_tmpdir;
DbIxStatusUpdater *m_updater;
bool init(bool rst = false);
};

View File

@ -30,10 +30,10 @@ static QMutex curfile_mutex;
class IdxThread : public QThread , public DbIxStatusUpdater {
virtual void run();
public:
virtual bool update(const string &fn) {
virtual bool update() {
QMutexLocker locker(&curfile_mutex);
m_curfile = fn;
LOGDEB1(("IdxThread::update: indexing %s\n", m_curfile.c_str()));
m_statusSnap = status;
LOGDEB1(("IdxThread::update: indexing %s\n", m_statusSnap.fn.c_str()));
if (stopindexing) {
stopindexing = 0;
return false;
@ -41,7 +41,8 @@ class IdxThread : public QThread , public DbIxStatusUpdater {
return true;
}
ConfIndexer *indexer;
string m_curfile;
// Maintain a copy/snapshot of idx status
DbIxStatus m_statusSnap;
int loglevel;
};
@ -97,8 +98,8 @@ void stop_idxthread()
idxthread.wait();
}
std::string idxthread_currentfile()
DbIxStatus idxthread_idxStatus()
{
QMutexLocker locker(&curfile_mutex);
return(idxthread.m_curfile);
return(idxthread.m_statusSnap);
}

View File

@ -16,8 +16,9 @@
*/
#ifndef _IDXTHREAD_H_INCLUDED_
#define _IDXTHREAD_H_INCLUDED_
/* @(#$Id: idxthread.h,v 1.5 2006-04-04 13:49:55 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: idxthread.h,v 1.6 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include "indexer.h"
class RclConfig;
@ -25,7 +26,7 @@ class RclConfig;
// sessions.
extern void start_idxthread(const RclConfig& cnf);
extern void stop_idxthread();
extern std::string idxthread_currentfile();
extern DbIxStatus idxthread_idxStatus();
extern int stopindexing;
extern int startindexing;

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rclmain.cpp,v 1.20 2006-04-12 07:26:17 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: rclmain.cpp,v 1.21 2006-04-12 10:41:39 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -219,17 +219,35 @@ void RclMain::periodic100()
QString::fromAscii(indexingReason.c_str()));
}
indexingstatus = IDXTS_NULL;
fileStart_IndexingAction->setEnabled(TRUE);
// Make sure we reopen the db to get the results.
LOGINFO(("Indexing done: closing query database\n"));
rcldb->close();
} else if (indexingdone == 0) {
if (toggle == 0) {
QString msg = tr("Indexing in progress: ");
string cf = idxthread_currentfile();
DbIxStatus status = idxthread_idxStatus();
QString phs;
switch (status.phase) {
case DbIxStatus::DBIXS_FILES: phs=tr("Files");break;
case DbIxStatus::DBIXS_PURGE: phs=tr("Purge");break;
case DbIxStatus::DBIXS_STEMDB: phs=tr("Stemdb");break;
case DbIxStatus::DBIXS_CLOSING:phs=tr("Closing");break;
default: phs=tr("Unknown");break;
}
msg += phs + " ";
if (status.phase == DbIxStatus::DBIXS_FILES) {
char cnts[100];
if (status.dbtotdocs>0)
sprintf(cnts,"(%d/%d) ",status.docsdone, status.dbtotdocs);
else
sprintf(cnts, "(%d) ", status.docsdone);
msg += QString::fromAscii(cnts) + " ";
}
string mf;int ecnt = 0;
string fcharset = rclconfig->getDefCharset(true);
if (!transcode(cf, mf, fcharset, "UTF-8", &ecnt) || ecnt) {
mf = url_encode(cf, 0);
if (!transcode(status.fn, mf, fcharset, "UTF-8", &ecnt) || ecnt) {
mf = url_encode(status.fn, 0);
}
msg += QString::fromUtf8(mf.c_str());
statusBar()->message(msg);
@ -247,6 +265,7 @@ void RclMain::fileStart_IndexingAction_activated()
{
if (indexingdone)
startindexing = 1;
fileStart_IndexingAction->setEnabled(FALSE);
}
// Note that all our 'urls' are like : file://...

View File

@ -173,10 +173,10 @@
<cstring>fileStart_IndexingAction</cstring>
</property>
<property name="text">
<string>Start Indexing</string>
<string>Update index</string>
</property>
<property name="menuText">
<string>Start &amp;Indexing</string>
<string>Update &amp;index</string>
</property>
</action>
<action>

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.66 2006-04-12 07:26:16 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.67 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -275,9 +275,10 @@ bool Db::close()
LOGERR(("Db:close: exception while deleting db: %s\n", ermsg));
return false;
}
bool Db::reOpen()
{
if (m_ndb->m_isopen) {
if (m_ndb && m_ndb->m_isopen) {
if (!close())
return false;
if (!open(m_ndb->m_basedir, m_ndb->m_mode, m_qOpts)) {
@ -286,6 +287,16 @@ bool Db::reOpen()
}
return true;
}
// Return the total number of documents in the index, or -1 when there
// is no open database to ask.
int Db::docCnt()
{
    // Nothing to count without an open native db object
    if (!m_ndb || !m_ndb->m_isopen)
        return -1;

    // Query whichever handle matches the mode the db was opened in
    if (m_ndb->m_iswritable)
        return m_ndb->wdb.get_doccount();
    return m_ndb->db.get_doccount();
}
bool Db::addQueryDb(const string &dir)
{
LOGDEB(("Db::addQueryDb: ndb %p iswritable %d db [%s]\n", m_ndb,

View File

@ -16,7 +16,7 @@
*/
#ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.31 2006-04-06 13:08:28 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: rcldb.h,v 1.32 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
@ -143,6 +143,8 @@ class Db {
bool close();
bool isopen();
int docCnt(); /// Return total docs in db
// Update-related functions
bool add(const string &filename, const Doc &doc, const struct stat *stp);
bool needUpdate(const string &filename, const struct stat *stp);