improve indexing status reporting

This commit is contained in:
dockes 2006-04-12 10:41:39 +00:00
parent c9cf0c9460
commit 9086c6e531
8 changed files with 123 additions and 44 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.30 2006-04-04 13:49:54 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: indexer.cpp,v 1.31 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -68,6 +68,11 @@ bool DbIndexer::indexDb(bool resetbefore, list<string> *topdirs)
if (!init(resetbefore)) if (!init(resetbefore))
return false; return false;
if (m_updater) {
m_updater->status.reset();
m_updater->status.dbtotdocs = m_db.docCnt();
}
for (list<string>::const_iterator it = topdirs->begin(); for (list<string>::const_iterator it = topdirs->begin();
it != topdirs->end(); it++) { it != topdirs->end(); it++) {
LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(), LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(),
@ -94,6 +99,11 @@ bool DbIndexer::indexDb(bool resetbefore, list<string> *topdirs)
return false; return false;
} }
} }
if (m_updater) {
m_updater->status.fn.clear();
m_updater->status.phase = DbIxStatus::DBIXS_PURGE;
m_updater->update();
}
// Get rid of all database entries that don't exist in the // Get rid of all database entries that don't exist in the
// filesystem anymore. // filesystem anymore.
@ -115,11 +125,21 @@ bool DbIndexer::indexDb(bool resetbefore, list<string> *topdirs)
m_db.deleteStemDb(*it); m_db.deleteStemDb(*it);
} }
for (it = langs.begin(); it != langs.end(); it++) { for (it = langs.begin(); it != langs.end(); it++) {
if (m_updater) {
m_updater->status.phase = DbIxStatus::DBIXS_STEMDB;
m_updater->status.fn = *it;
m_updater->update();
}
m_db.createStemDb(*it); m_db.createStemDb(*it);
} }
} }
// The close would be done in our destructor, but we want status here // The close would be done in our destructor, but we want status here
if (m_updater) {
m_updater->status.phase = DbIxStatus::DBIXS_CLOSING;
m_updater->status.fn.clear();
m_updater->update();
}
if (!m_db.close()) { if (!m_db.close()) {
LOGERR(("DbIndexer::index: error closing database in %s\n", LOGERR(("DbIndexer::index: error closing database in %s\n",
m_dbdir.c_str())); m_dbdir.c_str()));
@ -200,10 +220,8 @@ FsTreeWalker::Status
DbIndexer::processone(const std::string &fn, const struct stat *stp, DbIndexer::processone(const std::string &fn, const struct stat *stp,
FsTreeWalker::CbFlag flg) FsTreeWalker::CbFlag flg)
{ {
if (m_updfunc) { if (m_updater && !m_updater->update()) {
if (!m_updfunc->update(fn)) {
return FsTreeWalker::FtwStop; return FsTreeWalker::FtwStop;
}
} }
// If we're changing directories, possibly adjust parameters (set // If we're changing directories, possibly adjust parameters (set
// the current directory in configuration object) // the current directory in configuration object)
@ -217,15 +235,20 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
// identification means that, if usesystemfilecommand is switched // identification means that, if usesystemfilecommand is switched
// from on to off it may happen that some files which are now // from on to off it may happen that some files which are now
// without mime type will not be purged from the db, resulting // without mime type will not be purged from the db, resulting
// into possible 'cannot intern file' messages at query time... // in possible 'cannot intern file' messages at query time...
if (!m_db.needUpdate(fn, stp)) { if (!m_db.needUpdate(fn, stp)) {
LOGDEB(("indexfile: up to date: %s\n", fn.c_str())); LOGDEB(("indexfile: up to date: %s\n", fn.c_str()));
if (m_updater) {
m_updater->status.fn = fn;
if (!m_updater->update()) {
return FsTreeWalker::FtwStop;
}
}
return FsTreeWalker::FtwOk; return FsTreeWalker::FtwOk;
} }
FileInterner interner(fn, m_config, m_tmpdir); FileInterner interner(fn, m_config, m_tmpdir);
FileInterner::Status fis = FileInterner::FIAgain; FileInterner::Status fis = FileInterner::FIAgain;
int i = 0;
while (fis == FileInterner::FIAgain) { while (fis == FileInterner::FIAgain) {
Rcl::Doc doc; Rcl::Doc doc;
string ipath; string ipath;
@ -233,12 +256,6 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
if (fis == FileInterner::FIError) if (fis == FileInterner::FIError)
break; break;
if (m_updfunc) {
if ((++i % 100) == 0 && !m_updfunc->update(fn+"|"+ipath)) {
return FsTreeWalker::FtwStop;
}
}
// Set the date if this was not done in the document handler // Set the date if this was not done in the document handler
if (doc.fmtime.empty()) { if (doc.fmtime.empty()) {
char ascdate[20]; char ascdate[20];
@ -257,6 +274,17 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
// Do database-specific work to update document data // Do database-specific work to update document data
if (!m_db.add(fn, doc, stp)) if (!m_db.add(fn, doc, stp))
return FsTreeWalker::FtwError; return FsTreeWalker::FtwError;
if (m_updater) {
if ((++(m_updater->status.docsdone) % 10) == 0) {
m_updater->status.fn = fn;
if (!ipath.empty())
m_updater->status.fn += "|" + ipath;
if (!m_updater->update()) {
return FsTreeWalker::FtwStop;
}
}
}
} }
return FsTreeWalker::FtwOk; return FsTreeWalker::FtwOk;
@ -340,7 +368,7 @@ bool ConfIndexer::index(bool resetbefore)
// cout << *dit << " "; // cout << *dit << " ";
//} //}
//cout << endl; //cout << endl;
m_dbindexer = new DbIndexer(m_config, dbit->first, m_updfunc); m_dbindexer = new DbIndexer(m_config, dbit->first, m_updater);
if (!m_dbindexer->indexDb(resetbefore, &dbit->second)) { if (!m_dbindexer->indexDb(resetbefore, &dbit->second)) {
deleteZ(m_dbindexer); deleteZ(m_dbindexer);
m_reason = "Failed indexing in " + dbit->first; m_reason = "Failed indexing in " + dbit->first;

View File

@ -16,11 +16,16 @@
*/ */
#ifndef _INDEXER_H_INCLUDED_ #ifndef _INDEXER_H_INCLUDED_
#define _INDEXER_H_INCLUDED_ #define _INDEXER_H_INCLUDED_
/* @(#$Id: indexer.h,v 1.14 2006-04-04 13:49:54 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: indexer.h,v 1.15 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
#ifndef NO_NAMESPACES
using std::string;
using std::list;
#endif
#include "rclconfig.h" #include "rclconfig.h"
#include "fstreewalk.h" #include "fstreewalk.h"
#include "rcldb.h" #include "rcldb.h"
@ -28,12 +33,24 @@
/* Forward decl for lower level indexing object */ /* Forward decl for lower level indexing object */
class DbIndexer; class DbIndexer;
/* Callback to say what we're doing. If the update func returns false, we class DbIxStatus {
* stop */ public:
enum Phase {DBIXS_FILES, DBIXS_PURGE, DBIXS_STEMDB, DBIXS_CLOSING};
Phase phase;
string fn; // Last file processed
int docsdone; // Documents processed
int dbtotdocs; // Doc count in index at start
void reset() {phase = DBIXS_FILES;fn.clear();docsdone=dbtotdocs=0;}
DbIxStatus() {reset();}
};
/** Callback to say what we're doing. If the update func returns false, we
* stop as soon as possible without corrupting state */
class DbIxStatusUpdater { class DbIxStatusUpdater {
public: public:
DbIxStatus status;
virtual ~DbIxStatusUpdater(){} virtual ~DbIxStatusUpdater(){}
virtual bool update(const std::string &) = 0; virtual bool update() = 0;
}; };
/** /**
@ -49,7 +66,7 @@ class ConfIndexer {
public: public:
enum runStatus {IndexerOk, IndexerError}; enum runStatus {IndexerOk, IndexerError};
ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0) ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0)
: m_config(cnf), m_dbindexer(0), m_updfunc(updfunc) : m_config(cnf), m_dbindexer(0), m_updater(updfunc)
{} {}
virtual ~ConfIndexer(); virtual ~ConfIndexer();
/** Worker function: do the actual indexing */ /** Worker function: do the actual indexing */
@ -58,7 +75,7 @@ class ConfIndexer {
private: private:
RclConfig *m_config; RclConfig *m_config;
DbIndexer *m_dbindexer; // Object to process directories for a given db DbIndexer *m_dbindexer; // Object to process directories for a given db
DbIxStatusUpdater *m_updfunc; DbIxStatusUpdater *m_updater;
string m_reason; string m_reason;
}; };
@ -76,10 +93,10 @@ class DbIndexer : public FsTreeWalkerCB {
public: public:
/** Constructor does nothing but store parameters */ /** Constructor does nothing but store parameters */
DbIndexer(RclConfig *cnf, // Configuration data DbIndexer(RclConfig *cnf, // Configuration data
const std::string &dbd, // Place where the db lives const string &dbd, // Place where the db lives
DbIxStatusUpdater *updfunc = 0 // status updater callback DbIxStatusUpdater *updfunc = 0 // status updater callback
) )
: m_config(cnf), m_dbdir(dbd), m_updfunc(updfunc) { : m_config(cnf), m_dbdir(dbd), m_updater(updfunc) {
} }
virtual ~DbIndexer(); virtual ~DbIndexer();
@ -94,26 +111,26 @@ class DbIndexer : public FsTreeWalkerCB {
When walking is done, we create the stem databases and close When walking is done, we create the stem databases and close
the main db. the main db.
*/ */
bool indexDb(bool resetbefore, std::list<std::string> *topdirs); bool indexDb(bool resetbefore, std::list<string> *topdirs);
/** Index a list of files. No db cleaning or stemdb updating */ /** Index a list of files. No db cleaning or stemdb updating */
bool indexFiles(const std::list<std::string> &files); bool indexFiles(const std::list<string> &files);
/** Create stem database for given language */ /** Create stem database for given language */
bool createStemDb(const string &lang); bool createStemDb(const string &lang);
/** Tree walker callback method */ /** Tree walker callback method */
FsTreeWalker::Status FsTreeWalker::Status
processone(const std::string &, const struct stat *, processone(const string &, const struct stat *,
FsTreeWalker::CbFlag); FsTreeWalker::CbFlag);
private: private:
FsTreeWalker m_walker; FsTreeWalker m_walker;
RclConfig *m_config; RclConfig *m_config;
std::string m_dbdir; string m_dbdir;
Rcl::Db m_db; Rcl::Db m_db;
std::string m_tmpdir; string m_tmpdir;
DbIxStatusUpdater *m_updfunc; DbIxStatusUpdater *m_updater;
bool init(bool rst = false); bool init(bool rst = false);
}; };

View File

@ -30,10 +30,10 @@ static QMutex curfile_mutex;
class IdxThread : public QThread , public DbIxStatusUpdater { class IdxThread : public QThread , public DbIxStatusUpdater {
virtual void run(); virtual void run();
public: public:
virtual bool update(const string &fn) { virtual bool update() {
QMutexLocker locker(&curfile_mutex); QMutexLocker locker(&curfile_mutex);
m_curfile = fn; m_statusSnap = status;
LOGDEB1(("IdxThread::update: indexing %s\n", m_curfile.c_str())); LOGDEB1(("IdxThread::update: indexing %s\n", m_statusSnap.fn.c_str()));
if (stopindexing) { if (stopindexing) {
stopindexing = 0; stopindexing = 0;
return false; return false;
@ -41,7 +41,8 @@ class IdxThread : public QThread , public DbIxStatusUpdater {
return true; return true;
} }
ConfIndexer *indexer; ConfIndexer *indexer;
string m_curfile; // Maintain a copy/snapshot of idx status
DbIxStatus m_statusSnap;
int loglevel; int loglevel;
}; };
@ -97,8 +98,8 @@ void stop_idxthread()
idxthread.wait(); idxthread.wait();
} }
std::string idxthread_currentfile() DbIxStatus idxthread_idxStatus()
{ {
QMutexLocker locker(&curfile_mutex); QMutexLocker locker(&curfile_mutex);
return(idxthread.m_curfile); return(idxthread.m_statusSnap);
} }

View File

@ -16,8 +16,9 @@
*/ */
#ifndef _IDXTHREAD_H_INCLUDED_ #ifndef _IDXTHREAD_H_INCLUDED_
#define _IDXTHREAD_H_INCLUDED_ #define _IDXTHREAD_H_INCLUDED_
/* @(#$Id: idxthread.h,v 1.5 2006-04-04 13:49:55 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: idxthread.h,v 1.6 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include "indexer.h"
class RclConfig; class RclConfig;
@ -25,7 +26,7 @@ class RclConfig;
// sessions. // sessions.
extern void start_idxthread(const RclConfig& cnf); extern void start_idxthread(const RclConfig& cnf);
extern void stop_idxthread(); extern void stop_idxthread();
extern std::string idxthread_currentfile(); extern DbIxStatus idxthread_idxStatus();
extern int stopindexing; extern int stopindexing;
extern int startindexing; extern int startindexing;

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rclmain.cpp,v 1.20 2006-04-12 07:26:17 dockes Exp $ (C) 2005 J.F.Dockes"; static char rcsid[] = "@(#$Id: rclmain.cpp,v 1.21 2006-04-12 10:41:39 dockes Exp $ (C) 2005 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -219,17 +219,35 @@ void RclMain::periodic100()
QString::fromAscii(indexingReason.c_str())); QString::fromAscii(indexingReason.c_str()));
} }
indexingstatus = IDXTS_NULL; indexingstatus = IDXTS_NULL;
fileStart_IndexingAction->setEnabled(TRUE);
// Make sure we reopen the db to get the results. // Make sure we reopen the db to get the results.
LOGINFO(("Indexing done: closing query database\n")); LOGINFO(("Indexing done: closing query database\n"));
rcldb->close(); rcldb->close();
} else if (indexingdone == 0) { } else if (indexingdone == 0) {
if (toggle == 0) { if (toggle == 0) {
QString msg = tr("Indexing in progress: "); QString msg = tr("Indexing in progress: ");
string cf = idxthread_currentfile(); DbIxStatus status = idxthread_idxStatus();
QString phs;
switch (status.phase) {
case DbIxStatus::DBIXS_FILES: phs=tr("Files");break;
case DbIxStatus::DBIXS_PURGE: phs=tr("Purge");break;
case DbIxStatus::DBIXS_STEMDB: phs=tr("Stemdb");break;
case DbIxStatus::DBIXS_CLOSING:phs=tr("Closing");break;
default: phs=tr("Unknown");break;
}
msg += phs + " ";
if (status.phase == DbIxStatus::DBIXS_FILES) {
char cnts[100];
if (status.dbtotdocs>0)
sprintf(cnts,"(%d/%d) ",status.docsdone, status.dbtotdocs);
else
sprintf(cnts, "(%d) ", status.docsdone);
msg += QString::fromAscii(cnts) + " ";
}
string mf;int ecnt = 0; string mf;int ecnt = 0;
string fcharset = rclconfig->getDefCharset(true); string fcharset = rclconfig->getDefCharset(true);
if (!transcode(cf, mf, fcharset, "UTF-8", &ecnt) || ecnt) { if (!transcode(status.fn, mf, fcharset, "UTF-8", &ecnt) || ecnt) {
mf = url_encode(cf, 0); mf = url_encode(status.fn, 0);
} }
msg += QString::fromUtf8(mf.c_str()); msg += QString::fromUtf8(mf.c_str());
statusBar()->message(msg); statusBar()->message(msg);
@ -247,6 +265,7 @@ void RclMain::fileStart_IndexingAction_activated()
{ {
if (indexingdone) if (indexingdone)
startindexing = 1; startindexing = 1;
fileStart_IndexingAction->setEnabled(FALSE);
} }
// Note that all our 'urls' are like : file://... // Note that all our 'urls' are like : file://...

View File

@ -173,10 +173,10 @@
<cstring>fileStart_IndexingAction</cstring> <cstring>fileStart_IndexingAction</cstring>
</property> </property>
<property name="text"> <property name="text">
<string>Start Indexing</string> <string>Update index</string>
</property> </property>
<property name="menuText"> <property name="menuText">
<string>Start &amp;Indexing</string> <string>Update &amp;index</string>
</property> </property>
</action> </action>
<action> <action>

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.66 2006-04-12 07:26:16 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.67 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -275,9 +275,10 @@ bool Db::close()
LOGERR(("Db:close: exception while deleting db: %s\n", ermsg)); LOGERR(("Db:close: exception while deleting db: %s\n", ermsg));
return false; return false;
} }
bool Db::reOpen() bool Db::reOpen()
{ {
if (m_ndb->m_isopen) { if (m_ndb && m_ndb->m_isopen) {
if (!close()) if (!close())
return false; return false;
if (!open(m_ndb->m_basedir, m_ndb->m_mode, m_qOpts)) { if (!open(m_ndb->m_basedir, m_ndb->m_mode, m_qOpts)) {
@ -286,6 +287,16 @@ bool Db::reOpen()
} }
return true; return true;
} }
/**
 * Report how many documents the currently opened index holds.
 *
 * @return the document count taken from the writable handle when the
 *         index was opened for writing, from the read-only handle
 *         otherwise; -1 when there is no native db object or it is not
 *         open.
 */
int Db::docCnt()
{
    // Nothing to count without an open native database.
    if (!m_ndb || !m_ndb->m_isopen)
        return -1;

    // The count lives on whichever handle matches the open mode.
    if (m_ndb->m_iswritable)
        return m_ndb->wdb.get_doccount();
    return m_ndb->db.get_doccount();
}
bool Db::addQueryDb(const string &dir) bool Db::addQueryDb(const string &dir)
{ {
LOGDEB(("Db::addQueryDb: ndb %p iswritable %d db [%s]\n", m_ndb, LOGDEB(("Db::addQueryDb: ndb %p iswritable %d db [%s]\n", m_ndb,

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _DB_H_INCLUDED_ #ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_ #define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.31 2006-04-06 13:08:28 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: rcldb.h,v 1.32 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
@ -143,6 +143,8 @@ class Db {
bool close(); bool close();
bool isopen(); bool isopen();
int docCnt(); /// Return total docs in db
// Update-related functions // Update-related functions
bool add(const string &filename, const Doc &doc, const struct stat *stp); bool add(const string &filename, const Doc &doc, const struct stat *stp);
bool needUpdate(const string &filename, const struct stat *stp); bool needUpdate(const string &filename, const struct stat *stp);