From 9086c6e531ef9d3d168c86edd9ce103168c08dbe Mon Sep 17 00:00:00 2001 From: dockes Date: Wed, 12 Apr 2006 10:41:39 +0000 Subject: [PATCH] improve indexing status reporting --- src/index/indexer.cpp | 54 +++++++++++++++++++++++++++++++---------- src/index/indexer.h | 45 +++++++++++++++++++++++----------- src/qtgui/idxthread.cpp | 13 +++++----- src/qtgui/idxthread.h | 5 ++-- src/qtgui/rclmain.cpp | 27 ++++++++++++++++++--- src/qtgui/recollmain.ui | 4 +-- src/rcldb/rcldb.cpp | 15 ++++++++++-- src/rcldb/rcldb.h | 4 ++- 8 files changed, 123 insertions(+), 44 deletions(-) diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index b89507f2..36f8bb09 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: indexer.cpp,v 1.30 2006-04-04 13:49:54 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: indexer.cpp,v 1.31 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -68,6 +68,11 @@ bool DbIndexer::indexDb(bool resetbefore, list *topdirs) if (!init(resetbefore)) return false; + if (m_updater) { + m_updater->status.reset(); + m_updater->status.dbtotdocs = m_db.docCnt(); + } + for (list::const_iterator it = topdirs->begin(); it != topdirs->end(); it++) { LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(), @@ -94,6 +99,11 @@ bool DbIndexer::indexDb(bool resetbefore, list *topdirs) return false; } } + if (m_updater) { + m_updater->status.fn.clear(); + m_updater->status.phase = DbIxStatus::DBIXS_PURGE; + m_updater->update(); + } // Get rid of all database entries that don't exist in the // filesystem anymore. @@ -115,11 +125,21 @@ bool DbIndexer::indexDb(bool resetbefore, list *topdirs) m_db.deleteStemDb(*it); } for (it = langs.begin(); it != langs.end(); it++) { + if (m_updater) { + m_updater->status.phase = DbIxStatus::DBIXS_STEMDB; + m_updater->status.fn = *it; + m_updater->update(); + } m_db.createStemDb(*it); } } // The close would be done in our destructor, but we want status here + if (m_updater) { + m_updater->status.phase = DbIxStatus::DBIXS_CLOSING; + m_updater->status.fn.clear(); + m_updater->update(); + } if (!m_db.close()) { LOGERR(("DbIndexer::index: error closing database in %s\n", m_dbdir.c_str())); @@ -200,10 +220,8 @@ FsTreeWalker::Status DbIndexer::processone(const std::string &fn, const struct stat *stp, FsTreeWalker::CbFlag flg) { - if (m_updfunc) { - if (!m_updfunc->update(fn)) { + if (m_updater && !m_updater->update()) { return FsTreeWalker::FtwStop; - } } // If we're changing directories, possibly adjust parameters (set // the current directory in configuration object) @@ -217,15 +235,20 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp, // identification means that, if usesystemfilecommand is switched // from on to off it may happen that some files which are now // without mime type will not be purged from the db, resulting - // into possible 'cannot intern file' messages at query time... + // in possible 'cannot intern file' messages at query time... if (!m_db.needUpdate(fn, stp)) { LOGDEB(("indexfile: up to date: %s\n", fn.c_str())); + if (m_updater) { + m_updater->status.fn = fn; + if (!m_updater->update()) { + return FsTreeWalker::FtwStop; + } + } return FsTreeWalker::FtwOk; } FileInterner interner(fn, m_config, m_tmpdir); FileInterner::Status fis = FileInterner::FIAgain; - int i = 0; while (fis == FileInterner::FIAgain) { Rcl::Doc doc; string ipath; @@ -233,12 +256,6 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp, if (fis == FileInterner::FIError) break; - if (m_updfunc) { - if ((++i % 100) == 0 && !m_updfunc->update(fn+"|"+ipath)) { - return FsTreeWalker::FtwStop; - } - } - // Set the date if this was not done in the document handler if (doc.fmtime.empty()) { char ascdate[20]; @@ -257,6 +274,17 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp, // Do database-specific work to update document data if (!m_db.add(fn, doc, stp)) return FsTreeWalker::FtwError; + + if (m_updater) { + if ((++(m_updater->status.docsdone) % 10) == 0) { + m_updater->status.fn = fn; + if (!ipath.empty()) + m_updater->status.fn += "|" + ipath; + if (!m_updater->update()) { + return FsTreeWalker::FtwStop; + } + } + } } return FsTreeWalker::FtwOk; @@ -340,7 +368,7 @@ bool ConfIndexer::index(bool resetbefore) // cout << *dit << " "; //} //cout << endl; - m_dbindexer = new DbIndexer(m_config, dbit->first, m_updfunc); + m_dbindexer = new DbIndexer(m_config, dbit->first, m_updater); if (!m_dbindexer->indexDb(resetbefore, &dbit->second)) { deleteZ(m_dbindexer); m_reason = "Failed indexing in " + dbit->first; diff --git a/src/index/indexer.h b/src/index/indexer.h index 4a263cf2..afedaf1a 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -16,11 +16,16 @@ */ #ifndef _INDEXER_H_INCLUDED_ #define _INDEXER_H_INCLUDED_ -/* @(#$Id: indexer.h,v 1.14 2006-04-04 13:49:54 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: indexer.h,v 1.15 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include +#ifndef NO_NAMESPACES +using std::string; +using std::list; +#endif + #include "rclconfig.h" #include "fstreewalk.h" #include "rcldb.h" @@ -28,12 +33,24 @@ /* Forward decl for lower level indexing object */ class DbIndexer; -/* Callback to say what we're doing. If the update func returns false, we - * stop */ +class DbIxStatus { + public: + enum Phase {DBIXS_FILES, DBIXS_PURGE, DBIXS_STEMDB, DBIXS_CLOSING}; + Phase phase; + string fn; // Last file processed + int docsdone; // Documents processed + int dbtotdocs; // Doc count in index at start + void reset() {phase = DBIXS_FILES;fn.clear();docsdone=dbtotdocs=0;} + DbIxStatus() {reset();} +}; + +/** Callback to say what we're doing. If the update func returns false, we + * stop as soon as possible without corrupting state */ class DbIxStatusUpdater { public: + DbIxStatus status; virtual ~DbIxStatusUpdater(){} - virtual bool update(const std::string &) = 0; + virtual bool update() = 0; }; /** @@ -49,7 +66,7 @@ class ConfIndexer { public: enum runStatus {IndexerOk, IndexerError}; ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0) - : m_config(cnf), m_dbindexer(0), m_updfunc(updfunc) + : m_config(cnf), m_dbindexer(0), m_updater(updfunc) {} virtual ~ConfIndexer(); /** Worker function: doe the actual indexing */ @@ -58,7 +75,7 @@ class ConfIndexer { private: RclConfig *m_config; DbIndexer *m_dbindexer; // Object to process directories for a given db - DbIxStatusUpdater *m_updfunc; + DbIxStatusUpdater *m_updater; string m_reason; }; @@ -76,10 +93,10 @@ class DbIndexer : public FsTreeWalkerCB { public: /** Constructor does nothing but store parameters */ DbIndexer(RclConfig *cnf, // Configuration data - const std::string &dbd, // Place where the db lives + const string &dbd, // Place where the db lives DbIxStatusUpdater *updfunc = 0 // status updater callback ) - : m_config(cnf), m_dbdir(dbd), m_updfunc(updfunc) { + : m_config(cnf), m_dbdir(dbd), m_updater(updfunc) { } virtual ~DbIndexer(); @@ -94,26 +111,26 @@ class DbIndexer : public FsTreeWalkerCB { When walking is done, we create the stem databases and close the main db. */ - bool indexDb(bool resetbefore, std::list *topdirs); + bool indexDb(bool resetbefore, std::list *topdirs); /** Index a list of files. No db cleaning or stemdb updating */ - bool indexFiles(const std::list &files); + bool indexFiles(const std::list &files); /** Create stem database for given language */ bool createStemDb(const string &lang); /** Tree walker callback method */ FsTreeWalker::Status - processone(const std::string &, const struct stat *, + processone(const string &, const struct stat *, FsTreeWalker::CbFlag); private: FsTreeWalker m_walker; RclConfig *m_config; - std::string m_dbdir; + string m_dbdir; Rcl::Db m_db; - std::string m_tmpdir; - DbIxStatusUpdater *m_updfunc; + string m_tmpdir; + DbIxStatusUpdater *m_updater; bool init(bool rst = false); }; diff --git a/src/qtgui/idxthread.cpp b/src/qtgui/idxthread.cpp index dfae5cdd..4e61ee87 100644 --- a/src/qtgui/idxthread.cpp +++ b/src/qtgui/idxthread.cpp @@ -30,10 +30,10 @@ static QMutex curfile_mutex; class IdxThread : public QThread , public DbIxStatusUpdater { virtual void run(); public: - virtual bool update(const string &fn) { + virtual bool update() { QMutexLocker locker(&curfile_mutex); - m_curfile = fn; - LOGDEB1(("IdxThread::update: indexing %s\n", m_curfile.c_str())); + m_statusSnap = status; + LOGDEB1(("IdxThread::update: indexing %s\n", m_statusSnap.fn.c_str())); if (stopindexing) { stopindexing = 0; return false; @@ -41,7 +41,8 @@ class IdxThread : public QThread , public DbIxStatusUpdater { return true; } ConfIndexer *indexer; - string m_curfile; + // Maintain a copy/snapshot of idx status + DbIxStatus m_statusSnap; int loglevel; }; @@ -97,8 +98,8 @@ void stop_idxthread() idxthread.wait(); } -std::string idxthread_currentfile() +DbIxStatus idxthread_idxStatus() { QMutexLocker locker(&curfile_mutex); - return(idxthread.m_curfile); + return(idxthread.m_statusSnap); } diff --git a/src/qtgui/idxthread.h b/src/qtgui/idxthread.h index 17401028..8b0bc418 100644 --- a/src/qtgui/idxthread.h +++ b/src/qtgui/idxthread.h @@ -16,8 +16,9 @@ */ #ifndef _IDXTHREAD_H_INCLUDED_ #define _IDXTHREAD_H_INCLUDED_ -/* @(#$Id: idxthread.h,v 1.5 2006-04-04 13:49:55 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: idxthread.h,v 1.6 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes */ #include +#include "indexer.h" class RclConfig; @@ -25,7 +26,7 @@ class RclConfig; // sessions. extern void start_idxthread(const RclConfig& cnf); extern void stop_idxthread(); -extern std::string idxthread_currentfile(); +extern DbIxStatus idxthread_idxStatus(); extern int stopindexing; extern int startindexing; diff --git a/src/qtgui/rclmain.cpp b/src/qtgui/rclmain.cpp index b7284bda..755412f8 100644 --- a/src/qtgui/rclmain.cpp +++ b/src/qtgui/rclmain.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rclmain.cpp,v 1.20 2006-04-12 07:26:17 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rclmain.cpp,v 1.21 2006-04-12 10:41:39 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -219,17 +219,35 @@ void RclMain::periodic100() QString::fromAscii(indexingReason.c_str())); } indexingstatus = IDXTS_NULL; + fileStart_IndexingAction->setEnabled(TRUE); // Make sure we reopen the db to get the results. LOGINFO(("Indexing done: closing query database\n")); rcldb->close(); } else if (indexingdone == 0) { if (toggle == 0) { QString msg = tr("Indexing in progress: "); - string cf = idxthread_currentfile(); + DbIxStatus status = idxthread_idxStatus(); + QString phs; + switch (status.phase) { + case DbIxStatus::DBIXS_FILES: phs=tr("Files");break; + case DbIxStatus::DBIXS_PURGE: phs=tr("Purge");break; + case DbIxStatus::DBIXS_STEMDB: phs=tr("Stemdb");break; + case DbIxStatus::DBIXS_CLOSING:phs=tr("Closing");break; + default: phs=tr("Unknown");break; + } + msg += phs + " "; + if (status.phase == DbIxStatus::DBIXS_FILES) { + char cnts[100]; + if (status.dbtotdocs>0) + sprintf(cnts,"(%d/%d) ",status.docsdone, status.dbtotdocs); + else + sprintf(cnts, "(%d) ", status.docsdone); + msg += QString::fromAscii(cnts) + " "; + } string mf;int ecnt = 0; string fcharset = rclconfig->getDefCharset(true); - if (!transcode(cf, mf, fcharset, "UTF-8", &ecnt) || ecnt) { - mf = url_encode(cf, 0); + if (!transcode(status.fn, mf, fcharset, "UTF-8", &ecnt) || ecnt) { + mf = url_encode(status.fn, 0); } msg += QString::fromUtf8(mf.c_str()); statusBar()->message(msg); @@ -247,6 +265,7 @@ void RclMain::fileStart_IndexingAction_activated() { if (indexingdone) startindexing = 1; + fileStart_IndexingAction->setEnabled(FALSE); } // Note that all our 'urls' are like : file://... diff --git a/src/qtgui/recollmain.ui b/src/qtgui/recollmain.ui index b6adb0bd..1268531d 100644 --- a/src/qtgui/recollmain.ui +++ b/src/qtgui/recollmain.ui @@ -173,10 +173,10 @@ fileStart_IndexingAction - Start Indexing + Update index - Start &Indexing + Update &index diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 7be17daf..e1067f68 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.66 2006-04-12 07:26:16 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.67 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -275,9 +275,10 @@ bool Db::close() LOGERR(("Db:close: exception while deleting db: %s\n", ermsg)); return false; } + bool Db::reOpen() { - if (m_ndb->m_isopen) { + if (m_ndb && m_ndb->m_isopen) { if (!close()) return false; if (!open(m_ndb->m_basedir, m_ndb->m_mode, m_qOpts)) { @@ -286,6 +287,16 @@ bool Db::reOpen() } return true; } + +int Db::docCnt() +{ + if (m_ndb && m_ndb->m_isopen) { + return m_ndb->m_iswritable ? m_ndb->wdb.get_doccount() : + m_ndb->db.get_doccount(); + } + return -1; +} + bool Db::addQueryDb(const string &dir) { LOGDEB(("Db::addQueryDb: ndb %p iswritable %d db [%s]\n", m_ndb, diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 7f6329da..f0717bd6 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -16,7 +16,7 @@ */ #ifndef _DB_H_INCLUDED_ #define _DB_H_INCLUDED_ -/* @(#$Id: rcldb.h,v 1.31 2006-04-06 13:08:28 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rcldb.h,v 1.32 2006-04-12 10:41:39 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -143,6 +143,8 @@ class Db { bool close(); bool isopen(); + int docCnt(); /// Return total docs in db + // Update-related functions bool add(const string &filename, const Doc &doc, const struct stat *stp); bool needUpdate(const string &filename, const struct stat *stp);