From f1b2d3004a20689faae79d6645fadbae5348f451 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 16 Apr 2021 13:27:42 +0200 Subject: [PATCH] Simplified the indexing status update code by treating the updater as a singleton Add a 'flushing' state for when we commit the Xapian index. --- src/Makefile.am | 31 ++++----- src/index/fsfetcher.cpp | 10 ++- src/index/fsfetcher.h | 3 + src/index/fsindexer.cpp | 72 ++++++-------------- src/index/fsindexer.h | 9 +-- src/index/idxstatus.cpp | 137 +++++++++++++++++++++++++++++++++++++- src/index/idxstatus.h | 29 ++++++-- src/index/indexer.cpp | 36 ++++------ src/index/indexer.h | 40 ++--------- src/index/recollindex.cpp | 102 ++++------------------------ src/index/webqueue.cpp | 12 +--- src/index/webqueue.h | 5 +- src/qtgui/rclm_idx.cpp | 1 + src/rcldb/rcldb.cpp | 7 ++ 14 files changed, 255 insertions(+), 239 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index d9e63035..ad85a81b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -89,8 +89,6 @@ common/unacpp.h \ common/uproplist.h \ common/utf8fn.cpp \ common/utf8fn.h \ -index/webqueue.cpp \ -index/webqueue.h \ index/webqueuefetcher.cpp \ index/webqueuefetcher.h \ index/checkretryfailed.cpp \ @@ -101,8 +99,6 @@ index/fetcher.cpp \ index/fetcher.h \ index/fsfetcher.cpp \ index/fsfetcher.h \ -index/fsindexer.cpp \ -index/fsindexer.h \ index/idxdiags.h \ index/idxdiags.cpp \ index/idxstatus.h \ @@ -273,6 +269,8 @@ utils/utf8iter.h \ utils/wipedir.cpp \ utils/wipedir.h \ utils/workqueue.h \ +utils/x11mon.cpp \ +utils/x11mon.h \ utils/zlibut.cpp \ utils/zlibut.h \ xaposix/safefcntl.h \ @@ -290,7 +288,7 @@ AM_YFLAGS = -d # need it librecoll_la_LDFLAGS = -release $(VERSION) -no-undefined @NO_UNDEF_LINK_FLAG@ -librecoll_la_LIBADD = $(XSLT_LIBS) $(LIBXAPIAN) $(LIBICONV) $(LIBTHREADS) +librecoll_la_LIBADD = $(XSLT_LIBS) $(LIBXAPIAN) $(LIBICONV) $(X_LIBX11) $(LIBTHREADS) # There is probably a better way to do this. 
The KIO needs to be linked # with librecoll, but librecoll is installed into a non-standard place @@ -319,16 +317,19 @@ if MAKEXADUMP endif recollindex_SOURCES = \ - index/recollindex.cpp \ - index/checkindexed.cpp \ - index/checkindexed.h \ - index/indexer.cpp \ - index/indexer.h \ - index/rclmonprc.cpp \ - index/rclmonrcv.cpp \ - utils/x11mon.cpp \ - utils/x11mon.h -recollindex_LDADD = librecoll.la $(X_LIBX11) + index/checkindexed.cpp \ + index/checkindexed.h \ + index/fsindexer.cpp \ + index/fsindexer.h \ + index/indexer.cpp \ + index/indexer.h \ + index/rclmonprc.cpp \ + index/rclmonrcv.cpp \ + index/recollindex.cpp \ + index/webqueue.cpp \ + index/webqueue.h + +recollindex_LDADD = librecoll.la recollq_SOURCES = query/recollqmain.cpp recollq_LDADD = librecoll.la diff --git a/src/index/fsfetcher.cpp b/src/index/fsfetcher.cpp index e9d6cd6e..389329a7 100644 --- a/src/index/fsfetcher.cpp +++ b/src/index/fsfetcher.cpp @@ -57,14 +57,20 @@ bool FSDocFetcher::fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out) out.data = fn; return true; } - + +void fsmakesig(const struct PathStat *stp, string& out) +{ + out = lltodecstr(stp->pst_size) + + lltodecstr(o_uptodate_test_use_mtime ? 
stp->pst_mtime : stp->pst_ctime); +} + bool FSDocFetcher::makesig(RclConfig* cnf, const Rcl::Doc& idoc, string& sig) { string fn; struct PathStat st; if (urltopath(cnf, idoc, fn, st) != DocFetcher::FetchOk) return false; - FsIndexer::makesig(&st, sig); + fsmakesig(&st, sig); return true; } diff --git a/src/index/fsfetcher.h b/src/index/fsfetcher.h index f7ebb862..363855a8 100644 --- a/src/index/fsfetcher.h +++ b/src/index/fsfetcher.h @@ -18,6 +18,7 @@ #define _FSFETCHER_H_INCLUDED_ #include "fetcher.h" +#include "pathut.h" /** * The file-system fetcher: @@ -32,4 +33,6 @@ class FSDocFetcher : public DocFetcher{ virtual ~FSDocFetcher() {} }; +extern void fsmakesig(const struct PathStat *stp, std::string& out); + #endif /* _FSFETCHER_H_INCLUDED_ */ diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp index 62515789..11eaa458 100644 --- a/src/index/fsindexer.cpp +++ b/src/index/fsindexer.cpp @@ -48,6 +48,7 @@ #include "extrameta.h" #include "utf8fn.h" #include "idxdiags.h" +#include "fsfetcher.h" #if defined(HAVE_POSIX_FADVISE) #include #include @@ -103,8 +104,8 @@ public: } }; -FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc) - : m_config(cnf), m_db(db), m_updater(updfunc), +FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db) + : m_config(cnf), m_db(db), m_missing(new FSIFIMissingStore), m_detectxattronly(false), m_noretryfailed(false) #ifdef IDX_THREADS @@ -186,12 +187,7 @@ bool FsIndexer::index(int flags) if (!init()) return false; - if (m_updater) { -#ifdef IDX_THREADS - std::unique_lock locker(m_updater->m_mutex); -#endif - m_updater->status.dbtotdocs = m_db->docCnt(); - } + statusUpdater()->setDbTotDocs(m_db->docCnt()); m_walker.setSkippedPaths(m_config->getSkippedPaths()); if (quickshallow) { @@ -512,12 +508,6 @@ void FsIndexer::setlocalfields(const map& fields, Rcl::Doc& doc) } } -void FsIndexer::makesig(const struct PathStat *stp, string& out) -{ - out = lltodecstr(stp->pst_size) + - lltodecstr(o_uptodate_test_use_mtime ? 
stp->pst_mtime : stp->pst_ctime); -} - #ifdef IDX_THREADS // Called updworker as seen from here, but the first step (and only in // most meaningful configurations) is doing the word-splitting, which @@ -586,13 +576,8 @@ void *FsIndexerInternfileWorker(void * fsp) FsTreeWalker::Status FsIndexer::processone( const std::string &fn, const struct PathStat *stp, FsTreeWalker::CbFlag flg) { - if (m_updater) { -#ifdef IDX_THREADS - std::unique_lock locker(m_updater->m_mutex); -#endif - if (!m_updater->update()) { - return FsTreeWalker::FtwStop; - } + if (!statusUpdater()->update(DbIxStatus::DBIXS_FILES, fn)) { + return FsTreeWalker::FtwStop; } // If we're changing directories, possibly adjust parameters (set @@ -665,7 +650,7 @@ FsTreeWalker::Status FsIndexer::processonefile( // m/ctime and size and the possibly new value is checked against // the stored one. string sig; - makesig(stp, sig); + fsmakesig(stp, sig); string udi; make_udi(fn, cstr_null, udi); unsigned int existingDoc; @@ -706,16 +691,9 @@ FsTreeWalker::Status FsIndexer::processonefile( if (!needupdate) { LOGDEB0("processone: up to date: " << fn << "\n"); - if (m_updater) { -#ifdef IDX_THREADS - std::unique_lock locker(m_updater->m_mutex); -#endif - // Status bar update, abort request etc. 
- m_updater->status.fn = fn; - ++(m_updater->status.filesdone); - if (!m_updater->update()) { - return FsTreeWalker::FtwStop; - } + if (!statusUpdater()->update( + DbIxStatus::DBIXS_FILES, fn, DbIxStatusUpdater::IncrFilesDone)) { + return FsTreeWalker::FtwStop; } return FsTreeWalker::FtwOk; } @@ -827,26 +805,20 @@ FsTreeWalker::Status FsIndexer::processonefile( } // Tell what we are doing and check for interrupt request - if (m_updater) { -#ifdef IDX_THREADS - std::unique_lock locker(m_updater->m_mutex); -#endif - ++(m_updater->status.docsdone); - if (m_updater->status.dbtotdocs < m_updater->status.docsdone) - m_updater->status.dbtotdocs = m_updater->status.docsdone; - m_updater->status.fn = fn; - if (!doc.ipath.empty()) { - m_updater->status.fn += "|" + doc.ipath; - } else { - if (fis == FileInterner::FIError) { - ++(m_updater->status.fileerrors); - } - ++(m_updater->status.filesdone); - } - if (!m_updater->update()) { - return FsTreeWalker::FtwStop; + int incr = DbIxStatusUpdater::IncrDocsDone; + std::string sfn(fn); + if (!doc.ipath.empty()) { + sfn += "|" + doc.ipath; + } else { + if (fis == FileInterner::FIError) { + incr |= DbIxStatusUpdater::IncrFileErrors; } + incr |= DbIxStatusUpdater::IncrFilesDone; } + if (!statusUpdater()->update(DbIxStatus::DBIXS_FILES, sfn, incr)) { + return FsTreeWalker::FtwStop; + } + } if (fis == FileInterner::FIError) { diff --git a/src/index/fsindexer.h b/src/index/fsindexer.h index e2a2ab22..261cd8e5 100644 --- a/src/index/fsindexer.h +++ b/src/index/fsindexer.h @@ -26,7 +26,6 @@ #include "workqueue.h" #endif // IDX_THREADS -class DbIxStatusUpdater; class FIMissingStore; struct PathStat; @@ -53,9 +52,8 @@ public: /** Constructor does nothing but store parameters * * @param cnf Configuration data - * @param updfunc Status updater callback */ - FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc = 0); + FsIndexer(RclConfig *cnf, Rcl::Db *db); virtual ~FsIndexer(); /** @@ -77,10 +75,6 @@ public: FsTreeWalker::Status 
processone(const string &fn, const struct PathStat *, FsTreeWalker::CbFlag); - /** Make signature for file up to date checks */ - static void makesig(const struct PathStat *stp, string& out); - - private: class PurgeCandidateRecorder { @@ -118,7 +112,6 @@ private: RclConfig *m_config; Rcl::Db *m_db; string m_reason; - DbIxStatusUpdater *m_updater; // Top/start directories list std::vector m_tdl; // Store for missing filters and associated mime types diff --git a/src/index/idxstatus.cpp b/src/index/idxstatus.cpp index 0295038f..6b0032cb 100644 --- a/src/index/idxstatus.cpp +++ b/src/index/idxstatus.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2017-2018 J.F.Dockes +/* Copyright (C) 2017-2021 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -16,10 +16,18 @@ */ #include "autoconfig.h" -#include "idxstatus.h" +#include -#include "rclconfig.h" +#include "chrono.h" #include "conftree.h" +#include "idxstatus.h" +#include "log.h" +#include "rclconfig.h" +#include "x11mon.h" + +// Global stop request flag. This is checked in a number of places in the +// indexing routines. +int stopindexing; void readIdxStatus(RclConfig *config, DbIxStatus &status) { @@ -33,3 +41,126 @@ void readIdxStatus(RclConfig *config, DbIxStatus &status) status.totfiles = (int)cs.getInt("totfiles", 0); status.hasmonitor = cs.getBool("hasmonitor", false); } + +// Receive status updates from the ongoing indexing operation +// Also check for an interrupt request and return the info to caller which +// should subsequently orderly terminate what it is doing. 
+class DbIxStatusUpdater::Internal { +public: +#ifdef IDX_THREADS + std::mutex m_mutex; +#endif + Internal(const RclConfig *config, bool nox11mon) + : m_file(config->getIdxStatusFile().c_str()), m_stopfilename(config->getIdxStopFile()), + nox11monitor(nox11mon) { + // The total number of files included in the index is actually + // difficult to compute from the index itself. For display + // purposes, we save it in the status file from indexing to + // indexing (mostly...) + string stf; + if (m_file.get("totfiles", stf)) { + status.totfiles = atoi(stf.c_str()); + } + } + + virtual bool update() { + if (status.dbtotdocs < status.docsdone) + status.dbtotdocs = status.docsdone; + // Update the status file. Avoid doing it too often. Always do + // it at the end (status DONE) + if (status.phase == DbIxStatus::DBIXS_DONE || + status.phase != m_prevphase || m_chron.millis() > 300) { + if (status.totfiles < status.filesdone || status.phase == DbIxStatus::DBIXS_DONE) { + status.totfiles = status.filesdone; + } + m_prevphase = status.phase; + m_chron.restart(); + m_file.holdWrites(true); + m_file.set("phase", int(status.phase)); + m_file.set("docsdone", status.docsdone); + m_file.set("filesdone", status.filesdone); + m_file.set("fileerrors", status.fileerrors); + m_file.set("dbtotdocs", status.dbtotdocs); + m_file.set("totfiles", status.totfiles); + m_file.set("fn", status.fn); + m_file.set("hasmonitor", status.hasmonitor); + m_file.holdWrites(false); + } + if (path_exists(m_stopfilename)) { + LOGINF("recollindex: asking indexer to stop because " << m_stopfilename << " exists\n"); + path_unlink(m_stopfilename); + stopindexing = true; + } + if (stopindexing) { + return false; + } + +#ifndef DISABLE_X11MON + // If we are in the monitor, we also need to check X11 status + // during the initial indexing pass (else the user could log + // out and the indexing would go on, not good (ie: if the user + // logs in again, the new recollindex will fail). 
+ if (status.hasmonitor && !nox11monitor && !x11IsAlive()) { + LOGDEB("X11 session went away during initial indexing pass\n"); + stopindexing = true; + return false; + } +#endif + return true; + } + + DbIxStatus status; + ConfSimple m_file; + string m_stopfilename; + Chrono m_chron; + bool nox11monitor{false}; + DbIxStatus::Phase m_prevphase{DbIxStatus::DBIXS_NONE}; +}; + + +DbIxStatusUpdater::DbIxStatusUpdater(const RclConfig *config, bool nox11monitor) { + m = new Internal(config, nox11monitor); +} + +void DbIxStatusUpdater::setMonitor(bool onoff) +{ + m->status.hasmonitor = onoff; +} + +void DbIxStatusUpdater::setDbTotDocs(int totdocs) +{ +#ifdef IDX_THREADS + std::unique_lock lock(m->m_mutex); +#endif + m->status.dbtotdocs = totdocs; +} + +bool DbIxStatusUpdater::update(DbIxStatus::Phase phase, const string& fn, int incr) { +#ifdef IDX_THREADS + std::unique_lock lock(m->m_mutex); +#endif + + // We don't change a FLUSH status except if the new status is NONE + // (recollindex init or rcldb after commit()). 
Else, the flush status may be + // overwritten by a "file updated" status and not be displayed + if (phase == DbIxStatus::DBIXS_NONE || m->status.phase != DbIxStatus::DBIXS_FLUSH) + m->status.phase = phase; + m->status.fn = fn; + if (incr & IncrDocsDone) + m->status.docsdone++; + if (incr & IncrFilesDone) + m->status.filesdone++; + if (incr & IncrFileErrors) + m->status.fileerrors++; + return m->update(); +} + +static DbIxStatusUpdater *updater; + +DbIxStatusUpdater *statusUpdater(RclConfig *config, bool nox11mon) +{ + if (updater) { + return updater; + } + return (updater = new DbIxStatusUpdater(config, nox11mon)); +} diff --git a/src/index/idxstatus.h b/src/index/idxstatus.h index ad5da14c..9920daed 100644 --- a/src/index/idxstatus.h +++ b/src/index/idxstatus.h @@ -24,10 +24,8 @@ // $RECOLL_CONFDIR/idxstatus.txt class DbIxStatus { public: - enum Phase {DBIXS_NONE, - DBIXS_FILES, DBIXS_PURGE, DBIXS_STEMDB, DBIXS_CLOSING, - DBIXS_MONITOR, - DBIXS_DONE}; + enum Phase {DBIXS_NONE, DBIXS_FILES, DBIXS_FLUSH, DBIXS_PURGE, DBIXS_STEMDB, DBIXS_CLOSING, + DBIXS_MONITOR, DBIXS_DONE}; Phase phase; std::string fn; // Last file processed int docsdone; // Documents actually updated @@ -53,4 +51,27 @@ public: class RclConfig; extern void readIdxStatus(RclConfig *config, DbIxStatus &status); +/** Callback to say what we're doing. If the update func returns false, we + * stop as soon as possible without corrupting state */ +class DbIxStatusUpdater { +public: + DbIxStatusUpdater(const RclConfig *config, bool nox11monitor); + virtual ~DbIxStatusUpdater(){} + + enum Incr {IncrNone, IncrDocsDone = 0x1, IncrFilesDone = 0x2, IncrFileErrors = 0x4}; + // Change phase/fn and update + virtual bool update(DbIxStatus::Phase phase, const std::string& fn, int incr = IncrNone); + + void setMonitor(bool onoff); + void setDbTotDocs(int totdocs); + + class Internal; +private: + Internal *m; +}; + +// We use the updater as a singleton everywhere. It is instantiated in +// idxstatus.cpp. 
Must be called once with non-null config at first. +extern DbIxStatusUpdater *statusUpdater(RclConfig *config=nullptr, bool nox11monitor=false); + #endif /* _IDXSTATUS_H_INCLUDED_ */ diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index 8b0aa076..c0e5af03 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -42,10 +42,6 @@ using std::list; using std::string; using std::vector; -// Global stop request flag. This is checked in a number of place in the -// indexing routines. -int stopindexing; - // This would more logically live in recollindex.cpp, but then librecoll would // have an undefined symbol ConfSimple idxreasons; @@ -94,10 +90,8 @@ bool runWebFilesMoverScript(RclConfig *config) } #endif -ConfIndexer::ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc) - : m_config(cnf), m_db(cnf), m_fsindexer(0), - m_doweb(false), m_webindexer(0), - m_updater(updfunc) +ConfIndexer::ConfIndexer(RclConfig *cnf) + : m_config(cnf), m_db(cnf) { m_config->getConfParam("processwebqueue", &m_doweb); } @@ -135,7 +129,7 @@ bool ConfIndexer::firstFsIndexingSequence() { LOGDEB("ConfIndexer::firstFsIndexingSequence\n"); deleteZ(m_fsindexer); - m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); + m_fsindexer = new FsIndexer(m_config, &m_db); if (!m_fsindexer) { return false; } @@ -170,7 +164,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) firstFsIndexingSequence(); } deleteZ(m_fsindexer); - m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); + m_fsindexer = new FsIndexer(m_config, &m_db); if (!m_fsindexer || !m_fsindexer->index(flags)) { if (stopindexing) { addIdxReason("indexer", "Indexing was interrupted."); @@ -185,7 +179,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) if (m_doweb && (typestorun & IxTWebQueue)) { runWebFilesMoverScript(m_config); deleteZ(m_webindexer); - m_webindexer = new WebQueueIndexer(m_config, &m_db, m_updater); + m_webindexer = new WebQueueIndexer(m_config, &m_db); if 
(!m_webindexer || !m_webindexer->index()) { m_db.close(); addIdxReason("indexer", "Web index creation failed. See" + logloc); @@ -196,7 +190,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) if (typestorun == IxTAll) { // Get rid of all database entries that don't exist in the // filesystem anymore. Only if all *configured* indexers ran. - if (m_updater && !m_updater->update(DbIxStatus::DBIXS_PURGE, "")) { + if (!statusUpdater()->update(DbIxStatus::DBIXS_PURGE, string())) { m_db.close(); addIdxReason("indexer", "Index purge failed. See" + logloc); return false; @@ -207,8 +201,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) // The close would be done in our destructor, but we want status // here. Makes no sense to check for cancel, we'll have to close // anyway - if (m_updater) - m_updater->update(DbIxStatus::DBIXS_CLOSING, string()); + statusUpdater()->update(DbIxStatus::DBIXS_CLOSING, string()); if (!m_db.close()) { LOGERR("ConfIndexer::index: error closing database in " << m_config->getDbDir() << "\n"); @@ -216,13 +209,13 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) return false; } - if (m_updater && !m_updater->update(DbIxStatus::DBIXS_CLOSING, string())) + if (!statusUpdater()->update(DbIxStatus::DBIXS_CLOSING, string())) return false; bool ret = true; if (!createStemmingDatabases()) { ret = false; } - if (m_updater && !m_updater->update(DbIxStatus::DBIXS_CLOSING, string())) + if (!statusUpdater()->update(DbIxStatus::DBIXS_CLOSING, string())) return false; // Don't fail indexing because of an aspell issue: we ignore the status. 
@@ -230,8 +223,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) (void)createAspellDict(); clearMimeHandlerCache(); - if (m_updater) - m_updater->update(DbIxStatus::DBIXS_DONE, string()); + statusUpdater()->update(DbIxStatus::DBIXS_DONE, string()); return ret; } @@ -252,7 +244,7 @@ bool ConfIndexer::indexFiles(list& ifiles, int flag) m_config->setKeyDir(cstr_null); bool ret = false; if (!m_fsindexer) - m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); + m_fsindexer = new FsIndexer(m_config, &m_db); if (m_fsindexer) ret = m_fsindexer->indexFiles(myfiles, flag); LOGDEB2("ConfIndexer::indexFiles: fsindexer returned " << ret << ", " << @@ -261,7 +253,7 @@ bool ConfIndexer::indexFiles(list& ifiles, int flag) if (m_doweb && !myfiles.empty() && !(flag & IxFNoWeb)) { if (!m_webindexer) - m_webindexer = new WebQueueIndexer(m_config, &m_db, m_updater); + m_webindexer = new WebQueueIndexer(m_config, &m_db); if (m_webindexer) { ret = ret && m_webindexer->indexFiles(myfiles); } else { @@ -313,14 +305,14 @@ bool ConfIndexer::purgeFiles(list &files, int flag) bool ret = false; m_config->setKeyDir(cstr_null); if (!m_fsindexer) - m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); + m_fsindexer = new FsIndexer(m_config, &m_db); if (m_fsindexer) ret = m_fsindexer->purgeFiles(myfiles); #ifndef DISABLE_WEB_INDEXER if (m_doweb && !myfiles.empty() && !(flag & IxFNoWeb)) { if (!m_webindexer) - m_webindexer = new WebQueueIndexer(m_config, &m_db, m_updater); + m_webindexer = new WebQueueIndexer(m_config, &m_db); if (m_webindexer) { ret = ret && m_webindexer->purgeFiles(myfiles); } else { diff --git a/src/index/indexer.h b/src/index/indexer.h index 1404b075..307aeb3d 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2004 J.F.Dockes +/* Copyright (C) 2004-2021 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by 
* the Free Software Foundation; either version 2 of the License, or @@ -16,7 +16,6 @@ */ #ifndef _INDEXER_H_INCLUDED_ #define _INDEXER_H_INCLUDED_ -#include "rclconfig.h" #include #include @@ -24,6 +23,7 @@ #include #include +#include "rclconfig.h" #include "rcldb.h" #include "rcldoc.h" #include "idxstatus.h" @@ -31,31 +31,6 @@ class FsIndexer; class WebQueueIndexer; -/** Callback to say what we're doing. If the update func returns false, we - * stop as soon as possible without corrupting state */ -class DbIxStatusUpdater { -public: -#ifdef IDX_THREADS - std::mutex m_mutex; -#endif - DbIxStatus status; - virtual ~DbIxStatusUpdater(){} - - // Convenience: change phase/fn and update - virtual bool update(DbIxStatus::Phase phase, const string& fn) - { -#ifdef IDX_THREADS - std::unique_lock lock(m_mutex); -#endif - status.phase = phase; - status.fn = fn; - return update(); - } - - // To be implemented by user for sending info somewhere - virtual bool update() = 0; -}; - /** * The top level batch indexing object. Processes the configuration, * then invokes file system walking or other to populate/update the @@ -64,7 +39,7 @@ public: class ConfIndexer { public: enum runStatus {IndexerOk, IndexerError}; - ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0); + ConfIndexer(RclConfig *cnf); virtual ~ConfIndexer(); // Indexer types. 
Maybe we'll have something more dynamic one day @@ -118,11 +93,10 @@ public: private: RclConfig *m_config; Rcl::Db m_db; - FsIndexer *m_fsindexer; - bool m_doweb; - WebQueueIndexer *m_webindexer; - DbIxStatusUpdater *m_updater; - string m_reason; + FsIndexer *m_fsindexer{nullptr}; + bool m_doweb{false}; + WebQueueIndexer *m_webindexer{nullptr}; + string m_reason; // The first time we index, we do things a bit differently to // avoid user frustration (make at least some results available diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index 743ae5bd..2260adaa 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -60,7 +60,6 @@ using namespace std; #endif #include "execmd.h" #include "checkretryfailed.h" -#include "idxstatus.h" #include "circache.h" #include "idxdiags.h" @@ -117,79 +116,6 @@ static void cleanup() recoll_exitready(); } -// Receive status updates from the ongoing indexing operation -// Also check for an interrupt request and return the info to caller which -// should subsequently orderly terminate what it is doing. -class MyUpdater : public DbIxStatusUpdater { -public: - MyUpdater(const RclConfig *config) - : m_file(config->getIdxStatusFile().c_str()), - m_stopfilename(config->getIdxStopFile()), - m_prevphase(DbIxStatus::DBIXS_NONE) { - // The total number of files included in the index is actually - // difficult to compute from the index itself. For display - // purposes, we save it in the status file from indexing to - // indexing (mostly...) - string stf; - if (m_file.get("totfiles", stf)) { - status.totfiles = atoi(stf.c_str()); - } - } - - virtual bool update() { - // Update the status file. Avoid doing it too often. 
Always do - // it at the end (status DONE) - if (status.phase == DbIxStatus::DBIXS_DONE || - status.phase != m_prevphase || m_chron.millis() > 300) { - if (status.totfiles < status.filesdone || - status.phase == DbIxStatus::DBIXS_DONE) { - status.totfiles = status.filesdone; - } - m_prevphase = status.phase; - m_chron.restart(); - m_file.holdWrites(true); - m_file.set("phase", int(status.phase)); - m_file.set("docsdone", status.docsdone); - m_file.set("filesdone", status.filesdone); - m_file.set("fileerrors", status.fileerrors); - m_file.set("dbtotdocs", status.dbtotdocs); - m_file.set("totfiles", status.totfiles); - m_file.set("fn", status.fn); - m_file.set("hasmonitor", status.hasmonitor); - m_file.holdWrites(false); - } - if (path_exists(m_stopfilename)) { - LOGINF("recollindex: asking indexer to stop because " << - m_stopfilename << " exists\n"); - path_unlink(m_stopfilename); - stopindexing = true; - } - if (stopindexing) { - return false; - } - -#ifndef DISABLE_X11MON - // If we are in the monitor, we also need to check X11 status - // during the initial indexing pass (else the user could log - // out and the indexing would go on, not good (ie: if the user - // logs in again, the new recollindex will fail). - if ((op_flags & OPT_m) && !(op_flags & OPT_x) && !x11IsAlive()) { - LOGDEB("X11 session went away during initial indexing pass\n"); - stopindexing = true; - return false; - } -#endif - return true; - } - -private: - ConfSimple m_file; - string m_stopfilename; - Chrono m_chron; - DbIxStatus::Phase m_prevphase; -}; -static MyUpdater *updater; - // This holds the state of topdirs (exist+nonempty) on indexing // startup. 
If it changes after a resume from sleep we interrupt the // indexing (the assumption being that a volume has been mounted or @@ -233,7 +159,7 @@ static void sigcleanup(int sig) static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset) { if (!confindexer) { - confindexer = new ConfIndexer(config, updater); + confindexer = new ConfIndexer(config); if (inPlaceReset) confindexer->setInPlaceReset(); } @@ -831,13 +757,20 @@ int main(int argc, char *argv[]) #endif Pidfile pidfile(config->getPidfile()); - updater = new MyUpdater(config); lockorexit(&pidfile, config); // Log something at LOGINFO to reset the trace file. Else at level // 3 it's not even truncated if all docs are up to date. LOGINFO("recollindex: starting up\n"); setMyPriority(config); + + // Init status updater + if (nullptr == statusUpdater(config, op_flags & OPT_x)) { + std::cerr << "Could not initialize status updater\n"; + LOGERR("Could not initialize status updater\n"); + exit(1); + } + statusUpdater()->update(DbIxStatus::DBIXS_NONE, ""); if (op_flags & OPT_r) { if (aremain != 1) @@ -899,9 +832,7 @@ int main(int argc, char *argv[]) } else if (op_flags & OPT_m) { if (aremain != 0) Usage(); - if (updater) { - updater->status.hasmonitor = true; - } + statusUpdater()->setMonitor(true); if (!(op_flags&OPT_D)) { LOGDEB("recollindex: daemonizing\n"); #ifndef _WIN32 @@ -960,11 +891,8 @@ int main(int argc, char *argv[]) #endif } - if (updater) { - updater->status.phase = DbIxStatus::DBIXS_MONITOR; - updater->status.fn.clear(); - updater->update(); - } + statusUpdater()->update(DbIxStatus::DBIXS_MONITOR, ""); + int opts = RCLMON_NONE; if (op_flags & OPT_D) opts |= RCLMON_NOFORK; @@ -991,11 +919,7 @@ int main(int argc, char *argv[]) addIdxReason("indexer", confindexer->getReason()); cerr << confindexer->getReason() << endl; } - if (updater) { - updater->status.phase = DbIxStatus::DBIXS_DONE; - updater->status.fn.clear(); - updater->update(); - } + statusUpdater()->update(DbIxStatus::DBIXS_DONE, ""); 
flushIdxReasons(); return !status; } diff --git a/src/index/webqueue.cpp b/src/index/webqueue.cpp index 1159ba42..f6f63b06 100644 --- a/src/index/webqueue.cpp +++ b/src/index/webqueue.cpp @@ -173,8 +173,8 @@ public: // Initialize. Compute paths and create a temporary directory that will be // used by internfile() -WebQueueIndexer::WebQueueIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc) - : m_config(cnf), m_db(db), m_updater(updfunc) +WebQueueIndexer::WebQueueIndexer(RclConfig *cnf, Rcl::Db *db) + : m_config(cnf), m_db(db) { m_queuedir = m_config->getWebQueueDir(); path_catslash(m_queuedir); @@ -255,13 +255,7 @@ bool WebQueueIndexer::indexFromCache(const string& udi) void WebQueueIndexer::updstatus(const string& udi) { - if (m_updater) { - ++(m_updater->status.docsdone); - if (m_updater->status.dbtotdocs < m_updater->status.docsdone) - m_updater->status.dbtotdocs = m_updater->status.docsdone; - m_updater->status.fn = udi; - m_updater->update(); - } + statusUpdater()->update(DbIxStatus::DBIXS_FILES, udi, DbIxStatusUpdater::IncrDocsDone); } bool WebQueueIndexer::index() diff --git a/src/index/webqueue.h b/src/index/webqueue.h index 279c1dde..bf66f961 100644 --- a/src/index/webqueue.h +++ b/src/index/webqueue.h @@ -30,7 +30,6 @@ #include "fstreewalk.h" #include "rcldoc.h" -class DbIxStatusUpdater; class CirCache; class RclConfig; class WebStore; @@ -40,8 +39,7 @@ class Db; class WebQueueIndexer : public FsTreeWalkerCB { public: - WebQueueIndexer(RclConfig *cnf, Rcl::Db *db, - DbIxStatusUpdater *updfunc = 0); + WebQueueIndexer(RclConfig *cnf, Rcl::Db *db); ~WebQueueIndexer(); /** This is called by the top indexer in recollindex. @@ -69,7 +67,6 @@ private: Rcl::Db *m_db{nullptr}; WebStore *m_cache{nullptr}; std::string m_queuedir; - DbIxStatusUpdater *m_updater{nullptr}; // Don't process the cache. Set by indexFiles(). bool m_nocacheindex{false}; // Config: page erase interval. 
We normally keep only one diff --git a/src/qtgui/rclm_idx.cpp b/src/qtgui/rclm_idx.cpp index 3aa39aaa..4277e600 100644 --- a/src/qtgui/rclm_idx.cpp +++ b/src/qtgui/rclm_idx.cpp @@ -46,6 +46,7 @@ void RclMain::updateIdxStatus() switch (status.phase) { case DbIxStatus::DBIXS_NONE:phs=tr("None");break; case DbIxStatus::DBIXS_FILES: phs=tr("Updating");break; + case DbIxStatus::DBIXS_FLUSH: phs=tr("Flushing");break; case DbIxStatus::DBIXS_PURGE: phs=tr("Purge");break; case DbIxStatus::DBIXS_STEMDB: phs=tr("Stemdb");break; case DbIxStatus::DBIXS_CLOSING:phs=tr("Closing");break; diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 2dc3af5c..51be718a 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -63,6 +63,7 @@ using namespace std; #include "rclaspell.h" #endif #include "zlibut.h" +#include "idxstatus.h" #ifndef XAPIAN_AT_LEAST // Added in Xapian 1.4.2. Define it here for older versions @@ -2021,8 +2022,10 @@ bool Db::doFlush() } string ermsg; try { + statusUpdater()->update(DbIxStatus::DBIXS_FLUSH, ""); m_ndb->xwdb.commit(); } XCATCHERROR(ermsg); + statusUpdater()->update(DbIxStatus::DBIXS_NONE, ""); if (!ermsg.empty()) { LOGERR("Db::doFlush: flush() failed: " << ermsg << "\n"); return false; @@ -2238,8 +2241,10 @@ bool Db::purge() // because it doesn't really hurt. m_reason.clear(); try { + statusUpdater()->update(DbIxStatus::DBIXS_FLUSH, ""); m_ndb->xwdb.commit(); } XCATCHERROR(m_reason); + statusUpdater()->update(DbIxStatus::DBIXS_NONE, ""); if (!m_reason.empty()) { LOGERR("Db::purge: 1st flush failed: " << m_reason << "\n"); return false; @@ -2286,8 +2291,10 @@ bool Db::purge() m_reason.clear(); try { + statusUpdater()->update(DbIxStatus::DBIXS_FLUSH, ""); m_ndb->xwdb.commit(); } XCATCHERROR(m_reason); + statusUpdater()->update(DbIxStatus::DBIXS_NONE, ""); if (!m_reason.empty()) { LOGERR("Db::purge: 2nd flush failed: " << m_reason << "\n"); return false;