From ecc4db774e723ca86023b1e03bbf0c427803dd6f Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Mon, 24 Apr 2017 09:16:41 +0200 Subject: [PATCH] recollindex: add option to output more detailed explanation of partial or complete failure (e.g.: aspell problem) --- src/index/indexer.cpp | 61 +++++++++++----- src/index/recollindex.cpp | 150 ++++++++++++++++++++++++++------------ src/index/recollindex.h | 10 ++- 3 files changed, 153 insertions(+), 68 deletions(-) diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index ff336cb3..7de91e42 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -23,6 +23,7 @@ #include "cstr.h" #include "log.h" +#include "recollindex.h" #include "indexer.h" #include "fsindexer.h" #ifndef DISABLE_WEB_INDEXER @@ -60,13 +61,13 @@ bool ConfIndexer::runFirstIndexing() { // Indexing status file existing and not empty ? if (path_filesize(m_config->getIdxStatusFile()) > 0) { - LOGDEB0("ConfIndexer::runFirstIndexing: no: status file not empty\n" ); + LOGDEB0("ConfIndexer::runFirstIndexing: no: status file not empty\n"); return false; } // And only do this if the user has kept the default topdirs (~). vector tdl = m_config->getTopdirs(); if (tdl.size() != 1 || tdl[0].compare(path_canon(path_tildexpand("~")))) { - LOGDEB0("ConfIndexer::runFirstIndexing: no: not home only\n" ); + LOGDEB0("ConfIndexer::runFirstIndexing: no: not home only\n"); return false; } return true; @@ -74,7 +75,7 @@ bool ConfIndexer::runFirstIndexing() bool ConfIndexer::firstFsIndexingSequence() { - LOGDEB("ConfIndexer::firstFsIndexingSequence\n" ); + LOGDEB("ConfIndexer::firstFsIndexingSequence\n"); deleteZ(m_fsindexer); m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); if (!m_fsindexer) { @@ -92,7 +93,9 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) { Rcl::Db::OpenMode mode = resetbefore ? 
Rcl::Db::DbTrunc : Rcl::Db::DbUpd; if (!m_db.open(mode)) { - LOGERR("ConfIndexer: error opening database " << (m_config->getDbDir()) << " : " << (m_db.getReason()) << "\n" ); + LOGERR("ConfIndexer: error opening database " << m_config->getDbDir() << + " : " << m_db.getReason() << "\n"); + addIdxReason("indexer", m_db.getReason()); return false; } @@ -104,6 +107,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) deleteZ(m_fsindexer); m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); if (!m_fsindexer || !m_fsindexer->index(flags)) { + addIdxReason("indexer", "Index creation failed. See log"); m_db.close(); return false; } @@ -114,6 +118,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) m_beagler = new BeagleQueueIndexer(m_config, &m_db, m_updater); if (!m_beagler || !m_beagler->index()) { m_db.close(); + addIdxReason("indexer", "Web index creation failed. See log"); return false; } } @@ -123,6 +128,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) // filesystem anymore. Only if all *configured* indexers ran. if (m_updater && !m_updater->update(DbIxStatus::DBIXS_PURGE, string())) { m_db.close(); + addIdxReason("indexer", "Index purge failed. See log"); return false; } m_db.purge(); @@ -134,7 +140,9 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) if (m_updater) m_updater->update(DbIxStatus::DBIXS_CLOSING, string()); if (!m_db.close()) { - LOGERR("ConfIndexer::index: error closing database in " << (m_config->getDbDir()) << "\n" ); + LOGERR("ConfIndexer::index: error closing database in " << + m_config->getDbDir() << "\n"); + addIdxReason("indexer", "Index close/flush failed. 
See log"); return false; } @@ -146,7 +154,11 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) } if (m_updater && !m_updater->update(DbIxStatus::DBIXS_CLOSING, string())) return false; - ret = ret && createAspellDict(); + + // Don't fail indexing because of an aspell issue: we ignore the status. + // Messages were written to the reasons output + (void)createAspellDict(); + clearMimeHandlerCache(); if (m_updater) m_updater->update(DbIxStatus::DBIXS_DONE, string()); @@ -164,7 +176,8 @@ bool ConfIndexer::indexFiles(list& ifiles, int flag) myfiles.sort(); if (!m_db.open(Rcl::Db::DbUpd)) { - LOGERR("ConfIndexer: indexFiles error opening database " << (m_config->getDbDir()) << "\n" ); + LOGERR("ConfIndexer: indexFiles error opening database " << + m_config->getDbDir() << "\n"); return false; } m_config->setKeyDir(cstr_null); @@ -173,7 +186,8 @@ bool ConfIndexer::indexFiles(list& ifiles, int flag) m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); if (m_fsindexer) ret = m_fsindexer->indexFiles(myfiles, flag); - LOGDEB2("ConfIndexer::indexFiles: fsindexer returned " << (ret) << ", " << (myfiles.size()) << " files remainining\n" ); + LOGDEB2("ConfIndexer::indexFiles: fsindexer returned " << ret << ", " << + myfiles.size() << " files remainining\n"); #ifndef DISABLE_WEB_INDEXER if (m_dobeagle && !myfiles.empty() && !(flag & IxFNoWeb)) { @@ -188,7 +202,8 @@ bool ConfIndexer::indexFiles(list& ifiles, int flag) #endif // The close would be done in our destructor, but we want status here if (!m_db.close()) { - LOGERR("ConfIndexer::index: error closing database in " << (m_config->getDbDir()) << "\n" ); + LOGERR("ConfIndexer::index: error closing database in " << + m_config->getDbDir() << "\n"); return false; } ifiles = myfiles; @@ -212,7 +227,8 @@ bool ConfIndexer::docsToPaths(vector &docs, vector &paths) // Filesystem document. 
The url has to be like file:// if (idoc.url.find(cstr_fileu) != 0) { - LOGERR("idx::docsToPaths: FS backend and non fs url: [" << (idoc.url) << "]\n" ); + LOGERR("idx::docsToPaths: FS backend and non fs url: [" << + idoc.url << "]\n"); continue; } paths.push_back(idoc.url.substr(7, string::npos)); @@ -244,7 +260,8 @@ bool ConfIndexer::purgeFiles(std::list &files, int flag) myfiles.sort(); if (!m_db.open(Rcl::Db::DbUpd)) { - LOGERR("ConfIndexer: purgeFiles error opening database " << (m_config->getDbDir()) << "\n" ); + LOGERR("ConfIndexer: purgeFiles error opening database " << + m_config->getDbDir() << "\n"); return false; } bool ret = false; @@ -268,7 +285,8 @@ bool ConfIndexer::purgeFiles(std::list &files, int flag) // The close would be done in our destructor, but we want status here if (!m_db.close()) { - LOGERR("ConfIndexer::purgefiles: error closing database in " << (m_config->getDbDir()) << "\n" ); + LOGERR("ConfIndexer::purgefiles: error closing database in " << + m_config->getDbDir() << "\n"); return false; } return ret; @@ -282,7 +300,8 @@ bool ConfIndexer::createStemmingDatabases() bool ret = true; if (m_config->getConfParam("indexstemminglanguages", slangs)) { if (!m_db.open(Rcl::Db::DbUpd)) { - LOGERR("ConfIndexer::createStemmingDb: could not open db\n" ); + LOGERR("ConfIndexer::createStemmingDb: could not open db\n"); + addIdxReason("stemming", "could not open db"); return false; } vector langs; @@ -297,6 +316,9 @@ bool ConfIndexer::createStemmingDatabases() m_db.deleteStemDb(*it); } ret = ret && m_db.createStemDbs(langs); + if (!ret) { + addIdxReason("stemming", "stem db creation failed"); + } } m_db.close(); return ret; @@ -315,7 +337,7 @@ bool ConfIndexer::createStemDb(const string &lang) // module, either from a configuration variable or the NLS environment. 
bool ConfIndexer::createAspellDict() { - LOGDEB2("ConfIndexer::createAspellDict()\n" ); + LOGDEB2("ConfIndexer::createAspellDict()\n"); #ifdef RCL_USE_ASPELL // For the benefit of the real-time indexer, we only initialize // noaspell from the configuration once. It can then be set to @@ -330,20 +352,23 @@ bool ConfIndexer::createAspellDict() return true; if (!m_db.open(Rcl::Db::DbRO)) { - LOGERR("ConfIndexer::createAspellDict: could not open db\n" ); + LOGERR("ConfIndexer::createAspellDict: could not open db\n"); return false; } Aspell aspell(m_config); string reason; if (!aspell.init(reason)) { - LOGERR("ConfIndexer::createAspellDict: aspell init failed: " << (reason) << "\n" ); + LOGERR("ConfIndexer::createAspellDict: aspell init failed: " << + reason << "\n"); noaspell = true; return false; } - LOGDEB("ConfIndexer::createAspellDict: creating dictionary\n" ); + LOGDEB("ConfIndexer::createAspellDict: creating dictionary\n"); if (!aspell.buildDict(m_db, reason)) { - LOGERR("ConfIndexer::createAspellDict: aspell buildDict failed: " << (reason) << "\n" ); + LOGERR("ConfIndexer::createAspellDict: aspell buildDict failed: " << + reason << "\n"); + addIdxReason("aspell", reason); noaspell = true; return false; } diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index c1a86acd..ec8772c1 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -61,27 +61,28 @@ using namespace std; // Command line options static int op_flags; #define OPT_MOINS 0x1 -#define OPT_z 0x2 -#define OPT_h 0x4 -#define OPT_i 0x8 -#define OPT_s 0x10 -#define OPT_c 0x20 -#define OPT_S 0x40 -#define OPT_m 0x80 -#define OPT_D 0x100 -#define OPT_e 0x200 -#define OPT_w 0x400 -#define OPT_x 0x800 -#define OPT_l 0x1000 -#define OPT_b 0x2000 -#define OPT_f 0x4000 -#define OPT_C 0x8000 -#define OPT_Z 0x10000 -#define OPT_n 0x20000 -#define OPT_r 0x40000 -#define OPT_k 0x80000 -#define OPT_E 0x100000 -#define OPT_K 0x200000 +#define OPT_C 0x2 +#define OPT_D 0x4 +#define OPT_E 
0x8 +#define OPT_K 0x10 +#define OPT_R 0x20 +#define OPT_S 0x40 +#define OPT_Z 0x80 +#define OPT_b 0x100 +#define OPT_c 0x200 +#define OPT_e 0x400 +#define OPT_f 0x800 +#define OPT_h 0x1000 +#define OPT_i 0x2000 +#define OPT_k 0x4000 +#define OPT_l 0x8000 +#define OPT_m 0x10000 +#define OPT_n 0x20000 +#define OPT_r 0x40000 +#define OPT_s 0x80000 +#define OPT_w 0x100000 +#define OPT_x 0x200000 +#define OPT_z 0x400000 ReExec *o_reexec; // Globals for atexit cleanup @@ -149,7 +150,7 @@ class MyUpdater : public DbIxStatusUpdater { // out and the indexing would go on, not good (ie: if the user // logs in again, the new recollindex will fail). if ((op_flags & OPT_m) && !(op_flags & OPT_x) && !x11IsAlive()) { - LOGDEB("X11 session went away during initial indexing pass\n" ); + LOGDEB("X11 session went away during initial indexing pass\n"); stopindexing = true; return false; } @@ -167,7 +168,7 @@ static MyUpdater *updater; static void sigcleanup(int sig) { fprintf(stderr, "Got signal, registering stop request\n"); - LOGDEB("Got signal, registering stop request\n" ); + LOGDEB("Got signal, registering stop request\n"); CancelCheck::instance().setCancel(); stopindexing = 1; } @@ -302,7 +303,7 @@ static bool checktopdirs(RclConfig *config, vector& nonexist) vector tdl; if (!config->getConfParam("topdirs", &tdl)) { cerr << "No 'topdirs' parameter in configuration\n"; - LOGERR("recollindex:No 'topdirs' parameter in configuration\n" );; + LOGERR("recollindex:No 'topdirs' parameter in configuration\n");; return false; } @@ -311,10 +312,10 @@ static bool checktopdirs(RclConfig *config, vector& nonexist) if (!it->size() || !path_isabsolute(*it)) { if ((*it)[0] == '~') { cerr << "Tilde expansion failed: " << *it << endl; - LOGERR("recollindex: tilde expansion failed: " << *it << "\n" ); + LOGERR("recollindex: tilde expansion failed: " << *it << "\n"); } else { cerr << "Not an absolute path: " << *it << endl; - LOGERR("recollindex: not an absolute path: " << *it << "\n" ); + 
LOGERR("recollindex: not an absolute path: " << *it << "\n"); } return false; } @@ -404,12 +405,50 @@ void lockorexit(Pidfile *pidfile) } } +// Failure explanations gathered during the run, stored under the name +// passed by the reporting code and written out by flushIdxReasons() +static ConfSimple reasons; +// Destination set by the -R option: "stdout", "stderr" or a file path. +// Left empty when -R is not used: nothing is written then +static string reasonsfile; +// Record 'reason' under the 'who' name. CR and LF are first handled by +// neutchars() (presumably neutralized to keep the text on one line) +void addIdxReason(string who, string reason) +{ + reason = neutchars(reason, "\r\n"); + if (!reasons.set(who, reason)) { + cerr << "addIdxReason: confsimple set failed\n"; + } +} +// Write the recorded reasons to the destination selected with -R +static void flushIdxReasons() +{ + if (reasonsfile.empty()) + return; + if (reasonsfile == "stdout") { + reasons.write(cout); + } else if (reasonsfile == "stderr") { + reasons.write(cerr); + } else { + // ofstream::open() reports failure through the stream state and + // does not throw by default: test it, and fall back to stderr + ofstream out; + out.open(reasonsfile, ofstream::out|ofstream::trunc); + if (out.is_open()) { + reasons.write(out); + } else { + cerr << "Could not write reasons file " << reasonsfile << endl; + reasons.write(cerr); + } + } +} + int main(int argc, char **argv) { string a_config; int sleepsecs = 60; vector selpatterns; - + // The reexec struct is used by the daemon to shed memory after // the initial indexing pass and to restart when the configuration // changes @@ -449,6 +479,8 @@ int main(int argc, char **argv) selpatterns.push_back(*(++argv)); argc--; goto b1; case 'r': op_flags |= OPT_r; break; + case 'R': op_flags |= OPT_R; if (argc < 2) Usage(); + reasonsfile = *(++argv); argc--; goto b1; case 's': op_flags |= OPT_s; break; #ifdef RCL_USE_ASPELL case 'S': op_flags |= OPT_S; break; @@ -481,11 +513,14 @@ int main(int argc, char **argv) if ((op_flags & OPT_E) && (op_flags & ~(OPT_E|OPT_c))) { Usage(); } + string reason; RclInitFlags flags = (op_flags & OPT_m) && !(op_flags&OPT_D) ?
RCLINIT_DAEMON : RCLINIT_IDX; config = recollinit(flags, cleanup, sigcleanup, reason, &a_config); if (config == 0 || !config->ok()) { + addIdxReason("init", reason); + flushIdxReasons(); cerr << "Configuration problem: " << reason << endl; exit(1); } @@ -494,8 +529,11 @@ int main(int argc, char **argv) #endif vector nonexist; - if (!checktopdirs(config, nonexist)) + if (!checktopdirs(config, nonexist)) { + addIdxReason("init", "topdirs not set"); + flushIdxReasons(); exit(1); + } if (nonexist.size()) { ostream& out = (op_flags & OPT_E) ? cout : cerr; @@ -515,14 +553,17 @@ int main(int argc, char **argv) string rundir; config->getConfParam("idxrundir", rundir); if (!rundir.compare("tmp")) { - LOGINFO("recollindex: changing current directory to [" << (tmplocation()) << "]\n" ); + LOGINFO("recollindex: changing current directory to [" << + tmplocation() << "]\n"); if (chdir(tmplocation().c_str()) < 0) { - LOGERR("chdir(" << (tmplocation()) << ") failed, errno " << (errno) << "\n" ); + LOGERR("chdir(" << tmplocation() << ") failed, errno " << errno << + "\n"); } } else if (!rundir.empty()) { - LOGINFO("recollindex: changing current directory to [" << (rundir) << "]\n" ); + LOGINFO("recollindex: changing current directory to [" << rundir << + "]\n"); if (chdir(rundir.c_str()) < 0) { - LOGERR("chdir(" << (rundir) << ") failed, errno " << (errno) << "\n" ); + LOGERR("chdir(" << rundir << ") failed, errno " << errno << "\n"); } } @@ -545,10 +586,10 @@ int main(int argc, char **argv) // Log something at LOGINFO to reset the trace file. Else at level // 3 it's not even truncated if all docs are up to date. - LOGINFO("recollindex: starting up\n" ); + LOGINFO("recollindex: starting up\n"); #ifndef _WIN32 if (setpriority(PRIO_PROCESS, 0, 20) != 0) { - LOGINFO("recollindex: can't setpriority(), errno " << (errno) << "\n" ); + LOGINFO("recollindex: can't setpriority(), errno " << errno << "\n"); } // Try to ionice. 
This does not work on all platforms rclIxIonice(config); @@ -559,8 +600,11 @@ int main(int argc, char **argv) Usage(); string top = *argv++; argc--; bool status = recursive_index(config, top, selpatterns); - if (confindexer && !confindexer->getReason().empty()) + if (confindexer && !confindexer->getReason().empty()) { + addIdxReason("indexer", confindexer->getReason()); cerr << confindexer->getReason() << endl; + } + flushIdxReasons(); exit(status ? 0 : 1); } else if (op_flags & (OPT_i|OPT_e)) { lockorexit(&pidfile); @@ -591,8 +635,11 @@ int main(int argc, char **argv) if (status && (op_flags & OPT_i)) { status = indexfiles(config, filenames); } - if (confindexer && !confindexer->getReason().empty()) + if (confindexer && !confindexer->getReason().empty()) { + addIdxReason("indexer", confindexer->getReason()); cerr << confindexer->getReason() << endl; + } + flushIdxReasons(); exit(status ? 0 : 1); } else if (op_flags & OPT_l) { if (argc != 0) @@ -620,12 +667,14 @@ int main(int argc, char **argv) Usage(); lockorexit(&pidfile); if (!(op_flags&OPT_D)) { - LOGDEB("recollindex: daemonizing\n" ); + LOGDEB("recollindex: daemonizing\n"); #ifndef _WIN32 if (daemon(0,0) != 0) { - fprintf(stderr, "daemon() failed, errno %d\n", errno); - LOGERR("daemon() failed, errno " << (errno) << "\n" ); - exit(1); + addIdxReason("monitor", "daemon() failed"); + cerr << "daemon() failed, errno " << errno << endl; + LOGERR("daemon() failed, errno " << errno << "\n"); + flushIdxReasons(); + exit(1); } #endif } @@ -635,29 +684,31 @@ int main(int argc, char **argv) // Not too sure if I have to redo the nice thing after daemon(), // can't hurt anyway (easier than testing on all platforms...) if (setpriority(PRIO_PROCESS, 0, 20) != 0) { - LOGINFO("recollindex: can't setpriority(), errno " << (errno) << "\n" ); + LOGINFO("recollindex: can't setpriority(), errno " << errno<< "\n"); } // Try to ionice. 
This does not work on all platforms rclIxIonice(config); #endif if (sleepsecs > 0) { - LOGDEB("recollindex: sleeping " << (sleepsecs) << "\n" ); + LOGDEB("recollindex: sleeping " << sleepsecs << "\n"); for (int i = 0; i < sleepsecs; i++) { sleep(1); // Check that x11 did not go away while we were sleeping. if (!(op_flags & OPT_x) && !x11IsAlive()) { - LOGDEB("X11 session went away during initial sleep period\n" ); + LOGDEB("X11 session went away during initial sleep period\n"); exit(0); } } } if (!(op_flags & OPT_n)) { makeIndexerOrExit(config, inPlaceReset); - LOGDEB("Recollindex: initial indexing pass before monitoring\n" ); + LOGDEB("Recollindex: initial indexing pass before monitoring\n"); if (!confindexer->index(rezero, ConfIndexer::IxTAll, indexerFlags) || stopindexing) { - LOGERR("recollindex, initial indexing pass failed, not going into monitor mode\n" ); + LOGERR("recollindex, initial indexing pass failed, " + "not going into monitor mode\n"); + flushIdxReasons(); exit(1); } else { // Record success of indexing pass with failed files retries. 
@@ -668,7 +719,8 @@ int main(int argc, char **argv) deleteZ(confindexer); #ifndef _WIN32 o_reexec->insertArgs(vector(1, "-n")); - LOGINFO("recollindex: reexecuting with -n after initial full pass\n" ); + LOGINFO("recollindex: reexecuting with -n after initial full " + "pass\n"); // Note that -n will be inside the reexec when we come // back, but the monitor will explicitely strip it before // starting a config change exec to ensure that we do a @@ -707,14 +759,16 @@ int main(int argc, char **argv) } if (!status) cerr << "Indexing failed" << endl; - if (!confindexer->getReason().empty()) + if (!confindexer->getReason().empty()) { + addIdxReason("indexer", confindexer->getReason()); cerr << confindexer->getReason() << endl; - + } if (updater) { updater->status.phase = DbIxStatus::DBIXS_DONE; updater->status.fn.clear(); updater->update(); } + flushIdxReasons(); return !status; } } diff --git a/src/index/recollindex.h b/src/index/recollindex.h index b1439746..ba92d7cc 100644 --- a/src/index/recollindex.h +++ b/src/index/recollindex.h @@ -16,15 +16,21 @@ */ #ifndef _recollindex_h_included_ #define _recollindex_h_included_ +#include +#include /** Helper methods in recollindex.cpp for initial checks/setup to index * a list of files (either from the monitor or the command line) */ -extern bool indexfiles(RclConfig *config, list &filenames); -extern bool purgefiles(RclConfig *config, list &filenames); +class RclConfig; +extern bool indexfiles(RclConfig *config, std::list &filenames); +extern bool purgefiles(RclConfig *config, std::list &filenames); extern bool createAuxDbs(RclConfig *config); extern int stopindexing; +// Try to explain what went wrong... +extern void addIdxReason(std::string who, std::string reason); + class ReExec; extern ReExec *o_reexec;