diff --git a/src/index/indexer.h b/src/index/indexer.h index feb81fb5..735d3942 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -119,6 +119,8 @@ class ConfIndexer { /** Purge a list of files. */ bool purgeFiles(list &files); + /** Set in place reset mode */ + void setInPlaceReset() {m_db.setInPlaceReset();} private: RclConfig *m_config; Rcl::Db m_db; diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index e86b65f7..d5b1fb6b 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -65,6 +65,7 @@ static int op_flags; #define OPT_b 0x2000 #define OPT_f 0x4000 #define OPT_C 0x8000 +#define OPT_Z 0x10000 ReExec *o_reexec; @@ -147,15 +148,17 @@ static void sigcleanup(int sig) stopindexing = 1; } -static bool makeIndexer(RclConfig *config) +static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset) { - if (!confindexer) + if (!confindexer) { confindexer = new ConfIndexer(config, updater); + if (inPlaceReset) + confindexer->setInPlaceReset(); + } if (!confindexer) { cerr << "Cannot create indexer" << endl; exit(1); } - return true; } void rclIxIonice(RclConfig *config) @@ -172,14 +175,13 @@ void rclIxIonice(RclConfig *config) // // This is called either from the command line or from the monitor. In // this case we're called repeatedly in the same process, and the -// confindexer is only created once by makeIndexer (but the db closed and +// confindexer is only created once by makeIndexerOrExit (but the db closed and // flushed every time) bool indexfiles(RclConfig *config, list &filenames) { if (filenames.empty()) return true; - if (!makeIndexer(config)) - return false; + makeIndexerOrExit(config, (op_flags & OPT_Z) != 0); return confindexer->indexFiles(filenames, (op_flags&OPT_f) ? 
ConfIndexer::IxFIgnoreSkip : ConfIndexer::IxFNone); @@ -190,16 +192,14 @@ bool purgefiles(RclConfig *config, list &filenames) { if (filenames.empty()) return true; - if (!makeIndexer(config)) - return false; + makeIndexerOrExit(config, (op_flags & OPT_Z) != 0); return confindexer->purgeFiles(filenames); } // Create stemming and spelling databases bool createAuxDbs(RclConfig *config) { - if (!makeIndexer(config)) - return false; + makeIndexerOrExit(config, false); if (!confindexer->createStemmingDatabases()) return false; @@ -213,8 +213,7 @@ bool createAuxDbs(RclConfig *config) // Create additional stem database static bool createstemdb(RclConfig *config, const string &lang) { - if (!makeIndexer(config)) - return false; + makeIndexerOrExit(config, false); return confindexer->createStemDb(lang); } @@ -224,9 +223,11 @@ static const char usage [] = "\n" "recollindex [-h] \n" " Print help\n" -"recollindex [-z] \n" +"recollindex [-z|-Z] \n" " Index everything according to configuration file\n" " -z : reset database before starting indexing\n" +" -Z : in place reset: consider all documents as changed. Can also\n" +" be combined with -i but not -m\n" #ifdef RCL_MONITOR "recollindex -m [-w ] -x [-D] [-C]\n" " Perform real time indexing. Don't become a daemon if -D is set.\n" @@ -245,8 +246,10 @@ static const char usage [] = " List available stemming languages\n" "recollindex -s \n" " Build stem database for additional language \n" +#ifdef FUTURE_IMPROVEMENT "recollindex -b\n" " Process the Beagle queue\n" +#endif #ifdef RCL_USE_ASPELL "recollindex -S\n" " Build aspell spelling dictionary.>\n" @@ -274,6 +277,11 @@ void lockorexit(Pidfile *pidfile) ". 
Return (other pid?): " << pid << endl; exit(1); } + if (pidfile->write_pid() != 0) { + cerr << "Can't become exclusive indexer: " << pidfile->getreason() << + endl; + exit(1); + } } int main(int argc, char **argv) @@ -315,6 +323,7 @@ int main(int argc, char **argv) Usage(); argc--; goto b1; case 'x': op_flags |= OPT_x; break; + case 'Z': op_flags |= OPT_Z; break; case 'z': op_flags |= OPT_z; break; default: Usage(); break; } @@ -332,6 +341,8 @@ int main(int argc, char **argv) if ((op_flags & OPT_z) && (op_flags & (OPT_i|OPT_e))) Usage(); + if ((op_flags & OPT_Z) && (op_flags & (OPT_m))) + Usage(); string reason; RclInitFlags flags = (op_flags & OPT_m) && !(op_flags&OPT_D) ? @@ -344,6 +355,7 @@ int main(int argc, char **argv) o_reexec->atexit(cleanup); bool rezero(op_flags & OPT_z); + bool inPlaceReset(op_flags & OPT_Z); Pidfile pidfile(config->getPidfile()); updater = new MyUpdater(config); @@ -355,7 +367,6 @@ int main(int argc, char **argv) if (op_flags & (OPT_i|OPT_e)) { lockorexit(&pidfile); - pidfile.write_pid(); list filenames; @@ -394,6 +405,11 @@ int main(int argc, char **argv) Usage(); string lang = *argv++; argc--; exit(!createstemdb(config, lang)); +#ifdef RCL_USE_ASPELL + } else if (op_flags & OPT_S) { + makeIndexerOrExit(config, inPlaceReset); + exit(!confindexer->createAspellDict()); +#endif // ASPELL #ifdef RCL_MONITOR } else if (op_flags & OPT_m) { @@ -408,6 +424,7 @@ int main(int argc, char **argv) exit(1); } } + // Need to rewrite pid, it changed pidfile.write_pid(); // Not too sure if I have to redo the nice thing after daemon(), @@ -429,7 +446,7 @@ int main(int argc, char **argv) } } } - confindexer = new ConfIndexer(config, updater); + makeIndexerOrExit(config, inPlaceReset); if (!confindexer->index(rezero, ConfIndexer::IxTAll) || stopindexing) { LOGERR(("recollindex, initial indexing pass failed, not going into monitor mode\n")); exit(1); @@ -452,19 +469,12 @@ int main(int argc, char **argv) exit(monret == false); #endif // MONITOR -#ifdef 
RCL_USE_ASPELL - } else if (op_flags & OPT_S) { - if (!makeIndexer(config)) - exit(1); - exit(!confindexer->createAspellDict()); -#endif // ASPELL } else if (op_flags & OPT_b) { cerr << "Not yet" << endl; return 1; } else { lockorexit(&pidfile); - pidfile.write_pid(); - confindexer = new ConfIndexer(config, updater); + makeIndexerOrExit(config, inPlaceReset); bool status = confindexer->index(rezero, ConfIndexer::IxTAll); if (!status) cerr << "Indexing failed" << endl; diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 69121b34..1f43dddd 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -558,6 +558,8 @@ vector Db::Native::makeAbstract(Xapian::docid docid, Query *query) /* Rcl::Db methods ///////////////////////////////// */ +bool Db::o_inPlaceReset; + Db::Db(RclConfig *cfp) : m_ndb(0), m_config(cfp), m_idxAbsTruncLen(250), m_synthAbsLen(250), m_synthAbsWordCtxLen(4), m_flushMb(-1), @@ -1404,6 +1406,12 @@ bool Db::needUpdate(const string &udi, const string& sig) if (m_ndb == 0) return false; + // If we are doing an in place reset, no need to test. Note that there is + // no need to update the existence map either, it will be done while + // indexing + if (o_inPlaceReset) + return true; + string uniterm = make_uniterm(udi); string ermsg; diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 26d1adc3..92e65dc8 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -244,6 +244,14 @@ class Db { RclConfig *getConf() {return m_config;} + /** + Activate the "in place reset" mode where all documents are + considered as needing update. This is a global/per-process + option, and can't be reset. 
It should be set at the start of + the indexing pass + */ + static void setInPlaceReset() {o_inPlaceReset = true;} + /* This has to be public for access by embedded Query::Native */ Native *m_ndb; @@ -277,19 +285,22 @@ private: int m_occFirstCheck; // Maximum file system occupation percentage int m_maxFsOccupPc; - // Database directory string m_basedir; - // List of directories for additional databases to query list m_extraDbs; - OpenMode m_mode; - + // File existence vector: this is filled during the indexing pass. Any + // document whose bit is not set at the end is purged vector updated; - + // Stop terms: those don't get indexed. StopList m_stops; - + // When this is set, all documents are considered as needing a reindex. + // This implements an alternative to just erasing the index before + // beginning, with the advantage that, for small index format updates + // between releases, the index remains available while being recreated. + static bool o_inPlaceReset; + // Reinitialize when adding/removing additional dbs bool adjustdbs(); bool stemExpand(const string &lang, const string &s, @@ -298,7 +309,7 @@ private: // Flush when idxflushmb is reached bool maybeflush(off_t moretext); - /* Copyconst and assignemt private and forbidden */ + /* Copyconst and assignment private and forbidden */ Db(const Db &) {} Db& operator=(const Db &) {return *this;}; };