add -Z "in place reset" option to recollindex

This commit is contained in:
Jean-Francois Dockes 2012-04-11 11:33:33 +02:00
parent 6a60ac73bf
commit c7c9c49437
4 changed files with 61 additions and 30 deletions

View File

@ -119,6 +119,8 @@ class ConfIndexer {
/** Purge a list of files. */ /** Purge a list of files. */
bool purgeFiles(list<string> &files); bool purgeFiles(list<string> &files);
/** Set in place reset mode */
void setInPlaceReset() {m_db.setInPlaceReset();}
private: private:
RclConfig *m_config; RclConfig *m_config;
Rcl::Db m_db; Rcl::Db m_db;

View File

@ -65,6 +65,7 @@ static int op_flags;
#define OPT_b 0x2000 #define OPT_b 0x2000
#define OPT_f 0x4000 #define OPT_f 0x4000
#define OPT_C 0x8000 #define OPT_C 0x8000
#define OPT_Z 0x10000
ReExec *o_reexec; ReExec *o_reexec;
@ -147,15 +148,17 @@ static void sigcleanup(int sig)
stopindexing = 1; stopindexing = 1;
} }
static bool makeIndexer(RclConfig *config) static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset)
{ {
if (!confindexer) if (!confindexer) {
confindexer = new ConfIndexer(config, updater); confindexer = new ConfIndexer(config, updater);
if (inPlaceReset)
confindexer->setInPlaceReset();
}
if (!confindexer) { if (!confindexer) {
cerr << "Cannot create indexer" << endl; cerr << "Cannot create indexer" << endl;
exit(1); exit(1);
} }
return true;
} }
void rclIxIonice(RclConfig *config) void rclIxIonice(RclConfig *config)
@ -172,14 +175,13 @@ void rclIxIonice(RclConfig *config)
// //
// This is called either from the command line or from the monitor. In // This is called either from the command line or from the monitor. In
// this case we're called repeatedly in the same process, and the // this case we're called repeatedly in the same process, and the
// confindexer is only created once by makeIndexer (but the db closed and // confindexer is only created once by makeIndexerOrExit (but the db closed and
// flushed every time) // flushed every time)
bool indexfiles(RclConfig *config, list<string> &filenames) bool indexfiles(RclConfig *config, list<string> &filenames)
{ {
if (filenames.empty()) if (filenames.empty())
return true; return true;
if (!makeIndexer(config)) makeIndexerOrExit(config, (op_flags & OPT_Z) != 0);
return false;
return confindexer->indexFiles(filenames, (op_flags&OPT_f) ? return confindexer->indexFiles(filenames, (op_flags&OPT_f) ?
ConfIndexer::IxFIgnoreSkip : ConfIndexer::IxFIgnoreSkip :
ConfIndexer::IxFNone); ConfIndexer::IxFNone);
@ -190,16 +192,14 @@ bool purgefiles(RclConfig *config, list<string> &filenames)
{ {
if (filenames.empty()) if (filenames.empty())
return true; return true;
if (!makeIndexer(config)) makeIndexerOrExit(config, (op_flags & OPT_Z) != 0);
return false;
return confindexer->purgeFiles(filenames); return confindexer->purgeFiles(filenames);
} }
// Create stemming and spelling databases // Create stemming and spelling databases
bool createAuxDbs(RclConfig *config) bool createAuxDbs(RclConfig *config)
{ {
if (!makeIndexer(config)) makeIndexerOrExit(config, false);
return false;
if (!confindexer->createStemmingDatabases()) if (!confindexer->createStemmingDatabases())
return false; return false;
@ -213,8 +213,7 @@ bool createAuxDbs(RclConfig *config)
// Create additional stem database // Create additional stem database
static bool createstemdb(RclConfig *config, const string &lang) static bool createstemdb(RclConfig *config, const string &lang)
{ {
if (!makeIndexer(config)) makeIndexerOrExit(config, false);
return false;
return confindexer->createStemDb(lang); return confindexer->createStemDb(lang);
} }
@ -224,9 +223,11 @@ static const char usage [] =
"\n" "\n"
"recollindex [-h] \n" "recollindex [-h] \n"
" Print help\n" " Print help\n"
"recollindex [-z] \n" "recollindex [-z|-Z] \n"
" Index everything according to configuration file\n" " Index everything according to configuration file\n"
" -z : reset database before starting indexing\n" " -z : reset database before starting indexing\n"
" -Z : in place reset: consider all documents as changed. Can also\n"
" be combined with -i but not -m\n"
#ifdef RCL_MONITOR #ifdef RCL_MONITOR
"recollindex -m [-w <secs>] -x [-D] [-C]\n" "recollindex -m [-w <secs>] -x [-D] [-C]\n"
" Perform real time indexing. Don't become a daemon if -D is set.\n" " Perform real time indexing. Don't become a daemon if -D is set.\n"
@ -245,8 +246,10 @@ static const char usage [] =
" List available stemming languages\n" " List available stemming languages\n"
"recollindex -s <lang>\n" "recollindex -s <lang>\n"
" Build stem database for additional language <lang>\n" " Build stem database for additional language <lang>\n"
#ifdef FUTURE_IMPROVEMENT
"recollindex -b\n" "recollindex -b\n"
" Process the Beagle queue\n" " Process the Beagle queue\n"
#endif
#ifdef RCL_USE_ASPELL #ifdef RCL_USE_ASPELL
"recollindex -S\n" "recollindex -S\n"
" Build aspell spelling dictionary.>\n" " Build aspell spelling dictionary.>\n"
@ -274,6 +277,11 @@ void lockorexit(Pidfile *pidfile)
". Return (other pid?): " << pid << endl; ". Return (other pid?): " << pid << endl;
exit(1); exit(1);
} }
if (pidfile->write_pid() != 0) {
cerr << "Can't become exclusive indexer: " << pidfile->getreason() <<
endl;
exit(1);
}
} }
int main(int argc, char **argv) int main(int argc, char **argv)
@ -315,6 +323,7 @@ int main(int argc, char **argv)
Usage(); Usage();
argc--; goto b1; argc--; goto b1;
case 'x': op_flags |= OPT_x; break; case 'x': op_flags |= OPT_x; break;
case 'Z': op_flags |= OPT_Z; break;
case 'z': op_flags |= OPT_z; break; case 'z': op_flags |= OPT_z; break;
default: Usage(); break; default: Usage(); break;
} }
@ -332,6 +341,8 @@ int main(int argc, char **argv)
if ((op_flags & OPT_z) && (op_flags & (OPT_i|OPT_e))) if ((op_flags & OPT_z) && (op_flags & (OPT_i|OPT_e)))
Usage(); Usage();
if ((op_flags & OPT_Z) && (op_flags & (OPT_m)))
Usage();
string reason; string reason;
RclInitFlags flags = (op_flags & OPT_m) && !(op_flags&OPT_D) ? RclInitFlags flags = (op_flags & OPT_m) && !(op_flags&OPT_D) ?
@ -344,6 +355,7 @@ int main(int argc, char **argv)
o_reexec->atexit(cleanup); o_reexec->atexit(cleanup);
bool rezero(op_flags & OPT_z); bool rezero(op_flags & OPT_z);
bool inPlaceReset(op_flags & OPT_Z);
Pidfile pidfile(config->getPidfile()); Pidfile pidfile(config->getPidfile());
updater = new MyUpdater(config); updater = new MyUpdater(config);
@ -355,7 +367,6 @@ int main(int argc, char **argv)
if (op_flags & (OPT_i|OPT_e)) { if (op_flags & (OPT_i|OPT_e)) {
lockorexit(&pidfile); lockorexit(&pidfile);
pidfile.write_pid();
list<string> filenames; list<string> filenames;
@ -394,6 +405,11 @@ int main(int argc, char **argv)
Usage(); Usage();
string lang = *argv++; argc--; string lang = *argv++; argc--;
exit(!createstemdb(config, lang)); exit(!createstemdb(config, lang));
#ifdef RCL_USE_ASPELL
} else if (op_flags & OPT_S) {
makeIndexerOrExit(config, inPlaceReset);
exit(!confindexer->createAspellDict());
#endif // ASPELL
#ifdef RCL_MONITOR #ifdef RCL_MONITOR
} else if (op_flags & OPT_m) { } else if (op_flags & OPT_m) {
@ -408,6 +424,7 @@ int main(int argc, char **argv)
exit(1); exit(1);
} }
} }
// Need to rewrite pid, it changed
pidfile.write_pid(); pidfile.write_pid();
// Not too sure if I have to redo the nice thing after daemon(), // Not too sure if I have to redo the nice thing after daemon(),
@ -429,7 +446,7 @@ int main(int argc, char **argv)
} }
} }
} }
confindexer = new ConfIndexer(config, updater); makeIndexerOrExit(config, inPlaceReset);
if (!confindexer->index(rezero, ConfIndexer::IxTAll) || stopindexing) { if (!confindexer->index(rezero, ConfIndexer::IxTAll) || stopindexing) {
LOGERR(("recollindex, initial indexing pass failed, not going into monitor mode\n")); LOGERR(("recollindex, initial indexing pass failed, not going into monitor mode\n"));
exit(1); exit(1);
@ -452,19 +469,12 @@ int main(int argc, char **argv)
exit(monret == false); exit(monret == false);
#endif // MONITOR #endif // MONITOR
#ifdef RCL_USE_ASPELL
} else if (op_flags & OPT_S) {
if (!makeIndexer(config))
exit(1);
exit(!confindexer->createAspellDict());
#endif // ASPELL
} else if (op_flags & OPT_b) { } else if (op_flags & OPT_b) {
cerr << "Not yet" << endl; cerr << "Not yet" << endl;
return 1; return 1;
} else { } else {
lockorexit(&pidfile); lockorexit(&pidfile);
pidfile.write_pid(); makeIndexerOrExit(config, inPlaceReset);
confindexer = new ConfIndexer(config, updater);
bool status = confindexer->index(rezero, ConfIndexer::IxTAll); bool status = confindexer->index(rezero, ConfIndexer::IxTAll);
if (!status) if (!status)
cerr << "Indexing failed" << endl; cerr << "Indexing failed" << endl;

View File

@ -558,6 +558,8 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
/* Rcl::Db methods ///////////////////////////////// */ /* Rcl::Db methods ///////////////////////////////// */
bool Db::o_inPlaceReset;
Db::Db(RclConfig *cfp) Db::Db(RclConfig *cfp)
: m_ndb(0), m_config(cfp), m_idxAbsTruncLen(250), m_synthAbsLen(250), : m_ndb(0), m_config(cfp), m_idxAbsTruncLen(250), m_synthAbsLen(250),
m_synthAbsWordCtxLen(4), m_flushMb(-1), m_synthAbsWordCtxLen(4), m_flushMb(-1),
@ -1404,6 +1406,12 @@ bool Db::needUpdate(const string &udi, const string& sig)
if (m_ndb == 0) if (m_ndb == 0)
return false; return false;
// If we are doing an in place reset, no need to test. Note that there is
// no need to update the existence map either, it will be done while
// indexing
if (o_inPlaceReset)
return true;
string uniterm = make_uniterm(udi); string uniterm = make_uniterm(udi);
string ermsg; string ermsg;

View File

@ -244,6 +244,14 @@ class Db {
RclConfig *getConf() {return m_config;} RclConfig *getConf() {return m_config;}
/**
Activate the "in place reset" mode, in which all documents are
considered as needing an update. This is a global/per-process
option, and can't be reset. It should be set at the start of
the indexing pass.
*/
static void setInPlaceReset() {o_inPlaceReset = true;}
/* This has to be public for access by embedded Query::Native */ /* This has to be public for access by embedded Query::Native */
Native *m_ndb; Native *m_ndb;
@ -277,19 +285,22 @@ private:
int m_occFirstCheck; int m_occFirstCheck;
// Maximum file system occupation percentage // Maximum file system occupation percentage
int m_maxFsOccupPc; int m_maxFsOccupPc;
// Database directory // Database directory
string m_basedir; string m_basedir;
// List of directories for additional databases to query // List of directories for additional databases to query
list<string> m_extraDbs; list<string> m_extraDbs;
OpenMode m_mode; OpenMode m_mode;
// File existence vector: this is filled during the indexing pass. Any
// document whose bit is not set at the end is purged
vector<bool> updated; vector<bool> updated;
// Stop terms: those don't get indexed.
StopList m_stops; StopList m_stops;
// When this is set, all documents are considered as needing a reindex.
// This implements an alternative to just erasing the index before
// beginning, with the advantage that, for small index format updates
// between releases, the index remains available while being recreated.
static bool o_inPlaceReset;
// Reinitialize when adding/removing additional dbs // Reinitialize when adding/removing additional dbs
bool adjustdbs(); bool adjustdbs();
bool stemExpand(const string &lang, const string &s, bool stemExpand(const string &lang, const string &s,
@ -298,7 +309,7 @@ private:
// Flush when idxflushmb is reached // Flush when idxflushmb is reached
bool maybeflush(off_t moretext); bool maybeflush(off_t moretext);
/* Copyconst and assignemt private and forbidden */ /* Copyconst and assignement private and forbidden */
Db(const Db &) {} Db(const Db &) {}
Db& operator=(const Db &) {return *this;}; Db& operator=(const Db &) {return *this;};
}; };