add -Z "in place reset" option to recollindex

This commit is contained in:
Jean-Francois Dockes 2012-04-11 11:33:33 +02:00
parent 6a60ac73bf
commit c7c9c49437
4 changed files with 61 additions and 30 deletions

View File

@ -119,6 +119,8 @@ class ConfIndexer {
/** Purge a list of files. */
bool purgeFiles(list<string> &files);
/** Set in place reset mode.
 *
 * Forwards to m_db.setInPlaceReset(). As documented on Rcl::Db, the flag is
 * a global/per-process option that cannot be reset once set, and it should
 * be set at the start of the indexing pass.
 */
void setInPlaceReset() {m_db.setInPlaceReset();}
private:
RclConfig *m_config;
Rcl::Db m_db;

View File

@ -65,6 +65,7 @@ static int op_flags;
#define OPT_b 0x2000
#define OPT_f 0x4000
#define OPT_C 0x8000
#define OPT_Z 0x10000
ReExec *o_reexec;
@ -147,15 +148,17 @@ static void sigcleanup(int sig)
stopindexing = 1;
}
static bool makeIndexer(RclConfig *config)
static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset)
{
if (!confindexer)
if (!confindexer) {
confindexer = new ConfIndexer(config, updater);
if (inPlaceReset)
confindexer->setInPlaceReset();
}
if (!confindexer) {
cerr << "Cannot create indexer" << endl;
exit(1);
}
return true;
}
void rclIxIonice(RclConfig *config)
@ -172,14 +175,13 @@ void rclIxIonice(RclConfig *config)
//
// This is called either from the command line or from the monitor. In
// the latter case we're called repeatedly in the same process, and the
// confindexer is only created once by makeIndexer (but the db closed and
// confindexer is only created once by makeIndexerOrExit (but the db closed and
// flushed every time)
bool indexfiles(RclConfig *config, list<string> &filenames)
{
if (filenames.empty())
return true;
if (!makeIndexer(config))
return false;
makeIndexerOrExit(config, (op_flags & OPT_Z) != 0);
return confindexer->indexFiles(filenames, (op_flags&OPT_f) ?
ConfIndexer::IxFIgnoreSkip :
ConfIndexer::IxFNone);
@ -190,16 +192,14 @@ bool purgefiles(RclConfig *config, list<string> &filenames)
{
if (filenames.empty())
return true;
if (!makeIndexer(config))
return false;
makeIndexerOrExit(config, (op_flags & OPT_Z) != 0);
return confindexer->purgeFiles(filenames);
}
// Create stemming and spelling databases
bool createAuxDbs(RclConfig *config)
{
if (!makeIndexer(config))
return false;
makeIndexerOrExit(config, false);
if (!confindexer->createStemmingDatabases())
return false;
@ -213,8 +213,7 @@ bool createAuxDbs(RclConfig *config)
// Create additional stem database
static bool createstemdb(RclConfig *config, const string &lang)
{
if (!makeIndexer(config))
return false;
makeIndexerOrExit(config, false);
return confindexer->createStemDb(lang);
}
@ -224,9 +223,11 @@ static const char usage [] =
"\n"
"recollindex [-h] \n"
" Print help\n"
"recollindex [-z] \n"
"recollindex [-z|-Z] \n"
" Index everything according to configuration file\n"
" -z : reset database before starting indexing\n"
" -Z : in place reset: consider all documents as changed. Can also\n"
" be combined with -i but not -m\n"
#ifdef RCL_MONITOR
"recollindex -m [-w <secs>] -x [-D] [-C]\n"
" Perform real time indexing. Don't become a daemon if -D is set.\n"
@ -245,8 +246,10 @@ static const char usage [] =
" List available stemming languages\n"
"recollindex -s <lang>\n"
" Build stem database for additional language <lang>\n"
#ifdef FUTURE_IMPROVEMENT
"recollindex -b\n"
" Process the Beagle queue\n"
#endif
#ifdef RCL_USE_ASPELL
"recollindex -S\n"
" Build aspell spelling dictionary.>\n"
@ -274,6 +277,11 @@ void lockorexit(Pidfile *pidfile)
". Return (other pid?): " << pid << endl;
exit(1);
}
if (pidfile->write_pid() != 0) {
cerr << "Can't become exclusive indexer: " << pidfile->getreason() <<
endl;
exit(1);
}
}
int main(int argc, char **argv)
@ -315,6 +323,7 @@ int main(int argc, char **argv)
Usage();
argc--; goto b1;
case 'x': op_flags |= OPT_x; break;
case 'Z': op_flags |= OPT_Z; break;
case 'z': op_flags |= OPT_z; break;
default: Usage(); break;
}
@ -332,6 +341,8 @@ int main(int argc, char **argv)
if ((op_flags & OPT_z) && (op_flags & (OPT_i|OPT_e)))
Usage();
if ((op_flags & OPT_Z) && (op_flags & (OPT_m)))
Usage();
string reason;
RclInitFlags flags = (op_flags & OPT_m) && !(op_flags&OPT_D) ?
@ -344,6 +355,7 @@ int main(int argc, char **argv)
o_reexec->atexit(cleanup);
bool rezero(op_flags & OPT_z);
bool inPlaceReset(op_flags & OPT_Z);
Pidfile pidfile(config->getPidfile());
updater = new MyUpdater(config);
@ -355,7 +367,6 @@ int main(int argc, char **argv)
if (op_flags & (OPT_i|OPT_e)) {
lockorexit(&pidfile);
pidfile.write_pid();
list<string> filenames;
@ -394,6 +405,11 @@ int main(int argc, char **argv)
Usage();
string lang = *argv++; argc--;
exit(!createstemdb(config, lang));
#ifdef RCL_USE_ASPELL
} else if (op_flags & OPT_S) {
makeIndexerOrExit(config, inPlaceReset);
exit(!confindexer->createAspellDict());
#endif // ASPELL
#ifdef RCL_MONITOR
} else if (op_flags & OPT_m) {
@ -408,6 +424,7 @@ int main(int argc, char **argv)
exit(1);
}
}
// Need to rewrite pid, it changed
pidfile.write_pid();
// Not too sure if I have to redo the nice thing after daemon(),
@ -429,7 +446,7 @@ int main(int argc, char **argv)
}
}
}
confindexer = new ConfIndexer(config, updater);
makeIndexerOrExit(config, inPlaceReset);
if (!confindexer->index(rezero, ConfIndexer::IxTAll) || stopindexing) {
LOGERR(("recollindex, initial indexing pass failed, not going into monitor mode\n"));
exit(1);
@ -452,19 +469,12 @@ int main(int argc, char **argv)
exit(monret == false);
#endif // MONITOR
#ifdef RCL_USE_ASPELL
} else if (op_flags & OPT_S) {
if (!makeIndexer(config))
exit(1);
exit(!confindexer->createAspellDict());
#endif // ASPELL
} else if (op_flags & OPT_b) {
cerr << "Not yet" << endl;
return 1;
} else {
lockorexit(&pidfile);
pidfile.write_pid();
confindexer = new ConfIndexer(config, updater);
makeIndexerOrExit(config, inPlaceReset);
bool status = confindexer->index(rezero, ConfIndexer::IxTAll);
if (!status)
cerr << "Indexing failed" << endl;

View File

@ -558,6 +558,8 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
/* Rcl::Db methods ///////////////////////////////// */
// Definition of the "in place reset" flag. No initializer is given, so as an
// object with static storage duration it is zero-initialized (false). The
// only visible writer is Db::setInPlaceReset(), which sets it to true.
bool Db::o_inPlaceReset;
Db::Db(RclConfig *cfp)
: m_ndb(0), m_config(cfp), m_idxAbsTruncLen(250), m_synthAbsLen(250),
m_synthAbsWordCtxLen(4), m_flushMb(-1),
@ -1404,6 +1406,12 @@ bool Db::needUpdate(const string &udi, const string& sig)
if (m_ndb == 0)
return false;
// If we are doing an in place reset, no need to test. Note that there is
// no need to update the existence map either, it will be done while
// indexing
if (o_inPlaceReset)
return true;
string uniterm = make_uniterm(udi);
string ermsg;

View File

@ -244,6 +244,14 @@ class Db {
RclConfig *getConf() {return m_config;}
/**
Activate the "in place reset" mode where all documents are
considered as needing update: once the flag is set, needUpdate()
returns true without looking the document up in the index
(provided the database is open). This is a global/per-process
option, and can't be reset. It should be set at the start of
the indexing pass.
*/
static void setInPlaceReset() {o_inPlaceReset = true;}
/* This has to be public for access by embedded Query::Native */
Native *m_ndb;
@ -277,19 +285,22 @@ private:
int m_occFirstCheck;
// Maximum file system occupation percentage
int m_maxFsOccupPc;
// Database directory
string m_basedir;
// List of directories for additional databases to query
list<string> m_extraDbs;
OpenMode m_mode;
// File existence vector: this is filled during the indexing pass. Any
// document whose bit is not set at the end is purged
vector<bool> updated;
// Stop terms: those don't get indexed.
StopList m_stops;
// When this is set, all documents are considered as needing a reindex.
// This implements an alternative to just erasing the index before
// beginning, with the advantage that, for small index format updates
// between releases, the index remains available while being recreated.
static bool o_inPlaceReset;
// Reinitialize when adding/removing additional dbs
bool adjustdbs();
bool stemExpand(const string &lang, const string &s,
@ -298,7 +309,7 @@ private:
// Flush when idxflushmb is reached
bool maybeflush(off_t moretext);
/* Copyconst and assignemt private and forbidden */
/* Copy constructor and assignment are private and forbidden */
Db(const Db &) {}
Db& operator=(const Db &) {return *this;};
};