add -Z "in place reset" option to recollindex
This commit is contained in:
parent
6a60ac73bf
commit
c7c9c49437
@ -119,6 +119,8 @@ class ConfIndexer {
|
||||
/** Purge a list of files. */
|
||||
bool purgeFiles(list<string> &files);
|
||||
|
||||
/** Set in place reset mode */
|
||||
void setInPlaceReset() {m_db.setInPlaceReset();}
|
||||
private:
|
||||
RclConfig *m_config;
|
||||
Rcl::Db m_db;
|
||||
|
||||
@ -65,6 +65,7 @@ static int op_flags;
|
||||
#define OPT_b 0x2000
|
||||
#define OPT_f 0x4000
|
||||
#define OPT_C 0x8000
|
||||
#define OPT_Z 0x10000
|
||||
|
||||
ReExec *o_reexec;
|
||||
|
||||
@ -147,15 +148,17 @@ static void sigcleanup(int sig)
|
||||
stopindexing = 1;
|
||||
}
|
||||
|
||||
static bool makeIndexer(RclConfig *config)
|
||||
static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset)
|
||||
{
|
||||
if (!confindexer)
|
||||
if (!confindexer) {
|
||||
confindexer = new ConfIndexer(config, updater);
|
||||
if (inPlaceReset)
|
||||
confindexer->setInPlaceReset();
|
||||
}
|
||||
if (!confindexer) {
|
||||
cerr << "Cannot create indexer" << endl;
|
||||
exit(1);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void rclIxIonice(RclConfig *config)
|
||||
@ -172,14 +175,13 @@ void rclIxIonice(RclConfig *config)
|
||||
//
|
||||
// This is called either from the command line or from the monitor. In
|
||||
// this case we're called repeatedly in the same process, and the
|
||||
// confindexer is only created once by makeIndexer (but the db closed and
|
||||
// confindexer is only created once by makeIndexerOrExit (but the db closed and
|
||||
// flushed every time)
|
||||
bool indexfiles(RclConfig *config, list<string> &filenames)
|
||||
{
|
||||
if (filenames.empty())
|
||||
return true;
|
||||
if (!makeIndexer(config))
|
||||
return false;
|
||||
makeIndexerOrExit(config, (op_flags & OPT_Z) != 0);
|
||||
return confindexer->indexFiles(filenames, (op_flags&OPT_f) ?
|
||||
ConfIndexer::IxFIgnoreSkip :
|
||||
ConfIndexer::IxFNone);
|
||||
@ -190,16 +192,14 @@ bool purgefiles(RclConfig *config, list<string> &filenames)
|
||||
{
|
||||
if (filenames.empty())
|
||||
return true;
|
||||
if (!makeIndexer(config))
|
||||
return false;
|
||||
makeIndexerOrExit(config, (op_flags & OPT_Z) != 0);
|
||||
return confindexer->purgeFiles(filenames);
|
||||
}
|
||||
|
||||
// Create stemming and spelling databases
|
||||
bool createAuxDbs(RclConfig *config)
|
||||
{
|
||||
if (!makeIndexer(config))
|
||||
return false;
|
||||
makeIndexerOrExit(config, false);
|
||||
|
||||
if (!confindexer->createStemmingDatabases())
|
||||
return false;
|
||||
@ -213,8 +213,7 @@ bool createAuxDbs(RclConfig *config)
|
||||
// Create additional stem database
|
||||
static bool createstemdb(RclConfig *config, const string &lang)
|
||||
{
|
||||
if (!makeIndexer(config))
|
||||
return false;
|
||||
makeIndexerOrExit(config, false);
|
||||
return confindexer->createStemDb(lang);
|
||||
}
|
||||
|
||||
@ -224,9 +223,11 @@ static const char usage [] =
|
||||
"\n"
|
||||
"recollindex [-h] \n"
|
||||
" Print help\n"
|
||||
"recollindex [-z] \n"
|
||||
"recollindex [-z|-Z] \n"
|
||||
" Index everything according to configuration file\n"
|
||||
" -z : reset database before starting indexing\n"
|
||||
" -Z : in place reset: consider all documents as changed. Can also\n"
|
||||
" be combined with -i but not -m\n"
|
||||
#ifdef RCL_MONITOR
|
||||
"recollindex -m [-w <secs>] -x [-D] [-C]\n"
|
||||
" Perform real time indexing. Don't become a daemon if -D is set.\n"
|
||||
@ -245,8 +246,10 @@ static const char usage [] =
|
||||
" List available stemming languages\n"
|
||||
"recollindex -s <lang>\n"
|
||||
" Build stem database for additional language <lang>\n"
|
||||
#ifdef FUTURE_IMPROVEMENT
|
||||
"recollindex -b\n"
|
||||
" Process the Beagle queue\n"
|
||||
#endif
|
||||
#ifdef RCL_USE_ASPELL
|
||||
"recollindex -S\n"
|
||||
" Build aspell spelling dictionary.>\n"
|
||||
@ -274,6 +277,11 @@ void lockorexit(Pidfile *pidfile)
|
||||
". Return (other pid?): " << pid << endl;
|
||||
exit(1);
|
||||
}
|
||||
if (pidfile->write_pid() != 0) {
|
||||
cerr << "Can't become exclusive indexer: " << pidfile->getreason() <<
|
||||
endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
@ -315,6 +323,7 @@ int main(int argc, char **argv)
|
||||
Usage();
|
||||
argc--; goto b1;
|
||||
case 'x': op_flags |= OPT_x; break;
|
||||
case 'Z': op_flags |= OPT_Z; break;
|
||||
case 'z': op_flags |= OPT_z; break;
|
||||
default: Usage(); break;
|
||||
}
|
||||
@ -332,6 +341,8 @@ int main(int argc, char **argv)
|
||||
|
||||
if ((op_flags & OPT_z) && (op_flags & (OPT_i|OPT_e)))
|
||||
Usage();
|
||||
if ((op_flags & OPT_Z) && (op_flags & (OPT_m)))
|
||||
Usage();
|
||||
|
||||
string reason;
|
||||
RclInitFlags flags = (op_flags & OPT_m) && !(op_flags&OPT_D) ?
|
||||
@ -344,6 +355,7 @@ int main(int argc, char **argv)
|
||||
o_reexec->atexit(cleanup);
|
||||
|
||||
bool rezero(op_flags & OPT_z);
|
||||
bool inPlaceReset(op_flags & OPT_Z);
|
||||
Pidfile pidfile(config->getPidfile());
|
||||
updater = new MyUpdater(config);
|
||||
|
||||
@ -355,7 +367,6 @@ int main(int argc, char **argv)
|
||||
|
||||
if (op_flags & (OPT_i|OPT_e)) {
|
||||
lockorexit(&pidfile);
|
||||
pidfile.write_pid();
|
||||
|
||||
list<string> filenames;
|
||||
|
||||
@ -394,6 +405,11 @@ int main(int argc, char **argv)
|
||||
Usage();
|
||||
string lang = *argv++; argc--;
|
||||
exit(!createstemdb(config, lang));
|
||||
#ifdef RCL_USE_ASPELL
|
||||
} else if (op_flags & OPT_S) {
|
||||
makeIndexerOrExit(config, inPlaceReset);
|
||||
exit(!confindexer->createAspellDict());
|
||||
#endif // ASPELL
|
||||
|
||||
#ifdef RCL_MONITOR
|
||||
} else if (op_flags & OPT_m) {
|
||||
@ -408,6 +424,7 @@ int main(int argc, char **argv)
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
// Need to rewrite pid, it changed
|
||||
pidfile.write_pid();
|
||||
|
||||
// Not too sure if I have to redo the nice thing after daemon(),
|
||||
@ -429,7 +446,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
}
|
||||
confindexer = new ConfIndexer(config, updater);
|
||||
makeIndexerOrExit(config, inPlaceReset);
|
||||
if (!confindexer->index(rezero, ConfIndexer::IxTAll) || stopindexing) {
|
||||
LOGERR(("recollindex, initial indexing pass failed, not going into monitor mode\n"));
|
||||
exit(1);
|
||||
@ -452,19 +469,12 @@ int main(int argc, char **argv)
|
||||
exit(monret == false);
|
||||
#endif // MONITOR
|
||||
|
||||
#ifdef RCL_USE_ASPELL
|
||||
} else if (op_flags & OPT_S) {
|
||||
if (!makeIndexer(config))
|
||||
exit(1);
|
||||
exit(!confindexer->createAspellDict());
|
||||
#endif // ASPELL
|
||||
} else if (op_flags & OPT_b) {
|
||||
cerr << "Not yet" << endl;
|
||||
return 1;
|
||||
} else {
|
||||
lockorexit(&pidfile);
|
||||
pidfile.write_pid();
|
||||
confindexer = new ConfIndexer(config, updater);
|
||||
makeIndexerOrExit(config, inPlaceReset);
|
||||
bool status = confindexer->index(rezero, ConfIndexer::IxTAll);
|
||||
if (!status)
|
||||
cerr << "Indexing failed" << endl;
|
||||
|
||||
@ -558,6 +558,8 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||
|
||||
/* Rcl::Db methods ///////////////////////////////// */
|
||||
|
||||
bool Db::o_inPlaceReset;
|
||||
|
||||
Db::Db(RclConfig *cfp)
|
||||
: m_ndb(0), m_config(cfp), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
||||
m_synthAbsWordCtxLen(4), m_flushMb(-1),
|
||||
@ -1404,6 +1406,12 @@ bool Db::needUpdate(const string &udi, const string& sig)
|
||||
if (m_ndb == 0)
|
||||
return false;
|
||||
|
||||
// If we are doing an in place reset, no need to test. Note that there is
|
||||
// no need to update the existence map either: it will be done while
|
||||
// indexing.
|
||||
if (o_inPlaceReset)
|
||||
return true;
|
||||
|
||||
string uniterm = make_uniterm(udi);
|
||||
string ermsg;
|
||||
|
||||
|
||||
@ -244,6 +244,14 @@ class Db {
|
||||
|
||||
RclConfig *getConf() {return m_config;}
|
||||
|
||||
/**
|
||||
Activate the "in place reset" mode where all documents are
|
||||
considered as needing update. This is a global/per-process
|
||||
option, and can't be reset. It should be set at the start of
|
||||
the indexing pass.
|
||||
*/
|
||||
static void setInPlaceReset() {o_inPlaceReset = true;}
|
||||
|
||||
/* This has to be public for access by embedded Query::Native */
|
||||
Native *m_ndb;
|
||||
|
||||
@ -277,19 +285,22 @@ private:
|
||||
int m_occFirstCheck;
|
||||
// Maximum file system occupation percentage
|
||||
int m_maxFsOccupPc;
|
||||
|
||||
// Database directory
|
||||
string m_basedir;
|
||||
|
||||
// List of directories for additional databases to query
|
||||
list<string> m_extraDbs;
|
||||
|
||||
OpenMode m_mode;
|
||||
|
||||
// File existence vector: this is filled during the indexing pass. Any
|
||||
// document whose bit is not set at the end is purged
|
||||
vector<bool> updated;
|
||||
|
||||
// Stop terms: those don't get indexed.
|
||||
StopList m_stops;
|
||||
|
||||
// When this is set, all documents are considered as needing a reindex.
|
||||
// This implements an alternative to just erasing the index before
|
||||
// beginning, with the advantage that, for small index format updates
|
||||
// between releases, the index remains available while being recreated.
|
||||
static bool o_inPlaceReset;
|
||||
|
||||
// Reinitialize when adding/removing additional dbs
|
||||
bool adjustdbs();
|
||||
bool stemExpand(const string &lang, const string &s,
|
||||
@ -298,7 +309,7 @@ private:
|
||||
// Flush when idxflushmb is reached
|
||||
bool maybeflush(off_t moretext);
|
||||
|
||||
/* Copyconst and assignemt private and forbidden */
|
||||
/* Copy constructor and assignment are private and forbidden */
|
||||
Db(const Db &) {}
|
||||
Db& operator=(const Db &) {return *this;};
|
||||
};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user