add -Z "in place reset" option to recollindex
This commit is contained in:
parent
6a60ac73bf
commit
c7c9c49437
@ -119,6 +119,8 @@ class ConfIndexer {
|
|||||||
/** Purge a list of files. */
|
/** Purge a list of files. */
|
||||||
bool purgeFiles(list<string> &files);
|
bool purgeFiles(list<string> &files);
|
||||||
|
|
||||||
|
/** Set in place reset mode */
|
||||||
|
void setInPlaceReset() {m_db.setInPlaceReset();}
|
||||||
private:
|
private:
|
||||||
RclConfig *m_config;
|
RclConfig *m_config;
|
||||||
Rcl::Db m_db;
|
Rcl::Db m_db;
|
||||||
|
|||||||
@ -65,6 +65,7 @@ static int op_flags;
|
|||||||
#define OPT_b 0x2000
|
#define OPT_b 0x2000
|
||||||
#define OPT_f 0x4000
|
#define OPT_f 0x4000
|
||||||
#define OPT_C 0x8000
|
#define OPT_C 0x8000
|
||||||
|
#define OPT_Z 0x10000
|
||||||
|
|
||||||
ReExec *o_reexec;
|
ReExec *o_reexec;
|
||||||
|
|
||||||
@ -147,15 +148,17 @@ static void sigcleanup(int sig)
|
|||||||
stopindexing = 1;
|
stopindexing = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool makeIndexer(RclConfig *config)
|
static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset)
|
||||||
{
|
{
|
||||||
if (!confindexer)
|
if (!confindexer) {
|
||||||
confindexer = new ConfIndexer(config, updater);
|
confindexer = new ConfIndexer(config, updater);
|
||||||
|
if (inPlaceReset)
|
||||||
|
confindexer->setInPlaceReset();
|
||||||
|
}
|
||||||
if (!confindexer) {
|
if (!confindexer) {
|
||||||
cerr << "Cannot create indexer" << endl;
|
cerr << "Cannot create indexer" << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void rclIxIonice(RclConfig *config)
|
void rclIxIonice(RclConfig *config)
|
||||||
@ -172,14 +175,13 @@ void rclIxIonice(RclConfig *config)
|
|||||||
//
|
//
|
||||||
// This is called either from the command line or from the monitor. In
|
// This is called either from the command line or from the monitor. In
|
||||||
// this case we're called repeatedly in the same process, and the
|
// this case we're called repeatedly in the same process, and the
|
||||||
// confindexer is only created once by makeIndexer (but the db closed and
|
// confindexer is only created once by makeIndexerOrExit (but the db closed and
|
||||||
// flushed every time)
|
// flushed every time)
|
||||||
bool indexfiles(RclConfig *config, list<string> &filenames)
|
bool indexfiles(RclConfig *config, list<string> &filenames)
|
||||||
{
|
{
|
||||||
if (filenames.empty())
|
if (filenames.empty())
|
||||||
return true;
|
return true;
|
||||||
if (!makeIndexer(config))
|
makeIndexerOrExit(config, (op_flags & OPT_Z) != 0);
|
||||||
return false;
|
|
||||||
return confindexer->indexFiles(filenames, (op_flags&OPT_f) ?
|
return confindexer->indexFiles(filenames, (op_flags&OPT_f) ?
|
||||||
ConfIndexer::IxFIgnoreSkip :
|
ConfIndexer::IxFIgnoreSkip :
|
||||||
ConfIndexer::IxFNone);
|
ConfIndexer::IxFNone);
|
||||||
@ -190,16 +192,14 @@ bool purgefiles(RclConfig *config, list<string> &filenames)
|
|||||||
{
|
{
|
||||||
if (filenames.empty())
|
if (filenames.empty())
|
||||||
return true;
|
return true;
|
||||||
if (!makeIndexer(config))
|
makeIndexerOrExit(config, (op_flags & OPT_Z) != 0);
|
||||||
return false;
|
|
||||||
return confindexer->purgeFiles(filenames);
|
return confindexer->purgeFiles(filenames);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create stemming and spelling databases
|
// Create stemming and spelling databases
|
||||||
bool createAuxDbs(RclConfig *config)
|
bool createAuxDbs(RclConfig *config)
|
||||||
{
|
{
|
||||||
if (!makeIndexer(config))
|
makeIndexerOrExit(config, false);
|
||||||
return false;
|
|
||||||
|
|
||||||
if (!confindexer->createStemmingDatabases())
|
if (!confindexer->createStemmingDatabases())
|
||||||
return false;
|
return false;
|
||||||
@ -213,8 +213,7 @@ bool createAuxDbs(RclConfig *config)
|
|||||||
// Create additional stem database
|
// Create additional stem database
|
||||||
static bool createstemdb(RclConfig *config, const string &lang)
|
static bool createstemdb(RclConfig *config, const string &lang)
|
||||||
{
|
{
|
||||||
if (!makeIndexer(config))
|
makeIndexerOrExit(config, false);
|
||||||
return false;
|
|
||||||
return confindexer->createStemDb(lang);
|
return confindexer->createStemDb(lang);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -224,9 +223,11 @@ static const char usage [] =
|
|||||||
"\n"
|
"\n"
|
||||||
"recollindex [-h] \n"
|
"recollindex [-h] \n"
|
||||||
" Print help\n"
|
" Print help\n"
|
||||||
"recollindex [-z] \n"
|
"recollindex [-z|-Z] \n"
|
||||||
" Index everything according to configuration file\n"
|
" Index everything according to configuration file\n"
|
||||||
" -z : reset database before starting indexing\n"
|
" -z : reset database before starting indexing\n"
|
||||||
|
" -Z : in place reset: consider all documents as changed. Can also\n"
|
||||||
|
" be combined with -i but not -m\n"
|
||||||
#ifdef RCL_MONITOR
|
#ifdef RCL_MONITOR
|
||||||
"recollindex -m [-w <secs>] -x [-D] [-C]\n"
|
"recollindex -m [-w <secs>] -x [-D] [-C]\n"
|
||||||
" Perform real time indexing. Don't become a daemon if -D is set.\n"
|
" Perform real time indexing. Don't become a daemon if -D is set.\n"
|
||||||
@ -245,8 +246,10 @@ static const char usage [] =
|
|||||||
" List available stemming languages\n"
|
" List available stemming languages\n"
|
||||||
"recollindex -s <lang>\n"
|
"recollindex -s <lang>\n"
|
||||||
" Build stem database for additional language <lang>\n"
|
" Build stem database for additional language <lang>\n"
|
||||||
|
#ifdef FUTURE_IMPROVEMENT
|
||||||
"recollindex -b\n"
|
"recollindex -b\n"
|
||||||
" Process the Beagle queue\n"
|
" Process the Beagle queue\n"
|
||||||
|
#endif
|
||||||
#ifdef RCL_USE_ASPELL
|
#ifdef RCL_USE_ASPELL
|
||||||
"recollindex -S\n"
|
"recollindex -S\n"
|
||||||
" Build aspell spelling dictionary.>\n"
|
" Build aspell spelling dictionary.>\n"
|
||||||
@ -274,6 +277,11 @@ void lockorexit(Pidfile *pidfile)
|
|||||||
". Return (other pid?): " << pid << endl;
|
". Return (other pid?): " << pid << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
if (pidfile->write_pid() != 0) {
|
||||||
|
cerr << "Can't become exclusive indexer: " << pidfile->getreason() <<
|
||||||
|
endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
@ -315,6 +323,7 @@ int main(int argc, char **argv)
|
|||||||
Usage();
|
Usage();
|
||||||
argc--; goto b1;
|
argc--; goto b1;
|
||||||
case 'x': op_flags |= OPT_x; break;
|
case 'x': op_flags |= OPT_x; break;
|
||||||
|
case 'Z': op_flags |= OPT_Z; break;
|
||||||
case 'z': op_flags |= OPT_z; break;
|
case 'z': op_flags |= OPT_z; break;
|
||||||
default: Usage(); break;
|
default: Usage(); break;
|
||||||
}
|
}
|
||||||
@ -332,6 +341,8 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
if ((op_flags & OPT_z) && (op_flags & (OPT_i|OPT_e)))
|
if ((op_flags & OPT_z) && (op_flags & (OPT_i|OPT_e)))
|
||||||
Usage();
|
Usage();
|
||||||
|
if ((op_flags & OPT_Z) && (op_flags & (OPT_m)))
|
||||||
|
Usage();
|
||||||
|
|
||||||
string reason;
|
string reason;
|
||||||
RclInitFlags flags = (op_flags & OPT_m) && !(op_flags&OPT_D) ?
|
RclInitFlags flags = (op_flags & OPT_m) && !(op_flags&OPT_D) ?
|
||||||
@ -344,6 +355,7 @@ int main(int argc, char **argv)
|
|||||||
o_reexec->atexit(cleanup);
|
o_reexec->atexit(cleanup);
|
||||||
|
|
||||||
bool rezero(op_flags & OPT_z);
|
bool rezero(op_flags & OPT_z);
|
||||||
|
bool inPlaceReset(op_flags & OPT_Z);
|
||||||
Pidfile pidfile(config->getPidfile());
|
Pidfile pidfile(config->getPidfile());
|
||||||
updater = new MyUpdater(config);
|
updater = new MyUpdater(config);
|
||||||
|
|
||||||
@ -355,7 +367,6 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
if (op_flags & (OPT_i|OPT_e)) {
|
if (op_flags & (OPT_i|OPT_e)) {
|
||||||
lockorexit(&pidfile);
|
lockorexit(&pidfile);
|
||||||
pidfile.write_pid();
|
|
||||||
|
|
||||||
list<string> filenames;
|
list<string> filenames;
|
||||||
|
|
||||||
@ -394,6 +405,11 @@ int main(int argc, char **argv)
|
|||||||
Usage();
|
Usage();
|
||||||
string lang = *argv++; argc--;
|
string lang = *argv++; argc--;
|
||||||
exit(!createstemdb(config, lang));
|
exit(!createstemdb(config, lang));
|
||||||
|
#ifdef RCL_USE_ASPELL
|
||||||
|
} else if (op_flags & OPT_S) {
|
||||||
|
makeIndexerOrExit(config, inPlaceReset);
|
||||||
|
exit(!confindexer->createAspellDict());
|
||||||
|
#endif // ASPELL
|
||||||
|
|
||||||
#ifdef RCL_MONITOR
|
#ifdef RCL_MONITOR
|
||||||
} else if (op_flags & OPT_m) {
|
} else if (op_flags & OPT_m) {
|
||||||
@ -408,6 +424,7 @@ int main(int argc, char **argv)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Need to rewrite pid, it changed
|
||||||
pidfile.write_pid();
|
pidfile.write_pid();
|
||||||
|
|
||||||
// Not too sure if I have to redo the nice thing after daemon(),
|
// Not too sure if I have to redo the nice thing after daemon(),
|
||||||
@ -429,7 +446,7 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
confindexer = new ConfIndexer(config, updater);
|
makeIndexerOrExit(config, inPlaceReset);
|
||||||
if (!confindexer->index(rezero, ConfIndexer::IxTAll) || stopindexing) {
|
if (!confindexer->index(rezero, ConfIndexer::IxTAll) || stopindexing) {
|
||||||
LOGERR(("recollindex, initial indexing pass failed, not going into monitor mode\n"));
|
LOGERR(("recollindex, initial indexing pass failed, not going into monitor mode\n"));
|
||||||
exit(1);
|
exit(1);
|
||||||
@ -452,19 +469,12 @@ int main(int argc, char **argv)
|
|||||||
exit(monret == false);
|
exit(monret == false);
|
||||||
#endif // MONITOR
|
#endif // MONITOR
|
||||||
|
|
||||||
#ifdef RCL_USE_ASPELL
|
|
||||||
} else if (op_flags & OPT_S) {
|
|
||||||
if (!makeIndexer(config))
|
|
||||||
exit(1);
|
|
||||||
exit(!confindexer->createAspellDict());
|
|
||||||
#endif // ASPELL
|
|
||||||
} else if (op_flags & OPT_b) {
|
} else if (op_flags & OPT_b) {
|
||||||
cerr << "Not yet" << endl;
|
cerr << "Not yet" << endl;
|
||||||
return 1;
|
return 1;
|
||||||
} else {
|
} else {
|
||||||
lockorexit(&pidfile);
|
lockorexit(&pidfile);
|
||||||
pidfile.write_pid();
|
makeIndexerOrExit(config, inPlaceReset);
|
||||||
confindexer = new ConfIndexer(config, updater);
|
|
||||||
bool status = confindexer->index(rezero, ConfIndexer::IxTAll);
|
bool status = confindexer->index(rezero, ConfIndexer::IxTAll);
|
||||||
if (!status)
|
if (!status)
|
||||||
cerr << "Indexing failed" << endl;
|
cerr << "Indexing failed" << endl;
|
||||||
|
|||||||
@ -558,6 +558,8 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
|||||||
|
|
||||||
/* Rcl::Db methods ///////////////////////////////// */
|
/* Rcl::Db methods ///////////////////////////////// */
|
||||||
|
|
||||||
|
bool Db::o_inPlaceReset;
|
||||||
|
|
||||||
Db::Db(RclConfig *cfp)
|
Db::Db(RclConfig *cfp)
|
||||||
: m_ndb(0), m_config(cfp), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
: m_ndb(0), m_config(cfp), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
||||||
m_synthAbsWordCtxLen(4), m_flushMb(-1),
|
m_synthAbsWordCtxLen(4), m_flushMb(-1),
|
||||||
@ -1404,6 +1406,12 @@ bool Db::needUpdate(const string &udi, const string& sig)
|
|||||||
if (m_ndb == 0)
|
if (m_ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// If we are doing an in place reset, no need to test. Note that there is
|
||||||
|
// no need to update the existence map either, it will be done while
|
||||||
|
// indexing
|
||||||
|
if (o_inPlaceReset)
|
||||||
|
return true;
|
||||||
|
|
||||||
string uniterm = make_uniterm(udi);
|
string uniterm = make_uniterm(udi);
|
||||||
string ermsg;
|
string ermsg;
|
||||||
|
|
||||||
|
|||||||
@ -244,6 +244,14 @@ class Db {
|
|||||||
|
|
||||||
RclConfig *getConf() {return m_config;}
|
RclConfig *getConf() {return m_config;}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Activate the "in place reset" mode where all documents are
|
||||||
|
considered as needing update. This is a global/per-process
|
||||||
|
option, and can't be reset. It should be set at the start of
|
||||||
|
the indexing pass
|
||||||
|
*/
|
||||||
|
static void setInPlaceReset() {o_inPlaceReset = true;}
|
||||||
|
|
||||||
/* This has to be public for access by embedded Query::Native */
|
/* This has to be public for access by embedded Query::Native */
|
||||||
Native *m_ndb;
|
Native *m_ndb;
|
||||||
|
|
||||||
@ -277,19 +285,22 @@ private:
|
|||||||
int m_occFirstCheck;
|
int m_occFirstCheck;
|
||||||
// Maximum file system occupation percentage
|
// Maximum file system occupation percentage
|
||||||
int m_maxFsOccupPc;
|
int m_maxFsOccupPc;
|
||||||
|
|
||||||
// Database directory
|
// Database directory
|
||||||
string m_basedir;
|
string m_basedir;
|
||||||
|
|
||||||
// List of directories for additional databases to query
|
// List of directories for additional databases to query
|
||||||
list<string> m_extraDbs;
|
list<string> m_extraDbs;
|
||||||
|
|
||||||
OpenMode m_mode;
|
OpenMode m_mode;
|
||||||
|
// File existence vector: this is filled during the indexing pass. Any
|
||||||
|
// document whose bit is not set at the end is purged
|
||||||
vector<bool> updated;
|
vector<bool> updated;
|
||||||
|
// Stop terms: those don't get indexed.
|
||||||
StopList m_stops;
|
StopList m_stops;
|
||||||
|
// When this is set, all documents are considered as needing a reindex.
|
||||||
|
// This implements an alternative to just erasing the index before
|
||||||
|
// beginning, with the advantage that, for small index format updates
|
||||||
|
// between releases the index remains available while being recreated.
|
||||||
|
static bool o_inPlaceReset;
|
||||||
|
|
||||||
// Reinitialize when adding/removing additional dbs
|
// Reinitialize when adding/removing additional dbs
|
||||||
bool adjustdbs();
|
bool adjustdbs();
|
||||||
bool stemExpand(const string &lang, const string &s,
|
bool stemExpand(const string &lang, const string &s,
|
||||||
@ -298,7 +309,7 @@ private:
|
|||||||
// Flush when idxflushmb is reached
|
// Flush when idxflushmb is reached
|
||||||
bool maybeflush(off_t moretext);
|
bool maybeflush(off_t moretext);
|
||||||
|
|
||||||
/* Copyconst and assignemt private and forbidden */
|
/* Copy constructor and assignment are private and forbidden */
|
||||||
Db(const Db &) {}
|
Db(const Db &) {}
|
||||||
Db& operator=(const Db &) {return *this;};
|
Db& operator=(const Db &) {return *this;};
|
||||||
};
|
};
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user