recollindex: do not retry files which previously failed to be indexed, except if they were changed since, or option -k is set

This commit is contained in:
Jean-Francois Dockes 2015-04-23 10:37:37 +02:00
parent 906312519c
commit e6c0ca403d
9 changed files with 209 additions and 86 deletions

View File

@ -13,16 +13,19 @@ recollindex \- indexing command for the Recoll full text search system
[ [
.B \-z|\-Z .B \-z|\-Z
] ]
[
.B \-k
]
.br .br
.B recollindex .B recollindex
[ [
.B \-c .B \-c
<configdir> <cd>
] ]
.B \-m .B \-m
[ [
.B \-w .B \-w
<seconds> <secs>
] ]
[ [
.B \-D .B \-D
@ -34,19 +37,22 @@ recollindex \- indexing command for the Recoll full text search system
.B \-C .B \-C
] ]
[ [
.B \-n .B \-n|-k
] ]
.br .br
.B recollindex .B recollindex
[ [
.B \-c .B \-c
<configdir> <cd>
] ]
.B \-i .B \-i
[ [
.B \-Z .B \-Z
] ]
[ [
.B \-k
]
[
.B \-f .B \-f
] ]
[<path [path ...]>] [<path [path ...]>]
@ -119,6 +125,17 @@ is given, the database will be erased before starting. If option
is given, the database will not be reset, but all files will be considered is given, the database will not be reset, but all files will be considered
as needing reindexing (in place reset). as needing reindexing (in place reset).
.PP .PP
By default,
.B recollindex
does not process again files which previously failed to index (for example
because of a missing helper program). This behaviour is new in version
1.21; in previous versions, failed files were always retried.
If option
.B \-k
is given,
.B recollindex
will try again to process all failed files.
.PP
If option If option
.B .B
\-m \-m

View File

@ -270,6 +270,13 @@
to the indexing command (<command>recollindex</command> to the indexing command (<command>recollindex</command>
<option>-z</option> or <option>-Z</option>).</para> <option>-z</option> or <option>-Z</option>).</para>
<para><command>recollindex</command> skips files which caused an
error during a previous pass. This is a performance
optimization, and a new behaviour in version 1.21 (failed files
were always retried by previous versions). The command line
option <option>-k</option> can be set to retry failed files, for
example after updating a filter.</para>
<para>The following sections give an overview of different <para>The following sections give an overview of different
aspects of the indexing processes and configuration, with links aspects of the indexing processes and configuration, with links
to detailed sections.</para> to detailed sections.</para>
@ -915,20 +922,25 @@ recoll
querying while it is rebuilt, which can be a significant querying while it is rebuilt, which can be a significant
advantage if it is very big (some installations need days advantage if it is very big (some installations need days
for a full index rebuild).</para> for a full index rebuild).</para>
<para>Option <option>-k</option> will force retrying files
which previously failed to be indexed, for example because
of a missing helper program.</para>
<para>Of special interest also, maybe, are <para>Of special interest also, maybe, are
the <option>-i</option> and the <option>-i</option> and <option>-f</option>
<option>-f</option> options. <option>-i</option> allows options. <option>-i</option> allows indexing an explicit
indexing an explicit list of files (given as command line list of files (given as command line parameters or read on
parameters or read on <literal>stdin</literal>). <literal>stdin</literal>). <option>-f</option> tells
<option>-f</option> tells
<command>recollindex</command> to ignore file selection <command>recollindex</command> to ignore file selection
parameters from the configuration. Together, these options allow parameters from the configuration. Together, these options
building a custom file selection process for some area of the allow building a custom file selection process for some area
file system, by adding the top directory to the of the file system, by adding the top directory to the
<varname>skippedPaths</varname> list and using an appropriate <varname>skippedPaths</varname> list and using an
file selection method to build the file list to be fed to appropriate file selection method to build the file list to
<command>recollindex</command> <option>-if</option>. be fed to <command>recollindex</command>
Trivial example:</para> <option>-if</option>. Trivial example:</para>
<programlisting> <programlisting>
find . -name indexable.txt -print | recollindex -if find . -name indexable.txt -print | recollindex -if
</programlisting> </programlisting>

View File

@ -99,7 +99,8 @@ public:
FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc) FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
: m_config(cnf), m_db(db), m_updater(updfunc), : m_config(cnf), m_db(db), m_updater(updfunc),
m_missing(new FSIFIMissingStore), m_detectxattronly(false) m_missing(new FSIFIMissingStore), m_detectxattronly(false),
m_noretryfailed(false)
#ifdef IDX_THREADS #ifdef IDX_THREADS
, m_iwqueue("Internfile", cnf->getThrConf(RclConfig::ThrIntern).first), , m_iwqueue("Internfile", cnf->getThrConf(RclConfig::ThrIntern).first),
m_dwqueue("Split", cnf->getThrConf(RclConfig::ThrSplit).first) m_dwqueue("Split", cnf->getThrConf(RclConfig::ThrSplit).first)
@ -172,8 +173,10 @@ bool FsIndexer::init()
} }
// Recursively index each directory in the topdirs: // Recursively index each directory in the topdirs:
bool FsIndexer::index(bool quickshallow) bool FsIndexer::index(int flags)
{ {
bool quickshallow = (flags & ConfIndexer::IxFQuickShallow) != 0;
m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
Chrono chron; Chrono chron;
if (!init()) if (!init())
return false; return false;
@ -261,7 +264,7 @@ static bool matchesSkipped(const vector<string>& tdl,
for (vector<string>::const_iterator it = tdl.begin(); for (vector<string>::const_iterator it = tdl.begin();
it != tdl.end(); it++) { it != tdl.end(); it++) {
// the topdirs members are already canonized. // the topdirs members are already canonized.
LOGDEB2(("indexfiles:matchesskpp: comparing ancestor [%s] to " LOGDEB2(("matchesSkipped: comparing ancestor [%s] to "
"topdir [%s]\n", mpath.c_str(), it->c_str())); "topdir [%s]\n", mpath.c_str(), it->c_str()));
if (!mpath.compare(*it)) { if (!mpath.compare(*it)) {
topdir = *it; topdir = *it;
@ -324,9 +327,10 @@ goodpath:
/** /**
* Index individual files, out of a full tree run. No database purging * Index individual files, out of a full tree run. No database purging
*/ */
bool FsIndexer::indexFiles(list<string>& files, ConfIndexer::IxFlag flag) bool FsIndexer::indexFiles(list<string>& files, int flags)
{ {
LOGDEB(("FsIndexer::indexFiles\n")); LOGDEB(("FsIndexer::indexFiles\n"));
m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
int ret = false; int ret = false;
if (!init()) if (!init())
@ -354,7 +358,7 @@ bool FsIndexer::indexFiles(list<string>& files, ConfIndexer::IxFlag flag)
walker.setSkippedNames(m_config->getSkippedNames()); walker.setSkippedNames(m_config->getSkippedNames());
// Check path against indexed areas and skipped names/paths // Check path against indexed areas and skipped names/paths
if (!(flag&ConfIndexer::IxFIgnoreSkip) && if (!(flags & ConfIndexer::IxFIgnoreSkip) &&
matchesSkipped(m_tdl, walker, *it)) { matchesSkipped(m_tdl, walker, *it)) {
it++; it++;
continue; continue;
@ -648,8 +652,14 @@ FsIndexer::processonefile(RclConfig *config,
makesig(stp, sig); makesig(stp, sig);
string udi; string udi;
make_udi(fn, cstr_null, udi); make_udi(fn, cstr_null, udi);
bool existingDoc; unsigned int existingDoc;
bool needupdate = m_db->needUpdate(udi, sig, &existingDoc); string oldsig;
bool needupdate;
if (m_noretryfailed) {
needupdate = m_db->needUpdate(udi, sig, &existingDoc, &oldsig);
} else {
needupdate = m_db->needUpdate(udi, sig, &existingDoc, 0);
}
// If ctime (which we use for the sig) differs from mtime, then at most // If ctime (which we use for the sig) differs from mtime, then at most
// the extended attributes were changed, no need to index content. // the extended attributes were changed, no need to index content.
@ -659,7 +669,24 @@ FsIndexer::processonefile(RclConfig *config,
// the ctime to avoid this // the ctime to avoid this
bool xattronly = m_detectxattronly && !m_db->inFullReset() && bool xattronly = m_detectxattronly && !m_db->inFullReset() &&
existingDoc && needupdate && (stp->st_mtime < stp->st_ctime); existingDoc && needupdate && (stp->st_mtime < stp->st_ctime);
LOGDEB(("processone: needupdate %d noretry %d existing %d oldsig [%s]\n",
needupdate, m_noretryfailed, existingDoc, oldsig.c_str()));
// If noretryfailed is set, check for a file which previously
// failed to index, and avoid re-processing it
if (needupdate && m_noretryfailed && existingDoc &&
!oldsig.empty() && *oldsig.rbegin() == '+') {
// Check that the sigs are the same except for the '+'. If the file
// actually changed, we always retry (maybe it was fixed)
string nold = oldsig.substr(0, oldsig.size()-1);
if (!nold.compare(sig)) {
LOGDEB(("processone: not retrying previously failed file\n"));
m_db->setExistingFlags(udi, existingDoc);
needupdate = false;
}
}
if (!needupdate) { if (!needupdate) {
LOGDEB0(("processone: up to date: %s\n", fn.c_str())); LOGDEB0(("processone: up to date: %s\n", fn.c_str()));
if (m_updater) { if (m_updater) {

View File

@ -60,11 +60,11 @@ class FsIndexer : public FsTreeWalkerCB {
* We open the database, * We open the database,
* then call a file system walk for each top-level directory. * then call a file system walk for each top-level directory.
*/ */
bool index(bool quickshallow = 0); bool index(int flags);
/** Index a list of files. No db cleaning or stemdb updating */ /** Index a list of files. No db cleaning or stemdb updating */
bool indexFiles(std::list<std::string> &files, ConfIndexer::IxFlag f = bool indexFiles(std::list<std::string> &files,
ConfIndexer::IxFNone); int f = ConfIndexer::IxFNone);
/** Purge a list of files. */ /** Purge a list of files. */
bool purgeFiles(std::list<std::string> &files); bool purgeFiles(std::list<std::string> &files);
@ -136,6 +136,9 @@ class FsIndexer : public FsTreeWalkerCB {
// needs a config option // needs a config option
bool m_detectxattronly; bool m_detectxattronly;
// No retry of previously failed files
bool m_noretryfailed;
#ifdef IDX_THREADS #ifdef IDX_THREADS
friend void *FsIndexerDbUpdWorker(void*); friend void *FsIndexerDbUpdWorker(void*);
friend void *FsIndexerInternfileWorker(void*); friend void *FsIndexerInternfileWorker(void*);

View File

@ -84,13 +84,13 @@ bool ConfIndexer::firstFsIndexingSequence()
} }
int flushmb = m_db.getFlushMb(); int flushmb = m_db.getFlushMb();
m_db.setFlushMb(2); m_db.setFlushMb(2);
m_fsindexer->index(true); m_fsindexer->index(IxFQuickShallow);
m_db.doFlush(); m_db.doFlush();
m_db.setFlushMb(flushmb); m_db.setFlushMb(flushmb);
return true; return true;
} }
bool ConfIndexer::index(bool resetbefore, ixType typestorun) bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
{ {
Rcl::Db::OpenMode mode = resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd; Rcl::Db::OpenMode mode = resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd;
if (!m_db.open(mode)) { if (!m_db.open(mode)) {
@ -106,7 +106,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun)
} }
deleteZ(m_fsindexer); deleteZ(m_fsindexer);
m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
if (!m_fsindexer || !m_fsindexer->index()) { if (!m_fsindexer || !m_fsindexer->index(flags)) {
m_db.close(); m_db.close();
return false; return false;
} }
@ -154,7 +154,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun)
return true; return true;
} }
bool ConfIndexer::indexFiles(list<string>& ifiles, IxFlag flag) bool ConfIndexer::indexFiles(list<string>& ifiles, int flag)
{ {
list<string> myfiles; list<string> myfiles;
string origcwd = m_config->getOrigCwd(); string origcwd = m_config->getOrigCwd();
@ -237,7 +237,7 @@ bool ConfIndexer::updateDocs(std::vector<Rcl::Doc> &docs, IxFlag flag)
return true; return true;
} }
bool ConfIndexer::purgeFiles(std::list<string> &files, IxFlag flag) bool ConfIndexer::purgeFiles(std::list<string> &files, int flag)
{ {
list<string> myfiles; list<string> myfiles;
string origcwd = m_config->getOrigCwd(); string origcwd = m_config->getOrigCwd();

View File

@ -102,10 +102,15 @@ class ConfIndexer {
enum IxFlag {IxFNone = 0, enum IxFlag {IxFNone = 0,
IxFIgnoreSkip = 1, // Ignore skipped lists IxFIgnoreSkip = 1, // Ignore skipped lists
IxFNoWeb = 2, // Do not process the web queue. IxFNoWeb = 2, // Do not process the web queue.
// First pass: just do the top files so that the user can
// try searching asap.
IxFQuickShallow = 4,
// Do not retry files which previously failed ('+' sigs)
IxFNoRetryFailed = 8,
}; };
/** Run indexers */ /** Run indexers */
bool index(bool resetbefore, ixType typestorun); bool index(bool resetbefore, ixType typestorun, int f = IxFNone);
const string &getReason() {return m_reason;} const string &getReason() {return m_reason;}
@ -122,14 +127,14 @@ class ConfIndexer {
static vector<string> getStemmerNames(); static vector<string> getStemmerNames();
/** Index a list of files. No db cleaning or stemdb updating */ /** Index a list of files. No db cleaning or stemdb updating */
bool indexFiles(list<string> &files, IxFlag f = IxFNone); bool indexFiles(list<string> &files, int f = IxFNone);
/** Update index for list of documents given as list of docs (out of query) /** Update index for list of documents given as list of docs (out of query)
*/ */
bool updateDocs(vector<Rcl::Doc> &docs, IxFlag f = IxFNone); bool updateDocs(vector<Rcl::Doc> &docs, IxFlag f = IxFNone);
static bool docsToPaths(vector<Rcl::Doc> &docs, vector<string> &paths); static bool docsToPaths(vector<Rcl::Doc> &docs, vector<string> &paths);
/** Purge a list of files. */ /** Purge a list of files. */
bool purgeFiles(list<string> &files, IxFlag f = IxFNone); bool purgeFiles(list<string> &files, int f = IxFNone);
/** Set in place reset mode */ /** Set in place reset mode */
void setInPlaceReset() {m_db.setInPlaceReset();} void setInPlaceReset() {m_db.setInPlaceReset();}

View File

@ -68,6 +68,7 @@ static int op_flags;
#define OPT_Z 0x10000 #define OPT_Z 0x10000
#define OPT_n 0x20000 #define OPT_n 0x20000
#define OPT_r 0x40000 #define OPT_r 0x40000
#define OPT_k 0x80000
ReExec *o_reexec; ReExec *o_reexec;
@ -182,7 +183,7 @@ public:
{ {
} }
virtual FsTreeWalker::Status virtual FsTreeWalker::Status
processone(const string & fn, const struct stat *, FsTreeWalker::CbFlag flg) processone(const string& fn, const struct stat *, FsTreeWalker::CbFlag flg)
{ {
if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwRegular) if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwRegular)
m_files.push_back(fn); m_files.push_back(fn);
@ -255,11 +256,12 @@ static const char usage [] =
"\n" "\n"
"recollindex [-h] \n" "recollindex [-h] \n"
" Print help\n" " Print help\n"
"recollindex [-z|-Z] \n" "recollindex [-z|-Z] [-k]\n"
" Index everything according to configuration file\n" " Index everything according to configuration file\n"
" -z : reset database before starting indexing\n" " -z : reset database before starting indexing\n"
" -Z : in place reset: consider all documents as changed. Can also\n" " -Z : in place reset: consider all documents as changed. Can also\n"
" be combined with -i or -r but not -m\n" " be combined with -i or -r but not -m\n"
" -k : retry files on which we previously failed\n"
#ifdef RCL_MONITOR #ifdef RCL_MONITOR
"recollindex -m [-w <secs>] -x [-D] [-C]\n" "recollindex -m [-w <secs>] -x [-D] [-C]\n"
" Perform real time indexing. Don't become a daemon if -D is set.\n" " Perform real time indexing. Don't become a daemon if -D is set.\n"
@ -282,8 +284,8 @@ static const char usage [] =
"recollindex -s <lang>\n" "recollindex -s <lang>\n"
" Build stem database for additional language <lang>\n" " Build stem database for additional language <lang>\n"
#ifdef FUTURE_IMPROVEMENT #ifdef FUTURE_IMPROVEMENT
"recollindex -b\n" "recollindex -W\n"
" Process the Beagle queue\n" " Process the Web queue\n"
#endif #endif
#ifdef RCL_USE_ASPELL #ifdef RCL_USE_ASPELL
"recollindex -S\n" "recollindex -S\n"
@ -351,6 +353,7 @@ int main(int argc, char **argv)
case 'f': op_flags |= OPT_f; break; case 'f': op_flags |= OPT_f; break;
case 'h': op_flags |= OPT_h; break; case 'h': op_flags |= OPT_h; break;
case 'i': op_flags |= OPT_i; break; case 'i': op_flags |= OPT_i; break;
case 'k': op_flags |= OPT_k; break;
case 'l': op_flags |= OPT_l; break; case 'l': op_flags |= OPT_l; break;
case 'm': op_flags |= OPT_m; break; case 'm': op_flags |= OPT_m; break;
case 'n': op_flags |= OPT_n; break; case 'n': op_flags |= OPT_n; break;
@ -415,6 +418,10 @@ int main(int argc, char **argv)
bool rezero((op_flags & OPT_z) != 0); bool rezero((op_flags & OPT_z) != 0);
bool inPlaceReset((op_flags & OPT_Z) != 0); bool inPlaceReset((op_flags & OPT_Z) != 0);
int indexerFlags = ConfIndexer::IxFNone;
if (!(op_flags & OPT_k))
indexerFlags |= ConfIndexer::IxFNoRetryFailed;
Pidfile pidfile(config->getPidfile()); Pidfile pidfile(config->getPidfile());
updater = new MyUpdater(config); updater = new MyUpdater(config);
@ -526,8 +533,8 @@ int main(int argc, char **argv)
if (!(op_flags & OPT_n)) { if (!(op_flags & OPT_n)) {
makeIndexerOrExit(config, inPlaceReset); makeIndexerOrExit(config, inPlaceReset);
LOGDEB(("Recollindex: initial indexing pass before monitoring\n")); LOGDEB(("Recollindex: initial indexing pass before monitoring\n"));
if (!confindexer->index(rezero, ConfIndexer::IxTAll) || if (!confindexer->index(rezero, ConfIndexer::IxTAll, indexerFlags)
stopindexing) { || stopindexing) {
LOGERR(("recollindex, initial indexing pass failed, " LOGERR(("recollindex, initial indexing pass failed, "
"not going into monitor mode\n")); "not going into monitor mode\n"));
exit(1); exit(1);
@ -564,7 +571,8 @@ int main(int argc, char **argv)
} else { } else {
lockorexit(&pidfile); lockorexit(&pidfile);
makeIndexerOrExit(config, inPlaceReset); makeIndexerOrExit(config, inPlaceReset);
bool status = confindexer->index(rezero, ConfIndexer::IxTAll); bool status = confindexer->index(rezero, ConfIndexer::IxTAll,
indexerFlags);
if (!status) if (!status)
cerr << "Indexing failed" << endl; cerr << "Indexing failed" << endl;
if (!confindexer->getReason().empty()) if (!confindexer->getReason().empty())

View File

@ -789,7 +789,6 @@ bool Db::open(OpenMode mode, OpenError *error)
// (now: Xapian 1.2) and the separate objects seem to // (now: Xapian 1.2) and the separate objects seem to
// trigger other Xapian issues, so the query db is now // trigger other Xapian issues, so the query db is now
// a clone of the update one. // a clone of the update one.
// m_ndb->xrdb = Xapian::Database(dir);
m_ndb->xrdb = m_ndb->xwdb; m_ndb->xrdb = m_ndb->xwdb;
LOGDEB(("Db::open: lastdocid: %d\n", LOGDEB(("Db::open: lastdocid: %d\n",
m_ndb->xwdb.get_lastdocid())); m_ndb->xwdb.get_lastdocid()));
@ -1725,23 +1724,70 @@ bool Db::doFlush()
return true; return true;
} }
void Db::setExistingFlags(const string& udi, unsigned int docid)
{
if (m_mode == DbRO)
return;
if (docid == (unsigned int)-1) {
LOGERR(("Db::setExistingFlags: called with bogus docid !!\n"));
return;
}
#ifdef IDX_THREADS
PTMutexLocker lock(m_ndb->m_mutex);
#endif
i_setExistingFlags(udi, docid);
}
void Db::i_setExistingFlags(const string& udi, unsigned int docid)
{
// Set the up to date flag for the document and its subdocs
if (docid >= updated.size()) {
LOGERR(("needUpdate: existing docid beyond "
"updated.size(). Udi [%s], docid %u, "
"updated.size() %u\n", udi.c_str(),
unsigned(docid), (unsigned)updated.size()));
return;
} else {
updated[docid] = true;
}
// Set the existence flag for all the subdocs (if any)
vector<Xapian::docid> docids;
if (!m_ndb->subDocs(udi, 0, docids)) {
LOGERR(("Rcl::Db::needUpdate: can't get subdocs\n"));
return;
}
for (vector<Xapian::docid>::iterator it = docids.begin();
it != docids.end(); it++) {
if (*it < updated.size()) {
LOGDEB2(("Db::needUpdate: docid %d set\n", *it));
updated[*it] = true;
}
}
}
// Test if doc given by udi has changed since last indexed (test sigs) // Test if doc given by udi has changed since last indexed (test sigs)
bool Db::needUpdate(const string &udi, const string& sig, bool *existed) bool Db::needUpdate(const string &udi, const string& sig,
unsigned int *docidp, string *osigp)
{ {
if (m_ndb == 0) if (m_ndb == 0)
return false; return false;
if (osigp)
osigp->clear();
if (docidp)
*docidp = 0;
// If we are doing an in place or full reset, no need to test. // If we are doing an in place or full reset, no need to test.
if (o_inPlaceReset || m_mode == DbTrunc) { if (o_inPlaceReset || m_mode == DbTrunc) {
// For in place reset, pretend the doc existed, to enable subdoc purge // For in place reset, pretend the doc existed, to enable
if (existed) // subdoc purge. The value is only used as a boolean in this case.
*existed = o_inPlaceReset; if (docidp && o_inPlaceReset) {
*docidp = -1;
}
return true; return true;
} }
if (existed)
*existed = false;
string uniterm = make_uniterm(udi); string uniterm = make_uniterm(udi);
string ermsg; string ermsg;
@ -1773,8 +1819,9 @@ bool Db::needUpdate(const string &udi, const string& sig, bool *existed)
return true; return true;
} }
if (existed) if (docidp) {
*existed = true; *docidp = *docid;
}
// Retrieve old file/doc signature from value // Retrieve old file/doc signature from value
string osig; string osig;
@ -1785,6 +1832,11 @@ bool Db::needUpdate(const string &udi, const string& sig, bool *existed)
} }
LOGDEB2(("Db::needUpdate: oldsig [%s] new [%s]\n", LOGDEB2(("Db::needUpdate: oldsig [%s] new [%s]\n",
osig.c_str(), sig.c_str())); osig.c_str(), sig.c_str()));
if (osigp) {
*osigp = osig;
}
// Compare new/old sig // Compare new/old sig
if (sig != osig) { if (sig != osig) {
LOGDEB(("Db::needUpdate:yes: olsig [%s] new [%s] [%s]\n", LOGDEB(("Db::needUpdate:yes: olsig [%s] new [%s] [%s]\n",
@ -1793,34 +1845,10 @@ bool Db::needUpdate(const string &udi, const string& sig, bool *existed)
return true; return true;
} }
// Up to date. // Up to date. Set the existence flags in the map for the doc and
// its subdocs.
LOGDEB(("Db::needUpdate:no: [%s]\n", uniterm.c_str())); LOGDEB(("Db::needUpdate:no: [%s]\n", uniterm.c_str()));
i_setExistingFlags(udi, *docid);
if (m_mode != DbRO) {
// Set the up to date flag for the document and its subdocs
if (*docid >= updated.size()) {
LOGERR(("needUpdate: existing docid beyond "
"updated.size(). Udi [%s], docid %u, "
"updated.size() %u\n", udi.c_str(),
unsigned(*docid), (unsigned)updated.size()));
} else {
updated[*docid] = true;
}
// Set the existence flag for all the subdocs (if any)
vector<Xapian::docid> docids;
if (!m_ndb->subDocs(udi, 0, docids)) {
LOGERR(("Rcl::Db::needUpdate: can't get subdocs\n"));
return true;
}
for (vector<Xapian::docid>::iterator it = docids.begin();
it != docids.end(); it++) {
if (*it < updated.size()) {
LOGDEB2(("Db::needUpdate: docid %d set\n", *it));
updated[*it] = true;
}
}
}
return false; return false;
} }

View File

@ -228,15 +228,36 @@ class Db {
/* Update-related methods ******************************************/ /* Update-related methods ******************************************/
/** Test if the db entry for the given udi is up to date (by /** Test if the db entry for the given udi is up to date.
* comparing the input and stored sigs). This is used both when *
* indexing and querying (before opening a document using stale info), * This is done by comparing the input and stored sigs. This is
* used both when indexing and querying (before opening a document
* using stale info).
*
* **This assumes that the udi pertains to the main index (idxi==0).** * **This assumes that the udi pertains to the main index (idxi==0).**
* Side-effect when the db is writeable: set the existence flag *
* for the file document and all subdocs if any (for later use by * Side-effect when the db is writeable and the document up to
* 'purge()') * date: set the existence flag for the file document and all
* subdocs if any (for later use by 'purge()')
*
* @param udi Unique Document Identifier (as chosen by indexer).
* @param sig New signature (as computed by indexer).
* @param[out] xdocid Non-zero if the doc existed. Should be considered
* opaque, to be used for a possible later call to setExistingFlags().
* Note that if inPlaceReset is set, the output value is non-zero but not
* an actual docid; it is only used as a flag in this case.
* @param[out] osig Old signature.
*/ */
bool needUpdate(const string &udi, const string& sig, bool *existed=0); bool needUpdate(const string &udi, const string& sig,
unsigned int *xdocid = 0, std::string *osig = 0);
/** Set the existence flags for the document and any subdocuments
*
* This can be called by the indexer after needUpdate() has returned true,
* if the indexer does not wish to actually re-index (e.g.: the doc is
* known to cause errors).
*/
void setExistingFlags(const string& udi, unsigned int docid);
/** Indicate if we are doing a systematic reindex. This complements /** Indicate if we are doing a systematic reindex. This complements
needUpdate() return */ needUpdate() return */
@ -488,6 +509,8 @@ private:
friend void *DbUpdWorker(void*); friend void *DbUpdWorker(void*);
#endif // IDX_THREADS #endif // IDX_THREADS
// Internal form of setExistingFlags: no locking
void i_setExistingFlags(const string& udi, unsigned int docid);
// Internal form of close, can be called during destruction // Internal form of close, can be called during destruction
bool i_close(bool final); bool i_close(bool final);
// Reinitialize when adding/removing additional dbs // Reinitialize when adding/removing additional dbs