recollindex: do not retry files which previously failed to be indexed, except if they were changed since, or option -k is set
This commit is contained in:
parent
906312519c
commit
e6c0ca403d
@ -13,16 +13,19 @@ recollindex \- indexing command for the Recoll full text search system
|
||||
[
|
||||
.B \-z|\-Z
|
||||
]
|
||||
[
|
||||
.B \-k
|
||||
]
|
||||
.br
|
||||
.B recollindex
|
||||
[
|
||||
.B \-c
|
||||
<configdir>
|
||||
<cd>
|
||||
]
|
||||
.B \-m
|
||||
[
|
||||
.B \-w
|
||||
<seconds>
|
||||
<secs>
|
||||
]
|
||||
[
|
||||
.B \-D
|
||||
@ -34,19 +37,22 @@ recollindex \- indexing command for the Recoll full text search system
|
||||
.B \-C
|
||||
]
|
||||
[
|
||||
.B \-n
|
||||
.B \-n|\-k
|
||||
]
|
||||
.br
|
||||
.B recollindex
|
||||
[
|
||||
.B \-c
|
||||
<configdir>
|
||||
<cd>
|
||||
]
|
||||
.B \-i
|
||||
[
|
||||
.B \-Z
|
||||
]
|
||||
[
|
||||
.B \-k
|
||||
]
|
||||
[
|
||||
.B \-f
|
||||
]
|
||||
[<path [path ...]>]
|
||||
@ -119,6 +125,17 @@ is given, the database will be erased before starting. If option
|
||||
is given, the database will not be reset, but all files will be considered
|
||||
as needing reindexing (in place reset).
|
||||
.PP
|
||||
By default,
|
||||
.B recollindex
|
||||
does not process again files which previously failed to index (for example
|
||||
because of a missing helper program). This behaviour is new in version
|
||||
1.21: error files were always retried in previous versions.
|
||||
If option
|
||||
.B \-k
|
||||
is given,
|
||||
.B recollindex
|
||||
will try again to process all failed files.
|
||||
.PP
|
||||
If option
|
||||
.B
|
||||
\-m
|
||||
|
||||
@ -270,6 +270,13 @@
|
||||
to the indexing command (<command>recollindex</command>
|
||||
<option>-z</option> or <option>-Z</option>).</para>
|
||||
|
||||
<para><command>recollindex</command> skips files which caused an
|
||||
error during a previous pass. This is a performance
|
||||
optimization, and a new behaviour in version 1.21 (failed files
|
||||
were always retried by previous versions). The command line
|
||||
option <option>-k</option> can be set to retry failed files, for
|
||||
example after updating a filter.</para>
|
||||
|
||||
<para>The following sections give an overview of different
|
||||
aspects of the indexing processes and configuration, with links
|
||||
to detailed sections.</para>
|
||||
@ -915,20 +922,25 @@ recoll
|
||||
querying while it is rebuilt, which can be a significant
|
||||
advantage if it is very big (some installations need days
|
||||
for a full index rebuild).</para>
|
||||
|
||||
<para>Option <option>-k</option> will force retrying files
|
||||
which previously failed to be indexed, for example because
|
||||
of a missing helper program.</para>
|
||||
|
||||
<para>Of special interest also, maybe, are
|
||||
the <option>-i</option> and
|
||||
<option>-f</option> options. <option>-i</option> allows
|
||||
indexing an explicit list of files (given as command line
|
||||
parameters or read on <literal>stdin</literal>).
|
||||
<option>-f</option> tells
|
||||
the <option>-i</option> and <option>-f</option>
|
||||
options. <option>-i</option> allows indexing an explicit
|
||||
list of files (given as command line parameters or read on
|
||||
<literal>stdin</literal>). <option>-f</option> tells
|
||||
<command>recollindex</command> to ignore file selection
|
||||
parameters from the configuration. Together, these options allow
|
||||
building a custom file selection process for some area of the
|
||||
file system, by adding the top directory to the
|
||||
<varname>skippedPaths</varname> list and using an appropriate
|
||||
file selection method to build the file list to be fed to
|
||||
<command>recollindex</command> <option>-if</option>.
|
||||
Trivial example:</para>
|
||||
parameters from the configuration. Together, these options
|
||||
allow building a custom file selection process for some area
|
||||
of the file system, by adding the top directory to the
|
||||
<varname>skippedPaths</varname> list and using an
|
||||
appropriate file selection method to build the file list to
|
||||
be fed to <command>recollindex</command>
|
||||
<option>-if</option>. Trivial example:</para>
|
||||
|
||||
<programlisting>
|
||||
find . -name indexable.txt -print | recollindex -if
|
||||
</programlisting>
|
||||
|
||||
@ -99,7 +99,8 @@ public:
|
||||
|
||||
FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
|
||||
: m_config(cnf), m_db(db), m_updater(updfunc),
|
||||
m_missing(new FSIFIMissingStore), m_detectxattronly(false)
|
||||
m_missing(new FSIFIMissingStore), m_detectxattronly(false),
|
||||
m_noretryfailed(false)
|
||||
#ifdef IDX_THREADS
|
||||
, m_iwqueue("Internfile", cnf->getThrConf(RclConfig::ThrIntern).first),
|
||||
m_dwqueue("Split", cnf->getThrConf(RclConfig::ThrSplit).first)
|
||||
@ -172,8 +173,10 @@ bool FsIndexer::init()
|
||||
}
|
||||
|
||||
// Recursively index each directory in the topdirs:
|
||||
bool FsIndexer::index(bool quickshallow)
|
||||
bool FsIndexer::index(int flags)
|
||||
{
|
||||
bool quickshallow = (flags & ConfIndexer::IxFQuickShallow) != 0;
|
||||
m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
|
||||
Chrono chron;
|
||||
if (!init())
|
||||
return false;
|
||||
@ -261,7 +264,7 @@ static bool matchesSkipped(const vector<string>& tdl,
|
||||
for (vector<string>::const_iterator it = tdl.begin();
|
||||
it != tdl.end(); it++) {
|
||||
// the topdirs members are already canonized.
|
||||
LOGDEB2(("indexfiles:matchesskpp: comparing ancestor [%s] to "
|
||||
LOGDEB2(("matchesSkipped: comparing ancestor [%s] to "
|
||||
"topdir [%s]\n", mpath.c_str(), it->c_str()));
|
||||
if (!mpath.compare(*it)) {
|
||||
topdir = *it;
|
||||
@ -324,9 +327,10 @@ goodpath:
|
||||
/**
|
||||
* Index individual files, out of a full tree run. No database purging
|
||||
*/
|
||||
bool FsIndexer::indexFiles(list<string>& files, ConfIndexer::IxFlag flag)
|
||||
bool FsIndexer::indexFiles(list<string>& files, int flags)
|
||||
{
|
||||
LOGDEB(("FsIndexer::indexFiles\n"));
|
||||
m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
|
||||
int ret = false;
|
||||
|
||||
if (!init())
|
||||
@ -354,7 +358,7 @@ bool FsIndexer::indexFiles(list<string>& files, ConfIndexer::IxFlag flag)
|
||||
|
||||
walker.setSkippedNames(m_config->getSkippedNames());
|
||||
// Check path against indexed areas and skipped names/paths
|
||||
if (!(flag&ConfIndexer::IxFIgnoreSkip) &&
|
||||
if (!(flags & ConfIndexer::IxFIgnoreSkip) &&
|
||||
matchesSkipped(m_tdl, walker, *it)) {
|
||||
it++;
|
||||
continue;
|
||||
@ -648,8 +652,14 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
makesig(stp, sig);
|
||||
string udi;
|
||||
make_udi(fn, cstr_null, udi);
|
||||
bool existingDoc;
|
||||
bool needupdate = m_db->needUpdate(udi, sig, &existingDoc);
|
||||
unsigned int existingDoc;
|
||||
string oldsig;
|
||||
bool needupdate;
|
||||
if (m_noretryfailed) {
|
||||
needupdate = m_db->needUpdate(udi, sig, &existingDoc, &oldsig);
|
||||
} else {
|
||||
needupdate = m_db->needUpdate(udi, sig, &existingDoc, 0);
|
||||
}
|
||||
|
||||
// If ctime (which we use for the sig) differs from mtime, then at most
|
||||
// the extended attributes were changed, no need to index content.
|
||||
@ -659,7 +669,24 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
// the ctime to avoid this
|
||||
bool xattronly = m_detectxattronly && !m_db->inFullReset() &&
|
||||
existingDoc && needupdate && (stp->st_mtime < stp->st_ctime);
|
||||
|
||||
|
||||
LOGDEB(("processone: needupdate %d noretry %d existing %d oldsig [%s]\n",
|
||||
needupdate, m_noretryfailed, existingDoc, oldsig.c_str()));
|
||||
|
||||
// If noretryfailed is set, check for a file which previously
|
||||
// failed to index, and avoid re-processing it
|
||||
if (needupdate && m_noretryfailed && existingDoc &&
|
||||
!oldsig.empty() && *oldsig.rbegin() == '+') {
|
||||
// Check that the sigs are the same except for the '+'. If the file
|
||||
// actually changed, we always retry (maybe it was fixed)
|
||||
string nold = oldsig.substr(0, oldsig.size()-1);
|
||||
if (!nold.compare(sig)) {
|
||||
LOGDEB(("processone: not retrying previously failed file\n"));
|
||||
m_db->setExistingFlags(udi, existingDoc);
|
||||
needupdate = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!needupdate) {
|
||||
LOGDEB0(("processone: up to date: %s\n", fn.c_str()));
|
||||
if (m_updater) {
|
||||
|
||||
@ -60,11 +60,11 @@ class FsIndexer : public FsTreeWalkerCB {
|
||||
* We open the database,
|
||||
* then call a file system walk for each top-level directory.
|
||||
*/
|
||||
bool index(bool quickshallow = 0);
|
||||
bool index(int flags);
|
||||
|
||||
/** Index a list of files. No db cleaning or stemdb updating */
|
||||
bool indexFiles(std::list<std::string> &files, ConfIndexer::IxFlag f =
|
||||
ConfIndexer::IxFNone);
|
||||
bool indexFiles(std::list<std::string> &files,
|
||||
int f = ConfIndexer::IxFNone);
|
||||
|
||||
/** Purge a list of files. */
|
||||
bool purgeFiles(std::list<std::string> &files);
|
||||
@ -136,6 +136,9 @@ class FsIndexer : public FsTreeWalkerCB {
|
||||
// needs a config option
|
||||
bool m_detectxattronly;
|
||||
|
||||
// No retry of previously failed files
|
||||
bool m_noretryfailed;
|
||||
|
||||
#ifdef IDX_THREADS
|
||||
friend void *FsIndexerDbUpdWorker(void*);
|
||||
friend void *FsIndexerInternfileWorker(void*);
|
||||
|
||||
@ -84,13 +84,13 @@ bool ConfIndexer::firstFsIndexingSequence()
|
||||
}
|
||||
int flushmb = m_db.getFlushMb();
|
||||
m_db.setFlushMb(2);
|
||||
m_fsindexer->index(true);
|
||||
m_fsindexer->index(IxFQuickShallow);
|
||||
m_db.doFlush();
|
||||
m_db.setFlushMb(flushmb);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ConfIndexer::index(bool resetbefore, ixType typestorun)
|
||||
bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
|
||||
{
|
||||
Rcl::Db::OpenMode mode = resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd;
|
||||
if (!m_db.open(mode)) {
|
||||
@ -106,7 +106,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun)
|
||||
}
|
||||
deleteZ(m_fsindexer);
|
||||
m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
|
||||
if (!m_fsindexer || !m_fsindexer->index()) {
|
||||
if (!m_fsindexer || !m_fsindexer->index(flags)) {
|
||||
m_db.close();
|
||||
return false;
|
||||
}
|
||||
@ -154,7 +154,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ConfIndexer::indexFiles(list<string>& ifiles, IxFlag flag)
|
||||
bool ConfIndexer::indexFiles(list<string>& ifiles, int flag)
|
||||
{
|
||||
list<string> myfiles;
|
||||
string origcwd = m_config->getOrigCwd();
|
||||
@ -237,7 +237,7 @@ bool ConfIndexer::updateDocs(std::vector<Rcl::Doc> &docs, IxFlag flag)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ConfIndexer::purgeFiles(std::list<string> &files, IxFlag flag)
|
||||
bool ConfIndexer::purgeFiles(std::list<string> &files, int flag)
|
||||
{
|
||||
list<string> myfiles;
|
||||
string origcwd = m_config->getOrigCwd();
|
||||
|
||||
@ -102,10 +102,15 @@ class ConfIndexer {
|
||||
enum IxFlag {IxFNone = 0,
|
||||
IxFIgnoreSkip = 1, // Ignore skipped lists
|
||||
IxFNoWeb = 2, // Do not process the web queue.
|
||||
// First pass: just do the top files so that the user can
|
||||
// try searching asap.
|
||||
IxFQuickShallow = 4,
|
||||
// Do not retry files which previously failed ('+' sigs)
|
||||
IxFNoRetryFailed = 8,
|
||||
};
|
||||
|
||||
/** Run indexers */
|
||||
bool index(bool resetbefore, ixType typestorun);
|
||||
bool index(bool resetbefore, ixType typestorun, int f = IxFNone);
|
||||
|
||||
const string &getReason() {return m_reason;}
|
||||
|
||||
@ -122,14 +127,14 @@ class ConfIndexer {
|
||||
static vector<string> getStemmerNames();
|
||||
|
||||
/** Index a list of files. No db cleaning or stemdb updating */
|
||||
bool indexFiles(list<string> &files, IxFlag f = IxFNone);
|
||||
bool indexFiles(list<string> &files, int f = IxFNone);
|
||||
|
||||
/** Update index for list of documents given as list of docs (out of query)
|
||||
*/
|
||||
bool updateDocs(vector<Rcl::Doc> &docs, IxFlag f = IxFNone);
|
||||
static bool docsToPaths(vector<Rcl::Doc> &docs, vector<string> &paths);
|
||||
/** Purge a list of files. */
|
||||
bool purgeFiles(list<string> &files, IxFlag f = IxFNone);
|
||||
bool purgeFiles(list<string> &files, int f = IxFNone);
|
||||
|
||||
/** Set in place reset mode */
|
||||
void setInPlaceReset() {m_db.setInPlaceReset();}
|
||||
|
||||
@ -68,6 +68,7 @@ static int op_flags;
|
||||
#define OPT_Z 0x10000
|
||||
#define OPT_n 0x20000
|
||||
#define OPT_r 0x40000
|
||||
#define OPT_k 0x80000
|
||||
|
||||
ReExec *o_reexec;
|
||||
|
||||
@ -182,7 +183,7 @@ public:
|
||||
{
|
||||
}
|
||||
virtual FsTreeWalker::Status
|
||||
processone(const string & fn, const struct stat *, FsTreeWalker::CbFlag flg)
|
||||
processone(const string& fn, const struct stat *, FsTreeWalker::CbFlag flg)
|
||||
{
|
||||
if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwRegular)
|
||||
m_files.push_back(fn);
|
||||
@ -255,11 +256,12 @@ static const char usage [] =
|
||||
"\n"
|
||||
"recollindex [-h] \n"
|
||||
" Print help\n"
|
||||
"recollindex [-z|-Z] \n"
|
||||
"recollindex [-z|-Z] [-k]\n"
|
||||
" Index everything according to configuration file\n"
|
||||
" -z : reset database before starting indexing\n"
|
||||
" -Z : in place reset: consider all documents as changed. Can also\n"
|
||||
" be combined with -i or -r but not -m\n"
|
||||
" -k : retry files on which we previously failed\n"
|
||||
#ifdef RCL_MONITOR
|
||||
"recollindex -m [-w <secs>] -x [-D] [-C]\n"
|
||||
" Perform real time indexing. Don't become a daemon if -D is set.\n"
|
||||
@ -282,8 +284,8 @@ static const char usage [] =
|
||||
"recollindex -s <lang>\n"
|
||||
" Build stem database for additional language <lang>\n"
|
||||
#ifdef FUTURE_IMPROVEMENT
|
||||
"recollindex -b\n"
|
||||
" Process the Beagle queue\n"
|
||||
"recollindex -W\n"
|
||||
" Process the Web queue\n"
|
||||
#endif
|
||||
#ifdef RCL_USE_ASPELL
|
||||
"recollindex -S\n"
|
||||
@ -351,6 +353,7 @@ int main(int argc, char **argv)
|
||||
case 'f': op_flags |= OPT_f; break;
|
||||
case 'h': op_flags |= OPT_h; break;
|
||||
case 'i': op_flags |= OPT_i; break;
|
||||
case 'k': op_flags |= OPT_k; break;
|
||||
case 'l': op_flags |= OPT_l; break;
|
||||
case 'm': op_flags |= OPT_m; break;
|
||||
case 'n': op_flags |= OPT_n; break;
|
||||
@ -415,6 +418,10 @@ int main(int argc, char **argv)
|
||||
|
||||
bool rezero((op_flags & OPT_z) != 0);
|
||||
bool inPlaceReset((op_flags & OPT_Z) != 0);
|
||||
int indexerFlags = ConfIndexer::IxFNone;
|
||||
if (!(op_flags & OPT_k))
|
||||
indexerFlags |= ConfIndexer::IxFNoRetryFailed;
|
||||
|
||||
Pidfile pidfile(config->getPidfile());
|
||||
updater = new MyUpdater(config);
|
||||
|
||||
@ -526,8 +533,8 @@ int main(int argc, char **argv)
|
||||
if (!(op_flags & OPT_n)) {
|
||||
makeIndexerOrExit(config, inPlaceReset);
|
||||
LOGDEB(("Recollindex: initial indexing pass before monitoring\n"));
|
||||
if (!confindexer->index(rezero, ConfIndexer::IxTAll) ||
|
||||
stopindexing) {
|
||||
if (!confindexer->index(rezero, ConfIndexer::IxTAll, indexerFlags)
|
||||
|| stopindexing) {
|
||||
LOGERR(("recollindex, initial indexing pass failed, "
|
||||
"not going into monitor mode\n"));
|
||||
exit(1);
|
||||
@ -564,7 +571,8 @@ int main(int argc, char **argv)
|
||||
} else {
|
||||
lockorexit(&pidfile);
|
||||
makeIndexerOrExit(config, inPlaceReset);
|
||||
bool status = confindexer->index(rezero, ConfIndexer::IxTAll);
|
||||
bool status = confindexer->index(rezero, ConfIndexer::IxTAll,
|
||||
indexerFlags);
|
||||
if (!status)
|
||||
cerr << "Indexing failed" << endl;
|
||||
if (!confindexer->getReason().empty())
|
||||
|
||||
@ -789,7 +789,6 @@ bool Db::open(OpenMode mode, OpenError *error)
|
||||
// (now: Xapian 1.2) and the separate objects seem to
|
||||
// trigger other Xapian issues, so the query db is now
|
||||
// a clone of the update one.
|
||||
// m_ndb->xrdb = Xapian::Database(dir);
|
||||
m_ndb->xrdb = m_ndb->xwdb;
|
||||
LOGDEB(("Db::open: lastdocid: %d\n",
|
||||
m_ndb->xwdb.get_lastdocid()));
|
||||
@ -1725,23 +1724,70 @@ bool Db::doFlush()
|
||||
return true;
|
||||
}
|
||||
|
||||
void Db::setExistingFlags(const string& udi, unsigned int docid)
|
||||
{
|
||||
if (m_mode == DbRO)
|
||||
return;
|
||||
if (docid == (unsigned int)-1) {
|
||||
LOGERR(("Db::setExistingFlags: called with bogus docid !!\n"));
|
||||
return;
|
||||
}
|
||||
#ifdef IDX_THREADS
|
||||
PTMutexLocker lock(m_ndb->m_mutex);
|
||||
#endif
|
||||
i_setExistingFlags(udi, docid);
|
||||
}
|
||||
|
||||
void Db::i_setExistingFlags(const string& udi, unsigned int docid)
|
||||
{
|
||||
// Set the up to date flag for the document and its subdocs
|
||||
if (docid >= updated.size()) {
|
||||
LOGERR(("needUpdate: existing docid beyond "
|
||||
"updated.size(). Udi [%s], docid %u, "
|
||||
"updated.size() %u\n", udi.c_str(),
|
||||
unsigned(docid), (unsigned)updated.size()));
|
||||
return;
|
||||
} else {
|
||||
updated[docid] = true;
|
||||
}
|
||||
|
||||
// Set the existence flag for all the subdocs (if any)
|
||||
vector<Xapian::docid> docids;
|
||||
if (!m_ndb->subDocs(udi, 0, docids)) {
|
||||
LOGERR(("Rcl::Db::needUpdate: can't get subdocs\n"));
|
||||
return;
|
||||
}
|
||||
for (vector<Xapian::docid>::iterator it = docids.begin();
|
||||
it != docids.end(); it++) {
|
||||
if (*it < updated.size()) {
|
||||
LOGDEB2(("Db::needUpdate: docid %d set\n", *it));
|
||||
updated[*it] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test if doc given by udi has changed since last indexed (test sigs)
|
||||
bool Db::needUpdate(const string &udi, const string& sig, bool *existed)
|
||||
bool Db::needUpdate(const string &udi, const string& sig,
|
||||
unsigned int *docidp, string *osigp)
|
||||
{
|
||||
if (m_ndb == 0)
|
||||
return false;
|
||||
|
||||
if (osigp)
|
||||
osigp->clear();
|
||||
if (docidp)
|
||||
*docidp = 0;
|
||||
|
||||
// If we are doing an in place or full reset, no need to test.
|
||||
if (o_inPlaceReset || m_mode == DbTrunc) {
|
||||
// For in place reset, pretend the doc existed, to enable subdoc purge
|
||||
if (existed)
|
||||
*existed = o_inPlaceReset;
|
||||
// For in place reset, pretend the doc existed, to enable
|
||||
// subdoc purge. The value is only used as a boolean in this case.
|
||||
if (docidp && o_inPlaceReset) {
|
||||
*docidp = -1;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (existed)
|
||||
*existed = false;
|
||||
|
||||
string uniterm = make_uniterm(udi);
|
||||
string ermsg;
|
||||
|
||||
@ -1773,8 +1819,9 @@ bool Db::needUpdate(const string &udi, const string& sig, bool *existed)
|
||||
return true;
|
||||
}
|
||||
|
||||
if (existed)
|
||||
*existed = true;
|
||||
if (docidp) {
|
||||
*docidp = *docid;
|
||||
}
|
||||
|
||||
// Retrieve old file/doc signature from value
|
||||
string osig;
|
||||
@ -1785,6 +1832,11 @@ bool Db::needUpdate(const string &udi, const string& sig, bool *existed)
|
||||
}
|
||||
LOGDEB2(("Db::needUpdate: oldsig [%s] new [%s]\n",
|
||||
osig.c_str(), sig.c_str()));
|
||||
|
||||
if (osigp) {
|
||||
*osigp = osig;
|
||||
}
|
||||
|
||||
// Compare new/old sig
|
||||
if (sig != osig) {
|
||||
LOGDEB(("Db::needUpdate:yes: olsig [%s] new [%s] [%s]\n",
|
||||
@ -1793,34 +1845,10 @@ bool Db::needUpdate(const string &udi, const string& sig, bool *existed)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Up to date.
|
||||
// Up to date. Set the existence flags in the map for the doc and
|
||||
// its subdocs.
|
||||
LOGDEB(("Db::needUpdate:no: [%s]\n", uniterm.c_str()));
|
||||
|
||||
if (m_mode != DbRO) {
|
||||
// Set the up to date flag for the document and its subdocs
|
||||
if (*docid >= updated.size()) {
|
||||
LOGERR(("needUpdate: existing docid beyond "
|
||||
"updated.size(). Udi [%s], docid %u, "
|
||||
"updated.size() %u\n", udi.c_str(),
|
||||
unsigned(*docid), (unsigned)updated.size()));
|
||||
} else {
|
||||
updated[*docid] = true;
|
||||
}
|
||||
|
||||
// Set the existence flag for all the subdocs (if any)
|
||||
vector<Xapian::docid> docids;
|
||||
if (!m_ndb->subDocs(udi, 0, docids)) {
|
||||
LOGERR(("Rcl::Db::needUpdate: can't get subdocs\n"));
|
||||
return true;
|
||||
}
|
||||
for (vector<Xapian::docid>::iterator it = docids.begin();
|
||||
it != docids.end(); it++) {
|
||||
if (*it < updated.size()) {
|
||||
LOGDEB2(("Db::needUpdate: docid %d set\n", *it));
|
||||
updated[*it] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
i_setExistingFlags(udi, *docid);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -228,15 +228,36 @@ class Db {
|
||||
|
||||
/* Update-related methods ******************************************/
|
||||
|
||||
/** Test if the db entry for the given udi is up to date (by
|
||||
* comparing the input and stored sigs). This is used both when
|
||||
* indexing and querying (before opening a document using stale info),
|
||||
/** Test if the db entry for the given udi is up to date.
|
||||
*
|
||||
* This is done by comparing the input and stored sigs. This is
|
||||
* used both when indexing and querying (before opening a document
|
||||
* using stale info).
|
||||
*
|
||||
* **This assumes that the udi pertains to the main index (idxi==0).**
|
||||
* Side-effect when the db is writeable: set the existence flag
|
||||
* for the file document and all subdocs if any (for later use by
|
||||
* 'purge()')
|
||||
*
|
||||
* Side-effect when the db is writeable and the document up to
|
||||
* date: set the existence flag for the file document and all
|
||||
* subdocs if any (for later use by 'purge()')
|
||||
*
|
||||
* @param udi Unique Document Identifier (as chosen by indexer).
|
||||
* @param sig New signature (as computed by indexer).
|
||||
* @param xdocid[output] Non-zero if doc existed. Should be considered
|
||||
* as opaque, to be used for a possible later call to setExistingFlags()
|
||||
* Note that if inplaceReset is set, the output value is non-zero but not
|
||||
* an actual docid, it's only used as a flag in this case.
|
||||
* @param osig[output] old signature.
|
||||
*/
|
||||
bool needUpdate(const string &udi, const string& sig, bool *existed=0);
|
||||
bool needUpdate(const string &udi, const string& sig,
|
||||
unsigned int *xdocid = 0, std::string *osig = 0);
|
||||
|
||||
/** Set the existence flags for the document and its subdocuments, if any
|
||||
*
|
||||
* This can be called by the indexer after needUpdate() has returned true,
|
||||
* if the indexer does not wish to actually re-index (e.g.: the doc is
|
||||
* known to cause errors).
|
||||
*/
|
||||
void setExistingFlags(const string& udi, unsigned int docid);
|
||||
|
||||
/** Indicate if we are doing a systematic reindex. This complements
|
||||
needUpdate() return */
|
||||
@ -488,6 +509,8 @@ private:
|
||||
friend void *DbUpdWorker(void*);
|
||||
#endif // IDX_THREADS
|
||||
|
||||
// Internal form of setExistingFlags: no locking
|
||||
void i_setExistingFlags(const string& udi, unsigned int docid);
|
||||
// Internal form of close, can be called during destruction
|
||||
bool i_close(bool final);
|
||||
// Reinitialize when adding/removing additional dbs
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user