Avoid purging documents from absent mountable volumes

parent c2691f68bf
commit 399c633efd
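Summary: when a configured topdir is missing or is an empty directory (the usual state of an unmounted removable volume), walking it finds nothing, and the later purge pass can then drop every index document below it. This change adds a path_empty() test to FsIndexer::index() and a new Rcl::Db::udiTreeMarkExisting() which marks every document whose UDI begins with the topdir path as still existing, so the purge leaves the subtree alone. Condensed from the indexer hunks below (comments added here for illustration):

    for (const auto& topdir : m_tdl) {
        if (path_empty(topdir)) {              // absent or unpopulated mount point
            m_db->udiTreeMarkExisting(topdir); // shield its docs from the purge
            continue;                          // skip walking this topdir for now
        }
        // ... set per-directory config and walk the tree as before ...
    }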
@@ -16,6 +16,8 @@
 */
 #include "autoconfig.h"
+
+#include "fsindexer.h"

 #include <stdio.h>
 #include <errno.h>
 #include <cstring>
@@ -35,7 +37,6 @@
 #include "rcldb.h"
 #include "readfile.h"
 #include "indexer.h"
-#include "fsindexer.h"
 #include "transcode.h"
 #include "log.h"
 #include "internfile.h"
@@ -107,7 +108,7 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
       m_dwqueue("Split", cnf->getThrConf(RclConfig::ThrSplit).first)
 #endif // IDX_THREADS
 {
-    LOGDEB1("FsIndexer::FsIndexer\n" );
+    LOGDEB1("FsIndexer::FsIndexer\n");
     m_havelocalfields = m_config->hasNameAnywhere("localfields");
     m_config->getConfParam("detectxattronly", &m_detectxattronly);

@@ -118,7 +119,7 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
     int internthreads = cnf->getThrConf(RclConfig::ThrIntern).second;
     if (internqlen >= 0) {
         if (!m_iwqueue.start(internthreads, FsIndexerInternfileWorker, this)) {
-            LOGERR("FsIndexer::FsIndexer: intern worker start failed\n" );
+            LOGERR("FsIndexer::FsIndexer: intern worker start failed\n");
             return;
         }
         m_haveInternQ = true;
@@ -127,28 +128,31 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
     int splitthreads = cnf->getThrConf(RclConfig::ThrSplit).second;
     if (splitqlen >= 0) {
         if (!m_dwqueue.start(splitthreads, FsIndexerDbUpdWorker, this)) {
-            LOGERR("FsIndexer::FsIndexer: split worker start failed\n" );
+            LOGERR("FsIndexer::FsIndexer: split worker start failed\n");
             return;
         }
         m_haveSplitQ = true;
     }
-    LOGDEB("FsIndexer: threads: haveIQ " << (m_haveInternQ) << " iql " << (internqlen) << " iqts " << (internthreads) << " haveSQ " << (m_haveSplitQ) << " sql " << (splitqlen) << " sqts " << (splitthreads) << "\n" );
+    LOGDEB("FsIndexer: threads: haveIQ " << m_haveInternQ << " iql " <<
+           internqlen << " iqts " << internthreads << " haveSQ " <<
+           m_haveSplitQ << " sql " << splitqlen << " sqts " << splitthreads <<
+           "\n");
 #endif // IDX_THREADS
 }

 FsIndexer::~FsIndexer()
 {
-    LOGDEB1("FsIndexer::~FsIndexer()\n" );
+    LOGDEB1("FsIndexer::~FsIndexer()\n");

 #ifdef IDX_THREADS
     void *status;
     if (m_haveInternQ) {
         status = m_iwqueue.setTerminateAndWait();
-        LOGDEB0("FsIndexer: internfile wrkr status: " << (status) << " (1->ok)\n" );
+        LOGDEB0("FsIndexer: internfile wrkr status: "<< status << " (1->ok)\n");
     }
     if (m_haveSplitQ) {
         status = m_dwqueue.setTerminateAndWait();
-        LOGDEB0("FsIndexer: dbupd worker status: " << (status) << " (1->ok)\n" );
+        LOGDEB0("FsIndexer: dbupd worker status: " << status << " (1->ok)\n");
     }
     delete m_stableconfig;
 #endif // IDX_THREADS
@@ -161,13 +165,28 @@ bool FsIndexer::init()
     if (m_tdl.empty()) {
         m_tdl = m_config->getTopdirs();
         if (m_tdl.empty()) {
-            LOGERR("FsIndexers: no topdirs list defined\n" );
+            LOGERR("FsIndexers: no topdirs list defined\n");
             return false;
         }
     }
     return true;
 }

+// Check if path is either non-existing or an empty directory.
+static bool path_empty(const string& path)
+{
+    if (path_isdir(path)) {
+        string reason;
+        std::set<string> entries;
+        if (!readdir(path, reason, entries) || entries.empty()) {
+            return true;
+        }
+        return false;
+    } else {
+        return !path_exists(path);
+    }
+}
+
 // Recursively index each directory in the topdirs:
 bool FsIndexer::index(int flags)
 {
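Note: path_isdir(), path_exists() and the three-argument readdir() above are evidently the project's portable path helpers rather than the POSIX calls (a POSIX readdir() does not fill a std::set). For illustration only, a standalone sketch of the same "absent or empty" test written against std::filesystem (C++17) could look like this:

    #include <filesystem>
    #include <iostream>

    // Returns true when p does not exist, or is a directory with no
    // entries -- the typical look of an unmounted mount point.
    static bool path_empty_sketch(const std::filesystem::path& p)
    {
        namespace fs = std::filesystem;
        std::error_code ec;
        if (fs::is_directory(p, ec)) {
            // Empty directory <=> begin iterator equals the end iterator.
            return fs::directory_iterator(p, ec) == fs::directory_iterator();
        }
        return !fs::exists(p, ec);
    }

    int main(int argc, char **argv)
    {
        for (int i = 1; i < argc; i++) {
            std::cout << argv[i] << ": "
                      << (path_empty_sketch(argv[i]) ? "absent/empty" : "present")
                      << "\n";
        }
        return 0;
    }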
@@ -190,14 +209,21 @@ bool FsIndexer::index(int flags)
         m_walker.setMaxDepth(2);
     }

-    for (vector<string>::const_iterator it = m_tdl.begin();
-         it != m_tdl.end(); it++) {
-        LOGDEB("FsIndexer::index: Indexing " << *it << " into " <<
+    for (const auto& topdir : m_tdl) {
+        LOGDEB("FsIndexer::index: Indexing " << topdir << " into " <<
                getDbDir() << "\n");
+
+        // If a topdirs member appears to be not here or not mounted
+        // (empty), avoid deleting all the related index content by
+        // marking the current docs as existing.
+        if (path_empty(topdir)) {
+            m_db->udiTreeMarkExisting(topdir);
+            continue;
+        }

         // Set the current directory in config so that subsequent
         // getConfParams() will get local values
-        m_config->setKeyDir(*it);
+        m_config->setKeyDir(topdir);

         // Adjust the "follow symlinks" option
         bool follow;
@@ -214,8 +240,8 @@ bool FsIndexer::index(int flags)
         m_db->setAbstractParams(abslen, -1, -1);

         // Walk the directory tree
-        if (m_walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
-            LOGERR("FsIndexer::index: error while indexing " << *it <<
+        if (m_walker.walk(topdir, *this) != FsTreeWalker::FtwOk) {
+            LOGERR("FsIndexer::index: error while indexing " << topdir <<
                    ": " << m_walker.getReason() << "\n");
             return false;
         }
@@ -233,11 +259,12 @@ bool FsIndexer::index(int flags)
         string missing;
         m_missing->getMissingDescription(missing);
         if (!missing.empty()) {
-            LOGINFO("FsIndexer::index missing helper program(s):\n" << (missing) << "\n" );
+            LOGINFO("FsIndexer::index missing helper program(s):\n" <<
+                    missing << "\n");
         }
         m_config->storeMissingHelperDesc(missing);
     }
-    LOGINFO("fsindexer index time: " << (chron.millis()) << " mS\n" );
+    LOGINFO("fsindexer index time: " << chron.millis() << " mS\n");
     return true;
 }

@@ -258,7 +285,8 @@ static bool matchesSkipped(const vector<string>& tdl,
     for (vector<string>::const_iterator it = tdl.begin();
          it != tdl.end(); it++) {
         // the topdirs members are already canonized.
-        LOGDEB2("matchesSkipped: comparing ancestor [" << (mpath) << "] to topdir [" << (it) << "]\n" );
+        LOGDEB2("matchesSkipped: comparing ancestor [" << mpath <<
+                "] to topdir [" << it << "]\n");
         if (!mpath.compare(*it)) {
             topdir = *it;
             goto goodpath;
@@ -266,7 +294,7 @@ static bool matchesSkipped(const vector<string>& tdl,
     }

     if (walker.inSkippedPaths(mpath, false)) {
-        LOGDEB("FsIndexer::indexFiles: skipping [" << (path) << "] (skpp)\n" );
+        LOGDEB("FsIndexer::indexFiles: skipping [" << path << "] (skpp)\n");
         return true;
     }

@@ -280,12 +308,13 @@ static bool matchesSkipped(const vector<string>& tdl,
         // path did not shorten, something is seriously amiss
         // (could be an assert actually)
         if (mpath.length() >= len) {
-            LOGERR("FsIndexer::indexFile: internal Error: path [" << (mpath) << "] did not shorten\n" );
+            LOGERR("FsIndexer::indexFile: internal Error: path [" << mpath <<
+                   "] did not shorten\n");
             return true;
         }
     }
     // We get there if neither topdirs nor skippedPaths tests matched
-    LOGDEB("FsIndexer::indexFiles: skipping [" << (path) << "] (ntd)\n" );
+    LOGDEB("FsIndexer::indexFiles: skipping [" << path << "] (ntd)\n");
     return true;

 goodpath:
@@ -295,7 +324,7 @@ goodpath:
     while (mpath.length() >= topdir.length() && mpath.length() > 1) {
         string fn = path_getsimple(mpath);
         if (walker.inSkippedNames(fn)) {
-            LOGDEB("FsIndexer::indexFiles: skipping [" << (path) << "] (skpn)\n" );
+            LOGDEB("FsIndexer::indexFiles: skipping [" << path << "] (skpn)\n");
             return true;
         }

@@ -319,7 +348,7 @@ goodpath:
 */
 bool FsIndexer::indexFiles(list<string>& files, int flags)
 {
-    LOGDEB("FsIndexer::indexFiles\n" );
+    LOGDEB("FsIndexer::indexFiles\n");
     m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
     bool ret = false;

@@ -337,7 +366,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
     walker.setSkippedPaths(m_config->getSkippedPaths());

     for (list<string>::iterator it = files.begin(); it != files.end(); ) {
-        LOGDEB2("FsIndexer::indexFiles: [" << (it) << "]\n" );
+        LOGDEB2("FsIndexer::indexFiles: [" << it << "]\n");

         m_config->setKeyDir(path_getfather(*it));
         if (m_havelocalfields)
@@ -357,7 +386,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
         struct stat stb;
         int ststat = path_fileprops(*it, &stb, follow);
         if (ststat != 0) {
-            LOGERR("FsIndexer::indexFiles: (l)stat " << *it << ": " <<
+            LOGERR("FsIndexer::indexFiles: (l)stat " << *it << ": " <<
                    strerror(errno) << "\n");
             it++;
             continue;
@@ -365,7 +394,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)

         if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
             FsTreeWalker::FtwOk) {
-            LOGERR("FsIndexer::indexFiles: processone failed\n" );
+            LOGERR("FsIndexer::indexFiles: processone failed\n");
             goto out;
         }
         it = files.erase(it);
@@ -383,11 +412,11 @@ out:

     // Purge possible orphan documents
     if (ret == true) {
-        LOGDEB("Indexfiles: purging orphans\n" );
+        LOGDEB("Indexfiles: purging orphans\n");
         const vector<string>& purgecandidates = m_purgeCandidates.getCandidates();
         for (vector<string>::const_iterator it = purgecandidates.begin();
              it != purgecandidates.end(); it++) {
-            LOGDEB("Indexfiles: purging orphans for " << *it << "\n");
+            LOGDEB("Indexfiles: purging orphans for " << *it << "\n");
             m_db->purgeOrphans(*it);
         }
 #ifdef IDX_THREADS
@@ -395,7 +424,7 @@ out:
 #endif // IDX_THREADS
     }

-    LOGDEB("FsIndexer::indexFiles: done\n" );
+    LOGDEB("FsIndexer::indexFiles: done\n");
     return ret;
 }

@@ -403,7 +432,7 @@ out:
 /** Purge docs for given files out of the database */
 bool FsIndexer::purgeFiles(list<string>& files)
 {
-    LOGDEB("FsIndexer::purgeFiles\n" );
+    LOGDEB("FsIndexer::purgeFiles\n");
     bool ret = false;
     if (!init())
         return false;
@@ -415,7 +444,7 @@ bool FsIndexer::purgeFiles(list<string>& files)
     // found or deleted, false only in case of actual error
     bool existed;
     if (!m_db->purgeFile(udi, &existed)) {
-        LOGERR("FsIndexer::purgeFiles: Database error\n" );
+        LOGERR("FsIndexer::purgeFiles: Database error\n");
         goto out;
     }
     // If we actually deleted something, take it off the list
@@ -435,14 +464,14 @@ out:
     m_dwqueue.waitIdle();
     m_db->waitUpdIdle();
 #endif // IDX_THREADS
-    LOGDEB("FsIndexer::purgeFiles: done\n" );
+    LOGDEB("FsIndexer::purgeFiles: done\n");
     return ret;
 }

 // Local fields can be set for fs subtrees in the configuration file
 void FsIndexer::localfieldsfromconf()
 {
-    LOGDEB1("FsIndexer::localfieldsfromconf\n" );
+    LOGDEB1("FsIndexer::localfieldsfromconf\n");

     string sfields;
     m_config->getConfParam("localfields", sfields);
@@ -462,7 +491,8 @@ void FsIndexer::localfieldsfromconf()
          it != nmlst.end(); it++) {
         string nm = m_config->fieldCanon(*it);
         attrs.get(*it, m_localfields[nm]);
-        LOGDEB2("FsIndexer::localfieldsfromconf: [" << (nm) << "]->[" << (m_localfields[nm]) << "]\n" );
+        LOGDEB2("FsIndexer::localfieldsfromconf: [" << nm << "]->[" <<
+                m_localfields[nm] << "]\n");
     }
 }

@@ -501,9 +531,9 @@ void *FsIndexerDbUpdWorker(void * fsp)
         tqp->workerExit();
         return (void*)1;
     }
-    LOGDEB0("FsIndexerDbUpdWorker: task ql " << (int(qsz)) << "\n" );
+    LOGDEB0("FsIndexerDbUpdWorker: task ql " << qsz << "\n");
     if (!fip->m_db->addOrUpdate(tsk->udi, tsk->parent_udi, tsk->doc)) {
-        LOGERR("FsIndexerDbUpdWorker: addOrUpdate failed\n" );
+        LOGERR("FsIndexerDbUpdWorker: addOrUpdate failed\n");
         tqp->workerExit();
         return (void*)0;
     }
@@ -524,15 +554,15 @@ void *FsIndexerInternfileWorker(void * fsp)
         tqp->workerExit();
         return (void*)1;
     }
-    LOGDEB0("FsIndexerInternfileWorker: task fn " << (tsk->fn) << "\n" );
+    LOGDEB0("FsIndexerInternfileWorker: task fn " << tsk->fn << "\n");
     if (fip->processonefile(&myconf, tsk->fn, &tsk->statbuf,
                             tsk->localfields) !=
         FsTreeWalker::FtwOk) {
-        LOGERR("FsIndexerInternfileWorker: processone failed\n" );
+        LOGERR("FsIndexerInternfileWorker: processone failed\n");
         tqp->workerExit();
         return (void*)0;
     }
-    LOGDEB1("FsIndexerInternfileWorker: done fn " << (tsk->fn) << "\n" );
+    LOGDEB1("FsIndexerInternfileWorker: done fn " << tsk->fn << "\n");
     delete tsk;
     }
 }
@@ -636,7 +666,9 @@ FsIndexer::processonefile(RclConfig *config,
     bool xattronly = m_detectxattronly && !m_db->inFullReset() &&
         existingDoc && needupdate && (stp->st_mtime < stp->st_ctime);

-    LOGDEB("processone: needupdate " << (needupdate) << " noretry " << (m_noretryfailed) << " existing " << (existingDoc) << " oldsig [" << (oldsig) << "]\n" );
+    LOGDEB("processone: needupdate " << needupdate << " noretry " <<
+           m_noretryfailed << " existing " << existingDoc << " oldsig [" <<
+           oldsig << "]\n");

     // If noretryfailed is set, check for a file which previously
     // failed to index, and avoid re-processing it
@@ -646,14 +678,14 @@ FsIndexer::processonefile(RclConfig *config,
         // actually changed, we always retry (maybe it was fixed)
         string nold = oldsig.substr(0, oldsig.size()-1);
         if (!nold.compare(sig)) {
-            LOGDEB("processone: not retrying previously failed file\n" );
+            LOGDEB("processone: not retrying previously failed file\n");
             m_db->setExistingFlags(udi, existingDoc);
             needupdate = false;
         }
     }

     if (!needupdate) {
-        LOGDEB0("processone: up to date: " << (fn) << "\n" );
+        LOGDEB0("processone: up to date: " << fn << "\n");
         if (m_updater) {
 #ifdef IDX_THREADS
             std::unique_lock<std::mutex> locker(m_updater->m_mutex);
@@ -668,8 +700,8 @@ FsIndexer::processonefile(RclConfig *config,
         return FsTreeWalker::FtwOk;
     }

-    LOGDEB0("processone: processing: [" <<
-            displayableBytes(stp->st_size) << "] " << fn << "\n");
+    LOGDEB0("processone: processing: [" <<
+            displayableBytes(stp->st_size) << "] " << fn << "\n");

     // Note that we used to do the full path here, but I ended up
     // believing that it made more sense to use only the file name
@@ -703,7 +735,7 @@ FsIndexer::processonefile(RclConfig *config,
     try {
         fis = interner.internfile(doc);
     } catch (CancelExcept) {
-        LOGERR("fsIndexer::processone: interrupted\n" );
+        LOGERR("fsIndexer::processone: interrupted\n");
         return FsTreeWalker::FtwStop;
     }

@@ -774,7 +806,7 @@ FsIndexer::processonefile(RclConfig *config,
             DbUpdTask *tp = new DbUpdTask(udi, doc.ipath.empty() ?
                                           cstr_null : parent_udi, doc);
             if (!m_dwqueue.put(tp)) {
-                LOGERR("processonefile: wqueue.put failed\n" );
+                LOGERR("processonefile: wqueue.put failed\n");
                 return FsTreeWalker::FtwError;
             }
         } else {
@@ -813,7 +845,8 @@ FsIndexer::processonefile(RclConfig *config,
     // If this doc existed and it's a container, recording for
     // possible subdoc purge (this will be used only if we don't do a
     // db-wide purge, e.g. if we're called from indexfiles()).
-    LOGDEB2("processOnefile: existingDoc " << (existingDoc) << " hadNonNullIpath " << (hadNonNullIpath) << "\n" );
+    LOGDEB2("processOnefile: existingDoc " << existingDoc <<
+            " hadNonNullIpath " << hadNonNullIpath << "\n");
     if (existingDoc && hadNonNullIpath) {
         m_purgeCandidates.record(parent_udi);
     }
@@ -826,7 +859,7 @@ FsIndexer::processonefile(RclConfig *config,
     // If xattronly is set, ONLY the extattr metadata is valid and will be used
     // by the following step.
     if (xattronly || hadNullIpath == false) {
-        LOGDEB("Creating empty doc for file or pure xattr update\n" );
+        LOGDEB("Creating empty doc for file or pure xattr update\n");
         Rcl::Doc fileDoc;
         if (xattronly) {
             map<string, string> xfields;

@@ -46,7 +46,6 @@ using namespace std;
 #include "rclutil.h"
 #include "smallut.h"
 #include "chrono.h"
-#include "utf8iter.h"
 #include "searchdata.h"
 #include "rclquery.h"
 #include "rclquery_p.h"
@@ -144,21 +143,6 @@ static inline string make_parentterm(const string& udi)
     return pterm;
 }

-static void utf8truncate(string& s, int maxlen)
-{
-    if (s.size() <= string::size_type(maxlen)) {
-        return;
-    }
-    Utf8Iter iter(s);
-    string::size_type pos = 0;
-    while (iter++ != string::npos)
-        if (iter.getBpos() < string::size_type(maxlen)) {
-            pos = iter.getBpos();
-        }
-
-    s.erase(pos);
-}
-
 Db::Native::Native(Db *db)
     : m_rcldb(db), m_isopen(false), m_iswritable(false),
       m_noversionwrite(false)
@@ -2023,11 +2007,10 @@ void Db::i_setExistingFlags(const string& udi, unsigned int docid)
         LOGERR("Rcl::Db::needUpdate: can't get subdocs\n");
         return;
     }
-    for (vector<Xapian::docid>::iterator it = docids.begin();
-         it != docids.end(); it++) {
-        if (*it < updated.size()) {
-            LOGDEB2("Db::needUpdate: docid " << (*it) << " set\n");
-            updated[*it] = true;
+    for (auto docid : docids) {
+        if (docid < updated.size()) {
+            LOGDEB2("Db::needUpdate: docid " << docid << " set\n");
+            updated[docid] = true;
         }
     }
 }
@@ -2556,5 +2539,40 @@ bool Db::getSubDocs(const Doc &idoc, vector<Doc>& subdocs)
     return false;
 }

-} // End namespace Rcl
+// Walk an UDI section (all UDIs beginning with input prefix), and
+// mark all docs and subdocs as existing. Caller beware: Makes sense
+// or not depending on the UDI structure for the data store. In practise,
+// used for absent FS mountable volumes.
+bool Db::udiTreeMarkExisting(const string& udi)
+{
+    LOGDEB("Db::udiTreeWalk: " << udi << endl);
+    string wrapd = wrap_prefix(udi_prefix);
+    string expr = udi + "*";
+
+#ifdef IDX_THREADS
+    std::unique_lock<std::mutex> lock(m_ndb->m_mutex);
+#endif
+
+    bool ret = m_ndb->idxTermMatch_p(
+        int(ET_WILD), cstr_null, expr,
+        [this, &udi](const string& term, Xapian::termcount, Xapian::doccount) {
+            Xapian::PostingIterator docid;
+            XAPTRY(docid = m_ndb->xrdb.postlist_begin(term), m_ndb->xrdb,
+                   m_reason);
+            if (!m_reason.empty()) {
+                LOGERR("Db::udiTreeWalk: xapian::postlist_begin failed: " <<
+                       m_reason << "\n");
+                return false;
+            }
+            if (docid == m_ndb->xrdb.postlist_end(term)) {
+                LOGDEB("Db::udiTreeWalk:no doc for " << term << " ??\n");
+                return false;
+            }
+            i_setExistingFlags(udi, *docid);
+            LOGDEB("Db::udiTreeWalk: uniterm: " << term << endl);
+            return true;
+        }, wrapd);
+    return ret;
+}
+
+} // End namespace Rcl
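The new function leans on the existing wildcard machinery: it expands udi + "*" against the stored UDI terms (restricted to the UDI term prefix), and for each matching term fetches the first posting, that is the document id, so that i_setExistingFlags() can flag the document and its subdocuments as present. The callback contract it relies on reduces to this generic, self-contained sketch (hypothetical names, illustration only, not Recoll's API):

    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    // Walk all entries matching a prefix; stop early when the client
    // callback returns false (the contract of idxTermMatch_p above).
    static void walkPrefix(const std::vector<std::string>& terms,
                           const std::string& prefix,
                           std::function<bool(const std::string&)> client)
    {
        for (const auto& t : terms) {
            if (t.compare(0, prefix.size(), prefix) == 0 && !client(t))
                break;
        }
    }

    int main()
    {
        std::vector<std::string> udis{"/vol/a", "/vol/a/doc1", "/vol/b/doc2"};
        walkPrefix(udis, "/vol/a", [](const std::string& udi) {
            std::cout << "mark existing: " << udi << "\n"; // cf. i_setExistingFlags()
            return true; // keep walking
        });
        return 0;
    }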
@@ -483,7 +483,14 @@ public:

     // Use empty fn for no synonyms
     bool setSynGroupsFile(const std::string& fn);

+    // Mark all documents with an UDI having input as prefix as
+    // existing. Only works if the UDIs for the store are
+    // hierarchical of course. Used by FsIndexer to avoid purging
+    // files for a topdir which is on a removable file system and
+    // currently unmounted (topdir does not exist or is empty.
+    bool udiTreeMarkExisting(const string& udi);
+
     /* This has to be public for access by embedded Query::Native */
     Native *m_ndb;
 private:
@@ -181,6 +181,13 @@ class Db::Native {
     */
     bool subDocs(const string &udi, int idxi, vector<Xapian::docid>& docids);

+    /** Matcher */
+    bool idxTermMatch_p(int typ_sens,const string &lang,const std::string &term,
+                        std::function<bool(const std::string& term,
+                                           Xapian::termcount colfreq,
+                                           Xapian::doccount termfreq)> client,
+                        const string& field);
+
     /** Check if a page position list is defined */
     bool hasPages(Xapian::docid id);

@@ -330,34 +330,14 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
     return true;
 }

-// Second phase of wildcard/regexp term expansion after case/diac
-// expansion: expand against main index terms
-bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
-                      TermMatchResult& res, int max, const string& field)
+bool Db::Native::idxTermMatch_p(
+    int typ, const string &lang, const string &root,
+    std::function<bool(const string& term,
+                       Xapian::termcount colfreq,
+                       Xapian::doccount termfreq)> client,
+    const string& prefix)
 {
-    int typ = matchTypeTp(typ_sens);
-    LOGDEB1("Db::idxTermMatch: typ " << tmtptostr(typ) << " lang [" <<
-            lang << "] term [" << root << "] max " << max << " field [" <<
-            field << "] init res.size " << res.entries.size() << "\n");
-
     if (typ == ET_STEM) {
         LOGFATAL("RCLDB: internal error: idxTermMatch called with ET_STEM\n");
         abort();
     }

-    Xapian::Database xdb = m_ndb->xrdb;
-
-    string prefix;
-    if (!field.empty()) {
-        const FieldTraits *ftp = 0;
-        if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
-            LOGDEB("Db::termMatch: field is not indexed (no prefix): [" <<
-                   field << "]\n");
-        } else {
-            prefix = wrap_prefix(ftp->pfx);
-        }
-    }
-    res.prefix = prefix;
+    Xapian::Database xdb = xrdb;

     std::shared_ptr<StrMatcher> matcher;
     if (typ == ET_REGEXP) {
@@ -418,35 +398,74 @@
             if (matcher && !matcher->match(term))
                 continue;

-            res.entries.push_back(
-                TermMatchEntry(ixterm, xdb.get_collection_freq(ixterm),
-                               it.get_termfreq()));
-
-            // The problem with truncating here is that this is done
-            // alphabetically and we may not keep the most frequent
-            // terms. OTOH, not doing it may stall the program if
-            // we are walking the whole term list. We compromise
-            // by cutting at 2*max
-            if (max > 0 && ++rcnt >= 2*max)
+            if (!client(ixterm, xdb.get_collection_freq(ixterm),
+                        it.get_termfreq())) {
                 break;
+            }
         }
-        m_reason.erase();
+        m_rcldb->m_reason.erase();
         break;
     } catch (const Xapian::DatabaseModifiedError &e) {
-        m_reason = e.get_msg();
+        m_rcldb->m_reason = e.get_msg();
         xdb.reopen();
         continue;
-    } XCATCHERROR(m_reason);
+    } XCATCHERROR(m_rcldb->m_reason);
     break;
     }
-    if (!m_reason.empty()) {
-        LOGERR("termMatch: " << m_reason << "\n");
+    if (!m_rcldb->m_reason.empty()) {
+        LOGERR("termMatch: " << m_rcldb->m_reason << "\n");
         return false;
     }

     return true;
 }

+
+// Second phase of wildcard/regexp term expansion after case/diac
+// expansion: expand against main index terms
+bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
+                      TermMatchResult& res, int max, const string& field)
+{
+    int typ = matchTypeTp(typ_sens);
+    LOGDEB1("Db::idxTermMatch: typ " << tmtptostr(typ) << " lang [" <<
+            lang << "] term [" << root << "] max " << max << " field [" <<
+            field << "] init res.size " << res.entries.size() << "\n");
+
+    if (typ == ET_STEM) {
+        LOGFATAL("RCLDB: internal error: idxTermMatch called with ET_STEM\n");
+        abort();
+    }
+    string prefix;
+    if (!field.empty()) {
+        const FieldTraits *ftp = 0;
+        if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
+            LOGDEB("Db::termMatch: field is not indexed (no prefix): [" <<
+                   field << "]\n");
+        } else {
+            prefix = wrap_prefix(ftp->pfx);
+        }
+    }
+    res.prefix = prefix;
+
+    int rcnt = 0;
+    bool ret = m_ndb->idxTermMatch_p(
+        typ, lang, root,
+        [&res, &rcnt, max](const string& term,
+                           Xapian::termcount cf, Xapian::doccount tf) {
+            res.entries.push_back(TermMatchEntry(term, cf, tf));
+            // The problem with truncating here is that this is done
+            // alphabetically and we may not keep the most frequent
+            // terms. OTOH, not doing it may stall the program if
+            // we are walking the whole term list. We compromise
+            // by cutting at 2*max
+            if (max > 0 && ++rcnt >= 2*max)
+                return false;
+            return true;
+        }, prefix);
+
+    return ret;
+}
+
 /** Term list walking. */
 class TermIter {
 public:
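Design note on these last two hunks: the Xapian term walk, including the reopen-and-retry loop for Xapian::DatabaseModifiedError, moves into Db::Native::idxTermMatch_p(), which reports each matching term to a std::function client and stops as soon as the client returns false. Db::idxTermMatch() becomes a thin wrapper whose lambda fills the TermMatchResult and keeps the old 2*max alphabetical cutoff, while udiTreeMarkExisting() reuses the same walker with an ET_WILD pattern over UDI terms, which is what lets the absent-volume protection share the existing expansion code.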