Avoid purging documents from absent mountable volumes
This commit is contained in:
parent
c2691f68bf
commit
399c633efd
@ -16,6 +16,8 @@
|
|||||||
*/
|
*/
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
|
|
||||||
|
#include "fsindexer.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
@ -35,7 +37,6 @@
|
|||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
#include "readfile.h"
|
#include "readfile.h"
|
||||||
#include "indexer.h"
|
#include "indexer.h"
|
||||||
#include "fsindexer.h"
|
|
||||||
#include "transcode.h"
|
#include "transcode.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "internfile.h"
|
#include "internfile.h"
|
||||||
@ -107,7 +108,7 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
|
|||||||
m_dwqueue("Split", cnf->getThrConf(RclConfig::ThrSplit).first)
|
m_dwqueue("Split", cnf->getThrConf(RclConfig::ThrSplit).first)
|
||||||
#endif // IDX_THREADS
|
#endif // IDX_THREADS
|
||||||
{
|
{
|
||||||
LOGDEB1("FsIndexer::FsIndexer\n" );
|
LOGDEB1("FsIndexer::FsIndexer\n");
|
||||||
m_havelocalfields = m_config->hasNameAnywhere("localfields");
|
m_havelocalfields = m_config->hasNameAnywhere("localfields");
|
||||||
m_config->getConfParam("detectxattronly", &m_detectxattronly);
|
m_config->getConfParam("detectxattronly", &m_detectxattronly);
|
||||||
|
|
||||||
@ -118,7 +119,7 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
|
|||||||
int internthreads = cnf->getThrConf(RclConfig::ThrIntern).second;
|
int internthreads = cnf->getThrConf(RclConfig::ThrIntern).second;
|
||||||
if (internqlen >= 0) {
|
if (internqlen >= 0) {
|
||||||
if (!m_iwqueue.start(internthreads, FsIndexerInternfileWorker, this)) {
|
if (!m_iwqueue.start(internthreads, FsIndexerInternfileWorker, this)) {
|
||||||
LOGERR("FsIndexer::FsIndexer: intern worker start failed\n" );
|
LOGERR("FsIndexer::FsIndexer: intern worker start failed\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
m_haveInternQ = true;
|
m_haveInternQ = true;
|
||||||
@ -127,28 +128,31 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
|
|||||||
int splitthreads = cnf->getThrConf(RclConfig::ThrSplit).second;
|
int splitthreads = cnf->getThrConf(RclConfig::ThrSplit).second;
|
||||||
if (splitqlen >= 0) {
|
if (splitqlen >= 0) {
|
||||||
if (!m_dwqueue.start(splitthreads, FsIndexerDbUpdWorker, this)) {
|
if (!m_dwqueue.start(splitthreads, FsIndexerDbUpdWorker, this)) {
|
||||||
LOGERR("FsIndexer::FsIndexer: split worker start failed\n" );
|
LOGERR("FsIndexer::FsIndexer: split worker start failed\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
m_haveSplitQ = true;
|
m_haveSplitQ = true;
|
||||||
}
|
}
|
||||||
LOGDEB("FsIndexer: threads: haveIQ " << (m_haveInternQ) << " iql " << (internqlen) << " iqts " << (internthreads) << " haveSQ " << (m_haveSplitQ) << " sql " << (splitqlen) << " sqts " << (splitthreads) << "\n" );
|
LOGDEB("FsIndexer: threads: haveIQ " << m_haveInternQ << " iql " <<
|
||||||
|
internqlen << " iqts " << internthreads << " haveSQ " <<
|
||||||
|
m_haveSplitQ << " sql " << splitqlen << " sqts " << splitthreads <<
|
||||||
|
"\n");
|
||||||
#endif // IDX_THREADS
|
#endif // IDX_THREADS
|
||||||
}
|
}
|
||||||
|
|
||||||
FsIndexer::~FsIndexer()
|
FsIndexer::~FsIndexer()
|
||||||
{
|
{
|
||||||
LOGDEB1("FsIndexer::~FsIndexer()\n" );
|
LOGDEB1("FsIndexer::~FsIndexer()\n");
|
||||||
|
|
||||||
#ifdef IDX_THREADS
|
#ifdef IDX_THREADS
|
||||||
void *status;
|
void *status;
|
||||||
if (m_haveInternQ) {
|
if (m_haveInternQ) {
|
||||||
status = m_iwqueue.setTerminateAndWait();
|
status = m_iwqueue.setTerminateAndWait();
|
||||||
LOGDEB0("FsIndexer: internfile wrkr status: " << (status) << " (1->ok)\n" );
|
LOGDEB0("FsIndexer: internfile wrkr status: "<< status << " (1->ok)\n");
|
||||||
}
|
}
|
||||||
if (m_haveSplitQ) {
|
if (m_haveSplitQ) {
|
||||||
status = m_dwqueue.setTerminateAndWait();
|
status = m_dwqueue.setTerminateAndWait();
|
||||||
LOGDEB0("FsIndexer: dbupd worker status: " << (status) << " (1->ok)\n" );
|
LOGDEB0("FsIndexer: dbupd worker status: " << status << " (1->ok)\n");
|
||||||
}
|
}
|
||||||
delete m_stableconfig;
|
delete m_stableconfig;
|
||||||
#endif // IDX_THREADS
|
#endif // IDX_THREADS
|
||||||
@ -161,13 +165,28 @@ bool FsIndexer::init()
|
|||||||
if (m_tdl.empty()) {
|
if (m_tdl.empty()) {
|
||||||
m_tdl = m_config->getTopdirs();
|
m_tdl = m_config->getTopdirs();
|
||||||
if (m_tdl.empty()) {
|
if (m_tdl.empty()) {
|
||||||
LOGERR("FsIndexers: no topdirs list defined\n" );
|
LOGERR("FsIndexers: no topdirs list defined\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if path is either non-existing or an empty directory.
// Returns true when:
//  - path is a directory with no entries, or one that readdir() fails
//    to list (a listing failure is treated like "empty"),
//  - path is not a directory and does not exist.
// Returns false for a directory with at least one entry and for an
// existing non-directory.
|
||||||
|
static bool path_empty(const string& path)
|
||||||
|
{
|
||||||
|
if (path_isdir(path)) {
|
||||||
|
string reason;
|
||||||
|
std::set<string> entries;
|
||||||
|
if (!readdir(path, reason, entries) || entries.empty()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
return !path_exists(path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Recursively index each directory in the topdirs:
|
// Recursively index each directory in the topdirs:
|
||||||
bool FsIndexer::index(int flags)
|
bool FsIndexer::index(int flags)
|
||||||
{
|
{
|
||||||
@ -190,14 +209,21 @@ bool FsIndexer::index(int flags)
|
|||||||
m_walker.setMaxDepth(2);
|
m_walker.setMaxDepth(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (vector<string>::const_iterator it = m_tdl.begin();
|
for (const auto& topdir : m_tdl) {
|
||||||
it != m_tdl.end(); it++) {
|
LOGDEB("FsIndexer::index: Indexing " << topdir << " into " <<
|
||||||
LOGDEB("FsIndexer::index: Indexing " << *it << " into " <<
|
|
||||||
getDbDir() << "\n");
|
getDbDir() << "\n");
|
||||||
|
|
||||||
|
// If a topdirs member appears to be not here or not mounted
|
||||||
|
// (empty), avoid deleting all the related index content by
|
||||||
|
// marking the current docs as existing.
|
||||||
|
if (path_empty(topdir)) {
|
||||||
|
m_db->udiTreeMarkExisting(topdir);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Set the current directory in config so that subsequent
|
// Set the current directory in config so that subsequent
|
||||||
// getConfParams() will get local values
|
// getConfParams() will get local values
|
||||||
m_config->setKeyDir(*it);
|
m_config->setKeyDir(topdir);
|
||||||
|
|
||||||
// Adjust the "follow symlinks" option
|
// Adjust the "follow symlinks" option
|
||||||
bool follow;
|
bool follow;
|
||||||
@ -214,8 +240,8 @@ bool FsIndexer::index(int flags)
|
|||||||
m_db->setAbstractParams(abslen, -1, -1);
|
m_db->setAbstractParams(abslen, -1, -1);
|
||||||
|
|
||||||
// Walk the directory tree
|
// Walk the directory tree
|
||||||
if (m_walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
|
if (m_walker.walk(topdir, *this) != FsTreeWalker::FtwOk) {
|
||||||
LOGERR("FsIndexer::index: error while indexing " << *it <<
|
LOGERR("FsIndexer::index: error while indexing " << topdir <<
|
||||||
": " << m_walker.getReason() << "\n");
|
": " << m_walker.getReason() << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -233,11 +259,12 @@ bool FsIndexer::index(int flags)
|
|||||||
string missing;
|
string missing;
|
||||||
m_missing->getMissingDescription(missing);
|
m_missing->getMissingDescription(missing);
|
||||||
if (!missing.empty()) {
|
if (!missing.empty()) {
|
||||||
LOGINFO("FsIndexer::index missing helper program(s):\n" << (missing) << "\n" );
|
LOGINFO("FsIndexer::index missing helper program(s):\n" <<
|
||||||
|
missing << "\n");
|
||||||
}
|
}
|
||||||
m_config->storeMissingHelperDesc(missing);
|
m_config->storeMissingHelperDesc(missing);
|
||||||
}
|
}
|
||||||
LOGINFO("fsindexer index time: " << (chron.millis()) << " mS\n" );
|
LOGINFO("fsindexer index time: " << chron.millis() << " mS\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -258,7 +285,8 @@ static bool matchesSkipped(const vector<string>& tdl,
|
|||||||
for (vector<string>::const_iterator it = tdl.begin();
|
for (vector<string>::const_iterator it = tdl.begin();
|
||||||
it != tdl.end(); it++) {
|
it != tdl.end(); it++) {
|
||||||
// the topdirs members are already canonized.
|
// the topdirs members are already canonized.
|
||||||
LOGDEB2("matchesSkipped: comparing ancestor [" << (mpath) << "] to topdir [" << (it) << "]\n" );
|
LOGDEB2("matchesSkipped: comparing ancestor [" << mpath <<
|
||||||
|
"] to topdir [" << it << "]\n");
|
||||||
if (!mpath.compare(*it)) {
|
if (!mpath.compare(*it)) {
|
||||||
topdir = *it;
|
topdir = *it;
|
||||||
goto goodpath;
|
goto goodpath;
|
||||||
@ -266,7 +294,7 @@ static bool matchesSkipped(const vector<string>& tdl,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (walker.inSkippedPaths(mpath, false)) {
|
if (walker.inSkippedPaths(mpath, false)) {
|
||||||
LOGDEB("FsIndexer::indexFiles: skipping [" << (path) << "] (skpp)\n" );
|
LOGDEB("FsIndexer::indexFiles: skipping [" << path << "] (skpp)\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -280,12 +308,13 @@ static bool matchesSkipped(const vector<string>& tdl,
|
|||||||
// path did not shorten, something is seriously amiss
|
// path did not shorten, something is seriously amiss
|
||||||
// (could be an assert actually)
|
// (could be an assert actually)
|
||||||
if (mpath.length() >= len) {
|
if (mpath.length() >= len) {
|
||||||
LOGERR("FsIndexer::indexFile: internal Error: path [" << (mpath) << "] did not shorten\n" );
|
LOGERR("FsIndexer::indexFile: internal Error: path [" << mpath <<
|
||||||
|
"] did not shorten\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// We get there if neither topdirs nor skippedPaths tests matched
|
// We get there if neither topdirs nor skippedPaths tests matched
|
||||||
LOGDEB("FsIndexer::indexFiles: skipping [" << (path) << "] (ntd)\n" );
|
LOGDEB("FsIndexer::indexFiles: skipping [" << path << "] (ntd)\n");
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
goodpath:
|
goodpath:
|
||||||
@ -295,7 +324,7 @@ goodpath:
|
|||||||
while (mpath.length() >= topdir.length() && mpath.length() > 1) {
|
while (mpath.length() >= topdir.length() && mpath.length() > 1) {
|
||||||
string fn = path_getsimple(mpath);
|
string fn = path_getsimple(mpath);
|
||||||
if (walker.inSkippedNames(fn)) {
|
if (walker.inSkippedNames(fn)) {
|
||||||
LOGDEB("FsIndexer::indexFiles: skipping [" << (path) << "] (skpn)\n" );
|
LOGDEB("FsIndexer::indexFiles: skipping [" << path << "] (skpn)\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -319,7 +348,7 @@ goodpath:
|
|||||||
*/
|
*/
|
||||||
bool FsIndexer::indexFiles(list<string>& files, int flags)
|
bool FsIndexer::indexFiles(list<string>& files, int flags)
|
||||||
{
|
{
|
||||||
LOGDEB("FsIndexer::indexFiles\n" );
|
LOGDEB("FsIndexer::indexFiles\n");
|
||||||
m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
|
m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
|
||||||
bool ret = false;
|
bool ret = false;
|
||||||
|
|
||||||
@ -337,7 +366,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
|||||||
walker.setSkippedPaths(m_config->getSkippedPaths());
|
walker.setSkippedPaths(m_config->getSkippedPaths());
|
||||||
|
|
||||||
for (list<string>::iterator it = files.begin(); it != files.end(); ) {
|
for (list<string>::iterator it = files.begin(); it != files.end(); ) {
|
||||||
LOGDEB2("FsIndexer::indexFiles: [" << (it) << "]\n" );
|
LOGDEB2("FsIndexer::indexFiles: [" << it << "]\n");
|
||||||
|
|
||||||
m_config->setKeyDir(path_getfather(*it));
|
m_config->setKeyDir(path_getfather(*it));
|
||||||
if (m_havelocalfields)
|
if (m_havelocalfields)
|
||||||
@ -357,7 +386,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
|||||||
struct stat stb;
|
struct stat stb;
|
||||||
int ststat = path_fileprops(*it, &stb, follow);
|
int ststat = path_fileprops(*it, &stb, follow);
|
||||||
if (ststat != 0) {
|
if (ststat != 0) {
|
||||||
LOGERR("FsIndexer::indexFiles: (l)stat " << *it << ": " <<
|
LOGERR("FsIndexer::indexFiles: (l)stat " << *it << ": " <<
|
||||||
strerror(errno) << "\n");
|
strerror(errno) << "\n");
|
||||||
it++;
|
it++;
|
||||||
continue;
|
continue;
|
||||||
@ -365,7 +394,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
|||||||
|
|
||||||
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
|
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
|
||||||
FsTreeWalker::FtwOk) {
|
FsTreeWalker::FtwOk) {
|
||||||
LOGERR("FsIndexer::indexFiles: processone failed\n" );
|
LOGERR("FsIndexer::indexFiles: processone failed\n");
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
it = files.erase(it);
|
it = files.erase(it);
|
||||||
@ -383,11 +412,11 @@ out:
|
|||||||
|
|
||||||
// Purge possible orphan documents
|
// Purge possible orphan documents
|
||||||
if (ret == true) {
|
if (ret == true) {
|
||||||
LOGDEB("Indexfiles: purging orphans\n" );
|
LOGDEB("Indexfiles: purging orphans\n");
|
||||||
const vector<string>& purgecandidates = m_purgeCandidates.getCandidates();
|
const vector<string>& purgecandidates = m_purgeCandidates.getCandidates();
|
||||||
for (vector<string>::const_iterator it = purgecandidates.begin();
|
for (vector<string>::const_iterator it = purgecandidates.begin();
|
||||||
it != purgecandidates.end(); it++) {
|
it != purgecandidates.end(); it++) {
|
||||||
LOGDEB("Indexfiles: purging orphans for " << *it << "\n");
|
LOGDEB("Indexfiles: purging orphans for " << *it << "\n");
|
||||||
m_db->purgeOrphans(*it);
|
m_db->purgeOrphans(*it);
|
||||||
}
|
}
|
||||||
#ifdef IDX_THREADS
|
#ifdef IDX_THREADS
|
||||||
@ -395,7 +424,7 @@ out:
|
|||||||
#endif // IDX_THREADS
|
#endif // IDX_THREADS
|
||||||
}
|
}
|
||||||
|
|
||||||
LOGDEB("FsIndexer::indexFiles: done\n" );
|
LOGDEB("FsIndexer::indexFiles: done\n");
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -403,7 +432,7 @@ out:
|
|||||||
/** Purge docs for given files out of the database */
|
/** Purge docs for given files out of the database */
|
||||||
bool FsIndexer::purgeFiles(list<string>& files)
|
bool FsIndexer::purgeFiles(list<string>& files)
|
||||||
{
|
{
|
||||||
LOGDEB("FsIndexer::purgeFiles\n" );
|
LOGDEB("FsIndexer::purgeFiles\n");
|
||||||
bool ret = false;
|
bool ret = false;
|
||||||
if (!init())
|
if (!init())
|
||||||
return false;
|
return false;
|
||||||
@ -415,7 +444,7 @@ bool FsIndexer::purgeFiles(list<string>& files)
|
|||||||
// found or deleted, false only in case of actual error
|
// found or deleted, false only in case of actual error
|
||||||
bool existed;
|
bool existed;
|
||||||
if (!m_db->purgeFile(udi, &existed)) {
|
if (!m_db->purgeFile(udi, &existed)) {
|
||||||
LOGERR("FsIndexer::purgeFiles: Database error\n" );
|
LOGERR("FsIndexer::purgeFiles: Database error\n");
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
// If we actually deleted something, take it off the list
|
// If we actually deleted something, take it off the list
|
||||||
@ -435,14 +464,14 @@ out:
|
|||||||
m_dwqueue.waitIdle();
|
m_dwqueue.waitIdle();
|
||||||
m_db->waitUpdIdle();
|
m_db->waitUpdIdle();
|
||||||
#endif // IDX_THREADS
|
#endif // IDX_THREADS
|
||||||
LOGDEB("FsIndexer::purgeFiles: done\n" );
|
LOGDEB("FsIndexer::purgeFiles: done\n");
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Local fields can be set for fs subtrees in the configuration file
|
// Local fields can be set for fs subtrees in the configuration file
|
||||||
void FsIndexer::localfieldsfromconf()
|
void FsIndexer::localfieldsfromconf()
|
||||||
{
|
{
|
||||||
LOGDEB1("FsIndexer::localfieldsfromconf\n" );
|
LOGDEB1("FsIndexer::localfieldsfromconf\n");
|
||||||
|
|
||||||
string sfields;
|
string sfields;
|
||||||
m_config->getConfParam("localfields", sfields);
|
m_config->getConfParam("localfields", sfields);
|
||||||
@ -462,7 +491,8 @@ void FsIndexer::localfieldsfromconf()
|
|||||||
it != nmlst.end(); it++) {
|
it != nmlst.end(); it++) {
|
||||||
string nm = m_config->fieldCanon(*it);
|
string nm = m_config->fieldCanon(*it);
|
||||||
attrs.get(*it, m_localfields[nm]);
|
attrs.get(*it, m_localfields[nm]);
|
||||||
LOGDEB2("FsIndexer::localfieldsfromconf: [" << (nm) << "]->[" << (m_localfields[nm]) << "]\n" );
|
LOGDEB2("FsIndexer::localfieldsfromconf: [" << nm << "]->[" <<
|
||||||
|
m_localfields[nm] << "]\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -501,9 +531,9 @@ void *FsIndexerDbUpdWorker(void * fsp)
|
|||||||
tqp->workerExit();
|
tqp->workerExit();
|
||||||
return (void*)1;
|
return (void*)1;
|
||||||
}
|
}
|
||||||
LOGDEB0("FsIndexerDbUpdWorker: task ql " << (int(qsz)) << "\n" );
|
LOGDEB0("FsIndexerDbUpdWorker: task ql " << qsz << "\n");
|
||||||
if (!fip->m_db->addOrUpdate(tsk->udi, tsk->parent_udi, tsk->doc)) {
|
if (!fip->m_db->addOrUpdate(tsk->udi, tsk->parent_udi, tsk->doc)) {
|
||||||
LOGERR("FsIndexerDbUpdWorker: addOrUpdate failed\n" );
|
LOGERR("FsIndexerDbUpdWorker: addOrUpdate failed\n");
|
||||||
tqp->workerExit();
|
tqp->workerExit();
|
||||||
return (void*)0;
|
return (void*)0;
|
||||||
}
|
}
|
||||||
@ -524,15 +554,15 @@ void *FsIndexerInternfileWorker(void * fsp)
|
|||||||
tqp->workerExit();
|
tqp->workerExit();
|
||||||
return (void*)1;
|
return (void*)1;
|
||||||
}
|
}
|
||||||
LOGDEB0("FsIndexerInternfileWorker: task fn " << (tsk->fn) << "\n" );
|
LOGDEB0("FsIndexerInternfileWorker: task fn " << tsk->fn << "\n");
|
||||||
if (fip->processonefile(&myconf, tsk->fn, &tsk->statbuf,
|
if (fip->processonefile(&myconf, tsk->fn, &tsk->statbuf,
|
||||||
tsk->localfields) !=
|
tsk->localfields) !=
|
||||||
FsTreeWalker::FtwOk) {
|
FsTreeWalker::FtwOk) {
|
||||||
LOGERR("FsIndexerInternfileWorker: processone failed\n" );
|
LOGERR("FsIndexerInternfileWorker: processone failed\n");
|
||||||
tqp->workerExit();
|
tqp->workerExit();
|
||||||
return (void*)0;
|
return (void*)0;
|
||||||
}
|
}
|
||||||
LOGDEB1("FsIndexerInternfileWorker: done fn " << (tsk->fn) << "\n" );
|
LOGDEB1("FsIndexerInternfileWorker: done fn " << tsk->fn << "\n");
|
||||||
delete tsk;
|
delete tsk;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -636,7 +666,9 @@ FsIndexer::processonefile(RclConfig *config,
|
|||||||
bool xattronly = m_detectxattronly && !m_db->inFullReset() &&
|
bool xattronly = m_detectxattronly && !m_db->inFullReset() &&
|
||||||
existingDoc && needupdate && (stp->st_mtime < stp->st_ctime);
|
existingDoc && needupdate && (stp->st_mtime < stp->st_ctime);
|
||||||
|
|
||||||
LOGDEB("processone: needupdate " << (needupdate) << " noretry " << (m_noretryfailed) << " existing " << (existingDoc) << " oldsig [" << (oldsig) << "]\n" );
|
LOGDEB("processone: needupdate " << needupdate << " noretry " <<
|
||||||
|
m_noretryfailed << " existing " << existingDoc << " oldsig [" <<
|
||||||
|
oldsig << "]\n");
|
||||||
|
|
||||||
// If noretryfailed is set, check for a file which previously
|
// If noretryfailed is set, check for a file which previously
|
||||||
// failed to index, and avoid re-processing it
|
// failed to index, and avoid re-processing it
|
||||||
@ -646,14 +678,14 @@ FsIndexer::processonefile(RclConfig *config,
|
|||||||
// actually changed, we always retry (maybe it was fixed)
|
// actually changed, we always retry (maybe it was fixed)
|
||||||
string nold = oldsig.substr(0, oldsig.size()-1);
|
string nold = oldsig.substr(0, oldsig.size()-1);
|
||||||
if (!nold.compare(sig)) {
|
if (!nold.compare(sig)) {
|
||||||
LOGDEB("processone: not retrying previously failed file\n" );
|
LOGDEB("processone: not retrying previously failed file\n");
|
||||||
m_db->setExistingFlags(udi, existingDoc);
|
m_db->setExistingFlags(udi, existingDoc);
|
||||||
needupdate = false;
|
needupdate = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!needupdate) {
|
if (!needupdate) {
|
||||||
LOGDEB0("processone: up to date: " << (fn) << "\n" );
|
LOGDEB0("processone: up to date: " << fn << "\n");
|
||||||
if (m_updater) {
|
if (m_updater) {
|
||||||
#ifdef IDX_THREADS
|
#ifdef IDX_THREADS
|
||||||
std::unique_lock<std::mutex> locker(m_updater->m_mutex);
|
std::unique_lock<std::mutex> locker(m_updater->m_mutex);
|
||||||
@ -668,8 +700,8 @@ FsIndexer::processonefile(RclConfig *config,
|
|||||||
return FsTreeWalker::FtwOk;
|
return FsTreeWalker::FtwOk;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOGDEB0("processone: processing: [" <<
|
LOGDEB0("processone: processing: [" <<
|
||||||
displayableBytes(stp->st_size) << "] " << fn << "\n");
|
displayableBytes(stp->st_size) << "] " << fn << "\n");
|
||||||
|
|
||||||
// Note that we used to do the full path here, but I ended up
|
// Note that we used to do the full path here, but I ended up
|
||||||
// believing that it made more sense to use only the file name
|
// believing that it made more sense to use only the file name
|
||||||
@ -703,7 +735,7 @@ FsIndexer::processonefile(RclConfig *config,
|
|||||||
try {
|
try {
|
||||||
fis = interner.internfile(doc);
|
fis = interner.internfile(doc);
|
||||||
} catch (CancelExcept) {
|
} catch (CancelExcept) {
|
||||||
LOGERR("fsIndexer::processone: interrupted\n" );
|
LOGERR("fsIndexer::processone: interrupted\n");
|
||||||
return FsTreeWalker::FtwStop;
|
return FsTreeWalker::FtwStop;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -774,7 +806,7 @@ FsIndexer::processonefile(RclConfig *config,
|
|||||||
DbUpdTask *tp = new DbUpdTask(udi, doc.ipath.empty() ?
|
DbUpdTask *tp = new DbUpdTask(udi, doc.ipath.empty() ?
|
||||||
cstr_null : parent_udi, doc);
|
cstr_null : parent_udi, doc);
|
||||||
if (!m_dwqueue.put(tp)) {
|
if (!m_dwqueue.put(tp)) {
|
||||||
LOGERR("processonefile: wqueue.put failed\n" );
|
LOGERR("processonefile: wqueue.put failed\n");
|
||||||
return FsTreeWalker::FtwError;
|
return FsTreeWalker::FtwError;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -813,7 +845,8 @@ FsIndexer::processonefile(RclConfig *config,
|
|||||||
// If this doc existed and it's a container, recording for
|
// If this doc existed and it's a container, recording for
|
||||||
// possible subdoc purge (this will be used only if we don't do a
|
// possible subdoc purge (this will be used only if we don't do a
|
||||||
// db-wide purge, e.g. if we're called from indexfiles()).
|
// db-wide purge, e.g. if we're called from indexfiles()).
|
||||||
LOGDEB2("processOnefile: existingDoc " << (existingDoc) << " hadNonNullIpath " << (hadNonNullIpath) << "\n" );
|
LOGDEB2("processOnefile: existingDoc " << existingDoc <<
|
||||||
|
" hadNonNullIpath " << hadNonNullIpath << "\n");
|
||||||
if (existingDoc && hadNonNullIpath) {
|
if (existingDoc && hadNonNullIpath) {
|
||||||
m_purgeCandidates.record(parent_udi);
|
m_purgeCandidates.record(parent_udi);
|
||||||
}
|
}
|
||||||
@ -826,7 +859,7 @@ FsIndexer::processonefile(RclConfig *config,
|
|||||||
// If xattronly is set, ONLY the extattr metadata is valid and will be used
|
// If xattronly is set, ONLY the extattr metadata is valid and will be used
|
||||||
// by the following step.
|
// by the following step.
|
||||||
if (xattronly || hadNullIpath == false) {
|
if (xattronly || hadNullIpath == false) {
|
||||||
LOGDEB("Creating empty doc for file or pure xattr update\n" );
|
LOGDEB("Creating empty doc for file or pure xattr update\n");
|
||||||
Rcl::Doc fileDoc;
|
Rcl::Doc fileDoc;
|
||||||
if (xattronly) {
|
if (xattronly) {
|
||||||
map<string, string> xfields;
|
map<string, string> xfields;
|
||||||
|
|||||||
@ -46,7 +46,6 @@ using namespace std;
|
|||||||
#include "rclutil.h"
|
#include "rclutil.h"
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
#include "chrono.h"
|
#include "chrono.h"
|
||||||
#include "utf8iter.h"
|
|
||||||
#include "searchdata.h"
|
#include "searchdata.h"
|
||||||
#include "rclquery.h"
|
#include "rclquery.h"
|
||||||
#include "rclquery_p.h"
|
#include "rclquery_p.h"
|
||||||
@ -144,21 +143,6 @@ static inline string make_parentterm(const string& udi)
|
|||||||
return pterm;
|
return pterm;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Truncate s in place so that it is shorter than maxlen bytes. Does
// nothing when s.size() <= maxlen. Otherwise walks s with a Utf8Iter,
// remembers the last iterator position which is below maxlen, and
// erases everything from there on (pos stays 0 if no such position is
// seen, which empties the string).
// NOTE(review): presumably getBpos() is the byte offset of the current
// character, so that the cut falls on a character boundary -- confirm
// against Utf8Iter.
static void utf8truncate(string& s, int maxlen)
|
|
||||||
{
|
|
||||||
if (s.size() <= string::size_type(maxlen)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
Utf8Iter iter(s);
|
|
||||||
string::size_type pos = 0;
|
|
||||||
while (iter++ != string::npos)
|
|
||||||
if (iter.getBpos() < string::size_type(maxlen)) {
|
|
||||||
pos = iter.getBpos();
|
|
||||||
}
|
|
||||||
|
|
||||||
s.erase(pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
Db::Native::Native(Db *db)
|
Db::Native::Native(Db *db)
|
||||||
: m_rcldb(db), m_isopen(false), m_iswritable(false),
|
: m_rcldb(db), m_isopen(false), m_iswritable(false),
|
||||||
m_noversionwrite(false)
|
m_noversionwrite(false)
|
||||||
@ -2023,11 +2007,10 @@ void Db::i_setExistingFlags(const string& udi, unsigned int docid)
|
|||||||
LOGERR("Rcl::Db::needUpdate: can't get subdocs\n");
|
LOGERR("Rcl::Db::needUpdate: can't get subdocs\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (vector<Xapian::docid>::iterator it = docids.begin();
|
for (auto docid : docids) {
|
||||||
it != docids.end(); it++) {
|
if (docid < updated.size()) {
|
||||||
if (*it < updated.size()) {
|
LOGDEB2("Db::needUpdate: docid " << docid << " set\n");
|
||||||
LOGDEB2("Db::needUpdate: docid " << (*it) << " set\n");
|
updated[docid] = true;
|
||||||
updated[*it] = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2556,5 +2539,40 @@ bool Db::getSubDocs(const Doc &idoc, vector<Doc>& subdocs)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // End namespace Rcl
|
// Walk an UDI section (all UDIs beginning with input prefix), and
|
||||||
|
// mark all docs and subdocs as existing. Caller beware: Makes sense
|
||||||
|
// or not depending on the UDI structure for the data store. In practice,
|
||||||
|
// used for absent FS mountable volumes.
//
// Implementation: the wildcard expression "<udi>*" is expanded against
// the index terms carrying the wrapped UDI prefix, through
// Native::idxTermMatch_p(). For each matching term, the docid of its
// first posting is handed to i_setExistingFlags().
// @param udi the UDI prefix designating the subtree to preserve.
// @return the value returned by idxTermMatch_p().
// NOTE(review): the log messages below are tagged "Db::udiTreeWalk",
// not "Db::udiTreeMarkExisting".
|
||||||
|
bool Db::udiTreeMarkExisting(const string& udi)
|
||||||
|
{
|
||||||
|
LOGDEB("Db::udiTreeWalk: " << udi << endl);
|
||||||
|
string wrapd = wrap_prefix(udi_prefix);
|
||||||
|
string expr = udi + "*";
|
||||||
|
|
||||||
|
#ifdef IDX_THREADS
|
||||||
|
// Hold the native db mutex for the whole walk.
std::unique_lock<std::mutex> lock(m_ndb->m_mutex);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// The callback is run once per matching term. Returning false from it
// makes idxTermMatch_p() stop walking the term list.
// NOTE(review): idxTermMatch_p() appears to erase m_reason once its
// term loop is done, so the failures reported by the callback may not
// be reflected in the final return value -- confirm.
bool ret = m_ndb->idxTermMatch_p(
|
||||||
|
int(ET_WILD), cstr_null, expr,
|
||||||
|
[this, &udi](const string& term, Xapian::termcount, Xapian::doccount) {
|
||||||
|
Xapian::PostingIterator docid;
|
||||||
|
XAPTRY(docid = m_ndb->xrdb.postlist_begin(term), m_ndb->xrdb,
|
||||||
|
m_reason);
|
||||||
|
if (!m_reason.empty()) {
|
||||||
|
LOGERR("Db::udiTreeWalk: xapian::postlist_begin failed: " <<
|
||||||
|
m_reason << "\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// A matched term with an empty posting list is unexpected: stop.
if (docid == m_ndb->xrdb.postlist_end(term)) {
|
||||||
|
LOGDEB("Db::udiTreeWalk:no doc for " << term << " ??\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// NOTE(review): the input prefix udi is passed here, not the UDI of
// the matched term -- confirm this is what i_setExistingFlags() needs.
i_setExistingFlags(udi, *docid);
|
||||||
|
LOGDEB("Db::udiTreeWalk: uniterm: " << term << endl);
|
||||||
|
return true;
|
||||||
|
}, wrapd);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // End namespace Rcl
|
||||||
|
|||||||
@ -483,7 +483,14 @@ public:
|
|||||||
|
|
||||||
// Use empty fn for no synonyms
|
// Use empty fn for no synonyms
|
||||||
bool setSynGroupsFile(const std::string& fn);
|
bool setSynGroupsFile(const std::string& fn);
|
||||||
|
|
||||||
|
// Mark all documents with an UDI having input as prefix as
|
||||||
|
// existing. Only works if the UDIs for the store are
|
||||||
|
// hierarchical of course. Used by FsIndexer to avoid purging
|
||||||
|
// files for a topdir which is on a removable file system and
|
||||||
|
// currently unmounted (topdir does not exist or is empty).
|
||||||
|
bool udiTreeMarkExisting(const string& udi);
|
||||||
|
|
||||||
/* This has to be public for access by embedded Query::Native */
|
/* This has to be public for access by embedded Query::Native */
|
||||||
Native *m_ndb;
|
Native *m_ndb;
|
||||||
private:
|
private:
|
||||||
|
|||||||
@ -181,6 +181,13 @@ class Db::Native {
|
|||||||
*/
|
*/
|
||||||
bool subDocs(const string &udi, int idxi, vector<Xapian::docid>& docids);
|
bool subDocs(const string &udi, int idxi, vector<Xapian::docid>& docids);
|
||||||
|
|
||||||
|
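/** Term matcher: walk the index terms matching the input expression
 * (e.g. wildcard or regexp, depending on typ_sens) and call client
 * with each matching term, its collection frequency and its term
 * frequency. The walk stops as soon as client returns false.
 * Note that the last parameter is named prefix in the definition:
 * callers pass an already wrapped term prefix, not a field name. */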
|
||||||
|
bool idxTermMatch_p(int typ_sens,const string &lang,const std::string &term,
|
||||||
|
std::function<bool(const std::string& term,
|
||||||
|
Xapian::termcount colfreq,
|
||||||
|
Xapian::doccount termfreq)> client,
|
||||||
|
const string& field);
|
||||||
|
|
||||||
/** Check if a page position list is defined */
|
/** Check if a page position list is defined */
|
||||||
bool hasPages(Xapian::docid id);
|
bool hasPages(Xapian::docid id);
|
||||||
|
|
||||||
|
|||||||
@ -330,34 +330,14 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Second phase of wildcard/regexp term expansion after case/diac
|
bool Db::Native::idxTermMatch_p(
|
||||||
// expansion: expand against main index terms
|
int typ, const string &lang, const string &root,
|
||||||
bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
std::function<bool(const string& term,
|
||||||
TermMatchResult& res, int max, const string& field)
|
Xapian::termcount colfreq,
|
||||||
|
Xapian::doccount termfreq)> client,
|
||||||
|
const string& prefix)
|
||||||
{
|
{
|
||||||
int typ = matchTypeTp(typ_sens);
|
Xapian::Database xdb = xrdb;
|
||||||
LOGDEB1("Db::idxTermMatch: typ " << tmtptostr(typ) << " lang [" <<
|
|
||||||
lang << "] term [" << root << "] max " << max << " field [" <<
|
|
||||||
field << "] init res.size " << res.entries.size() << "\n");
|
|
||||||
|
|
||||||
if (typ == ET_STEM) {
|
|
||||||
LOGFATAL("RCLDB: internal error: idxTermMatch called with ET_STEM\n");
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
|
|
||||||
Xapian::Database xdb = m_ndb->xrdb;
|
|
||||||
|
|
||||||
string prefix;
|
|
||||||
if (!field.empty()) {
|
|
||||||
const FieldTraits *ftp = 0;
|
|
||||||
if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
|
|
||||||
LOGDEB("Db::termMatch: field is not indexed (no prefix): [" <<
|
|
||||||
field << "]\n");
|
|
||||||
} else {
|
|
||||||
prefix = wrap_prefix(ftp->pfx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
res.prefix = prefix;
|
|
||||||
|
|
||||||
std::shared_ptr<StrMatcher> matcher;
|
std::shared_ptr<StrMatcher> matcher;
|
||||||
if (typ == ET_REGEXP) {
|
if (typ == ET_REGEXP) {
|
||||||
@ -418,35 +398,74 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
|||||||
if (matcher && !matcher->match(term))
|
if (matcher && !matcher->match(term))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
res.entries.push_back(
|
if (!client(ixterm, xdb.get_collection_freq(ixterm),
|
||||||
TermMatchEntry(ixterm, xdb.get_collection_freq(ixterm),
|
it.get_termfreq())) {
|
||||||
it.get_termfreq()));
|
|
||||||
|
|
||||||
// The problem with truncating here is that this is done
|
|
||||||
// alphabetically and we may not keep the most frequent
|
|
||||||
// terms. OTOH, not doing it may stall the program if
|
|
||||||
// we are walking the whole term list. We compromise
|
|
||||||
// by cutting at 2*max
|
|
||||||
if (max > 0 && ++rcnt >= 2*max)
|
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
m_reason.erase();
|
m_rcldb->m_reason.erase();
|
||||||
break;
|
break;
|
||||||
} catch (const Xapian::DatabaseModifiedError &e) {
|
} catch (const Xapian::DatabaseModifiedError &e) {
|
||||||
m_reason = e.get_msg();
|
m_rcldb->m_reason = e.get_msg();
|
||||||
xdb.reopen();
|
xdb.reopen();
|
||||||
continue;
|
continue;
|
||||||
} XCATCHERROR(m_reason);
|
} XCATCHERROR(m_rcldb->m_reason);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (!m_reason.empty()) {
|
if (!m_rcldb->m_reason.empty()) {
|
||||||
LOGERR("termMatch: " << m_reason << "\n");
|
LOGERR("termMatch: " << m_rcldb->m_reason << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Second phase of wildcard/regexp term expansion after case/diac
|
||||||
|
// expansion: expand against main index terms
//
// @param typ_sens match type and sensitivity flags; the match type is
//   extracted with matchTypeTp(). ET_STEM is not accepted (abort()).
// @param lang passed through to Native::idxTermMatch_p().
// @param root the expression to expand.
// @param res output: res.prefix is set to the wrapped field prefix
//   (empty if field is empty or not indexed), and one entry is appended
//   to res.entries for each matching term.
// @param max if > 0, the walk is stopped once 2*max entries have been
//   added by this call.
// @param field field name used to look up the term prefix.
// @return the value returned by Native::idxTermMatch_p().
|
||||||
|
bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
||||||
|
TermMatchResult& res, int max, const string& field)
|
||||||
|
{
|
||||||
|
int typ = matchTypeTp(typ_sens);
|
||||||
|
LOGDEB1("Db::idxTermMatch: typ " << tmtptostr(typ) << " lang [" <<
|
||||||
|
lang << "] term [" << root << "] max " << max << " field [" <<
|
||||||
|
field << "] init res.size " << res.entries.size() << "\n");
|
||||||
|
|
||||||
|
if (typ == ET_STEM) {
|
||||||
|
LOGFATAL("RCLDB: internal error: idxTermMatch called with ET_STEM\n");
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
// Translate the field name into a wrapped term prefix. An unknown or
// prefix-less field is only logged: the walk then uses an empty prefix.
string prefix;
|
||||||
|
if (!field.empty()) {
|
||||||
|
const FieldTraits *ftp = 0;
|
||||||
|
if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
|
||||||
|
LOGDEB("Db::termMatch: field is not indexed (no prefix): [" <<
|
||||||
|
field << "]\n");
|
||||||
|
} else {
|
||||||
|
prefix = wrap_prefix(ftp->pfx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
res.prefix = prefix;
|
||||||
|
|
||||||
|
// Count of entries added by this call, used for the 2*max cutoff.
int rcnt = 0;
|
||||||
|
// The callback stores each matching term, and returns false to stop
// the walk when the cutoff is reached.
bool ret = m_ndb->idxTermMatch_p(
|
||||||
|
typ, lang, root,
|
||||||
|
[&res, &rcnt, max](const string& term,
|
||||||
|
Xapian::termcount cf, Xapian::doccount tf) {
|
||||||
|
res.entries.push_back(TermMatchEntry(term, cf, tf));
|
||||||
|
// The problem with truncating here is that this is done
|
||||||
|
// alphabetically and we may not keep the most frequent
|
||||||
|
// terms. OTOH, not doing it may stall the program if
|
||||||
|
// we are walking the whole term list. We compromise
|
||||||
|
// by cutting at 2*max
|
||||||
|
if (max > 0 && ++rcnt >= 2*max)
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}, prefix);
|
||||||
|

|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/** Term list walking. */
|
/** Term list walking. */
|
||||||
class TermIter {
|
class TermIter {
|
||||||
public:
|
public:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user