Avoid purging existing subdocuments on file indexing error (e.g.: maybe a file lock issue that will go away)
This commit is contained in:
parent
db9fd248f3
commit
ee8c5410bd
@ -56,10 +56,10 @@ class DbUpdTask {
|
||||
public:
|
||||
// Take some care to avoid sharing string data (if string impl is cow)
|
||||
DbUpdTask(const string& u, const string& p, const Rcl::Doc& d)
|
||||
: udi(u.begin(), u.end()), parent_udi(p.begin(), p.end())
|
||||
{
|
||||
d.copyto(&doc);
|
||||
}
|
||||
: udi(u.begin(), u.end()), parent_udi(p.begin(), p.end())
|
||||
{
|
||||
d.copyto(&doc);
|
||||
}
|
||||
string udi;
|
||||
string parent_udi;
|
||||
Rcl::Doc doc;
|
||||
@ -70,11 +70,11 @@ class InternfileTask {
|
||||
public:
|
||||
// Take some care to avoid sharing string data (if string impl is cow)
|
||||
InternfileTask(const std::string &f, const struct stat *i_stp,
|
||||
map<string,string> lfields)
|
||||
: fn(f.begin(), f.end()), statbuf(*i_stp)
|
||||
{
|
||||
map_ss_cp_noshr(lfields, &localfields);
|
||||
}
|
||||
map<string,string> lfields)
|
||||
: fn(f.begin(), f.end()), statbuf(*i_stp)
|
||||
{
|
||||
map_ss_cp_noshr(lfields, &localfields);
|
||||
}
|
||||
string fn;
|
||||
struct stat statbuf;
|
||||
map<string,string> localfields;
|
||||
@ -91,12 +91,12 @@ class FSIFIMissingStore : public FIMissingStore {
|
||||
#endif
|
||||
public:
|
||||
virtual void addMissing(const string& prog, const string& mt)
|
||||
{
|
||||
{
|
||||
#ifdef IDX_THREADS
|
||||
std::unique_lock<std::mutex> locker(m_mutex);
|
||||
std::unique_lock<std::mutex> locker(m_mutex);
|
||||
#endif
|
||||
FIMissingStore::addMissing(prog, mt);
|
||||
}
|
||||
FIMissingStore::addMissing(prog, mt);
|
||||
}
|
||||
};
|
||||
|
||||
FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
|
||||
@ -118,20 +118,20 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
|
||||
int internqlen = cnf->getThrConf(RclConfig::ThrIntern).first;
|
||||
int internthreads = cnf->getThrConf(RclConfig::ThrIntern).second;
|
||||
if (internqlen >= 0) {
|
||||
if (!m_iwqueue.start(internthreads, FsIndexerInternfileWorker, this)) {
|
||||
LOGERR("FsIndexer::FsIndexer: intern worker start failed\n");
|
||||
return;
|
||||
}
|
||||
m_haveInternQ = true;
|
||||
if (!m_iwqueue.start(internthreads, FsIndexerInternfileWorker, this)) {
|
||||
LOGERR("FsIndexer::FsIndexer: intern worker start failed\n");
|
||||
return;
|
||||
}
|
||||
m_haveInternQ = true;
|
||||
}
|
||||
int splitqlen = cnf->getThrConf(RclConfig::ThrSplit).first;
|
||||
int splitthreads = cnf->getThrConf(RclConfig::ThrSplit).second;
|
||||
if (splitqlen >= 0) {
|
||||
if (!m_dwqueue.start(splitthreads, FsIndexerDbUpdWorker, this)) {
|
||||
LOGERR("FsIndexer::FsIndexer: split worker start failed\n");
|
||||
return;
|
||||
}
|
||||
m_haveSplitQ = true;
|
||||
if (!m_dwqueue.start(splitthreads, FsIndexerDbUpdWorker, this)) {
|
||||
LOGERR("FsIndexer::FsIndexer: split worker start failed\n");
|
||||
return;
|
||||
}
|
||||
m_haveSplitQ = true;
|
||||
}
|
||||
LOGDEB("FsIndexer: threads: haveIQ " << m_haveInternQ << " iql " <<
|
||||
internqlen << " iqts " << internthreads << " haveSQ " <<
|
||||
@ -147,12 +147,12 @@ FsIndexer::~FsIndexer()
|
||||
#ifdef IDX_THREADS
|
||||
void *status;
|
||||
if (m_haveInternQ) {
|
||||
status = m_iwqueue.setTerminateAndWait();
|
||||
LOGDEB0("FsIndexer: internfile wrkr status: "<< status << " (1->ok)\n");
|
||||
status = m_iwqueue.setTerminateAndWait();
|
||||
LOGDEB0("FsIndexer: internfile wrkr status: "<< status << " (1->ok)\n");
|
||||
}
|
||||
if (m_haveSplitQ) {
|
||||
status = m_dwqueue.setTerminateAndWait();
|
||||
LOGDEB0("FsIndexer: dbupd worker status: " << status << " (1->ok)\n");
|
||||
status = m_dwqueue.setTerminateAndWait();
|
||||
LOGDEB0("FsIndexer: dbupd worker status: " << status << " (1->ok)\n");
|
||||
}
|
||||
delete m_stableconfig;
|
||||
#endif // IDX_THREADS
|
||||
@ -179,23 +179,23 @@ bool FsIndexer::index(int flags)
|
||||
m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
|
||||
Chrono chron;
|
||||
if (!init())
|
||||
return false;
|
||||
return false;
|
||||
|
||||
if (m_updater) {
|
||||
#ifdef IDX_THREADS
|
||||
std::unique_lock<std::mutex> locker(m_updater->m_mutex);
|
||||
#endif
|
||||
m_updater->status.dbtotdocs = m_db->docCnt();
|
||||
m_updater->status.dbtotdocs = m_db->docCnt();
|
||||
}
|
||||
|
||||
m_walker.setSkippedPaths(m_config->getSkippedPaths());
|
||||
if (quickshallow) {
|
||||
m_walker.setOpts(m_walker.getOpts() | FsTreeWalker::FtwSkipDotFiles);
|
||||
m_walker.setMaxDepth(2);
|
||||
m_walker.setOpts(m_walker.getOpts() | FsTreeWalker::FtwSkipDotFiles);
|
||||
m_walker.setMaxDepth(2);
|
||||
}
|
||||
|
||||
for (const auto& topdir : m_tdl) {
|
||||
LOGDEB("FsIndexer::index: Indexing " << topdir << " into " <<
|
||||
LOGDEB("FsIndexer::index: Indexing " << topdir << " into " <<
|
||||
getDbDir() << "\n");
|
||||
|
||||
// If a topdirs member appears to be not here or not mounted
|
||||
@ -206,48 +206,48 @@ bool FsIndexer::index(int flags)
|
||||
continue;
|
||||
}
|
||||
|
||||
// Set the current directory in config so that subsequent
|
||||
// getConfParams() will get local values
|
||||
m_config->setKeyDir(topdir);
|
||||
// Set the current directory in config so that subsequent
|
||||
// getConfParams() will get local values
|
||||
m_config->setKeyDir(topdir);
|
||||
|
||||
// Adjust the "follow symlinks" option
|
||||
bool follow;
|
||||
int opts = m_walker.getOpts();
|
||||
if (m_config->getConfParam("followLinks", &follow) && follow) {
|
||||
opts |= FsTreeWalker::FtwFollow;
|
||||
} else {
|
||||
opts &= ~FsTreeWalker::FtwFollow;
|
||||
}
|
||||
m_walker.setOpts(opts);
|
||||
// Adjust the "follow symlinks" option
|
||||
bool follow;
|
||||
int opts = m_walker.getOpts();
|
||||
if (m_config->getConfParam("followLinks", &follow) && follow) {
|
||||
opts |= FsTreeWalker::FtwFollow;
|
||||
} else {
|
||||
opts &= ~FsTreeWalker::FtwFollow;
|
||||
}
|
||||
m_walker.setOpts(opts);
|
||||
|
||||
int abslen;
|
||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||
m_db->setAbstractParams(abslen, -1, -1);
|
||||
int abslen;
|
||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||
m_db->setAbstractParams(abslen, -1, -1);
|
||||
|
||||
// Walk the directory tree
|
||||
if (m_walker.walk(topdir, *this) != FsTreeWalker::FtwOk) {
|
||||
LOGERR("FsIndexer::index: error while indexing " << topdir <<
|
||||
// Walk the directory tree
|
||||
if (m_walker.walk(topdir, *this) != FsTreeWalker::FtwOk) {
|
||||
LOGERR("FsIndexer::index: error while indexing " << topdir <<
|
||||
": " << m_walker.getReason() << "\n");
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveInternQ)
|
||||
m_iwqueue.waitIdle();
|
||||
m_iwqueue.waitIdle();
|
||||
if (m_haveSplitQ)
|
||||
m_dwqueue.waitIdle();
|
||||
m_dwqueue.waitIdle();
|
||||
m_db->waitUpdIdle();
|
||||
#endif // IDX_THREADS
|
||||
|
||||
if (m_missing) {
|
||||
string missing;
|
||||
m_missing->getMissingDescription(missing);
|
||||
if (!missing.empty()) {
|
||||
LOGINFO("FsIndexer::index missing helper program(s):\n" <<
|
||||
string missing;
|
||||
m_missing->getMissingDescription(missing);
|
||||
if (!missing.empty()) {
|
||||
LOGINFO("FsIndexer::index missing helper program(s):\n" <<
|
||||
missing << "\n");
|
||||
}
|
||||
m_config->storeMissingHelperDesc(missing);
|
||||
}
|
||||
m_config->storeMissingHelperDesc(missing);
|
||||
}
|
||||
LOGINFO("fsindexer index time: " << chron.millis() << " mS\n");
|
||||
return true;
|
||||
@ -342,7 +342,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
||||
|
||||
int abslen;
|
||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||
m_db->setAbstractParams(abslen, -1, -1);
|
||||
m_db->setAbstractParams(abslen, -1, -1);
|
||||
|
||||
m_purgeCandidates.setRecord(true);
|
||||
|
||||
@ -354,29 +354,29 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
||||
LOGDEB2("FsIndexer::indexFiles: [" << it << "]\n");
|
||||
|
||||
m_config->setKeyDir(path_getfather(*it));
|
||||
if (m_havelocalfields)
|
||||
localfieldsfromconf();
|
||||
if (m_havelocalfields)
|
||||
localfieldsfromconf();
|
||||
|
||||
bool follow = false;
|
||||
m_config->getConfParam("followLinks", &follow);
|
||||
bool follow = false;
|
||||
m_config->getConfParam("followLinks", &follow);
|
||||
|
||||
walker.setOnlyNames(m_config->getOnlyNames());
|
||||
walker.setSkippedNames(m_config->getSkippedNames());
|
||||
// Check path against indexed areas and skipped names/paths
|
||||
// Check path against indexed areas and skipped names/paths
|
||||
if (!(flags & ConfIndexer::IxFIgnoreSkip) &&
|
||||
matchesSkipped(m_tdl, walker, *it)) {
|
||||
matchesSkipped(m_tdl, walker, *it)) {
|
||||
it++;
|
||||
continue;
|
||||
continue;
|
||||
}
|
||||
|
||||
struct stat stb;
|
||||
int ststat = path_fileprops(*it, &stb, follow);
|
||||
if (ststat != 0) {
|
||||
LOGERR("FsIndexer::indexFiles: (l)stat " << *it << ": " <<
|
||||
struct stat stb;
|
||||
int ststat = path_fileprops(*it, &stb, follow);
|
||||
if (ststat != 0) {
|
||||
LOGERR("FsIndexer::indexFiles: (l)stat " << *it << ": " <<
|
||||
strerror(errno) << "\n");
|
||||
it++;
|
||||
continue;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!(flags & ConfIndexer::IxFIgnoreSkip) &&
|
||||
(S_ISREG(stb.st_mode) || S_ISLNK(stb.st_mode))) {
|
||||
if (!walker.inOnlyNames(path_getsimple(*it))) {
|
||||
@ -384,11 +384,11 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
|
||||
FsTreeWalker::FtwOk) {
|
||||
LOGERR("FsIndexer::indexFiles: processone failed\n");
|
||||
goto out;
|
||||
}
|
||||
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
|
||||
FsTreeWalker::FtwOk) {
|
||||
LOGERR("FsIndexer::indexFiles: processone failed\n");
|
||||
goto out;
|
||||
}
|
||||
it = files.erase(it);
|
||||
}
|
||||
|
||||
@ -396,23 +396,23 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
||||
out:
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveInternQ)
|
||||
m_iwqueue.waitIdle();
|
||||
m_iwqueue.waitIdle();
|
||||
if (m_haveSplitQ)
|
||||
m_dwqueue.waitIdle();
|
||||
m_dwqueue.waitIdle();
|
||||
m_db->waitUpdIdle();
|
||||
#endif // IDX_THREADS
|
||||
|
||||
// Purge possible orphan documents
|
||||
if (ret == true) {
|
||||
LOGDEB("Indexfiles: purging orphans\n");
|
||||
const vector<string>& purgecandidates = m_purgeCandidates.getCandidates();
|
||||
for (vector<string>::const_iterator it = purgecandidates.begin();
|
||||
it != purgecandidates.end(); it++) {
|
||||
LOGDEB("Indexfiles: purging orphans for " << *it << "\n");
|
||||
m_db->purgeOrphans(*it);
|
||||
}
|
||||
LOGDEB("Indexfiles: purging orphans\n");
|
||||
const vector<string>& purgecandidates = m_purgeCandidates.getCandidates();
|
||||
for (vector<string>::const_iterator it = purgecandidates.begin();
|
||||
it != purgecandidates.end(); it++) {
|
||||
LOGDEB("Indexfiles: purging orphans for " << *it << "\n");
|
||||
m_db->purgeOrphans(*it);
|
||||
}
|
||||
#ifdef IDX_THREADS
|
||||
m_db->waitUpdIdle();
|
||||
m_db->waitUpdIdle();
|
||||
#endif // IDX_THREADS
|
||||
}
|
||||
|
||||
@ -427,18 +427,18 @@ bool FsIndexer::purgeFiles(list<string>& files)
|
||||
LOGDEB("FsIndexer::purgeFiles\n");
|
||||
bool ret = false;
|
||||
if (!init())
|
||||
return false;
|
||||
return false;
|
||||
|
||||
for (list<string>::iterator it = files.begin(); it != files.end(); ) {
|
||||
string udi;
|
||||
make_udi(*it, cstr_null, udi);
|
||||
string udi;
|
||||
make_udi(*it, cstr_null, udi);
|
||||
// rcldb::purgefile returns true if the udi was either not
|
||||
// found or deleted, false only in case of actual error
|
||||
bool existed;
|
||||
if (!m_db->purgeFile(udi, &existed)) {
|
||||
LOGERR("FsIndexer::purgeFiles: Database error\n");
|
||||
goto out;
|
||||
}
|
||||
if (!m_db->purgeFile(udi, &existed)) {
|
||||
LOGERR("FsIndexer::purgeFiles: Database error\n");
|
||||
goto out;
|
||||
}
|
||||
// If we actually deleted something, take it off the list
|
||||
if (existed) {
|
||||
it = files.erase(it);
|
||||
@ -451,9 +451,9 @@ bool FsIndexer::purgeFiles(list<string>& files)
|
||||
out:
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveInternQ)
|
||||
m_iwqueue.waitIdle();
|
||||
m_iwqueue.waitIdle();
|
||||
if (m_haveSplitQ)
|
||||
m_dwqueue.waitIdle();
|
||||
m_dwqueue.waitIdle();
|
||||
m_db->waitUpdIdle();
|
||||
#endif // IDX_THREADS
|
||||
LOGDEB("FsIndexer::purgeFiles: done\n");
|
||||
@ -468,12 +468,12 @@ void FsIndexer::localfieldsfromconf()
|
||||
string sfields;
|
||||
m_config->getConfParam("localfields", sfields);
|
||||
if (!sfields.compare(m_slocalfields))
|
||||
return;
|
||||
return;
|
||||
|
||||
m_slocalfields = sfields;
|
||||
m_localfields.clear();
|
||||
if (sfields.empty())
|
||||
return;
|
||||
return;
|
||||
|
||||
string value;
|
||||
ConfSimple attrs;
|
||||
@ -481,9 +481,9 @@ void FsIndexer::localfieldsfromconf()
|
||||
vector<string> nmlst = attrs.getNames(cstr_null);
|
||||
for (vector<string>::const_iterator it = nmlst.begin();
|
||||
it != nmlst.end(); it++) {
|
||||
string nm = m_config->fieldCanon(*it);
|
||||
attrs.get(*it, m_localfields[nm]);
|
||||
LOGDEB2("FsIndexer::localfieldsfromconf: [" << nm << "]->[" <<
|
||||
string nm = m_config->fieldCanon(*it);
|
||||
attrs.get(*it, m_localfields[nm]);
|
||||
LOGDEB2("FsIndexer::localfieldsfromconf: [" << nm << "]->[" <<
|
||||
m_localfields[nm] << "]\n");
|
||||
}
|
||||
}
|
||||
@ -491,11 +491,11 @@ void FsIndexer::localfieldsfromconf()
|
||||
void FsIndexer::setlocalfields(const map<string, string>& fields, Rcl::Doc& doc)
|
||||
{
|
||||
for (map<string, string>::const_iterator it = fields.begin();
|
||||
it != fields.end(); it++) {
|
||||
it != fields.end(); it++) {
|
||||
// Being chosen by the user, localfields override values from
|
||||
// the filter. The key is already canonic (see
|
||||
// localfieldsfromconf())
|
||||
doc.meta[it->first] = it->second;
|
||||
doc.meta[it->first] = it->second;
|
||||
}
|
||||
}
|
||||
|
||||
@ -518,18 +518,18 @@ void *FsIndexerDbUpdWorker(void * fsp)
|
||||
|
||||
DbUpdTask *tsk;
|
||||
for (;;) {
|
||||
size_t qsz;
|
||||
if (!tqp->take(&tsk, &qsz)) {
|
||||
tqp->workerExit();
|
||||
return (void*)1;
|
||||
}
|
||||
LOGDEB0("FsIndexerDbUpdWorker: task ql " << qsz << "\n");
|
||||
if (!fip->m_db->addOrUpdate(tsk->udi, tsk->parent_udi, tsk->doc)) {
|
||||
LOGERR("FsIndexerDbUpdWorker: addOrUpdate failed\n");
|
||||
tqp->workerExit();
|
||||
return (void*)0;
|
||||
}
|
||||
delete tsk;
|
||||
size_t qsz;
|
||||
if (!tqp->take(&tsk, &qsz)) {
|
||||
tqp->workerExit();
|
||||
return (void*)1;
|
||||
}
|
||||
LOGDEB0("FsIndexerDbUpdWorker: task ql " << qsz << "\n");
|
||||
if (!fip->m_db->addOrUpdate(tsk->udi, tsk->parent_udi, tsk->doc)) {
|
||||
LOGERR("FsIndexerDbUpdWorker: addOrUpdate failed\n");
|
||||
tqp->workerExit();
|
||||
return (void*)0;
|
||||
}
|
||||
delete tsk;
|
||||
}
|
||||
}
|
||||
|
||||
@ -542,20 +542,20 @@ void *FsIndexerInternfileWorker(void * fsp)
|
||||
|
||||
InternfileTask *tsk = 0;
|
||||
for (;;) {
|
||||
if (!tqp->take(&tsk)) {
|
||||
tqp->workerExit();
|
||||
return (void*)1;
|
||||
}
|
||||
LOGDEB0("FsIndexerInternfileWorker: task fn " << tsk->fn << "\n");
|
||||
if (fip->processonefile(&myconf, tsk->fn, &tsk->statbuf,
|
||||
tsk->localfields) !=
|
||||
FsTreeWalker::FtwOk) {
|
||||
LOGERR("FsIndexerInternfileWorker: processone failed\n");
|
||||
tqp->workerExit();
|
||||
return (void*)0;
|
||||
}
|
||||
LOGDEB1("FsIndexerInternfileWorker: done fn " << tsk->fn << "\n");
|
||||
delete tsk;
|
||||
if (!tqp->take(&tsk)) {
|
||||
tqp->workerExit();
|
||||
return (void*)1;
|
||||
}
|
||||
LOGDEB0("FsIndexerInternfileWorker: task fn " << tsk->fn << "\n");
|
||||
if (fip->processonefile(&myconf, tsk->fn, &tsk->statbuf,
|
||||
tsk->localfields) !=
|
||||
FsTreeWalker::FtwOk) {
|
||||
LOGERR("FsIndexerInternfileWorker: processone failed\n");
|
||||
tqp->workerExit();
|
||||
return (void*)0;
|
||||
}
|
||||
LOGDEB1("FsIndexerInternfileWorker: done fn " << tsk->fn << "\n");
|
||||
delete tsk;
|
||||
}
|
||||
}
|
||||
#endif // IDX_THREADS
|
||||
@ -573,33 +573,29 @@ void *FsIndexerInternfileWorker(void * fsp)
|
||||
/// mostly contains pretty raw utf8 data.
|
||||
FsTreeWalker::Status
|
||||
FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||
FsTreeWalker::CbFlag flg)
|
||||
FsTreeWalker::CbFlag flg)
|
||||
{
|
||||
if (m_updater) {
|
||||
#ifdef IDX_THREADS
|
||||
std::unique_lock<std::mutex> locker(m_updater->m_mutex);
|
||||
#endif
|
||||
if (!m_updater->update()) {
|
||||
return FsTreeWalker::FtwStop;
|
||||
}
|
||||
if (!m_updater->update()) {
|
||||
return FsTreeWalker::FtwStop;
|
||||
}
|
||||
}
|
||||
|
||||
// If we're changing directories, possibly adjust parameters (set
|
||||
// the current directory in configuration object)
|
||||
if (flg == FsTreeWalker::FtwDirEnter ||
|
||||
flg == FsTreeWalker::FtwDirReturn) {
|
||||
m_config->setKeyDir(fn);
|
||||
|
||||
// Set up filter/skipped patterns for this subtree.
|
||||
m_walker.setOnlyNames(m_config->getOnlyNames());
|
||||
m_walker.setSkippedNames(m_config->getSkippedNames());
|
||||
|
||||
if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwDirReturn) {
|
||||
m_config->setKeyDir(fn);
|
||||
// Set up filter/skipped patterns for this subtree.
|
||||
m_walker.setOnlyNames(m_config->getOnlyNames());
|
||||
m_walker.setSkippedNames(m_config->getSkippedNames());
|
||||
// Adjust local fields from config for this subtree
|
||||
if (m_havelocalfields)
|
||||
localfieldsfromconf();
|
||||
|
||||
if (flg == FsTreeWalker::FtwDirReturn)
|
||||
return FsTreeWalker::FtwOk;
|
||||
if (m_havelocalfields)
|
||||
localfieldsfromconf();
|
||||
if (flg == FsTreeWalker::FtwDirReturn)
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
#ifdef IDX_THREADS
|
||||
@ -608,7 +604,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||
if (m_iwqueue.put(tp)) {
|
||||
return FsTreeWalker::FtwOk;
|
||||
} else {
|
||||
return FsTreeWalker::FtwError;
|
||||
return FsTreeWalker::FtwError;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -616,10 +612,29 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||
return processonefile(m_config, fn, stp, m_localfields);
|
||||
}
|
||||
|
||||
// Start db update, either by queueing or by direct call
|
||||
bool FsIndexer::launchAddOrUpdate(const string& udi, const string& parent_udi,
|
||||
Rcl::Doc& doc)
|
||||
{
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveSplitQ) {
|
||||
DbUpdTask *tp = new DbUpdTask(udi, parent_udi, doc);
|
||||
if (!m_dwqueue.put(tp)) {
|
||||
LOGERR("processonefile: wqueue.put failed\n");
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return m_db->addOrUpdate(udi, parent_udi, doc);
|
||||
}
|
||||
|
||||
FsTreeWalker::Status
|
||||
FsIndexer::processonefile(RclConfig *config,
|
||||
const std::string &fn, const struct stat *stp,
|
||||
const map<string, string>& localfields)
|
||||
const std::string &fn, const struct stat *stp,
|
||||
const map<string, string>& localfields)
|
||||
{
|
||||
////////////////////
|
||||
// Check db up to date ? Doing this before file type
|
||||
@ -633,10 +648,9 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
// excludedmimetypes, etc.
|
||||
config->setKeyDir(path_getfather(fn));
|
||||
|
||||
// Document signature. This is based on m/ctime and size and used
|
||||
// for the uptodate check (the value computed here is checked
|
||||
// against the stored one). Changing the computation forces a full
|
||||
// reindex of course.
|
||||
// File signature and up to date check. The sig is based on
|
||||
// m/ctime and size and the possibly new value is checked against
|
||||
// the stored one.
|
||||
string sig;
|
||||
makesig(stp, sig);
|
||||
string udi;
|
||||
@ -657,7 +671,7 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
// miss the data update. We would have to store both the mtime and
|
||||
// the ctime to avoid this
|
||||
bool xattronly = m_detectxattronly && !m_db->inFullReset() &&
|
||||
existingDoc && needupdate && (stp->st_mtime < stp->st_ctime);
|
||||
existingDoc && needupdate && (stp->st_mtime < stp->st_ctime);
|
||||
|
||||
LOGDEB("processone: needupdate " << needupdate << " noretry " <<
|
||||
m_noretryfailed << " existing " << existingDoc << " oldsig [" <<
|
||||
@ -678,19 +692,19 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
}
|
||||
|
||||
if (!needupdate) {
|
||||
LOGDEB0("processone: up to date: " << fn << "\n");
|
||||
if (m_updater) {
|
||||
LOGDEB0("processone: up to date: " << fn << "\n");
|
||||
if (m_updater) {
|
||||
#ifdef IDX_THREADS
|
||||
std::unique_lock<std::mutex> locker(m_updater->m_mutex);
|
||||
#endif
|
||||
// Status bar update, abort request etc.
|
||||
m_updater->status.fn = fn;
|
||||
++(m_updater->status.filesdone);
|
||||
if (!m_updater->update()) {
|
||||
return FsTreeWalker::FtwStop;
|
||||
}
|
||||
}
|
||||
return FsTreeWalker::FtwOk;
|
||||
// Status bar update, abort request etc.
|
||||
m_updater->status.fn = fn;
|
||||
++(m_updater->status.filesdone);
|
||||
if (!m_updater->update()) {
|
||||
return FsTreeWalker::FtwStop;
|
||||
}
|
||||
}
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
LOGDEB0("processone: processing: [" <<
|
||||
@ -712,137 +726,135 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
string mimetype;
|
||||
|
||||
if (!xattronly) {
|
||||
FileInterner interner(fn, stp, config, FileInterner::FIF_none);
|
||||
if (!interner.ok()) {
|
||||
// no indexing whatsoever in this case. This typically means that
|
||||
// indexallfilenames is not set
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
mimetype = interner.getMimetype();
|
||||
FileInterner interner(fn, stp, config, FileInterner::FIF_none);
|
||||
if (!interner.ok()) {
|
||||
// no indexing whatsoever in this case. This typically means that
|
||||
// indexallfilenames is not set
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
mimetype = interner.getMimetype();
|
||||
|
||||
interner.setMissingStore(m_missing);
|
||||
FileInterner::Status fis = FileInterner::FIAgain;
|
||||
bool hadNonNullIpath = false;
|
||||
while (fis == FileInterner::FIAgain) {
|
||||
doc.erase();
|
||||
try {
|
||||
fis = interner.internfile(doc);
|
||||
} catch (CancelExcept) {
|
||||
LOGERR("fsIndexer::processone: interrupted\n");
|
||||
return FsTreeWalker::FtwStop;
|
||||
}
|
||||
interner.setMissingStore(m_missing);
|
||||
FileInterner::Status fis = FileInterner::FIAgain;
|
||||
bool hadNonNullIpath = false;
|
||||
while (fis == FileInterner::FIAgain) {
|
||||
doc.erase();
|
||||
try {
|
||||
fis = interner.internfile(doc);
|
||||
} catch (CancelExcept) {
|
||||
LOGERR("fsIndexer::processone: interrupted\n");
|
||||
return FsTreeWalker::FtwStop;
|
||||
}
|
||||
|
||||
// We index at least the file name even if there was an error.
|
||||
// We'll change the signature to ensure that the indexing will
|
||||
// be retried every time.
|
||||
// We index at least the file name even if there was an error.
|
||||
// We'll change the signature to ensure that the indexing will
|
||||
// be retried every time.
|
||||
|
||||
// If there is an error and the base doc was already seen,
|
||||
// we're done
|
||||
if (fis == FileInterner::FIError && hadNullIpath)
|
||||
return FsTreeWalker::FtwOk;
|
||||
|
||||
// Internal access path for multi-document files. If empty, this is
|
||||
// for the main file.
|
||||
if (doc.ipath.empty()) {
|
||||
hadNullIpath = true;
|
||||
if (hadNonNullIpath) {
|
||||
// Note that only the filters can reliably compute
|
||||
// this. What we do is dependant of the doc order (if
|
||||
// we see the top doc first, we won't set the flag)
|
||||
doc.haschildren = true;
|
||||
}
|
||||
} else {
|
||||
hadNonNullIpath = true;
|
||||
}
|
||||
if (fis == FileInterner::FIError && hadNullIpath) {
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
// Internal access path for multi-document files. If empty, this is
|
||||
// for the main file.
|
||||
if (doc.ipath.empty()) {
|
||||
hadNullIpath = true;
|
||||
if (hadNonNullIpath) {
|
||||
// Note that only the filters can reliably compute
|
||||
// this. What we do is dependant of the doc order (if
|
||||
// we see the top doc first, we won't set the flag)
|
||||
doc.haschildren = true;
|
||||
}
|
||||
} else {
|
||||
hadNonNullIpath = true;
|
||||
}
|
||||
make_udi(fn, doc.ipath, udi);
|
||||
|
||||
// Set file name, mod time and url if not done by
|
||||
// Set file name, mod time and url if not done by
|
||||
// filter. We used to set the top-level container file
|
||||
// name for all subdocs without a proper file name, but
|
||||
// this did not make sense (resulted in multiple not
|
||||
// useful hits on the subdocs when searching for the
|
||||
// file name).
|
||||
if (doc.fmtime.empty())
|
||||
doc.fmtime = ascdate;
|
||||
if (doc.url.empty())
|
||||
doc.url = path_pathtofileurl(fn);
|
||||
const string *fnp = 0;
|
||||
if (doc.ipath.empty()) {
|
||||
if (doc.fmtime.empty())
|
||||
doc.fmtime = ascdate;
|
||||
if (doc.url.empty())
|
||||
doc.url = path_pathtofileurl(fn);
|
||||
const string *fnp = 0;
|
||||
if (doc.ipath.empty()) {
|
||||
if (!doc.peekmeta(Rcl::Doc::keyfn, &fnp) || fnp->empty())
|
||||
doc.meta[Rcl::Doc::keyfn] = utf8fn;
|
||||
}
|
||||
// Set container file name for all docs, top or subdoc
|
||||
doc.meta[Rcl::Doc::keytcfn] = utf8fn;
|
||||
|
||||
doc.pcbytes = lltodecstr(stp->st_size);
|
||||
// Document signature for up to date checks. All subdocs inherit the
|
||||
// file's.
|
||||
doc.sig = sig;
|
||||
doc.pcbytes = lltodecstr(stp->st_size);
|
||||
// Document signature for up to date checks. All subdocs inherit the
|
||||
// file's.
|
||||
doc.sig = sig;
|
||||
|
||||
// If there was an error, ensure indexing will be
|
||||
// retried. This is for the once missing, later installed
|
||||
// filter case. It can make indexing much slower (if there are
|
||||
// myriads of such files, the ext script is executed for them
|
||||
// and fails every time)
|
||||
if (fis == FileInterner::FIError) {
|
||||
doc.sig += cstr_plus;
|
||||
}
|
||||
// If there was an error, ensure indexing will be
|
||||
// retried. This is for the once missing, later installed
|
||||
// filter case. It can make indexing much slower (if there are
|
||||
// myriads of such files, the ext script is executed for them
|
||||
// and fails every time)
|
||||
if (fis == FileInterner::FIError) {
|
||||
doc.sig += cstr_plus;
|
||||
}
|
||||
|
||||
// Possibly add fields from local config
|
||||
if (m_havelocalfields)
|
||||
setlocalfields(localfields, doc);
|
||||
// Possibly add fields from local config
|
||||
if (m_havelocalfields)
|
||||
setlocalfields(localfields, doc);
|
||||
|
||||
// Add document to database. If there is an ipath, add it
|
||||
// as a child of the file document.
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveSplitQ) {
|
||||
DbUpdTask *tp = new DbUpdTask(udi, doc.ipath.empty() ?
|
||||
cstr_null : parent_udi, doc);
|
||||
if (!m_dwqueue.put(tp)) {
|
||||
LOGERR("processonefile: wqueue.put failed\n");
|
||||
return FsTreeWalker::FtwError;
|
||||
}
|
||||
} else {
|
||||
#endif
|
||||
if (!m_db->addOrUpdate(udi, doc.ipath.empty() ?
|
||||
cstr_null : parent_udi, doc)) {
|
||||
return FsTreeWalker::FtwError;
|
||||
}
|
||||
#ifdef IDX_THREADS
|
||||
}
|
||||
#endif
|
||||
// Add document to database. If there is an ipath, add it
|
||||
// as a child of the file document.
|
||||
if (!launchAddOrUpdate(udi, doc.ipath.empty() ?
|
||||
cstr_null : parent_udi, doc)) {
|
||||
return FsTreeWalker::FtwError;
|
||||
}
|
||||
|
||||
// Tell what we are doing and check for interrupt request
|
||||
if (m_updater) {
|
||||
// Tell what we are doing and check for interrupt request
|
||||
if (m_updater) {
|
||||
#ifdef IDX_THREADS
|
||||
std::unique_lock<std::mutex> locker(m_updater->m_mutex);
|
||||
#endif
|
||||
++(m_updater->status.docsdone);
|
||||
if (m_updater->status.dbtotdocs < m_updater->status.docsdone)
|
||||
m_updater->status.dbtotdocs = m_updater->status.docsdone;
|
||||
m_updater->status.fn = fn;
|
||||
if (!doc.ipath.empty()) {
|
||||
m_updater->status.fn += "|" + doc.ipath;
|
||||
++(m_updater->status.docsdone);
|
||||
if (m_updater->status.dbtotdocs < m_updater->status.docsdone)
|
||||
m_updater->status.dbtotdocs = m_updater->status.docsdone;
|
||||
m_updater->status.fn = fn;
|
||||
if (!doc.ipath.empty()) {
|
||||
m_updater->status.fn += "|" + doc.ipath;
|
||||
} else {
|
||||
if (fis == FileInterner::FIError) {
|
||||
++(m_updater->status.fileerrors);
|
||||
}
|
||||
++(m_updater->status.filesdone);
|
||||
}
|
||||
if (!m_updater->update()) {
|
||||
return FsTreeWalker::FtwStop;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!m_updater->update()) {
|
||||
return FsTreeWalker::FtwStop;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If this doc existed and it's a container, recording for
|
||||
// possible subdoc purge (this will be used only if we don't do a
|
||||
// db-wide purge, e.g. if we're called from indexfiles()).
|
||||
LOGDEB2("processOnefile: existingDoc " << existingDoc <<
|
||||
" hadNonNullIpath " << hadNonNullIpath << "\n");
|
||||
if (existingDoc && hadNonNullIpath) {
|
||||
m_purgeCandidates.record(parent_udi);
|
||||
}
|
||||
if (fis == FileInterner::FIError) {
|
||||
// In case of error, avoid purging any existing
|
||||
// subdoc. For example on windows, this will avoid erasing
|
||||
// all the emails from a .ost because it is currently
|
||||
// locked by Outlook.
|
||||
LOGDEB("processonefile: internfile error, marking "
|
||||
"subdocs as existing\n");
|
||||
m_db->udiTreeMarkExisting(parent_udi);
|
||||
} else {
|
||||
// If this doc existed and it's a container, recording for
|
||||
// possible subdoc purge (this will be used only if we don't do a
|
||||
// db-wide purge, e.g. if we're called from indexfiles()).
|
||||
LOGDEB2("processOnefile: existingDoc " << existingDoc <<
|
||||
" hadNonNullIpath " << hadNonNullIpath << "\n");
|
||||
if (existingDoc && hadNonNullIpath) {
|
||||
m_purgeCandidates.record(parent_udi);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we had no instance with a null ipath, we create an empty
|
||||
@ -852,38 +864,30 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
// If xattronly is set, ONLY the extattr metadata is valid and will be used
|
||||
// by the following step.
|
||||
if (xattronly || hadNullIpath == false) {
|
||||
LOGDEB("Creating empty doc for file or pure xattr update\n");
|
||||
Rcl::Doc fileDoc;
|
||||
if (xattronly) {
|
||||
map<string, string> xfields;
|
||||
reapXAttrs(config, fn, xfields);
|
||||
docFieldsFromXattrs(config, xfields, fileDoc);
|
||||
fileDoc.onlyxattr = true;
|
||||
} else {
|
||||
fileDoc.fmtime = ascdate;
|
||||
fileDoc.meta[Rcl::Doc::keyfn] =
|
||||
LOGDEB("Creating empty doc for file or pure xattr update\n");
|
||||
Rcl::Doc fileDoc;
|
||||
if (xattronly) {
|
||||
map<string, string> xfields;
|
||||
reapXAttrs(config, fn, xfields);
|
||||
docFieldsFromXattrs(config, xfields, fileDoc);
|
||||
fileDoc.onlyxattr = true;
|
||||
} else {
|
||||
fileDoc.fmtime = ascdate;
|
||||
fileDoc.meta[Rcl::Doc::keyfn] =
|
||||
fileDoc.meta[Rcl::Doc::keytcfn] = utf8fn;
|
||||
fileDoc.haschildren = true;
|
||||
fileDoc.mimetype = mimetype;
|
||||
fileDoc.url = path_pathtofileurl(fn);
|
||||
if (m_havelocalfields)
|
||||
setlocalfields(localfields, fileDoc);
|
||||
fileDoc.pcbytes = lltodecstr(stp->st_size);
|
||||
}
|
||||
fileDoc.haschildren = true;
|
||||
fileDoc.mimetype = mimetype;
|
||||
fileDoc.url = path_pathtofileurl(fn);
|
||||
if (m_havelocalfields)
|
||||
setlocalfields(localfields, fileDoc);
|
||||
fileDoc.pcbytes = lltodecstr(stp->st_size);
|
||||
}
|
||||
|
||||
fileDoc.sig = sig;
|
||||
fileDoc.sig = sig;
|
||||
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveSplitQ) {
|
||||
DbUpdTask *tp = new DbUpdTask(parent_udi, cstr_null, fileDoc);
|
||||
if (!m_dwqueue.put(tp))
|
||||
return FsTreeWalker::FtwError;
|
||||
else
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
#endif
|
||||
if (!m_db->addOrUpdate(parent_udi, cstr_null, fileDoc))
|
||||
return FsTreeWalker::FtwError;
|
||||
if (!launchAddOrUpdate(parent_udi, cstr_null, fileDoc)) {
|
||||
return FsTreeWalker::FtwError;
|
||||
}
|
||||
}
|
||||
|
||||
return FsTreeWalker::FtwOk;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2009 J.F.Dockes
|
||||
/* Copyright (C) 2009-2019 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -33,19 +33,23 @@ struct stat;
|
||||
class DbUpdTask;
|
||||
class InternfileTask;
|
||||
|
||||
namespace Rcl {
|
||||
class Doc;
|
||||
}
|
||||
|
||||
/** Index selected parts of the file system
|
||||
|
||||
Tree indexing: we inherits FsTreeWalkerCB so that, the processone()
|
||||
method is called by the file-system tree walk code for each file and
|
||||
directory. We keep all state needed while indexing, and finally call
|
||||
the methods to purge the db of stale entries and create the stemming
|
||||
databases.
|
||||
Tree indexing: we inherits FsTreeWalkerCB so that, the processone()
|
||||
method is called by the file-system tree walk code for each file and
|
||||
directory. We keep all state needed while indexing, and finally call
|
||||
the methods to purge the db of stale entries and create the stemming
|
||||
databases.
|
||||
|
||||
Single file(s) indexing: there are also calls to index or purge lists of files.
|
||||
No database purging or stem db updating in this case.
|
||||
Single file(s) indexing: there are also calls to index or purge lists of files.
|
||||
No database purging or stem db updating in this case.
|
||||
*/
|
||||
class FsIndexer : public FsTreeWalkerCB {
|
||||
public:
|
||||
public:
|
||||
/** Constructor does nothing but store parameters
|
||||
*
|
||||
* @param cnf Configuration data
|
||||
@ -76,39 +80,40 @@ class FsIndexer : public FsTreeWalkerCB {
|
||||
/** Make signature for file up to date checks */
|
||||
static void makesig(const struct stat *stp, string& out);
|
||||
|
||||
private:
|
||||
|
||||
private:
|
||||
|
||||
class PurgeCandidateRecorder {
|
||||
public:
|
||||
PurgeCandidateRecorder()
|
||||
: dorecord(false) {}
|
||||
void setRecord(bool onoff)
|
||||
{
|
||||
dorecord = onoff;
|
||||
}
|
||||
void record(const string& udi)
|
||||
{
|
||||
// This test does not need to be protected: the value is set at
|
||||
// init and never changed.
|
||||
if (!dorecord)
|
||||
return;
|
||||
PurgeCandidateRecorder()
|
||||
: dorecord(false) {}
|
||||
void setRecord(bool onoff) {
|
||||
dorecord = onoff;
|
||||
}
|
||||
void record(const string& udi) {
|
||||
// This test does not need to be protected: the value is set at
|
||||
// init and never changed.
|
||||
if (!dorecord)
|
||||
return;
|
||||
#ifdef IDX_THREADS
|
||||
std::unique_lock<std::mutex> locker(mutex);
|
||||
std::unique_lock<std::mutex> locker(mutex);
|
||||
#endif
|
||||
udis.push_back(udi);
|
||||
}
|
||||
const vector<string>& getCandidates()
|
||||
{
|
||||
return udis;
|
||||
}
|
||||
udis.push_back(udi);
|
||||
}
|
||||
const vector<string>& getCandidates() {
|
||||
return udis;
|
||||
}
|
||||
private:
|
||||
#ifdef IDX_THREADS
|
||||
std::mutex mutex;
|
||||
#endif
|
||||
bool dorecord;
|
||||
std::vector<std::string> udis;
|
||||
bool dorecord;
|
||||
std::vector<std::string> udis;
|
||||
};
|
||||
|
||||
bool launchAddOrUpdate(const std::string& udi,
|
||||
const std::string& parent_udi, Rcl::Doc& doc);
|
||||
|
||||
FsTreeWalker m_walker;
|
||||
RclConfig *m_config;
|
||||
Rcl::Db *m_db;
|
||||
@ -155,7 +160,7 @@ class FsIndexer : public FsTreeWalkerCB {
|
||||
string getDbDir() {return m_config->getDbDir();}
|
||||
FsTreeWalker::Status
|
||||
processonefile(RclConfig *config, const string &fn,
|
||||
const struct stat *, const map<string,string>& localfields);
|
||||
const struct stat *, const map<string,string>& localfields);
|
||||
};
|
||||
|
||||
#endif /* _fsindexer_h_included_ */
|
||||
|
||||
@ -2590,7 +2590,7 @@ bool Db::getSubDocs(const Doc &idoc, vector<Doc>& subdocs)
|
||||
// used for absent FS mountable volumes.
|
||||
bool Db::udiTreeMarkExisting(const string& udi)
|
||||
{
|
||||
LOGDEB("Db::udiTreeWalk: " << udi << endl);
|
||||
LOGDEB("Db::udiTreeMarkExisting: " << udi << endl);
|
||||
string wrapd = wrap_prefix(udi_prefix);
|
||||
string expr = udi + "*";
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user