From ee8c5410bdb882d94aade225178f36c4ac7cfdf6 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 21 Jun 2019 17:18:15 +0200 Subject: [PATCH] Avoid purging existing subdocuments on file indexing error (e.g.: maybe a file lock issue that will go away) --- src/index/fsindexer.cpp | 592 ++++++++++++++++++++-------------------- src/index/fsindexer.h | 69 ++--- src/rcldb/rcldb.cpp | 2 +- 3 files changed, 336 insertions(+), 327 deletions(-) diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp index e697f3a8..a1f23f80 100644 --- a/src/index/fsindexer.cpp +++ b/src/index/fsindexer.cpp @@ -56,10 +56,10 @@ class DbUpdTask { public: // Take some care to avoid sharing string data (if string impl is cow) DbUpdTask(const string& u, const string& p, const Rcl::Doc& d) - : udi(u.begin(), u.end()), parent_udi(p.begin(), p.end()) - { - d.copyto(&doc); - } + : udi(u.begin(), u.end()), parent_udi(p.begin(), p.end()) + { + d.copyto(&doc); + } string udi; string parent_udi; Rcl::Doc doc; @@ -70,11 +70,11 @@ class InternfileTask { public: // Take some care to avoid sharing string data (if string impl is cow) InternfileTask(const std::string &f, const struct stat *i_stp, - map lfields) - : fn(f.begin(), f.end()), statbuf(*i_stp) - { - map_ss_cp_noshr(lfields, &localfields); - } + map lfields) + : fn(f.begin(), f.end()), statbuf(*i_stp) + { + map_ss_cp_noshr(lfields, &localfields); + } string fn; struct stat statbuf; map localfields; @@ -91,12 +91,12 @@ class FSIFIMissingStore : public FIMissingStore { #endif public: virtual void addMissing(const string& prog, const string& mt) - { + { #ifdef IDX_THREADS - std::unique_lock locker(m_mutex); + std::unique_lock locker(m_mutex); #endif - FIMissingStore::addMissing(prog, mt); - } + FIMissingStore::addMissing(prog, mt); + } }; FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc) @@ -118,20 +118,20 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc) int internqlen = cnf->getThrConf(RclConfig::ThrIntern).first; int internthreads = cnf->getThrConf(RclConfig::ThrIntern).second; if (internqlen >= 0) { - if (!m_iwqueue.start(internthreads, FsIndexerInternfileWorker, this)) { - LOGERR("FsIndexer::FsIndexer: intern worker start failed\n"); - return; - } - m_haveInternQ = true; + if (!m_iwqueue.start(internthreads, FsIndexerInternfileWorker, this)) { + LOGERR("FsIndexer::FsIndexer: intern worker start failed\n"); + return; + } + m_haveInternQ = true; } int splitqlen = cnf->getThrConf(RclConfig::ThrSplit).first; int splitthreads = cnf->getThrConf(RclConfig::ThrSplit).second; if (splitqlen >= 0) { - if (!m_dwqueue.start(splitthreads, FsIndexerDbUpdWorker, this)) { - LOGERR("FsIndexer::FsIndexer: split worker start failed\n"); - return; - } - m_haveSplitQ = true; + if (!m_dwqueue.start(splitthreads, FsIndexerDbUpdWorker, this)) { + LOGERR("FsIndexer::FsIndexer: split worker start failed\n"); + return; + } + m_haveSplitQ = true; } LOGDEB("FsIndexer: threads: haveIQ " << m_haveInternQ << " iql " << internqlen << " iqts " << internthreads << " haveSQ " << @@ -147,12 +147,12 @@ FsIndexer::~FsIndexer() #ifdef IDX_THREADS void *status; if (m_haveInternQ) { - status = m_iwqueue.setTerminateAndWait(); - LOGDEB0("FsIndexer: internfile wrkr status: "<< status << " (1->ok)\n"); + status = m_iwqueue.setTerminateAndWait(); + LOGDEB0("FsIndexer: internfile wrkr status: "<< status << " (1->ok)\n"); } if (m_haveSplitQ) { - status = m_dwqueue.setTerminateAndWait(); - LOGDEB0("FsIndexer: dbupd worker status: " << status << " (1->ok)\n"); + status = m_dwqueue.setTerminateAndWait(); + LOGDEB0("FsIndexer: dbupd worker status: " << status << " (1->ok)\n"); } delete m_stableconfig; #endif // IDX_THREADS @@ -179,23 +179,23 @@ bool FsIndexer::index(int flags) m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0; Chrono chron; if (!init()) - return false; + return false; if (m_updater) { #ifdef IDX_THREADS std::unique_lock locker(m_updater->m_mutex); #endif - m_updater->status.dbtotdocs = m_db->docCnt(); + m_updater->status.dbtotdocs = m_db->docCnt(); } m_walker.setSkippedPaths(m_config->getSkippedPaths()); if (quickshallow) { - m_walker.setOpts(m_walker.getOpts() | FsTreeWalker::FtwSkipDotFiles); - m_walker.setMaxDepth(2); + m_walker.setOpts(m_walker.getOpts() | FsTreeWalker::FtwSkipDotFiles); + m_walker.setMaxDepth(2); } for (const auto& topdir : m_tdl) { - LOGDEB("FsIndexer::index: Indexing " << topdir << " into " << + LOGDEB("FsIndexer::index: Indexing " << topdir << " into " << getDbDir() << "\n"); // If a topdirs member appears to be not here or not mounted @@ -206,48 +206,48 @@ bool FsIndexer::index(int flags) continue; } - // Set the current directory in config so that subsequent - // getConfParams() will get local values - m_config->setKeyDir(topdir); + // Set the current directory in config so that subsequent + // getConfParams() will get local values + m_config->setKeyDir(topdir); - // Adjust the "follow symlinks" option - bool follow; - int opts = m_walker.getOpts(); - if (m_config->getConfParam("followLinks", &follow) && follow) { - opts |= FsTreeWalker::FtwFollow; - } else { - opts &= ~FsTreeWalker::FtwFollow; - } - m_walker.setOpts(opts); + // Adjust the "follow symlinks" option + bool follow; + int opts = m_walker.getOpts(); + if (m_config->getConfParam("followLinks", &follow) && follow) { + opts |= FsTreeWalker::FtwFollow; + } else { + opts &= ~FsTreeWalker::FtwFollow; + } + m_walker.setOpts(opts); - int abslen; - if (m_config->getConfParam("idxabsmlen", &abslen)) - m_db->setAbstractParams(abslen, -1, -1); + int abslen; + if (m_config->getConfParam("idxabsmlen", &abslen)) + m_db->setAbstractParams(abslen, -1, -1); - // Walk the directory tree - if (m_walker.walk(topdir, *this) != FsTreeWalker::FtwOk) { - LOGERR("FsIndexer::index: error while indexing " << topdir << + // Walk the directory tree + if (m_walker.walk(topdir, *this) != FsTreeWalker::FtwOk) { + LOGERR("FsIndexer::index: error while indexing " << topdir << ": " << m_walker.getReason() << "\n"); - return false; - } + return false; + } } #ifdef IDX_THREADS if (m_haveInternQ) - m_iwqueue.waitIdle(); + m_iwqueue.waitIdle(); if (m_haveSplitQ) - m_dwqueue.waitIdle(); + m_dwqueue.waitIdle(); m_db->waitUpdIdle(); #endif // IDX_THREADS if (m_missing) { - string missing; - m_missing->getMissingDescription(missing); - if (!missing.empty()) { - LOGINFO("FsIndexer::index missing helper program(s):\n" << + string missing; + m_missing->getMissingDescription(missing); + if (!missing.empty()) { + LOGINFO("FsIndexer::index missing helper program(s):\n" << missing << "\n"); - } - m_config->storeMissingHelperDesc(missing); + } + m_config->storeMissingHelperDesc(missing); } LOGINFO("fsindexer index time: " << chron.millis() << " mS\n"); return true; @@ -342,7 +342,7 @@ bool FsIndexer::indexFiles(list& files, int flags) int abslen; if (m_config->getConfParam("idxabsmlen", &abslen)) - m_db->setAbstractParams(abslen, -1, -1); + m_db->setAbstractParams(abslen, -1, -1); m_purgeCandidates.setRecord(true); @@ -354,29 +354,29 @@ bool FsIndexer::indexFiles(list& files, int flags) LOGDEB2("FsIndexer::indexFiles: [" << it << "]\n"); m_config->setKeyDir(path_getfather(*it)); - if (m_havelocalfields) - localfieldsfromconf(); + if (m_havelocalfields) + localfieldsfromconf(); - bool follow = false; - m_config->getConfParam("followLinks", &follow); + bool follow = false; + m_config->getConfParam("followLinks", &follow); walker.setOnlyNames(m_config->getOnlyNames()); walker.setSkippedNames(m_config->getSkippedNames()); - // Check path against indexed areas and skipped names/paths + // Check path against indexed areas and skipped names/paths if (!(flags & ConfIndexer::IxFIgnoreSkip) && - matchesSkipped(m_tdl, walker, *it)) { + matchesSkipped(m_tdl, walker, *it)) { it++; - continue; + continue; } - struct stat stb; - int ststat = path_fileprops(*it, &stb, follow); - if (ststat != 0) { - LOGERR("FsIndexer::indexFiles: (l)stat " << *it << ": " << + struct stat stb; + int ststat = path_fileprops(*it, &stb, follow); + if (ststat != 0) { + LOGERR("FsIndexer::indexFiles: (l)stat " << *it << ": " << strerror(errno) << "\n"); it++; - continue; - } + continue; + } if (!(flags & ConfIndexer::IxFIgnoreSkip) && (S_ISREG(stb.st_mode) || S_ISLNK(stb.st_mode))) { if (!walker.inOnlyNames(path_getsimple(*it))) { @@ -384,11 +384,11 @@ bool FsIndexer::indexFiles(list& files, int flags) continue; } } - if (processone(*it, &stb, FsTreeWalker::FtwRegular) != - FsTreeWalker::FtwOk) { - LOGERR("FsIndexer::indexFiles: processone failed\n"); - goto out; - } + if (processone(*it, &stb, FsTreeWalker::FtwRegular) != + FsTreeWalker::FtwOk) { + LOGERR("FsIndexer::indexFiles: processone failed\n"); + goto out; + } it = files.erase(it); } @@ -396,23 +396,23 @@ bool FsIndexer::indexFiles(list& files, int flags) out: #ifdef IDX_THREADS if (m_haveInternQ) - m_iwqueue.waitIdle(); + m_iwqueue.waitIdle(); if (m_haveSplitQ) - m_dwqueue.waitIdle(); + m_dwqueue.waitIdle(); m_db->waitUpdIdle(); #endif // IDX_THREADS // Purge possible orphan documents if (ret == true) { - LOGDEB("Indexfiles: purging orphans\n"); - const vector& purgecandidates = m_purgeCandidates.getCandidates(); - for (vector::const_iterator it = purgecandidates.begin(); - it != purgecandidates.end(); it++) { - LOGDEB("Indexfiles: purging orphans for " << *it << "\n"); - m_db->purgeOrphans(*it); - } + LOGDEB("Indexfiles: purging orphans\n"); + const vector& purgecandidates = m_purgeCandidates.getCandidates(); + for (vector::const_iterator it = purgecandidates.begin(); + it != purgecandidates.end(); it++) { + LOGDEB("Indexfiles: purging orphans for " << *it << "\n"); + m_db->purgeOrphans(*it); + } #ifdef IDX_THREADS - m_db->waitUpdIdle(); + m_db->waitUpdIdle(); #endif // IDX_THREADS } @@ -427,18 +427,18 @@ bool FsIndexer::purgeFiles(list& files) LOGDEB("FsIndexer::purgeFiles\n"); bool ret = false; if (!init()) - return false; + return false; for (list::iterator it = files.begin(); it != files.end(); ) { - string udi; - make_udi(*it, cstr_null, udi); + string udi; + make_udi(*it, cstr_null, udi); // rcldb::purgefile returns true if the udi was either not // found or deleted, false only in case of actual error bool existed; - if (!m_db->purgeFile(udi, &existed)) { - LOGERR("FsIndexer::purgeFiles: Database error\n"); - goto out; - } + if (!m_db->purgeFile(udi, &existed)) { + LOGERR("FsIndexer::purgeFiles: Database error\n"); + goto out; + } // If we actually deleted something, take it off the list if (existed) { it = files.erase(it); @@ -451,9 +451,9 @@ bool FsIndexer::purgeFiles(list& files) out: #ifdef IDX_THREADS if (m_haveInternQ) - m_iwqueue.waitIdle(); + m_iwqueue.waitIdle(); if (m_haveSplitQ) - m_dwqueue.waitIdle(); + m_dwqueue.waitIdle(); m_db->waitUpdIdle(); #endif // IDX_THREADS LOGDEB("FsIndexer::purgeFiles: done\n"); @@ -468,12 +468,12 @@ void FsIndexer::localfieldsfromconf() string sfields; m_config->getConfParam("localfields", sfields); if (!sfields.compare(m_slocalfields)) - return; + return; m_slocalfields = sfields; m_localfields.clear(); if (sfields.empty()) - return; + return; string value; ConfSimple attrs; @@ -481,9 +481,9 @@ void FsIndexer::localfieldsfromconf() vector nmlst = attrs.getNames(cstr_null); for (vector::const_iterator it = nmlst.begin(); it != nmlst.end(); it++) { - string nm = m_config->fieldCanon(*it); - attrs.get(*it, m_localfields[nm]); - LOGDEB2("FsIndexer::localfieldsfromconf: [" << nm << "]->[" << + string nm = m_config->fieldCanon(*it); + attrs.get(*it, m_localfields[nm]); + LOGDEB2("FsIndexer::localfieldsfromconf: [" << nm << "]->[" << m_localfields[nm] << "]\n"); } } @@ -491,11 +491,11 @@ void FsIndexer::localfieldsfromconf() void FsIndexer::setlocalfields(const map& fields, Rcl::Doc& doc) { for (map::const_iterator it = fields.begin(); - it != fields.end(); it++) { + it != fields.end(); it++) { // Being chosen by the user, localfields override values from // the filter. The key is already canonic (see // localfieldsfromconf()) - doc.meta[it->first] = it->second; + doc.meta[it->first] = it->second; } } @@ -518,18 +518,18 @@ void *FsIndexerDbUpdWorker(void * fsp) DbUpdTask *tsk; for (;;) { - size_t qsz; - if (!tqp->take(&tsk, &qsz)) { - tqp->workerExit(); - return (void*)1; - } - LOGDEB0("FsIndexerDbUpdWorker: task ql " << qsz << "\n"); - if (!fip->m_db->addOrUpdate(tsk->udi, tsk->parent_udi, tsk->doc)) { - LOGERR("FsIndexerDbUpdWorker: addOrUpdate failed\n"); - tqp->workerExit(); - return (void*)0; - } - delete tsk; + size_t qsz; + if (!tqp->take(&tsk, &qsz)) { + tqp->workerExit(); + return (void*)1; + } + LOGDEB0("FsIndexerDbUpdWorker: task ql " << qsz << "\n"); + if (!fip->m_db->addOrUpdate(tsk->udi, tsk->parent_udi, tsk->doc)) { + LOGERR("FsIndexerDbUpdWorker: addOrUpdate failed\n"); + tqp->workerExit(); + return (void*)0; + } + delete tsk; } } @@ -542,20 +542,20 @@ void *FsIndexerInternfileWorker(void * fsp) InternfileTask *tsk = 0; for (;;) { - if (!tqp->take(&tsk)) { - tqp->workerExit(); - return (void*)1; - } - LOGDEB0("FsIndexerInternfileWorker: task fn " << tsk->fn << "\n"); - if (fip->processonefile(&myconf, tsk->fn, &tsk->statbuf, - tsk->localfields) != - FsTreeWalker::FtwOk) { - LOGERR("FsIndexerInternfileWorker: processone failed\n"); - tqp->workerExit(); - return (void*)0; - } - LOGDEB1("FsIndexerInternfileWorker: done fn " << tsk->fn << "\n"); - delete tsk; + if (!tqp->take(&tsk)) { + tqp->workerExit(); + return (void*)1; + } + LOGDEB0("FsIndexerInternfileWorker: task fn " << tsk->fn << "\n"); + if (fip->processonefile(&myconf, tsk->fn, &tsk->statbuf, + tsk->localfields) != + FsTreeWalker::FtwOk) { + LOGERR("FsIndexerInternfileWorker: processone failed\n"); + tqp->workerExit(); + return (void*)0; + } + LOGDEB1("FsIndexerInternfileWorker: done fn " << tsk->fn << "\n"); + delete tsk; } } #endif // IDX_THREADS @@ -573,33 +573,29 @@ void *FsIndexerInternfileWorker(void * fsp) /// mostly contains pretty raw utf8 data. FsTreeWalker::Status FsIndexer::processone(const std::string &fn, const struct stat *stp, - FsTreeWalker::CbFlag flg) + FsTreeWalker::CbFlag flg) { if (m_updater) { #ifdef IDX_THREADS std::unique_lock locker(m_updater->m_mutex); #endif - if (!m_updater->update()) { - return FsTreeWalker::FtwStop; - } + if (!m_updater->update()) { + return FsTreeWalker::FtwStop; + } } // If we're changing directories, possibly adjust parameters (set // the current directory in configuration object) - if (flg == FsTreeWalker::FtwDirEnter || - flg == FsTreeWalker::FtwDirReturn) { - m_config->setKeyDir(fn); - - // Set up filter/skipped patterns for this subtree. - m_walker.setOnlyNames(m_config->getOnlyNames()); - m_walker.setSkippedNames(m_config->getSkippedNames()); - + if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwDirReturn) { + m_config->setKeyDir(fn); + // Set up filter/skipped patterns for this subtree. + m_walker.setOnlyNames(m_config->getOnlyNames()); + m_walker.setSkippedNames(m_config->getSkippedNames()); // Adjust local fields from config for this subtree - if (m_havelocalfields) - localfieldsfromconf(); - - if (flg == FsTreeWalker::FtwDirReturn) - return FsTreeWalker::FtwOk; + if (m_havelocalfields) + localfieldsfromconf(); + if (flg == FsTreeWalker::FtwDirReturn) + return FsTreeWalker::FtwOk; } #ifdef IDX_THREADS @@ -608,7 +604,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, if (m_iwqueue.put(tp)) { return FsTreeWalker::FtwOk; } else { - return FsTreeWalker::FtwError; + return FsTreeWalker::FtwError; } } #endif @@ -616,10 +612,29 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, return processonefile(m_config, fn, stp, m_localfields); } +// Start db update, either by queueing or by direct call +bool FsIndexer::launchAddOrUpdate(const string& udi, const string& parent_udi, + Rcl::Doc& doc) +{ +#ifdef IDX_THREADS + if (m_haveSplitQ) { + DbUpdTask *tp = new DbUpdTask(udi, parent_udi, doc); + if (!m_dwqueue.put(tp)) { + LOGERR("processonefile: wqueue.put failed\n"); + return false; + } else { + return true; + } + } +#endif + + return m_db->addOrUpdate(udi, parent_udi, doc); +} + FsTreeWalker::Status FsIndexer::processonefile(RclConfig *config, - const std::string &fn, const struct stat *stp, - const map& localfields) + const std::string &fn, const struct stat *stp, + const map& localfields) { //////////////////// // Check db up to date ? Doing this before file type @@ -633,10 +648,9 @@ FsIndexer::processonefile(RclConfig *config, // excludedmimetypes, etc. config->setKeyDir(path_getfather(fn)); - // Document signature. This is based on m/ctime and size and used - // for the uptodate check (the value computed here is checked - // against the stored one). Changing the computation forces a full - // reindex of course. + // File signature and up to date check. The sig is based on + // m/ctime and size and the possibly new value is checked against + // the stored one. string sig; makesig(stp, sig); string udi; @@ -657,7 +671,7 @@ FsIndexer::processonefile(RclConfig *config, // miss the data update. We would have to store both the mtime and // the ctime to avoid this bool xattronly = m_detectxattronly && !m_db->inFullReset() && - existingDoc && needupdate && (stp->st_mtime < stp->st_ctime); + existingDoc && needupdate && (stp->st_mtime < stp->st_ctime); LOGDEB("processone: needupdate " << needupdate << " noretry " << m_noretryfailed << " existing " << existingDoc << " oldsig [" << @@ -678,19 +692,19 @@ FsIndexer::processonefile(RclConfig *config, } if (!needupdate) { - LOGDEB0("processone: up to date: " << fn << "\n"); - if (m_updater) { + LOGDEB0("processone: up to date: " << fn << "\n"); + if (m_updater) { #ifdef IDX_THREADS std::unique_lock locker(m_updater->m_mutex); #endif - // Status bar update, abort request etc. - m_updater->status.fn = fn; - ++(m_updater->status.filesdone); - if (!m_updater->update()) { - return FsTreeWalker::FtwStop; - } - } - return FsTreeWalker::FtwOk; + // Status bar update, abort request etc. + m_updater->status.fn = fn; + ++(m_updater->status.filesdone); + if (!m_updater->update()) { + return FsTreeWalker::FtwStop; + } + } + return FsTreeWalker::FtwOk; } LOGDEB0("processone: processing: [" << @@ -712,137 +726,135 @@ FsIndexer::processonefile(RclConfig *config, string mimetype; if (!xattronly) { - FileInterner interner(fn, stp, config, FileInterner::FIF_none); - if (!interner.ok()) { - // no indexing whatsoever in this case. This typically means that - // indexallfilenames is not set - return FsTreeWalker::FtwOk; - } - mimetype = interner.getMimetype(); + FileInterner interner(fn, stp, config, FileInterner::FIF_none); + if (!interner.ok()) { + // no indexing whatsoever in this case. This typically means that + // indexallfilenames is not set + return FsTreeWalker::FtwOk; + } + mimetype = interner.getMimetype(); - interner.setMissingStore(m_missing); - FileInterner::Status fis = FileInterner::FIAgain; - bool hadNonNullIpath = false; - while (fis == FileInterner::FIAgain) { - doc.erase(); - try { - fis = interner.internfile(doc); - } catch (CancelExcept) { - LOGERR("fsIndexer::processone: interrupted\n"); - return FsTreeWalker::FtwStop; - } + interner.setMissingStore(m_missing); + FileInterner::Status fis = FileInterner::FIAgain; + bool hadNonNullIpath = false; + while (fis == FileInterner::FIAgain) { + doc.erase(); + try { + fis = interner.internfile(doc); + } catch (CancelExcept) { + LOGERR("fsIndexer::processone: interrupted\n"); + return FsTreeWalker::FtwStop; + } - // We index at least the file name even if there was an error. - // We'll change the signature to ensure that the indexing will - // be retried every time. + // We index at least the file name even if there was an error. + // We'll change the signature to ensure that the indexing will + // be retried every time. // If there is an error and the base doc was already seen, // we're done - if (fis == FileInterner::FIError && hadNullIpath) - return FsTreeWalker::FtwOk; - - // Internal access path for multi-document files. If empty, this is - // for the main file. - if (doc.ipath.empty()) { - hadNullIpath = true; - if (hadNonNullIpath) { - // Note that only the filters can reliably compute - // this. What we do is dependant of the doc order (if - // we see the top doc first, we won't set the flag) - doc.haschildren = true; - } - } else { - hadNonNullIpath = true; - } + if (fis == FileInterner::FIError && hadNullIpath) { + return FsTreeWalker::FtwOk; + } + + // Internal access path for multi-document files. If empty, this is + // for the main file. + if (doc.ipath.empty()) { + hadNullIpath = true; + if (hadNonNullIpath) { + // Note that only the filters can reliably compute + // this. What we do is dependant of the doc order (if + // we see the top doc first, we won't set the flag) + doc.haschildren = true; + } + } else { + hadNonNullIpath = true; + } make_udi(fn, doc.ipath, udi); - // Set file name, mod time and url if not done by + // Set file name, mod time and url if not done by // filter. We used to set the top-level container file // name for all subdocs without a proper file name, but // this did not make sense (resulted in multiple not // useful hits on the subdocs when searching for the // file name). - if (doc.fmtime.empty()) - doc.fmtime = ascdate; - if (doc.url.empty()) - doc.url = path_pathtofileurl(fn); - const string *fnp = 0; - if (doc.ipath.empty()) { + if (doc.fmtime.empty()) + doc.fmtime = ascdate; + if (doc.url.empty()) + doc.url = path_pathtofileurl(fn); + const string *fnp = 0; + if (doc.ipath.empty()) { if (!doc.peekmeta(Rcl::Doc::keyfn, &fnp) || fnp->empty()) doc.meta[Rcl::Doc::keyfn] = utf8fn; } // Set container file name for all docs, top or subdoc doc.meta[Rcl::Doc::keytcfn] = utf8fn; - doc.pcbytes = lltodecstr(stp->st_size); - // Document signature for up to date checks. All subdocs inherit the - // file's. - doc.sig = sig; + doc.pcbytes = lltodecstr(stp->st_size); + // Document signature for up to date checks. All subdocs inherit the + // file's. + doc.sig = sig; - // If there was an error, ensure indexing will be - // retried. This is for the once missing, later installed - // filter case. It can make indexing much slower (if there are - // myriads of such files, the ext script is executed for them - // and fails every time) - if (fis == FileInterner::FIError) { - doc.sig += cstr_plus; - } + // If there was an error, ensure indexing will be + // retried. This is for the once missing, later installed + // filter case. It can make indexing much slower (if there are + // myriads of such files, the ext script is executed for them + // and fails every time) + if (fis == FileInterner::FIError) { + doc.sig += cstr_plus; + } - // Possibly add fields from local config - if (m_havelocalfields) - setlocalfields(localfields, doc); + // Possibly add fields from local config + if (m_havelocalfields) + setlocalfields(localfields, doc); - // Add document to database. If there is an ipath, add it - // as a child of the file document. -#ifdef IDX_THREADS - if (m_haveSplitQ) { - DbUpdTask *tp = new DbUpdTask(udi, doc.ipath.empty() ? - cstr_null : parent_udi, doc); - if (!m_dwqueue.put(tp)) { - LOGERR("processonefile: wqueue.put failed\n"); - return FsTreeWalker::FtwError; - } - } else { -#endif - if (!m_db->addOrUpdate(udi, doc.ipath.empty() ? - cstr_null : parent_udi, doc)) { - return FsTreeWalker::FtwError; - } -#ifdef IDX_THREADS - } -#endif + // Add document to database. If there is an ipath, add it + // as a child of the file document. + if (!launchAddOrUpdate(udi, doc.ipath.empty() ? + cstr_null : parent_udi, doc)) { + return FsTreeWalker::FtwError; + } - // Tell what we are doing and check for interrupt request - if (m_updater) { + // Tell what we are doing and check for interrupt request + if (m_updater) { #ifdef IDX_THREADS std::unique_lock locker(m_updater->m_mutex); #endif - ++(m_updater->status.docsdone); - if (m_updater->status.dbtotdocs < m_updater->status.docsdone) - m_updater->status.dbtotdocs = m_updater->status.docsdone; - m_updater->status.fn = fn; - if (!doc.ipath.empty()) { - m_updater->status.fn += "|" + doc.ipath; + ++(m_updater->status.docsdone); + if (m_updater->status.dbtotdocs < m_updater->status.docsdone) + m_updater->status.dbtotdocs = m_updater->status.docsdone; + m_updater->status.fn = fn; + if (!doc.ipath.empty()) { + m_updater->status.fn += "|" + doc.ipath; } else { if (fis == FileInterner::FIError) { ++(m_updater->status.fileerrors); } ++(m_updater->status.filesdone); } - if (!m_updater->update()) { - return FsTreeWalker::FtwStop; - } - } - } + if (!m_updater->update()) { + return FsTreeWalker::FtwStop; + } + } + } - // If this doc existed and it's a container, recording for - // possible subdoc purge (this will be used only if we don't do a - // db-wide purge, e.g. if we're called from indexfiles()). - LOGDEB2("processOnefile: existingDoc " << existingDoc << - " hadNonNullIpath " << hadNonNullIpath << "\n"); - if (existingDoc && hadNonNullIpath) { - m_purgeCandidates.record(parent_udi); - } + if (fis == FileInterner::FIError) { + // In case of error, avoid purging any existing + // subdoc. For example on windows, this will avoid erasing + // all the emails from a .ost because it is currently + // locked by Outlook. + LOGDEB("processonefile: internfile error, marking " + "subdocs as existing\n"); + m_db->udiTreeMarkExisting(parent_udi); + } else { + // If this doc existed and it's a container, recording for + // possible subdoc purge (this will be used only if we don't do a + // db-wide purge, e.g. if we're called from indexfiles()). + LOGDEB2("processOnefile: existingDoc " << existingDoc << + " hadNonNullIpath " << hadNonNullIpath << "\n"); + if (existingDoc && hadNonNullIpath) { + m_purgeCandidates.record(parent_udi); + } + } } // If we had no instance with a null ipath, we create an empty @@ -852,38 +864,30 @@ FsIndexer::processonefile(RclConfig *config, // If xattronly is set, ONLY the extattr metadata is valid and will be used // by the following step. if (xattronly || hadNullIpath == false) { - LOGDEB("Creating empty doc for file or pure xattr update\n"); - Rcl::Doc fileDoc; - if (xattronly) { - map xfields; - reapXAttrs(config, fn, xfields); - docFieldsFromXattrs(config, xfields, fileDoc); - fileDoc.onlyxattr = true; - } else { - fileDoc.fmtime = ascdate; - fileDoc.meta[Rcl::Doc::keyfn] = + LOGDEB("Creating empty doc for file or pure xattr update\n"); + Rcl::Doc fileDoc; + if (xattronly) { + map xfields; + reapXAttrs(config, fn, xfields); + docFieldsFromXattrs(config, xfields, fileDoc); + fileDoc.onlyxattr = true; + } else { + fileDoc.fmtime = ascdate; + fileDoc.meta[Rcl::Doc::keyfn] = fileDoc.meta[Rcl::Doc::keytcfn] = utf8fn; - fileDoc.haschildren = true; - fileDoc.mimetype = mimetype; - fileDoc.url = path_pathtofileurl(fn); - if (m_havelocalfields) - setlocalfields(localfields, fileDoc); - fileDoc.pcbytes = lltodecstr(stp->st_size); - } + fileDoc.haschildren = true; + fileDoc.mimetype = mimetype; + fileDoc.url = path_pathtofileurl(fn); + if (m_havelocalfields) + setlocalfields(localfields, fileDoc); + fileDoc.pcbytes = lltodecstr(stp->st_size); + } - fileDoc.sig = sig; + fileDoc.sig = sig; -#ifdef IDX_THREADS - if (m_haveSplitQ) { - DbUpdTask *tp = new DbUpdTask(parent_udi, cstr_null, fileDoc); - if (!m_dwqueue.put(tp)) - return FsTreeWalker::FtwError; - else - return FsTreeWalker::FtwOk; - } -#endif - if (!m_db->addOrUpdate(parent_udi, cstr_null, fileDoc)) - return FsTreeWalker::FtwError; + if (!launchAddOrUpdate(parent_udi, cstr_null, fileDoc)) { + return FsTreeWalker::FtwError; + } } return FsTreeWalker::FtwOk; diff --git a/src/index/fsindexer.h b/src/index/fsindexer.h index 24fb9c5f..6f42da86 100644 --- a/src/index/fsindexer.h +++ b/src/index/fsindexer.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009 J.F.Dockes +/* Copyright (C) 2009-2019 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -33,19 +33,23 @@ struct stat; class DbUpdTask; class InternfileTask; +namespace Rcl { +class Doc; +} + /** Index selected parts of the file system -Tree indexing: we inherits FsTreeWalkerCB so that, the processone() -method is called by the file-system tree walk code for each file and -directory. We keep all state needed while indexing, and finally call -the methods to purge the db of stale entries and create the stemming -databases. + Tree indexing: we inherits FsTreeWalkerCB so that, the processone() + method is called by the file-system tree walk code for each file and + directory. We keep all state needed while indexing, and finally call + the methods to purge the db of stale entries and create the stemming + databases. -Single file(s) indexing: there are also calls to index or purge lists of files. -No database purging or stem db updating in this case. + Single file(s) indexing: there are also calls to index or purge lists of files. + No database purging or stem db updating in this case. */ class FsIndexer : public FsTreeWalkerCB { - public: +public: /** Constructor does nothing but store parameters * * @param cnf Configuration data @@ -76,39 +80,40 @@ class FsIndexer : public FsTreeWalkerCB { /** Make signature for file up to date checks */ static void makesig(const struct stat *stp, string& out); - private: + +private: class PurgeCandidateRecorder { public: - PurgeCandidateRecorder() - : dorecord(false) {} - void setRecord(bool onoff) - { - dorecord = onoff; - } - void record(const string& udi) - { - // This test does not need to be protected: the value is set at - // init and never changed. - if (!dorecord) - return; + PurgeCandidateRecorder() + : dorecord(false) {} + void setRecord(bool onoff) { + dorecord = onoff; + } + void record(const string& udi) { + // This test does not need to be protected: the value is set at + // init and never changed. + if (!dorecord) + return; #ifdef IDX_THREADS - std::unique_lock locker(mutex); + std::unique_lock locker(mutex); #endif - udis.push_back(udi); - } - const vector& getCandidates() - { - return udis; - } + udis.push_back(udi); + } + const vector& getCandidates() { + return udis; + } private: #ifdef IDX_THREADS std::mutex mutex; #endif - bool dorecord; - std::vector udis; + bool dorecord; + std::vector udis; }; + bool launchAddOrUpdate(const std::string& udi, + const std::string& parent_udi, Rcl::Doc& doc); + FsTreeWalker m_walker; RclConfig *m_config; Rcl::Db *m_db; @@ -155,7 +160,7 @@ class FsIndexer : public FsTreeWalkerCB { string getDbDir() {return m_config->getDbDir();} FsTreeWalker::Status processonefile(RclConfig *config, const string &fn, - const struct stat *, const map& localfields); + const struct stat *, const map& localfields); }; #endif /* _fsindexer_h_included_ */ diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index dbd6db53..d5df5c4c 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -2590,7 +2590,7 @@ bool Db::getSubDocs(const Doc &idoc, vector& subdocs) // used for absent FS mountable volumes. bool Db::udiTreeMarkExisting(const string& udi) { - LOGDEB("Db::udiTreeWalk: " << udi << endl); + LOGDEB("Db::udiTreeMarkExisting: " << udi << endl); string wrapd = wrap_prefix(udi_prefix); string expr = udi + "*";