diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp
index e851dcd9..a2a5423c 100644
--- a/src/index/fsindexer.cpp
+++ b/src/index/fsindexer.cpp
@@ -16,6 +16,8 @@
  */
 #include "autoconfig.h"
 
+#include "fsindexer.h"
+
 #include
 #include
 #include
@@ -35,7 +37,6 @@
 #include "rcldb.h"
 #include "readfile.h"
 #include "indexer.h"
-#include "fsindexer.h"
 #include "transcode.h"
 #include "log.h"
 #include "internfile.h"
@@ -107,7 +108,7 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
       m_dwqueue("Split", cnf->getThrConf(RclConfig::ThrSplit).first)
 #endif // IDX_THREADS
 {
-    LOGDEB1("FsIndexer::FsIndexer\n" );
+    LOGDEB1("FsIndexer::FsIndexer\n");
     m_havelocalfields = m_config->hasNameAnywhere("localfields");
     m_config->getConfParam("detectxattronly", &m_detectxattronly);
@@ -118,7 +119,7 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
     int internthreads = cnf->getThrConf(RclConfig::ThrIntern).second;
     if (internqlen >= 0) {
         if (!m_iwqueue.start(internthreads, FsIndexerInternfileWorker, this)) {
-            LOGERR("FsIndexer::FsIndexer: intern worker start failed\n" );
+            LOGERR("FsIndexer::FsIndexer: intern worker start failed\n");
             return;
         }
         m_haveInternQ = true;
@@ -127,28 +128,31 @@
     int splitthreads = cnf->getThrConf(RclConfig::ThrSplit).second;
     if (splitqlen >= 0) {
         if (!m_dwqueue.start(splitthreads, FsIndexerDbUpdWorker, this)) {
-            LOGERR("FsIndexer::FsIndexer: split worker start failed\n" );
+            LOGERR("FsIndexer::FsIndexer: split worker start failed\n");
             return;
         }
         m_haveSplitQ = true;
     }
-    LOGDEB("FsIndexer: threads: haveIQ " << (m_haveInternQ) << " iql " << (internqlen) << " iqts " << (internthreads) << " haveSQ " << (m_haveSplitQ) << " sql " << (splitqlen) << " sqts " << (splitthreads) << "\n" );
+    LOGDEB("FsIndexer: threads: haveIQ " << m_haveInternQ << " iql " <<
+           internqlen << " iqts " << internthreads << " haveSQ " <<
+           m_haveSplitQ << " sql " << splitqlen << " sqts " << splitthreads <<
+           "\n");
 #endif // IDX_THREADS
 }
 
 FsIndexer::~FsIndexer()
 {
-    LOGDEB1("FsIndexer::~FsIndexer()\n" );
+    LOGDEB1("FsIndexer::~FsIndexer()\n");
 #ifdef IDX_THREADS
     void *status;
     if (m_haveInternQ) {
         status = m_iwqueue.setTerminateAndWait();
-        LOGDEB0("FsIndexer: internfile wrkr status: " << (status) << " (1->ok)\n" );
+        LOGDEB0("FsIndexer: internfile wrkr status: "<< status << " (1->ok)\n");
     }
     if (m_haveSplitQ) {
         status = m_dwqueue.setTerminateAndWait();
-        LOGDEB0("FsIndexer: dbupd worker status: " << (status) << " (1->ok)\n" );
+        LOGDEB0("FsIndexer: dbupd worker status: " << status << " (1->ok)\n");
     }
     delete m_stableconfig;
 #endif // IDX_THREADS
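
The constructor and destructor hunks above bracket the lifetime of the two optional worker queues. As a reading aid, here is a minimal sketch of that queue lifecycle, reconstructed only from the calls visible in this patch (WorkQueue is recoll's workqueue.h template; the task type and all names below are hypothetical):

    #include "workqueue.h"

    struct MyTask { std::string fn; };
    static WorkQueue<MyTask*> myqueue("MyQueue", /*queue depth*/ 2);

    static void *myWorker(void *)
    {
        MyTask *tsk = nullptr;
        size_t qsz;
        for (;;) {
            if (!myqueue.take(&tsk, &qsz)) { // false: queue is terminating
                myqueue.workerExit();
                return (void*)1;
            }
            // ... process tsk->fn ...
            delete tsk;
        }
    }

    // Producer side, mirroring the hunks above:
    //   myqueue.start(nthreads, myWorker, nullptr);  // spawn workers
    //   myqueue.put(new MyTask{"/some/file"});       // queue work
    //   myqueue.waitIdle();                          // drain pending tasks
    //   myqueue.setTerminateAndWait();               // join at shutdown
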
@@ -161,13 +165,28 @@ bool FsIndexer::init()
     if (m_tdl.empty()) {
         m_tdl = m_config->getTopdirs();
         if (m_tdl.empty()) {
-            LOGERR("FsIndexers: no topdirs list defined\n" );
+            LOGERR("FsIndexers: no topdirs list defined\n");
             return false;
         }
     }
     return true;
 }
 
+// Check if path is either non-existing or an empty directory.
+static bool path_empty(const string& path)
+{
+    if (path_isdir(path)) {
+        string reason;
+        std::set<std::string> entries;
+        if (!readdir(path, reason, entries) || entries.empty()) {
+            return true;
+        }
+        return false;
+    } else {
+        return !path_exists(path);
+    }
+}
+
 // Recursively index each directory in the topdirs:
 bool FsIndexer::index(int flags)
 {
@@ -190,14 +209,21 @@ bool FsIndexer::index(int flags)
         m_walker.setMaxDepth(2);
     }
 
-    for (vector<string>::const_iterator it = m_tdl.begin();
-         it != m_tdl.end(); it++) {
-        LOGDEB("FsIndexer::index: Indexing " << *it << " into " <<
+    for (const auto& topdir : m_tdl) {
+        LOGDEB("FsIndexer::index: Indexing " << topdir << " into " <<
                getDbDir() << "\n");
 
+        // If a topdirs member appears to be not here or not mounted
+        // (empty), avoid deleting all the related index content by
+        // marking the current docs as existing.
+        if (path_empty(topdir)) {
+            m_db->udiTreeMarkExisting(topdir);
+            continue;
+        }
+
         // Set the current directory in config so that subsequent
         // getConfParams() will get local values
-        m_config->setKeyDir(*it);
+        m_config->setKeyDir(topdir);
 
         // Adjust the "follow symlinks" option
         bool follow;
@@ -214,8 +240,8 @@ bool FsIndexer::index(int flags)
         m_db->setAbstractParams(abslen, -1, -1);
 
         // Walk the directory tree
-        if (m_walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
-            LOGERR("FsIndexer::index: error while indexing " << *it <<
+        if (m_walker.walk(topdir, *this) != FsTreeWalker::FtwOk) {
+            LOGERR("FsIndexer::index: error while indexing " << topdir <<
                    ": " << m_walker.getReason() << "\n");
             return false;
         }
@@ -233,11 +259,12 @@ bool FsIndexer::index(int flags)
         string missing;
         m_missing->getMissingDescription(missing);
         if (!missing.empty()) {
-            LOGINFO("FsIndexer::index missing helper program(s):\n" << (missing) << "\n" );
+            LOGINFO("FsIndexer::index missing helper program(s):\n" <<
+                    missing << "\n");
         }
         m_config->storeMissingHelperDesc(missing);
     }
-    LOGINFO("fsindexer index time: " << (chron.millis()) << " mS\n" );
+    LOGINFO("fsindexer index time: " << chron.millis() << " mS\n");
     return true;
 }
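
For readers who don't know recoll's path utilities: path_empty() returns true for a nonexistent path or an empty directory, which is how an unmounted removable volume typically appears at its mount point. A rough equivalent in terms of C++17 std::filesystem (a sketch only; the patch itself uses path_isdir()/path_exists()/readdir()):

    #include <filesystem>

    static bool path_empty_fs(const std::filesystem::path& p)
    {
        namespace fs = std::filesystem;
        std::error_code ec;
        if (fs::is_directory(p, ec)) {
            // An empty directory is likely an unmounted mount point.
            return fs::directory_iterator(p, ec) == fs::directory_iterator();
        }
        return !fs::exists(p, ec);
    }
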
- LOGDEB2("matchesSkipped: comparing ancestor [" << (mpath) << "] to topdir [" << (it) << "]\n" ); + LOGDEB2("matchesSkipped: comparing ancestor [" << mpath << + "] to topdir [" << it << "]\n"); if (!mpath.compare(*it)) { topdir = *it; goto goodpath; @@ -266,7 +294,7 @@ static bool matchesSkipped(const vector& tdl, } if (walker.inSkippedPaths(mpath, false)) { - LOGDEB("FsIndexer::indexFiles: skipping [" << (path) << "] (skpp)\n" ); + LOGDEB("FsIndexer::indexFiles: skipping [" << path << "] (skpp)\n"); return true; } @@ -280,12 +308,13 @@ static bool matchesSkipped(const vector& tdl, // path did not shorten, something is seriously amiss // (could be an assert actually) if (mpath.length() >= len) { - LOGERR("FsIndexer::indexFile: internal Error: path [" << (mpath) << "] did not shorten\n" ); + LOGERR("FsIndexer::indexFile: internal Error: path [" << mpath << + "] did not shorten\n"); return true; } } // We get there if neither topdirs nor skippedPaths tests matched - LOGDEB("FsIndexer::indexFiles: skipping [" << (path) << "] (ntd)\n" ); + LOGDEB("FsIndexer::indexFiles: skipping [" << path << "] (ntd)\n"); return true; goodpath: @@ -295,7 +324,7 @@ goodpath: while (mpath.length() >= topdir.length() && mpath.length() > 1) { string fn = path_getsimple(mpath); if (walker.inSkippedNames(fn)) { - LOGDEB("FsIndexer::indexFiles: skipping [" << (path) << "] (skpn)\n" ); + LOGDEB("FsIndexer::indexFiles: skipping [" << path << "] (skpn)\n"); return true; } @@ -319,7 +348,7 @@ goodpath: */ bool FsIndexer::indexFiles(list& files, int flags) { - LOGDEB("FsIndexer::indexFiles\n" ); + LOGDEB("FsIndexer::indexFiles\n"); m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0; bool ret = false; @@ -337,7 +366,7 @@ bool FsIndexer::indexFiles(list& files, int flags) walker.setSkippedPaths(m_config->getSkippedPaths()); for (list::iterator it = files.begin(); it != files.end(); ) { - LOGDEB2("FsIndexer::indexFiles: [" << (it) << "]\n" ); + LOGDEB2("FsIndexer::indexFiles: [" << it << "]\n"); m_config->setKeyDir(path_getfather(*it)); if (m_havelocalfields) @@ -357,7 +386,7 @@ bool FsIndexer::indexFiles(list& files, int flags) struct stat stb; int ststat = path_fileprops(*it, &stb, follow); if (ststat != 0) { - LOGERR("FsIndexer::indexFiles: (l)stat " << *it << ": " << + LOGERR("FsIndexer::indexFiles: (l)stat " << *it << ": " << strerror(errno) << "\n"); it++; continue; @@ -365,7 +394,7 @@ bool FsIndexer::indexFiles(list& files, int flags) if (processone(*it, &stb, FsTreeWalker::FtwRegular) != FsTreeWalker::FtwOk) { - LOGERR("FsIndexer::indexFiles: processone failed\n" ); + LOGERR("FsIndexer::indexFiles: processone failed\n"); goto out; } it = files.erase(it); @@ -383,11 +412,11 @@ out: // Purge possible orphan documents if (ret == true) { - LOGDEB("Indexfiles: purging orphans\n" ); + LOGDEB("Indexfiles: purging orphans\n"); const vector& purgecandidates = m_purgeCandidates.getCandidates(); for (vector::const_iterator it = purgecandidates.begin(); it != purgecandidates.end(); it++) { - LOGDEB("Indexfiles: purging orphans for " << *it << "\n"); + LOGDEB("Indexfiles: purging orphans for " << *it << "\n"); m_db->purgeOrphans(*it); } #ifdef IDX_THREADS @@ -395,7 +424,7 @@ out: #endif // IDX_THREADS } - LOGDEB("FsIndexer::indexFiles: done\n" ); + LOGDEB("FsIndexer::indexFiles: done\n"); return ret; } @@ -403,7 +432,7 @@ out: /** Purge docs for given files out of the database */ bool FsIndexer::purgeFiles(list& files) { - LOGDEB("FsIndexer::purgeFiles\n" ); + LOGDEB("FsIndexer::purgeFiles\n"); bool ret = false; if 
(!init()) return false; @@ -415,7 +444,7 @@ bool FsIndexer::purgeFiles(list& files) // found or deleted, false only in case of actual error bool existed; if (!m_db->purgeFile(udi, &existed)) { - LOGERR("FsIndexer::purgeFiles: Database error\n" ); + LOGERR("FsIndexer::purgeFiles: Database error\n"); goto out; } // If we actually deleted something, take it off the list @@ -435,14 +464,14 @@ out: m_dwqueue.waitIdle(); m_db->waitUpdIdle(); #endif // IDX_THREADS - LOGDEB("FsIndexer::purgeFiles: done\n" ); + LOGDEB("FsIndexer::purgeFiles: done\n"); return ret; } // Local fields can be set for fs subtrees in the configuration file void FsIndexer::localfieldsfromconf() { - LOGDEB1("FsIndexer::localfieldsfromconf\n" ); + LOGDEB1("FsIndexer::localfieldsfromconf\n"); string sfields; m_config->getConfParam("localfields", sfields); @@ -462,7 +491,8 @@ void FsIndexer::localfieldsfromconf() it != nmlst.end(); it++) { string nm = m_config->fieldCanon(*it); attrs.get(*it, m_localfields[nm]); - LOGDEB2("FsIndexer::localfieldsfromconf: [" << (nm) << "]->[" << (m_localfields[nm]) << "]\n" ); + LOGDEB2("FsIndexer::localfieldsfromconf: [" << nm << "]->[" << + m_localfields[nm] << "]\n"); } } @@ -501,9 +531,9 @@ void *FsIndexerDbUpdWorker(void * fsp) tqp->workerExit(); return (void*)1; } - LOGDEB0("FsIndexerDbUpdWorker: task ql " << (int(qsz)) << "\n" ); + LOGDEB0("FsIndexerDbUpdWorker: task ql " << qsz << "\n"); if (!fip->m_db->addOrUpdate(tsk->udi, tsk->parent_udi, tsk->doc)) { - LOGERR("FsIndexerDbUpdWorker: addOrUpdate failed\n" ); + LOGERR("FsIndexerDbUpdWorker: addOrUpdate failed\n"); tqp->workerExit(); return (void*)0; } @@ -524,15 +554,15 @@ void *FsIndexerInternfileWorker(void * fsp) tqp->workerExit(); return (void*)1; } - LOGDEB0("FsIndexerInternfileWorker: task fn " << (tsk->fn) << "\n" ); + LOGDEB0("FsIndexerInternfileWorker: task fn " << tsk->fn << "\n"); if (fip->processonefile(&myconf, tsk->fn, &tsk->statbuf, tsk->localfields) != FsTreeWalker::FtwOk) { - LOGERR("FsIndexerInternfileWorker: processone failed\n" ); + LOGERR("FsIndexerInternfileWorker: processone failed\n"); tqp->workerExit(); return (void*)0; } - LOGDEB1("FsIndexerInternfileWorker: done fn " << (tsk->fn) << "\n" ); + LOGDEB1("FsIndexerInternfileWorker: done fn " << tsk->fn << "\n"); delete tsk; } } @@ -636,7 +666,9 @@ FsIndexer::processonefile(RclConfig *config, bool xattronly = m_detectxattronly && !m_db->inFullReset() && existingDoc && needupdate && (stp->st_mtime < stp->st_ctime); - LOGDEB("processone: needupdate " << (needupdate) << " noretry " << (m_noretryfailed) << " existing " << (existingDoc) << " oldsig [" << (oldsig) << "]\n" ); + LOGDEB("processone: needupdate " << needupdate << " noretry " << + m_noretryfailed << " existing " << existingDoc << " oldsig [" << + oldsig << "]\n"); // If noretryfailed is set, check for a file which previously // failed to index, and avoid re-processing it @@ -646,14 +678,14 @@ FsIndexer::processonefile(RclConfig *config, // actually changed, we always retry (maybe it was fixed) string nold = oldsig.substr(0, oldsig.size()-1); if (!nold.compare(sig)) { - LOGDEB("processone: not retrying previously failed file\n" ); + LOGDEB("processone: not retrying previously failed file\n"); m_db->setExistingFlags(udi, existingDoc); needupdate = false; } } if (!needupdate) { - LOGDEB0("processone: up to date: " << (fn) << "\n" ); + LOGDEB0("processone: up to date: " << fn << "\n"); if (m_updater) { #ifdef IDX_THREADS std::unique_lock locker(m_updater->m_mutex); @@ -668,8 +700,8 @@ 
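
The retry test just above depends on a signature convention: when indexing a file fails, the signature stored in the index is the plain file signature with a failure marker character appended, hence the substr(0, size()-1) that strips it before comparing. A sketch of that comparison, with example values (the exact marker character is an assumption here, inferred from the stripping logic):

    #include <string>

    static bool sameFileSinceFailure(const std::string& filesig,
                                     const std::string& storedsig)
    {
        // storedsig is filesig plus a trailing failure marker (e.g. '+').
        return storedsig.substr(0, storedsig.size() - 1) == filesig;
    }

    // sameFileSinceFailure("15230:1699887000", "15230:1699887000+") == true
    // -> the file did not change since the failed attempt: skip it.
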
@@ -668,8 +700,8 @@ FsIndexer::processonefile(RclConfig *config,
         return FsTreeWalker::FtwOk;
     }
 
-    LOGDEB0("processone: processing: [" << 
-            displayableBytes(stp->st_size) << "] " << fn << "\n");
+    LOGDEB0("processone: processing: [" <<
+            displayableBytes(stp->st_size) << "] " << fn << "\n");
 
     // Note that we used to do the full path here, but I ended up
     // believing that it made more sense to use only the file name
@@ -703,7 +735,7 @@ FsIndexer::processonefile(RclConfig *config,
     try {
         fis = interner.internfile(doc);
     } catch (CancelExcept) {
-        LOGERR("fsIndexer::processone: interrupted\n" );
+        LOGERR("fsIndexer::processone: interrupted\n");
         return FsTreeWalker::FtwStop;
     }
 
@@ -774,7 +806,7 @@ FsIndexer::processonefile(RclConfig *config,
             DbUpdTask *tp = new DbUpdTask(udi, doc.ipath.empty() ?
                                           cstr_null : parent_udi, doc);
             if (!m_dwqueue.put(tp)) {
-                LOGERR("processonefile: wqueue.put failed\n" );
+                LOGERR("processonefile: wqueue.put failed\n");
                 return FsTreeWalker::FtwError;
             }
         } else {
@@ -813,7 +845,8 @@ FsIndexer::processonefile(RclConfig *config,
     // If this doc existed and it's a container, recording for
     // possible subdoc purge (this will be used only if we don't do a
     // db-wide purge, e.g. if we're called from indexfiles()).
-    LOGDEB2("processOnefile: existingDoc " << (existingDoc) << " hadNonNullIpath " << (hadNonNullIpath) << "\n" );
+    LOGDEB2("processOnefile: existingDoc " << existingDoc <<
+            " hadNonNullIpath " << hadNonNullIpath << "\n");
     if (existingDoc && hadNonNullIpath) {
         m_purgeCandidates.record(parent_udi);
     }
@@ -826,7 +859,7 @@ FsIndexer::processonefile(RclConfig *config,
     // If xattronly is set, ONLY the extattr metadata is valid and will be used
     // by the following step.
     if (xattronly || hadNullIpath == false) {
-        LOGDEB("Creating empty doc for file or pure xattr update\n" );
+        LOGDEB("Creating empty doc for file or pure xattr update\n");
         Rcl::Doc fileDoc;
         if (xattronly) {
             map<string, string> xfields;
diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
index 73d75ba7..fdabb9f8 100644
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@@ -46,7 +46,6 @@ using namespace std;
 #include "rclutil.h"
 #include "smallut.h"
 #include "chrono.h"
-#include "utf8iter.h"
 #include "searchdata.h"
 #include "rclquery.h"
 #include "rclquery_p.h"
@@ -144,21 +143,6 @@ static inline string make_parentterm(const string& udi)
     return pterm;
 }
 
-static void utf8truncate(string& s, int maxlen)
-{
-    if (s.size() <= string::size_type(maxlen)) {
-        return;
-    }
-    Utf8Iter iter(s);
-    string::size_type pos = 0;
-    while (iter++ != string::npos)
-        if (iter.getBpos() < string::size_type(maxlen)) {
-            pos = iter.getBpos();
-        }
-
-    s.erase(pos);
-}
-
 Db::Native::Native(Db *db)
     : m_rcldb(db), m_isopen(false), m_iswritable(false),
       m_noversionwrite(false)
@@ -2023,11 +2007,10 @@ void Db::i_setExistingFlags(const string& udi, unsigned int docid)
             LOGERR("Rcl::Db::needUpdate: can't get subdocs\n");
             return;
         }
-        for (vector<Xapian::docid>::iterator it = docids.begin();
-             it != docids.end(); it++) {
-            if (*it < updated.size()) {
-                LOGDEB2("Db::needUpdate: docid " << (*it) << " set\n");
-                updated[*it] = true;
+        for (auto docid : docids) {
+            if (docid < updated.size()) {
+                LOGDEB2("Db::needUpdate: docid " << docid << " set\n");
+                updated[docid] = true;
             }
         }
     }
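
Context for the next hunk: i_setExistingFlags() sets bits in the Db 'updated' bitmap, and the purge pass at the end of an index run deletes every document whose bit was never set. Flagging a doc and its subdocs is therefore enough to shield them from the purge. A sketch of that relationship (names mirror this file; the purge loop itself is not part of this patch and is an assumption):

    // During the pass: mark a doc and its subdocs as still present.
    static void markExisting(std::vector<bool>& updated,
                             Xapian::docid docid,
                             const std::vector<Xapian::docid>& subdocids)
    {
        updated[docid] = true;              // doc seen: keep it
        for (auto sub : subdocids)
            updated[sub] = true;            // keep its subdocs too
    }

    // At purge time (sketch), anything never flagged is deleted:
    //   for (Xapian::docid id = 1; id < updated.size(); id++)
    //       if (!updated[id])
    //           xwdb.delete_document(id);  // hypothetical writable handle
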
@@ -2556,5 +2539,40 @@ bool Db::getSubDocs(const Doc &idoc, vector<Doc>& subdocs)
     return false;
 }
 
-} // End namespace Rcl
+// Walk an UDI section (all UDIs beginning with input prefix), and
+// mark all docs and subdocs as existing. Caller beware: Makes sense
+// or not depending on the UDI structure for the data store. In practice,
+// used for absent FS mountable volumes.
+bool Db::udiTreeMarkExisting(const string& udi)
+{
+    LOGDEB("Db::udiTreeWalk: " << udi << endl);
+    string wrapd = wrap_prefix(udi_prefix);
+    string expr = udi + "*";
+#ifdef IDX_THREADS
+    std::unique_lock<std::mutex> lock(m_ndb->m_mutex);
+#endif
+
+    bool ret = m_ndb->idxTermMatch_p(
+        int(ET_WILD), cstr_null, expr,
+        [this, &udi](const string& term, Xapian::termcount, Xapian::doccount) {
+            Xapian::PostingIterator docid;
+            XAPTRY(docid = m_ndb->xrdb.postlist_begin(term), m_ndb->xrdb,
+                   m_reason);
+            if (!m_reason.empty()) {
+                LOGERR("Db::udiTreeWalk: xapian::postlist_begin failed: " <<
+                       m_reason << "\n");
+                return false;
+            }
+            if (docid == m_ndb->xrdb.postlist_end(term)) {
+                LOGDEB("Db::udiTreeWalk:no doc for " << term << " ??\n");
+                return false;
+            }
+            i_setExistingFlags(udi, *docid);
+            LOGDEB("Db::udiTreeWalk: uniterm: " << term << endl);
+            return true;
+        }, wrapd);
+    return ret;
+}
+
+} // End namespace Rcl
diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h
index f3a62e92..b6876bcc 100644
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@@ -483,7 +483,14 @@ public:
 
     // Use empty fn for no synonyms
     bool setSynGroupsFile(const std::string& fn);
-    
+
+    // Mark all documents with an UDI having input as prefix as
+    // existing. Only works if the UDIs for the store are
+    // hierarchical of course. Used by FsIndexer to avoid purging
+    // files for a topdir which is on a removable file system and
+    // currently unmounted (topdir does not exist or is empty).
+    bool udiTreeMarkExisting(const string& udi);
+
     /* This has to be public for access by embedded Query::Native */
     Native *m_ndb;
 private:
diff --git a/src/rcldb/rcldb_p.h b/src/rcldb/rcldb_p.h
index a21d3e38..4484c1e4 100644
--- a/src/rcldb/rcldb_p.h
+++ b/src/rcldb/rcldb_p.h
@@ -181,6 +181,13 @@ class Db::Native {
      */
     bool subDocs(const string &udi, int idxi, vector<Xapian::docid>& docids);
 
+    /** Matcher */
+    bool idxTermMatch_p(int typ_sens,const string &lang,const std::string &term,
+                        std::function<bool(const string& term,
+                                           Xapian::termcount tcnt,
+                                           Xapian::doccount dcnt)> client,
+                        const string& field);
+
     /** Check if a page position list is defined */
     bool hasPages(Xapian::docid id);
 
diff --git a/src/rcldb/rclterms.cpp b/src/rcldb/rclterms.cpp
index 8b50566a..baaa56c4 100644
--- a/src/rcldb/rclterms.cpp
+++ b/src/rcldb/rclterms.cpp
@@ -330,34 +330,14 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
     return true;
 }
 
-// Second phase of wildcard/regexp term expansion after case/diac
-// expansion: expand against main index terms
-bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
-                      TermMatchResult& res, int max, const string& field)
+bool Db::Native::idxTermMatch_p(
+    int typ, const string &lang, const string &root,
+    std::function<bool(const string& term, Xapian::termcount tcnt,
+                       Xapian::doccount dcnt)> client,
+    const string& prefix)
 {
-    int typ = matchTypeTp(typ_sens);
-    LOGDEB1("Db::idxTermMatch: typ " << tmtptostr(typ) << " lang [" <<
-            lang << "] term [" << root << "] max " << max << " field [" <<
-            field << "] init res.size " << res.entries.size() << "\n");
-
-    if (typ == ET_STEM) {
-        LOGFATAL("RCLDB: internal error: idxTermMatch called with ET_STEM\n");
-        abort();
-    }
-
-    Xapian::Database xdb = m_ndb->xrdb;
-
-    string prefix;
-    if (!field.empty()) {
-        const FieldTraits *ftp = 0;
-        if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
-            LOGDEB("Db::termMatch: field is not indexed (no prefix): [" <<
-                   field << "]\n");
-        } else {
-            prefix = wrap_prefix(ftp->pfx);
-        }
-    }
-    res.prefix = prefix;
+    Xapian::Database xdb = xrdb;
 
     std::shared_ptr<StrMatcher> matcher;
     if (typ == ET_REGEXP) {
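
The hunk above turns the bottom half of term expansion into a reusable walker: Db::Native::idxTermMatch_p() runs the Xapian term scan and hands each matching term to a callback, which returns false to stop the walk. Besides the TermMatchResult filler in idxTermMatch() below, this is what lets udiTreeMarkExisting() reuse the scan. A usage sketch with a hypothetical caller (countWildMatches is not a real recoll method, and is written as if it were a Db member):

    // Count index terms matching a wildcard, stopping after 100.
    // Relies only on the callback contract: true continues, false stops.
    bool Rcl::Db::countWildMatches(const std::string& expr, int *cntp)
    {
        *cntp = 0;
        return m_ndb->idxTermMatch_p(
            int(ET_WILD), cstr_null, expr,
            [cntp](const std::string&, Xapian::termcount, Xapian::doccount) {
                return ++*cntp < 100;   // false stops the expansion
            },
            std::string());             // empty prefix: unfielded terms
    }
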
@@ -418,35 +398,74 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
             if (matcher && !matcher->match(term))
                 continue;
 
-            res.entries.push_back(
-                TermMatchEntry(ixterm, xdb.get_collection_freq(ixterm),
-                               it.get_termfreq()));
-
-            // The problem with truncating here is that this is done
-            // alphabetically and we may not keep the most frequent
-            // terms. OTOH, not doing it may stall the program if
-            // we are walking the whole term list. We compromise
-            // by cutting at 2*max
-            if (max > 0 && ++rcnt >= 2*max)
+            if (!client(ixterm, xdb.get_collection_freq(ixterm),
+                        it.get_termfreq())) {
                 break;
+            }
         }
-        m_reason.erase();
+        m_rcldb->m_reason.erase();
         break;
     } catch (const Xapian::DatabaseModifiedError &e) {
-        m_reason = e.get_msg();
+        m_rcldb->m_reason = e.get_msg();
         xdb.reopen();
         continue;
-    } XCATCHERROR(m_reason);
+    } XCATCHERROR(m_rcldb->m_reason);
     break;
     }
-    if (!m_reason.empty()) {
-        LOGERR("termMatch: " << m_reason << "\n");
+    if (!m_rcldb->m_reason.empty()) {
+        LOGERR("termMatch: " << m_rcldb->m_reason << "\n");
         return false;
     }
     return true;
 }
+
+// Second phase of wildcard/regexp term expansion after case/diac
+// expansion: expand against main index terms
+bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
+                      TermMatchResult& res, int max, const string& field)
+{
+    int typ = matchTypeTp(typ_sens);
+    LOGDEB1("Db::idxTermMatch: typ " << tmtptostr(typ) << " lang [" <<
+            lang << "] term [" << root << "] max " << max << " field [" <<
+            field << "] init res.size " << res.entries.size() << "\n");
+
+    if (typ == ET_STEM) {
+        LOGFATAL("RCLDB: internal error: idxTermMatch called with ET_STEM\n");
+        abort();
+    }
+
+    string prefix;
+    if (!field.empty()) {
+        const FieldTraits *ftp = 0;
+        if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
+            LOGDEB("Db::termMatch: field is not indexed (no prefix): [" <<
+                   field << "]\n");
+        } else {
+            prefix = wrap_prefix(ftp->pfx);
+        }
+    }
+    res.prefix = prefix;
+
+    int rcnt = 0;
+    bool ret = m_ndb->idxTermMatch_p(
+        typ, lang, root,
+        [&res, &rcnt, max](const string& term,
+                           Xapian::termcount cf, Xapian::doccount tf) {
+            res.entries.push_back(TermMatchEntry(term, cf, tf));
+            // The problem with truncating here is that this is done
+            // alphabetically and we may not keep the most frequent
+            // terms. OTOH, not doing it may stall the program if
+            // we are walking the whole term list. We compromise
+            // by cutting at 2*max
+            if (max > 0 && ++rcnt >= 2*max)
+                return false;
+            return true;
+        }, prefix);
+
+    return ret;
+}
+
 /** Term list walking. */
 class TermIter {
 public: