Avoid purging documents from absent mountable volumes

parent c2691f68bf
commit 399c633efd
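Summary: when a configured topdir is missing or is an empty directory (the usual state of an unmounted removable volume), walking it finds nothing, and the later purge pass can then drop every index document below it. This change adds a path_empty() test to FsIndexer::index() and a new Rcl::Db::udiTreeMarkExisting() which marks every document whose UDI begins with the topdir path as still existing, so the purge leaves the subtree alone. Condensed from the indexer hunks below (comments added here for illustration):

    for (const auto& topdir : m_tdl) {
        if (path_empty(topdir)) {              // absent or unpopulated mount point
            m_db->udiTreeMarkExisting(topdir); // shield its docs from the purge
            continue;                          // skip walking this topdir for now
        }
        // ... set per-directory config and walk the tree as before ...
    }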
@@ -16,6 +16,8 @@
 */
 #include "autoconfig.h"
+
+#include "fsindexer.h"

 #include <stdio.h>
 #include <errno.h>
 #include <cstring>
@@ -35,7 +37,6 @@
 #include "rcldb.h"
 #include "readfile.h"
 #include "indexer.h"
-#include "fsindexer.h"
 #include "transcode.h"
 #include "log.h"
 #include "internfile.h"
@@ -107,7 +108,7 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
       m_dwqueue("Split", cnf->getThrConf(RclConfig::ThrSplit).first)
 #endif // IDX_THREADS
 {
-    LOGDEB1("FsIndexer::FsIndexer\n" );
+    LOGDEB1("FsIndexer::FsIndexer\n");
     m_havelocalfields = m_config->hasNameAnywhere("localfields");
     m_config->getConfParam("detectxattronly", &m_detectxattronly);

@@ -118,7 +119,7 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
     int internthreads = cnf->getThrConf(RclConfig::ThrIntern).second;
     if (internqlen >= 0) {
         if (!m_iwqueue.start(internthreads, FsIndexerInternfileWorker, this)) {
-            LOGERR("FsIndexer::FsIndexer: intern worker start failed\n" );
+            LOGERR("FsIndexer::FsIndexer: intern worker start failed\n");
             return;
         }
         m_haveInternQ = true;
@@ -127,28 +128,31 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
     int splitthreads = cnf->getThrConf(RclConfig::ThrSplit).second;
     if (splitqlen >= 0) {
         if (!m_dwqueue.start(splitthreads, FsIndexerDbUpdWorker, this)) {
-            LOGERR("FsIndexer::FsIndexer: split worker start failed\n" );
+            LOGERR("FsIndexer::FsIndexer: split worker start failed\n");
             return;
         }
         m_haveSplitQ = true;
     }
-    LOGDEB("FsIndexer: threads: haveIQ " << (m_haveInternQ) << " iql " << (internqlen) << " iqts " << (internthreads) << " haveSQ " << (m_haveSplitQ) << " sql " << (splitqlen) << " sqts " << (splitthreads) << "\n" );
+    LOGDEB("FsIndexer: threads: haveIQ " << m_haveInternQ << " iql " <<
+           internqlen << " iqts " << internthreads << " haveSQ " <<
+           m_haveSplitQ << " sql " << splitqlen << " sqts " << splitthreads <<
+           "\n");
 #endif // IDX_THREADS
 }

 FsIndexer::~FsIndexer()
 {
-    LOGDEB1("FsIndexer::~FsIndexer()\n" );
+    LOGDEB1("FsIndexer::~FsIndexer()\n");

 #ifdef IDX_THREADS
     void *status;
     if (m_haveInternQ) {
         status = m_iwqueue.setTerminateAndWait();
-        LOGDEB0("FsIndexer: internfile wrkr status: " << (status) << " (1->ok)\n" );
+        LOGDEB0("FsIndexer: internfile wrkr status: "<< status << " (1->ok)\n");
     }
     if (m_haveSplitQ) {
         status = m_dwqueue.setTerminateAndWait();
-        LOGDEB0("FsIndexer: dbupd worker status: " << (status) << " (1->ok)\n" );
+        LOGDEB0("FsIndexer: dbupd worker status: " << status << " (1->ok)\n");
     }
     delete m_stableconfig;
 #endif // IDX_THREADS
@@ -161,13 +165,28 @@ bool FsIndexer::init()
     if (m_tdl.empty()) {
         m_tdl = m_config->getTopdirs();
         if (m_tdl.empty()) {
-            LOGERR("FsIndexers: no topdirs list defined\n" );
+            LOGERR("FsIndexers: no topdirs list defined\n");
             return false;
         }
     }
     return true;
 }

+// Check if path is either non-existing or an empty directory.
+static bool path_empty(const string& path)
+{
+    if (path_isdir(path)) {
+        string reason;
+        std::set<string> entries;
+        if (!readdir(path, reason, entries) || entries.empty()) {
+            return true;
+        }
+        return false;
+    } else {
+        return !path_exists(path);
+    }
+}
+
 // Recursively index each directory in the topdirs:
 bool FsIndexer::index(int flags)
 {
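Note: path_isdir(), path_exists() and the three-argument readdir() above are evidently the project's portable path helpers rather than the POSIX calls (a POSIX readdir() does not fill a std::set). For illustration only, a standalone sketch of the same "absent or empty" test written against std::filesystem (C++17) could look like this:

    #include <filesystem>
    #include <iostream>

    // Returns true when p does not exist, or is a directory with no
    // entries -- the typical look of an unmounted mount point.
    static bool path_empty_sketch(const std::filesystem::path& p)
    {
        namespace fs = std::filesystem;
        std::error_code ec;
        if (fs::is_directory(p, ec)) {
            // Empty directory <=> begin iterator equals the end iterator.
            return fs::directory_iterator(p, ec) == fs::directory_iterator();
        }
        return !fs::exists(p, ec);
    }

    int main(int argc, char **argv)
    {
        for (int i = 1; i < argc; i++) {
            std::cout << argv[i] << ": "
                      << (path_empty_sketch(argv[i]) ? "absent/empty" : "present")
                      << "\n";
        }
        return 0;
    }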
@@ -190,14 +209,21 @@ bool FsIndexer::index(int flags)
         m_walker.setMaxDepth(2);
     }

-    for (vector<string>::const_iterator it = m_tdl.begin();
-         it != m_tdl.end(); it++) {
-        LOGDEB("FsIndexer::index: Indexing " << *it << " into " <<
+    for (const auto& topdir : m_tdl) {
+        LOGDEB("FsIndexer::index: Indexing " << topdir << " into " <<
                getDbDir() << "\n");
+
+        // If a topdirs member appears to be not here or not mounted
+        // (empty), avoid deleting all the related index content by
+        // marking the current docs as existing.
+        if (path_empty(topdir)) {
+            m_db->udiTreeMarkExisting(topdir);
+            continue;
+        }

         // Set the current directory in config so that subsequent
         // getConfParams() will get local values
-        m_config->setKeyDir(*it);
+        m_config->setKeyDir(topdir);

         // Adjust the "follow symlinks" option
         bool follow;
@@ -214,8 +240,8 @@ bool FsIndexer::index(int flags)
         m_db->setAbstractParams(abslen, -1, -1);

         // Walk the directory tree
-        if (m_walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
-            LOGERR("FsIndexer::index: error while indexing " << *it <<
+        if (m_walker.walk(topdir, *this) != FsTreeWalker::FtwOk) {
+            LOGERR("FsIndexer::index: error while indexing " << topdir <<
                    ": " << m_walker.getReason() << "\n");
             return false;
         }
@@ -233,11 +259,12 @@ bool FsIndexer::index(int flags)
         string missing;
         m_missing->getMissingDescription(missing);
         if (!missing.empty()) {
-            LOGINFO("FsIndexer::index missing helper program(s):\n" << (missing) << "\n" );
+            LOGINFO("FsIndexer::index missing helper program(s):\n" <<
+                    missing << "\n");
         }
         m_config->storeMissingHelperDesc(missing);
     }
-    LOGINFO("fsindexer index time: " << (chron.millis()) << " mS\n" );
+    LOGINFO("fsindexer index time: " << chron.millis() << " mS\n");
     return true;
 }

@@ -258,7 +285,8 @@ static bool matchesSkipped(const vector<string>& tdl,
     for (vector<string>::const_iterator it = tdl.begin();
          it != tdl.end(); it++) {
         // the topdirs members are already canonized.
-        LOGDEB2("matchesSkipped: comparing ancestor [" << (mpath) << "] to topdir [" << (it) << "]\n" );
+        LOGDEB2("matchesSkipped: comparing ancestor [" << mpath <<
+                "] to topdir [" << it << "]\n");
         if (!mpath.compare(*it)) {
             topdir = *it;
             goto goodpath;
@@ -266,7 +294,7 @@ static bool matchesSkipped(const vector<string>& tdl,
     }

     if (walker.inSkippedPaths(mpath, false)) {
-        LOGDEB("FsIndexer::indexFiles: skipping [" << (path) << "] (skpp)\n" );
+        LOGDEB("FsIndexer::indexFiles: skipping [" << path << "] (skpp)\n");
         return true;
     }

@@ -280,12 +308,13 @@ static bool matchesSkipped(const vector<string>& tdl,
         // path did not shorten, something is seriously amiss
         // (could be an assert actually)
         if (mpath.length() >= len) {
-            LOGERR("FsIndexer::indexFile: internal Error: path [" << (mpath) << "] did not shorten\n" );
+            LOGERR("FsIndexer::indexFile: internal Error: path [" << mpath <<
+                   "] did not shorten\n");
             return true;
         }
     }
     // We get there if neither topdirs nor skippedPaths tests matched
-    LOGDEB("FsIndexer::indexFiles: skipping [" << (path) << "] (ntd)\n" );
+    LOGDEB("FsIndexer::indexFiles: skipping [" << path << "] (ntd)\n");
     return true;

 goodpath:
@@ -295,7 +324,7 @@ goodpath:
     while (mpath.length() >= topdir.length() && mpath.length() > 1) {
         string fn = path_getsimple(mpath);
         if (walker.inSkippedNames(fn)) {
-            LOGDEB("FsIndexer::indexFiles: skipping [" << (path) << "] (skpn)\n" );
+            LOGDEB("FsIndexer::indexFiles: skipping [" << path << "] (skpn)\n");
             return true;
         }

@@ -319,7 +348,7 @@ goodpath:
 */
 bool FsIndexer::indexFiles(list<string>& files, int flags)
 {
-    LOGDEB("FsIndexer::indexFiles\n" );
+    LOGDEB("FsIndexer::indexFiles\n");
     m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
     bool ret = false;

@@ -337,7 +366,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
     walker.setSkippedPaths(m_config->getSkippedPaths());

     for (list<string>::iterator it = files.begin(); it != files.end(); ) {
-        LOGDEB2("FsIndexer::indexFiles: [" << (it) << "]\n" );
+        LOGDEB2("FsIndexer::indexFiles: [" << it << "]\n");

         m_config->setKeyDir(path_getfather(*it));
         if (m_havelocalfields)
@@ -357,7 +386,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
         struct stat stb;
         int ststat = path_fileprops(*it, &stb, follow);
         if (ststat != 0) {
-            LOGERR("FsIndexer::indexFiles: (l)stat " << *it << ": " <<
+            LOGERR("FsIndexer::indexFiles: (l)stat " << *it << ": " <<
                    strerror(errno) << "\n");
             it++;
             continue;
@@ -365,7 +394,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)

         if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
             FsTreeWalker::FtwOk) {
-            LOGERR("FsIndexer::indexFiles: processone failed\n" );
+            LOGERR("FsIndexer::indexFiles: processone failed\n");
             goto out;
         }
         it = files.erase(it);
@@ -383,11 +412,11 @@ out:

     // Purge possible orphan documents
     if (ret == true) {
-        LOGDEB("Indexfiles: purging orphans\n" );
+        LOGDEB("Indexfiles: purging orphans\n");
         const vector<string>& purgecandidates = m_purgeCandidates.getCandidates();
         for (vector<string>::const_iterator it = purgecandidates.begin();
              it != purgecandidates.end(); it++) {
-            LOGDEB("Indexfiles: purging orphans for " << *it << "\n");
+            LOGDEB("Indexfiles: purging orphans for " << *it << "\n");
             m_db->purgeOrphans(*it);
         }
 #ifdef IDX_THREADS
@@ -395,7 +424,7 @@ out:
 #endif // IDX_THREADS
     }

-    LOGDEB("FsIndexer::indexFiles: done\n" );
+    LOGDEB("FsIndexer::indexFiles: done\n");
     return ret;
 }

@@ -403,7 +432,7 @@ out:
 /** Purge docs for given files out of the database */
 bool FsIndexer::purgeFiles(list<string>& files)
 {
-    LOGDEB("FsIndexer::purgeFiles\n" );
+    LOGDEB("FsIndexer::purgeFiles\n");
     bool ret = false;
     if (!init())
         return false;
@@ -415,7 +444,7 @@ bool FsIndexer::purgeFiles(list<string>& files)
     // found or deleted, false only in case of actual error
     bool existed;
     if (!m_db->purgeFile(udi, &existed)) {
-        LOGERR("FsIndexer::purgeFiles: Database error\n" );
+        LOGERR("FsIndexer::purgeFiles: Database error\n");
         goto out;
     }
     // If we actually deleted something, take it off the list
@@ -435,14 +464,14 @@ out:
     m_dwqueue.waitIdle();
     m_db->waitUpdIdle();
 #endif // IDX_THREADS
-    LOGDEB("FsIndexer::purgeFiles: done\n" );
+    LOGDEB("FsIndexer::purgeFiles: done\n");
     return ret;
 }

 // Local fields can be set for fs subtrees in the configuration file
 void FsIndexer::localfieldsfromconf()
 {
-    LOGDEB1("FsIndexer::localfieldsfromconf\n" );
+    LOGDEB1("FsIndexer::localfieldsfromconf\n");

     string sfields;
     m_config->getConfParam("localfields", sfields);
@@ -462,7 +491,8 @@ void FsIndexer::localfieldsfromconf()
          it != nmlst.end(); it++) {
         string nm = m_config->fieldCanon(*it);
         attrs.get(*it, m_localfields[nm]);
-        LOGDEB2("FsIndexer::localfieldsfromconf: [" << (nm) << "]->[" << (m_localfields[nm]) << "]\n" );
+        LOGDEB2("FsIndexer::localfieldsfromconf: [" << nm << "]->[" <<
+                m_localfields[nm] << "]\n");
     }
 }

@@ -501,9 +531,9 @@ void *FsIndexerDbUpdWorker(void * fsp)
         tqp->workerExit();
         return (void*)1;
     }
-    LOGDEB0("FsIndexerDbUpdWorker: task ql " << (int(qsz)) << "\n" );
+    LOGDEB0("FsIndexerDbUpdWorker: task ql " << qsz << "\n");
     if (!fip->m_db->addOrUpdate(tsk->udi, tsk->parent_udi, tsk->doc)) {
-        LOGERR("FsIndexerDbUpdWorker: addOrUpdate failed\n" );
+        LOGERR("FsIndexerDbUpdWorker: addOrUpdate failed\n");
         tqp->workerExit();
         return (void*)0;
     }
@@ -524,15 +554,15 @@ void *FsIndexerInternfileWorker(void * fsp)
         tqp->workerExit();
         return (void*)1;
     }
-    LOGDEB0("FsIndexerInternfileWorker: task fn " << (tsk->fn) << "\n" );
+    LOGDEB0("FsIndexerInternfileWorker: task fn " << tsk->fn << "\n");
     if (fip->processonefile(&myconf, tsk->fn, &tsk->statbuf,
                             tsk->localfields) !=
         FsTreeWalker::FtwOk) {
-        LOGERR("FsIndexerInternfileWorker: processone failed\n" );
+        LOGERR("FsIndexerInternfileWorker: processone failed\n");
         tqp->workerExit();
         return (void*)0;
     }
-    LOGDEB1("FsIndexerInternfileWorker: done fn " << (tsk->fn) << "\n" );
+    LOGDEB1("FsIndexerInternfileWorker: done fn " << tsk->fn << "\n");
     delete tsk;
     }
 }
@@ -636,7 +666,9 @@ FsIndexer::processonefile(RclConfig *config,
     bool xattronly = m_detectxattronly && !m_db->inFullReset() &&
         existingDoc && needupdate && (stp->st_mtime < stp->st_ctime);

-    LOGDEB("processone: needupdate " << (needupdate) << " noretry " << (m_noretryfailed) << " existing " << (existingDoc) << " oldsig [" << (oldsig) << "]\n" );
+    LOGDEB("processone: needupdate " << needupdate << " noretry " <<
+           m_noretryfailed << " existing " << existingDoc << " oldsig [" <<
+           oldsig << "]\n");

     // If noretryfailed is set, check for a file which previously
     // failed to index, and avoid re-processing it
@@ -646,14 +678,14 @@ FsIndexer::processonefile(RclConfig *config,
         // actually changed, we always retry (maybe it was fixed)
         string nold = oldsig.substr(0, oldsig.size()-1);
         if (!nold.compare(sig)) {
-            LOGDEB("processone: not retrying previously failed file\n" );
+            LOGDEB("processone: not retrying previously failed file\n");
             m_db->setExistingFlags(udi, existingDoc);
             needupdate = false;
         }
     }

     if (!needupdate) {
-        LOGDEB0("processone: up to date: " << (fn) << "\n" );
+        LOGDEB0("processone: up to date: " << fn << "\n");
         if (m_updater) {
 #ifdef IDX_THREADS
             std::unique_lock<std::mutex> locker(m_updater->m_mutex);
@@ -668,8 +700,8 @@ FsIndexer::processonefile(RclConfig *config,
         return FsTreeWalker::FtwOk;
     }

-    LOGDEB0("processone: processing: [" <<
-            displayableBytes(stp->st_size) << "] " << fn << "\n");
+    LOGDEB0("processone: processing: [" <<
+            displayableBytes(stp->st_size) << "] " << fn << "\n");

     // Note that we used to do the full path here, but I ended up
     // believing that it made more sense to use only the file name
@@ -703,7 +735,7 @@ FsIndexer::processonefile(RclConfig *config,
     try {
         fis = interner.internfile(doc);
     } catch (CancelExcept) {
-        LOGERR("fsIndexer::processone: interrupted\n" );
+        LOGERR("fsIndexer::processone: interrupted\n");
         return FsTreeWalker::FtwStop;
     }

@@ -774,7 +806,7 @@ FsIndexer::processonefile(RclConfig *config,
             DbUpdTask *tp = new DbUpdTask(udi, doc.ipath.empty() ?
                                           cstr_null : parent_udi, doc);
             if (!m_dwqueue.put(tp)) {
-                LOGERR("processonefile: wqueue.put failed\n" );
+                LOGERR("processonefile: wqueue.put failed\n");
                 return FsTreeWalker::FtwError;
             }
         } else {
@@ -813,7 +845,8 @@ FsIndexer::processonefile(RclConfig *config,
     // If this doc existed and it's a container, recording for
     // possible subdoc purge (this will be used only if we don't do a
     // db-wide purge, e.g. if we're called from indexfiles()).
-    LOGDEB2("processOnefile: existingDoc " << (existingDoc) << " hadNonNullIpath " << (hadNonNullIpath) << "\n" );
+    LOGDEB2("processOnefile: existingDoc " << existingDoc <<
+            " hadNonNullIpath " << hadNonNullIpath << "\n");
     if (existingDoc && hadNonNullIpath) {
         m_purgeCandidates.record(parent_udi);
     }
@@ -826,7 +859,7 @@ FsIndexer::processonefile(RclConfig *config,
     // If xattronly is set, ONLY the extattr metadata is valid and will be used
     // by the following step.
     if (xattronly || hadNullIpath == false) {
-        LOGDEB("Creating empty doc for file or pure xattr update\n" );
+        LOGDEB("Creating empty doc for file or pure xattr update\n");
         Rcl::Doc fileDoc;
         if (xattronly) {
             map<string, string> xfields;

@@ -46,7 +46,6 @@ using namespace std;
 #include "rclutil.h"
 #include "smallut.h"
 #include "chrono.h"
-#include "utf8iter.h"
 #include "searchdata.h"
 #include "rclquery.h"
 #include "rclquery_p.h"
@@ -144,21 +143,6 @@ static inline string make_parentterm(const string& udi)
     return pterm;
 }

-static void utf8truncate(string& s, int maxlen)
-{
-    if (s.size() <= string::size_type(maxlen)) {
-        return;
-    }
-    Utf8Iter iter(s);
-    string::size_type pos = 0;
-    while (iter++ != string::npos)
-        if (iter.getBpos() < string::size_type(maxlen)) {
-            pos = iter.getBpos();
-        }
-
-    s.erase(pos);
-}
-
 Db::Native::Native(Db *db)
     : m_rcldb(db), m_isopen(false), m_iswritable(false),
       m_noversionwrite(false)
@@ -2023,11 +2007,10 @@ void Db::i_setExistingFlags(const string& udi, unsigned int docid)
         LOGERR("Rcl::Db::needUpdate: can't get subdocs\n");
         return;
     }
-    for (vector<Xapian::docid>::iterator it = docids.begin();
-         it != docids.end(); it++) {
-        if (*it < updated.size()) {
-            LOGDEB2("Db::needUpdate: docid " << (*it) << " set\n");
-            updated[*it] = true;
+    for (auto docid : docids) {
+        if (docid < updated.size()) {
+            LOGDEB2("Db::needUpdate: docid " << docid << " set\n");
+            updated[docid] = true;
         }
     }
 }
@@ -2556,5 +2539,40 @@ bool Db::getSubDocs(const Doc &idoc, vector<Doc>& subdocs)
     return false;
 }

-} // End namespace Rcl
+// Walk an UDI section (all UDIs beginning with input prefix), and
+// mark all docs and subdocs as existing. Caller beware: Makes sense
+// or not depending on the UDI structure for the data store. In practise,
+// used for absent FS mountable volumes.
+bool Db::udiTreeMarkExisting(const string& udi)
+{
+    LOGDEB("Db::udiTreeWalk: " << udi << endl);
+    string wrapd = wrap_prefix(udi_prefix);
+    string expr = udi + "*";
+
+#ifdef IDX_THREADS
+    std::unique_lock<std::mutex> lock(m_ndb->m_mutex);
+#endif
+
+    bool ret = m_ndb->idxTermMatch_p(
+        int(ET_WILD), cstr_null, expr,
+        [this, &udi](const string& term, Xapian::termcount, Xapian::doccount) {
+            Xapian::PostingIterator docid;
+            XAPTRY(docid = m_ndb->xrdb.postlist_begin(term), m_ndb->xrdb,
+                   m_reason);
+            if (!m_reason.empty()) {
+                LOGERR("Db::udiTreeWalk: xapian::postlist_begin failed: " <<
+                       m_reason << "\n");
+                return false;
+            }
+            if (docid == m_ndb->xrdb.postlist_end(term)) {
+                LOGDEB("Db::udiTreeWalk:no doc for " << term << " ??\n");
+                return false;
+            }
+            i_setExistingFlags(udi, *docid);
+            LOGDEB("Db::udiTreeWalk: uniterm: " << term << endl);
+            return true;
+        }, wrapd);
+    return ret;
+}
+
+} // End namespace Rcl
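The new function leans on the existing wildcard machinery: it expands udi + "*" against the stored UDI terms (restricted to the UDI term prefix), and for each matching term fetches the first posting, that is the document id, so that i_setExistingFlags() can flag the document and its subdocuments as present. The callback contract it relies on reduces to this generic, self-contained sketch (hypothetical names, illustration only, not Recoll's API):

    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    // Walk all entries matching a prefix; stop early when the client
    // callback returns false (the contract of idxTermMatch_p above).
    static void walkPrefix(const std::vector<std::string>& terms,
                           const std::string& prefix,
                           std::function<bool(const std::string&)> client)
    {
        for (const auto& t : terms) {
            if (t.compare(0, prefix.size(), prefix) == 0 && !client(t))
                break;
        }
    }

    int main()
    {
        std::vector<std::string> udis{"/vol/a", "/vol/a/doc1", "/vol/b/doc2"};
        walkPrefix(udis, "/vol/a", [](const std::string& udi) {
            std::cout << "mark existing: " << udi << "\n"; // cf. i_setExistingFlags()
            return true; // keep walking
        });
        return 0;
    }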
@@ -483,7 +483,14 @@ public:

     // Use empty fn for no synonyms
     bool setSynGroupsFile(const std::string& fn);

+    // Mark all documents with an UDI having input as prefix as
+    // existing. Only works if the UDIs for the store are
+    // hierarchical of course. Used by FsIndexer to avoid purging
+    // files for a topdir which is on a removable file system and
+    // currently unmounted (topdir does not exist or is empty.
+    bool udiTreeMarkExisting(const string& udi);
+
     /* This has to be public for access by embedded Query::Native */
     Native *m_ndb;
 private:
@@ -181,6 +181,13 @@ class Db::Native {
     */
     bool subDocs(const string &udi, int idxi, vector<Xapian::docid>& docids);

+    /** Matcher */
+    bool idxTermMatch_p(int typ_sens,const string &lang,const std::string &term,
+                        std::function<bool(const std::string& term,
+                                           Xapian::termcount colfreq,
+                                           Xapian::doccount termfreq)> client,
+                        const string& field);
+
     /** Check if a page position list is defined */
     bool hasPages(Xapian::docid id);

@@ -330,34 +330,14 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
     return true;
 }

-// Second phase of wildcard/regexp term expansion after case/diac
-// expansion: expand against main index terms
-bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
-                      TermMatchResult& res, int max, const string& field)
+bool Db::Native::idxTermMatch_p(
+    int typ, const string &lang, const string &root,
+    std::function<bool(const string& term,
+                       Xapian::termcount colfreq,
+                       Xapian::doccount termfreq)> client,
+    const string& prefix)
 {
-    int typ = matchTypeTp(typ_sens);
-    LOGDEB1("Db::idxTermMatch: typ " << tmtptostr(typ) << " lang [" <<
-            lang << "] term [" << root << "] max " << max << " field [" <<
-            field << "] init res.size " << res.entries.size() << "\n");
-
     if (typ == ET_STEM) {
         LOGFATAL("RCLDB: internal error: idxTermMatch called with ET_STEM\n");
         abort();
     }

-    Xapian::Database xdb = m_ndb->xrdb;
-
-    string prefix;
-    if (!field.empty()) {
-        const FieldTraits *ftp = 0;
-        if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
-            LOGDEB("Db::termMatch: field is not indexed (no prefix): [" <<
-                   field << "]\n");
-        } else {
-            prefix = wrap_prefix(ftp->pfx);
-        }
-    }
-    res.prefix = prefix;
+    Xapian::Database xdb = xrdb;

     std::shared_ptr<StrMatcher> matcher;
     if (typ == ET_REGEXP) {
@@ -418,35 +398,74 @@
             if (matcher && !matcher->match(term))
                 continue;

-            res.entries.push_back(
-                TermMatchEntry(ixterm, xdb.get_collection_freq(ixterm),
-                               it.get_termfreq()));
-
-            // The problem with truncating here is that this is done
-            // alphabetically and we may not keep the most frequent
-            // terms. OTOH, not doing it may stall the program if
-            // we are walking the whole term list. We compromise
-            // by cutting at 2*max
-            if (max > 0 && ++rcnt >= 2*max)
+            if (!client(ixterm, xdb.get_collection_freq(ixterm),
+                        it.get_termfreq())) {
                 break;
+            }
         }
-        m_reason.erase();
+        m_rcldb->m_reason.erase();
         break;
     } catch (const Xapian::DatabaseModifiedError &e) {
-        m_reason = e.get_msg();
+        m_rcldb->m_reason = e.get_msg();
         xdb.reopen();
         continue;
-    } XCATCHERROR(m_reason);
+    } XCATCHERROR(m_rcldb->m_reason);
     break;
     }
-    if (!m_reason.empty()) {
-        LOGERR("termMatch: " << m_reason << "\n");
+    if (!m_rcldb->m_reason.empty()) {
+        LOGERR("termMatch: " << m_rcldb->m_reason << "\n");
         return false;
     }

     return true;
 }

+
+// Second phase of wildcard/regexp term expansion after case/diac
+// expansion: expand against main index terms
+bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
+                      TermMatchResult& res, int max, const string& field)
+{
+    int typ = matchTypeTp(typ_sens);
+    LOGDEB1("Db::idxTermMatch: typ " << tmtptostr(typ) << " lang [" <<
+            lang << "] term [" << root << "] max " << max << " field [" <<
+            field << "] init res.size " << res.entries.size() << "\n");
+
+    if (typ == ET_STEM) {
+        LOGFATAL("RCLDB: internal error: idxTermMatch called with ET_STEM\n");
+        abort();
+    }
+    string prefix;
+    if (!field.empty()) {
+        const FieldTraits *ftp = 0;
+        if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
+            LOGDEB("Db::termMatch: field is not indexed (no prefix): [" <<
+                   field << "]\n");
+        } else {
+            prefix = wrap_prefix(ftp->pfx);
+        }
+    }
+    res.prefix = prefix;
+
+    int rcnt = 0;
+    bool ret = m_ndb->idxTermMatch_p(
+        typ, lang, root,
+        [&res, &rcnt, max](const string& term,
+                           Xapian::termcount cf, Xapian::doccount tf) {
+            res.entries.push_back(TermMatchEntry(term, cf, tf));
+            // The problem with truncating here is that this is done
+            // alphabetically and we may not keep the most frequent
+            // terms. OTOH, not doing it may stall the program if
+            // we are walking the whole term list. We compromise
+            // by cutting at 2*max
+            if (max > 0 && ++rcnt >= 2*max)
+                return false;
+            return true;
+        }, prefix);
+
+    return ret;
+}
+
 /** Term list walking. */
 class TermIter {
 public:
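Design note on these last two hunks: the Xapian term walk, including the reopen-and-retry loop for Xapian::DatabaseModifiedError, moves into Db::Native::idxTermMatch_p(), which reports each matching term to a std::function client and stops as soon as the client returns false. Db::idxTermMatch() becomes a thin wrapper whose lambda fills the TermMatchResult and keeps the old 2*max alphabetical cutoff, while udiTreeMarkExisting() reuses the same walker with an ET_WILD pattern over UDI terms, which is what lets the absent-volume protection share the existing expansion code.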