Avoid multiple expansion of xapian term iterator

2019-02-01 09:07:28 +01:00 · 2019-02-01 09:07:28 +01:00 · 04f3449f99
commit 04f3449f99
parent 17a8d72227
2 changed files with 21 additions and 17 deletions
--- a/src/rcldb/expansiondbs.cpp
+++ b/src/rcldb/expansiondbs.cpp
@@ -18,7 +18,10 @@

 #include "autoconfig.h"

+#include "expansiondbs.h"
+
 #include <memory>
+#include <string>

 #include "log.h"
 #include "utf8iter.h"
@@ -28,7 +31,6 @@
 #include "xmacros.h"
 #include "rcldb.h"
 #include "stemdb.h"
-#include "expansiondbs.h"

 using namespace std;

@@ -41,7 +43,7 @@ namespace Rcl {
 bool createExpansionDbs(Xapian::WritableDatabase& wdb, 
 			const vector<string>& langs)
 {
-    LOGDEB("StemDb::createExpansionDbs: languages: "  << (stringsToString(langs)) << "\n" );
+    LOGDEB("StemDb::createExpansionDbs: languages: " <<stringsToString(langs) << "\n");
    Chrono cron;

    // Erase and recreate all the expansion groups
@@ -93,32 +95,33 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
 	// skip the rest one by one.
 	it.skip_to(wrap_prefix("Z"));
        for ( ;it != wdb.allterms_end(); it++) {
-	    if (has_prefix(*it))
+            const string term{*it};
+	    if (has_prefix(term))
 		continue;

 	    // Detect and skip CJK terms.
-	    Utf8Iter utfit(*it);
+	    Utf8Iter utfit(term);
            if (utfit.eof()) // Empty term?? Seems to happen.
                continue;
 	    if (TextSplit::isCJK(*utfit)) {
-		// LOGDEB("stemskipped: Skipping CJK\n" );
+		// LOGDEB("stemskipped: Skipping CJK\n");
 		continue;
 	    }

-	    string lower = *it;
+	    string lower = term;
 	    // If the index is raw, compute the case-folded term which
 	    // is the input to the stem db, and add a synonym from the
 	    // stripped term to the cased and accented one, for accent
 	    // and case expansion at query time
 	    if (!o_index_stripchars) {
-		unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
-		diacasedb.addSynonym(*it);
+		unacmaybefold(term, lower, "UTF-8", UNACOP_FOLD);
+		diacasedb.addSynonym(term);
 	    }

 	    // Dont' apply stemming to terms which don't look like
 	    // natural language words.
-            if (!Db::isSpellingCandidate(*it)) {
-                LOGDEB1("createExpansionDbs: skipped: ["  << ((*it)) << "]\n" );
+            if (!Db::isSpellingCandidate(term)) {
+                LOGDEB1("createExpansionDbs: skipped: [" << term << "]\n");
                continue;
            }

@@ -144,11 +147,11 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
        }
    } XCATCHERROR(ermsg);
    if (!ermsg.empty()) {
-        LOGERR("Db::createStemDb: map build failed: "  << (ermsg) << "\n" );
+        LOGERR("Db::createStemDb: map build failed: " << ermsg << "\n");
        return false;
    }

-    LOGDEB("StemDb::createExpansionDbs: done: "  << (cron.secs()) << " S\n" );
+    LOGDEB("StemDb::createExpansionDbs: done: " << cron.secs() << " S\n");
    return true;
 }

--- a/src/rcldb/rclterms.cpp
+++ b/src/rcldb/rclterms.cpp
@@ -400,9 +400,10 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
            if (!is.empty())
                it.skip_to(is.c_str());
            for (int rcnt = 0; it != xdb.allterms_end(); it++) {
+                const string ixterm{*it};
                // If we're beyond the terms matching the initial
                // section, end
-                if (!is.empty() && (*it).find(is) != 0)
+                if (!is.empty() && ixterm.find(is) != 0)
                    break;

                // Else try to match the term. The matcher content
@@ -411,19 +412,19 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
                // the prefix.
                string term;
                if (!prefix.empty()) {
-                    term = (*it).substr(prefix.length());
+                    term = ixterm.substr(prefix.length());
                } else {
-                    if (has_prefix(*it)) {
+                    if (has_prefix(ixterm)) {
                        continue;
                    }
-                    term = *it;
+                    term = ixterm;
                }

                if (matcher && !matcher->match(term))
                    continue;

                res.entries.push_back(
-                    TermMatchEntry(*it, xdb.get_collection_freq(*it),
+                    TermMatchEntry(ixterm, xdb.get_collection_freq(ixterm),
                                   it.get_termfreq()));

                // The problem with truncating here is that this is done