whitespace and indents only

2020-04-14 09:25:13 +02:00 · 2020-04-14 09:25:13 +02:00 · 5dd8774b3c
commit 5dd8774b3c
parent d43bb992f7
3 changed files with 963 additions and 966 deletions
--- a/src/rcldb/expansiondbs.cpp
+++ b/src/rcldb/expansiondbs.cpp
@ -41,7 +41,7 @@ namespace Rcl {
 * We use Xapian synonyms subsets to store the expansions.
 */
 bool createExpansionDbs(Xapian::WritableDatabase& wdb, 
-			const vector<string>& langs)
+                        const vector<string>& langs)
 {
    LOGDEB("StemDb::createExpansionDbs: languages: " <<stringsToString(langs) << "\n");
    Chrono cron;
@ -51,99 +51,99 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
    // If langs is empty and we don't need casediac expansion, then no need to
    // walk the big list
    if (langs.empty()) {
-	if (o_index_stripchars)
-	    return true;
+        if (o_index_stripchars)
+            return true;
    }

    // Walk the list of all terms, and stem/unac each.
    string ermsg;
    try {
-	// Stem dbs
-	vector<XapWritableComputableSynFamMember> stemdbs;
-	// Note: tried to make this to work with stack-allocated objects, couldn't.
-	// Looks like a bug in copy constructors somewhere, can't guess where
-	vector<std::shared_ptr<SynTermTransStem> > stemmers;
-	for (unsigned int i = 0; i < langs.size(); i++) {
-	    stemmers.push_back(std::shared_ptr<SynTermTransStem>
-			       (new SynTermTransStem(langs[i])));
-	    stemdbs.push_back(
-		XapWritableComputableSynFamMember(wdb, synFamStem, langs[i], 
-						  stemmers.back().get()));
-	    stemdbs.back().recreate();
-	}
+        // Stem dbs
+        vector<XapWritableComputableSynFamMember> stemdbs;
+        // Note: tried to make this to work with stack-allocated objects, couldn't.
+        // Looks like a bug in copy constructors somewhere, can't guess where
+        vector<std::shared_ptr<SynTermTransStem> > stemmers;
+        for (unsigned int i = 0; i < langs.size(); i++) {
+            stemmers.push_back(std::shared_ptr<SynTermTransStem>
+                               (new SynTermTransStem(langs[i])));
+            stemdbs.push_back(
+                XapWritableComputableSynFamMember(wdb, synFamStem, langs[i], 
+                                                  stemmers.back().get()));
+            stemdbs.back().recreate();
+        }

-	// Unaccented stem dbs
-	vector<XapWritableComputableSynFamMember> unacstemdbs;
-	// We can reuse the same stemmer pointers, the objects are stateless.
-	if (!o_index_stripchars) {
-	    for (unsigned int i = 0; i < langs.size(); i++) {
-		unacstemdbs.push_back(
-		    XapWritableComputableSynFamMember(wdb, synFamStemUnac, langs[i], 
-						      stemmers.back().get()));
-		unacstemdbs.back().recreate();
-	    }
-	}
-	SynTermTransUnac transunac(UNACOP_UNACFOLD);
-	XapWritableComputableSynFamMember 
-	    diacasedb(wdb, synFamDiCa, "all", &transunac);
-	if (!o_index_stripchars)
-	    diacasedb.recreate();
+        // Unaccented stem dbs
+        vector<XapWritableComputableSynFamMember> unacstemdbs;
+        // We can reuse the same stemmer pointers, the objects are stateless.
+        if (!o_index_stripchars) {
+            for (unsigned int i = 0; i < langs.size(); i++) {
+                unacstemdbs.push_back(
+                    XapWritableComputableSynFamMember(
+                        wdb, synFamStemUnac, langs[i], stemmers.back().get()));
+                unacstemdbs.back().recreate();
+            }
+        }
+        SynTermTransUnac transunac(UNACOP_UNACFOLD);
+        XapWritableComputableSynFamMember 
+            diacasedb(wdb, synFamDiCa, "all", &transunac);
+        if (!o_index_stripchars)
+            diacasedb.recreate();

-	Xapian::TermIterator it = wdb.allterms_begin();
-	// We'd want to skip to the first non-prefixed term, but this is a bit
-	// complicated, so we just jump over most of the prefixed term and then 
-	// skip the rest one by one.
-	it.skip_to(wrap_prefix("Z"));
+        Xapian::TermIterator it = wdb.allterms_begin();
+        // We'd want to skip to the first non-prefixed term, but this is a bit
+        // complicated, so we just jump over most of the prefixed term and then 
+        // skip the rest one by one.
+        it.skip_to(wrap_prefix("Z"));
        for ( ;it != wdb.allterms_end(); it++) {
            const string term{*it};
-	    if (has_prefix(term))
-		continue;
+            if (has_prefix(term))
+                continue;

-	    // Detect and skip CJK terms.
-	    Utf8Iter utfit(term);
+            // Detect and skip CJK terms.
+            Utf8Iter utfit(term);
            if (utfit.eof()) // Empty term?? Seems to happen.
                continue;
-	    if (TextSplit::isCJK(*utfit)) {
-		// LOGDEB("stemskipped: Skipping CJK\n");
-		continue;
-	    }
+            if (TextSplit::isCJK(*utfit)) {
+                // LOGDEB("stemskipped: Skipping CJK\n");
+                continue;
+            }

-	    string lower = term;
-	    // If the index is raw, compute the case-folded term which
-	    // is the input to the stem db, and add a synonym from the
-	    // stripped term to the cased and accented one, for accent
-	    // and case expansion at query time
-	    if (!o_index_stripchars) {
-		unacmaybefold(term, lower, "UTF-8", UNACOP_FOLD);
-		diacasedb.addSynonym(term);
-	    }
+            string lower = term;
+            // If the index is raw, compute the case-folded term which
+            // is the input to the stem db, and add a synonym from the
+            // stripped term to the cased and accented one, for accent
+            // and case expansion at query time
+            if (!o_index_stripchars) {
+                unacmaybefold(term, lower, "UTF-8", UNACOP_FOLD);
+                diacasedb.addSynonym(term);
+            }

-	    // Dont' apply stemming to terms which don't look like
-	    // natural language words.
+            // Don't apply stemming to terms which don't look like
+            // natural language words.
            if (!Db::isSpellingCandidate(term)) {
                LOGDEB1("createExpansionDbs: skipped: [" << term << "]\n");
                continue;
            }

-	    // Create stemming synonym for every language. The input is the 
-	    // lowercase accented term
-	    for (unsigned int i = 0; i < langs.size(); i++) {
-		stemdbs[i].addSynonym(lower);
-	    }
+            // Create stemming synonym for every language. The input is the 
+            // lowercase accented term
+            for (unsigned int i = 0; i < langs.size(); i++) {
+                stemdbs[i].addSynonym(lower);
+            }

-	    // For a raw index, also maybe create a stem expansion for
-	    // the unaccented term. While this may be incorrect, it is
-	    // also necessary for searching in a diacritic-unsensitive
-	    // way on a raw index
-	    if (!o_index_stripchars) {
-		string unac;
-		unacmaybefold(lower, unac, "UTF-8", UNACOP_UNAC);
-		if (unac != lower) {
-		    for (unsigned int i = 0; i < langs.size(); i++) {
-			unacstemdbs[i].addSynonym(unac);
-		    }
-		}
-	    }
+            // For a raw index, also maybe create a stem expansion for
+            // the unaccented term. While this may be incorrect, it is
+            // also necessary for searching in a diacritic-unsensitive
+            // way on a raw index
+            if (!o_index_stripchars) {
+                string unac;
+                unacmaybefold(lower, unac, "UTF-8", UNACOP_UNAC);
+                if (unac != lower) {
+                    for (unsigned int i = 0; i < langs.size(); i++) {
+                        unacstemdbs[i].addSynonym(unac);
+                    }
+                }
+            }
        }
    } XCATCHERROR(ermsg);
    if (!ermsg.empty()) {
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
--- a/src/rcldb/termproc.h
+++ b/src/rcldb/termproc.h
@ -52,21 +52,18 @@ class TermProc {
 public:
    TermProc(TermProc* next) : m_next(next) {}
    virtual ~TermProc() {}
-    virtual bool takeword(const string &term, int pos, int bs, int be)
-    {
+    virtual bool takeword(const string &term, int pos, int bs, int be) {
        if (m_next)
            return m_next->takeword(term, pos, bs, be);
        else
            return true;
    }
    // newpage() is like takeword(), but for page breaks.
-    virtual void newpage(int pos)
-    {
+    virtual void newpage(int pos) {
        if (m_next)
            m_next->newpage(pos);
    }
-    virtual bool flush()
-    {
+    virtual bool flush() {
        if (m_next)
            return m_next->flush();
        else
@ -137,7 +134,7 @@ public:
            // We don't generate a fatal error because of a bad term,
            // but one has to put the limit somewhere
            if (m_unacerrors > 500 &&
-                    (double(m_totalterms) / double(m_unacerrors)) < 2.0) {
+                (double(m_totalterms) / double(m_unacerrors)) < 2.0) {
                // More than 1 error for every other term
                LOGERR("splitter::takeword: too many unac errors " <<
                       m_unacerrors << "/"  << m_totalterms << "\n");
@ -147,12 +144,12 @@ public:
        }

        if (otrm.empty()) {
-	    // It may happen in some weird cases that the output from
-	    // unac is empty (if the word actually consisted entirely
-	    // of diacritics ...)  The consequence is that a phrase
-	    // search won't work without addional slack.
+            // It may happen in some weird cases that the output from
+            // unac is empty (if the word actually consisted entirely
+            // of diacritics ...)  The consequence is that a phrase
+            // search won't work without additional slack.
            return true;
-	}
+        }

        // We should have a Japanese stemmer to handle this, but for
        // experimenting, let's do it here: remove 'prolounged sound
@ -174,34 +171,34 @@ public:
            return true;
        }
        
-	// It may also occur that unac introduces spaces in the string
-	// (when removing isolated accents, may happen for Greek
-	// for example). This is a pathological situation. We
-	// index all the resulting terms at the same pos because
-	// the surrounding code is not designed to handle a pos
-	// change in here. This means that phrase searches and
-	// snippets will be wrong, but at least searching for the
-	// terms will work.
-	bool hasspace = false;
-	for (string::const_iterator it = otrm.begin();it < otrm.end();it++) {
-	    if (*it == ' ') {
-		hasspace=true;
-		break;
-	    }
-	}
-	if (hasspace) {
+        // It may also occur that unac introduces spaces in the string
+        // (when removing isolated accents, may happen for Greek
+        // for example). This is a pathological situation. We
+        // index all the resulting terms at the same pos because
+        // the surrounding code is not designed to handle a pos
+        // change in here. This means that phrase searches and
+        // snippets will be wrong, but at least searching for the
+        // terms will work.
+        bool hasspace = false;
+        for (string::const_iterator it = otrm.begin();it < otrm.end();it++) {
+            if (*it == ' ') {
+                hasspace=true;
+                break;
+            }
+        }
+        if (hasspace) {
            std::vector<std::string> terms;
-	    stringToTokens(otrm, terms, " ", true);
-	    for (std::vector<std::string>::const_iterator it = terms.begin(); 
-		 it < terms.end(); it++) {
-		if (!TermProc::takeword(*it, pos, bs, be)) {
-		    return false;
-		}
-	    }
-	    return true;
-	} else {
-	    return TermProc::takeword(otrm, pos, bs, be);
-	}
+            stringToTokens(otrm, terms, " ", true);
+            for (std::vector<std::string>::const_iterator it = terms.begin(); 
+                 it < terms.end(); it++) {
+                if (!TermProc::takeword(*it, pos, bs, be)) {
+                    return false;
+                }
+            }
+            return true;
+        } else {
+            return TermProc::takeword(otrm, pos, bs, be);
+        }
    }

    virtual bool flush()