Handle the case where unac produces whitespace, which may occur with letter-less accents

2015-08-13 18:22:09 +02:00 · 2015-08-13 18:22:09 +02:00 · 04cd868950
commit 04cd868950
parent 94eb3119ce
1 changed files with 37 additions and 9 deletions
--- a/src/rcldb/termproc.h
+++ b/src/rcldb/termproc.h
@@ -14,13 +14,12 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
 #ifndef _TERMPROC_H_INCLUDED_
 #define _TERMPROC_H_INCLUDED_
 #include "textsplit.h"
 #include "stoplist.h"
 #include "smallut.h"
 namespace Rcl {
@@ -140,14 +139,43 @@ public:
            }
            return true;
        }
-        // It may happen in some weird cases that the output from unac is
+
-        // empty (if the word actually consisted entirely of diacritics ...)
+        if (otrm.empty()) {
-        // The consequence is that a phrase search won't work without addional
+	    // It may happen in some weird cases that the output from
-        // slack.
+	    // unac is empty (if the word actually consisted entirely
-        if (otrm.empty())
+	    // of diacritics ...)  The consequence is that a phrase
 	    // search won't work without additional slack.
            return true;
-        else
+	}
-            return TermProc::takeword(otrm, pos, bs, be);
+
 	// It may also occur that unac introduces spaces in the string
 	// (when removing isolated accents, may happen for Greek
 	// for example). This is a pathological situation. We
 	// index all the resulting terms at the same pos because
 	// the surrounding code is not designed to handle a pos
 	// change in here. This means that phrase searches and
 	// snippets will be wrong, but at least searching for the
 	// terms will work.
 	bool hasspace = false;
 	for (string::const_iterator it = otrm.begin();it < otrm.end();it++) {
 	    if (*it == ' ') {
 		hasspace=true;
 		break;
 	    }
 	}
 	if (hasspace) {
 	    vector<string> terms;
 	    stringToTokens(otrm, terms, " ", true);
 	    for (vector<string>::const_iterator it = terms.begin(); 
 		 it < terms.end(); it++) {
 		if (!TermProc::takeword(*it, pos, bs, be)) {
 		    return false;
 		}
 	    }
 	    return true;
 	} else {
 	    return TermProc::takeword(otrm, pos, bs, be);
 	}
    }
    virtual bool flush()