From bf39719ac32846b2dd8eb5e751470c8aa83ca195 Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes <jfd@recoll.org>
Date: Sun, 13 Feb 2011 10:07:25 +0100
Subject: [PATCH] Indexing: need to truncate pathologically long path elements
 (would cause add_document error)

---
 src/rcldb/rcldb.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
index 9dca6b09..38e4499a 100644
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@@ -815,9 +815,8 @@ private:
 // Get one term from the doc, remove accents and lowercase, then add posting
 bool TextSplitDb::takeword(const std::string &_term, int pos, int, int)
 {
-#if 0
-    LOGDEB(("TextSplitDb::takeword: [%s]\n", _term.c_str()));
-#endif
+    LOGDEB2(("TextSplitDb::takeword: [%s]\n", _term.c_str()));
+
     string term;
     if (!unacmaybefold(_term, term, "UTF-8", true)) {
 	LOGINFO(("Db::splitter::takeword: unac failed for [%s]\n", 
@@ -929,8 +928,12 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
 	splitter.curpos = 0;
 	newdocument.add_posting(pathelt_prefix, 
 				splitter.basepos + splitter.curpos++);
-	for (vector<string>::const_iterator it = vpath.begin(); 
+	for (vector<string>::iterator it = vpath.begin(); 
 	     it != vpath.end(); it++){
+	    if (it->length() > 230) {
+		// Keep only the first 230 bytes. May still be useful because of wildcards
+		*it = it->substr(0, 230);
+	    }
 	    newdocument.add_posting(pathelt_prefix + *it, 
 				    splitter.basepos + splitter.curpos++);
 	}