From bf39719ac32846b2dd8eb5e751470c8aa83ca195 Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes
Date: Sun, 13 Feb 2011 10:07:25 +0100
Subject: [PATCH] Indexing: need to truncate pathologically long path elements
 (would cause add_document error)

---
Review note: the truncation keeps the leading 230 bytes of each path element
(substr(0, 230)). substr(230) would instead discard the first 230 bytes and
keep the remainder, which can still be arbitrarily long. The cut is made at a
byte offset, so it may fall inside a multi-byte character -- TODO confirm
whether that matters for the stored terms.

 src/rcldb/rcldb.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
index 9dca6b09..38e4499a 100644
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@@ -815,9 +815,8 @@ private:
 // Get one term from the doc, remove accents and lowercase, then add posting
 bool TextSplitDb::takeword(const std::string &_term, int pos, int, int)
 {
-#if 0
-    LOGDEB(("TextSplitDb::takeword: [%s]\n", _term.c_str()));
-#endif
+    LOGDEB2(("TextSplitDb::takeword: [%s]\n", _term.c_str()));
+
     string term;
     if (!unacmaybefold(_term, term, "UTF-8", true)) {
         LOGINFO(("Db::splitter::takeword: unac failed for [%s]\n",
@@ -929,8 +928,12 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
             splitter.curpos = 0;
             newdocument.add_posting(pathelt_prefix,
                                     splitter.basepos + splitter.curpos++);
-            for (vector<string>::const_iterator it = vpath.begin();
+            for (vector<string>::iterator it = vpath.begin();
                  it != vpath.end(); it++){
+                if (it->length() > 230) {
+                    // Just truncate it. May still be useful because of wildcards
+                    *it = it->substr(0, 230);
+                }
                 newdocument.add_posting(pathelt_prefix + *it,
                                         splitter.basepos + splitter.curpos++);
             }