Indexing: truncate pathologically long path elements, which would otherwise cause an add_document error

This commit is contained in:
Jean-Francois Dockes 2011-02-13 10:07:25 +01:00
parent 1c5fe35b7c
commit bf39719ac3

View File

@ -815,9 +815,8 @@ private:
// Get one term from the doc, remove accents and lowercase, then add posting // Get one term from the doc, remove accents and lowercase, then add posting
bool TextSplitDb::takeword(const std::string &_term, int pos, int, int) bool TextSplitDb::takeword(const std::string &_term, int pos, int, int)
{ {
#if 0 LOGDEB2(("TextSplitDb::takeword: [%s]\n", _term.c_str()));
LOGDEB(("TextSplitDb::takeword: [%s]\n", _term.c_str()));
#endif
string term; string term;
if (!unacmaybefold(_term, term, "UTF-8", true)) { if (!unacmaybefold(_term, term, "UTF-8", true)) {
LOGINFO(("Db::splitter::takeword: unac failed for [%s]\n", LOGINFO(("Db::splitter::takeword: unac failed for [%s]\n",
@ -929,8 +928,12 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
splitter.curpos = 0; splitter.curpos = 0;
newdocument.add_posting(pathelt_prefix, newdocument.add_posting(pathelt_prefix,
splitter.basepos + splitter.curpos++); splitter.basepos + splitter.curpos++);
for (vector<string>::const_iterator it = vpath.begin(); for (vector<string>::iterator it = vpath.begin();
it != vpath.end(); it++){ it != vpath.end(); it++){
if (it->length() > 230) {
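// Intent: just truncate it. May still be useful because of wildcards.
// NOTE(review): substr(230) keeps the tail starting at offset 230, not the
// first 230 chars; substr(0, 230) would truncate -- confirm and fix.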
*it = it->substr(230);
}
newdocument.add_posting(pathelt_prefix + *it, newdocument.add_posting(pathelt_prefix + *it,
splitter.basepos + splitter.curpos++); splitter.basepos + splitter.curpos++);
} }