From dd6acb07cc539db61ab31b8f54237bcdbaf7f7c9 Mon Sep 17 00:00:00 2001 From: dockes Date: Wed, 18 Nov 2009 10:26:47 +0000 Subject: [PATCH] mh_mail: use truncate_to_word to avoid cutting a UTF-8 character. rcldb: log text_to_words errors with LOGDEB --- src/internfile/mh_mail.cpp | 3 ++- src/rcldb/rcldb.cpp | 10 +++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp index 0ba0e1e5..fa80e88f 100644 --- a/src/internfile/mh_mail.cpp +++ b/src/internfile/mh_mail.cpp @@ -169,7 +169,8 @@ bool MimeHandlerMail::next_document() m_metaData[cstr_mimetype].c_str())); const string& txt = m_metaData[cstr_content]; if (m_startoftext < txt.size()) - m_metaData[cstr_abstract] = txt.substr(m_startoftext, 250); + m_metaData[cstr_abstract] = + truncate_to_word(txt.substr(m_startoftext), 250); } else { m_metaData[cstr_abstract] = ""; res = processAttach(); diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 87f8df03..b26f0d18 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -879,7 +879,8 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, // Split and index file name as document term(s) LOGDEB2(("Db::add: split file name [%s]\n", fn.c_str())); - splitter.text_to_words(doc.utf8fn); + if (!splitter.text_to_words(doc.utf8fn)) + LOGDEB(("Db::addOrUpdate: split failed for file name\n")); splitData.basepos += splitData.curpos + 100; // Index textual metadata. 
These are all indexed as text with @@ -901,7 +902,9 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, meta_it->first.c_str(), pfx.c_str(), meta_it->second.c_str())); splitData.setprefix(pfx); // Subject - splitter.text_to_words(meta_it->second); + if (!splitter.text_to_words(meta_it->second)) + LOGDEB(("Db::addOrUpdate: split failed for %s\n", + meta_it->first.c_str())); splitData.setprefix(string()); splitData.basepos += splitData.curpos + 100; } @@ -914,7 +917,8 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, // Split and index body text LOGDEB2(("Db::add: split body\n")); - splitter.text_to_words(doc.text); + if (!splitter.text_to_words(doc.text)) + LOGDEB(("Db::addOrUpdate: split failed for main text\n")); ////// Special terms for other metadata. No positions for these. // Mime type