m_words_in_span was not always properly reset between invocations (if discardspan() was not called for some reason), resulting in crashes

This commit is contained in:
Jean-Francois Dockes 2017-05-15 10:26:38 +02:00
parent 4671f5ea25
commit 15ea565e9f
2 changed files with 18 additions and 9 deletions

View File

@ -468,8 +468,8 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
void TextSplit::discardspan() void TextSplit::discardspan()
{ {
m_span.clear();
m_words_in_span.clear(); m_words_in_span.clear();
m_span.erase();
m_spanpos = m_wordpos; m_spanpos = m_wordpos;
m_wordStart = 0; m_wordStart = 0;
m_wordLen = m_wordChars = 0; m_wordLen = m_wordChars = 0;
@ -513,10 +513,9 @@ bool TextSplit::text_to_words(const string &in)
if (in.empty()) if (in.empty())
return true; return true;
m_span.erase(); // Reset the data members relative to splitting state
m_inNumber = false; clearsplitstate();
m_wordStart = m_wordLen = m_wordChars = m_prevpos = m_prevlen = m_wordpos
= m_spanpos = 0;
bool pagepending = false; bool pagepending = false;
bool softhyphenpending = false; bool softhyphenpending = false;
@ -935,10 +934,12 @@ bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
} }
} }
m_span.erase(); // Reset state, saving term position, and return the found non-cjk
m_inNumber = false; // unicode character value. The current input byte offset is kept
m_wordStart = m_wordLen = m_wordChars = m_prevpos = m_prevlen = 0; // in the utf8Iter
m_spanpos = m_wordpos; int pos = m_wordpos;
clearsplitstate();
m_spanpos = m_wordpos = pos;
*cp = c; *cp = c;
return true; return true;
} }

View File

@ -213,6 +213,14 @@ private:
// Word length in characters. Declared but not updated if !TEXTSPLIT_STATS // Word length in characters. Declared but not updated if !TEXTSPLIT_STATS
unsigned int m_wordChars; unsigned int m_wordChars;
// Reset every data member that tracks the splitting state: the current
// span text and its word list are emptied, the in-number flag is dropped,
// and all position/length counters go back to zero.
// Note that m_wordpos is zeroed too, so a caller that needs to keep the
// current term position has to save it before calling and restore it after.
void clearsplitstate() {
    // Span contents
    m_span.clear();
    m_words_in_span.clear();

    // Parsing flag
    m_inNumber = false;

    // Current word bookkeeping
    m_wordStart = 0;
    m_wordLen = 0;
    m_wordChars = 0;

    // Term and span positions
    m_wordpos = 0;
    m_spanpos = 0;

    // Previously emitted term
    m_prevpos = 0;
    m_prevlen = 0;
}
// This processes cjk text: // This processes cjk text:
bool cjk_to_words(Utf8Iter *it, unsigned int *cp); bool cjk_to_words(Utf8Iter *it, unsigned int *cp);