Restore the nonumbers function (excluding numbers from indexing)

This commit is contained in:
Jean-Francois Dockes 2020-08-22 10:07:58 +02:00
parent cbcfa7e9a1
commit 3f1dfa564c

View File

@@ -521,20 +521,27 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
if (m_wordLen) { if (m_wordLen) {
// We have a current word. Remember it // We have a current word. Remember it
// Limit max span word count
if (m_words_in_span.size() >= 6) { if (m_words_in_span.size() >= 6) {
// Limit max span word count
spanerase = true; spanerase = true;
} }
m_words_in_span.push_back(pair<int,int>(m_wordStart, if (!(o_noNumbers && m_inNumber)) {
m_wordStart + m_wordLen)); m_words_in_span.push_back({m_wordStart, m_wordStart + m_wordLen});
m_wordpos++; m_wordpos++;
}
m_wordLen = m_wordChars = 0; m_wordLen = m_wordChars = 0;
} }
if (spanerase) { if (!spanerase) {
// We encountered a span-terminating character. Produce terms. // Not done with this span. Just update relative word start offset.
m_wordStart = int(m_span.length());
return true;
}
// Span is done (too long or span-terminating character). Produce
// terms and reset it.
string acronym; string acronym;
if (span_is_acronym(&acronym)) { if (span_is_acronym(&acronym)) {
if (!emitterm(false, acronym, m_spanpos, bp - m_span.length(), bp)) if (!emitterm(false, acronym, m_spanpos, bp - m_span.length(), bp))
@@ -562,19 +569,12 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
goto breaktrimloop; goto breaktrimloop;
} }
} }
breaktrimloop: breaktrimloop:
if (!words_from_span(bp)) { if (!words_from_span(bp)) {
return false; return false;
} }
discardspan(); discardspan();
} else {
m_wordStart = int(m_span.length());
}
return true; return true;
} }