Restore the nonumbers functionality (excluding numbers from indexing)
This commit is contained in:
parent
cbcfa7e9a1
commit
3f1dfa564c
@ -521,20 +521,27 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
|
||||
if (m_wordLen) {
|
||||
// We have a current word. Remember it
|
||||
|
||||
// Limit max span word count
|
||||
if (m_words_in_span.size() >= 6) {
|
||||
// Limit max span word count
|
||||
spanerase = true;
|
||||
}
|
||||
|
||||
m_words_in_span.push_back(pair<int,int>(m_wordStart,
|
||||
m_wordStart + m_wordLen));
|
||||
if (!(o_noNumbers && m_inNumber)) {
|
||||
m_words_in_span.push_back({m_wordStart, m_wordStart + m_wordLen});
|
||||
m_wordpos++;
|
||||
}
|
||||
m_wordLen = m_wordChars = 0;
|
||||
}
|
||||
|
||||
if (spanerase) {
|
||||
// We encountered a span-terminating character. Produce terms.
|
||||
if (!spanerase) {
|
||||
// Not done with this span. Just update relative word start offset.
|
||||
m_wordStart = int(m_span.length());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// Span is done (too long or span-terminating character). Produce
|
||||
// terms and reset it.
|
||||
string acronym;
|
||||
if (span_is_acronym(&acronym)) {
|
||||
if (!emitterm(false, acronym, m_spanpos, bp - m_span.length(), bp))
|
||||
@ -562,19 +569,12 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
|
||||
goto breaktrimloop;
|
||||
}
|
||||
}
|
||||
breaktrimloop:
|
||||
breaktrimloop:
|
||||
|
||||
if (!words_from_span(bp)) {
|
||||
return false;
|
||||
}
|
||||
discardspan();
|
||||
|
||||
} else {
|
||||
|
||||
m_wordStart = int(m_span.length());
|
||||
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user