Restore the nonumbers function (excluding numbers from indexing)
This commit is contained in:
parent
cbcfa7e9a1
commit
3f1dfa564c
@ -521,20 +521,27 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
|
|||||||
if (m_wordLen) {
|
if (m_wordLen) {
|
||||||
// We have a current word. Remember it
|
// We have a current word. Remember it
|
||||||
|
|
||||||
// Limit max span word count
|
|
||||||
if (m_words_in_span.size() >= 6) {
|
if (m_words_in_span.size() >= 6) {
|
||||||
|
// Limit max span word count
|
||||||
spanerase = true;
|
spanerase = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_words_in_span.push_back(pair<int,int>(m_wordStart,
|
if (!(o_noNumbers && m_inNumber)) {
|
||||||
m_wordStart + m_wordLen));
|
m_words_in_span.push_back({m_wordStart, m_wordStart + m_wordLen});
|
||||||
m_wordpos++;
|
m_wordpos++;
|
||||||
|
}
|
||||||
m_wordLen = m_wordChars = 0;
|
m_wordLen = m_wordChars = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (spanerase) {
|
if (!spanerase) {
|
||||||
// We encountered a span-terminating character. Produce terms.
|
// Not done with this span. Just update relative word start offset.
|
||||||
|
m_wordStart = int(m_span.length());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Span is done (too long or span-terminating character). Produce
|
||||||
|
// terms and reset it.
|
||||||
string acronym;
|
string acronym;
|
||||||
if (span_is_acronym(&acronym)) {
|
if (span_is_acronym(&acronym)) {
|
||||||
if (!emitterm(false, acronym, m_spanpos, bp - m_span.length(), bp))
|
if (!emitterm(false, acronym, m_spanpos, bp - m_span.length(), bp))
|
||||||
@ -568,13 +575,6 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
discardspan();
|
discardspan();
|
||||||
|
|
||||||
} else {
|
|
||||||
|
|
||||||
m_wordStart = int(m_span.length());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user