Fix the term byte offsets produced by the new textsplit code (they are used for highlighting)

This commit is contained in:
Jean-Francois Dockes 2014-04-24 12:42:10 +02:00
parent ece15318ab
commit 077aed3018
2 changed files with 6 additions and 4 deletions

View File

@@ -278,7 +278,7 @@ bool TextSplit::span_is_acronym(string *acronym)
// Generate terms from span. Have to take into account the
// flags: ONLYSPANS, NOSPANS, noNumbers
bool TextSplit::words_from_span()
bool TextSplit::words_from_span(int bp)
{
#if 0
cerr << "Span: [" << m_span << "] " << " w_i_s size: " <<
@@ -292,6 +292,8 @@ bool TextSplit::words_from_span()
#endif
unsigned int spanwords = m_words_in_span.size();
int pos = m_spanpos;
// Byte position of the span start
int spboffs = bp - m_span.size();
for (unsigned int i = 0;
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
@@ -309,7 +311,7 @@ bool TextSplit::words_from_span()
if (fin - deb > int(m_span.size()))
break;
string word(m_span.substr(deb, fin-deb));
if (!emitterm(j != i+1, word, pos, deb, fin))
if (!emitterm(j != i+1, word, pos, spboffs+deb, spboffs+fin))
return false;
}
}
@@ -385,7 +387,7 @@ inline bool TextSplit::doemit(bool spanerase, int bp)
}
breaktrimloop:
if (!words_from_span()) {
if (!words_from_span(bp)) {
return false;
}
discardspan();

View File

@@ -220,7 +220,7 @@ private:
bool doemit(bool spanerase, int bp);
void discardspan();
bool span_is_acronym(std::string *acronym);
bool words_from_span();
bool words_from_span(int bp);
};
#endif /* _TEXTSPLIT_H_INCLUDED_ */