Fix the term byte offsets produced by the new textsplit (needed for highlighting)

This commit is contained in:
Jean-Francois Dockes 2014-04-24 12:42:10 +02:00
parent ece15318ab
commit 077aed3018
2 changed files with 6 additions and 4 deletions

View File

@ -278,7 +278,7 @@ bool TextSplit::span_is_acronym(string *acronym)
// Generate terms from span. Have to take into account the // Generate terms from span. Have to take into account the
// flags: ONLYSPANS, NOSPANS, noNumbers // flags: ONLYSPANS, NOSPANS, noNumbers
bool TextSplit::words_from_span() bool TextSplit::words_from_span(int bp)
{ {
#if 0 #if 0
cerr << "Span: [" << m_span << "] " << " w_i_s size: " << cerr << "Span: [" << m_span << "] " << " w_i_s size: " <<
@ -292,6 +292,8 @@ bool TextSplit::words_from_span()
#endif #endif
unsigned int spanwords = m_words_in_span.size(); unsigned int spanwords = m_words_in_span.size();
int pos = m_spanpos; int pos = m_spanpos;
// Byte position of the span start
int spboffs = bp - m_span.size();
for (unsigned int i = 0; for (unsigned int i = 0;
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords); i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
@ -309,7 +311,7 @@ bool TextSplit::words_from_span()
if (fin - deb > int(m_span.size())) if (fin - deb > int(m_span.size()))
break; break;
string word(m_span.substr(deb, fin-deb)); string word(m_span.substr(deb, fin-deb));
if (!emitterm(j != i+1, word, pos, deb, fin)) if (!emitterm(j != i+1, word, pos, spboffs+deb, spboffs+fin))
return false; return false;
} }
} }
@ -385,7 +387,7 @@ inline bool TextSplit::doemit(bool spanerase, int bp)
} }
breaktrimloop: breaktrimloop:
if (!words_from_span()) { if (!words_from_span(bp)) {
return false; return false;
} }
discardspan(); discardspan();

View File

@ -220,7 +220,7 @@ private:
bool doemit(bool spanerase, int bp); bool doemit(bool spanerase, int bp);
void discardspan(); void discardspan();
bool span_is_acronym(std::string *acronym); bool span_is_acronym(std::string *acronym);
bool words_from_span(); bool words_from_span(int bp);
}; };
#endif /* _TEXTSPLIT_H_INCLUDED_ */ #endif /* _TEXTSPLIT_H_INCLUDED_ */