From 077aed30182bf748bf430afdfd1ffb6eb34c04fc Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes
Date: Thu, 24 Apr 2014 12:42:10 +0200
Subject: [PATCH] fix term byte offsets produced by new textsplit: for highlighting

---
 src/common/textsplit.cpp | 8 +++++---
 src/common/textsplit.h   | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp
index 5fd5bb44..070b4b45 100644
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@@ -278,7 +278,7 @@ bool TextSplit::span_is_acronym(string *acronym)
 
 // Generate terms from span. Have to take into account the
 // flags: ONLYSPANS, NOSPANS, noNumbers
-bool TextSplit::words_from_span()
+bool TextSplit::words_from_span(int bp)
 {
 #if 0
     cerr << "Span: [" << m_span << "] " << " w_i_s size: " <<
@@ -292,6 +292,8 @@ bool TextSplit::words_from_span()
 #endif
     unsigned int spanwords = m_words_in_span.size();
     int pos = m_spanpos;
+    // Byte position of the span start
+    int spboffs = bp - m_span.size();
 
     for (unsigned int i = 0;
          i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
@@ -309,7 +311,7 @@ bool TextSplit::words_from_span()
             if (fin - deb > int(m_span.size()))
                 break;
             string word(m_span.substr(deb, fin-deb));
-            if (!emitterm(j != i+1, word, pos, deb, fin))
+            if (!emitterm(j != i+1, word, pos, spboffs+deb, spboffs+fin))
                 return false;
         }
     }
@@ -385,7 +387,7 @@ inline bool TextSplit::doemit(bool spanerase, int bp)
     }
 
 breaktrimloop:
-    if (!words_from_span()) {
+    if (!words_from_span(bp)) {
         return false;
     }
     discardspan();
diff --git a/src/common/textsplit.h b/src/common/textsplit.h
index 6e80ce29..d0f6cd69 100644
--- a/src/common/textsplit.h
+++ b/src/common/textsplit.h
@@ -220,7 +220,7 @@ private:
     bool doemit(bool spanerase, int bp);
     void discardspan();
     bool span_is_acronym(std::string *acronym);
-    bool words_from_span();
+    bool words_from_span(int bp);
 };
 
 #endif /* _TEXTSPLIT_H_INCLUDED_ */