Fix the term byte offsets produced by the new textsplit (these offsets are used for highlighting)
This commit is contained in:
parent
ece15318ab
commit
077aed3018
@ -278,7 +278,7 @@ bool TextSplit::span_is_acronym(string *acronym)
|
|||||||
|
|
||||||
// Generate terms from span. Have to take into account the
|
// Generate terms from span. Have to take into account the
|
||||||
// flags: ONLYSPANS, NOSPANS, noNumbers
|
// flags: ONLYSPANS, NOSPANS, noNumbers
|
||||||
bool TextSplit::words_from_span()
|
bool TextSplit::words_from_span(int bp)
|
||||||
{
|
{
|
||||||
#if 0
|
#if 0
|
||||||
cerr << "Span: [" << m_span << "] " << " w_i_s size: " <<
|
cerr << "Span: [" << m_span << "] " << " w_i_s size: " <<
|
||||||
@ -292,6 +292,8 @@ bool TextSplit::words_from_span()
|
|||||||
#endif
|
#endif
|
||||||
unsigned int spanwords = m_words_in_span.size();
|
unsigned int spanwords = m_words_in_span.size();
|
||||||
int pos = m_spanpos;
|
int pos = m_spanpos;
|
||||||
|
// Byte position of the span start
|
||||||
|
int spboffs = bp - m_span.size();
|
||||||
|
|
||||||
for (unsigned int i = 0;
|
for (unsigned int i = 0;
|
||||||
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
|
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
|
||||||
@ -309,7 +311,7 @@ bool TextSplit::words_from_span()
|
|||||||
if (fin - deb > int(m_span.size()))
|
if (fin - deb > int(m_span.size()))
|
||||||
break;
|
break;
|
||||||
string word(m_span.substr(deb, fin-deb));
|
string word(m_span.substr(deb, fin-deb));
|
||||||
if (!emitterm(j != i+1, word, pos, deb, fin))
|
if (!emitterm(j != i+1, word, pos, spboffs+deb, spboffs+fin))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -385,7 +387,7 @@ inline bool TextSplit::doemit(bool spanerase, int bp)
|
|||||||
}
|
}
|
||||||
breaktrimloop:
|
breaktrimloop:
|
||||||
|
|
||||||
if (!words_from_span()) {
|
if (!words_from_span(bp)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
discardspan();
|
discardspan();
|
||||||
|
|||||||
@ -220,7 +220,7 @@ private:
|
|||||||
bool doemit(bool spanerase, int bp);
|
bool doemit(bool spanerase, int bp);
|
||||||
void discardspan();
|
void discardspan();
|
||||||
bool span_is_acronym(std::string *acronym);
|
bool span_is_acronym(std::string *acronym);
|
||||||
bool words_from_span();
|
bool words_from_span(int bp);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _TEXTSPLIT_H_INCLUDED_ */
|
#endif /* _TEXTSPLIT_H_INCLUDED_ */
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user