Snippets generation: store a possible last incomplete snippet at the end of the text (it was previously not stored)

This commit is contained in:
Jean-Francois Dockes 2021-04-13 10:43:10 +02:00
parent ebcef6a1ff
commit fc0a48a524

View File

@ -36,7 +36,7 @@
using namespace std; using namespace std;
// #define DEBUGABSTRACT #undef DEBUGABSTRACT
#ifdef DEBUGABSTRACT #ifdef DEBUGABSTRACT
#define LOGABS LOGDEB #define LOGABS LOGDEB
#else #else
@ -221,8 +221,7 @@ public:
// Term group (phrase/near) handling // Term group (phrase/near) handling
m_plists[dumb].push_back(pos); m_plists[dumb].push_back(pos);
m_gpostobytes[pos] = pair<int,int>(bts, bte); m_gpostobytes[pos] = pair<int,int>(bts, bte);
LOGDEB1("Recorded bpos for pos " << pos << ": " << bts << " " << LOGDEB1("Recorded bpos for pos " << pos << ": " << bts << " " << bte << "\n");
bte << "\n");
} }
} }
#ifdef COMPUTE_HLZONES #ifdef COMPUTE_HLZONES
@ -278,6 +277,20 @@ public:
// After the text is split: use the group terms positions lists to // After the text is split: use the group terms positions lists to
// find the group matches. // find the group matches.
void updgroups() { void updgroups() {
// Possibly store current incomplete fragment (if match was
// close to the end of the text, so we did not close it):
if (m_curtermcoef != 0.0) {
m_fragments.push_back(
MatchFragment(m_curfrag.first, m_curfrag.second, m_curfragcoef,
#ifdef COMPUTE_HLZONES
m_curhlzones,
#endif
m_curhitpos, m_curterm));
m_totalcoef += m_curfragcoef;
m_curfragcoef = 0.0;
m_curtermcoef = 0.0;
}
LOGDEB("TextSplitABS: stored total " << m_fragments.size() << LOGDEB("TextSplitABS: stored total " << m_fragments.size() <<
" fragments" << endl); " fragments" << endl);
vector<GroupMatchEntry> tboffs; vector<GroupMatchEntry> tboffs;