snippets: fix to the group matching code

This commit is contained in:
Jean-Francois Dockes 2018-01-03 15:53:04 +01:00
parent 567401233a
commit a35de1ef1e
2 changed files with 20 additions and 8 deletions

View File

@ -261,13 +261,17 @@ public:
// always start the search where we previously stopped).
auto fragit = m_fragments.begin();
for (const auto& grpmatch : tboffs) {
while (fragit->start > grpmatch.offs.first) {
LOGDEB2("LOOKING FOR FRAGMENT: group: " << grpmatch.offs.first <<
"-" << grpmatch.offs.second << " curfrag " <<
fragit->start << "-" << fragit->stop << endl);
while (fragit->stop < grpmatch.offs.first) {
fragit++;
if (fragit == m_fragments.end()) {
return;
}
}
if (fragit->stop >= grpmatch.offs.second) {
if (fragit->start <= grpmatch.offs.first &&
fragit->stop >= grpmatch.offs.second) {
// The group match lies entirely within this fragment: boost its coefficient.
fragit->coef += 10.0;
}

View File

@ -22,6 +22,7 @@
#include <limits.h>
#include "log.h"
#include "smallut.h"
using std::string;
using std::map;
@ -64,6 +65,13 @@ bool do_proximity_test(int window, vector<const vector<int>*>& plists,
return false;
}
// Group-matching trace switch: while DEBUGGROUPS is defined, LOGRP expands
// to LOGDEB; when it is not defined, LOGRP expands to LOGDEB1.
#define DEBUGGROUPS
#if defined(DEBUGGROUPS)
#  define LOGRP LOGDEB
#else
#  define LOGRP LOGDEB1
#endif
// Find NEAR matches for one group of terms
bool matchGroup(const HighlightData& hldata,
unsigned int grpidx,
@ -75,7 +83,7 @@ bool matchGroup(const HighlightData& hldata,
const vector<string>& terms = hldata.groups[grpidx];
int window = int(hldata.groups[grpidx].size() + hldata.slacks[grpidx]);
LOGDEB1("TextSplitPTR::matchGroup:d " << window << ": " <<
LOGRP("TextSplitPTR::matchGroup:d " << window << ": " <<
stringsToString(terms) << "\n");
// The position lists we are going to work with. We extract them from the
@ -91,7 +99,7 @@ bool matchGroup(const HighlightData& hldata,
for (const auto& term : terms) {
map<string, vector<int> >::const_iterator pl = inplists.find(term);
if (pl == inplists.end()) {
LOGDEB1("TextSplitPTR::matchGroup: [" << term <<
LOGRP("TextSplitPTR::matchGroup: [" << term <<
"] not found in plists\n");
return false;
}
@ -101,7 +109,7 @@ bool matchGroup(const HighlightData& hldata,
// I think this can't actually happen. The check was useful when we used to
// prune the groups, and it doesn't hurt to keep it.
if (plists.size() < 2) {
LOGDEB1("TextSplitPTR::matchGroup: no actual groups found\n");
LOGRP("TextSplitPTR::matchGroup: no actual groups found\n");
return false;
}
// Sort the position lists so that the shortest one comes first
@ -118,7 +126,7 @@ bool matchGroup(const HighlightData& hldata,
LOGERR("matchGroup: term for first list not found !?!\n");
return false;
}
LOGDEB1("matchGroup: walking the shortest plist. Term [" <<
LOGRP("matchGroup: walking the shortest plist. Term [" <<
it->second << "], len " << plists[0]->size() << "\n");
}
@ -132,7 +140,7 @@ bool matchGroup(const HighlightData& hldata,
int sta = INT_MAX, sto = 0;
LOGDEB2("MatchGroup: Testing at pos " << pos << "\n");
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
LOGDEB1("TextSplitPTR::matchGroup: MATCH termpos [" << sta <<
LOGRP("TextSplitPTR::matchGroup: MATCH termpos [" << sta <<
"," << sto << "]\n");
// Maybe extend the window by the 1st term position; this was not
// done by do_proximity_test().
@ -151,7 +159,7 @@ bool matchGroup(const HighlightData& hldata,
<< sto << "\n");
}
} else {
LOGDEB1("matchGroup: no group match found at this position\n");
LOGRP("matchGroup: no group match found at this position\n");
}
}