snippets: fix to the group matching code

This commit is contained in:
Jean-Francois Dockes 2018-01-03 15:53:04 +01:00
parent 567401233a
commit a35de1ef1e
2 changed files with 20 additions and 8 deletions

View File

@ -261,13 +261,17 @@ public:
// always start the search where we previously stopped). // always start the search where we previously stopped).
auto fragit = m_fragments.begin(); auto fragit = m_fragments.begin();
for (const auto& grpmatch : tboffs) { for (const auto& grpmatch : tboffs) {
while (fragit->start > grpmatch.offs.first) { LOGDEB2("LOOKING FOR FRAGMENT: group: " << grpmatch.offs.first <<
"-" << grpmatch.offs.second << " curfrag " <<
fragit->start << "-" << fragit->stop << endl);
while (fragit->stop < grpmatch.offs.first) {
fragit++; fragit++;
if (fragit == m_fragments.end()) { if (fragit == m_fragments.end()) {
return; return;
} }
} }
if (fragit->stop >= grpmatch.offs.second) { if (fragit->start <= grpmatch.offs.first &&
fragit->stop >= grpmatch.offs.second) {
// grp in frag // grp in frag
fragit->coef += 10.0; fragit->coef += 10.0;
} }

View File

@ -22,6 +22,7 @@
#include <limits.h> #include <limits.h>
#include "log.h" #include "log.h"
#include "smallut.h"
using std::string; using std::string;
using std::map; using std::map;
@ -64,6 +65,13 @@ bool do_proximity_test(int window, vector<const vector<int>*>& plists,
return false; return false;
} }
// Trace switch for the group (NEAR/PHRASE) matching code. LOGRP is the
// logging macro used by the matchGroup() traces: it maps to LOGDEB when
// DEBUGGROUPS is defined and to the quieter LOGDEB1 otherwise.
// DEBUGGROUPS is deliberately NOT defined here so that normal builds keep
// the traces at the LOGDEB1 level; enable it from the build (for example
// by compiling with -DDEBUGGROUPS) when debugging group matching.
#ifdef DEBUGGROUPS
#define LOGRP LOGDEB
#else
#define LOGRP LOGDEB1
#endif
// Find NEAR matches for one group of terms // Find NEAR matches for one group of terms
bool matchGroup(const HighlightData& hldata, bool matchGroup(const HighlightData& hldata,
unsigned int grpidx, unsigned int grpidx,
@ -75,7 +83,7 @@ bool matchGroup(const HighlightData& hldata,
const vector<string>& terms = hldata.groups[grpidx]; const vector<string>& terms = hldata.groups[grpidx];
int window = int(hldata.groups[grpidx].size() + hldata.slacks[grpidx]); int window = int(hldata.groups[grpidx].size() + hldata.slacks[grpidx]);
LOGDEB1("TextSplitPTR::matchGroup:d " << window << ": " << LOGRP("TextSplitPTR::matchGroup:d " << window << ": " <<
stringsToString(terms) << "\n"); stringsToString(terms) << "\n");
// The position lists we are going to work with. We extract them from the // The position lists we are going to work with. We extract them from the
@ -91,7 +99,7 @@ bool matchGroup(const HighlightData& hldata,
for (const auto& term : terms) { for (const auto& term : terms) {
map<string, vector<int> >::const_iterator pl = inplists.find(term); map<string, vector<int> >::const_iterator pl = inplists.find(term);
if (pl == inplists.end()) { if (pl == inplists.end()) {
LOGDEB1("TextSplitPTR::matchGroup: [" << term << LOGRP("TextSplitPTR::matchGroup: [" << term <<
"] not found in plists\n"); "] not found in plists\n");
return false; return false;
} }
@ -101,7 +109,7 @@ bool matchGroup(const HighlightData& hldata,
// I think this can't actually happen, was useful when we used to // I think this can't actually happen, was useful when we used to
// prune the groups, but doesn't hurt. // prune the groups, but doesn't hurt.
if (plists.size() < 2) { if (plists.size() < 2) {
LOGDEB1("TextSplitPTR::matchGroup: no actual groups found\n"); LOGRP("TextSplitPTR::matchGroup: no actual groups found\n");
return false; return false;
} }
// Sort the positions lists so that the shorter is first // Sort the positions lists so that the shorter is first
@ -118,7 +126,7 @@ bool matchGroup(const HighlightData& hldata,
LOGERR("matchGroup: term for first list not found !?!\n"); LOGERR("matchGroup: term for first list not found !?!\n");
return false; return false;
} }
LOGDEB1("matchGroup: walking the shortest plist. Term [" << LOGRP("matchGroup: walking the shortest plist. Term [" <<
it->second << "], len " << plists[0]->size() << "\n"); it->second << "], len " << plists[0]->size() << "\n");
} }
@ -132,7 +140,7 @@ bool matchGroup(const HighlightData& hldata,
int sta = INT_MAX, sto = 0; int sta = INT_MAX, sto = 0;
LOGDEB2("MatchGroup: Testing at pos " << pos << "\n"); LOGDEB2("MatchGroup: Testing at pos " << pos << "\n");
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) { if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
LOGDEB1("TextSplitPTR::matchGroup: MATCH termpos [" << sta << LOGRP("TextSplitPTR::matchGroup: MATCH termpos [" << sta <<
"," << sto << "]\n"); "," << sto << "]\n");
// Maybe extend the window by 1st term position, this was not // Maybe extend the window by 1st term position, this was not
// done by do_prox.. // done by do_prox..
@ -151,7 +159,7 @@ bool matchGroup(const HighlightData& hldata,
<< sto << "\n"); << sto << "\n");
} }
} else { } else {
LOGDEB1("matchGroup: no group match found at this position\n"); LOGRP("matchGroup: no group match found at this position\n");
} }
} }