snippets: fix to the group matching code
This commit is contained in:
parent
567401233a
commit
a35de1ef1e
@ -261,13 +261,17 @@ public:
|
|||||||
// always start the search where we previously stopped).
|
// always start the search where we previously stopped).
|
||||||
auto fragit = m_fragments.begin();
|
auto fragit = m_fragments.begin();
|
||||||
for (const auto& grpmatch : tboffs) {
|
for (const auto& grpmatch : tboffs) {
|
||||||
while (fragit->start > grpmatch.offs.first) {
|
LOGDEB2("LOOKING FOR FRAGMENT: group: " << grpmatch.offs.first <<
|
||||||
|
"-" << grpmatch.offs.second << " curfrag " <<
|
||||||
|
fragit->start << "-" << fragit->stop << endl);
|
||||||
|
while (fragit->stop < grpmatch.offs.first) {
|
||||||
fragit++;
|
fragit++;
|
||||||
if (fragit == m_fragments.end()) {
|
if (fragit == m_fragments.end()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (fragit->stop >= grpmatch.offs.second) {
|
if (fragit->start <= grpmatch.offs.first &&
|
||||||
|
fragit->stop >= grpmatch.offs.second) {
|
||||||
// grp in frag
|
// grp in frag
|
||||||
fragit->coef += 10.0;
|
fragit->coef += 10.0;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -22,6 +22,7 @@
|
|||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
|
#include "smallut.h"
|
||||||
|
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::map;
|
using std::map;
|
||||||
@ -64,6 +65,13 @@ bool do_proximity_test(int window, vector<const vector<int>*>& plists,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Logging switch for the group-matching code: LOGRP is the macro used by the
// matchGroup() trace messages. When DEBUGGROUPS is defined LOGRP expands to
// LOGDEB, otherwise it expands to LOGDEB1.
// NOTE(review): DEBUGGROUPS is defined unconditionally right here, so the
// LOGDEB branch is always the one selected. Presumably LOGDEB1 is the quieter
// level and this define is meant to be removed once debugging is done -- confirm.
#define DEBUGGROUPS
|
||||||
|
#ifdef DEBUGGROUPS
|
||||||
|
#define LOGRP LOGDEB
|
||||||
|
#else
|
||||||
|
#define LOGRP LOGDEB1
|
||||||
|
#endif
|
||||||
|
|
||||||
// Find NEAR matches for one group of terms
|
// Find NEAR matches for one group of terms
|
||||||
bool matchGroup(const HighlightData& hldata,
|
bool matchGroup(const HighlightData& hldata,
|
||||||
unsigned int grpidx,
|
unsigned int grpidx,
|
||||||
@ -75,7 +83,7 @@ bool matchGroup(const HighlightData& hldata,
|
|||||||
const vector<string>& terms = hldata.groups[grpidx];
|
const vector<string>& terms = hldata.groups[grpidx];
|
||||||
int window = int(hldata.groups[grpidx].size() + hldata.slacks[grpidx]);
|
int window = int(hldata.groups[grpidx].size() + hldata.slacks[grpidx]);
|
||||||
|
|
||||||
LOGDEB1("TextSplitPTR::matchGroup:d " << window << ": " <<
|
LOGRP("TextSplitPTR::matchGroup:d " << window << ": " <<
|
||||||
stringsToString(terms) << "\n");
|
stringsToString(terms) << "\n");
|
||||||
|
|
||||||
// The position lists we are going to work with. We extract them from the
|
// The position lists we are going to work with. We extract them from the
|
||||||
@ -91,7 +99,7 @@ bool matchGroup(const HighlightData& hldata,
|
|||||||
for (const auto& term : terms) {
|
for (const auto& term : terms) {
|
||||||
map<string, vector<int> >::const_iterator pl = inplists.find(term);
|
map<string, vector<int> >::const_iterator pl = inplists.find(term);
|
||||||
if (pl == inplists.end()) {
|
if (pl == inplists.end()) {
|
||||||
LOGDEB1("TextSplitPTR::matchGroup: [" << term <<
|
LOGRP("TextSplitPTR::matchGroup: [" << term <<
|
||||||
"] not found in plists\n");
|
"] not found in plists\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -101,7 +109,7 @@ bool matchGroup(const HighlightData& hldata,
|
|||||||
// I think this can't actually happen, was useful when we used to
|
// I think this can't actually happen, was useful when we used to
|
||||||
// prune the groups, but doesn't hurt.
|
// prune the groups, but doesn't hurt.
|
||||||
if (plists.size() < 2) {
|
if (plists.size() < 2) {
|
||||||
LOGDEB1("TextSplitPTR::matchGroup: no actual groups found\n");
|
LOGRP("TextSplitPTR::matchGroup: no actual groups found\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Sort the positions lists so that the shorter is first
|
// Sort the positions lists so that the shorter is first
|
||||||
@ -118,7 +126,7 @@ bool matchGroup(const HighlightData& hldata,
|
|||||||
LOGERR("matchGroup: term for first list not found !?!\n");
|
LOGERR("matchGroup: term for first list not found !?!\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
LOGDEB1("matchGroup: walking the shortest plist. Term [" <<
|
LOGRP("matchGroup: walking the shortest plist. Term [" <<
|
||||||
it->second << "], len " << plists[0]->size() << "\n");
|
it->second << "], len " << plists[0]->size() << "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -132,7 +140,7 @@ bool matchGroup(const HighlightData& hldata,
|
|||||||
int sta = INT_MAX, sto = 0;
|
int sta = INT_MAX, sto = 0;
|
||||||
LOGDEB2("MatchGroup: Testing at pos " << pos << "\n");
|
LOGDEB2("MatchGroup: Testing at pos " << pos << "\n");
|
||||||
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
|
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
|
||||||
LOGDEB1("TextSplitPTR::matchGroup: MATCH termpos [" << sta <<
|
LOGRP("TextSplitPTR::matchGroup: MATCH termpos [" << sta <<
|
||||||
"," << sto << "]\n");
|
"," << sto << "]\n");
|
||||||
// Maybe extend the window by 1st term position, this was not
|
// Maybe extend the window by 1st term position, this was not
|
||||||
// done by do_prox..
|
// done by do_prox..
|
||||||
@ -151,7 +159,7 @@ bool matchGroup(const HighlightData& hldata,
|
|||||||
<< sto << "\n");
|
<< sto << "\n");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOGDEB1("matchGroup: no group match found at this position\n");
|
LOGRP("matchGroup: no group match found at this position\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user