diff --git a/src/rcldb/rclabsfromtext.cpp b/src/rcldb/rclabsfromtext.cpp index 9d37e22b..cd1bdfda 100644 --- a/src/rcldb/rclabsfromtext.cpp +++ b/src/rcldb/rclabsfromtext.cpp @@ -261,13 +261,17 @@ public: // always start the search where we previously stopped). auto fragit = m_fragments.begin(); for (const auto& grpmatch : tboffs) { - while (fragit->start > grpmatch.offs.first) { + LOGDEB2("LOOKING FOR FRAGMENT: group: " << grpmatch.offs.first << + "-" << grpmatch.offs.second << " curfrag " << + fragit->start << "-" << fragit->stop << endl); + while (fragit->stop < grpmatch.offs.first) { fragit++; if (fragit == m_fragments.end()) { return; } } - if (fragit->stop >= grpmatch.offs.second) { + if (fragit->start <= grpmatch.offs.first && + fragit->stop >= grpmatch.offs.second) { // grp in frag fragit->coef += 10.0; } diff --git a/src/utils/hldata.cpp b/src/utils/hldata.cpp index ee1d4d0d..e59bde2e 100644 --- a/src/utils/hldata.cpp +++ b/src/utils/hldata.cpp @@ -22,6 +22,7 @@ #include #include "log.h" +#include "smallut.h" using std::string; using std::map; @@ -64,6 +65,13 @@ bool do_proximity_test(int window, vector*>& plists, return false; } +#define DEBUGGROUPS +#ifdef DEBUGGROUPS +#define LOGRP LOGDEB +#else +#define LOGRP LOGDEB1 +#endif + // Find NEAR matches for one group of terms bool matchGroup(const HighlightData& hldata, unsigned int grpidx, @@ -75,7 +83,7 @@ bool matchGroup(const HighlightData& hldata, const vector& terms = hldata.groups[grpidx]; int window = int(hldata.groups[grpidx].size() + hldata.slacks[grpidx]); - LOGDEB1("TextSplitPTR::matchGroup:d " << window << ": " << + LOGRP("TextSplitPTR::matchGroup:d " << window << ": " << stringsToString(terms) << "\n"); // The position lists we are going to work with. We extract them from the @@ -91,7 +99,7 @@ bool matchGroup(const HighlightData& hldata, for (const auto& term : terms) { map >::const_iterator pl = inplists.find(term); if (pl == inplists.end()) { - LOGDEB1("TextSplitPTR::matchGroup: [" << term << + LOGRP("TextSplitPTR::matchGroup: [" << term << "] not found in plists\n"); return false; } @@ -101,7 +109,7 @@ bool matchGroup(const HighlightData& hldata, // I think this can't actually happen, was useful when we used to // prune the groups, but doesn't hurt. if (plists.size() < 2) { - LOGDEB1("TextSplitPTR::matchGroup: no actual groups found\n"); + LOGRP("TextSplitPTR::matchGroup: no actual groups found\n"); return false; } // Sort the positions lists so that the shorter is first @@ -118,7 +126,7 @@ bool matchGroup(const HighlightData& hldata, LOGERR("matchGroup: term for first list not found !?!\n"); return false; } - LOGDEB1("matchGroup: walking the shortest plist. Term [" << + LOGRP("matchGroup: walking the shortest plist. Term [" << it->second << "], len " << plists[0]->size() << "\n"); } @@ -132,7 +140,7 @@ bool matchGroup(const HighlightData& hldata, int sta = INT_MAX, sto = 0; LOGDEB2("MatchGroup: Testing at pos " << pos << "\n"); if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) { - LOGDEB1("TextSplitPTR::matchGroup: MATCH termpos [" << sta << + LOGRP("TextSplitPTR::matchGroup: MATCH termpos [" << sta << "," << sto << "]\n"); // Maybe extend the window by 1st term position, this was not // done by do_prox.. @@ -151,7 +159,7 @@ bool matchGroup(const HighlightData& hldata, << sto << "\n"); } } else { - LOGDEB1("matchGroup: no group match found at this position\n"); + LOGRP("matchGroup: no group match found at this position\n"); } }