Highlighter for groups: do not perform the proximity test when the input cannot match because one of the terms is not present. This removes a lot of spurious highlighting.

This commit is contained in:
Jean-Francois Dockes 2021-11-13 12:33:51 +01:00
parent 25e3f39f81
commit 996a8fd5b8

View File

@ -183,35 +183,34 @@ bool matchGroup(const HighlightData& hldata,
for (const auto& entry:inplists) { for (const auto& entry:inplists) {
allplterms += entry.first + " "; allplterms += entry.first + " ";
} }
LOGRP("matchGroup: isphrase " << isphrase << LOGRP("matchGroup: isphrase " << isphrase << ". Have plists for [" << allplterms << "]\n");
". Have plists for [" << allplterms << "]\n"); //LOGRP("matchGroup: hldata: " << hldata.toString() << std::endl);
LOGRP("matchGroup: hldata: " << hldata.toString() << std::endl);
int window = int(tg.orgroups.size() + tg.slack); int window = int(tg.orgroups.size() + tg.slack);
// The position lists we are going to work with. We extract them from the // The position lists we are going to work with. We extract them from the
// (string->plist) map // (string->plist) map
vector<OrPList> orplists; vector<OrPList> orplists;
// Find the position list for each term in the group and build the // Find the position list for each term in the group and build the combined lists for the term
// combined lists for the term or groups (each group is the result // or groups (each group is the result of the expansion of one user term). It is possible that
// of the expansion of one user term). It is possible that this // this particular group was not actually matched by the search, so that some terms are not
// particular group was not actually matched by the search, so // found, in which case we bail out.
// that some terms are not found, in which case we bail out.
for (const auto& group : tg.orgroups) { for (const auto& group : tg.orgroups) {
orplists.push_back(OrPList()); orplists.push_back(OrPList());
for (const auto& term : group) { for (const auto& term : group) {
const auto pl = inplists.find(term); const auto pl = inplists.find(term);
if (pl == inplists.end()) { if (pl == inplists.end()) {
LOGRP("TextSplitPTR::matchGroup: term [" << term << LOGRP("TextSplitPTR::matchGroup: term [" << term << "] not found in plists\n");
"] not found in plists\n");
continue; continue;
} }
orplists.back().addplist(pl->first, &(pl->second)); orplists.back().addplist(pl->first, &(pl->second));
} }
if (orplists.back().plists.empty()) { if (orplists.back().plists.empty()) {
LOGRP("No positions list found for group " << LOGRP("No positions list found for OR group [" << stringsToString(group) <<
stringsToString(group) << std::endl); "] : input has no group match, returning false\n");
orplists.pop_back(); return false;
} else {
LOGRP("Created OrPList has " << orplists.back().plists.size() << " members\n");
} }
} }
@ -256,8 +255,7 @@ bool matchGroup(const HighlightData& hldata,
tboffs.push_back(GroupMatchEntry(i1->second.first, tboffs.push_back(GroupMatchEntry(i1->second.first,
i2->second.second, grpidx)); i2->second.second, grpidx));
} else { } else {
LOGDEB0("matchGroup: no bpos found for " << sta << " or " LOGDEB0("matchGroup: no bpos found for " << sta << " or " << sto << "\n");
<< sto << "\n");
} }
} else { } else {
LOGRP("matchGroup: no group match found at this position\n"); LOGRP("matchGroup: no group match found at this position\n");
@ -267,6 +265,12 @@ bool matchGroup(const HighlightData& hldata,
return !tboffs.empty(); return !tboffs.empty();
} }
// Table of the HighlightData::TermGroup kind values (TGK_TERM, TGK_NEAR,
// TGK_PHRASE). HighlightData::toString() passes it to valToString() to render
// a term group's kind in the debug output.
// NOTE(review): CHARFLAGENTRY presumably pairs each value with its stringified
// name -- confirm against the macro definition.
vector<CharFlags> kindflags {
CHARFLAGENTRY(HighlightData::TermGroup::TGK_TERM),
CHARFLAGENTRY(HighlightData::TermGroup::TGK_NEAR),
CHARFLAGENTRY(HighlightData::TermGroup::TGK_PHRASE),
};
string HighlightData::toString() const string HighlightData::toString() const
{ {
string out; string out;
@ -306,8 +310,8 @@ string HighlightData::toString() const
} }
out.append("}"); out.append("}");
} }
sprintf(cbuf, "%d", tg.slack); out.append("} ");
out.append("}").append(cbuf); out.append(valToString(kindflags, tg.kind)).append("-").append(lltodecstr(tg.slack));
} }
} }
out.append("\n"); out.append("\n");