abstract: ensure the snippet does not extend below baseTextPosition, because a snippet that starts before the text body prevents retrieving page numbers.

This commit is contained in:
Jean-Francois Dockes 2013-02-26 10:40:47 +01:00
parent a7b0c0c476
commit 3c8c8982e8

View File

@ -330,18 +330,15 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
} }
listList("Match terms: ", matchedTerms); listList("Match terms: ", matchedTerms);
// Retrieve the term freqencies for the query terms. This is // Retrieve the term frequencies for the query terms. This is
// actually computed only once for a query, and for all terms in // actually computed only once for a query, and for all terms in
// the query (not only the matches for this doc) // the query (not only the matches for this doc)
setDbWideQTermsFreqs(); setDbWideQTermsFreqs();
// Build a sorted by quality container for the match terms We are // Build a sorted by quality container for the match terms We are
// going to try and show text around the less common search terms. // going to try and show text around the less common search terms.
// TOBEDONE: terms issued from an original one by stem expansion // Terms issued from an original one by stem expansion are
// should be somehow aggregated here, else, it may happen that // aggregated by the qualityTerms() routine.
// such a group prevents displaying matches for other terms (by
// removing its meaning from the maximum occurrences per term test
// used while walking the list below)
multimap<double, vector<string> > byQ; multimap<double, vector<string> > byQ;
double totalweight = qualityTerms(docid, matchedTerms, byQ); double totalweight = qualityTerms(docid, matchedTerms, byQ);
LOGABS(("makeAbstract:%d: computed Qcoefs.\n", chron.ms())); LOGABS(("makeAbstract:%d: computed Qcoefs.\n", chron.ms()));
@ -430,8 +427,8 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
int ipos = *pos; int ipos = *pos;
if (ipos < int(baseTextPosition)) // Not in text body if (ipos < int(baseTextPosition)) // Not in text body
continue; continue;
LOGABS(("makeAbstract: [%s] at pos %d grpoccs %d maxgrpoccs %d\n", LOGABS(("makeAbstract: [%s] at pos %d grpoccs %d maxgrpoccs"
qterm.c_str(), ipos, grpoccs, maxgrpoccs)); " %d\n", qterm.c_str(), ipos, grpoccs, maxgrpoccs));
totaloccs++; totaloccs++;
grpoccs++; grpoccs++;
@ -440,7 +437,8 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
// step by inserting empty strings. Special provisions // step by inserting empty strings. Special provisions
// for adding ellipsis and for positions overlapped by // for adding ellipsis and for positions overlapped by
// the match term. // the match term.
unsigned int sta = MAX(0, ipos - ctxwords); unsigned int sta = MAX(int(baseTextPosition),
ipos - ctxwords);
unsigned int sto = ipos + qtrmwrdcnt-1 + unsigned int sto = ipos + qtrmwrdcnt-1 +
m_q->m_db->getAbsCtxLen(); m_q->m_db->getAbsCtxLen();
for (unsigned int ii = sta; ii <= sto; ii++) { for (unsigned int ii = sta; ii <= sto; ii++) {
@ -455,7 +453,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
} else if (!sparseDoc[ii].compare(cstr_ellipsis)) { } else if (!sparseDoc[ii].compare(cstr_ellipsis)) {
// For an empty slot, the test has a side // For an empty slot, the test has a side
// effect of inserting an empty string which // effect of inserting an empty string which
// is what we want // is what we want.
sparseDoc[ii] = emptys; sparseDoc[ii] = emptys;
} }
} }
@ -526,7 +524,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) { if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
ret = ABSRES_TERMMISS; ret = ABSRES_TERMMISS;
LOGDEB0(("makeAbstract: max term count cutoff %d\n", LOGDEB0(("makeAbstract: max term count cutoff %d\n",
m_q->m_snipMaxPosWalk)); m_q->m_snipMaxPosWalk));
break; break;
} }
// If we are beyond the max possible position, stop // If we are beyond the max possible position, stop
@ -580,9 +578,11 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
string term; string term;
for (map<unsigned int, string>::const_iterator it = sparseDoc.begin(); for (map<unsigned int, string>::const_iterator it = sparseDoc.begin();
it != sparseDoc.end(); it++) { it != sparseDoc.end(); it++) {
LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str())); LOGDEB2(("Abtract:output %u -> [%s]\n", it->first, it->second.c_str()));
if (!occupiedmarker.compare(it->second)) if (!occupiedmarker.compare(it->second)) {
LOGDEB(("Abstract: qtrm position not filled ??\n"));
continue; continue;
}
if (chunk.empty() && !vpbreaks.empty()) { if (chunk.empty() && !vpbreaks.empty()) {
page = ndb->getPageNumberForPosition(vpbreaks, it->first); page = ndb->getPageNumberForPosition(vpbreaks, it->first);
if (page < 0) if (page < 0)