Snippet generation: limit positions walk to max hit position. Return status code when truncated walk possibly generated incomplete snippets. Implement config variable for max pos walk.
This commit is contained in:
parent
33ee759701
commit
a16d047f8d
@ -342,6 +342,10 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
|||||||
// them with their snippets.
|
// them with their snippets.
|
||||||
unordered_set<unsigned int> searchTermPositions;
|
unordered_set<unsigned int> searchTermPositions;
|
||||||
|
|
||||||
|
// Remember max position. Used to stop walking positions lists while
|
||||||
|
// populating the adjacent slots.
|
||||||
|
unsigned int maxpos = 0;
|
||||||
|
|
||||||
// Total number of occurences for all terms. We stop when we have too much
|
// Total number of occurences for all terms. We stop when we have too much
|
||||||
unsigned int totaloccs = 0;
|
unsigned int totaloccs = 0;
|
||||||
|
|
||||||
@ -419,6 +423,8 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
|||||||
if (ii == (unsigned int)ipos) {
|
if (ii == (unsigned int)ipos) {
|
||||||
sparseDoc[ii] = qterm;
|
sparseDoc[ii] = qterm;
|
||||||
searchTermPositions.insert(ii);
|
searchTermPositions.insert(ii);
|
||||||
|
if (ii > maxpos)
|
||||||
|
maxpos = ii;
|
||||||
} else if (ii > (unsigned int)ipos &&
|
} else if (ii > (unsigned int)ipos &&
|
||||||
ii < (unsigned int)ipos + qtrmwrdcnt) {
|
ii < (unsigned int)ipos + qtrmwrdcnt) {
|
||||||
sparseDoc[ii] = occupiedmarker;
|
sparseDoc[ii] = occupiedmarker;
|
||||||
@ -460,6 +466,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
|||||||
}
|
}
|
||||||
LOGABS(("makeAbstract:%d:chosen number of positions %d\n",
|
LOGABS(("makeAbstract:%d:chosen number of positions %d\n",
|
||||||
chron.millis(), totaloccs));
|
chron.millis(), totaloccs));
|
||||||
|
maxpos += ctxwords + 1;
|
||||||
|
|
||||||
// This can happen if there are term occurences in the keywords
|
// This can happen if there are term occurences in the keywords
|
||||||
// etc. but not elsewhere ?
|
// etc. but not elsewhere ?
|
||||||
@ -475,28 +482,34 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
|||||||
// which is bad.
|
// which is bad.
|
||||||
{
|
{
|
||||||
Xapian::TermIterator term;
|
Xapian::TermIterator term;
|
||||||
int cutoff = 500 * 1000;
|
int cutoff = m_q->m_snipMaxPosWalk;
|
||||||
|
|
||||||
for (term = xrdb.termlist_begin(docid);
|
for (term = xrdb.termlist_begin(docid);
|
||||||
term != xrdb.termlist_end(docid); term++) {
|
term != xrdb.termlist_end(docid); term++) {
|
||||||
// Ignore prefixed terms
|
// Ignore prefixed terms
|
||||||
if (has_prefix(*term))
|
if (has_prefix(*term))
|
||||||
continue;
|
continue;
|
||||||
if (cutoff-- < 0) {
|
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
|
||||||
ret = ABSRES_TRUNC;
|
ret = ABSRES_TERMMISS;
|
||||||
LOGDEB0(("makeAbstract: max term count cutoff\n"));
|
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
|
||||||
|
m_q->m_snipMaxPosWalk));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
map<unsigned int, string>::iterator vit;
|
||||||
Xapian::PositionIterator pos;
|
Xapian::PositionIterator pos;
|
||||||
for (pos = xrdb.positionlist_begin(docid, *term);
|
for (pos = xrdb.positionlist_begin(docid, *term);
|
||||||
pos != xrdb.positionlist_end(docid, *term); pos++) {
|
pos != xrdb.positionlist_end(docid, *term); pos++) {
|
||||||
if (cutoff-- < 0) {
|
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
|
||||||
ret = ABSRES_TRUNC;
|
ret = ABSRES_TERMMISS;
|
||||||
LOGDEB0(("makeAbstract: max term count cutoff\n"));
|
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
|
||||||
|
m_q->m_snipMaxPosWalk));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// If we are beyond the max possible position, stop
|
||||||
|
// for this term
|
||||||
|
if (*pos > maxpos) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
map<unsigned int, string>::iterator vit;
|
|
||||||
if ((vit = sparseDoc.find(*pos)) != sparseDoc.end()) {
|
if ((vit = sparseDoc.find(*pos)) != sparseDoc.end()) {
|
||||||
// Don't replace a term: the terms list is in
|
// Don't replace a term: the terms list is in
|
||||||
// alphabetic order, and we may have several terms
|
// alphabetic order, and we may have several terms
|
||||||
|
|||||||
@ -1618,7 +1618,7 @@ bool Db::termMatch(MatchType typ, const string &lang,
|
|||||||
case 0: is = prefix; break;
|
case 0: is = prefix; break;
|
||||||
default: is = prefix + droot.substr(0, es); break;
|
default: is = prefix + droot.substr(0, es); break;
|
||||||
}
|
}
|
||||||
LOGDEB(("termMatch: initsec: [%s]\n", is.c_str()));
|
LOGDEB1(("termMatch: initsec: [%s]\n", is.c_str()));
|
||||||
|
|
||||||
for (int tries = 0; tries < 2; tries++) {
|
for (int tries = 0; tries < 2; tries++) {
|
||||||
try {
|
try {
|
||||||
|
|||||||
@ -141,8 +141,10 @@ private:
|
|||||||
|
|
||||||
Query::Query(Db *db)
|
Query::Query(Db *db)
|
||||||
: m_nq(new Native(this)), m_db(db), m_sorter(0), m_sortAscending(true),
|
: m_nq(new Native(this)), m_db(db), m_sorter(0), m_sortAscending(true),
|
||||||
m_collapseDuplicates(false), m_resCnt(-1)
|
m_collapseDuplicates(false), m_resCnt(-1), m_snipMaxPosWalk(1000000)
|
||||||
{
|
{
|
||||||
|
if (db)
|
||||||
|
db->getConf()->getConfParam("snippetMaxPosWalk", &m_snipMaxPosWalk);
|
||||||
}
|
}
|
||||||
|
|
||||||
Query::~Query()
|
Query::~Query()
|
||||||
|
|||||||
@ -32,7 +32,8 @@ class Doc;
|
|||||||
enum abstract_result {
|
enum abstract_result {
|
||||||
ABSRES_ERROR = 0,
|
ABSRES_ERROR = 0,
|
||||||
ABSRES_OK = 1,
|
ABSRES_OK = 1,
|
||||||
ABSRES_TRUNC = 2
|
ABSRES_TRUNC = 2,
|
||||||
|
ABSRES_TERMMISS = 3
|
||||||
};
|
};
|
||||||
|
|
||||||
// Snippet entry for makeDocAbstract
|
// Snippet entry for makeDocAbstract
|
||||||
@ -126,6 +127,7 @@ private:
|
|||||||
bool m_collapseDuplicates;
|
bool m_collapseDuplicates;
|
||||||
int m_resCnt;
|
int m_resCnt;
|
||||||
RefCntr<SearchData> m_sd;
|
RefCntr<SearchData> m_sd;
|
||||||
|
int m_snipMaxPosWalk;
|
||||||
|
|
||||||
/* Copyconst and assignement private and forbidden */
|
/* Copyconst and assignement private and forbidden */
|
||||||
Query(const Query &) {}
|
Query(const Query &) {}
|
||||||
|
|||||||
@ -598,8 +598,8 @@ public:
|
|||||||
if (m_ts->lastpos < pos)
|
if (m_ts->lastpos < pos)
|
||||||
m_ts->lastpos = pos;
|
m_ts->lastpos = pos;
|
||||||
bool noexpand = be ? m_ts->curnostemexp : true;
|
bool noexpand = be ? m_ts->curnostemexp : true;
|
||||||
LOGDEB(("TermProcQ::takeword: pushing [%s] pos %d noexp %d\n",
|
LOGDEB1(("TermProcQ::takeword: pushing [%s] pos %d noexp %d\n",
|
||||||
term.c_str(), pos, noexpand));
|
term.c_str(), pos, noexpand));
|
||||||
if (m_terms[pos].size() < term.size()) {
|
if (m_terms[pos].size() < term.size()) {
|
||||||
m_terms[pos] = term;
|
m_terms[pos] = term;
|
||||||
m_nste[pos] = noexpand;
|
m_nste[pos] = noexpand;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user