improved detection of incomplete snippets lists

This commit is contained in:
Jean-Francois Dockes 2013-02-26 15:49:36 +01:00
parent 87120798c2
commit 3abfd00635
6 changed files with 47 additions and 25 deletions

View File

@ -136,19 +136,26 @@ void SnippetsW::init()
"<html><head>"
"<meta http-equiv=\"content-type\" "
"content=\"text/html; charset=utf-8\"></head>"
"<body style='overflow-x: scroll; white-space: nowrap'>"
"<table>"
"<body>"
"<table style='overflow-x: scroll; white-space: nowrap'>"
;
g_hiliter.set_inputhtml(false);
bool nomatch = true;
for (vector<Rcl::Snippet>::const_iterator it = vpabs.begin();
it != vpabs.end(); it++) {
if (it->page == -1) {
oss << "<tr><td colspan=\"2\">" <<
it->snippet << "</td></tr>" << endl;
continue;
}
list<string> lr;
if (!g_hiliter.plaintorich(it->snippet, lr, hdata)) {
LOGDEB1(("No match for [%s]\n", it->snippet.c_str()));
continue;
}
nomatch = false;
oss << "<tr><td>";
if (it->page > 0) {
oss << "<a href=\"P" << it->page << "T" << it->term << "\">"
@ -156,6 +163,13 @@ void SnippetsW::init()
}
oss << "</td><td>" << lr.front().c_str() << "</td></tr>" << endl;
}
oss << "</table>" << endl;
if (nomatch) {
oss.str("<html><head></head><body>");
oss << "<p>Sorry, no exact match was found within limits. "
"Probably the document is very big "
"and the snippets generator got lost in a maze...</p>" << endl;
}
oss << "</body></html>";
#ifdef SNIPPETS_WEBKIT
browser->setHtml(QString::fromUtf8(oss.str().c_str()));

View File

@ -69,6 +69,7 @@ int DocSequenceDb::getResCnt()
}
return m_rescnt;
}
static const string cstr_mre("[...]");
// This one only gets called to fill-up the snippets window
@ -81,18 +82,22 @@ bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<Rcl::Snippet>& vpabs)
// Have to put the limit somewhere.
int maxoccs = 1000;
Rcl::abstract_result ret = Rcl::ABSRES_ERROR;
int ret = Rcl::ABSRES_ERROR;
if (m_q->whatDb()) {
ret = m_q->makeDocAbstract(doc, vpabs, maxoccs,
m_q->whatDb()->getAbsCtxLen()+ 2);
}
if (vpabs.empty())
vpabs.push_back(Rcl::Snippet(0, doc.meta[Rcl::Doc::keyabs]));
LOGDEB(("DocSequenceDb::getAbstract: got ret %d vpabs len %u\n", ret,
vpabs.size()));
if (vpabs.empty()) {
return true;
}
// If the list was probably truncated, indicate it.
if (ret == Rcl::ABSRES_TRUNC) {
	if (ret & Rcl::ABSRES_TRUNC) {
vpabs.push_back(Rcl::Snippet(-1, cstr_mre));
} else if (ret == Rcl::ABSRES_TERMMISS) {
}
	if (ret & Rcl::ABSRES_TERMMISS) {
vpabs.insert(vpabs.begin(),
Rcl::Snippet(-1, "(Words missing in snippets)"));
}

View File

@ -309,9 +309,9 @@ int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term)
//
	// DatabaseModified and other general exceptions are caught and
// possibly retried by our caller
abstract_result Query::Native::makeAbstract(Xapian::docid docid,
vector<Snippet>& vabs,
int imaxoccs, int ictxwords)
int Query::Native::makeAbstract(Xapian::docid docid,
vector<Snippet>& vabs,
int imaxoccs, int ictxwords)
{
Chrono chron;
LOGABS(("makeAbstract: docid %ld imaxoccs %d ictxwords %d\n",
@ -381,7 +381,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
LOGABS(("makeAbstract:%d: mxttloccs %d ctxwords %d\n",
chron.ms(), maxtotaloccs, ctxwords));
abstract_result ret = ABSRES_OK;
int ret = ABSRES_OK;
// Let's go populate
for (map<double, vector<string> >::reverse_iterator mit = byQ.rbegin();
@ -466,11 +466,14 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
}
// Group done ?
if (grpoccs >= maxgrpoccs)
if (grpoccs >= maxgrpoccs) {
ret |= ABSRES_TRUNC;
LOGABS(("Db::makeAbstract: max group occs cutoff\n"));
break;
}
// Global done ?
if (totaloccs >= maxtotaloccs) {
ret = ABSRES_TRUNC;
ret |= ABSRES_TRUNC;
LOGABS(("Db::makeAbstract: max occurrences cutoff\n"));
break;
}
@ -480,7 +483,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
}
if (totaloccs >= maxtotaloccs) {
ret = ABSRES_TRUNC;
ret |= ABSRES_TRUNC;
LOGABS(("Db::makeAbstract: max1 occurrences cutoff\n"));
break;
}
@ -511,7 +514,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
if (has_prefix(*term))
continue;
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
ret = ABSRES_TERMMISS;
ret |= ABSRES_TERMMISS;
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
m_q->m_snipMaxPosWalk));
break;
@ -522,7 +525,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
for (pos = xrdb.positionlist_begin(docid, *term);
pos != xrdb.positionlist_end(docid, *term); pos++) {
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
ret = ABSRES_TERMMISS;
ret |= ABSRES_TERMMISS;
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
m_q->m_snipMaxPosWalk));
break;

View File

@ -264,16 +264,16 @@ bool Query::getQueryTerms(vector<string>& terms)
return true;
}
abstract_result Query::makeDocAbstract(Doc &doc,
vector<Snippet>& abstract,
int maxoccs, int ctxwords)
int Query::makeDocAbstract(Doc &doc,
vector<Snippet>& abstract,
int maxoccs, int ctxwords)
{
LOGDEB(("makeDocAbstract: maxoccs %d ctxwords %d\n", maxoccs, ctxwords));
if (!m_db || !m_db->m_ndb || !m_db->m_ndb->m_isopen || !m_nq) {
LOGERR(("Query::makeDocAbstract: no db or no nq\n"));
return ABSRES_ERROR;
}
abstract_result ret = ABSRES_ERROR;
int ret = ABSRES_ERROR;
XAPTRY(ret = m_nq->makeAbstract(doc.xdocid, abstract, maxoccs, ctxwords),
m_db->m_ndb->xrdb, m_reason);
if (!m_reason.empty()) {

View File

@ -33,7 +33,7 @@ enum abstract_result {
ABSRES_ERROR = 0,
ABSRES_OK = 1,
ABSRES_TRUNC = 2,
ABSRES_TERMMISS = 3
ABSRES_TERMMISS = 4
};
// Snippet entry for makeDocAbstract
@ -110,8 +110,8 @@ class Query {
// Returned as a snippets vector
bool makeDocAbstract(Doc &doc, std::vector<std::string>& abstract);
// Returned as a vector of pair<page,snippet> page is 0 if unknown
abstract_result makeDocAbstract(Doc &doc, std::vector<Snippet>& abst,
int maxoccs= -1, int ctxwords = -1);
int makeDocAbstract(Doc &doc, std::vector<Snippet>& abst,
int maxoccs= -1, int ctxwords = -1);
/** Retrieve page number for first match for term */
int getFirstMatchPage(Doc &doc, std::string& term);

View File

@ -59,8 +59,8 @@ public:
}
/** Return a list of terms which matched for a specific result document */
bool getMatchTerms(unsigned long xdocid, std::vector<std::string>& terms);
abstract_result makeAbstract(Xapian::docid id, vector<Snippet>&,
int maxoccs = -1, int ctxwords = -1);
int makeAbstract(Xapian::docid id, vector<Snippet>&,
int maxoccs = -1, int ctxwords = -1);
int getFirstMatchPage(Xapian::docid docid, std::string& term);
void setDbWideQTermsFreqs();
double qualityTerms(Xapian::docid docid,