improved detection of incomplete snippets lists
This commit is contained in:
parent
87120798c2
commit
3abfd00635
@ -136,19 +136,26 @@ void SnippetsW::init()
|
||||
"<html><head>"
|
||||
"<meta http-equiv=\"content-type\" "
|
||||
"content=\"text/html; charset=utf-8\"></head>"
|
||||
"<body style='overflow-x: scroll; white-space: nowrap'>"
|
||||
"<table>"
|
||||
"<body>"
|
||||
"<table style='overflow-x: scroll; white-space: nowrap'>"
|
||||
;
|
||||
|
||||
g_hiliter.set_inputhtml(false);
|
||||
bool nomatch = true;
|
||||
|
||||
for (vector<Rcl::Snippet>::const_iterator it = vpabs.begin();
|
||||
it != vpabs.end(); it++) {
|
||||
if (it->page == -1) {
|
||||
oss << "<tr><td colspan=\"2\">" <<
|
||||
it->snippet << "</td></tr>" << endl;
|
||||
continue;
|
||||
}
|
||||
list<string> lr;
|
||||
if (!g_hiliter.plaintorich(it->snippet, lr, hdata)) {
|
||||
LOGDEB1(("No match for [%s]\n", it->snippet.c_str()));
|
||||
continue;
|
||||
}
|
||||
nomatch = false;
|
||||
oss << "<tr><td>";
|
||||
if (it->page > 0) {
|
||||
oss << "<a href=\"P" << it->page << "T" << it->term << "\">"
|
||||
@ -156,6 +163,13 @@ void SnippetsW::init()
|
||||
}
|
||||
oss << "</td><td>" << lr.front().c_str() << "</td></tr>" << endl;
|
||||
}
|
||||
oss << "</table>" << endl;
|
||||
if (nomatch) {
|
||||
oss.str("<html><head></head><body>");
|
||||
oss << "<p>Sorry, no exact match was found within limits. "
|
||||
"Probably the document is very big "
|
||||
"and the snippets generator got lost in a maze...</p>" << endl;
|
||||
}
|
||||
oss << "</body></html>";
|
||||
#ifdef SNIPPETS_WEBKIT
|
||||
browser->setHtml(QString::fromUtf8(oss.str().c_str()));
|
||||
|
||||
@ -69,6 +69,7 @@ int DocSequenceDb::getResCnt()
|
||||
}
|
||||
return m_rescnt;
|
||||
}
|
||||
|
||||
static const string cstr_mre("[...]");
|
||||
|
||||
// This one only gets called to fill-up the snippets window
|
||||
@ -81,18 +82,22 @@ bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<Rcl::Snippet>& vpabs)
|
||||
|
||||
// Have to put the limit somewhere.
|
||||
int maxoccs = 1000;
|
||||
Rcl::abstract_result ret = Rcl::ABSRES_ERROR;
|
||||
int ret = Rcl::ABSRES_ERROR;
|
||||
if (m_q->whatDb()) {
|
||||
ret = m_q->makeDocAbstract(doc, vpabs, maxoccs,
|
||||
m_q->whatDb()->getAbsCtxLen()+ 2);
|
||||
}
|
||||
if (vpabs.empty())
|
||||
vpabs.push_back(Rcl::Snippet(0, doc.meta[Rcl::Doc::keyabs]));
|
||||
LOGDEB(("DocSequenceDb::getAbstract: got ret %d vpabs len %u\n", ret,
|
||||
vpabs.size()));
|
||||
if (vpabs.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// If the list was probably truncated, indicate it.
|
||||
if (ret == Rcl::ABSRES_TRUNC) {
|
||||
// ret is a bit mask of abstract_result flags: test the flag with '&'
// (a bitwise OR with a non-zero constant would always be true).
if (ret & Rcl::ABSRES_TRUNC) {
|
||||
vpabs.push_back(Rcl::Snippet(-1, cstr_mre));
|
||||
} else if (ret == Rcl::ABSRES_TERMMISS) {
|
||||
}
|
||||
// Only flag missing words when the TERMMISS bit is actually set in ret
// (a bitwise OR with a non-zero constant would always be true).
if (ret & Rcl::ABSRES_TERMMISS) {
|
||||
vpabs.insert(vpabs.begin(),
|
||||
Rcl::Snippet(-1, "(Words missing in snippets)"));
|
||||
}
|
||||
|
||||
@ -309,9 +309,9 @@ int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term)
|
||||
//
|
||||
// DatabaseModified and other general exceptions are caught and
|
||||
// possibly retried by our caller
|
||||
abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
||||
vector<Snippet>& vabs,
|
||||
int imaxoccs, int ictxwords)
|
||||
int Query::Native::makeAbstract(Xapian::docid docid,
|
||||
vector<Snippet>& vabs,
|
||||
int imaxoccs, int ictxwords)
|
||||
{
|
||||
Chrono chron;
|
||||
LOGABS(("makeAbstract: docid %ld imaxoccs %d ictxwords %d\n",
|
||||
@ -381,7 +381,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
||||
LOGABS(("makeAbstract:%d: mxttloccs %d ctxwords %d\n",
|
||||
chron.ms(), maxtotaloccs, ctxwords));
|
||||
|
||||
abstract_result ret = ABSRES_OK;
|
||||
int ret = ABSRES_OK;
|
||||
|
||||
// Let's go populate
|
||||
for (map<double, vector<string> >::reverse_iterator mit = byQ.rbegin();
|
||||
@ -466,11 +466,14 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
||||
}
|
||||
|
||||
// Group done ?
|
||||
if (grpoccs >= maxgrpoccs)
|
||||
if (grpoccs >= maxgrpoccs) {
|
||||
ret |= ABSRES_TRUNC;
|
||||
LOGABS(("Db::makeAbstract: max group occs cutoff\n"));
|
||||
break;
|
||||
}
|
||||
// Global done ?
|
||||
if (totaloccs >= maxtotaloccs) {
|
||||
ret = ABSRES_TRUNC;
|
||||
ret |= ABSRES_TRUNC;
|
||||
LOGABS(("Db::makeAbstract: max occurrences cutoff\n"));
|
||||
break;
|
||||
}
|
||||
@ -480,7 +483,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
||||
}
|
||||
|
||||
if (totaloccs >= maxtotaloccs) {
|
||||
ret = ABSRES_TRUNC;
|
||||
ret |= ABSRES_TRUNC;
|
||||
LOGABS(("Db::makeAbstract: max1 occurrences cutoff\n"));
|
||||
break;
|
||||
}
|
||||
@ -511,7 +514,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
||||
if (has_prefix(*term))
|
||||
continue;
|
||||
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
|
||||
ret = ABSRES_TERMMISS;
|
||||
ret |= ABSRES_TERMMISS;
|
||||
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
|
||||
m_q->m_snipMaxPosWalk));
|
||||
break;
|
||||
@ -522,7 +525,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
||||
for (pos = xrdb.positionlist_begin(docid, *term);
|
||||
pos != xrdb.positionlist_end(docid, *term); pos++) {
|
||||
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
|
||||
ret = ABSRES_TERMMISS;
|
||||
ret |= ABSRES_TERMMISS;
|
||||
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
|
||||
m_q->m_snipMaxPosWalk));
|
||||
break;
|
||||
|
||||
@ -264,16 +264,16 @@ bool Query::getQueryTerms(vector<string>& terms)
|
||||
return true;
|
||||
}
|
||||
|
||||
abstract_result Query::makeDocAbstract(Doc &doc,
|
||||
vector<Snippet>& abstract,
|
||||
int maxoccs, int ctxwords)
|
||||
int Query::makeDocAbstract(Doc &doc,
|
||||
vector<Snippet>& abstract,
|
||||
int maxoccs, int ctxwords)
|
||||
{
|
||||
LOGDEB(("makeDocAbstract: maxoccs %d ctxwords %d\n", maxoccs, ctxwords));
|
||||
if (!m_db || !m_db->m_ndb || !m_db->m_ndb->m_isopen || !m_nq) {
|
||||
LOGERR(("Query::makeDocAbstract: no db or no nq\n"));
|
||||
return ABSRES_ERROR;
|
||||
}
|
||||
abstract_result ret = ABSRES_ERROR;
|
||||
int ret = ABSRES_ERROR;
|
||||
XAPTRY(ret = m_nq->makeAbstract(doc.xdocid, abstract, maxoccs, ctxwords),
|
||||
m_db->m_ndb->xrdb, m_reason);
|
||||
if (!m_reason.empty()) {
|
||||
|
||||
@ -33,7 +33,7 @@ enum abstract_result {
|
||||
ABSRES_ERROR = 0,
|
||||
ABSRES_OK = 1,
|
||||
ABSRES_TRUNC = 2,
|
||||
ABSRES_TERMMISS = 3
|
||||
ABSRES_TERMMISS = 4
|
||||
};
|
||||
|
||||
// Snippet entry for makeDocAbstract
|
||||
@ -110,8 +110,8 @@ class Query {
|
||||
// Returned as a snippets vector
|
||||
bool makeDocAbstract(Doc &doc, std::vector<std::string>& abstract);
|
||||
// Returned as a vector of pair<page,snippet> page is 0 if unknown
|
||||
abstract_result makeDocAbstract(Doc &doc, std::vector<Snippet>& abst,
|
||||
int maxoccs= -1, int ctxwords = -1);
|
||||
int makeDocAbstract(Doc &doc, std::vector<Snippet>& abst,
|
||||
int maxoccs= -1, int ctxwords = -1);
|
||||
/** Retrieve page number for first match for term */
|
||||
int getFirstMatchPage(Doc &doc, std::string& term);
|
||||
|
||||
|
||||
@ -59,8 +59,8 @@ public:
|
||||
}
|
||||
/** Return a list of terms which matched for a specific result document */
|
||||
bool getMatchTerms(unsigned long xdocid, std::vector<std::string>& terms);
|
||||
abstract_result makeAbstract(Xapian::docid id, vector<Snippet>&,
|
||||
int maxoccs = -1, int ctxwords = -1);
|
||||
int makeAbstract(Xapian::docid id, vector<Snippet>&,
|
||||
int maxoccs = -1, int ctxwords = -1);
|
||||
int getFirstMatchPage(Xapian::docid docid, std::string& term);
|
||||
void setDbWideQTermsFreqs();
|
||||
double qualityTerms(Xapian::docid docid,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user