improved detection of incomplete snippets lists
This commit is contained in:
parent
87120798c2
commit
3abfd00635
@ -136,19 +136,26 @@ void SnippetsW::init()
|
|||||||
"<html><head>"
|
"<html><head>"
|
||||||
"<meta http-equiv=\"content-type\" "
|
"<meta http-equiv=\"content-type\" "
|
||||||
"content=\"text/html; charset=utf-8\"></head>"
|
"content=\"text/html; charset=utf-8\"></head>"
|
||||||
"<body style='overflow-x: scroll; white-space: nowrap'>"
|
"<body>"
|
||||||
"<table>"
|
"<table style='overflow-x: scroll; white-space: nowrap'>"
|
||||||
;
|
;
|
||||||
|
|
||||||
g_hiliter.set_inputhtml(false);
|
g_hiliter.set_inputhtml(false);
|
||||||
|
bool nomatch = true;
|
||||||
|
|
||||||
for (vector<Rcl::Snippet>::const_iterator it = vpabs.begin();
|
for (vector<Rcl::Snippet>::const_iterator it = vpabs.begin();
|
||||||
it != vpabs.end(); it++) {
|
it != vpabs.end(); it++) {
|
||||||
|
if (it->page == -1) {
|
||||||
|
oss << "<tr><td colspan=\"2\">" <<
|
||||||
|
it->snippet << "</td></tr>" << endl;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
list<string> lr;
|
list<string> lr;
|
||||||
if (!g_hiliter.plaintorich(it->snippet, lr, hdata)) {
|
if (!g_hiliter.plaintorich(it->snippet, lr, hdata)) {
|
||||||
LOGDEB1(("No match for [%s]\n", it->snippet.c_str()));
|
LOGDEB1(("No match for [%s]\n", it->snippet.c_str()));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
nomatch = false;
|
||||||
oss << "<tr><td>";
|
oss << "<tr><td>";
|
||||||
if (it->page > 0) {
|
if (it->page > 0) {
|
||||||
oss << "<a href=\"P" << it->page << "T" << it->term << "\">"
|
oss << "<a href=\"P" << it->page << "T" << it->term << "\">"
|
||||||
@ -156,6 +163,13 @@ void SnippetsW::init()
|
|||||||
}
|
}
|
||||||
oss << "</td><td>" << lr.front().c_str() << "</td></tr>" << endl;
|
oss << "</td><td>" << lr.front().c_str() << "</td></tr>" << endl;
|
||||||
}
|
}
|
||||||
|
oss << "</table>" << endl;
|
||||||
|
if (nomatch) {
|
||||||
|
oss.str("<html><head></head><body>");
|
||||||
|
oss << "<p>Sorry, no exact match was found within limits. "
|
||||||
|
"Probably the document is very big "
|
||||||
|
"and the snippets generator got lost in a maze...</p>" << endl;
|
||||||
|
}
|
||||||
oss << "</body></html>";
|
oss << "</body></html>";
|
||||||
#ifdef SNIPPETS_WEBKIT
|
#ifdef SNIPPETS_WEBKIT
|
||||||
browser->setHtml(QString::fromUtf8(oss.str().c_str()));
|
browser->setHtml(QString::fromUtf8(oss.str().c_str()));
|
||||||
|
|||||||
@ -69,6 +69,7 @@ int DocSequenceDb::getResCnt()
|
|||||||
}
|
}
|
||||||
return m_rescnt;
|
return m_rescnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const string cstr_mre("[...]");
|
static const string cstr_mre("[...]");
|
||||||
|
|
||||||
// This one only gets called to fill-up the snippets window
|
// This one only gets called to fill-up the snippets window
|
||||||
@ -81,18 +82,22 @@ bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<Rcl::Snippet>& vpabs)
|
|||||||
|
|
||||||
// Have to put the limit somewhere.
|
// Have to put the limit somewhere.
|
||||||
int maxoccs = 1000;
|
int maxoccs = 1000;
|
||||||
Rcl::abstract_result ret = Rcl::ABSRES_ERROR;
|
int ret = Rcl::ABSRES_ERROR;
|
||||||
if (m_q->whatDb()) {
|
if (m_q->whatDb()) {
|
||||||
ret = m_q->makeDocAbstract(doc, vpabs, maxoccs,
|
ret = m_q->makeDocAbstract(doc, vpabs, maxoccs,
|
||||||
m_q->whatDb()->getAbsCtxLen()+ 2);
|
m_q->whatDb()->getAbsCtxLen()+ 2);
|
||||||
}
|
}
|
||||||
if (vpabs.empty())
|
LOGDEB(("DocSequenceDb::getAbstract: got ret %d vpabs len %u\n", ret,
|
||||||
vpabs.push_back(Rcl::Snippet(0, doc.meta[Rcl::Doc::keyabs]));
|
vpabs.size()));
|
||||||
|
if (vpabs.empty()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// If the list was probably truncated, indicate it.
|
// If the list was probably truncated, indicate it.
|
||||||
if (ret == Rcl::ABSRES_TRUNC) {
|
if (ret | Rcl::ABSRES_TRUNC) {
|
||||||
vpabs.push_back(Rcl::Snippet(-1, cstr_mre));
|
vpabs.push_back(Rcl::Snippet(-1, cstr_mre));
|
||||||
} else if (ret == Rcl::ABSRES_TERMMISS) {
|
}
|
||||||
|
if (ret | Rcl::ABSRES_TERMMISS) {
|
||||||
vpabs.insert(vpabs.begin(),
|
vpabs.insert(vpabs.begin(),
|
||||||
Rcl::Snippet(-1, "(Words missing in snippets)"));
|
Rcl::Snippet(-1, "(Words missing in snippets)"));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -309,9 +309,9 @@ int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term)
|
|||||||
//
|
//
|
||||||
// DatabaseModified and other general exceptions are caught and
|
// DatabaseModified and other general exceptions are caught and
|
||||||
// possibly retried by our caller
|
// possibly retried by our caller
|
||||||
abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
int Query::Native::makeAbstract(Xapian::docid docid,
|
||||||
vector<Snippet>& vabs,
|
vector<Snippet>& vabs,
|
||||||
int imaxoccs, int ictxwords)
|
int imaxoccs, int ictxwords)
|
||||||
{
|
{
|
||||||
Chrono chron;
|
Chrono chron;
|
||||||
LOGABS(("makeAbstract: docid %ld imaxoccs %d ictxwords %d\n",
|
LOGABS(("makeAbstract: docid %ld imaxoccs %d ictxwords %d\n",
|
||||||
@ -381,7 +381,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
|||||||
LOGABS(("makeAbstract:%d: mxttloccs %d ctxwords %d\n",
|
LOGABS(("makeAbstract:%d: mxttloccs %d ctxwords %d\n",
|
||||||
chron.ms(), maxtotaloccs, ctxwords));
|
chron.ms(), maxtotaloccs, ctxwords));
|
||||||
|
|
||||||
abstract_result ret = ABSRES_OK;
|
int ret = ABSRES_OK;
|
||||||
|
|
||||||
// Let's go populate
|
// Let's go populate
|
||||||
for (map<double, vector<string> >::reverse_iterator mit = byQ.rbegin();
|
for (map<double, vector<string> >::reverse_iterator mit = byQ.rbegin();
|
||||||
@ -466,11 +466,14 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Group done ?
|
// Group done ?
|
||||||
if (grpoccs >= maxgrpoccs)
|
if (grpoccs >= maxgrpoccs) {
|
||||||
|
ret |= ABSRES_TRUNC;
|
||||||
|
LOGABS(("Db::makeAbstract: max group occs cutoff\n"));
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
// Global done ?
|
// Global done ?
|
||||||
if (totaloccs >= maxtotaloccs) {
|
if (totaloccs >= maxtotaloccs) {
|
||||||
ret = ABSRES_TRUNC;
|
ret |= ABSRES_TRUNC;
|
||||||
LOGABS(("Db::makeAbstract: max occurrences cutoff\n"));
|
LOGABS(("Db::makeAbstract: max occurrences cutoff\n"));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -480,7 +483,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (totaloccs >= maxtotaloccs) {
|
if (totaloccs >= maxtotaloccs) {
|
||||||
ret = ABSRES_TRUNC;
|
ret |= ABSRES_TRUNC;
|
||||||
LOGABS(("Db::makeAbstract: max1 occurrences cutoff\n"));
|
LOGABS(("Db::makeAbstract: max1 occurrences cutoff\n"));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -511,7 +514,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
|||||||
if (has_prefix(*term))
|
if (has_prefix(*term))
|
||||||
continue;
|
continue;
|
||||||
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
|
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
|
||||||
ret = ABSRES_TERMMISS;
|
ret |= ABSRES_TERMMISS;
|
||||||
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
|
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
|
||||||
m_q->m_snipMaxPosWalk));
|
m_q->m_snipMaxPosWalk));
|
||||||
break;
|
break;
|
||||||
@ -522,7 +525,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
|||||||
for (pos = xrdb.positionlist_begin(docid, *term);
|
for (pos = xrdb.positionlist_begin(docid, *term);
|
||||||
pos != xrdb.positionlist_end(docid, *term); pos++) {
|
pos != xrdb.positionlist_end(docid, *term); pos++) {
|
||||||
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
|
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
|
||||||
ret = ABSRES_TERMMISS;
|
ret |= ABSRES_TERMMISS;
|
||||||
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
|
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
|
||||||
m_q->m_snipMaxPosWalk));
|
m_q->m_snipMaxPosWalk));
|
||||||
break;
|
break;
|
||||||
|
|||||||
@ -264,16 +264,16 @@ bool Query::getQueryTerms(vector<string>& terms)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
abstract_result Query::makeDocAbstract(Doc &doc,
|
int Query::makeDocAbstract(Doc &doc,
|
||||||
vector<Snippet>& abstract,
|
vector<Snippet>& abstract,
|
||||||
int maxoccs, int ctxwords)
|
int maxoccs, int ctxwords)
|
||||||
{
|
{
|
||||||
LOGDEB(("makeDocAbstract: maxoccs %d ctxwords %d\n", maxoccs, ctxwords));
|
LOGDEB(("makeDocAbstract: maxoccs %d ctxwords %d\n", maxoccs, ctxwords));
|
||||||
if (!m_db || !m_db->m_ndb || !m_db->m_ndb->m_isopen || !m_nq) {
|
if (!m_db || !m_db->m_ndb || !m_db->m_ndb->m_isopen || !m_nq) {
|
||||||
LOGERR(("Query::makeDocAbstract: no db or no nq\n"));
|
LOGERR(("Query::makeDocAbstract: no db or no nq\n"));
|
||||||
return ABSRES_ERROR;
|
return ABSRES_ERROR;
|
||||||
}
|
}
|
||||||
abstract_result ret = ABSRES_ERROR;
|
int ret = ABSRES_ERROR;
|
||||||
XAPTRY(ret = m_nq->makeAbstract(doc.xdocid, abstract, maxoccs, ctxwords),
|
XAPTRY(ret = m_nq->makeAbstract(doc.xdocid, abstract, maxoccs, ctxwords),
|
||||||
m_db->m_ndb->xrdb, m_reason);
|
m_db->m_ndb->xrdb, m_reason);
|
||||||
if (!m_reason.empty()) {
|
if (!m_reason.empty()) {
|
||||||
|
|||||||
@ -33,7 +33,7 @@ enum abstract_result {
|
|||||||
ABSRES_ERROR = 0,
|
ABSRES_ERROR = 0,
|
||||||
ABSRES_OK = 1,
|
ABSRES_OK = 1,
|
||||||
ABSRES_TRUNC = 2,
|
ABSRES_TRUNC = 2,
|
||||||
ABSRES_TERMMISS = 3
|
ABSRES_TERMMISS = 4
|
||||||
};
|
};
|
||||||
|
|
||||||
// Snippet entry for makeDocAbstract
|
// Snippet entry for makeDocAbstract
|
||||||
@ -110,8 +110,8 @@ class Query {
|
|||||||
// Returned as a snippets vector
|
// Returned as a snippets vector
|
||||||
bool makeDocAbstract(Doc &doc, std::vector<std::string>& abstract);
|
bool makeDocAbstract(Doc &doc, std::vector<std::string>& abstract);
|
||||||
// Returned as a vector of pair<page,snippet>; page is 0 if unknown
|
// Returned as a vector of pair<page,snippet>; page is 0 if unknown
|
||||||
abstract_result makeDocAbstract(Doc &doc, std::vector<Snippet>& abst,
|
int makeDocAbstract(Doc &doc, std::vector<Snippet>& abst,
|
||||||
int maxoccs= -1, int ctxwords = -1);
|
int maxoccs= -1, int ctxwords = -1);
|
||||||
/** Retrieve page number for first match for term */
|
/** Retrieve page number for first match for term */
|
||||||
int getFirstMatchPage(Doc &doc, std::string& term);
|
int getFirstMatchPage(Doc &doc, std::string& term);
|
||||||
|
|
||||||
|
|||||||
@ -59,8 +59,8 @@ public:
|
|||||||
}
|
}
|
||||||
/** Return a list of terms which matched for a specific result document */
|
/** Return a list of terms which matched for a specific result document */
|
||||||
bool getMatchTerms(unsigned long xdocid, std::vector<std::string>& terms);
|
bool getMatchTerms(unsigned long xdocid, std::vector<std::string>& terms);
|
||||||
abstract_result makeAbstract(Xapian::docid id, vector<Snippet>&,
|
int makeAbstract(Xapian::docid id, vector<Snippet>&,
|
||||||
int maxoccs = -1, int ctxwords = -1);
|
int maxoccs = -1, int ctxwords = -1);
|
||||||
int getFirstMatchPage(Xapian::docid docid, std::string& term);
|
int getFirstMatchPage(Xapian::docid docid, std::string& term);
|
||||||
void setDbWideQTermsFreqs();
|
void setDbWideQTermsFreqs();
|
||||||
double qualityTerms(Xapian::docid docid,
|
double qualityTerms(Xapian::docid docid,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user