Use only the match terms to build the doc abstract, not all the query terms (might save a little effort)

This commit is contained in:
dockes 2009-11-26 07:17:40 +00:00
parent 58ba06eb9d
commit 217b7018d6
3 changed files with 35 additions and 14 deletions

View File

@ -173,9 +173,8 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
}
// Remove prefixes (caps) from a list of terms.
static list<string> noPrefixList(const list<string>& in)
static void noPrefixList(const list<string>& in, list<string>& out)
{
list<string> out;
for (list<string>::const_iterator qit = in.begin();
qit != in.end(); qit++) {
if ('A' <= qit->at(0) && qit->at(0) <= 'Z') {
@ -189,7 +188,6 @@ static list<string> noPrefixList(const list<string>& in)
out.push_back(*qit);
}
}
return out;
}
//#define DEBUGABSTRACT 1
@ -198,6 +196,14 @@ static list<string> noPrefixList(const list<string>& in)
#else
#define LOGABS LOGDEB2
#endif
// Debug helper: log all the elements of a string list on a single line.
//   what: label printed in front of the list
//   l:    the strings to print; each one is followed by a single space
// The text is emitted through LOGDEB as "<what>: <elements>\n".
static void listList(const string& what, const list<string>& l)
{
    string a;
    for (list<string>::const_iterator it = l.begin(); it != l.end(); ++it) {
        // Append in place: "a = a + *it + ..." rebuilt the whole accumulated
        // string through temporaries at every iteration.
        a += *it;
        a += " ";
    }
    LOGDEB(("%s: %s\n", what.c_str(), a.c_str()));
}
// Build a document abstract by extracting text chunks around the query terms
// This uses the db termlists, not the original document.
@ -210,22 +216,32 @@ string Db::Native::makeAbstract(Xapian::docid docid, Query *query)
LOGDEB(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
m_rcldb->m_synthAbsLen, m_rcldb->m_synthAbsWordCtxLen));
list<string> iterms;
query->getQueryTerms(iterms);
list<string> terms;
list<string> terms = noPrefixList(iterms);
if (terms.empty()) {
return string();
{
list<string> iterms;
query->getMatchTerms(docid, iterms);
noPrefixList(iterms, terms);
if (terms.empty()) {
LOGDEB(("makeAbstract::Empty term list\n"));
return string();
}
}
// listList("Match terms: ", terms);
// Retrieve db-wide frequencies for the query terms
// Retrieve db-wide frequencies for the query terms (we do this once per
// query, using all the query terms, not only the document match terms)
if (query->m_nq->termfreqs.empty()) {
list<string> iqterms, qterms;
query->getQueryTerms(iqterms);
noPrefixList(iqterms, qterms);
// listList("Query terms: ", qterms);
double doccnt = xrdb.get_doccount();
if (doccnt == 0) doccnt = 1;
for (list<string>::const_iterator qit = terms.begin();
qit != terms.end(); qit++) {
for (list<string>::const_iterator qit = qterms.begin();
qit != qterms.end(); qit++) {
query->m_nq->termfreqs[*qit] = xrdb.get_termfreq(*qit) / doccnt;
LOGABS(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(),
LOGDEB(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(),
query->m_nq->termfreqs[*qit]));
}
LOGABS(("makeAbstract:%d: got termfreqs\n", chron.ms()));
@ -450,7 +466,7 @@ string Db::Native::makeAbstract(Xapian::docid docid, Query *query)
}
#endif
LOGDEB(("makeAbstract:%d: extracting\n", chron.millis()));
LOGABS(("makeAbstract:%d: extracting\n", chron.millis()));
// Finally build the abstract by walking the map (in order of position)
string abstract;

View File

@ -225,6 +225,10 @@ bool Query::getQueryTerms(list<string>& terms)
}
// Convenience overload: return the list of terms which matched for a result
// document. The document is identified by its doc.xdocid field, and all the
// work is delegated to the docid-based overload, whose return value is
// passed back unchanged.
bool Query::getMatchTerms(const Doc& doc, list<string>& terms)
{
return getMatchTerms(doc.xdocid, terms);
}
bool Query::getMatchTerms(unsigned long xdocid, list<string>& terms)
{
if (ISNULL(m_nq) || !m_nq->xenquire) {
LOGERR(("Query::getMatchTerms: no query opened\n"));
@ -233,7 +237,7 @@ bool Query::getMatchTerms(const Doc& doc, list<string>& terms)
terms.clear();
Xapian::TermIterator it;
Xapian::docid id = Xapian::docid(doc.xdocid);
Xapian::docid id = Xapian::docid(xdocid);
XAPTRY(terms.insert(terms.begin(),
m_nq->xenquire->get_matching_terms_begin(id),

View File

@ -77,6 +77,7 @@ class Query {
/** Return a list of terms which matched for a specific result document */
bool getMatchTerms(const Doc& doc, list<string>& terms);
bool getMatchTerms(unsigned long xdocid, list<string>& terms);
/** Expand query to look for documents like the one passed in */
list<string> expand(const Doc &doc);