Use only the document's match terms to build the doc abstract, not all query terms (might save a little effort)
This commit is contained in:
parent
58ba06eb9d
commit
217b7018d6
@ -173,9 +173,8 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Remove prefixes (caps) from a list of terms.
|
// Remove prefixes (caps) from a list of terms.
|
||||||
static list<string> noPrefixList(const list<string>& in)
|
static void noPrefixList(const list<string>& in, list<string>& out)
|
||||||
{
|
{
|
||||||
list<string> out;
|
|
||||||
for (list<string>::const_iterator qit = in.begin();
|
for (list<string>::const_iterator qit = in.begin();
|
||||||
qit != in.end(); qit++) {
|
qit != in.end(); qit++) {
|
||||||
if ('A' <= qit->at(0) && qit->at(0) <= 'Z') {
|
if ('A' <= qit->at(0) && qit->at(0) <= 'Z') {
|
||||||
@ -189,7 +188,6 @@ static list<string> noPrefixList(const list<string>& in)
|
|||||||
out.push_back(*qit);
|
out.push_back(*qit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return out;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//#define DEBUGABSTRACT 1
|
//#define DEBUGABSTRACT 1
|
||||||
@ -198,6 +196,14 @@ static list<string> noPrefixList(const list<string>& in)
|
|||||||
#else
|
#else
|
||||||
#define LOGABS LOGDEB2
|
#define LOGABS LOGDEB2
|
||||||
#endif
|
#endif
|
||||||
|
static void listList(const string& what, const list<string>&l)
|
||||||
|
{
|
||||||
|
string a;
|
||||||
|
for (list<string>::const_iterator it = l.begin(); it != l.end(); it++) {
|
||||||
|
a = a + *it + " ";
|
||||||
|
}
|
||||||
|
LOGDEB(("%s: %s\n", what.c_str(), a.c_str()));
|
||||||
|
}
|
||||||
|
|
||||||
// Build a document abstract by extracting text chunks around the query terms
|
// Build a document abstract by extracting text chunks around the query terms
|
||||||
// This uses the db termlists, not the original document.
|
// This uses the db termlists, not the original document.
|
||||||
@ -210,22 +216,32 @@ string Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
|||||||
LOGDEB(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
|
LOGDEB(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
|
||||||
m_rcldb->m_synthAbsLen, m_rcldb->m_synthAbsWordCtxLen));
|
m_rcldb->m_synthAbsLen, m_rcldb->m_synthAbsWordCtxLen));
|
||||||
|
|
||||||
list<string> iterms;
|
list<string> terms;
|
||||||
query->getQueryTerms(iterms);
|
|
||||||
|
|
||||||
list<string> terms = noPrefixList(iterms);
|
{
|
||||||
if (terms.empty()) {
|
list<string> iterms;
|
||||||
return string();
|
query->getMatchTerms(docid, iterms);
|
||||||
|
noPrefixList(iterms, terms);
|
||||||
|
if (terms.empty()) {
|
||||||
|
LOGDEB(("makeAbstract::Empty term list\n"));
|
||||||
|
return string();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// listList("Match terms: ", terms);
|
||||||
|
|
||||||
// Retrieve db-wide frequencies for the query terms
|
// Retrieve db-wide frequencies for the query terms (we do this once per
|
||||||
|
// query, using all the query terms, not only the document match terms)
|
||||||
if (query->m_nq->termfreqs.empty()) {
|
if (query->m_nq->termfreqs.empty()) {
|
||||||
|
list<string> iqterms, qterms;
|
||||||
|
query->getQueryTerms(iqterms);
|
||||||
|
noPrefixList(iqterms, qterms);
|
||||||
|
// listList("Query terms: ", qterms);
|
||||||
double doccnt = xrdb.get_doccount();
|
double doccnt = xrdb.get_doccount();
|
||||||
if (doccnt == 0) doccnt = 1;
|
if (doccnt == 0) doccnt = 1;
|
||||||
for (list<string>::const_iterator qit = terms.begin();
|
for (list<string>::const_iterator qit = qterms.begin();
|
||||||
qit != terms.end(); qit++) {
|
qit != qterms.end(); qit++) {
|
||||||
query->m_nq->termfreqs[*qit] = xrdb.get_termfreq(*qit) / doccnt;
|
query->m_nq->termfreqs[*qit] = xrdb.get_termfreq(*qit) / doccnt;
|
||||||
LOGABS(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(),
|
LOGDEB(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(),
|
||||||
query->m_nq->termfreqs[*qit]));
|
query->m_nq->termfreqs[*qit]));
|
||||||
}
|
}
|
||||||
LOGABS(("makeAbstract:%d: got termfreqs\n", chron.ms()));
|
LOGABS(("makeAbstract:%d: got termfreqs\n", chron.ms()));
|
||||||
@ -450,7 +466,7 @@ string Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
LOGDEB(("makeAbstract:%d: extracting\n", chron.millis()));
|
LOGABS(("makeAbstract:%d: extracting\n", chron.millis()));
|
||||||
|
|
||||||
// Finally build the abstract by walking the map (in order of position)
|
// Finally build the abstract by walking the map (in order of position)
|
||||||
string abstract;
|
string abstract;
|
||||||
|
|||||||
@ -225,6 +225,10 @@ bool Query::getQueryTerms(list<string>& terms)
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool Query::getMatchTerms(const Doc& doc, list<string>& terms)
|
bool Query::getMatchTerms(const Doc& doc, list<string>& terms)
|
||||||
|
{
|
||||||
|
return getMatchTerms(doc.xdocid, terms);
|
||||||
|
}
|
||||||
|
bool Query::getMatchTerms(unsigned long xdocid, list<string>& terms)
|
||||||
{
|
{
|
||||||
if (ISNULL(m_nq) || !m_nq->xenquire) {
|
if (ISNULL(m_nq) || !m_nq->xenquire) {
|
||||||
LOGERR(("Query::getMatchTerms: no query opened\n"));
|
LOGERR(("Query::getMatchTerms: no query opened\n"));
|
||||||
@ -233,7 +237,7 @@ bool Query::getMatchTerms(const Doc& doc, list<string>& terms)
|
|||||||
|
|
||||||
terms.clear();
|
terms.clear();
|
||||||
Xapian::TermIterator it;
|
Xapian::TermIterator it;
|
||||||
Xapian::docid id = Xapian::docid(doc.xdocid);
|
Xapian::docid id = Xapian::docid(xdocid);
|
||||||
|
|
||||||
XAPTRY(terms.insert(terms.begin(),
|
XAPTRY(terms.insert(terms.begin(),
|
||||||
m_nq->xenquire->get_matching_terms_begin(id),
|
m_nq->xenquire->get_matching_terms_begin(id),
|
||||||
|
|||||||
@ -77,6 +77,7 @@ class Query {
|
|||||||
|
|
||||||
/** Return a list of terms which matched for a specific result document */
|
/** Return a list of terms which matched for a specific result document */
|
||||||
bool getMatchTerms(const Doc& doc, list<string>& terms);
|
bool getMatchTerms(const Doc& doc, list<string>& terms);
|
||||||
|
bool getMatchTerms(unsigned long xdocid, list<string>& terms);
|
||||||
|
|
||||||
/** Expand query to look for documents like the one passed in */
|
/** Expand query to look for documents like the one passed in */
|
||||||
list<string> expand(const Doc &doc);
|
list<string> expand(const Doc &doc);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user