adjust makeAbstract for prefixed terms
This commit is contained in:
parent
9b2db79f97
commit
238cd56d6c
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.102 2007-01-17 13:53:41 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.103 2007-01-19 10:23:26 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -214,29 +214,49 @@ bool Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return a copy of the input term list with the term prefixes stripped.
//
// The prefix of a term is taken to be its run of leading ASCII uppercase
// letters ('A'-'Z'); the comparison is done explicitly on the character
// values so that it does not depend on the current locale.
//
// @param in list of terms, each possibly starting with a prefix.
// @return the terms in their original order, without their prefixes. Terms
//         which have nothing left once the prefix is removed are dropped,
//         and so are empty input terms (looking at the first character of
//         an empty term with at(0) would throw std::out_of_range).
static std::list<std::string> noPrefixList(const std::list<std::string>& in)
{
    std::list<std::string> out;
    for (std::list<std::string>::const_iterator qit = in.begin();
         qit != in.end(); ++qit) {
        // Find the first character which is not part of the prefix. The
        // length test comes first, so an empty term is never indexed.
        std::string::size_type pos = 0;
        while (pos < qit->length() &&
               'A' <= (*qit)[pos] && (*qit)[pos] <= 'Z')
            ++pos;
        // Keep only terms which still have some text after the prefix.
        // For an unprefixed term pos is 0 and the whole term is copied.
        if (pos < qit->length())
            out.push_back(qit->substr(pos));
    }
    return out;
}
|
||||||
|
|
||||||
// Build a document abstract by extracting text chunks around the query terms
|
// Build a document abstract by extracting text chunks around the query terms
|
||||||
// This uses the db termlists, not the original document.
|
// This uses the db termlists, not the original document.
|
||||||
string Native::makeAbstract(Xapian::docid docid, const list<string>& terms)
|
string Native::makeAbstract(Xapian::docid docid, const list<string>& iterms)
|
||||||
{
|
{
|
||||||
Chrono chron;
|
Chrono chron;
|
||||||
LOGDEB(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
|
LOGDEB2(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
|
||||||
m_db->m_synthAbsLen, m_db->m_synthAbsWordCtxLen));
|
m_db->m_synthAbsLen, m_db->m_synthAbsWordCtxLen));
|
||||||
|
|
||||||
|
list<string> terms = noPrefixList(iterms);
|
||||||
if (terms.empty()) {
|
if (terms.empty()) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
// We may want to use the db-wide freqs to tune the abstracts one
|
// We may want to use the db-wide freqs to tune the abstracts one
|
||||||
// day but we currently don't
|
// day but we currently don't
|
||||||
#if 0
|
#if 0
|
||||||
if (m_termfreqs.empty()) {
|
if (m_termfreqs.empty()) {
|
||||||
for (list<string>::const_iterator qit = terms.begin();
|
for (list<string>::const_iterator qit = terms.begin();
|
||||||
qit != terms.end(); qit++) {
|
qit != terms.end(); qit++) {
|
||||||
m_termfreqs[*qit] = db.get_termfreq(*qit);
|
m_termfreqs[*qit] = db.get_termfreq(*qit);
|
||||||
LOGDEB2(("makeAbstract: [%s] db freq %d\n", qit->c_str(),
|
LOGDEB(("makeAbstract: [%s] db freq %d\n", qit->c_str(),
|
||||||
m_termfreqs[*qit]));
|
m_termfreqs[*qit]));
|
||||||
}
|
}
|
||||||
LOGDEB2(("makeAbstract:%d: got termfreqs\n", chron.ms()));
|
LOGDEB(("makeAbstract:%d: got termfreqs\n", chron.ms()));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -360,6 +380,9 @@ string Native::makeAbstract(Xapian::docid docid, const list<string>& terms)
|
|||||||
|
|
||||||
for (term = db.termlist_begin(docid);
|
for (term = db.termlist_begin(docid);
|
||||||
term != db.termlist_end(docid); term++) {
|
term != db.termlist_end(docid); term++) {
|
||||||
|
// Ignore prefixed terms
|
||||||
|
if ('A' <= (*term).at(0) && (*term).at(0) <= 'Z')
|
||||||
|
continue;
|
||||||
if (cutoff-- < 0) {
|
if (cutoff-- < 0) {
|
||||||
LOGDEB(("makeAbstract: max term count cutoff\n"));
|
LOGDEB(("makeAbstract: max term count cutoff\n"));
|
||||||
break;
|
break;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user