suppress some sources of spurious ellipses in abstracts

This commit is contained in:
Jean-Francois Dockes 2011-08-24 14:51:59 +02:00
parent f1f6d0cf07
commit a380873029
5 changed files with 20 additions and 35 deletions

View File

@ -69,7 +69,7 @@ class DocSeqFiltSpec {
by native capability (ex: docseqdb can sort and filter). The
implementation might be nicer by using more sophisticated c++ with
multiple inheritance of sort and filter virtual interfaces, but
the current one will have to do for now...
the current one will have to do for now.
*/
class DocSequence {
public:
@ -97,17 +97,6 @@ class DocSequence {
abs.push_back(doc.meta[Rcl::Doc::keyabs]);
return true;
}
// Single-string flavour of getAbstract(): fetches the abstract fragments
// through the vector overload, then concatenates them, appending "... "
// after each fragment (including the last one).
virtual string getAbstract(Rcl::Doc& doc) {
    vector<string> frags;
    getAbstract(doc, frags);

    string joined;
    vector<string>::const_iterator cur = frags.begin();
    while (cur != frags.end()) {
        joined.append(*cur);
        joined.append("... ");
        cur++;
    }
    return joined;
}
virtual bool getEnclosing(Rcl::Doc&, Rcl::Doc&) = 0;
/** Get estimated total count in results */
@ -169,12 +158,6 @@ public:
return false;
return m_seq->getAbstract(doc, abs);
}
// Forward the single-string abstract request to the wrapped sequence.
// When no sequence is attached (m_seq is null) an empty string is returned.
virtual string getAbstract(Rcl::Doc& doc)
{
    if (!m_seq.isNull()) {
        return m_seq->getAbstract(doc);
    }
    return "";
}
virtual string getDescription()
{
if (m_seq.isNull())

View File

@ -184,12 +184,14 @@ void ResListPager::displayDoc(RclConfig *config,
for (vector<string>::const_iterator it = vabs.begin();
it != vabs.end(); it++) {
// No need to call escapeHtml(), plaintorich handles it
list<string> lr;
m_hiliter->set_inputhtml(false);
m_hiliter->plaintorich(*it, lr, hdata);
richabst += lr.front();
richabst += absSep();
if (!it->empty()) {
// No need to call escapeHtml(), plaintorich handles it
list<string> lr;
m_hiliter->set_inputhtml(false);
m_hiliter->plaintorich(*it, lr, hdata);
richabst += lr.front();
richabst += absSep();
}
}
}

View File

@ -73,6 +73,9 @@ namespace Rcl {
#endif
const string pathelt_prefix = "XP";
// This is used as a marker inside the abstract frag lists, but
// normally doesn't remain in final output (which is built with a
// custom sep. by our caller).
static const string ellipsis("...");
string version_string(){
@ -418,7 +421,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
sparseDoc[ii] = emptys;
}
}
// Add ... at the end. This may be replaced later by
// Add ellipsis at the end. This may be replaced later by
// an overlapping extract. Take care not to replace an
// empty string here, we really want an empty slot,
// use find()
@ -442,14 +445,16 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
// This can happen if there are term occurrences in the keywords
// etc. but not elsewhere ?
if (qtermposs.size() == 0)
if (qtermposs.size() == 0) {
LOGDEB1(("makeAbstract: no occurrences\n"));
return vector<string>();
}
// Walk all document's terms position lists and populate slots
// around the query terms. We arbitrarily truncate the list to
// avoid taking forever. If we do cutoff, the abstract may be
// inconsistent (missing words, potentially altering meaning),
// which is bad...
// which is bad.
{
Xapian::TermIterator term;
int cutoff = 500 * 1000;
@ -532,10 +537,6 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
}
if (!chunk.empty())
vabs.push_back(chunk);
// This happens for docs with no terms (only filename) indexed? I'll fix
// one day (yeah)
if (vabs.size() == 1 && !vabs[0].compare("... "))
vabs.clear();
LOGDEB2(("makeAbtract: done in %d mS\n", chron.millis()));
return vabs;
@ -920,7 +921,7 @@ static const string nc("\n\r\x0c");
// Add document in internal form to the database: index the terms in
// the title abstract and body and add special terms for file name,
// date, mime type ... , create the document data record (more
// date, mime type etc., create the document data record (more
// metadata), and update database
bool Db::addOrUpdate(const string &udi, const string &parent_udi,
const Doc &idoc)
@ -1072,7 +1073,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
if (!parent_udi.empty()) {
newdocument.add_term(make_parentterm(parent_udi));
}
// Dates etc...
// Dates etc.
time_t mtime = atol(doc.dmtime.empty() ? doc.fmtime.c_str() :
doc.dmtime.c_str());
struct tm *tm = localtime(&mtime);

View File

@ -448,7 +448,6 @@ string truncate_to_word(const string &input, string::size_type maxlen)
} else {
output.erase(space);
}
output += " ...";
}
return output;
}

View File

@ -43,7 +43,7 @@ using std::string;
bool transcode(const string &in, string &out, const string &icode,
const string &ocode, int *ecnt)
{
LOGDEB(("Transcode: %s -> %s\n", icode.c_str(), ocode.c_str()));
LOGDEB2(("Transcode: %s -> %s\n", icode.c_str(), ocode.c_str()));
#ifdef ICONV_CACHE_OPEN
static iconv_t ic = (iconv_t)-1;
static string cachedicode;