suppress some sources of spurious ellipses in abstracts

This commit is contained in:
Jean-Francois Dockes 2011-08-24 14:51:59 +02:00
parent f1f6d0cf07
commit a380873029
5 changed files with 20 additions and 35 deletions

View File

@ -69,7 +69,7 @@ class DocSeqFiltSpec {
by native capability (ex: docseqdb can sort and filter). The by native capability (ex: docseqdb can sort and filter). The
implementation might be nicer by using more sophisticated c++ with implementation might be nicer by using more sophisticated c++ with
multiple inheritance of sort and filter virtual interfaces, but multiple inheritance of sort and filter virtual interfaces, but
the current one will have to do for now... the current one will have to do for now.
*/ */
class DocSequence { class DocSequence {
public: public:
@ -97,17 +97,6 @@ class DocSequence {
abs.push_back(doc.meta[Rcl::Doc::keyabs]); abs.push_back(doc.meta[Rcl::Doc::keyabs]);
return true; return true;
} }
virtual string getAbstract(Rcl::Doc& doc) {
vector<string> v;
getAbstract(doc, v);
string abstract;
for (vector<string>::const_iterator it = v.begin();
it != v.end(); it++) {
abstract += *it;
abstract += "... ";
}
return abstract;
}
virtual bool getEnclosing(Rcl::Doc&, Rcl::Doc&) = 0; virtual bool getEnclosing(Rcl::Doc&, Rcl::Doc&) = 0;
/** Get estimated total count in results */ /** Get estimated total count in results */
@ -169,12 +158,6 @@ public:
return false; return false;
return m_seq->getAbstract(doc, abs); return m_seq->getAbstract(doc, abs);
} }
virtual string getAbstract(Rcl::Doc& doc)
{
if (m_seq.isNull())
return "";
return m_seq->getAbstract(doc);
}
virtual string getDescription() virtual string getDescription()
{ {
if (m_seq.isNull()) if (m_seq.isNull())

View File

@ -184,12 +184,14 @@ void ResListPager::displayDoc(RclConfig *config,
for (vector<string>::const_iterator it = vabs.begin(); for (vector<string>::const_iterator it = vabs.begin();
it != vabs.end(); it++) { it != vabs.end(); it++) {
// No need to call escapeHtml(), plaintorich handles it if (!it->empty()) {
list<string> lr; // No need to call escapeHtml(), plaintorich handles it
m_hiliter->set_inputhtml(false); list<string> lr;
m_hiliter->plaintorich(*it, lr, hdata); m_hiliter->set_inputhtml(false);
richabst += lr.front(); m_hiliter->plaintorich(*it, lr, hdata);
richabst += absSep(); richabst += lr.front();
richabst += absSep();
}
} }
} }

View File

@ -73,6 +73,9 @@ namespace Rcl {
#endif #endif
const string pathelt_prefix = "XP"; const string pathelt_prefix = "XP";
// This is used as a marker inside the abstract frag lists, but
// normally doesn't remain in final output (which is built with a
// custom sep. by our caller).
static const string ellipsis("..."); static const string ellipsis("...");
string version_string(){ string version_string(){
@ -418,7 +421,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
sparseDoc[ii] = emptys; sparseDoc[ii] = emptys;
} }
} }
// Add ... at the end. This may be replaced later by // Add ellipsis at the end. This may be replaced later by
// an overlapping extract. Take care not to replace an // an overlapping extract. Take care not to replace an
// empty string here, we really want an empty slot, // empty string here, we really want an empty slot,
// use find() // use find()
@ -442,14 +445,16 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
// This can happen if there are term occurrences in the keywords // This can happen if there are term occurrences in the keywords
// etc. but not elsewhere ? // etc. but not elsewhere ?
if (qtermposs.size() == 0) if (qtermposs.size() == 0) {
LOGDEB1(("makeAbstract: no occurrences\n"));
return vector<string>(); return vector<string>();
}
// Walk all document's terms position lists and populate slots // Walk all document's terms position lists and populate slots
// around the query terms. We arbitrarily truncate the list to // around the query terms. We arbitrarily truncate the list to
// avoid taking forever. If we do cutoff, the abstract may be // avoid taking forever. If we do cutoff, the abstract may be
// inconsistent (missing words, potentially altering meaning), // inconsistent (missing words, potentially altering meaning),
// which is bad... // which is bad.
{ {
Xapian::TermIterator term; Xapian::TermIterator term;
int cutoff = 500 * 1000; int cutoff = 500 * 1000;
@ -532,10 +537,6 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
} }
if (!chunk.empty()) if (!chunk.empty())
vabs.push_back(chunk); vabs.push_back(chunk);
// This happens for docs with no terms (only filename) indexed? I'll fix
// one day (yeah)
if (vabs.size() == 1 && !vabs[0].compare("... "))
vabs.clear();
LOGDEB2(("makeAbtract: done in %d mS\n", chron.millis())); LOGDEB2(("makeAbtract: done in %d mS\n", chron.millis()));
return vabs; return vabs;
@ -920,7 +921,7 @@ static const string nc("\n\r\x0c");
// Add document in internal form to the database: index the terms in // Add document in internal form to the database: index the terms in
// the title abstract and body and add special terms for file name, // the title abstract and body and add special terms for file name,
// date, mime type ... , create the document data record (more // date, mime type etc. , create the document data record (more
// metadata), and update database // metadata), and update database
bool Db::addOrUpdate(const string &udi, const string &parent_udi, bool Db::addOrUpdate(const string &udi, const string &parent_udi,
const Doc &idoc) const Doc &idoc)
@ -1072,7 +1073,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
if (!parent_udi.empty()) { if (!parent_udi.empty()) {
newdocument.add_term(make_parentterm(parent_udi)); newdocument.add_term(make_parentterm(parent_udi));
} }
// Dates etc... // Dates etc.
time_t mtime = atol(doc.dmtime.empty() ? doc.fmtime.c_str() : time_t mtime = atol(doc.dmtime.empty() ? doc.fmtime.c_str() :
doc.dmtime.c_str()); doc.dmtime.c_str());
struct tm *tm = localtime(&mtime); struct tm *tm = localtime(&mtime);

View File

@ -448,7 +448,6 @@ string truncate_to_word(const string &input, string::size_type maxlen)
} else { } else {
output.erase(space); output.erase(space);
} }
output += " ...";
} }
return output; return output;
} }

View File

@ -43,7 +43,7 @@ using std::string;
bool transcode(const string &in, string &out, const string &icode, bool transcode(const string &in, string &out, const string &icode,
const string &ocode, int *ecnt) const string &ocode, int *ecnt)
{ {
LOGDEB(("Transcode: %s -> %s\n", icode.c_str(), ocode.c_str())); LOGDEB2(("Transcode: %s -> %s\n", icode.c_str(), ocode.c_str()));
#ifdef ICONV_CACHE_OPEN #ifdef ICONV_CACHE_OPEN
static iconv_t ic = (iconv_t)-1; static iconv_t ic = (iconv_t)-1;
static string cachedicode; static string cachedicode;