suppress some sources of spurious ellipses in abstracts
This commit is contained in:
parent
f1f6d0cf07
commit
a380873029
@ -69,7 +69,7 @@ class DocSeqFiltSpec {
|
||||
by native capability (ex: docseqdb can sort and filter). The
|
||||
implementation might be nicer by using more sophisticated c++ with
|
||||
multiple inheritance of sort and filter virtual interfaces, but
|
||||
the current one will have to do for now...
|
||||
the current one will have to do for now.
|
||||
*/
|
||||
class DocSequence {
|
||||
public:
|
||||
@ -97,17 +97,6 @@ class DocSequence {
|
||||
abs.push_back(doc.meta[Rcl::Doc::keyabs]);
|
||||
return true;
|
||||
}
|
||||
virtual string getAbstract(Rcl::Doc& doc) {
|
||||
vector<string> v;
|
||||
getAbstract(doc, v);
|
||||
string abstract;
|
||||
for (vector<string>::const_iterator it = v.begin();
|
||||
it != v.end(); it++) {
|
||||
abstract += *it;
|
||||
abstract += "... ";
|
||||
}
|
||||
return abstract;
|
||||
}
|
||||
virtual bool getEnclosing(Rcl::Doc&, Rcl::Doc&) = 0;
|
||||
|
||||
/** Get estimated total count in results */
|
||||
@ -169,12 +158,6 @@ public:
|
||||
return false;
|
||||
return m_seq->getAbstract(doc, abs);
|
||||
}
|
||||
virtual string getAbstract(Rcl::Doc& doc)
|
||||
{
|
||||
if (m_seq.isNull())
|
||||
return "";
|
||||
return m_seq->getAbstract(doc);
|
||||
}
|
||||
virtual string getDescription()
|
||||
{
|
||||
if (m_seq.isNull())
|
||||
|
||||
@ -184,12 +184,14 @@ void ResListPager::displayDoc(RclConfig *config,
|
||||
|
||||
for (vector<string>::const_iterator it = vabs.begin();
|
||||
it != vabs.end(); it++) {
|
||||
// No need to call escapeHtml(), plaintorich handles it
|
||||
list<string> lr;
|
||||
m_hiliter->set_inputhtml(false);
|
||||
m_hiliter->plaintorich(*it, lr, hdata);
|
||||
richabst += lr.front();
|
||||
richabst += absSep();
|
||||
if (!it->empty()) {
|
||||
// No need to call escapeHtml(), plaintorich handles it
|
||||
list<string> lr;
|
||||
m_hiliter->set_inputhtml(false);
|
||||
m_hiliter->plaintorich(*it, lr, hdata);
|
||||
richabst += lr.front();
|
||||
richabst += absSep();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -73,6 +73,9 @@ namespace Rcl {
|
||||
#endif
|
||||
|
||||
const string pathelt_prefix = "XP";
|
||||
// This is used as a marker inside the abstract frag lists, but
|
||||
// normally doesn't remain in final output (which is built with a
|
||||
// custom sep. by our caller).
|
||||
static const string ellipsis("...");
|
||||
|
||||
string version_string(){
|
||||
@ -418,7 +421,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||
sparseDoc[ii] = emptys;
|
||||
}
|
||||
}
|
||||
// Add ... at the end. This may be replaced later by
|
||||
// Add ellipsis at the end. This may be replaced later by
|
||||
// an overlapping extract. Take care not to replace an
|
||||
// empty string here, we really want an empty slot,
|
||||
// use find()
|
||||
@ -442,14 +445,16 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||
|
||||
// This can happen if there are term occurrences in the keywords
|
||||
// etc. but not elsewhere ?
|
||||
if (qtermposs.size() == 0)
|
||||
if (qtermposs.size() == 0) {
|
||||
LOGDEB1(("makeAbstract: no occurrences\n"));
|
||||
return vector<string>();
|
||||
}
|
||||
|
||||
// Walk all document's terms position lists and populate slots
|
||||
// around the query terms. We arbitrarily truncate the list to
|
||||
// avoid taking forever. If we do cutoff, the abstract may be
|
||||
// inconsistent (missing words, potentially altering meaning),
|
||||
// which is bad...
|
||||
// which is bad.
|
||||
{
|
||||
Xapian::TermIterator term;
|
||||
int cutoff = 500 * 1000;
|
||||
@ -532,10 +537,6 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||
}
|
||||
if (!chunk.empty())
|
||||
vabs.push_back(chunk);
|
||||
// This happens for docs with no terms (only filename) indexed? I'll fix
|
||||
// one day (yeah)
|
||||
if (vabs.size() == 1 && !vabs[0].compare("... "))
|
||||
vabs.clear();
|
||||
|
||||
LOGDEB2(("makeAbtract: done in %d mS\n", chron.millis()));
|
||||
return vabs;
|
||||
@ -920,7 +921,7 @@ static const string nc("\n\r\x0c");
|
||||
|
||||
// Add document in internal form to the database: index the terms in
|
||||
// the title abstract and body and add special terms for file name,
|
||||
// date, mime type ... , create the document data record (more
|
||||
// date, mime type etc., create the document data record (more
|
||||
// metadata), and update database
|
||||
bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||
const Doc &idoc)
|
||||
@ -1072,7 +1073,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||
if (!parent_udi.empty()) {
|
||||
newdocument.add_term(make_parentterm(parent_udi));
|
||||
}
|
||||
// Dates etc...
|
||||
// Dates etc.
|
||||
time_t mtime = atol(doc.dmtime.empty() ? doc.fmtime.c_str() :
|
||||
doc.dmtime.c_str());
|
||||
struct tm *tm = localtime(&mtime);
|
||||
|
||||
@ -448,7 +448,6 @@ string truncate_to_word(const string &input, string::size_type maxlen)
|
||||
} else {
|
||||
output.erase(space);
|
||||
}
|
||||
output += " ...";
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
@ -43,7 +43,7 @@ using std::string;
|
||||
bool transcode(const string &in, string &out, const string &icode,
|
||||
const string &ocode, int *ecnt)
|
||||
{
|
||||
LOGDEB(("Transcode: %s -> %s\n", icode.c_str(), ocode.c_str()));
|
||||
LOGDEB2(("Transcode: %s -> %s\n", icode.c_str(), ocode.c_str()));
|
||||
#ifdef ICONV_CACHE_OPEN
|
||||
static iconv_t ic = (iconv_t)-1;
|
||||
static string cachedicode;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user