suppress some sources of spurious ellipses in abstracts
This commit is contained in:
parent
f1f6d0cf07
commit
a380873029
@ -69,7 +69,7 @@ class DocSeqFiltSpec {
|
|||||||
by native capability (ex: docseqdb can sort and filter). The
|
by native capability (ex: docseqdb can sort and filter). The
|
||||||
implementation might be nicer by using more sophisticated c++ with
|
implementation might be nicer by using more sophisticated c++ with
|
||||||
multiple inheritance of sort and filter virtual interfaces, but
|
multiple inheritance of sort and filter virtual interfaces, but
|
||||||
the current one will have to do for now...
|
the current one will have to do for now.
|
||||||
*/
|
*/
|
||||||
class DocSequence {
|
class DocSequence {
|
||||||
public:
|
public:
|
||||||
@ -97,17 +97,6 @@ class DocSequence {
|
|||||||
abs.push_back(doc.meta[Rcl::Doc::keyabs]);
|
abs.push_back(doc.meta[Rcl::Doc::keyabs]);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
virtual string getAbstract(Rcl::Doc& doc) {
|
|
||||||
vector<string> v;
|
|
||||||
getAbstract(doc, v);
|
|
||||||
string abstract;
|
|
||||||
for (vector<string>::const_iterator it = v.begin();
|
|
||||||
it != v.end(); it++) {
|
|
||||||
abstract += *it;
|
|
||||||
abstract += "... ";
|
|
||||||
}
|
|
||||||
return abstract;
|
|
||||||
}
|
|
||||||
virtual bool getEnclosing(Rcl::Doc&, Rcl::Doc&) = 0;
|
virtual bool getEnclosing(Rcl::Doc&, Rcl::Doc&) = 0;
|
||||||
|
|
||||||
/** Get estimated total count in results */
|
/** Get estimated total count in results */
|
||||||
@ -169,12 +158,6 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
return m_seq->getAbstract(doc, abs);
|
return m_seq->getAbstract(doc, abs);
|
||||||
}
|
}
|
||||||
virtual string getAbstract(Rcl::Doc& doc)
|
|
||||||
{
|
|
||||||
if (m_seq.isNull())
|
|
||||||
return "";
|
|
||||||
return m_seq->getAbstract(doc);
|
|
||||||
}
|
|
||||||
virtual string getDescription()
|
virtual string getDescription()
|
||||||
{
|
{
|
||||||
if (m_seq.isNull())
|
if (m_seq.isNull())
|
||||||
|
|||||||
@ -184,12 +184,14 @@ void ResListPager::displayDoc(RclConfig *config,
|
|||||||
|
|
||||||
for (vector<string>::const_iterator it = vabs.begin();
|
for (vector<string>::const_iterator it = vabs.begin();
|
||||||
it != vabs.end(); it++) {
|
it != vabs.end(); it++) {
|
||||||
// No need to call escapeHtml(), plaintorich handles it
|
if (!it->empty()) {
|
||||||
list<string> lr;
|
// No need to call escapeHtml(), plaintorich handles it
|
||||||
m_hiliter->set_inputhtml(false);
|
list<string> lr;
|
||||||
m_hiliter->plaintorich(*it, lr, hdata);
|
m_hiliter->set_inputhtml(false);
|
||||||
richabst += lr.front();
|
m_hiliter->plaintorich(*it, lr, hdata);
|
||||||
richabst += absSep();
|
richabst += lr.front();
|
||||||
|
richabst += absSep();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -73,6 +73,9 @@ namespace Rcl {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
const string pathelt_prefix = "XP";
|
const string pathelt_prefix = "XP";
|
||||||
|
// This is used as a marker inside the abstract frag lists, but
|
||||||
|
// normally doesn't remain in final output (which is built with a
|
||||||
|
// custom sep. by our caller).
|
||||||
static const string ellipsis("...");
|
static const string ellipsis("...");
|
||||||
|
|
||||||
string version_string(){
|
string version_string(){
|
||||||
@ -418,7 +421,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
|||||||
sparseDoc[ii] = emptys;
|
sparseDoc[ii] = emptys;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Add ... at the end. This may be replaced later by
|
// Add ellipsis at the end. This may be replaced later by
|
||||||
// an overlapping extract. Take care not to replace an
|
// an overlapping extract. Take care not to replace an
|
||||||
// empty string here, we really want an empty slot,
|
// empty string here, we really want an empty slot,
|
||||||
// use find()
|
// use find()
|
||||||
@ -442,14 +445,16 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
|||||||
|
|
||||||
// This can happen if there are term occurrences in the keywords
|
// This can happen if there are term occurrences in the keywords
|
||||||
// etc. but not elsewhere ?
|
// etc. but not elsewhere ?
|
||||||
if (qtermposs.size() == 0)
|
if (qtermposs.size() == 0) {
|
||||||
|
LOGDEB1(("makeAbstract: no occurrences\n"));
|
||||||
return vector<string>();
|
return vector<string>();
|
||||||
|
}
|
||||||
|
|
||||||
// Walk all document's terms position lists and populate slots
|
// Walk all document's terms position lists and populate slots
|
||||||
// around the query terms. We arbitrarily truncate the list to
|
// around the query terms. We arbitrarily truncate the list to
|
||||||
// avoid taking forever. If we do cutoff, the abstract may be
|
// avoid taking forever. If we do cutoff, the abstract may be
|
||||||
// inconsistent (missing words, potentially altering meaning),
|
// inconsistent (missing words, potentially altering meaning),
|
||||||
// which is bad...
|
// which is bad.
|
||||||
{
|
{
|
||||||
Xapian::TermIterator term;
|
Xapian::TermIterator term;
|
||||||
int cutoff = 500 * 1000;
|
int cutoff = 500 * 1000;
|
||||||
@ -532,10 +537,6 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
|||||||
}
|
}
|
||||||
if (!chunk.empty())
|
if (!chunk.empty())
|
||||||
vabs.push_back(chunk);
|
vabs.push_back(chunk);
|
||||||
// This happens for docs with no terms (only filename) indexed? I'll fix
|
|
||||||
// one day (yeah)
|
|
||||||
if (vabs.size() == 1 && !vabs[0].compare("... "))
|
|
||||||
vabs.clear();
|
|
||||||
|
|
||||||
LOGDEB2(("makeAbtract: done in %d mS\n", chron.millis()));
|
LOGDEB2(("makeAbtract: done in %d mS\n", chron.millis()));
|
||||||
return vabs;
|
return vabs;
|
||||||
@ -920,7 +921,7 @@ static const string nc("\n\r\x0c");
|
|||||||
|
|
||||||
// Add document in internal form to the database: index the terms in
|
// Add document in internal form to the database: index the terms in
|
||||||
// the title abstract and body and add special terms for file name,
|
// the title abstract and body and add special terms for file name,
|
||||||
// date, mime type ... , create the document data record (more
|
// date, mime type etc., create the document data record (more
|
||||||
// metadata), and update database
|
// metadata), and update database
|
||||||
bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||||
const Doc &idoc)
|
const Doc &idoc)
|
||||||
@ -1072,7 +1073,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
|||||||
if (!parent_udi.empty()) {
|
if (!parent_udi.empty()) {
|
||||||
newdocument.add_term(make_parentterm(parent_udi));
|
newdocument.add_term(make_parentterm(parent_udi));
|
||||||
}
|
}
|
||||||
// Dates etc...
|
// Dates etc.
|
||||||
time_t mtime = atol(doc.dmtime.empty() ? doc.fmtime.c_str() :
|
time_t mtime = atol(doc.dmtime.empty() ? doc.fmtime.c_str() :
|
||||||
doc.dmtime.c_str());
|
doc.dmtime.c_str());
|
||||||
struct tm *tm = localtime(&mtime);
|
struct tm *tm = localtime(&mtime);
|
||||||
|
|||||||
@ -448,7 +448,6 @@ string truncate_to_word(const string &input, string::size_type maxlen)
|
|||||||
} else {
|
} else {
|
||||||
output.erase(space);
|
output.erase(space);
|
||||||
}
|
}
|
||||||
output += " ...";
|
|
||||||
}
|
}
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -43,7 +43,7 @@ using std::string;
|
|||||||
bool transcode(const string &in, string &out, const string &icode,
|
bool transcode(const string &in, string &out, const string &icode,
|
||||||
const string &ocode, int *ecnt)
|
const string &ocode, int *ecnt)
|
||||||
{
|
{
|
||||||
LOGDEB(("Transcode: %s -> %s\n", icode.c_str(), ocode.c_str()));
|
LOGDEB2(("Transcode: %s -> %s\n", icode.c_str(), ocode.c_str()));
|
||||||
#ifdef ICONV_CACHE_OPEN
|
#ifdef ICONV_CACHE_OPEN
|
||||||
static iconv_t ic = (iconv_t)-1;
|
static iconv_t ic = (iconv_t)-1;
|
||||||
static string cachedicode;
|
static string cachedicode;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user