Keep only the main Snippet-producing makeDocAbstract() in rclquery; further formatting is now done in the modules that use it

This commit is contained in:
Jean-Francois Dockes 2019-05-13 18:11:23 +02:00
parent ee5a260d54
commit 34d43d1188
4 changed files with 53 additions and 67 deletions

View File

@ -20,6 +20,8 @@
#include <time.h> #include <time.h>
#include <list> #include <list>
#include <sstream>
#include <vector>
#include "docseqdb.h" #include "docseqdb.h"
#include "rcldb.h" #include "rcldb.h"
@ -27,6 +29,7 @@
#include "wasatorcl.h" #include "wasatorcl.h"
using std::list; using std::list;
using std::vector;
DocSequenceDb::DocSequenceDb(std::shared_ptr<Rcl::Db> db, DocSequenceDb::DocSequenceDb(std::shared_ptr<Rcl::Db> db,
std::shared_ptr<Rcl::Query> q, const string &t, std::shared_ptr<Rcl::Query> q, const string &t,
@ -114,8 +117,19 @@ bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<string>& vabs)
return false; return false;
if (m_q->whatDb() && if (m_q->whatDb() &&
m_queryBuildAbstract && (doc.syntabs || m_queryReplaceAbstract)) { m_queryBuildAbstract && (doc.syntabs || m_queryReplaceAbstract)) {
m_q->makeDocAbstract(doc, vabs); vector<Rcl::Snippet> vpabs;
} m_q->makeDocAbstract(doc, vpabs);
for (const auto& snippet : vpabs) {
string chunk;
if (snippet.page > 0) {
std::ostringstream ss;
ss << snippet.page;
chunk += string(" [p ") + ss.str() + "] ";
}
chunk += snippet.snippet;
vabs.push_back(chunk);
}
}
if (vabs.empty()) if (vabs.empty())
vabs.push_back(doc.meta[Rcl::Doc::keyabs]); vabs.push_back(doc.meta[Rcl::Doc::keyabs]);
return true; return true;

View File

@ -56,6 +56,19 @@ bool dump_contents(RclConfig *rclconfig, Rcl::Doc& idoc)
return true; return true;
} }
static const string cstr_ellipsis("...");
static void stringAbstract(Rcl::Query& query, Rcl::Doc doc, string& abstract)
{
abstract.clear();
vector<Rcl::Snippet> vpabs;
query.makeDocAbstract(doc, vpabs);
for (const auto& snippet : vpabs) {
abstract.append(snippet.snippet);
abstract.append(cstr_ellipsis);
}
}
void output_fields(vector<string> fields, Rcl::Doc& doc, void output_fields(vector<string> fields, Rcl::Doc& doc,
Rcl::Query& query, Rcl::Db& rcldb, bool printnames) Rcl::Query& query, Rcl::Db& rcldb, bool printnames)
{ {
@ -70,7 +83,7 @@ void output_fields(vector<string> fields, Rcl::Doc& doc,
string out; string out;
if (!it->compare("abstract")) { if (!it->compare("abstract")) {
string abstract; string abstract;
query.makeDocAbstract(doc, abstract); stringAbstract(query, doc, abstract);
base64_encode(abstract, out); base64_encode(abstract, out);
} else if (!it->compare("xdocid")) { } else if (!it->compare("xdocid")) {
char cdocid[30]; char cdocid[30];
@ -412,7 +425,8 @@ endopts:
} }
if (op_flags & OPT_A) { if (op_flags & OPT_A) {
string abstract; string abstract;
if (query.makeDocAbstract(doc, abstract)) { stringAbstract(query, doc, abstract);
if (abstract.size()) {
cout << "ABSTRACT" << endl; cout << "ABSTRACT" << endl;
cout << abstract << endl; cout << abstract << endl;
cout << "/ABSTRACT" << endl; cout << "/ABSTRACT" << endl;

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2008 J.F.Dockes /* Copyright (C) 2008-2019 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -41,10 +41,6 @@
using namespace std; using namespace std;
namespace Rcl { namespace Rcl {
// This is used as a marker inside the abstract frag lists, but
// normally doesn't remain in final output (which is built with a
// custom sep. by our caller).
static const string cstr_ellipsis("...");
// Field names inside the index data record may differ from the rcldoc ones // Field names inside the index data record may differ from the rcldoc ones
// (esp.: caption / title) // (esp.: caption / title)
@ -285,37 +281,6 @@ int Query::makeDocAbstract(const Doc &doc, vector<Snippet>& abstract,
return ret; return ret;
} }
bool Query::makeDocAbstract(const Doc &doc, vector<string>& abstract)
{
vector<Snippet> vpabs;
if (!makeDocAbstract(doc, vpabs))
return false;
for (vector<Snippet>::const_iterator it = vpabs.begin();
it != vpabs.end(); it++) {
string chunk;
if (it->page > 0) {
ostringstream ss;
ss << it->page;
chunk += string(" [p ") + ss.str() + "] ";
}
chunk += it->snippet;
abstract.push_back(chunk);
}
return true;
}
bool Query::makeDocAbstract(const Doc &doc, string& abstract)
{
vector<Snippet> vpabs;
if (!makeDocAbstract(doc, vpabs))
return false;
for (vector<Snippet>::const_iterator it = vpabs.begin();
it != vpabs.end(); it++) {
abstract.append(it->snippet);
abstract.append(cstr_ellipsis);
}
return m_reason.empty() ? true : false;
}
int Query::getFirstMatchPage(const Doc &doc, string& term) int Query::getFirstMatchPage(const Doc &doc, string& term)
{ {

View File

@ -16,15 +16,14 @@
*/ */
#ifndef _rclquery_h_included_ #ifndef _rclquery_h_included_
#define _rclquery_h_included_ #define _rclquery_h_included_
#include <string> #include <string>
#include <vector> #include <vector>
#include <memory> #include <memory>
#include "searchdata.h" #include "searchdata.h"
#ifndef NO_NAMESPACES
namespace Rcl { namespace Rcl {
#endif
class Db; class Db;
class Doc; class Doc;
@ -40,20 +39,17 @@ enum abstract_result {
class Snippet { class Snippet {
public: public:
Snippet(int page, const std::string& snip) Snippet(int page, const std::string& snip)
: page(page), snippet(snip) : page(page), snippet(snip) { }
{ Snippet& setTerm(const std::string& trm) {
} term = trm;
Snippet& setTerm(const std::string& trm) return *this;
{
term = trm;
return *this;
} }
int page; int page;
std::string term; std::string term;
std::string snippet; std::string snippet;
}; };
/** /**
* An Rcl::Query is a question (SearchData) applied to a * An Rcl::Query is a question (SearchData) applied to a
* database. Handles access to the results. Somewhat equivalent to a * database. Handles access to the results. Somewhat equivalent to a
@ -61,27 +57,27 @@ public:
* *
*/ */
class Query { class Query {
public: public:
Query(Db *db); Query(Db *db);
~Query(); ~Query();
/** Get explanation about last error */ /** Get explanation about last error */
std::string getReason() const { std::string getReason() const {
return m_reason; return m_reason;
} }
/** Choose sort order. Must be called before setQuery */ /** Choose sort order. Must be called before setQuery */
void setSortBy(const std::string& fld, bool ascending = true); void setSortBy(const std::string& fld, bool ascending = true);
const std::string& getSortBy() const { const std::string& getSortBy() const {
return m_sortField; return m_sortField;
} }
bool getSortAscending() const { bool getSortAscending() const {
return m_sortAscending; return m_sortAscending;
} }
/** Return or filter results with identical content checksum */ /** Return or filter results with identical content checksum */
void setCollapseDuplicates(bool on) { void setCollapseDuplicates(bool on) {
m_collapseDuplicates = on; m_collapseDuplicates = on;
} }
/** Accept data describing the search and query the index. This can /** Accept data describing the search and query the index. This can
@ -100,21 +96,20 @@ class Query {
bool getQueryTerms(std::vector<std::string>& terms); bool getQueryTerms(std::vector<std::string>& terms);
/** Build synthetic abstract for document, extracting chunks relevant for /** Build synthetic abstract for document, extracting chunks relevant for
* the input query. This uses index data only (no access to the file) */ * the input query.
// Abstract returned as one string * This uses index data only (no access to the file)
bool makeDocAbstract(const Doc &doc, std::string& abstract); * For each returned snippet, page is 0 if unknown, else > 0
// Returned as a snippets vector */
bool makeDocAbstract(const Doc &doc, std::vector<std::string>& abstract);
// Returned as a vector of pair<page,snippet> page is 0 if unknown
int makeDocAbstract(const Doc &doc, std::vector<Snippet>& abst, int makeDocAbstract(const Doc &doc, std::vector<Snippet>& abst,
int maxoccs= -1, int ctxwords = -1); int maxoccs= -1, int ctxwords = -1);
/** Retrieve page number for first match for "significant" query term /** Retrieve page number for first match for "significant" query term
* @param term returns the chosen term */ * @param term returns the chosen term */
int getFirstMatchPage(const Doc &doc, std::string& term); int getFirstMatchPage(const Doc &doc, std::string& term);
/** Retrieve a reference to the searchData we are using */ /** Retrieve a reference to the searchData we are using */
std::shared_ptr<SearchData> getSD() { std::shared_ptr<SearchData> getSD() {
return m_sd; return m_sd;
} }
/** Expand query to look for documents like the one passed in */ /** Expand query to look for documents like the one passed in */
@ -122,7 +117,7 @@ class Query {
/** Return the Db we're set for */ /** Return the Db we're set for */
Db *whatDb() const { Db *whatDb() const {
return m_db; return m_db;
} }
/* make this public for access from embedded Db::Native */ /* make this public for access from embedded Db::Native */
@ -145,9 +140,7 @@ private:
Query & operator=(const Query &) {return *this;}; Query & operator=(const Query &) {return *this;};
}; };
#ifndef NO_NAMESPACES
} }
#endif // NO_NAMESPACES
#endif /* _rclquery_h_included_ */ #endif /* _rclquery_h_included_ */