Keep only the main Snippet-producing makeDocAbstract() in rclquery; further formatting is now done in the modules that use it

This commit is contained in:
Jean-Francois Dockes 2019-05-13 18:11:23 +02:00
parent ee5a260d54
commit 34d43d1188
4 changed files with 53 additions and 67 deletions

View File

@ -20,6 +20,8 @@
#include <time.h>
#include <list>
#include <sstream>
#include <vector>
#include "docseqdb.h"
#include "rcldb.h"
@ -27,6 +29,7 @@
#include "wasatorcl.h"
using std::list;
using std::vector;
DocSequenceDb::DocSequenceDb(std::shared_ptr<Rcl::Db> db,
std::shared_ptr<Rcl::Query> q, const string &t,
@ -114,8 +117,19 @@ bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<string>& vabs)
return false;
if (m_q->whatDb() &&
m_queryBuildAbstract && (doc.syntabs || m_queryReplaceAbstract)) {
m_q->makeDocAbstract(doc, vabs);
}
vector<Rcl::Snippet> vpabs;
m_q->makeDocAbstract(doc, vpabs);
for (const auto& snippet : vpabs) {
string chunk;
if (snippet.page > 0) {
std::ostringstream ss;
ss << snippet.page;
chunk += string(" [p ") + ss.str() + "] ";
}
chunk += snippet.snippet;
vabs.push_back(chunk);
}
}
if (vabs.empty())
vabs.push_back(doc.meta[Rcl::Doc::keyabs]);
return true;

View File

@ -56,6 +56,19 @@ bool dump_contents(RclConfig *rclconfig, Rcl::Doc& idoc)
return true;
}
static const string cstr_ellipsis("...");
static void stringAbstract(Rcl::Query& query, Rcl::Doc doc, string& abstract)
{
abstract.clear();
vector<Rcl::Snippet> vpabs;
query.makeDocAbstract(doc, vpabs);
for (const auto& snippet : vpabs) {
abstract.append(snippet.snippet);
abstract.append(cstr_ellipsis);
}
}
void output_fields(vector<string> fields, Rcl::Doc& doc,
Rcl::Query& query, Rcl::Db& rcldb, bool printnames)
{
@ -70,7 +83,7 @@ void output_fields(vector<string> fields, Rcl::Doc& doc,
string out;
if (!it->compare("abstract")) {
string abstract;
query.makeDocAbstract(doc, abstract);
stringAbstract(query, doc, abstract);
base64_encode(abstract, out);
} else if (!it->compare("xdocid")) {
char cdocid[30];
@ -412,7 +425,8 @@ endopts:
}
if (op_flags & OPT_A) {
string abstract;
if (query.makeDocAbstract(doc, abstract)) {
stringAbstract(query, doc, abstract);
if (abstract.size()) {
cout << "ABSTRACT" << endl;
cout << abstract << endl;
cout << "/ABSTRACT" << endl;

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2008 J.F.Dockes
/* Copyright (C) 2008-2019 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -41,10 +41,6 @@
using namespace std;
namespace Rcl {
// This is used as a marker inside the abstract frag lists, but
// normally doesn't remain in final output (which is built with a
// custom sep. by our caller).
static const string cstr_ellipsis("...");
// Field names inside the index data record may differ from the rcldoc ones
// (esp.: caption / title)
@ -285,37 +281,6 @@ int Query::makeDocAbstract(const Doc &doc, vector<Snippet>& abstract,
return ret;
}
bool Query::makeDocAbstract(const Doc &doc, vector<string>& abstract)
{
vector<Snippet> vpabs;
if (!makeDocAbstract(doc, vpabs))
return false;
for (vector<Snippet>::const_iterator it = vpabs.begin();
it != vpabs.end(); it++) {
string chunk;
if (it->page > 0) {
ostringstream ss;
ss << it->page;
chunk += string(" [p ") + ss.str() + "] ";
}
chunk += it->snippet;
abstract.push_back(chunk);
}
return true;
}
bool Query::makeDocAbstract(const Doc &doc, string& abstract)
{
vector<Snippet> vpabs;
if (!makeDocAbstract(doc, vpabs))
return false;
for (vector<Snippet>::const_iterator it = vpabs.begin();
it != vpabs.end(); it++) {
abstract.append(it->snippet);
abstract.append(cstr_ellipsis);
}
return m_reason.empty() ? true : false;
}
int Query::getFirstMatchPage(const Doc &doc, string& term)
{

View File

@ -16,15 +16,14 @@
*/
#ifndef _rclquery_h_included_
#define _rclquery_h_included_
#include <string>
#include <vector>
#include <memory>
#include "searchdata.h"
#ifndef NO_NAMESPACES
namespace Rcl {
#endif
class Db;
class Doc;
@ -40,20 +39,17 @@ enum abstract_result {
/**
 * One fragment of a document abstract: the fragment text, the page number
 * it is associated with, and an optional term set through setTerm().
 */
class Snippet {
public:
Snippet(int page, const std::string& snip)
: page(page), snippet(snip)
{
}
Snippet& setTerm(const std::string& trm)
{
term = trm;
return *this;
: page(page), snippet(snip) { }
Snippet& setTerm(const std::string& trm) {
term = trm;
return *this;
}
// Page number for the snippet: 0 if unknown, else > 0.
int page;
// Term stored by setTerm(); the constructor does not set it.
std::string term;
// Text of the snippet, as passed to the constructor.
std::string snippet;
};
/**
* An Rcl::Query is a question (SearchData) applied to a
* database. Handles access to the results. Somewhat equivalent to a
@ -61,27 +57,27 @@ public:
*
*/
class Query {
public:
public:
Query(Db *db);
~Query();
/** Get explanation about last error */
std::string getReason() const {
return m_reason;
return m_reason;
}
/** Choose sort order. Must be called before setQuery */
void setSortBy(const std::string& fld, bool ascending = true);
const std::string& getSortBy() const {
return m_sortField;
return m_sortField;
}
bool getSortAscending() const {
return m_sortAscending;
return m_sortAscending;
}
/** Return or filter results with identical content checksum */
void setCollapseDuplicates(bool on) {
m_collapseDuplicates = on;
m_collapseDuplicates = on;
}
/** Accept data describing the search and query the index. This can
@ -100,21 +96,20 @@ class Query {
bool getQueryTerms(std::vector<std::string>& terms);
/** Build synthetic abstract for document, extracting chunks relevant for
* the input query. This uses index data only (no access to the file) */
// Abstract returned as one string
bool makeDocAbstract(const Doc &doc, std::string& abstract);
// Returned as a snippets vector
bool makeDocAbstract(const Doc &doc, std::vector<std::string>& abstract);
// Returned as a vector of pair<page,snippet> page is 0 if unknown
* the input query.
* This uses index data only (no access to the file)
* For each returned snippet, page is 0 if unknown, else > 0
*/
int makeDocAbstract(const Doc &doc, std::vector<Snippet>& abst,
int maxoccs= -1, int ctxwords = -1);
int maxoccs= -1, int ctxwords = -1);
/** Retrieve page number for first match for "significant" query term
* @param term returns the chosen term */
int getFirstMatchPage(const Doc &doc, std::string& term);
/** Retrieve a reference to the searchData we are using */
std::shared_ptr<SearchData> getSD() {
return m_sd;
return m_sd;
}
/** Expand query to look for documents like the one passed in */
@ -122,7 +117,7 @@ class Query {
/** Return the Db we're set for */
Db *whatDb() const {
return m_db;
return m_db;
}
/* make this public for access from embedded Db::Native */
@ -145,9 +140,7 @@ private:
Query & operator=(const Query &) {return *this;};
};
#ifndef NO_NAMESPACES
}
#endif // NO_NAMESPACES
#endif /* _rclquery_h_included_ */