Reverted 34d43d1188adfddb8fd8a4f7c7a28158a8b534f4

Keep only the main Snippet-producing makeabstract in rclquery, further
  formatting done in using modules
This was just a bad idea: the common methods are also used by the Python module.
This commit is contained in:
Jean-Francois Dockes 2019-05-17 10:19:03 +02:00
parent a5810508ed
commit 8ddcc578ac
4 changed files with 55 additions and 41 deletions

View File

@ -20,8 +20,6 @@
#include <time.h>
#include <list>
#include <sstream>
#include <vector>
#include "docseqdb.h"
#include "rcldb.h"
@ -29,7 +27,6 @@
#include "wasatorcl.h"
using std::list;
using std::vector;
DocSequenceDb::DocSequenceDb(std::shared_ptr<Rcl::Db> db,
std::shared_ptr<Rcl::Query> q, const string &t,
@ -117,18 +114,7 @@ bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<string>& vabs)
return false;
if (m_q->whatDb() &&
m_queryBuildAbstract && (doc.syntabs || m_queryReplaceAbstract)) {
vector<Rcl::Snippet> vpabs;
m_q->makeDocAbstract(doc, vpabs);
for (const auto& snippet : vpabs) {
string chunk;
if (snippet.page > 0) {
std::ostringstream ss;
ss << snippet.page;
chunk += string(" [p ") + ss.str() + "] ";
}
chunk += snippet.snippet;
vabs.push_back(chunk);
}
m_q->makeDocAbstract(doc, vabs);
}
if (vabs.empty())
vabs.push_back(doc.meta[Rcl::Doc::keyabs]);

View File

@ -56,19 +56,6 @@ bool dump_contents(RclConfig *rclconfig, Rcl::Doc& idoc)
return true;
}
static const string cstr_ellipsis("...");
// Produce a one-string abstract for doc. The previous content of
// abstract is discarded, the query is asked for its snippets, and each
// snippet text is appended followed by the ellipsis marker.
static void stringAbstract(Rcl::Query& query, Rcl::Doc doc, string& abstract)
{
    abstract.clear();

    vector<Rcl::Snippet> snippets;
    query.makeDocAbstract(doc, snippets);

    for (vector<Rcl::Snippet>::const_iterator cur = snippets.begin();
         cur != snippets.end(); ++cur) {
        abstract += cur->snippet;
        abstract += cstr_ellipsis;
    }
}
void output_fields(vector<string> fields, Rcl::Doc& doc,
Rcl::Query& query, Rcl::Db& rcldb, bool printnames)
{
@ -83,7 +70,7 @@ void output_fields(vector<string> fields, Rcl::Doc& doc,
string out;
if (!it->compare("abstract")) {
string abstract;
stringAbstract(query, doc, abstract);
query.makeDocAbstract(doc, abstract);
base64_encode(abstract, out);
} else if (!it->compare("xdocid")) {
char cdocid[30];
@ -425,8 +412,7 @@ endopts:
}
if (op_flags & OPT_A) {
string abstract;
stringAbstract(query, doc, abstract);
if (abstract.size()) {
if (query.makeDocAbstract(doc, abstract)) {
cout << "ABSTRACT" << endl;
cout << abstract << endl;
cout << "/ABSTRACT" << endl;

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2008-2019 J.F.Dockes
/* Copyright (C) 2008 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -41,6 +41,10 @@
using namespace std;
namespace Rcl {
// This is used as a marker inside the abstract frag lists, but
// normally doesn't remain in final output (which is built with a
// custom sep. by our caller).
static const string cstr_ellipsis("...");
// Field names inside the index data record may differ from the rcldoc ones
// (esp.: caption / title)
@ -281,6 +285,37 @@ int Query::makeDocAbstract(const Doc &doc, vector<Snippet>& abstract,
return ret;
}
bool Query::makeDocAbstract(const Doc &doc, vector<string>& abstract)
{
vector<Snippet> vpabs;
if (!makeDocAbstract(doc, vpabs))
return false;
for (vector<Snippet>::const_iterator it = vpabs.begin();
it != vpabs.end(); it++) {
string chunk;
if (it->page > 0) {
ostringstream ss;
ss << it->page;
chunk += string(" [p ") + ss.str() + "] ";
}
chunk += it->snippet;
abstract.push_back(chunk);
}
return true;
}
bool Query::makeDocAbstract(const Doc &doc, string& abstract)
{
vector<Snippet> vpabs;
if (!makeDocAbstract(doc, vpabs))
return false;
for (vector<Snippet>::const_iterator it = vpabs.begin();
it != vpabs.end(); it++) {
abstract.append(it->snippet);
abstract.append(cstr_ellipsis);
}
return m_reason.empty() ? true : false;
}
int Query::getFirstMatchPage(const Doc &doc, string& term)
{

View File

@ -16,14 +16,15 @@
*/
#ifndef _rclquery_h_included_
#define _rclquery_h_included_
#include <string>
#include <vector>
#include <memory>
#include <memory>
#include "searchdata.h"
#ifndef NO_NAMESPACES
namespace Rcl {
#endif
class Db;
class Doc;
@ -39,8 +40,11 @@ enum abstract_result {
class Snippet {
public:
Snippet(int page, const std::string& snip)
: page(page), snippet(snip) { }
Snippet& setTerm(const std::string& trm) {
: page(page), snippet(snip)
{
}
Snippet& setTerm(const std::string& trm)
{
term = trm;
return *this;
}
@ -96,13 +100,14 @@ public:
bool getQueryTerms(std::vector<std::string>& terms);
/** Build synthetic abstract for document, extracting chunks relevant for
* the input query.
* This uses index data only (no access to the file)
* For each returned snippet, page is 0 if unknown, else > 0
*/
* the input query. This uses index data only (no access to the file) */
// Abstract returned as one string
bool makeDocAbstract(const Doc &doc, std::string& abstract);
// Returned as a snippets vector
bool makeDocAbstract(const Doc &doc, std::vector<std::string>& abstract);
// Returned as a vector of Snippet objects; a snippet's page is 0 if unknown
int makeDocAbstract(const Doc &doc, std::vector<Snippet>& abst,
int maxoccs= -1, int ctxwords = -1);
/** Retrieve page number for first match for "significant" query term
* @param term returns the chosen term */
int getFirstMatchPage(const Doc &doc, std::string& term);
@ -140,7 +145,9 @@ private:
// Assignment operator with a body that copies nothing and just returns
// *this. NOTE(review): it appears to sit in the private section,
// presumably to keep Query objects from being assigned by outside code;
// confirm against the full class definition.
Query & operator=(const Query &) {return *this;};
};
#ifndef NO_NAMESPACES
}
#endif // NO_NAMESPACES
#endif /* _rclquery_h_included_ */