Reverted 34d43d1188adfddb8fd8a4f7c7a28158a8b534f4
Keep only the main Snippet-producing makeabstract in rclquery, further formatting done in using modules. This was just a bad idea: the common methods are also used by the Python module.
This commit is contained in:
parent
a5810508ed
commit
8ddcc578ac
@ -20,8 +20,6 @@
|
||||
#include <time.h>
|
||||
|
||||
#include <list>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
#include "docseqdb.h"
|
||||
#include "rcldb.h"
|
||||
@ -29,7 +27,6 @@
|
||||
#include "wasatorcl.h"
|
||||
|
||||
using std::list;
|
||||
using std::vector;
|
||||
|
||||
DocSequenceDb::DocSequenceDb(std::shared_ptr<Rcl::Db> db,
|
||||
std::shared_ptr<Rcl::Query> q, const string &t,
|
||||
@ -117,18 +114,7 @@ bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<string>& vabs)
|
||||
return false;
|
||||
if (m_q->whatDb() &&
|
||||
m_queryBuildAbstract && (doc.syntabs || m_queryReplaceAbstract)) {
|
||||
vector<Rcl::Snippet> vpabs;
|
||||
m_q->makeDocAbstract(doc, vpabs);
|
||||
for (const auto& snippet : vpabs) {
|
||||
string chunk;
|
||||
if (snippet.page > 0) {
|
||||
std::ostringstream ss;
|
||||
ss << snippet.page;
|
||||
chunk += string(" [p ") + ss.str() + "] ";
|
||||
}
|
||||
chunk += snippet.snippet;
|
||||
vabs.push_back(chunk);
|
||||
}
|
||||
m_q->makeDocAbstract(doc, vabs);
|
||||
}
|
||||
if (vabs.empty())
|
||||
vabs.push_back(doc.meta[Rcl::Doc::keyabs]);
|
||||
|
||||
@ -56,19 +56,6 @@ bool dump_contents(RclConfig *rclconfig, Rcl::Doc& idoc)
|
||||
return true;
|
||||
}
|
||||
|
||||
static const string cstr_ellipsis("...");
|
||||
|
||||
static void stringAbstract(Rcl::Query& query, Rcl::Doc doc, string& abstract)
|
||||
{
|
||||
abstract.clear();
|
||||
vector<Rcl::Snippet> vpabs;
|
||||
query.makeDocAbstract(doc, vpabs);
|
||||
for (const auto& snippet : vpabs) {
|
||||
abstract.append(snippet.snippet);
|
||||
abstract.append(cstr_ellipsis);
|
||||
}
|
||||
}
|
||||
|
||||
void output_fields(vector<string> fields, Rcl::Doc& doc,
|
||||
Rcl::Query& query, Rcl::Db& rcldb, bool printnames)
|
||||
{
|
||||
@ -83,7 +70,7 @@ void output_fields(vector<string> fields, Rcl::Doc& doc,
|
||||
string out;
|
||||
if (!it->compare("abstract")) {
|
||||
string abstract;
|
||||
stringAbstract(query, doc, abstract);
|
||||
query.makeDocAbstract(doc, abstract);
|
||||
base64_encode(abstract, out);
|
||||
} else if (!it->compare("xdocid")) {
|
||||
char cdocid[30];
|
||||
@ -425,8 +412,7 @@ endopts:
|
||||
}
|
||||
if (op_flags & OPT_A) {
|
||||
string abstract;
|
||||
stringAbstract(query, doc, abstract);
|
||||
if (abstract.size()) {
|
||||
if (query.makeDocAbstract(doc, abstract)) {
|
||||
cout << "ABSTRACT" << endl;
|
||||
cout << abstract << endl;
|
||||
cout << "/ABSTRACT" << endl;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2008-2019 J.F.Dockes
|
||||
/* Copyright (C) 2008 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -41,6 +41,10 @@
|
||||
using namespace std;
|
||||
|
||||
namespace Rcl {
|
||||
// This is used as a marker inside the abstract frag lists, but
|
||||
// normally doesn't remain in final output (which is built with a
|
||||
// custom sep. by our caller).
|
||||
static const string cstr_ellipsis("...");
|
||||
|
||||
// Field names inside the index data record may differ from the rcldoc ones
|
||||
// (esp.: caption / title)
|
||||
@ -281,6 +285,37 @@ int Query::makeDocAbstract(const Doc &doc, vector<Snippet>& abstract,
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool Query::makeDocAbstract(const Doc &doc, vector<string>& abstract)
|
||||
{
|
||||
vector<Snippet> vpabs;
|
||||
if (!makeDocAbstract(doc, vpabs))
|
||||
return false;
|
||||
for (vector<Snippet>::const_iterator it = vpabs.begin();
|
||||
it != vpabs.end(); it++) {
|
||||
string chunk;
|
||||
if (it->page > 0) {
|
||||
ostringstream ss;
|
||||
ss << it->page;
|
||||
chunk += string(" [p ") + ss.str() + "] ";
|
||||
}
|
||||
chunk += it->snippet;
|
||||
abstract.push_back(chunk);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Query::makeDocAbstract(const Doc &doc, string& abstract)
|
||||
{
|
||||
vector<Snippet> vpabs;
|
||||
if (!makeDocAbstract(doc, vpabs))
|
||||
return false;
|
||||
for (vector<Snippet>::const_iterator it = vpabs.begin();
|
||||
it != vpabs.end(); it++) {
|
||||
abstract.append(it->snippet);
|
||||
abstract.append(cstr_ellipsis);
|
||||
}
|
||||
return m_reason.empty() ? true : false;
|
||||
}
|
||||
|
||||
int Query::getFirstMatchPage(const Doc &doc, string& term)
|
||||
{
|
||||
|
||||
@ -16,14 +16,15 @@
|
||||
*/
|
||||
#ifndef _rclquery_h_included_
|
||||
#define _rclquery_h_included_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include <memory>
|
||||
#include "searchdata.h"
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
namespace Rcl {
|
||||
#endif
|
||||
|
||||
class Db;
|
||||
class Doc;
|
||||
@ -39,8 +40,11 @@ enum abstract_result {
|
||||
class Snippet {
|
||||
public:
|
||||
Snippet(int page, const std::string& snip)
|
||||
: page(page), snippet(snip) { }
|
||||
Snippet& setTerm(const std::string& trm) {
|
||||
: page(page), snippet(snip)
|
||||
{
|
||||
}
|
||||
Snippet& setTerm(const std::string& trm)
|
||||
{
|
||||
term = trm;
|
||||
return *this;
|
||||
}
|
||||
@ -96,13 +100,14 @@ public:
|
||||
bool getQueryTerms(std::vector<std::string>& terms);
|
||||
|
||||
/** Build synthetic abstract for document, extracting chunks relevant for
|
||||
* the input query.
|
||||
* This uses index data only (no access to the file)
|
||||
* For each returned snippet, page is 0 if unknown, else > 0
|
||||
*/
|
||||
* the input query. This uses index data only (no access to the file) */
|
||||
// Abstract returned as one string
|
||||
bool makeDocAbstract(const Doc &doc, std::string& abstract);
|
||||
// Returned as a snippets vector
|
||||
bool makeDocAbstract(const Doc &doc, std::vector<std::string>& abstract);
|
||||
// Returned as a vector of pair<page,snippet> page is 0 if unknown
|
||||
int makeDocAbstract(const Doc &doc, std::vector<Snippet>& abst,
|
||||
int maxoccs= -1, int ctxwords = -1);
|
||||
|
||||
/** Retrieve page number for first match for "significant" query term
|
||||
* @param term returns the chosen term */
|
||||
int getFirstMatchPage(const Doc &doc, std::string& term);
|
||||
@ -140,7 +145,9 @@ private:
|
||||
Query & operator=(const Query &) {return *this;};
|
||||
};
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
}
|
||||
#endif // NO_NAMESPACES
|
||||
|
||||
|
||||
#endif /* _rclquery_h_included_ */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user