Keep only the main Snippet-producing makeDocAbstract() in rclquery; further formatting is now done in the modules that use it
This commit is contained in:
parent
ee5a260d54
commit
34d43d1188
@ -20,6 +20,8 @@
|
|||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
|
#include <sstream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "docseqdb.h"
|
#include "docseqdb.h"
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
@ -27,6 +29,7 @@
|
|||||||
#include "wasatorcl.h"
|
#include "wasatorcl.h"
|
||||||
|
|
||||||
using std::list;
|
using std::list;
|
||||||
|
using std::vector;
|
||||||
|
|
||||||
DocSequenceDb::DocSequenceDb(std::shared_ptr<Rcl::Db> db,
|
DocSequenceDb::DocSequenceDb(std::shared_ptr<Rcl::Db> db,
|
||||||
std::shared_ptr<Rcl::Query> q, const string &t,
|
std::shared_ptr<Rcl::Query> q, const string &t,
|
||||||
@ -114,8 +117,19 @@ bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<string>& vabs)
|
|||||||
return false;
|
return false;
|
||||||
if (m_q->whatDb() &&
|
if (m_q->whatDb() &&
|
||||||
m_queryBuildAbstract && (doc.syntabs || m_queryReplaceAbstract)) {
|
m_queryBuildAbstract && (doc.syntabs || m_queryReplaceAbstract)) {
|
||||||
m_q->makeDocAbstract(doc, vabs);
|
vector<Rcl::Snippet> vpabs;
|
||||||
}
|
m_q->makeDocAbstract(doc, vpabs);
|
||||||
|
for (const auto& snippet : vpabs) {
|
||||||
|
string chunk;
|
||||||
|
if (snippet.page > 0) {
|
||||||
|
std::ostringstream ss;
|
||||||
|
ss << snippet.page;
|
||||||
|
chunk += string(" [p ") + ss.str() + "] ";
|
||||||
|
}
|
||||||
|
chunk += snippet.snippet;
|
||||||
|
vabs.push_back(chunk);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (vabs.empty())
|
if (vabs.empty())
|
||||||
vabs.push_back(doc.meta[Rcl::Doc::keyabs]);
|
vabs.push_back(doc.meta[Rcl::Doc::keyabs]);
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@ -56,6 +56,19 @@ bool dump_contents(RclConfig *rclconfig, Rcl::Doc& idoc)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const string cstr_ellipsis("...");
|
||||||
|
|
||||||
|
static void stringAbstract(Rcl::Query& query, Rcl::Doc doc, string& abstract)
|
||||||
|
{
|
||||||
|
abstract.clear();
|
||||||
|
vector<Rcl::Snippet> vpabs;
|
||||||
|
query.makeDocAbstract(doc, vpabs);
|
||||||
|
for (const auto& snippet : vpabs) {
|
||||||
|
abstract.append(snippet.snippet);
|
||||||
|
abstract.append(cstr_ellipsis);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void output_fields(vector<string> fields, Rcl::Doc& doc,
|
void output_fields(vector<string> fields, Rcl::Doc& doc,
|
||||||
Rcl::Query& query, Rcl::Db& rcldb, bool printnames)
|
Rcl::Query& query, Rcl::Db& rcldb, bool printnames)
|
||||||
{
|
{
|
||||||
@ -70,7 +83,7 @@ void output_fields(vector<string> fields, Rcl::Doc& doc,
|
|||||||
string out;
|
string out;
|
||||||
if (!it->compare("abstract")) {
|
if (!it->compare("abstract")) {
|
||||||
string abstract;
|
string abstract;
|
||||||
query.makeDocAbstract(doc, abstract);
|
stringAbstract(query, doc, abstract);
|
||||||
base64_encode(abstract, out);
|
base64_encode(abstract, out);
|
||||||
} else if (!it->compare("xdocid")) {
|
} else if (!it->compare("xdocid")) {
|
||||||
char cdocid[30];
|
char cdocid[30];
|
||||||
@ -412,7 +425,8 @@ endopts:
|
|||||||
}
|
}
|
||||||
if (op_flags & OPT_A) {
|
if (op_flags & OPT_A) {
|
||||||
string abstract;
|
string abstract;
|
||||||
if (query.makeDocAbstract(doc, abstract)) {
|
stringAbstract(query, doc, abstract);
|
||||||
|
if (abstract.size()) {
|
||||||
cout << "ABSTRACT" << endl;
|
cout << "ABSTRACT" << endl;
|
||||||
cout << abstract << endl;
|
cout << abstract << endl;
|
||||||
cout << "/ABSTRACT" << endl;
|
cout << "/ABSTRACT" << endl;
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2008 J.F.Dockes
|
/* Copyright (C) 2008-2019 J.F.Dockes
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -41,10 +41,6 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
// This is used as a marker inside the abstract frag lists, but
|
|
||||||
// normally doesn't remain in final output (which is built with a
|
|
||||||
// custom sep. by our caller).
|
|
||||||
static const string cstr_ellipsis("...");
|
|
||||||
|
|
||||||
// Field names inside the index data record may differ from the rcldoc ones
|
// Field names inside the index data record may differ from the rcldoc ones
|
||||||
// (esp.: caption / title)
|
// (esp.: caption / title)
|
||||||
@ -285,37 +281,6 @@ int Query::makeDocAbstract(const Doc &doc, vector<Snippet>& abstract,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Query::makeDocAbstract(const Doc &doc, vector<string>& abstract)
|
|
||||||
{
|
|
||||||
vector<Snippet> vpabs;
|
|
||||||
if (!makeDocAbstract(doc, vpabs))
|
|
||||||
return false;
|
|
||||||
for (vector<Snippet>::const_iterator it = vpabs.begin();
|
|
||||||
it != vpabs.end(); it++) {
|
|
||||||
string chunk;
|
|
||||||
if (it->page > 0) {
|
|
||||||
ostringstream ss;
|
|
||||||
ss << it->page;
|
|
||||||
chunk += string(" [p ") + ss.str() + "] ";
|
|
||||||
}
|
|
||||||
chunk += it->snippet;
|
|
||||||
abstract.push_back(chunk);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Query::makeDocAbstract(const Doc &doc, string& abstract)
|
|
||||||
{
|
|
||||||
vector<Snippet> vpabs;
|
|
||||||
if (!makeDocAbstract(doc, vpabs))
|
|
||||||
return false;
|
|
||||||
for (vector<Snippet>::const_iterator it = vpabs.begin();
|
|
||||||
it != vpabs.end(); it++) {
|
|
||||||
abstract.append(it->snippet);
|
|
||||||
abstract.append(cstr_ellipsis);
|
|
||||||
}
|
|
||||||
return m_reason.empty() ? true : false;
|
|
||||||
}
|
|
||||||
|
|
||||||
int Query::getFirstMatchPage(const Doc &doc, string& term)
|
int Query::getFirstMatchPage(const Doc &doc, string& term)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -16,15 +16,14 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _rclquery_h_included_
|
#ifndef _rclquery_h_included_
|
||||||
#define _rclquery_h_included_
|
#define _rclquery_h_included_
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#include "searchdata.h"
|
#include "searchdata.h"
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
#endif
|
|
||||||
|
|
||||||
class Db;
|
class Db;
|
||||||
class Doc;
|
class Doc;
|
||||||
@ -40,20 +39,17 @@ enum abstract_result {
|
|||||||
class Snippet {
|
class Snippet {
|
||||||
public:
|
public:
|
||||||
Snippet(int page, const std::string& snip)
|
Snippet(int page, const std::string& snip)
|
||||||
: page(page), snippet(snip)
|
: page(page), snippet(snip) { }
|
||||||
{
|
Snippet& setTerm(const std::string& trm) {
|
||||||
}
|
term = trm;
|
||||||
Snippet& setTerm(const std::string& trm)
|
return *this;
|
||||||
{
|
|
||||||
term = trm;
|
|
||||||
return *this;
|
|
||||||
}
|
}
|
||||||
int page;
|
int page;
|
||||||
std::string term;
|
std::string term;
|
||||||
std::string snippet;
|
std::string snippet;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An Rcl::Query is a question (SearchData) applied to a
|
* An Rcl::Query is a question (SearchData) applied to a
|
||||||
* database. Handles access to the results. Somewhat equivalent to a
|
* database. Handles access to the results. Somewhat equivalent to a
|
||||||
@ -61,27 +57,27 @@ public:
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
class Query {
|
class Query {
|
||||||
public:
|
public:
|
||||||
Query(Db *db);
|
Query(Db *db);
|
||||||
~Query();
|
~Query();
|
||||||
|
|
||||||
/** Get explanation about last error */
|
/** Get explanation about last error */
|
||||||
std::string getReason() const {
|
std::string getReason() const {
|
||||||
return m_reason;
|
return m_reason;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Choose sort order. Must be called before setQuery */
|
/** Choose sort order. Must be called before setQuery */
|
||||||
void setSortBy(const std::string& fld, bool ascending = true);
|
void setSortBy(const std::string& fld, bool ascending = true);
|
||||||
const std::string& getSortBy() const {
|
const std::string& getSortBy() const {
|
||||||
return m_sortField;
|
return m_sortField;
|
||||||
}
|
}
|
||||||
bool getSortAscending() const {
|
bool getSortAscending() const {
|
||||||
return m_sortAscending;
|
return m_sortAscending;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return or filter results with identical content checksum */
|
/** Return or filter results with identical content checksum */
|
||||||
void setCollapseDuplicates(bool on) {
|
void setCollapseDuplicates(bool on) {
|
||||||
m_collapseDuplicates = on;
|
m_collapseDuplicates = on;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Accept data describing the search and query the index. This can
|
/** Accept data describing the search and query the index. This can
|
||||||
@ -100,21 +96,20 @@ class Query {
|
|||||||
bool getQueryTerms(std::vector<std::string>& terms);
|
bool getQueryTerms(std::vector<std::string>& terms);
|
||||||
|
|
||||||
/** Build synthetic abstract for document, extracting chunks relevant for
|
/** Build synthetic abstract for document, extracting chunks relevant for
|
||||||
* the input query. This uses index data only (no access to the file) */
|
* the input query.
|
||||||
// Abstract returned as one string
|
* This uses index data only (no access to the file)
|
||||||
bool makeDocAbstract(const Doc &doc, std::string& abstract);
|
* For each returned snippet, page is 0 if unknown, else > 0
|
||||||
// Returned as a snippets vector
|
*/
|
||||||
bool makeDocAbstract(const Doc &doc, std::vector<std::string>& abstract);
|
|
||||||
// Returned as a vector of pair<page,snippet> page is 0 if unknown
|
|
||||||
int makeDocAbstract(const Doc &doc, std::vector<Snippet>& abst,
|
int makeDocAbstract(const Doc &doc, std::vector<Snippet>& abst,
|
||||||
int maxoccs= -1, int ctxwords = -1);
|
int maxoccs= -1, int ctxwords = -1);
|
||||||
|
|
||||||
/** Retrieve page number for first match for "significant" query term
|
/** Retrieve page number for first match for "significant" query term
|
||||||
* @param term returns the chosen term */
|
* @param term returns the chosen term */
|
||||||
int getFirstMatchPage(const Doc &doc, std::string& term);
|
int getFirstMatchPage(const Doc &doc, std::string& term);
|
||||||
|
|
||||||
/** Retrieve a reference to the searchData we are using */
|
/** Retrieve a reference to the searchData we are using */
|
||||||
std::shared_ptr<SearchData> getSD() {
|
std::shared_ptr<SearchData> getSD() {
|
||||||
return m_sd;
|
return m_sd;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Expand query to look for documents like the one passed in */
|
/** Expand query to look for documents like the one passed in */
|
||||||
@ -122,7 +117,7 @@ class Query {
|
|||||||
|
|
||||||
/** Return the Db we're set for */
|
/** Return the Db we're set for */
|
||||||
Db *whatDb() const {
|
Db *whatDb() const {
|
||||||
return m_db;
|
return m_db;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* make this public for access from embedded Db::Native */
|
/* make this public for access from embedded Db::Native */
|
||||||
@ -145,9 +140,7 @@ private:
|
|||||||
Query & operator=(const Query &) {return *this;};
|
Query & operator=(const Query &) {return *this;};
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
|
||||||
}
|
}
|
||||||
#endif // NO_NAMESPACES
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* _rclquery_h_included_ */
|
#endif /* _rclquery_h_included_ */
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user