From 34d43d1188adfddb8fd8a4f7c7a28158a8b534f4 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Mon, 13 May 2019 18:11:23 +0200 Subject: [PATCH] Keep only the main Snippet-producing makeabstract in rclquery, further formatting done in using modules --- src/query/docseqdb.cpp | 18 ++++++++++++++-- src/query/recollq.cpp | 18 ++++++++++++++-- src/rcldb/rclquery.cpp | 37 +-------------------------------- src/rcldb/rclquery.h | 47 ++++++++++++++++++------------------------ 4 files changed, 53 insertions(+), 67 deletions(-) diff --git a/src/query/docseqdb.cpp b/src/query/docseqdb.cpp index 2343e56e..2003e465 100644 --- a/src/query/docseqdb.cpp +++ b/src/query/docseqdb.cpp @@ -20,6 +20,8 @@ #include #include +#include +#include #include "docseqdb.h" #include "rcldb.h" @@ -27,6 +29,7 @@ #include "wasatorcl.h" using std::list; +using std::vector; DocSequenceDb::DocSequenceDb(std::shared_ptr db, std::shared_ptr q, const string &t, @@ -114,8 +117,19 @@ bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector& vabs) return false; if (m_q->whatDb() && m_queryBuildAbstract && (doc.syntabs || m_queryReplaceAbstract)) { - m_q->makeDocAbstract(doc, vabs); - } + vector vpabs; + m_q->makeDocAbstract(doc, vpabs); + for (const auto& snippet : vpabs) { + string chunk; + if (snippet.page > 0) { + std::ostringstream ss; + ss << snippet.page; + chunk += string(" [p ") + ss.str() + "] "; + } + chunk += snippet.snippet; + vabs.push_back(chunk); + } + } if (vabs.empty()) vabs.push_back(doc.meta[Rcl::Doc::keyabs]); return true; diff --git a/src/query/recollq.cpp b/src/query/recollq.cpp index e2483f07..8d7fb2dc 100644 --- a/src/query/recollq.cpp +++ b/src/query/recollq.cpp @@ -56,6 +56,19 @@ bool dump_contents(RclConfig *rclconfig, Rcl::Doc& idoc) return true; } +static const string cstr_ellipsis("..."); + +static void stringAbstract(Rcl::Query& query, Rcl::Doc doc, string& abstract) +{ + abstract.clear(); + vector vpabs; + query.makeDocAbstract(doc, vpabs); + for (const auto& snippet : vpabs) { + abstract.append(snippet.snippet); + abstract.append(cstr_ellipsis); + } +} + void output_fields(vector fields, Rcl::Doc& doc, Rcl::Query& query, Rcl::Db& rcldb, bool printnames) { @@ -70,7 +83,7 @@ void output_fields(vector fields, Rcl::Doc& doc, string out; if (!it->compare("abstract")) { string abstract; - query.makeDocAbstract(doc, abstract); + stringAbstract(query, doc, abstract); base64_encode(abstract, out); } else if (!it->compare("xdocid")) { char cdocid[30]; @@ -412,7 +425,8 @@ endopts: } if (op_flags & OPT_A) { string abstract; - if (query.makeDocAbstract(doc, abstract)) { + stringAbstract(query, doc, abstract); + if (abstract.size()) { cout << "ABSTRACT" << endl; cout << abstract << endl; cout << "/ABSTRACT" << endl; diff --git a/src/rcldb/rclquery.cpp b/src/rcldb/rclquery.cpp index 7d9e66e1..ff031189 100644 --- a/src/rcldb/rclquery.cpp +++ b/src/rcldb/rclquery.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2008 J.F.Dockes +/* Copyright (C) 2008-2019 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -41,10 +41,6 @@ using namespace std; namespace Rcl { -// This is used as a marker inside the abstract frag lists, but -// normally doesn't remain in final output (which is built with a -// custom sep. by our caller). -static const string cstr_ellipsis("..."); // Field names inside the index data record may differ from the rcldoc ones // (esp.: caption / title) @@ -285,37 +281,6 @@ int Query::makeDocAbstract(const Doc &doc, vector& abstract, return ret; } -bool Query::makeDocAbstract(const Doc &doc, vector& abstract) -{ - vector vpabs; - if (!makeDocAbstract(doc, vpabs)) - return false; - for (vector::const_iterator it = vpabs.begin(); - it != vpabs.end(); it++) { - string chunk; - if (it->page > 0) { - ostringstream ss; - ss << it->page; - chunk += string(" [p ") + ss.str() + "] "; - } - chunk += it->snippet; - abstract.push_back(chunk); - } - return true; -} - -bool Query::makeDocAbstract(const Doc &doc, string& abstract) -{ - vector vpabs; - if (!makeDocAbstract(doc, vpabs)) - return false; - for (vector::const_iterator it = vpabs.begin(); - it != vpabs.end(); it++) { - abstract.append(it->snippet); - abstract.append(cstr_ellipsis); - } - return m_reason.empty() ? true : false; -} int Query::getFirstMatchPage(const Doc &doc, string& term) { diff --git a/src/rcldb/rclquery.h b/src/rcldb/rclquery.h index 55b502ac..0d00379c 100644 --- a/src/rcldb/rclquery.h +++ b/src/rcldb/rclquery.h @@ -16,15 +16,14 @@ */ #ifndef _rclquery_h_included_ #define _rclquery_h_included_ + #include #include - #include + #include "searchdata.h" -#ifndef NO_NAMESPACES namespace Rcl { -#endif class Db; class Doc; @@ -40,20 +39,17 @@ enum abstract_result { class Snippet { public: Snippet(int page, const std::string& snip) - : page(page), snippet(snip) - { - } - Snippet& setTerm(const std::string& trm) - { - term = trm; - return *this; + : page(page), snippet(snip) { } + Snippet& setTerm(const std::string& trm) { + term = trm; + return *this; } int page; std::string term; std::string snippet; }; - + /** * An Rcl::Query is a question (SearchData) applied to a * database. Handles access to the results. Somewhat equivalent to a @@ -61,27 +57,27 @@ public: * */ class Query { - public: +public: Query(Db *db); ~Query(); /** Get explanation about last error */ std::string getReason() const { - return m_reason; + return m_reason; } /** Choose sort order. Must be called before setQuery */ void setSortBy(const std::string& fld, bool ascending = true); const std::string& getSortBy() const { - return m_sortField; + return m_sortField; } bool getSortAscending() const { - return m_sortAscending; + return m_sortAscending; } /** Return or filter results with identical content checksum */ void setCollapseDuplicates(bool on) { - m_collapseDuplicates = on; + m_collapseDuplicates = on; } /** Accept data describing the search and query the index. This can @@ -100,21 +96,20 @@ class Query { bool getQueryTerms(std::vector& terms); /** Build synthetic abstract for document, extracting chunks relevant for - * the input query. This uses index data only (no access to the file) */ - // Abstract returned as one string - bool makeDocAbstract(const Doc &doc, std::string& abstract); - // Returned as a snippets vector - bool makeDocAbstract(const Doc &doc, std::vector& abstract); - // Returned as a vector of pair page is 0 if unknown + * the input query. + * This uses index data only (no access to the file) + * For each returned snippet, page is 0 if unknown, else > 0 + */ int makeDocAbstract(const Doc &doc, std::vector& abst, - int maxoccs= -1, int ctxwords = -1); + int maxoccs= -1, int ctxwords = -1); + /** Retrieve page number for first match for "significant" query term * @param term returns the chosen term */ int getFirstMatchPage(const Doc &doc, std::string& term); /** Retrieve a reference to the searchData we are using */ std::shared_ptr getSD() { - return m_sd; + return m_sd; } /** Expand query to look for documents like the one passed in */ @@ -122,7 +117,7 @@ class Query { /** Return the Db we're set for */ Db *whatDb() const { - return m_db; + return m_db; } /* make this public for access from embedded Db::Native */ @@ -145,9 +140,7 @@ private: Query & operator=(const Query &) {return *this;}; }; -#ifndef NO_NAMESPACES } -#endif // NO_NAMESPACES #endif /* _rclquery_h_included_ */