Add support for "issub" special field specifying that the results should be standalone (issub:0) or embedded (issub:1)
This commit is contained in:
parent
d1058dc676
commit
e5c320ca51
@ -50,19 +50,6 @@ std::shared_ptr<SearchData> wasaStringToRcl(
|
|||||||
return sd;
|
return sd;
|
||||||
}
|
}
|
||||||
|
|
||||||
WasaParserDriver::WasaParserDriver(const RclConfig *c, const std::string sl,
|
|
||||||
const std::string& as)
|
|
||||||
: m_stemlang(sl), m_autosuffs(as), m_config(c),
|
|
||||||
m_index(0), m_result(0), m_haveDates(false),
|
|
||||||
m_maxSize(-1), m_minSize(-1)
|
|
||||||
{
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
WasaParserDriver::~WasaParserDriver()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
SearchData *WasaParserDriver::parse(const std::string& in)
|
SearchData *WasaParserDriver::parse(const std::string& in)
|
||||||
{
|
{
|
||||||
m_input = in;
|
m_input = in;
|
||||||
@ -83,13 +70,11 @@ SearchData *WasaParserDriver::parse(const std::string& in)
|
|||||||
return m_result;
|
return m_result;
|
||||||
|
|
||||||
// Set the top level filters (types, dates, size)
|
// Set the top level filters (types, dates, size)
|
||||||
for (vector<string>::const_iterator it = m_filetypes.begin();
|
for (const auto& ft : m_filetypes) {
|
||||||
it != m_filetypes.end(); it++) {
|
m_result->addFiletype(ft);
|
||||||
m_result->addFiletype(*it);
|
|
||||||
}
|
}
|
||||||
for (vector<string>::const_iterator it = m_nfiletypes.begin();
|
for (const auto& ft : m_nfiletypes) {
|
||||||
it != m_nfiletypes.end(); it++) {
|
m_result->remFiletype(ft);
|
||||||
m_result->remFiletype(*it);
|
|
||||||
}
|
}
|
||||||
if (m_haveDates) {
|
if (m_haveDates) {
|
||||||
m_result->setDateSpan(&m_dates);
|
m_result->setDateSpan(&m_dates);
|
||||||
@ -100,6 +85,10 @@ SearchData *WasaParserDriver::parse(const std::string& in)
|
|||||||
if (m_maxSize != -1) {
|
if (m_maxSize != -1) {
|
||||||
m_result->setMaxSize(m_maxSize);
|
m_result->setMaxSize(m_maxSize);
|
||||||
}
|
}
|
||||||
|
if (m_subSpec != Rcl::SearchData::SUBDOC_ANY) {
|
||||||
|
m_result->setSubSpec(m_subSpec);
|
||||||
|
}
|
||||||
|
|
||||||
//if (m_result) m_result->dump(cout);
|
//if (m_result) m_result->dump(cout);
|
||||||
return m_result;
|
return m_result;
|
||||||
}
|
}
|
||||||
@ -122,8 +111,7 @@ void WasaParserDriver::UNGETCHAR(int c)
|
|||||||
|
|
||||||
// Add clause to query, handling special pseudo-clauses for size/date
|
// Add clause to query, handling special pseudo-clauses for size/date
|
||||||
// etc. (mostly determined on field name).
|
// etc. (mostly determined on field name).
|
||||||
bool WasaParserDriver::addClause(SearchData *sd,
|
bool WasaParserDriver::addClause(SearchData *sd, SearchDataClauseSimple* cl)
|
||||||
SearchDataClauseSimple* cl)
|
|
||||||
{
|
{
|
||||||
if (cl->getfield().empty()) {
|
if (cl->getfield().empty()) {
|
||||||
// Simple clause with empty field spec.
|
// Simple clause with empty field spec.
|
||||||
@ -156,6 +144,13 @@ bool WasaParserDriver::addClause(SearchData *sd,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Filtering for standalone- or sub-documents
|
||||||
|
if (!fld.compare("issub")) {
|
||||||
|
m_subSpec = atoi(cl->gettext().c_str());
|
||||||
|
delete cl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (!fld.compare("rclcat") || !fld.compare("type")) {
|
if (!fld.compare("rclcat") || !fld.compare("type")) {
|
||||||
vector<string> mtypes;
|
vector<string> mtypes;
|
||||||
if (m_config && m_config->getMimeCatTypes(cl->gettext(), mtypes)) {
|
if (m_config && m_config->getMimeCatTypes(cl->gettext(), mtypes)) {
|
||||||
@ -231,8 +226,7 @@ bool WasaParserDriver::addClause(SearchData *sd,
|
|||||||
|
|
||||||
if (!fld.compare("dir")) {
|
if (!fld.compare("dir")) {
|
||||||
// dir filtering special case
|
// dir filtering special case
|
||||||
SearchDataClausePath *nclause =
|
SearchDataClausePath *nclause = new SearchDataClausePath(cl->gettext(), cl->getexclude());
|
||||||
new SearchDataClausePath(cl->gettext(), cl->getexclude());
|
|
||||||
delete cl;
|
delete cl;
|
||||||
return sd->addClause(nclause);
|
return sd->addClause(nclause);
|
||||||
}
|
}
|
||||||
@ -258,8 +252,7 @@ bool WasaParserDriver::addClause(SearchData *sd,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (tp != SCLT_FILENAME) {
|
if (tp != SCLT_FILENAME) {
|
||||||
SearchDataClauseSimple *ncl =
|
SearchDataClauseSimple *ncl = new SearchDataClauseSimple(tp, ns, ofld);
|
||||||
new SearchDataClauseSimple(tp, ns, ofld);
|
|
||||||
delete cl;
|
delete cl;
|
||||||
return sd->addClause(ncl);
|
return sd->addClause(ncl);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -22,14 +22,12 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
|
#include "searchdata.h"
|
||||||
|
|
||||||
class WasaParserDriver;
|
class WasaParserDriver;
|
||||||
namespace Rcl {
|
|
||||||
class SearchData;
|
|
||||||
class SearchDataClauseSimple;
|
|
||||||
}
|
|
||||||
namespace yy {
|
namespace yy {
|
||||||
class parser;
|
class parser;
|
||||||
}
|
}
|
||||||
|
|
||||||
class RclConfig;
|
class RclConfig;
|
||||||
@ -37,9 +35,10 @@ class RclConfig;
|
|||||||
class WasaParserDriver {
|
class WasaParserDriver {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
WasaParserDriver(const RclConfig *c, const std::string sl,
|
WasaParserDriver(const RclConfig *c, const std::string sl, const std::string& as)
|
||||||
const std::string& as);
|
: m_stemlang(sl), m_autosuffs(as), m_config(c) {}
|
||||||
~WasaParserDriver();
|
|
||||||
|
~WasaParserDriver() {}
|
||||||
|
|
||||||
Rcl::SearchData *parse(const std::string&);
|
Rcl::SearchData *parse(const std::string&);
|
||||||
bool addClause(Rcl::SearchData *sd, Rcl::SearchDataClauseSimple* cl);
|
bool addClause(Rcl::SearchData *sd, Rcl::SearchDataClauseSimple* cl);
|
||||||
@ -67,20 +66,20 @@ private:
|
|||||||
// input string.
|
// input string.
|
||||||
std::string m_input;
|
std::string m_input;
|
||||||
// Current position in m_input
|
// Current position in m_input
|
||||||
unsigned int m_index;
|
unsigned int m_index{0};
|
||||||
// Characters pushed-back, ready for next getchar.
|
// Characters pushed-back, ready for next getchar.
|
||||||
std::stack<int> m_returns;
|
std::stack<int> m_returns;
|
||||||
// Result, set by parser.
|
// Result, set by parser.
|
||||||
Rcl::SearchData *m_result;
|
Rcl::SearchData *m_result{nullptr};
|
||||||
|
|
||||||
// Storage for top level filters
|
// Storage for top level filters
|
||||||
std::vector<std::string> m_filetypes;
|
std::vector<std::string> m_filetypes;
|
||||||
std::vector<std::string> m_nfiletypes;
|
std::vector<std::string> m_nfiletypes;
|
||||||
bool m_haveDates;
|
bool m_haveDates{false};
|
||||||
DateInterval m_dates; // Restrict to date interval
|
DateInterval m_dates; // Restrict to date interval
|
||||||
int64_t m_maxSize;
|
int64_t m_maxSize{-1};
|
||||||
int64_t m_minSize;
|
int64_t m_minSize{-1};
|
||||||
|
int m_subSpec{Rcl::SearchData::SUBDOC_ANY};
|
||||||
std::string m_reason;
|
std::string m_reason;
|
||||||
|
|
||||||
// Let the quoted string reader store qualifiers in there, simpler
|
// Let the quoted string reader store qualifiers in there, simpler
|
||||||
|
|||||||
@ -2557,7 +2557,7 @@ bool Db::getSubDocs(const Doc &idoc, vector<Doc>& subdocs)
|
|||||||
LOGERR("Db::getSubDocs: xapian error: " << m_reason << "\n");
|
LOGERR("Db::getSubDocs: xapian error: " << m_reason << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (xit == xdoc.termlist_end()) {
|
if (xit == xdoc.termlist_end() || get_prefix(*xit) != parent_prefix) {
|
||||||
LOGERR("Db::getSubDocs: parent term not found\n");
|
LOGERR("Db::getSubDocs: parent term not found\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -2642,7 +2642,7 @@ bool Db::getContainerDoc(const Doc &idoc, Doc& ctdoc)
|
|||||||
LOGERR("Db::getContainerDoc: xapian error: " << m_reason << "\n");
|
LOGERR("Db::getContainerDoc: xapian error: " << m_reason << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (xit == xdoc.termlist_end()) {
|
if (xit == xdoc.termlist_end() || get_prefix(*xit) != parent_prefix) {
|
||||||
LOGERR("Db::getContainerDoc: parent term not found\n");
|
LOGERR("Db::getContainerDoc: parent term not found\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -151,6 +151,20 @@ inline string strip_prefix(const string& trm)
|
|||||||
return trm.substr(st);
|
return trm.substr(st);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extract the prefix part of an index term.
 *
 * @param trm index term, possibly prefixed.
 * @return the bare prefix (without any wrapping ':' characters), or an
 *   empty string if the term carries no prefix. Returning the empty
 *   string (and not the term itself) guarantees that an unprefixed term
 *   can never compare equal to a real prefix value.
 */
inline string get_prefix(const string& trm)
{
    if (!has_prefix(trm))
        return string();
    if (o_index_stripchars) {
        // Stripped index: the prefix is the leading run of upper-case
        // prefix letters. If the whole term is made of such letters,
        // find_first_not_of() yields npos and substr() returns it all.
        string::size_type st = trm.find_first_not_of("ABCDEFIJKLMNOPQRSTUVWXYZ");
        return trm.substr(0, st);
    } else {
        // Raw index: the prefix is wrapped as ":PFX:value". Look for the
        // FIRST ':' after the leading one: the value part may itself
        // contain ':' characters, which must not be taken as the end of
        // the prefix.
        string::size_type st = trm.find_first_of(':', 1);
        if (st == string::npos)
            return string();
        return trm.substr(1, st - 1);
    }
}
|
||||||
|
|
||||||
inline string wrap_prefix(const string& pfx)
|
inline string wrap_prefix(const string& pfx)
|
||||||
{
|
{
|
||||||
if (o_index_stripchars) {
|
if (o_index_stripchars) {
|
||||||
|
|||||||
@ -152,8 +152,7 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
Query::Query(Db *db)
|
Query::Query(Db *db)
|
||||||
: m_nq(new Native(this)), m_db(db), m_sorter(0), m_sortAscending(true),
|
: m_nq(new Native(this)), m_db(db)
|
||||||
m_collapseDuplicates(false), m_resCnt(-1), m_snipMaxPosWalk(1000000)
|
|
||||||
{
|
{
|
||||||
if (db)
|
if (db)
|
||||||
db->getConf()->getConfParam("snippetMaxPosWalk", &m_snipMaxPosWalk);
|
db->getConf()->getConfParam("snippetMaxPosWalk", &m_snipMaxPosWalk);
|
||||||
@ -179,6 +178,27 @@ void Query::setSortBy(const string& fld, bool ascending) {
|
|||||||
(m_sortAscending ? "ascending" : "descending") << "\n");
|
(m_sortAscending ? "ascending" : "descending") << "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const string parent_prefix{"F"};
|
||||||
|
|
||||||
|
class SubdocDecider : public Xapian::MatchDecider {
|
||||||
|
public:
|
||||||
|
SubdocDecider(bool sel) : MatchDecider(), m_select(sel) {}
|
||||||
|
virtual ~SubdocDecider() {}
|
||||||
|
|
||||||
|
virtual bool operator()(const Xapian::Document &doc) const {
|
||||||
|
bool hasparent{false};
|
||||||
|
try {
|
||||||
|
Xapian::TermIterator xit = doc.termlist_begin();
|
||||||
|
xit.skip_to(wrap_prefix(parent_prefix));
|
||||||
|
hasparent = (xit != doc.termlist_end()) && (get_prefix(*xit) == parent_prefix);
|
||||||
|
} catch (...) {
|
||||||
|
}
|
||||||
|
return hasparent == m_select;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool m_select;
|
||||||
|
};
|
||||||
|
|
||||||
// Prepare query out of user search data
|
// Prepare query out of user search data
|
||||||
bool Query::setQuery(std::shared_ptr<SearchData> sdata)
|
bool Query::setQuery(std::shared_ptr<SearchData> sdata)
|
||||||
{
|
{
|
||||||
@ -199,9 +219,14 @@ bool Query::setQuery(std::shared_ptr<SearchData> sdata)
|
|||||||
m_reason += sdata->getReason();
|
m_reason += sdata->getReason();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_nq->xquery = xq;
|
m_nq->xquery = xq;
|
||||||
|
|
||||||
|
if (sdata->getSubSpec() == SearchData::SUBDOC_NO) {
|
||||||
|
m_nq->subdecider = new SubdocDecider(false);
|
||||||
|
} else if (sdata->getSubSpec() == SearchData::SUBDOC_YES) {
|
||||||
|
m_nq->subdecider = new SubdocDecider(true);
|
||||||
|
}
|
||||||
|
|
||||||
string d;
|
string d;
|
||||||
for (int tries = 0; tries < 2; tries++) {
|
for (int tries = 0; tries < 2; tries++) {
|
||||||
try {
|
try {
|
||||||
@ -361,7 +386,8 @@ int Query::getResCnt(int checkatleast, bool useestimate)
|
|||||||
Chrono chron;
|
Chrono chron;
|
||||||
XAPTRY(if (checkatleast == -1)
|
XAPTRY(if (checkatleast == -1)
|
||||||
checkatleast = m_db->docCnt();
|
checkatleast = m_db->docCnt();
|
||||||
m_nq->xmset = m_nq->xenquire->get_mset(0, qquantum, checkatleast),
|
m_nq->xmset = m_nq->xenquire->get_mset(
|
||||||
|
0, qquantum, checkatleast, 0, m_nq->subdecider),
|
||||||
m_db->m_ndb->xrdb, m_reason);
|
m_db->m_ndb->xrdb, m_reason);
|
||||||
if (!m_reason.empty()) {
|
if (!m_reason.empty()) {
|
||||||
LOGERR("xenquire->get_mset: exception: " << m_reason << "\n");
|
LOGERR("xenquire->get_mset: exception: " << m_reason << "\n");
|
||||||
@ -401,10 +427,9 @@ bool Query::getDoc(int xapi, Doc &doc, bool fetchtext)
|
|||||||
if (!(xapi >= first && xapi <= last)) {
|
if (!(xapi >= first && xapi <= last)) {
|
||||||
LOGDEB("Fetching for first " << xapi << ", count " << qquantum << "\n");
|
LOGDEB("Fetching for first " << xapi << ", count " << qquantum << "\n");
|
||||||
|
|
||||||
XAPTRY(m_nq->xmset = m_nq->xenquire->get_mset(xapi, qquantum,
|
XAPTRY(m_nq->xmset = m_nq->xenquire->get_mset(
|
||||||
(const Xapian::RSet *)0),
|
xapi, qquantum, nullptr, m_nq->subdecider),
|
||||||
m_db->m_ndb->xrdb, m_reason);
|
m_db->m_ndb->xrdb, m_reason);
|
||||||
|
|
||||||
if (!m_reason.empty()) {
|
if (!m_reason.empty()) {
|
||||||
LOGERR("enquire->get_mset: exception: " << m_reason << "\n");
|
LOGERR("enquire->get_mset: exception: " << m_reason << "\n");
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@ -139,13 +139,13 @@ public:
|
|||||||
private:
|
private:
|
||||||
std::string m_reason; // Error explanation
|
std::string m_reason; // Error explanation
|
||||||
Db *m_db;
|
Db *m_db;
|
||||||
void *m_sorter;
|
void *m_sorter{nullptr};
|
||||||
std::string m_sortField;
|
std::string m_sortField;
|
||||||
bool m_sortAscending;
|
bool m_sortAscending{true};
|
||||||
bool m_collapseDuplicates;
|
bool m_collapseDuplicates{false};
|
||||||
int m_resCnt;
|
int m_resCnt{-1};
|
||||||
std::shared_ptr<SearchData> m_sd;
|
std::shared_ptr<SearchData> m_sd;
|
||||||
int m_snipMaxPosWalk;
|
int m_snipMaxPosWalk{1000000};
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
#ifndef NO_NAMESPACES
|
||||||
|
|||||||
@ -32,24 +32,26 @@ namespace Rcl {
|
|||||||
class Query::Native {
|
class Query::Native {
|
||||||
public:
|
public:
|
||||||
// The query I belong to
|
// The query I belong to
|
||||||
Query *m_q;
|
Query *m_q{nullptr};
|
||||||
// query descriptor: terms and subqueries joined by operators
|
// query descriptor: terms and subqueries joined by operators
|
||||||
// (or/and etc...)
|
// (or/and etc...)
|
||||||
Xapian::Query xquery;
|
Xapian::Query xquery;
|
||||||
// Open query descriptor.
|
// Open query descriptor.
|
||||||
Xapian::Enquire *xenquire;
|
Xapian::Enquire *xenquire{nullptr};
|
||||||
// Partial result set
|
// Partial result set
|
||||||
Xapian::MSet xmset;
|
Xapian::MSet xmset;
|
||||||
// Term frequencies for current query. See makeAbstract, setQuery
|
// Term frequencies for current query. See makeAbstract, setQuery
|
||||||
std::map<std::string, double> termfreqs;
|
std::map<std::string, double> termfreqs;
|
||||||
|
Xapian::MatchDecider *subdecider{nullptr};
|
||||||
|
|
||||||
Native(Query *q)
|
Native(Query *q)
|
||||||
: m_q(q), xenquire(0) { }
|
: m_q(q), xenquire(0) {}
|
||||||
~Native() {
|
~Native() {
|
||||||
clear();
|
clear();
|
||||||
}
|
}
|
||||||
void clear() {
|
void clear() {
|
||||||
delete xenquire; xenquire = 0;
|
deleteZ(xenquire);
|
||||||
|
deleteZ(subdecider);
|
||||||
termfreqs.clear();
|
termfreqs.clear();
|
||||||
}
|
}
|
||||||
/** Return a list of terms which matched for a specific result document */
|
/** Return a list of terms which matched for a specific result document */
|
||||||
|
|||||||
@ -114,6 +114,17 @@ public:
|
|||||||
void setMinSize(int64_t size) {m_minSize = size;}
|
void setMinSize(int64_t size) {m_minSize = size;}
|
||||||
void setMaxSize(int64_t size) {m_maxSize = size;}
|
void setMaxSize(int64_t size) {m_maxSize = size;}
|
||||||
|
|
||||||
|
enum SubdocSpec {SUBDOC_ANY = -1, SUBDOC_NO = 0, SUBDOC_YES = 1};
|
||||||
|
void setSubSpec(int spec) {
|
||||||
|
switch (spec) {
|
||||||
|
case SUBDOC_ANY:
|
||||||
|
case SUBDOC_NO:
|
||||||
|
case SUBDOC_YES:
|
||||||
|
m_subspec = spec;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int getSubSpec() {return m_subspec;}
|
||||||
|
|
||||||
/** Set date span for filtering results */
|
/** Set date span for filtering results */
|
||||||
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
|
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
|
||||||
|
|
||||||
@ -174,11 +185,13 @@ private:
|
|||||||
std::shared_ptr<SearchDataClauseDist> m_autophrase;
|
std::shared_ptr<SearchDataClauseDist> m_autophrase;
|
||||||
|
|
||||||
// Special stuff produced by input which looks like a clause but means
|
// Special stuff produced by input which looks like a clause but means
|
||||||
// something else (date and size specs)
|
// something else (date, size specs, etc.)
|
||||||
bool m_haveDates{false};
|
bool m_haveDates{false};
|
||||||
DateInterval m_dates; // Restrict to date interval
|
DateInterval m_dates; // Restrict to date interval
|
||||||
int64_t m_maxSize{-1};
|
int64_t m_maxSize{-1};
|
||||||
int64_t m_minSize{-1};
|
int64_t m_minSize{-1};
|
||||||
|
// Filtering for subdocs: -1:any, 0: only free-standing, 1: only subdocs
|
||||||
|
int m_subspec{SUBDOC_ANY};
|
||||||
|
|
||||||
// Printable expanded version of the complete query, retrieved/set
|
// Printable expanded version of the complete query, retrieved/set
|
||||||
// from rcldb after the Xapian::setQuery() call
|
// from rcldb after the Xapian::setQuery() call
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user