Add support for the "issub" special field, which restricts results to standalone documents (issub:0) or to embedded sub-documents (issub:1)
This commit is contained in:
parent
d1058dc676
commit
e5c320ca51
@ -50,19 +50,6 @@ std::shared_ptr<SearchData> wasaStringToRcl(
|
||||
return sd;
|
||||
}
|
||||
|
||||
WasaParserDriver::WasaParserDriver(const RclConfig *c, const std::string sl,
|
||||
const std::string& as)
|
||||
: m_stemlang(sl), m_autosuffs(as), m_config(c),
|
||||
m_index(0), m_result(0), m_haveDates(false),
|
||||
m_maxSize(-1), m_minSize(-1)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
// Nothing to release explicitly: the destructor body is empty.
WasaParserDriver::~WasaParserDriver() = default;
|
||||
|
||||
SearchData *WasaParserDriver::parse(const std::string& in)
|
||||
{
|
||||
m_input = in;
|
||||
@ -83,13 +70,11 @@ SearchData *WasaParserDriver::parse(const std::string& in)
|
||||
return m_result;
|
||||
|
||||
// Set the top level filters (types, dates, size)
|
||||
for (vector<string>::const_iterator it = m_filetypes.begin();
|
||||
it != m_filetypes.end(); it++) {
|
||||
m_result->addFiletype(*it);
|
||||
for (const auto& ft : m_filetypes) {
|
||||
m_result->addFiletype(ft);
|
||||
}
|
||||
for (vector<string>::const_iterator it = m_nfiletypes.begin();
|
||||
it != m_nfiletypes.end(); it++) {
|
||||
m_result->remFiletype(*it);
|
||||
for (const auto& ft : m_nfiletypes) {
|
||||
m_result->remFiletype(ft);
|
||||
}
|
||||
if (m_haveDates) {
|
||||
m_result->setDateSpan(&m_dates);
|
||||
@ -100,6 +85,10 @@ SearchData *WasaParserDriver::parse(const std::string& in)
|
||||
if (m_maxSize != -1) {
|
||||
m_result->setMaxSize(m_maxSize);
|
||||
}
|
||||
if (m_subSpec != Rcl::SearchData::SUBDOC_ANY) {
|
||||
m_result->setSubSpec(m_subSpec);
|
||||
}
|
||||
|
||||
//if (m_result) m_result->dump(cout);
|
||||
return m_result;
|
||||
}
|
||||
@ -122,8 +111,7 @@ void WasaParserDriver::UNGETCHAR(int c)
|
||||
|
||||
// Add clause to query, handling special pseudo-clauses for size/date
|
||||
// etc. (mostly determined on field name).
|
||||
bool WasaParserDriver::addClause(SearchData *sd,
|
||||
SearchDataClauseSimple* cl)
|
||||
bool WasaParserDriver::addClause(SearchData *sd, SearchDataClauseSimple* cl)
|
||||
{
|
||||
if (cl->getfield().empty()) {
|
||||
// Simple clause with empty field spec.
|
||||
@ -132,7 +120,7 @@ bool WasaParserDriver::addClause(SearchData *sd,
|
||||
if (!m_autosuffs.empty()) {
|
||||
vector<string> asfv;
|
||||
if (stringToStrings(m_autosuffs, asfv)) {
|
||||
if (find_if(asfv.begin(), asfv.end(),
|
||||
if (find_if(asfv.begin(), asfv.end(),
|
||||
StringIcmpPred(cl->gettext())) != asfv.end()) {
|
||||
cl->setfield("ext");
|
||||
cl->addModifier(SearchDataClause::SDCM_NOSTEMMING);
|
||||
@ -156,6 +144,13 @@ bool WasaParserDriver::addClause(SearchData *sd,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Filtering for standalone- or sub-documents
|
||||
if (!fld.compare("issub")) {
|
||||
m_subSpec = atoi(cl->gettext().c_str());
|
||||
delete cl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!fld.compare("rclcat") || !fld.compare("type")) {
|
||||
vector<string> mtypes;
|
||||
if (m_config && m_config->getMimeCatTypes(cl->gettext(), mtypes)) {
|
||||
@ -231,8 +226,7 @@ bool WasaParserDriver::addClause(SearchData *sd,
|
||||
|
||||
if (!fld.compare("dir")) {
|
||||
// dir filtering special case
|
||||
SearchDataClausePath *nclause =
|
||||
new SearchDataClausePath(cl->gettext(), cl->getexclude());
|
||||
SearchDataClausePath *nclause = new SearchDataClausePath(cl->gettext(), cl->getexclude());
|
||||
delete cl;
|
||||
return sd->addClause(nclause);
|
||||
}
|
||||
@ -258,8 +252,7 @@ bool WasaParserDriver::addClause(SearchData *sd,
|
||||
}
|
||||
|
||||
if (tp != SCLT_FILENAME) {
|
||||
SearchDataClauseSimple *ncl =
|
||||
new SearchDataClauseSimple(tp, ns, ofld);
|
||||
SearchDataClauseSimple *ncl = new SearchDataClauseSimple(tp, ns, ofld);
|
||||
delete cl;
|
||||
return sd->addClause(ncl);
|
||||
}
|
||||
|
||||
@ -22,14 +22,12 @@
|
||||
#include <vector>
|
||||
|
||||
#include "smallut.h"
|
||||
#include "searchdata.h"
|
||||
|
||||
class WasaParserDriver;
|
||||
namespace Rcl {
|
||||
class SearchData;
|
||||
class SearchDataClauseSimple;
|
||||
}
|
||||
|
||||
namespace yy {
|
||||
class parser;
|
||||
class parser;
|
||||
}
|
||||
|
||||
class RclConfig;
|
||||
@ -37,9 +35,10 @@ class RclConfig;
|
||||
class WasaParserDriver {
|
||||
public:
|
||||
|
||||
WasaParserDriver(const RclConfig *c, const std::string sl,
|
||||
const std::string& as);
|
||||
~WasaParserDriver();
|
||||
WasaParserDriver(const RclConfig *c, const std::string sl, const std::string& as)
|
||||
: m_stemlang(sl), m_autosuffs(as), m_config(c) {}
|
||||
|
||||
// No explicit cleanup is performed.
~WasaParserDriver() = default;
|
||||
|
||||
Rcl::SearchData *parse(const std::string&);
|
||||
bool addClause(Rcl::SearchData *sd, Rcl::SearchDataClauseSimple* cl);
|
||||
@ -67,20 +66,20 @@ private:
|
||||
// input string.
|
||||
std::string m_input;
|
||||
// Current position in m_input
|
||||
unsigned int m_index;
|
||||
unsigned int m_index{0};
|
||||
// Characters pushed-back, ready for next getchar.
|
||||
std::stack<int> m_returns;
|
||||
// Result, set by parser.
|
||||
Rcl::SearchData *m_result;
|
||||
Rcl::SearchData *m_result{nullptr};
|
||||
|
||||
// Storage for top level filters
|
||||
std::vector<std::string> m_filetypes;
|
||||
std::vector<std::string> m_nfiletypes;
|
||||
bool m_haveDates;
|
||||
bool m_haveDates{false};
|
||||
DateInterval m_dates; // Restrict to date interval
|
||||
int64_t m_maxSize;
|
||||
int64_t m_minSize;
|
||||
|
||||
int64_t m_maxSize{-1};
|
||||
int64_t m_minSize{-1};
|
||||
int m_subSpec{Rcl::SearchData::SUBDOC_ANY};
|
||||
std::string m_reason;
|
||||
|
||||
// Let the quoted string reader store qualifiers in there, simpler
|
||||
|
||||
@ -2557,7 +2557,7 @@ bool Db::getSubDocs(const Doc &idoc, vector<Doc>& subdocs)
|
||||
LOGERR("Db::getSubDocs: xapian error: " << m_reason << "\n");
|
||||
return false;
|
||||
}
|
||||
if (xit == xdoc.termlist_end()) {
|
||||
if (xit == xdoc.termlist_end() || get_prefix(*xit) != parent_prefix) {
|
||||
LOGERR("Db::getSubDocs: parent term not found\n");
|
||||
return false;
|
||||
}
|
||||
@ -2642,7 +2642,7 @@ bool Db::getContainerDoc(const Doc &idoc, Doc& ctdoc)
|
||||
LOGERR("Db::getContainerDoc: xapian error: " << m_reason << "\n");
|
||||
return false;
|
||||
}
|
||||
if (xit == xdoc.termlist_end()) {
|
||||
if (xit == xdoc.termlist_end() || get_prefix(*xit) != parent_prefix) {
|
||||
LOGERR("Db::getContainerDoc: parent term not found\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -151,6 +151,20 @@ inline string strip_prefix(const string& trm)
|
||||
return trm.substr(st);
|
||||
}
|
||||
|
||||
// Extract the prefix part of an index term.
//
// Returns an empty string when has_prefix() reports that the term carries no
// prefix. (Returning the whole term, as was done previously, let an
// unprefixed term compare equal to a prefix value.)
//
// With a stripped index (o_index_stripchars set) the prefix is the leading
// run of prefix letters. Otherwise the term is expected to begin with a
// delimiter character followed by the prefix and a closing ':' (assumed form
// ":PFX:value" -- TODO confirm against wrap_prefix()); the text between the
// two delimiters is returned.
inline string get_prefix(const string& trm)
{
    if (!has_prefix(trm))
        return string();

    if (o_index_stripchars) {
        // If the term is made only of prefix letters, end is npos and
        // substr() returns the whole term, as before.
        const string::size_type end =
            trm.find_first_not_of("ABCDEFIJKLMNOPQRSTUVWXYZ");
        return trm.substr(0, end);
    }

    // Look for the FIRST ':' after the leading delimiter. Searching for the
    // last one breaks as soon as the term value itself contains a colon
    // (e.g. a file path), yielding a bogus prefix such as "F:/some/dir".
    const string::size_type end = trm.find(':', 1);
    if (end == string::npos) {
        // No closing delimiter: keep the historical result, which is
        // everything after the first character.
        return trm.size() > 1 ? trm.substr(1) : string();
    }
    return trm.substr(1, end - 1);
}
|
||||
|
||||
inline string wrap_prefix(const string& pfx)
|
||||
{
|
||||
if (o_index_stripchars) {
|
||||
|
||||
@ -152,8 +152,7 @@ private:
|
||||
};
|
||||
|
||||
Query::Query(Db *db)
|
||||
: m_nq(new Native(this)), m_db(db), m_sorter(0), m_sortAscending(true),
|
||||
m_collapseDuplicates(false), m_resCnt(-1), m_snipMaxPosWalk(1000000)
|
||||
: m_nq(new Native(this)), m_db(db)
|
||||
{
|
||||
if (db)
|
||||
db->getConf()->getConfParam("snippetMaxPosWalk", &m_snipMaxPosWalk);
|
||||
@ -179,6 +178,27 @@ void Query::setSortBy(const string& fld, bool ascending) {
|
||||
(m_sortAscending ? "ascending" : "descending") << "\n");
|
||||
}
|
||||
|
||||
static const string parent_prefix{"F"};
|
||||
|
||||
class SubdocDecider : public Xapian::MatchDecider {
|
||||
public:
|
||||
SubdocDecider(bool sel) : MatchDecider(), m_select(sel) {}
|
||||
virtual ~SubdocDecider() {}
|
||||
|
||||
virtual bool operator()(const Xapian::Document &doc) const {
|
||||
bool hasparent{false};
|
||||
try {
|
||||
Xapian::TermIterator xit = doc.termlist_begin();
|
||||
xit.skip_to(wrap_prefix(parent_prefix));
|
||||
hasparent = (xit != doc.termlist_end()) && (get_prefix(*xit) == parent_prefix);
|
||||
} catch (...) {
|
||||
}
|
||||
return hasparent == m_select;
|
||||
}
|
||||
|
||||
bool m_select;
|
||||
};
|
||||
|
||||
// Prepare query out of user search data
|
||||
bool Query::setQuery(std::shared_ptr<SearchData> sdata)
|
||||
{
|
||||
@ -199,8 +219,13 @@ bool Query::setQuery(std::shared_ptr<SearchData> sdata)
|
||||
m_reason += sdata->getReason();
|
||||
return false;
|
||||
}
|
||||
|
||||
m_nq->xquery = xq;
|
||||
|
||||
if (sdata->getSubSpec() == SearchData::SUBDOC_NO) {
|
||||
m_nq->subdecider = new SubdocDecider(false);
|
||||
} else if (sdata->getSubSpec() == SearchData::SUBDOC_YES) {
|
||||
m_nq->subdecider = new SubdocDecider(true);
|
||||
}
|
||||
|
||||
string d;
|
||||
for (int tries = 0; tries < 2; tries++) {
|
||||
@ -361,7 +386,8 @@ int Query::getResCnt(int checkatleast, bool useestimate)
|
||||
Chrono chron;
|
||||
XAPTRY(if (checkatleast == -1)
|
||||
checkatleast = m_db->docCnt();
|
||||
m_nq->xmset = m_nq->xenquire->get_mset(0, qquantum, checkatleast),
|
||||
m_nq->xmset = m_nq->xenquire->get_mset(
|
||||
0, qquantum, checkatleast, 0, m_nq->subdecider),
|
||||
m_db->m_ndb->xrdb, m_reason);
|
||||
if (!m_reason.empty()) {
|
||||
LOGERR("xenquire->get_mset: exception: " << m_reason << "\n");
|
||||
@ -401,10 +427,9 @@ bool Query::getDoc(int xapi, Doc &doc, bool fetchtext)
|
||||
if (!(xapi >= first && xapi <= last)) {
|
||||
LOGDEB("Fetching for first " << xapi << ", count " << qquantum << "\n");
|
||||
|
||||
XAPTRY(m_nq->xmset = m_nq->xenquire->get_mset(xapi, qquantum,
|
||||
(const Xapian::RSet *)0),
|
||||
XAPTRY(m_nq->xmset = m_nq->xenquire->get_mset(
|
||||
xapi, qquantum, nullptr, m_nq->subdecider),
|
||||
m_db->m_ndb->xrdb, m_reason);
|
||||
|
||||
if (!m_reason.empty()) {
|
||||
LOGERR("enquire->get_mset: exception: " << m_reason << "\n");
|
||||
return false;
|
||||
|
||||
@ -139,13 +139,13 @@ public:
|
||||
private:
|
||||
std::string m_reason; // Error explanation
|
||||
Db *m_db;
|
||||
void *m_sorter;
|
||||
void *m_sorter{nullptr};
|
||||
std::string m_sortField;
|
||||
bool m_sortAscending;
|
||||
bool m_collapseDuplicates;
|
||||
int m_resCnt;
|
||||
bool m_sortAscending{true};
|
||||
bool m_collapseDuplicates{false};
|
||||
int m_resCnt{-1};
|
||||
std::shared_ptr<SearchData> m_sd;
|
||||
int m_snipMaxPosWalk;
|
||||
int m_snipMaxPosWalk{1000000};
|
||||
};
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
|
||||
@ -32,24 +32,26 @@ namespace Rcl {
|
||||
class Query::Native {
|
||||
public:
|
||||
// The query I belong to
|
||||
Query *m_q;
|
||||
Query *m_q{nullptr};
|
||||
// query descriptor: terms and subqueries joined by operators
|
||||
// (or/and etc...)
|
||||
Xapian::Query xquery;
|
||||
// Open query descriptor.
|
||||
Xapian::Enquire *xenquire;
|
||||
Xapian::Enquire *xenquire{nullptr};
|
||||
// Partial result set
|
||||
Xapian::MSet xmset;
|
||||
// Term frequencies for current query. See makeAbstract, setQuery
|
||||
std::map<std::string, double> termfreqs;
|
||||
|
||||
Xapian::MatchDecider *subdecider{nullptr};
|
||||
|
||||
Native(Query *q)
|
||||
: m_q(q), xenquire(0) { }
|
||||
: m_q(q), xenquire(0) {}
|
||||
~Native() {
|
||||
clear();
|
||||
}
|
||||
void clear() {
|
||||
delete xenquire; xenquire = 0;
|
||||
deleteZ(xenquire);
|
||||
deleteZ(subdecider);
|
||||
termfreqs.clear();
|
||||
}
|
||||
/** Return a list of terms which matched for a specific result document */
|
||||
|
||||
@ -114,6 +114,17 @@ public:
|
||||
void setMinSize(int64_t size) {m_minSize = size;}
|
||||
void setMaxSize(int64_t size) {m_maxSize = size;}
|
||||
|
||||
enum SubdocSpec {SUBDOC_ANY = -1, SUBDOC_NO = 0, SUBDOC_YES = 1};
|
||||
void setSubSpec(int spec) {
|
||||
switch (spec) {
|
||||
case SUBDOC_ANY:
|
||||
case SUBDOC_NO:
|
||||
case SUBDOC_YES:
|
||||
m_subspec = spec;
|
||||
}
|
||||
}
|
||||
int getSubSpec() {return m_subspec;}
|
||||
|
||||
/** Set date span for filtering results */
|
||||
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
|
||||
|
||||
@ -174,12 +185,14 @@ private:
|
||||
std::shared_ptr<SearchDataClauseDist> m_autophrase;
|
||||
|
||||
// Special stuff produced by input which looks like a clause but means
|
||||
// something else (date and size specs)
|
||||
// something else (date, size specs, etc.)
|
||||
bool m_haveDates{false};
|
||||
DateInterval m_dates; // Restrict to date interval
|
||||
int64_t m_maxSize{-1};
|
||||
int64_t m_minSize{-1};
|
||||
|
||||
// Filtering for subdocs: -1:any, 0: only free-standing, 1: only subdocs
|
||||
int m_subspec{SUBDOC_ANY};
|
||||
|
||||
// Printable expanded version of the complete query, retrieved/set
|
||||
// from rcldb after the Xapian::setQuery() call
|
||||
std::string m_description;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user