Search: allow negative directory filtering (match everything except documents under the given directory). Emit more explicit errors for the other negative search clauses, which are not supported.
This commit is contained in:
parent
9ea442f1fe
commit
e883c4d04e
@ -139,7 +139,7 @@ void SSearch::startSimpleSearch()
|
|||||||
else
|
else
|
||||||
sdata = wasaStringToRcl(rclconfig, u8, reason);
|
sdata = wasaStringToRcl(rclconfig, u8, reason);
|
||||||
if (sdata == 0) {
|
if (sdata == 0) {
|
||||||
QMessageBox::warning(0, "Recoll", tr("Bad query string") +
|
QMessageBox::warning(0, "Recoll", tr("Bad query string") + ": " +
|
||||||
QString::fromAscii(reason.c_str()));
|
QString::fromAscii(reason.c_str()));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -35,28 +35,15 @@ using std::list;
|
|||||||
#include "refcntr.h"
|
#include "refcntr.h"
|
||||||
#include "textsplit.h"
|
#include "textsplit.h"
|
||||||
|
|
||||||
Rcl::SearchData *wasaStringToRcl(RclConfig *config,
|
static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
|
||||||
const string &qs, string &reason,
|
const string& autosuffs, string& reason)
|
||||||
const string& autosuffs)
|
|
||||||
{
|
{
|
||||||
StringToWasaQuery parser;
|
if (wasa == 0) {
|
||||||
WasaQuery *wq = parser.stringToQuery(qs, reason);
|
reason = "NULL query";
|
||||||
if (wq == 0)
|
|
||||||
return 0;
|
|
||||||
Rcl::SearchData *rq = wasaQueryToRcl(config, wq, autosuffs);
|
|
||||||
if (rq == 0) {
|
|
||||||
reason = "Failed translating xesam query structure to recoll";
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return rq;
|
|
||||||
}
|
|
||||||
|
|
||||||
Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
|
|
||||||
WasaQuery *wasa, const string& autosuffs)
|
|
||||||
{
|
|
||||||
if (wasa == 0)
|
|
||||||
return 0;
|
|
||||||
if (wasa->m_op != WasaQuery::OP_AND && wasa->m_op != WasaQuery::OP_OR) {
|
if (wasa->m_op != WasaQuery::OP_AND && wasa->m_op != WasaQuery::OP_OR) {
|
||||||
|
reason = "Top query neither AND nor OR ?";
|
||||||
LOGERR(("wasaQueryToRcl: top query neither AND nor OR!\n"));
|
LOGERR(("wasaQueryToRcl: top query neither AND nor OR!\n"));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -70,44 +57,80 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
|
|||||||
WasaQuery::subqlist_t::iterator it;
|
WasaQuery::subqlist_t::iterator it;
|
||||||
Rcl::SearchDataClause *nclause;
|
Rcl::SearchDataClause *nclause;
|
||||||
|
|
||||||
|
// Walk the list of clauses. Some pseudo-field types need special
|
||||||
|
// processing, which results in setting data in the top struct
|
||||||
|
// instead of adding a clause. We check for these first
|
||||||
for (it = wasa->m_subs.begin(); it != wasa->m_subs.end(); it++) {
|
for (it = wasa->m_subs.begin(); it != wasa->m_subs.end(); it++) {
|
||||||
|
|
||||||
|
if (!stringicmp("mime", (*it)->m_fieldspec) ||
|
||||||
|
!stringicmp("format", (*it)->m_fieldspec)) {
|
||||||
|
if ((*it)->m_op != WasaQuery::OP_LEAF) {
|
||||||
|
reason = "Negative mime/format clauses not supported yet";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
sdata->addFiletype((*it)->m_value);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Xesam uses "type", we also support "rclcat", for broad
|
||||||
|
// categories like "audio", "presentation", etc.
|
||||||
|
if (!stringicmp("rclcat", (*it)->m_fieldspec) ||
|
||||||
|
!stringicmp("type", (*it)->m_fieldspec)) {
|
||||||
|
if ((*it)->m_op != WasaQuery::OP_LEAF) {
|
||||||
|
reason = "Negative rclcat/type clauses not supported yet";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
list<string> mtypes;
|
||||||
|
if (config && config->getMimeCatTypes((*it)->m_value, mtypes)
|
||||||
|
&& !mtypes.empty()) {
|
||||||
|
for (list<string>::iterator mit = mtypes.begin();
|
||||||
|
mit != mtypes.end(); mit++) {
|
||||||
|
sdata->addFiletype(*mit);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
reason = "Unknown rclcat/type value: no mime types found";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filtering on location
|
||||||
|
if (!stringicmp("dir", (*it)->m_fieldspec)) {
|
||||||
|
sdata->setTopdir((*it)->m_value, (*it)->m_op == WasaQuery::OP_EXCL);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle "date" spec
|
||||||
|
if (!stringicmp("date", (*it)->m_fieldspec)) {
|
||||||
|
if ((*it)->m_op != WasaQuery::OP_LEAF) {
|
||||||
|
reason = "Negative date filtering not supported";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
DateInterval di;
|
||||||
|
if (!parsedateinterval((*it)->m_value, &di)) {
|
||||||
|
LOGERR(("wasaQueryToRcl: bad date interval format\n"));
|
||||||
|
reason = "Bad date interval format";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
LOGDEB(("wasaQueryToRcl:: date span: %d-%d-%d/%d-%d-%d\n",
|
||||||
|
di.y1,di.m1,di.d1, di.y2,di.m2,di.d2));
|
||||||
|
sdata->setDateSpan(&di);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// "Regular" processing follows:
|
||||||
switch ((*it)->m_op) {
|
switch ((*it)->m_op) {
|
||||||
case WasaQuery::OP_NULL:
|
case WasaQuery::OP_NULL:
|
||||||
case WasaQuery::OP_AND:
|
case WasaQuery::OP_AND:
|
||||||
default:
|
default:
|
||||||
LOGINFO(("wasaQueryToRcl: found bad NULL or AND q type in list\n"));
|
reason = "Found bad NULL or AND query type in list";
|
||||||
|
LOGERR(("wasaQueryToRcl: found bad NULL or AND q type in list\n"));
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
case WasaQuery::OP_LEAF: {
|
case WasaQuery::OP_LEAF: {
|
||||||
LOGDEB2(("wasaQueryToRcl: leaf clause [%s]:[%s]\n",
|
LOGDEB2(("wasaQueryToRcl: leaf clause [%s]:[%s]\n",
|
||||||
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
|
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
|
||||||
|
|
||||||
// Special cases (mime, category, dir filter ...). Not pretty.
|
|
||||||
|
|
||||||
if (!stringicmp("mime", (*it)->m_fieldspec) ||
|
|
||||||
!stringicmp("format", (*it)->m_fieldspec)
|
|
||||||
) {
|
|
||||||
sdata->addFiletype((*it)->m_value);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Xesam uses "type", we also support "rclcat", for broad
|
|
||||||
// categories like "audio", "presentation", etc.
|
|
||||||
if (!stringicmp("rclcat", (*it)->m_fieldspec) ||
|
|
||||||
!stringicmp("type", (*it)->m_fieldspec)) {
|
|
||||||
list<string> mtypes;
|
|
||||||
if (config && config->getMimeCatTypes((*it)->m_value, mtypes)) {
|
|
||||||
for (list<string>::iterator mit = mtypes.begin();
|
|
||||||
mit != mtypes.end(); mit++) {
|
|
||||||
sdata->addFiletype(*mit);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (!stringicmp("dir", (*it)->m_fieldspec)) {
|
|
||||||
sdata->setTopdir((*it)->m_value);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Change terms found in the "autosuffs" list into "ext"
|
// Change terms found in the "autosuffs" list into "ext"
|
||||||
// field queries
|
// field queries
|
||||||
if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) {
|
if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) {
|
||||||
@ -121,21 +144,6 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle "date" spec
|
|
||||||
if (!stringicmp("date", (*it)->m_fieldspec)) {
|
|
||||||
DateInterval di;
|
|
||||||
if (!parsedateinterval((*it)->m_value, &di)) {
|
|
||||||
LOGERR(("wasaQueryToRcl: bad date interval format\n"));
|
|
||||||
// Process rest of query anyway ?
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
LOGDEB(("wasaQueryToRcl:: date span: %d-%d-%d/%d-%d-%d\n",
|
|
||||||
di.y1,di.m1,di.d1, di.y2,di.m2,di.d2))
|
|
||||||
sdata->setDateSpan(&di);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// "Regular" processing follows:
|
|
||||||
unsigned int mods = (unsigned int)(*it)->m_modifiers;
|
unsigned int mods = (unsigned int)(*it)->m_modifiers;
|
||||||
|
|
||||||
if (TextSplit::hasVisibleWhite((*it)->m_value)) {
|
if (TextSplit::hasVisibleWhite((*it)->m_value)) {
|
||||||
@ -154,6 +162,7 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
|
|||||||
(*it)->m_fieldspec);
|
(*it)->m_fieldspec);
|
||||||
}
|
}
|
||||||
if (nclause == 0) {
|
if (nclause == 0) {
|
||||||
|
reason = "Out of memory";
|
||||||
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -176,12 +185,14 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
|
|||||||
// but should work. If there is actually a single
|
// but should work. If there is actually a single
|
||||||
// word, it will not be taken as a phrase, and
|
// word, it will not be taken as a phrase, and
|
||||||
// stem-expansion will work normally
|
// stem-expansion will work normally
|
||||||
|
// Have to do this because searchdata has nothing like and_not
|
||||||
nclause = new Rcl::SearchDataClauseSimple(Rcl::SCLT_EXCL,
|
nclause = new Rcl::SearchDataClauseSimple(Rcl::SCLT_EXCL,
|
||||||
string("\"") +
|
string("\"") +
|
||||||
(*it)->m_value + "\"",
|
(*it)->m_value + "\"",
|
||||||
(*it)->m_fieldspec);
|
(*it)->m_fieldspec);
|
||||||
|
|
||||||
if (nclause == 0) {
|
if (nclause == 0) {
|
||||||
|
reason = "Out of memory";
|
||||||
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -194,7 +205,8 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
|
|||||||
LOGDEB2(("wasaQueryToRcl: OR clause [%s]:[%s]\n",
|
LOGDEB2(("wasaQueryToRcl: OR clause [%s]:[%s]\n",
|
||||||
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
|
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
|
||||||
// Create a subquery.
|
// Create a subquery.
|
||||||
Rcl::SearchData *sub = wasaQueryToRcl(config, *it);
|
Rcl::SearchData *sub =
|
||||||
|
wasaQueryToRcl(config, *it, autosuffs, reason);
|
||||||
if (sub == 0) {
|
if (sub == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -203,6 +215,7 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
|
|||||||
RefCntr<Rcl::SearchData>(sub));
|
RefCntr<Rcl::SearchData>(sub));
|
||||||
if (nclause == 0) {
|
if (nclause == 0) {
|
||||||
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
||||||
|
reason = "Out of memory";
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM)
|
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM)
|
||||||
@ -213,3 +226,14 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
|
|||||||
|
|
||||||
return sdata;
|
return sdata;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Rcl::SearchData *wasaStringToRcl(RclConfig *config,
|
||||||
|
const string &qs, string &reason,
|
||||||
|
const string& autosuffs)
|
||||||
|
{
|
||||||
|
StringToWasaQuery parser;
|
||||||
|
WasaQuery *wq = parser.stringToQuery(qs, reason);
|
||||||
|
if (wq == 0)
|
||||||
|
return 0;
|
||||||
|
return wasaQueryToRcl(config, wq, autosuffs, reason);
|
||||||
|
}
|
||||||
|
|||||||
@ -29,8 +29,4 @@ class RclConfig;
|
|||||||
extern Rcl::SearchData *wasaStringToRcl(RclConfig *,
|
extern Rcl::SearchData *wasaStringToRcl(RclConfig *,
|
||||||
const string& query, string &reason,
|
const string& query, string &reason,
|
||||||
const string& autosuffs = string());
|
const string& autosuffs = string());
|
||||||
class WasaQuery;
|
|
||||||
extern Rcl::SearchData *wasaQueryToRcl(RclConfig *, WasaQuery *wasa,
|
|
||||||
const string& autosuffs = string());
|
|
||||||
|
|
||||||
#endif /* _WASATORCL_H_INCLUDED_ */
|
#endif /* _WASATORCL_H_INCLUDED_ */
|
||||||
|
|||||||
@ -259,9 +259,10 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
|||||||
it != vpath.end(); it++){
|
it != vpath.end(); it++){
|
||||||
pvpath.push_back(pathelt_prefix + *it);
|
pvpath.push_back(pathelt_prefix + *it);
|
||||||
}
|
}
|
||||||
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq,
|
xq = Xapian::Query(m_topdirexcl ?
|
||||||
Xapian::Query(Xapian::Query::OP_PHRASE,
|
Xapian::Query::OP_AND_NOT:Xapian::Query::OP_FILTER,
|
||||||
pvpath.begin(), pvpath.end()));
|
xq, Xapian::Query(Xapian::Query::OP_PHRASE,
|
||||||
|
pvpath.begin(), pvpath.end()));
|
||||||
}
|
}
|
||||||
|
|
||||||
*((Xapian::Query *)d) = xq;
|
*((Xapian::Query *)d) = xq;
|
||||||
@ -355,6 +356,7 @@ void SearchData::erase() {
|
|||||||
m_query.clear();
|
m_query.clear();
|
||||||
m_filetypes.clear();
|
m_filetypes.clear();
|
||||||
m_topdir.erase();
|
m_topdir.erase();
|
||||||
|
m_topdirexcl = false;
|
||||||
m_description.erase();
|
m_description.erase();
|
||||||
m_reason.erase();
|
m_reason.erase();
|
||||||
m_haveDates = false;
|
m_haveDates = false;
|
||||||
|
|||||||
@ -72,7 +72,8 @@ class SearchDataClause;
|
|||||||
class SearchData {
|
class SearchData {
|
||||||
public:
|
public:
|
||||||
SearchData(SClType tp)
|
SearchData(SClType tp)
|
||||||
: m_tp(tp), m_haveDates(false), m_haveWildCards(false)
|
: m_tp(tp), m_topdirexcl(false), m_haveDates(false),
|
||||||
|
m_haveWildCards(false)
|
||||||
{
|
{
|
||||||
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
|
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
|
||||||
m_tp = SCLT_OR;
|
m_tp = SCLT_OR;
|
||||||
@ -102,8 +103,11 @@ public:
|
|||||||
bool maybeAddAutoPhrase();
|
bool maybeAddAutoPhrase();
|
||||||
|
|
||||||
/** Set/get top subdirectory for filtering results */
|
/** Set/get top subdirectory for filtering results */
|
||||||
void setTopdir(const string& t) {m_topdir = t;}
|
void setTopdir(const string& t, bool excl = false)
|
||||||
string getTopdir() {return m_topdir;}
|
{
|
||||||
|
m_topdir = t;
|
||||||
|
m_topdirexcl = excl;
|
||||||
|
}
|
||||||
|
|
||||||
/** Set date span for filtering results */
|
/** Set date span for filtering results */
|
||||||
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
|
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
|
||||||
@ -138,6 +142,7 @@ private:
|
|||||||
vector<SearchDataClause*> m_query;
|
vector<SearchDataClause*> m_query;
|
||||||
vector<string> m_filetypes; // Restrict to filetypes if set.
|
vector<string> m_filetypes; // Restrict to filetypes if set.
|
||||||
string m_topdir; // Restrict to subtree.
|
string m_topdir; // Restrict to subtree.
|
||||||
|
bool m_topdirexcl; // Invert meaning
|
||||||
bool m_haveDates;
|
bool m_haveDates;
|
||||||
DateInterval m_dates; // Restrict to date interval
|
DateInterval m_dates; // Restrict to date interval
|
||||||
// Printable expanded version of the complete query, retrieved/set
|
// Printable expanded version of the complete query, retrieved/set
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user