Search: allow negative directory filtering (match everything except documents under the given directory). Emit more explicit errors for the other negative search clauses, which are still not allowed.

This commit is contained in:
Jean-Francois Dockes 2011-03-30 14:35:09 +02:00
parent 9ea442f1fe
commit e883c4d04e
5 changed files with 100 additions and 73 deletions

View File

@ -139,7 +139,7 @@ void SSearch::startSimpleSearch()
else else
sdata = wasaStringToRcl(rclconfig, u8, reason); sdata = wasaStringToRcl(rclconfig, u8, reason);
if (sdata == 0) { if (sdata == 0) {
QMessageBox::warning(0, "Recoll", tr("Bad query string") + QMessageBox::warning(0, "Recoll", tr("Bad query string") + ": " +
QString::fromAscii(reason.c_str())); QString::fromAscii(reason.c_str()));
return; return;
} }

View File

@ -35,28 +35,15 @@ using std::list;
#include "refcntr.h" #include "refcntr.h"
#include "textsplit.h" #include "textsplit.h"
Rcl::SearchData *wasaStringToRcl(RclConfig *config, static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
const string &qs, string &reason, const string& autosuffs, string& reason)
const string& autosuffs)
{ {
StringToWasaQuery parser; if (wasa == 0) {
WasaQuery *wq = parser.stringToQuery(qs, reason); reason = "NULL query";
if (wq == 0)
return 0;
Rcl::SearchData *rq = wasaQueryToRcl(config, wq, autosuffs);
if (rq == 0) {
reason = "Failed translating xesam query structure to recoll";
return 0; return 0;
} }
return rq;
}
Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
WasaQuery *wasa, const string& autosuffs)
{
if (wasa == 0)
return 0;
if (wasa->m_op != WasaQuery::OP_AND && wasa->m_op != WasaQuery::OP_OR) { if (wasa->m_op != WasaQuery::OP_AND && wasa->m_op != WasaQuery::OP_OR) {
reason = "Top query neither AND nor OR ?";
LOGERR(("wasaQueryToRcl: top query neither AND nor OR!\n")); LOGERR(("wasaQueryToRcl: top query neither AND nor OR!\n"));
return 0; return 0;
} }
@ -70,44 +57,80 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
WasaQuery::subqlist_t::iterator it; WasaQuery::subqlist_t::iterator it;
Rcl::SearchDataClause *nclause; Rcl::SearchDataClause *nclause;
// Walk the list of clauses. Some pseudo-field types need special
// processing, which results in setting data in the top struct
// instead of adding a clause. We check for these first
for (it = wasa->m_subs.begin(); it != wasa->m_subs.end(); it++) { for (it = wasa->m_subs.begin(); it != wasa->m_subs.end(); it++) {
if (!stringicmp("mime", (*it)->m_fieldspec) ||
!stringicmp("format", (*it)->m_fieldspec)) {
if ((*it)->m_op != WasaQuery::OP_LEAF) {
reason = "Negative mime/format clauses not supported yet";
return 0;
}
sdata->addFiletype((*it)->m_value);
continue;
}
// Xesam uses "type", we also support "rclcat", for broad
// categories like "audio", "presentation", etc.
if (!stringicmp("rclcat", (*it)->m_fieldspec) ||
!stringicmp("type", (*it)->m_fieldspec)) {
if ((*it)->m_op != WasaQuery::OP_LEAF) {
reason = "Negative rclcat/type clauses not supported yet";
return 0;
}
list<string> mtypes;
if (config && config->getMimeCatTypes((*it)->m_value, mtypes)
&& !mtypes.empty()) {
for (list<string>::iterator mit = mtypes.begin();
mit != mtypes.end(); mit++) {
sdata->addFiletype(*mit);
}
} else {
reason = "Unknown rclcat/type value: no mime types found";
return 0;
}
continue;
}
// Filtering on location
if (!stringicmp("dir", (*it)->m_fieldspec)) {
sdata->setTopdir((*it)->m_value, (*it)->m_op == WasaQuery::OP_EXCL);
continue;
}
// Handle "date" spec
if (!stringicmp("date", (*it)->m_fieldspec)) {
if ((*it)->m_op != WasaQuery::OP_LEAF) {
reason = "Negative date filtering not supported";
return 0;
}
DateInterval di;
if (!parsedateinterval((*it)->m_value, &di)) {
LOGERR(("wasaQueryToRcl: bad date interval format\n"));
reason = "Bad date interval format";
return 0;
}
LOGDEB(("wasaQueryToRcl:: date span: %d-%d-%d/%d-%d-%d\n",
di.y1,di.m1,di.d1, di.y2,di.m2,di.d2));
sdata->setDateSpan(&di);
continue;
}
// "Regular" processing follows:
switch ((*it)->m_op) { switch ((*it)->m_op) {
case WasaQuery::OP_NULL: case WasaQuery::OP_NULL:
case WasaQuery::OP_AND: case WasaQuery::OP_AND:
default: default:
LOGINFO(("wasaQueryToRcl: found bad NULL or AND q type in list\n")); reason = "Found bad NULL or AND query type in list";
LOGERR(("wasaQueryToRcl: found bad NULL or AND q type in list\n"));
continue; continue;
case WasaQuery::OP_LEAF: { case WasaQuery::OP_LEAF: {
LOGDEB2(("wasaQueryToRcl: leaf clause [%s]:[%s]\n", LOGDEB2(("wasaQueryToRcl: leaf clause [%s]:[%s]\n",
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str())); (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
// Special cases (mime, category, dir filter ...). Not pretty.
if (!stringicmp("mime", (*it)->m_fieldspec) ||
!stringicmp("format", (*it)->m_fieldspec)
) {
sdata->addFiletype((*it)->m_value);
break;
}
// Xesam uses "type", we also support "rclcat", for broad
// categories like "audio", "presentation", etc.
if (!stringicmp("rclcat", (*it)->m_fieldspec) ||
!stringicmp("type", (*it)->m_fieldspec)) {
list<string> mtypes;
if (config && config->getMimeCatTypes((*it)->m_value, mtypes)) {
for (list<string>::iterator mit = mtypes.begin();
mit != mtypes.end(); mit++) {
sdata->addFiletype(*mit);
}
}
break;
}
if (!stringicmp("dir", (*it)->m_fieldspec)) {
sdata->setTopdir((*it)->m_value);
break;
}
// Change terms found in the "autosuffs" list into "ext" // Change terms found in the "autosuffs" list into "ext"
// field queries // field queries
if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) { if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) {
@ -121,21 +144,6 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
} }
} }
// Handle "date" spec
if (!stringicmp("date", (*it)->m_fieldspec)) {
DateInterval di;
if (!parsedateinterval((*it)->m_value, &di)) {
LOGERR(("wasaQueryToRcl: bad date interval format\n"));
// Process rest of query anyway ?
break;
}
LOGDEB(("wasaQueryToRcl:: date span: %d-%d-%d/%d-%d-%d\n",
di.y1,di.m1,di.d1, di.y2,di.m2,di.d2))
sdata->setDateSpan(&di);
break;
}
// "Regular" processing follows:
unsigned int mods = (unsigned int)(*it)->m_modifiers; unsigned int mods = (unsigned int)(*it)->m_modifiers;
if (TextSplit::hasVisibleWhite((*it)->m_value)) { if (TextSplit::hasVisibleWhite((*it)->m_value)) {
@ -154,6 +162,7 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
(*it)->m_fieldspec); (*it)->m_fieldspec);
} }
if (nclause == 0) { if (nclause == 0) {
reason = "Out of memory";
LOGERR(("wasaQueryToRcl: out of memory\n")); LOGERR(("wasaQueryToRcl: out of memory\n"));
return 0; return 0;
} }
@ -176,12 +185,14 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
// but should work. If there is actually a single // but should work. If there is actually a single
// word, it will not be taken as a phrase, and // word, it will not be taken as a phrase, and
// stem-expansion will work normally // stem-expansion will work normally
// Have to do this because searchdata has nothing like and_not
nclause = new Rcl::SearchDataClauseSimple(Rcl::SCLT_EXCL, nclause = new Rcl::SearchDataClauseSimple(Rcl::SCLT_EXCL,
string("\"") + string("\"") +
(*it)->m_value + "\"", (*it)->m_value + "\"",
(*it)->m_fieldspec); (*it)->m_fieldspec);
if (nclause == 0) { if (nclause == 0) {
reason = "Out of memory";
LOGERR(("wasaQueryToRcl: out of memory\n")); LOGERR(("wasaQueryToRcl: out of memory\n"));
return 0; return 0;
} }
@ -194,7 +205,8 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
LOGDEB2(("wasaQueryToRcl: OR clause [%s]:[%s]\n", LOGDEB2(("wasaQueryToRcl: OR clause [%s]:[%s]\n",
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str())); (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
// Create a subquery. // Create a subquery.
Rcl::SearchData *sub = wasaQueryToRcl(config, *it); Rcl::SearchData *sub =
wasaQueryToRcl(config, *it, autosuffs, reason);
if (sub == 0) { if (sub == 0) {
continue; continue;
} }
@ -203,6 +215,7 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
RefCntr<Rcl::SearchData>(sub)); RefCntr<Rcl::SearchData>(sub));
if (nclause == 0) { if (nclause == 0) {
LOGERR(("wasaQueryToRcl: out of memory\n")); LOGERR(("wasaQueryToRcl: out of memory\n"));
reason = "Out of memory";
return 0; return 0;
} }
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM) if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM)
@ -213,3 +226,14 @@ Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
return sdata; return sdata;
} }
// Build a Recoll search from a query-language string.
//
// The string is first parsed into a WasaQuery tree, which is then handed to
// wasaQueryToRcl() for translation. Returns 0 if the parse yields no query;
// otherwise returns whatever wasaQueryToRcl() returns (which may itself be 0).
// 'reason' is passed to both stages so that either one can report why it
// failed.
// NOTE(review): ownership of the parsed WasaQuery is not visible from here --
// confirm who is responsible for releasing it.
Rcl::SearchData *wasaStringToRcl(RclConfig *config,
const string &qs, string &reason,
const string& autosuffs)
{
StringToWasaQuery queryParser;
WasaQuery *parsed = queryParser.stringToQuery(qs, reason);
if (parsed != 0) {
// Parse succeeded: translate the tree into a SearchData.
return wasaQueryToRcl(config, parsed, autosuffs, reason);
}
return 0;
}

View File

@ -29,8 +29,4 @@ class RclConfig;
extern Rcl::SearchData *wasaStringToRcl(RclConfig *, extern Rcl::SearchData *wasaStringToRcl(RclConfig *,
const string& query, string &reason, const string& query, string &reason,
const string& autosuffs = string()); const string& autosuffs = string());
class WasaQuery;
extern Rcl::SearchData *wasaQueryToRcl(RclConfig *, WasaQuery *wasa,
const string& autosuffs = string());
#endif /* _WASATORCL_H_INCLUDED_ */ #endif /* _WASATORCL_H_INCLUDED_ */

View File

@ -259,9 +259,10 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
it != vpath.end(); it++){ it != vpath.end(); it++){
pvpath.push_back(pathelt_prefix + *it); pvpath.push_back(pathelt_prefix + *it);
} }
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, xq = Xapian::Query(m_topdirexcl ?
Xapian::Query(Xapian::Query::OP_PHRASE, Xapian::Query::OP_AND_NOT:Xapian::Query::OP_FILTER,
pvpath.begin(), pvpath.end())); xq, Xapian::Query(Xapian::Query::OP_PHRASE,
pvpath.begin(), pvpath.end()));
} }
*((Xapian::Query *)d) = xq; *((Xapian::Query *)d) = xq;
@ -355,6 +356,7 @@ void SearchData::erase() {
m_query.clear(); m_query.clear();
m_filetypes.clear(); m_filetypes.clear();
m_topdir.erase(); m_topdir.erase();
m_topdirexcl = false;
m_description.erase(); m_description.erase();
m_reason.erase(); m_reason.erase();
m_haveDates = false; m_haveDates = false;

View File

@ -72,7 +72,8 @@ class SearchDataClause;
class SearchData { class SearchData {
public: public:
SearchData(SClType tp) SearchData(SClType tp)
: m_tp(tp), m_haveDates(false), m_haveWildCards(false) : m_tp(tp), m_topdirexcl(false), m_haveDates(false),
m_haveWildCards(false)
{ {
if (m_tp != SCLT_OR && m_tp != SCLT_AND) if (m_tp != SCLT_OR && m_tp != SCLT_AND)
m_tp = SCLT_OR; m_tp = SCLT_OR;
@ -102,8 +103,11 @@ public:
bool maybeAddAutoPhrase(); bool maybeAddAutoPhrase();
/** Set/get top subdirectory for filtering results */ /** Set/get top subdirectory for filtering results */
void setTopdir(const string& t) {m_topdir = t;} void setTopdir(const string& t, bool excl = false)
string getTopdir() {return m_topdir;} {
m_topdir = t;
m_topdirexcl = excl;
}
/** Set date span for filtering results */ /** Set date span for filtering results */
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;} void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
@ -138,6 +142,7 @@ private:
vector<SearchDataClause*> m_query; vector<SearchDataClause*> m_query;
vector<string> m_filetypes; // Restrict to filetypes if set. vector<string> m_filetypes; // Restrict to filetypes if set.
string m_topdir; // Restrict to subtree. string m_topdir; // Restrict to subtree.
bool m_topdirexcl; // Invert meaning
bool m_haveDates; bool m_haveDates;
DateInterval m_dates; // Restrict to date interval DateInterval m_dates; // Restrict to date interval
// Printable expanded version of the complete query, retrieved/set // Printable expanded version of the complete query, retrieved/set