Query language: it used to be that an entry lacking whitespace but

splittable, like [term1,term2], was transformed into a phrase search, which
made sense in some cases, but not so many. This commit changes this:
[term1,term2] now means term1 AND term2. [term1/term2] means
term1 OR term2. This is especially useful for field searches where you
would previously be forced to repeat the field name for every term.
[somefield:term1 somefield:term2] can now be expressed as [somefield:term1,term2].
This commit is contained in:
Jean-Francois Dockes 2014-06-12 17:12:08 +02:00
parent 5f05f5d588
commit 4a783beadc
6 changed files with 30 additions and 37 deletions

View File

@ -312,16 +312,8 @@ bool RecollProtocol::doSearch(const QueryDesc& qd)
if (opt == 'f') {
clp = new Rcl::SearchDataClauseFilename(qs);
} else {
// If there is no white space inside the query, then the user
// certainly means it as a phrase.
bool isreallyaphrase = false;
if (!TextSplit::hasVisibleWhite(qs))
isreallyaphrase = true;
clp = isreallyaphrase ?
new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, qs, 0) :
new Rcl::SearchDataClauseSimple(opt == 'o' ?
Rcl::SCLT_OR : Rcl::SCLT_AND,
qs);
clp = new Rcl::SearchDataClauseSimple(opt == 'o' ? Rcl::SCLT_OR :
Rcl::SCLT_AND, qs);
}
sd = new Rcl::SearchData(Rcl::SCLT_OR, "english");
if (sd && clp)

View File

@ -255,10 +255,6 @@ bool SSearch::startSimpleSearch(const string& u8, int maxexp)
Rcl::SearchDataClause *clp = 0;
if (tp == SST_FNM) {
clp = new Rcl::SearchDataClauseFilename(u8);
} else if (!TextSplit::hasVisibleWhite(u8)) {
// If there is no white space inside the query, then the user
// certainly means it as a phrase.
clp = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, u8, 0);
} else {
// ANY or ALL, several words.
if (tp == SST_ANY) {

View File

@ -317,16 +317,9 @@ endopts:
if (op_flags & OPT_f) {
clp = new Rcl::SearchDataClauseFilename(qs);
} else {
// If there is no white space inside the query, then the user
// certainly means it as a phrase.
bool isreallyaphrase = false;
if (!TextSplit::hasVisibleWhite(qs))
isreallyaphrase = true;
clp = isreallyaphrase ?
new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, qs, 0) :
new Rcl::SearchDataClauseSimple((op_flags & OPT_o)?
Rcl::SCLT_OR : Rcl::SCLT_AND,
qs);
clp = new Rcl::SearchDataClauseSimple((op_flags & OPT_o)?
Rcl::SCLT_OR : Rcl::SCLT_AND,
qs);
}
if (sd)
sd->addClause(clp);

View File

@ -330,6 +330,7 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
unsigned int mods = 0;
if (checkSubMatch(SMI_QUOTED, match, reason)) {
nclause->m_value = match;
mods |= WasaQuery::WQM_QUOTED;
} else if (checkSubMatch(SMI_TERM, match, reason)) {
nclause->m_value = match;
}

View File

@ -53,12 +53,12 @@ public:
supports "contain" except for a size field */
enum Rel {REL_NULL, REL_EQUALS, REL_CONTAINS, REL_LT, REL_LTE,
REL_GT, REL_GTE};
/** Modifiers for term handling: case/diacritics handling,
stemming control */
/** Modifiers for terms: case/diacritics handling,
stemming control... */
enum Modifier {WQM_CASESENS = 1, WQM_DIACSENS = 2, WQM_NOSTEM = 4,
WQM_BOOST = 8, WQM_PROX = 0x10, WQM_SLOPPY = 0x20,
WQM_WORDS = 0x40, WQM_PHRASESLACK = 0x80, WQM_REGEX = 0x100,
WQM_FUZZY = 0x200};
WQM_FUZZY = 0x200, WQM_QUOTED = 0x400};
typedef vector<WasaQuery*> subqlist_t;

View File

@ -200,12 +200,8 @@ static Rcl::SearchData *wasaQueryToRcl(const RclConfig *config,
LOGERR(("wasaQueryToRcl: excl clause inside OR list!\n"));
continue;
}
// I'm not sure I understand the phrase/near detection
// thereafter anymore, maybe it would be better to have an
// explicit flag. Mods can only be set after a double
// quote.
if (TextSplit::hasVisibleWhite((*it)->m_value) || mods) {
if (mods & WasaQuery::WQM_QUOTED) {
Rcl::SClType tp = (mods & WasaQuery::WQM_PROX) ?
Rcl::SCLT_NEAR :
Rcl::SCLT_PHRASE;
@ -213,12 +209,27 @@ static Rcl::SearchData *wasaQueryToRcl(const RclConfig *config,
(*it)->m_slack,
(*it)->m_fieldspec);
} else {
Rcl::SClType tp = (*it)->m_exclude ?
Rcl::SCLT_OR:
// If term has commas or slashes inside, take it
// as a list, turn the slashes/commas to spaces,
// leave unquoted. Otherwise, this would end up as
// a phrase query. This is a handy way to enter
// multiple terms to be searched inside a
// field. We interpret ',' as AND, and '/' as
// OR. No mixes allowed and ',' wins.
Rcl::SClType tp = (*it)->m_exclude ? Rcl::SCLT_OR:
Rcl::SCLT_AND;
nclause =
new Rcl::SearchDataClauseSimple(tp, (*it)->m_value,
(*it)->m_fieldspec);
string ns = neutchars((*it)->m_value, ",");
if (ns.compare((*it)->m_value)) {
// had ','
tp = Rcl::SCLT_AND;
} else {
ns = neutchars((*it)->m_value, "/");
if (ns.compare((*it)->m_value)) {
tp = Rcl::SCLT_OR;
}
}
nclause = new Rcl::SearchDataClauseSimple(tp, ns,
(*it)->m_fieldspec);
}
nclause->setexclude((*it)->m_exclude);
}