Query language: it used to be that an entry lacking whitespace but
splittable, like [term1,term2], was transformed into a phrase search, which made sense in some cases, but not so many. This commit changes this: [term1,term2] now means term1 AND term2, and [term1/term2] means term1 OR term2. This is especially useful for field searches, where you would previously have been forced to repeat the field name for every term: [somefield:term1 somefield:term2] can now be expressed as [somefield:term1,term2].
This commit is contained in:
parent
5f05f5d588
commit
4a783beadc
@ -312,16 +312,8 @@ bool RecollProtocol::doSearch(const QueryDesc& qd)
|
||||
if (opt == 'f') {
|
||||
clp = new Rcl::SearchDataClauseFilename(qs);
|
||||
} else {
|
||||
// If there is no white space inside the query, then the user
|
||||
// certainly means it as a phrase.
|
||||
bool isreallyaphrase = false;
|
||||
if (!TextSplit::hasVisibleWhite(qs))
|
||||
isreallyaphrase = true;
|
||||
clp = isreallyaphrase ?
|
||||
new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, qs, 0) :
|
||||
new Rcl::SearchDataClauseSimple(opt == 'o' ?
|
||||
Rcl::SCLT_OR : Rcl::SCLT_AND,
|
||||
qs);
|
||||
clp = new Rcl::SearchDataClauseSimple(opt == 'o' ? Rcl::SCLT_OR :
|
||||
Rcl::SCLT_AND, qs);
|
||||
}
|
||||
sd = new Rcl::SearchData(Rcl::SCLT_OR, "english");
|
||||
if (sd && clp)
|
||||
|
||||
@ -255,10 +255,6 @@ bool SSearch::startSimpleSearch(const string& u8, int maxexp)
|
||||
Rcl::SearchDataClause *clp = 0;
|
||||
if (tp == SST_FNM) {
|
||||
clp = new Rcl::SearchDataClauseFilename(u8);
|
||||
} else if (!TextSplit::hasVisibleWhite(u8)) {
|
||||
// If there is no white space inside the query, then the user
|
||||
// certainly means it as a phrase.
|
||||
clp = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, u8, 0);
|
||||
} else {
|
||||
// ANY or ALL, several words.
|
||||
if (tp == SST_ANY) {
|
||||
|
||||
@ -317,16 +317,9 @@ endopts:
|
||||
if (op_flags & OPT_f) {
|
||||
clp = new Rcl::SearchDataClauseFilename(qs);
|
||||
} else {
|
||||
// If there is no white space inside the query, then the user
|
||||
// certainly means it as a phrase.
|
||||
bool isreallyaphrase = false;
|
||||
if (!TextSplit::hasVisibleWhite(qs))
|
||||
isreallyaphrase = true;
|
||||
clp = isreallyaphrase ?
|
||||
new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, qs, 0) :
|
||||
new Rcl::SearchDataClauseSimple((op_flags & OPT_o)?
|
||||
Rcl::SCLT_OR : Rcl::SCLT_AND,
|
||||
qs);
|
||||
clp = new Rcl::SearchDataClauseSimple((op_flags & OPT_o)?
|
||||
Rcl::SCLT_OR : Rcl::SCLT_AND,
|
||||
qs);
|
||||
}
|
||||
if (sd)
|
||||
sd->addClause(clp);
|
||||
|
||||
@ -330,6 +330,7 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
|
||||
unsigned int mods = 0;
|
||||
if (checkSubMatch(SMI_QUOTED, match, reason)) {
|
||||
nclause->m_value = match;
|
||||
mods |= WasaQuery::WQM_QUOTED;
|
||||
} else if (checkSubMatch(SMI_TERM, match, reason)) {
|
||||
nclause->m_value = match;
|
||||
}
|
||||
|
||||
@ -53,12 +53,12 @@ public:
|
||||
supports "contain" except for a size field */
|
||||
enum Rel {REL_NULL, REL_EQUALS, REL_CONTAINS, REL_LT, REL_LTE,
|
||||
REL_GT, REL_GTE};
|
||||
/** Modifiers for term handling: case/diacritics handling,
|
||||
stemming control */
|
||||
/** Modifiers for terms: case/diacritics handling,
|
||||
stemming control... */
|
||||
enum Modifier {WQM_CASESENS = 1, WQM_DIACSENS = 2, WQM_NOSTEM = 4,
|
||||
WQM_BOOST = 8, WQM_PROX = 0x10, WQM_SLOPPY = 0x20,
|
||||
WQM_WORDS = 0x40, WQM_PHRASESLACK = 0x80, WQM_REGEX = 0x100,
|
||||
WQM_FUZZY = 0x200};
|
||||
WQM_FUZZY = 0x200, WQM_QUOTED = 0x400};
|
||||
|
||||
typedef vector<WasaQuery*> subqlist_t;
|
||||
|
||||
|
||||
@ -200,12 +200,8 @@ static Rcl::SearchData *wasaQueryToRcl(const RclConfig *config,
|
||||
LOGERR(("wasaQueryToRcl: excl clause inside OR list!\n"));
|
||||
continue;
|
||||
}
|
||||
// I'm not sure I understand the phrase/near detection
|
||||
// thereafter anymore, maybe it would be better to have an
|
||||
// explicit flag. Mods can only be set after a double
|
||||
// quote.
|
||||
if (TextSplit::hasVisibleWhite((*it)->m_value) || mods) {
|
||||
|
||||
if (mods & WasaQuery::WQM_QUOTED) {
|
||||
Rcl::SClType tp = (mods & WasaQuery::WQM_PROX) ?
|
||||
Rcl::SCLT_NEAR :
|
||||
Rcl::SCLT_PHRASE;
|
||||
@ -213,12 +209,27 @@ static Rcl::SearchData *wasaQueryToRcl(const RclConfig *config,
|
||||
(*it)->m_slack,
|
||||
(*it)->m_fieldspec);
|
||||
} else {
|
||||
Rcl::SClType tp = (*it)->m_exclude ?
|
||||
Rcl::SCLT_OR:
|
||||
// If term has commas or slashes inside, take it
|
||||
// as a list, turn the slashes/commas to spaces,
|
||||
// leave unquoted. Otherwise, this would end up as
|
||||
// a phrase query. This is a handy way to enter
|
||||
// multiple terms to be searched inside a
|
||||
// field. We interpret ',' as AND, and '/' as
|
||||
// OR. No mixes allowed and ',' wins.
|
||||
Rcl::SClType tp = (*it)->m_exclude ? Rcl::SCLT_OR:
|
||||
Rcl::SCLT_AND;
|
||||
nclause =
|
||||
new Rcl::SearchDataClauseSimple(tp, (*it)->m_value,
|
||||
(*it)->m_fieldspec);
|
||||
string ns = neutchars((*it)->m_value, ",");
|
||||
if (ns.compare((*it)->m_value)) {
|
||||
// had ','
|
||||
tp = Rcl::SCLT_AND;
|
||||
} else {
|
||||
ns = neutchars((*it)->m_value, "/");
|
||||
if (ns.compare((*it)->m_value)) {
|
||||
tp = Rcl::SCLT_OR;
|
||||
}
|
||||
}
|
||||
nclause = new Rcl::SearchDataClauseSimple(tp, ns,
|
||||
(*it)->m_fieldspec);
|
||||
}
|
||||
nclause->setexclude((*it)->m_exclude);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user