diff --git a/src/query/wasastringtoquery.cpp b/src/query/wasastringtoquery.cpp index f01c7aa4..f242c1e1 100644 --- a/src/query/wasastringtoquery.cpp +++ b/src/query/wasastringtoquery.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: wasastringtoquery.cpp,v 1.7 2008-07-01 11:51:51 dockes Exp $ (C) 2006 J.F.Dockes"; +static char rcsid[] = "@(#$Id: wasastringtoquery.cpp,v 1.8 2008-08-26 13:47:21 dockes Exp $ (C) 2006 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -41,10 +41,24 @@ WasaQuery::~WasaQuery() m_subs.clear(); } +static const char* reltosrel(WasaQuery::Rel rel) // Returns the operator text for a relation code; called by describe() +{ + switch (rel) { + case WasaQuery::REL_EQUALS: return "="; + case WasaQuery::REL_CONTAINS: return ":"; + case WasaQuery::REL_LT: return "<"; + case WasaQuery::REL_LTE: return "<="; + case WasaQuery::REL_GT: return ">"; + case WasaQuery::REL_GTE: return ">="; + default: return "?"; // any relation code without a case above + } +} + void WasaQuery::describe(string &desc) const { desc += "("; - string fieldspec = m_fieldspec.empty() ? string() : m_fieldspec + ": "; + string fieldspec = m_fieldspec.empty() ? string() : m_fieldspec + // the field name is now followed by its relation operator + reltosrel(m_rel); switch (m_op) { case OP_NULL: desc += "NULL"; @@ -71,6 +85,7 @@ void WasaQuery::describe(string &desc) const desc.erase(desc.length() - 1); desc += ")"; if (m_modifiers != 0) { + if (m_modifiers & WQM_BOOST) desc += "BOOST|"; if (m_modifiers & WQM_CASESENS) desc += "CASESENS|"; if (m_modifiers & WQM_DIACSENS) desc += "DIACSENS|"; if (m_modifiers & WQM_NOSTEM) desc += "NOSTEM|"; @@ -96,14 +111,6 @@ void WasaQuery::describe(string &desc) const * key:Value * or * Value - ([+-]?) # Required or Prohibited (optional) - (\w+:)? 
# Key (optional) - ( # Query Text - (\"([^\"]*)\"?)# quoted - | # or - ([^\s\"]+) # unquoted - ) - "; */ /* The master regular expression used to parse a query string @@ -113,41 +120,47 @@ void WasaQuery::describe(string &desc) const static const char * parserExpr = "([oO][rR]|\\|\\|)[[:space:]]*" //1 OR,or,|| "|" - "(" //2 - "([+-])?" //3 Force or exclude indicator - "(" //4 - "([[:alpha:]][[:alnum:]]*)" //5 Field spec: "fieldname:" - ":)?" - "(" //6 - "(\"" //7 - "([^\"]+)" //8 "A quoted term" + "([Aa][Nn][Dd]|&&)[[:space:]]*" // 2 AND,and,&& (ignored, default) + "|" + "(" //3 + "([+-])?" //4 Force or exclude indicator + "(" //5 + "([[:alpha:]][[:alnum:]:]*)" //6 Field spec, may itself contain colons, e.g. "dc:title:letitre" + "[[:space:]]*" + "(:|=|<|>|<=|>=)" //7 Relation operator. NOTE(review): "<" and ">" are listed before "<=" and ">="; the two-character forms match only if the regex engine prefers the longest alternative - confirm + "[[:space:]]*)?" + "(" //8 + "(\"" //9 + "([^\"]+)" //10 "A quoted term" "\")" - "([a-zA-Z0-9]*)" //9 modifiers + "([a-zA-Z0-9]*)" //11 modifiers "|" - "([^[:space:]\"]+)" //10 ANormalTerm + "([^[:space:]\"]+)" //12 ANormalTerm ")" ")[[:space:]]*" ; // For debugging the parser. 
But see also NMATCH static const char *matchNames[] = { - /*0*/ "", - /*1*/ "OR", - /*2*/ "", - /*3*/ "+-", - /*4*/ "", - /*5*/ "FIELD", - /*6*/ "", - /*7*/ "", - /*8*/ "QUOTEDTERM", - /*9*/ "MODIIFIERS", - /*10*/ "TERM", + /* 0*/ "", + /* 1*/ "OR", + /* 2*/ "AND", + /* 3: whole clause */ "", + /* 4*/ "+-", + /* 5: field and relation */ "", + /* 6*/ "FIELD", + /* 7*/ "RELATION", + /* 8: term alternatives */ "", + /* 9: quoted term with its quotes */ "", + /*10*/ "QUOTEDTERM", + /*11*/ "MODIIFIERS", + /*12*/ "TERM", }; #define NMATCH (sizeof(matchNames) / sizeof(char *)) // Symbolic names for the interesting submatch indices -enum SbMatchIdx {SMI_OR=1, SMI_PM=3, SMI_FIELD=5, SMI_QUOTED=8, - SMI_MODIF=9, SMI_TERM=10}; +enum SbMatchIdx {SMI_OR=1, SMI_AND=2, SMI_PM=4, SMI_FIELD=6, SMI_REL=7, // values are the group numbers annotated in parserExpr + SMI_QUOTED=10, SMI_MODIF=11, SMI_TERM=12}; static const int maxmatchlen = 1024; static const int errbuflen = 300; @@ -284,6 +297,10 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason) } prev_or = true; + } else if (checkSubMatch(SMI_AND, match, reason)) { + // Do nothing, AND is the default. 
We might want to check for + // errors like consecutive ANDs, or OR AND + } else { WasaQuery *nclause = new WasaQuery; @@ -312,19 +329,20 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason) unsigned int mods = 0; for (unsigned int i = 0; i < strlen(match); i++) { switch (match[i]) { - case 'C': mods |= WasaQuery::WQM_CASESENS; break; - case 'D': mods |= WasaQuery::WQM_DIACSENS; break; - case 'l': mods |= WasaQuery::WQM_NOSTEM; break; - case 'e': mods |= WasaQuery::WQM_CASESENS | - WasaQuery::WQM_DIACSENS | - WasaQuery::WQM_NOSTEM; break; - case 'f': mods |= WasaQuery::WQM_FUZZY; break; case 'b': mods |= WasaQuery::WQM_BOOST; break; + case 'c': break; // 'c', 'd' and 'L' are listed but set no flag + case 'C': mods |= WasaQuery::WQM_CASESENS; break; + case 'd': break; + case 'D': mods |= WasaQuery::WQM_DIACSENS; break; + case 'e': mods |= WasaQuery::WQM_CASESENS | WasaQuery::WQM_DIACSENS | WasaQuery::WQM_NOSTEM; break; // 'e' combines the C, D and l flags + case 'f': mods |= WasaQuery::WQM_FUZZY; break; + case 'l': mods |= WasaQuery::WQM_NOSTEM; break; + case 'L': break; + case 'o': mods |= WasaQuery::WQM_PHRASESLACK; break; case 'p': mods |= WasaQuery::WQM_PROX; break; + case 'r': mods |= WasaQuery::WQM_REGEX; break; case 's': mods |= WasaQuery::WQM_SLOPPY; break; case 'w': mods |= WasaQuery::WQM_WORDS; break; - case 'o': mods |= WasaQuery::WQM_PHRASESLACK; break; - case 'r': mods |= WasaQuery::WQM_REGEX; break; } } nclause->m_modifiers = WasaQuery::Modifier(mods); @@ -336,6 +354,29 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason) // etc. here but this went away from the spec. 
See 1.4 // if it comes back nclause->m_fieldspec = match; + if (checkSubMatch(SMI_REL, match, reason)) { + switch (match[0]) { // first character of the relation operator; match[1] tells "<" from "<=" and ">" from ">=" + case '=':nclause->m_rel = WasaQuery::REL_EQUALS;break; + case ':':nclause->m_rel = WasaQuery::REL_CONTAINS;break; + case '<': + if (match[1] == '=') + nclause->m_rel = WasaQuery::REL_LTE; + else + nclause->m_rel = WasaQuery::REL_LT; + break; + case '>': + if (match[1] == '=') + nclause->m_rel = WasaQuery::REL_GTE; + else + nclause->m_rel = WasaQuery::REL_GT; + break; + default: // any other first character falls back to "contains" + nclause->m_rel = WasaQuery::REL_CONTAINS; + } + } else { + // ?? If field matched we should have a relation. NOTE(review): m_rel is assigned only in this block; confirm the WasaQuery constructor initializes it for clauses that never reach here + nclause->m_rel = WasaQuery::REL_CONTAINS; + } } // +- indicator ? @@ -345,7 +386,6 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason) nclause->m_op = WasaQuery::OP_LEAF; } - if (prev_or) { // The precedent token was an OR, add new clause to or chain //DPRINT((stderr, "Adding to OR chain\n")); diff --git a/src/query/wasastringtoquery.h b/src/query/wasastringtoquery.h index 69bc7b66..6ce25625 100644 --- a/src/query/wasastringtoquery.h +++ b/src/query/wasastringtoquery.h @@ -1,6 +1,6 @@ #ifndef _WASASTRINGTOQUERY_H_INCLUDED_ #define _WASASTRINGTOQUERY_H_INCLUDED_ -/* @(#$Id: wasastringtoquery.h,v 1.6 2008-01-17 11:14:13 dockes Exp $ (C) 2006 J.F.Dockes */ +/* @(#$Id: wasastringtoquery.h,v 1.7 2008-08-26 13:47:21 dockes Exp $ (C) 2006 J.F.Dockes */ /* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,10 +23,12 @@ using std::string; using std::vector; +/* Note: Xesam used to be named wasabi. We changed the references to wasabi in + the comments, but not the code */ /** - * A simple class to represent a parsed wasabiSimple query element. - * Can hold a string value or an array of subqueries. + * A simple class to represent a parsed Xesam user language element. 
+ * Can hold one leaf element or an array of subqueries to be joined by AND/OR * * The complete query is represented by a top WasaQuery holding a * chain of ANDed subclauses. Some of the subclauses may be themselves @@ -35,14 +37,30 @@ using std::vector; * * For LEAF elements, the value can hold one or several words. In the * latter case, it should be interpreted as a phrase (comes from a - * user-entered "quoted string"). + * user-entered "quoted string"), except if the modifier flags say otherwise. * * Some fields only make sense either for compound or LEAF queries. This * is commented for each. We should subclass really. + * + * Note that wasaStringToQuery supposedly parses the whole Xesam + * User Search Language v 0.95, but that some elements are dropped or + * ignored during the translation to a native Recoll query in wasaToRcl */ class WasaQuery { public: + /** Type of this element: leaf or AND/OR chain */ enum Op {OP_NULL, OP_LEAF, OP_EXCL, OP_OR, OP_AND}; + /** Relation to be searched between field and value. Recoll actually only + supports "contain" */ + enum Rel {REL_NULL, REL_EQUALS, REL_CONTAINS, REL_LT, REL_LTE, + REL_GT, REL_GTE}; + /** Modifiers for term handling: case/diacritics handling, + stemming control. Each value is a distinct bit, so flags can be ORed together */ + enum Modifier {WQM_CASESENS = 1, WQM_DIACSENS = 2, WQM_NOSTEM = 4, + WQM_BOOST = 8, WQM_PROX = 0x10, WQM_SLOPPY = 0x20, + WQM_WORDS = 0x40, WQM_PHRASESLACK = 0x80, WQM_REGEX = 0x100, + WQM_FUZZY = 0x200}; + typedef vector subqlist_t; WasaQuery() @@ -59,6 +77,8 @@ public: /** Field specification if any (ie: title, author ...) Only OPT_LEAF */ string m_fieldspec; + /** Relation between field and value: one of =, :, <, >, <=, >= */ + WasaQuery::Rel m_rel; /* String value. Valid for op == OP_LEAF or EXCL */ string m_value; @@ -66,13 +86,7 @@ public: /** Subqueries. 
Valid for conjunctions */ vector m_subs; - /** Modifiers for term handling: case/diacritics handling, - stemming control */ - enum Modifier {WQM_CASESENS = 1, WQM_DIACSENS = 2, WQM_NOSTEM = 4, - WQM_BOOST = 8, WQM_PROX = 0x10, WQM_SLOPPY = 0x20, - WQM_WORDS = 0x40, WQM_PHRASESLACK = 0x80, WQM_REGEX = 0x100, - WQM_FUZZY = 0x200}; - unsigned int m_modifiers; + unsigned int m_modifiers; }; /** diff --git a/src/query/wasatorcl.cpp b/src/query/wasatorcl.cpp index 3cc963fe..01fad69c 100644 --- a/src/query/wasatorcl.cpp +++ b/src/query/wasatorcl.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: wasatorcl.cpp,v 1.13 2008-01-16 11:14:38 dockes Exp $ (C) 2006 J.F.Dockes"; +static char rcsid[] = "@(#$Id: wasatorcl.cpp,v 1.14 2008-08-26 13:47:21 dockes Exp $ (C) 2006 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -68,9 +68,11 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa) LOGINFO(("wasaQueryToRcl: found bad NULL or AND q type in list\n")); continue; case WasaQuery::OP_LEAF: - + unsigned int mods = (unsigned int)(*it)->m_modifiers; // NOTE(review): declared with an initializer directly under the case label, without braces; if further case labels follow in this switch (not visible in this hunk) C++ rejects the jump past this initialization - confirm and brace the case body if so // Special cases (mime, category, dir filter ...). Not pretty. - if (!stringicmp("mime", (*it)->m_fieldspec)) { + if (!stringicmp("mime", (*it)->m_fieldspec) || + !stringicmp("format", (*it)->m_fieldspec) // "format" is handled exactly like "mime" + ) { sdata->addFiletype((*it)->m_value); break; } @@ -95,8 +97,14 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa) } if ((*it)->m_value.find_first_of(" \t\n\r") != string::npos) { - nclause = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, - (*it)->m_value, 0, + int slack = (mods & WasaQuery::WQM_PHRASESLACK) ? 10 : 0; // phrase-slack modifier passes 10 instead of the former fixed 0 + Rcl::SClType tp = Rcl::SCLT_PHRASE; + if (mods & WasaQuery::WQM_PROX) { + tp = Rcl::SCLT_NEAR; + slack = 10; // proximity modifier: NEAR clause, slack 10 whether or not phrase-slack is set + } + nclause = new Rcl::SearchDataClauseDist(tp, (*it)->m_value, + slack, (*it)->m_fieldspec); } else { nclause = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND,