%{
/*
 * Grammar and hand-written lexer turning a query string into an
 * Rcl::SearchData tree. Entry point: wasaparse().
 *
 * NOTE(review): the names of the system headers below could not be
 * recovered from the original text; the list is the set required by
 * the code in this file (sscanf, strtol, cerr, string, stack).
 */
#include <stdio.h>

#include <iostream>
#include <string>

#include "searchdata.h"
#include "wasaparse.h"
#include "wasaparse.tab.h"

using namespace std;

int yylex(yy::parser::semantic_type *);
void yyerror(char const *);
void logwhere(const char *);
class Expression;
static void qualify(Rcl::SearchDataClauseDist *, const string &);

// Stemming language handed to every SearchData created by the actions.
string stemlang("english");

// Wrap sq in a sub-query clause and append it to sd.
static void addSubQuery(Rcl::SearchData *sd, Rcl::SearchData *sq)
{
    sd->addClause(new Rcl::SearchDataClauseSub(RefCntr<Rcl::SearchData>(sq)));
}

// Most recently reduced 'query' value; this is what wasaparse() returns.
static Rcl::SearchData *g_result;
%}

%skeleton "lalr1.cc"
%defines

%union {
    string *str;
    Rcl::SearchDataClauseSimple *cl;
    Rcl::SearchData *sd;
}

/* Token strings are heap-allocated by the lexer. The actions delete the
 * ones they consume; this covers the ones the parser discards (error,
 * early termination). */
%destructor { delete $$; } <str>

%type <cl> qualquote
%type <cl> fieldexpr
%type <cl> term
%type <sd> orchain
%type <sd> query

%left AND
%right OR

%token EQUALS
%token CONTAINS
%token SMALLEREQ
%token SMALLER
%token GREATEREQ
%token GREATER

%token <str> WORD
%token <str> QUOTED
%token <str> QUALIFIERS

%%

/* A query is an AND-type SearchData to which field expressions are added
 * as clauses and OR chains are added as sub-queries. Every reduction
 * records its value in g_result. */
query: fieldexpr
{
    cerr << "q: fieldexpr" << endl;
    Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, stemlang);
    sd->addClause($1);
    $$ = sd;
    g_result = sd;
}
| query fieldexpr
{
    cerr << "q: query fieldexpr" << endl;
    $1->addClause($2);
    $$ = $1;
    g_result = $$;
}
| query AND fieldexpr
{
    cerr << "q: query AND fieldexpr" << endl;
    $1->addClause($3);
    $$ = $1;
    g_result = $$;
}
| query AND orchain
{
    cerr << "q: query AND orchain" << endl;
    addSubQuery($1, $3);
    $$ = $1;
    g_result = $$;
}
| query orchain
{
    cerr << "q: query orchain" << endl;
    addSubQuery($1, $2);
    $$ = $1;
    g_result = $$;
}
| orchain
{
    cerr << "q: orchain" << endl;
    Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, stemlang);
    addSubQuery(sd, $1);
    $$ = sd;
    g_result = $$;
}
| '(' query ')'
{
    cerr << "( query )" << endl;
    $$ = $2;
    g_result = $$;
}
;

/* Two or more field expressions joined by OR, collected in one OR-type
 * SearchData. */
orchain:
fieldexpr OR fieldexpr
{
    cerr << "orchain: fieldexpr[" << $1->gettext() << "] OR fieldexpr[" <<
        $3->gettext() << "]" << endl;
    Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_OR, stemlang);
    sd->addClause($1);
    sd->addClause($3);
    $$ = sd;
}
| orchain OR fieldexpr
{
    cerr << "orchain: orchain OR fieldexpr[" << $3->gettext() << "]" << endl;
    $1->addClause($3);
    $$ = $1;
}
;

/* A term, optionally preceded by "field <relation>" and/or '-' (exclude).
 * NOTE(review): the field-name string is deleted after setfield(); this
 * assumes setfield() copies its argument -- confirm against searchdata.h. */
fieldexpr: term
{
    //cerr << "simple fieldexpr: " << $1->gettext() << endl;
    $$ = $1;
}
| WORD EQUALS term
{
    //cerr << *$1 << " = " << $3->gettext() << endl;
    $3->setfield(*$1);
    $3->setrel(Rcl::SearchDataClause::REL_EQUALS);
    $$ = $3;
    delete $1;
}
| WORD CONTAINS term
{
    //cerr << *$1 << " : " << $3->gettext() << endl;
    $3->setfield(*$1);
    $3->setrel(Rcl::SearchDataClause::REL_CONTAINS);
    $$ = $3;
    delete $1;
}
| WORD SMALLER term
{
    //cerr << *$1 << " < " << $3->gettext() << endl;
    $3->setfield(*$1);
    $3->setrel(Rcl::SearchDataClause::REL_LT);
    $$ = $3;
    delete $1;
}
| WORD SMALLEREQ term
{
    //cerr << *$1 << " <= " << $3->gettext() << endl;
    $3->setfield(*$1);
    $3->setrel(Rcl::SearchDataClause::REL_LTE);
    $$ = $3;
    delete $1;
}
| WORD GREATER term
{
    //cerr << *$1 << " > " << $3->gettext() << endl;
    $3->setfield(*$1);
    $3->setrel(Rcl::SearchDataClause::REL_GT);
    $$ = $3;
    delete $1;
}
| WORD GREATEREQ term
{
    //cerr << *$1 << " >= " << $3->gettext() << endl;
    $3->setfield(*$1);
    $3->setrel(Rcl::SearchDataClause::REL_GTE);
    $$ = $3;
    delete $1;
}
| '-' fieldexpr
{
    //cerr << "- fieldexpr[" << $2->gettext() << "]" << endl;
    $2->setexclude(true);
    $$ = $2;
}
;

/* NOTE(review): the token strings are deleted right after the clause is
 * constructed; this assumes the clause constructors copy the text --
 * confirm against searchdata.h. */
term: WORD
{
    //cerr << "term[" << *$1 << "]" << endl;
    $$ = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, *$1);
    delete $1;
}
| qualquote
{
    $$ = $1;
}
;

/* A double-quoted phrase, optionally followed by qualifier characters
 * (see qualify()). */
qualquote: QUOTED
{
    cerr << "QUOTED[" << *$1 << "]" << endl;
    $$ = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0);
    delete $1;
}
| QUOTED QUALIFIERS
{
    cerr << "QUOTED[" << *$1 << "] QUALIFIERS[" << *$2 << "]" << endl;
    Rcl::SearchDataClauseDist *cl =
        new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0);
    qualify(cl, *$2);
    $$ = cl;
    delete $1;
    delete $2;
}
;


%%

#include <stdlib.h>
#include <stack>

// Print an error message on stderr.
void yyerror (char const *s)
{
    cerr << s << endl;
}

// Print a trace message on stderr.
void logwhere(const char *s)
{
    cerr << s << endl;
}

// Parse an optional decimal integer starting right after position cur
// in q.
// If digits are found, store the value in *pval and return the index of
// the last character consumed; otherwise leave *pval alone and return cur.
static unsigned int qualGetInt(const string& q, unsigned int cur, int *pval)
{
    unsigned int ncur = cur;
    // Written as cur + 1 < size so that an empty string cannot make the
    // unsigned subtraction wrap around.
    if (cur + 1 < q.size()) {
        const char *start = q.c_str() + cur + 1;
        char *endptr = 0;
        long val = strtol(start, &endptr, 10);
        if (endptr != start) {
            ncur += static_cast<unsigned int>(endptr - start);
            *pval = static_cast<int>(val);
        }
    }
    return ncur;
}

// Apply the qualifier characters following a quoted phrase to its clause:
//   b        weight 10.0
//   C        case-sensitive modifier
//   D        diacritics-sensitive modifier
//   e        case-sensitive + diacritics-sensitive + no-stemming
//   l        no-stemming modifier
//   o[N]     slack N (10 when no number follows)
//   p        clause type NEAR, slack 10 if the slack is still 0
//   <number> weight (ignored when equal to 1.0)
//   c d L    accepted, no effect
// Any other character is ignored.
static void qualify(Rcl::SearchDataClauseDist *cl, const string& quals)
{
    cerr << "qualify(" << cl << ", " << quals << ")" << endl;
    for (unsigned int i = 0; i < quals.length(); i++) {
        //fprintf(stderr, "qual char %c\n", quals[i]);
        switch (quals[i]) {
        case 'b':
            cl->setWeight(10.0);
            break;
        case 'c': break;
        case 'C':
            cl->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
            break;
        case 'd': break;
        case 'D':
            cl->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
            break;
        case 'e':
            cl->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
            cl->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
            cl->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
            break;
        case 'l':
            cl->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
            break;
        case 'L': break;
        case 'o':
        {
            int slack = 10;
            // Leaves i on the last digit; the loop increment moves on.
            i = qualGetInt(quals, i, &slack);
            cl->setslack(slack);
            //cerr << "set slack " << cl->getslack() << " done" << endl;
        }
        break;
        case 'p':
            cl->setTp(Rcl::SCLT_NEAR);
            if (cl->getslack() == 0) {
                cl->setslack(10);
                //cerr << "set slack " << cl->getslack() << " done" << endl;
            }
            break;
        case '.':case '0':case '1':case '2':case '3':case '4':
        case '5':case '6':case '7':case '8':case '9':
        {
            int n = 0;
            float factor = 1.0;
            // sscanf returns EOF (non-zero) on failure: compare with the
            // expected conversion count instead of testing for non-zero.
            if (sscanf(quals.c_str() + i, "%f %n", &factor, &n) == 1) {
                if (factor != 1.0) {
                    cl->setWeight(factor);
                }
            }
            // Skip what was consumed, minus one for the loop increment.
            if (n > 0)
                i += n - 1;
        }
        break;
        default:
            break;
        }
    }
}


// Lexer input state: pushed-back characters, the input text, and the
// index of the next unread character in it.
static stack<int> g_returns;
static string g_input;
static unsigned int g_index;

// Return the next input character, taking pushed-back characters first.
// Returns 0 at end of input.
int GETCHAR()
{
    if (!g_returns.empty()) {
        int c = g_returns.top();
        g_returns.pop();
        return c;
    }
    if (g_index < g_input.size())
        return g_input[g_index++];
    return 0;
}

// Push c back so that the next GETCHAR() returns it.
static void UNGETCHAR(int c)
{
    g_returns.push(c);
}

// Simpler to let the quoted string reader store qualifiers in there,
// because their nature is determined by the absence of white space
// after the closing dquote. e.g "some term"abc. We could avoid this
// by making white space a token
static string qualifiers;

// specialstartchars are special only at the beginning of a token
// (e.g. doctor-who is a term, not 2 terms separated by '-')
static string specialstartchars("-");
// specialinchars are special everywhere except inside a quoted string
static string specialinchars(":=<>()");
static string whites(" \t\n\r");

// Read a quoted string. Called with the first dquote already read.
// A backslash makes the next character literal. The characters directly
// following the closing dquote are stored in 'qualifiers' and returned
// as a QUALIFIERS token by the next yylex() call.
// Stores the newly allocated string in yylval->str and returns QUOTED.
static int parseString(yy::parser::semantic_type *yylval)
{
    string* value = new string();
    qualifiers.clear();
    int c;
    while ((c = GETCHAR())) {
        switch (c) {
        case '\\':
            /* Escape: get next char */
            c = GETCHAR();
            if (c == 0) {
                /* Input ends on the backslash: nothing to add (do not
                   store a NUL byte in the phrase) */
                goto out;
            }
            value->push_back(c);
            break;
        case '"':
            /* End of string. Look for qualifiers: everything up to white
               space, end of input, or a special character. The special
               character is pushed back, so that e.g. the ')' in ("a b")
               still reaches the parser */
            while ((c = GETCHAR()) &&
                   whites.find_first_of(static_cast<char>(c)) ==
                   string::npos) {
                if (specialinchars.find_first_of(static_cast<char>(c)) !=
                    string::npos) {
                    UNGETCHAR(c);
                    break;
                }
                qualifiers.push_back(c);
            }
            goto out;
        default:
            value->push_back(c);
        }
    }
out:
    //cerr << "GOT QUOTED [" << *value << "] quals [" << qualifiers << "]" << endl;
    yylval->str = value;
    return yy::parser::token::QUOTED;
}


// Return the next token, or 0 at end of input.
// WORD, QUOTED and QUALIFIERS carry a newly allocated string in
// yylval->str. '-' (at the start of a token only), '(' and ')' are
// returned as themselves.
int yylex(yy::parser::semantic_type *yylval)
{
    //cerr << "yylex: input [" << g_input.substr(g_index) << "]" << endl;

    // Qualifiers stored by the previous quoted string come out first.
    if (!qualifiers.empty()) {
        yylval->str = new string();
        yylval->str->swap(qualifiers);
        return yy::parser::token::QUALIFIERS;
    }

    int c;

    /* Skip white space.  */
    while ((c = GETCHAR ()) &&
           whites.find_first_of(static_cast<char>(c)) != string::npos)
        continue;

    if (c == 0)
        return 0;

    if (specialstartchars.find_first_of(static_cast<char>(c)) !=
        string::npos) {
        //cerr << "yylex: return " << c << endl;
        return c;
    }

    // field-term relations
    switch (c) {
    case '=': return yy::parser::token::EQUALS;
    case ':': return yy::parser::token::CONTAINS;
    case '<': {
        int c1 = GETCHAR();
        if (c1 == '=') {
            return yy::parser::token::SMALLEREQ;
        } else {
            // Push back the look-ahead character, not the operator.
            UNGETCHAR(c1);
            return yy::parser::token::SMALLER;
        }
    }
    case '>': {
        int c1 = GETCHAR();
        if (c1 == '=') {
            return yy::parser::token::GREATEREQ;
        } else {
            // Push back the look-ahead character, not the operator.
            UNGETCHAR(c1);
            return yy::parser::token::GREATER;
        }
    }
    case '(': case ')':
        return c;
    }

    if (c == '"')
        return parseString(yylval);

    UNGETCHAR(c);

    // Other chars start a term or field name or reserved word
    string* word = new string();
    while ((c = GETCHAR())) {
        if (whites.find_first_of(static_cast<char>(c)) != string::npos) {
            //cerr << "Word broken by whitespace" << endl;
            break;
        } else if (specialinchars.find_first_of(static_cast<char>(c)) !=
                   string::npos) {
            //cerr << "Word broken by special char" << endl;
            UNGETCHAR(c);
            break;
        } else {
            word->push_back(c);
        }
    }

    if (!word->compare("AND") || !word->compare("&&")) {
        delete word;
        return yy::parser::token::AND;
    } else if (!word->compare("OR") || !word->compare("||")) {
        delete word;
        return yy::parser::token::OR;
    }

//    cerr << "Got word [" << *word << "]" << endl;
    yylval->str = word;
    return yy::parser::token::WORD;
}

// Error callback of the generated parser: print the message on stderr.
void yy::parser::error(location_type const&, string const& m)
{
    cerr << m << endl;
}

// Parse the query text 'in' and return the resulting search data, or 0
// if parsing failed.
// NOTE(review): the result of the previous call is deleted at the start
// of each call, so a returned pointer only stays valid until the next
// call and must not be deleted by the caller. If callers take ownership,
// that delete is a double free -- confirm against the callers.
Rcl::SearchData *wasaparse(const string& in)
{
    cerr << "wasaparse(" << in << ")" << endl;

    // Reset all lexer state left over from a previous parse.
    g_index = 0;
    g_returns = stack<int>();
    g_input = in;
    qualifiers.clear();
    delete g_result;
    g_result = 0;

    yy::parser parser;
    if (parser.parse() != 0) {
        // Error
        cerr << "Parse failed" << endl;
        delete g_result;
        g_result = 0;
    }
    cerr << "wasaparse: returning " << g_result << endl;
    return g_result;
}