none
This commit is contained in:
parent
9176cdd147
commit
c9deaa2d31
453
src/query/wasa.y
Normal file
453
src/query/wasa.y
Normal file
@ -0,0 +1,453 @@
|
||||
%{
|
||||
#include <stdio.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "searchdata.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int yylex(void);
|
||||
void yyerror(char const *);
|
||||
void logwhere(const char *);
|
||||
class Expression;
|
||||
static void qualify(Rcl::SearchDataClauseDist *, const string &);
|
||||
|
||||
string stemlang("english");
|
||||
|
||||
%}
|
||||
|
||||
%union {
|
||||
string *str;
|
||||
Rcl::SearchDataClauseSimple *cl;
|
||||
Rcl::SearchData *sd;
|
||||
}
|
||||
|
||||
%type <cl> qualquote
|
||||
%type <cl> fieldexpr
|
||||
%type <cl> term
|
||||
%type <sd> orchain
|
||||
%type <sd> query
|
||||
|
||||
%left AND
|
||||
%right OR
|
||||
|
||||
%token EQUALS
|
||||
%token CONTAINS
|
||||
%token SMALLEREQ
|
||||
%token SMALLER
|
||||
%token GREATEREQ
|
||||
%token GREATER
|
||||
|
||||
%token <str> WORD
|
||||
%token <str> QUOTED
|
||||
%token <str> QUALIFIERS
|
||||
|
||||
%%
|
||||
|
||||
query: fieldexpr
|
||||
{
|
||||
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, stemlang);
|
||||
sd->addClause($1);
|
||||
$$ = sd;
|
||||
cerr << "q: fieldexpr" << endl;
|
||||
}
|
||||
| query fieldexpr
|
||||
{
|
||||
cerr << "q: query fieldexpr" << endl;
|
||||
$1->addClause($2);
|
||||
$$ = $1;
|
||||
}
|
||||
| query AND fieldexpr
|
||||
{
|
||||
cerr << "q: query AND fieldexpr" << endl;
|
||||
$1->addClause($3);
|
||||
$$ = $1;
|
||||
}
|
||||
| query AND orchain
|
||||
{
|
||||
cerr << "q: query AND orchain";
|
||||
Rcl::SearchDataClauseSub *sub =
|
||||
new Rcl::SearchDataClauseSub(RefCntr<Rcl::SearchData>($1));
|
||||
$1->addClause(sub);
|
||||
$$ = $1;
|
||||
}
|
||||
| query orchain
|
||||
{
|
||||
cerr << "q: query orchain" << endl;
|
||||
Rcl::SearchDataClauseSub *sub =
|
||||
new Rcl::SearchDataClauseSub(RefCntr<Rcl::SearchData>($1));
|
||||
$1->addClause(sub);
|
||||
$$ = $1;
|
||||
}
|
||||
| orchain
|
||||
{
|
||||
cerr << "q: orchain" << endl;
|
||||
$$ = $1;
|
||||
}
|
||||
| '(' query ')'
|
||||
{
|
||||
cerr << "( query )" << endl;
|
||||
$$ = $2;
|
||||
}
|
||||
;
|
||||
|
||||
orchain:
|
||||
fieldexpr OR fieldexpr
|
||||
{
|
||||
cerr << "orchain: fieldexpr[" << $1->gettext() << "] OR fieldexpr[" <<
|
||||
$3->gettext() << "]" << endl;
|
||||
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_OR, stemlang);
|
||||
sd->addClause($1);
|
||||
sd->addClause($3);
|
||||
$$ = sd;
|
||||
}
|
||||
| orchain OR fieldexpr
|
||||
{
|
||||
cerr << "orchain: orchain OR fieldexpr[" << $3->gettext() << "]" << endl;
|
||||
$1->addClause($3);
|
||||
$$ = $1;
|
||||
}
|
||||
;
|
||||
|
||||
fieldexpr: term
|
||||
{
|
||||
//cerr << "simple fieldexpr: " << $1->gettext() << endl;
|
||||
$$ = $1;
|
||||
}
|
||||
| WORD EQUALS term
|
||||
{
|
||||
//cerr << *$1 << " = " << $3->gettext() << endl;
|
||||
$3->setfield(*$1);
|
||||
$3->setrel(Rcl::SearchDataClause::REL_EQUALS);
|
||||
$$ = $3;
|
||||
}
|
||||
| WORD CONTAINS term
|
||||
{
|
||||
//cerr << *$1 << " : " << $3->gettext() << endl;
|
||||
$3->setfield(*$1);
|
||||
$3->setrel(Rcl::SearchDataClause::REL_CONTAINS);
|
||||
$$ = $3;
|
||||
}
|
||||
| WORD SMALLER term
|
||||
{
|
||||
//cerr << *$1 << " < " << $3->gettext() << endl;
|
||||
$3->setfield(*$1);
|
||||
$3->setrel(Rcl::SearchDataClause::REL_LT);
|
||||
$$ = $3;
|
||||
}
|
||||
| WORD SMALLEREQ term
|
||||
{
|
||||
//cerr << *$1 << " <= " << $3->gettext() << endl;
|
||||
$3->setfield(*$1);
|
||||
$3->setrel(Rcl::SearchDataClause::REL_LTE);
|
||||
$$ = $3;
|
||||
}
|
||||
| WORD GREATER term
|
||||
{
|
||||
//cerr << *$1 << " > " << $3->gettext() << endl;
|
||||
$3->setfield(*$1);
|
||||
$3->setrel(Rcl::SearchDataClause::REL_GT);
|
||||
$$ = $3;
|
||||
}
|
||||
| WORD GREATEREQ term
|
||||
{
|
||||
//cerr << *$1 << " >= " << $3->gettext() << endl;
|
||||
$3->setfield(*$1);
|
||||
$3->setrel(Rcl::SearchDataClause::REL_GTE);
|
||||
$$ = $3;
|
||||
}
|
||||
| '-' fieldexpr
|
||||
{
|
||||
//cerr << "- fieldexpr[" << $2->gettext() << "]" << endl;
|
||||
$2->setexclude(true);
|
||||
$$ = $2;
|
||||
}
|
||||
;
|
||||
|
||||
term: WORD
|
||||
{
|
||||
//cerr << "term[" << *$1 << "]" << endl;
|
||||
$$ = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, *$1);
|
||||
}
|
||||
| qualquote
|
||||
{
|
||||
$$ = $1;
|
||||
}
|
||||
;
|
||||
|
||||
qualquote: QUOTED
|
||||
{
|
||||
cerr << "QUOTED[" << *$1 << "]" << endl;
|
||||
$$ = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0);
|
||||
}
|
||||
| QUOTED QUALIFIERS
|
||||
{
|
||||
cerr << "QUOTED[" << *$1 << "] QUALIFIERS[" << *$2 << "]" << endl;
|
||||
Rcl::SearchDataClauseDist *cl =
|
||||
new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0);
|
||||
qualify(cl, *$2);
|
||||
$$ = cl;
|
||||
}
|
||||
;
|
||||
|
||||
|
||||
%%
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stack>
|
||||
|
||||
void yyerror (char const *s)
|
||||
{
|
||||
cerr << s << endl;
|
||||
}
|
||||
|
||||
void logwhere(const char *s)
|
||||
{
|
||||
cerr << s << endl;
|
||||
}
|
||||
|
||||
// Look for int at index, skip and return new index found? value.
|
||||
static unsigned int qualGetInt(const string& q, unsigned int cur, int *pval)
|
||||
{
|
||||
unsigned int ncur = cur;
|
||||
if (cur < q.size() - 1) {
|
||||
char *endptr;
|
||||
int val = strtol(&q[cur + 1], &endptr, 10);
|
||||
if (endptr != &q[cur + 1]) {
|
||||
ncur += endptr - &q[cur + 1];
|
||||
*pval = val;
|
||||
}
|
||||
}
|
||||
return ncur;
|
||||
}
|
||||
|
||||
static void qualify(Rcl::SearchDataClauseDist *cl, const string& quals)
|
||||
{
|
||||
cerr << "qualify(" << cl << ", " << quals << ")" << endl;
|
||||
for (unsigned int i = 0; i < quals.length(); i++) {
|
||||
//fprintf(stderr, "qual char %c\n", quals[i]);
|
||||
switch (quals[i]) {
|
||||
case 'b':
|
||||
cl->setWeight(10.0);
|
||||
break;
|
||||
case 'c': break;
|
||||
case 'C':
|
||||
cl->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
|
||||
break;
|
||||
case 'd': break;
|
||||
case 'D':
|
||||
cl->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
|
||||
break;
|
||||
case 'e':
|
||||
cl->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
|
||||
cl->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
|
||||
cl->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
||||
break;
|
||||
case 'l':
|
||||
cl->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
||||
break;
|
||||
case 'L': break;
|
||||
case 'o':
|
||||
{
|
||||
int slack = 10;
|
||||
i = qualGetInt(quals, i, &slack);
|
||||
cl->setslack(slack);
|
||||
//cerr << "set slack " << cl->getslack() << " done" << endl;
|
||||
}
|
||||
break;
|
||||
case 'p':
|
||||
cl->setTp(Rcl::SCLT_NEAR);
|
||||
if (cl->getslack() == 0) {
|
||||
cl->setslack(10);
|
||||
//cerr << "set slack " << cl->getslack() << " done" << endl;
|
||||
}
|
||||
break;
|
||||
case '.':case '0':case '1':case '2':case '3':case '4':
|
||||
case '5':case '6':case '7':case '8':case '9':
|
||||
{
|
||||
int n = 0;
|
||||
float factor = 1.0;
|
||||
if (sscanf(&(quals[i]), "%f %n", &factor, &n)) {
|
||||
if (factor != 1.0) {
|
||||
cl->setWeight(factor);
|
||||
}
|
||||
}
|
||||
if (n > 0)
|
||||
i += n - 1;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static stack<int> returns;
|
||||
static string input;
|
||||
static unsigned int index;
|
||||
|
||||
int GETCHAR()
|
||||
{
|
||||
if (!returns.empty()) {
|
||||
int c = returns.top();
|
||||
returns.pop();
|
||||
return c;
|
||||
}
|
||||
if (index < input.size())
|
||||
return input[index++];
|
||||
return 0;
|
||||
}
|
||||
static void UNGETCHAR(int c)
|
||||
{
|
||||
returns.push(c);
|
||||
}
|
||||
|
||||
// Simpler to let the quoted string reader store qualifiers in there,
|
||||
// because their nature is determined by the absence of white space
|
||||
// after the closing dquote. e.g "some term"abc. We could avoid this
|
||||
// by making white space a token
|
||||
static string qualifiers;
|
||||
|
||||
// specialstartchars are special only at the beginning of a token
|
||||
// (e.g. doctor-who is a term, not 2 terms separated by '-')
|
||||
static string specialstartchars("-");
|
||||
// specialinchars are special everywhere except inside a quoted string
|
||||
static string specialinchars(":=<>()");
|
||||
static string whites(" \t\n\r");
|
||||
|
||||
// Called with the first dquote already read
|
||||
static int parseString()
|
||||
{
|
||||
string* value = new string();
|
||||
qualifiers.clear();
|
||||
int c;
|
||||
while ((c = GETCHAR())) {
|
||||
switch (c) {
|
||||
case '\\':
|
||||
/* Escape: get next char */
|
||||
c = GETCHAR();
|
||||
if (c == 0) {
|
||||
value->push_back(c);
|
||||
goto out;
|
||||
}
|
||||
value->push_back(c);
|
||||
break;
|
||||
case '"':
|
||||
/* End of string. Look for qualifiers */
|
||||
while ((c = GETCHAR()) && whites.find_first_of(c) == string::npos)
|
||||
qualifiers.push_back(c);
|
||||
goto out;
|
||||
default:
|
||||
value->push_back(c);
|
||||
}
|
||||
}
|
||||
out:
|
||||
//cerr << "GOT QUOTED ["<<value<<"] quals [" << qualifiers << "]" << endl;
|
||||
yylval.str = value;
|
||||
return QUOTED;
|
||||
}
|
||||
|
||||
|
||||
int yylex(void)
|
||||
{
|
||||
// cerr << "yylex: input [" << input.substr(index) << "]" << endl;
|
||||
|
||||
if (!qualifiers.empty()) {
|
||||
yylval.str = new string();
|
||||
yylval.str->swap(qualifiers);
|
||||
return QUALIFIERS;
|
||||
}
|
||||
|
||||
int c;
|
||||
|
||||
/* Skip white space. */
|
||||
while ((c = GETCHAR ()) && whites.find_first_of(c) != string::npos)
|
||||
continue;
|
||||
|
||||
if (c == 0)
|
||||
return 0;
|
||||
|
||||
if (specialstartchars.find_first_of(c) != string::npos) {
|
||||
//cerr << "yylex: return " << c << endl;
|
||||
return c;
|
||||
}
|
||||
|
||||
// field-term relations
|
||||
switch (c) {
|
||||
case '=': return EQUALS;
|
||||
case ':': return CONTAINS;
|
||||
case '<': {
|
||||
int c1 = GETCHAR();
|
||||
if (c1 == '=') {
|
||||
return SMALLEREQ;
|
||||
} else {
|
||||
UNGETCHAR(c);
|
||||
return SMALLER;
|
||||
}
|
||||
}
|
||||
case '>': {
|
||||
int c1 = GETCHAR();
|
||||
if (c1 == '=') {
|
||||
return GREATEREQ;
|
||||
} else {
|
||||
UNGETCHAR(c);
|
||||
return GREATER;
|
||||
}
|
||||
}
|
||||
case '(': case ')':
|
||||
return c;
|
||||
}
|
||||
|
||||
if (c == '"')
|
||||
return parseString();
|
||||
|
||||
UNGETCHAR(c);
|
||||
|
||||
// Other chars start a term or field name or reserved word
|
||||
string* word = new string();
|
||||
while ((c = GETCHAR())) {
|
||||
if (whites.find_first_of(c) != string::npos) {
|
||||
//cerr << "Word broken by whitespace" << endl;
|
||||
break;
|
||||
} else if (specialinchars.find_first_of(c) != string::npos) {
|
||||
//cerr << "Word broken by special char" << endl;
|
||||
UNGETCHAR(c);
|
||||
break;
|
||||
} else if (c == 0) {
|
||||
//cerr << "Word broken by EOF" << endl;
|
||||
break;
|
||||
} else {
|
||||
word->push_back(c);
|
||||
}
|
||||
}
|
||||
|
||||
if (!word->compare("AND") || !word->compare("&&")) {
|
||||
delete word;
|
||||
return AND;
|
||||
} else if (!word->compare("OR") || !word->compare("||")) {
|
||||
delete word;
|
||||
return OR;
|
||||
}
|
||||
|
||||
// cerr << "Got word [" << word << "]" << endl;
|
||||
yylval.str = word;
|
||||
return WORD;
|
||||
}
|
||||
|
||||
int main (int argc, const char *argv[])
|
||||
{
|
||||
argc--;argv++;
|
||||
if (argc == 0)
|
||||
return 1;
|
||||
while (argc--) {
|
||||
input += *argv++;
|
||||
input += " ";
|
||||
}
|
||||
|
||||
index = 0;
|
||||
returns = stack<int>();
|
||||
|
||||
return yyparse();
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user