Fix the language parser to properly handle multiple MIME type specs (fixes a regression relative to 1.20)

This commit is contained in:
Jean-Francois Dockes 2016-01-29 14:03:09 +01:00
parent 3cda808ac4
commit c289085003
6 changed files with 238 additions and 124 deletions

View File

@ -32,7 +32,7 @@
// First part of user declarations. // First part of user declarations.
#line 1 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:399 #line 1 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:399
#define YYDEBUG 1 #define YYDEBUG 1
#include "autoconfig.h" #include "autoconfig.h"
@ -48,7 +48,7 @@
using namespace std; using namespace std;
// #define LOG_PARSER //#define LOG_PARSER
#ifdef LOG_PARSER #ifdef LOG_PARSER
#define LOGP(X) {cerr << X;} #define LOGP(X) {cerr << X;}
#else #else
@ -63,11 +63,13 @@ static void qualify(Rcl::SearchDataClauseDist *, const string &);
static void addSubQuery(WasaParserDriver *d, static void addSubQuery(WasaParserDriver *d,
Rcl::SearchData *sd, Rcl::SearchData *sq) Rcl::SearchData *sd, Rcl::SearchData *sq)
{ {
sd->addClause(new Rcl::SearchDataClauseSub(STD_SHARED_PTR<Rcl::SearchData>(sq))); if (sd && sq)
sd->addClause(
new Rcl::SearchDataClauseSub(STD_SHARED_PTR<Rcl::SearchData>(sq)));
} }
#line 71 "y.tab.c" // lalr1.cc:399 #line 73 "y.tab.c" // lalr1.cc:399
# ifndef YY_NULLPTR # ifndef YY_NULLPTR
# if defined __cplusplus && 201103L <= __cplusplus # if defined __cplusplus && 201103L <= __cplusplus
@ -81,7 +83,7 @@ static void addSubQuery(WasaParserDriver *d,
// User implementation prologue. // User implementation prologue.
#line 85 "y.tab.c" // lalr1.cc:407 #line 87 "y.tab.c" // lalr1.cc:407
#ifndef YY_ #ifndef YY_
@ -167,7 +169,7 @@ static void addSubQuery(WasaParserDriver *d,
namespace yy { namespace yy {
#line 171 "y.tab.c" // lalr1.cc:474 #line 173 "y.tab.c" // lalr1.cc:474
/* Return YYSTR after stripping away unnecessary quotes and /* Return YYSTR after stripping away unnecessary quotes and
backslashes, so that it's suitable for yyerror. The heuristic is backslashes, so that it's suitable for yyerror. The heuristic is
@ -384,30 +386,30 @@ namespace yy {
{ {
case 3: // WORD case 3: // WORD
#line 49 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599 #line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599
{delete (yysym.value.str);} {delete (yysym.value.str);}
#line 390 "y.tab.c" // lalr1.cc:599 #line 392 "y.tab.c" // lalr1.cc:599
break; break;
case 4: // QUOTED case 4: // QUOTED
#line 49 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599 #line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599
{delete (yysym.value.str);} {delete (yysym.value.str);}
#line 397 "y.tab.c" // lalr1.cc:599 #line 399 "y.tab.c" // lalr1.cc:599
break; break;
case 5: // QUALIFIERS case 5: // QUALIFIERS
#line 49 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599 #line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599
{delete (yysym.value.str);} {delete (yysym.value.str);}
#line 404 "y.tab.c" // lalr1.cc:599 #line 406 "y.tab.c" // lalr1.cc:599
break; break;
case 22: // complexfieldname case 22: // complexfieldname
#line 49 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599 #line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599
{delete (yysym.value.str);} {delete (yysym.value.str);}
#line 411 "y.tab.c" // lalr1.cc:599 #line 413 "y.tab.c" // lalr1.cc:599
break; break;
@ -647,83 +649,100 @@ namespace yy {
switch (yyn) switch (yyn)
{ {
case 2: case 2:
#line 70 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 72 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("END PARSING\n"); // It's possible that we end up with no query (e.g.: because just a
d->m_result = (yystack_[0].value.sd); // date filter was set, no terms). Allocate an empty query so that we
// have something to set the global criteria on (this will yield a
// Xapian search like <alldocuments> FILTER xxx
if ((yystack_[0].value.sd) == 0)
d->m_result = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
else
d->m_result = (yystack_[0].value.sd);
} }
#line 656 "y.tab.c" // lalr1.cc:847 #line 664 "y.tab.c" // lalr1.cc:847
break; break;
case 3: case 3:
#line 77 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 85 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("q: query query\n"); LOGP("q: query query\n");
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang); Rcl::SearchData *sd = 0;
addSubQuery(d, sd, (yystack_[1].value.sd)); if ((yystack_[1].value.sd) || (yystack_[0].value.sd)) {
addSubQuery(d, sd, (yystack_[0].value.sd)); sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
addSubQuery(d, sd, (yystack_[1].value.sd));
addSubQuery(d, sd, (yystack_[0].value.sd));
}
(yylhs.value.sd) = sd; (yylhs.value.sd) = sd;
} }
#line 668 "y.tab.c" // lalr1.cc:847 #line 679 "y.tab.c" // lalr1.cc:847
break; break;
case 4: case 4:
#line 85 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 96 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("q: query AND query\n"); LOGP("q: query AND query\n");
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang); Rcl::SearchData *sd = 0;
addSubQuery(d, sd, (yystack_[2].value.sd)); if ((yystack_[2].value.sd) || (yystack_[0].value.sd)) {
addSubQuery(d, sd, (yystack_[0].value.sd)); sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
addSubQuery(d, sd, (yystack_[2].value.sd));
addSubQuery(d, sd, (yystack_[0].value.sd));
}
(yylhs.value.sd) = sd; (yylhs.value.sd) = sd;
} }
#line 680 "y.tab.c" // lalr1.cc:847
break;
case 5:
#line 93 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{
LOGP("q: query OR query\n");
Rcl::SearchData *top = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_OR, d->m_stemlang);
addSubQuery(d, sd, (yystack_[2].value.sd));
addSubQuery(d, sd, (yystack_[0].value.sd));
addSubQuery(d, top, sd);
(yylhs.value.sd) = top;
}
#line 694 "y.tab.c" // lalr1.cc:847 #line 694 "y.tab.c" // lalr1.cc:847
break; break;
case 5:
#line 107 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{
LOGP("query: query OR query\n");
Rcl::SearchData *top = 0;
if ((yystack_[2].value.sd) || (yystack_[0].value.sd)) {
top = new Rcl::SearchData(Rcl::SCLT_OR, d->m_stemlang);
addSubQuery(d, top, (yystack_[2].value.sd));
addSubQuery(d, top, (yystack_[0].value.sd));
}
(yylhs.value.sd) = top;
}
#line 709 "y.tab.c" // lalr1.cc:847
break;
case 6: case 6:
#line 103 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 118 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("q: ( query )\n"); LOGP("q: ( query )\n");
(yylhs.value.sd) = (yystack_[1].value.sd); (yylhs.value.sd) = (yystack_[1].value.sd);
} }
#line 703 "y.tab.c" // lalr1.cc:847 #line 718 "y.tab.c" // lalr1.cc:847
break; break;
case 7: case 7:
#line 109 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 124 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("q: fieldexpr\n"); LOGP("q: fieldexpr\n");
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang); Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
d->addClause(sd, (yystack_[0].value.cl)); if (d->addClause(sd, (yystack_[0].value.cl))) {
(yylhs.value.sd) = sd; (yylhs.value.sd) = sd;
} else {
delete sd;
(yylhs.value.sd) = 0;
}
} }
#line 714 "y.tab.c" // lalr1.cc:847 #line 733 "y.tab.c" // lalr1.cc:847
break; break;
case 8: case 8:
#line 118 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 137 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("fe: simple fieldexpr: " << (yystack_[0].value.cl)->gettext() << endl); LOGP("fe: simple fieldexpr: " << (yystack_[0].value.cl)->gettext() << endl);
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
} }
#line 723 "y.tab.c" // lalr1.cc:847 #line 742 "y.tab.c" // lalr1.cc:847
break; break;
case 9: case 9:
#line 123 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 142 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("fe: " << *(yystack_[2].value.str) << " = " << (yystack_[0].value.cl)->gettext() << endl); LOGP("fe: " << *(yystack_[2].value.str) << " = " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -731,11 +750,11 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
} }
#line 735 "y.tab.c" // lalr1.cc:847 #line 754 "y.tab.c" // lalr1.cc:847
break; break;
case 10: case 10:
#line 131 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 150 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("fe: " << *(yystack_[2].value.str) << " : " << (yystack_[0].value.cl)->gettext() << endl); LOGP("fe: " << *(yystack_[2].value.str) << " : " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -743,11 +762,11 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
} }
#line 747 "y.tab.c" // lalr1.cc:847 #line 766 "y.tab.c" // lalr1.cc:847
break; break;
case 11: case 11:
#line 139 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 158 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP(cerr << "fe: " << *(yystack_[2].value.str) << " < " << (yystack_[0].value.cl)->gettext() << endl); LOGP(cerr << "fe: " << *(yystack_[2].value.str) << " < " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -755,11 +774,11 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
} }
#line 759 "y.tab.c" // lalr1.cc:847 #line 778 "y.tab.c" // lalr1.cc:847
break; break;
case 12: case 12:
#line 147 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 166 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("fe: " << *(yystack_[2].value.str) << " <= " << (yystack_[0].value.cl)->gettext() << endl); LOGP("fe: " << *(yystack_[2].value.str) << " <= " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -767,11 +786,11 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
} }
#line 771 "y.tab.c" // lalr1.cc:847 #line 790 "y.tab.c" // lalr1.cc:847
break; break;
case 13: case 13:
#line 155 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 174 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("fe: " << *(yystack_[2].value.str) << " > " << (yystack_[0].value.cl)->gettext() << endl); LOGP("fe: " << *(yystack_[2].value.str) << " > " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -779,11 +798,11 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
} }
#line 783 "y.tab.c" // lalr1.cc:847 #line 802 "y.tab.c" // lalr1.cc:847
break; break;
case 14: case 14:
#line 163 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 182 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("fe: " << *(yystack_[2].value.str) << " >= " << (yystack_[0].value.cl)->gettext() << endl); LOGP("fe: " << *(yystack_[2].value.str) << " >= " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -791,69 +810,69 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
} }
#line 795 "y.tab.c" // lalr1.cc:847 #line 814 "y.tab.c" // lalr1.cc:847
break; break;
case 15: case 15:
#line 171 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 190 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("fe: - fieldexpr[" << (yystack_[0].value.cl)->gettext() << "]" << endl); LOGP("fe: - fieldexpr[" << (yystack_[0].value.cl)->gettext() << "]" << endl);
(yystack_[0].value.cl)->setexclude(true); (yystack_[0].value.cl)->setexclude(true);
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
} }
#line 805 "y.tab.c" // lalr1.cc:847 #line 824 "y.tab.c" // lalr1.cc:847
break; break;
case 16: case 16:
#line 181 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 200 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("cfn: WORD" << endl); LOGP("cfn: WORD" << endl);
(yylhs.value.str) = (yystack_[0].value.str); (yylhs.value.str) = (yystack_[0].value.str);
} }
#line 814 "y.tab.c" // lalr1.cc:847 #line 833 "y.tab.c" // lalr1.cc:847
break; break;
case 17: case 17:
#line 187 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 206 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("cfn: complexfieldname ':' WORD" << endl); LOGP("cfn: complexfieldname ':' WORD" << endl);
(yylhs.value.str) = new string(*(yystack_[2].value.str) + string(":") + *(yystack_[0].value.str)); (yylhs.value.str) = new string(*(yystack_[2].value.str) + string(":") + *(yystack_[0].value.str));
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
delete (yystack_[0].value.str); delete (yystack_[0].value.str);
} }
#line 825 "y.tab.c" // lalr1.cc:847 #line 844 "y.tab.c" // lalr1.cc:847
break; break;
case 18: case 18:
#line 196 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 215 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("term[" << *(yystack_[0].value.str) << "]" << endl); LOGP("term[" << *(yystack_[0].value.str) << "]" << endl);
(yylhs.value.cl) = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, *(yystack_[0].value.str)); (yylhs.value.cl) = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, *(yystack_[0].value.str));
delete (yystack_[0].value.str); delete (yystack_[0].value.str);
} }
#line 835 "y.tab.c" // lalr1.cc:847 #line 854 "y.tab.c" // lalr1.cc:847
break; break;
case 19: case 19:
#line 202 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 221 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
} }
#line 843 "y.tab.c" // lalr1.cc:847 #line 862 "y.tab.c" // lalr1.cc:847
break; break;
case 20: case 20:
#line 208 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 227 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("QUOTED[" << *(yystack_[0].value.str) << "]" << endl); LOGP("QUOTED[" << *(yystack_[0].value.str) << "]" << endl);
(yylhs.value.cl) = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *(yystack_[0].value.str), 0); (yylhs.value.cl) = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *(yystack_[0].value.str), 0);
delete (yystack_[0].value.str); delete (yystack_[0].value.str);
} }
#line 853 "y.tab.c" // lalr1.cc:847 #line 872 "y.tab.c" // lalr1.cc:847
break; break;
case 21: case 21:
#line 214 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 233 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
{ {
LOGP("QUOTED[" << *(yystack_[1].value.str) << "] QUALIFIERS[" << *(yystack_[0].value.str) << "]" << endl); LOGP("QUOTED[" << *(yystack_[1].value.str) << "] QUALIFIERS[" << *(yystack_[0].value.str) << "]" << endl);
Rcl::SearchDataClauseDist *cl = Rcl::SearchDataClauseDist *cl =
@ -863,11 +882,11 @@ namespace yy {
delete (yystack_[1].value.str); delete (yystack_[1].value.str);
delete (yystack_[0].value.str); delete (yystack_[0].value.str);
} }
#line 867 "y.tab.c" // lalr1.cc:847 #line 886 "y.tab.c" // lalr1.cc:847
break; break;
#line 871 "y.tab.c" // lalr1.cc:847 #line 890 "y.tab.c" // lalr1.cc:847
default: default:
break; break;
} }
@ -1220,9 +1239,9 @@ namespace yy {
const unsigned char const unsigned char
parser::yyrline_[] = parser::yyrline_[] =
{ {
0, 69, 69, 76, 84, 92, 102, 108, 117, 122, 0, 71, 71, 84, 95, 106, 117, 123, 136, 141,
130, 138, 146, 154, 162, 170, 180, 186, 195, 201, 149, 157, 165, 173, 181, 189, 199, 205, 214, 220,
207, 213 226, 232
}; };
// Print the state stack on the debug stream. // Print the state stack on the debug stream.
@ -1304,8 +1323,8 @@ namespace yy {
} // yy } // yy
#line 1308 "y.tab.c" // lalr1.cc:1155 #line 1327 "y.tab.c" // lalr1.cc:1155
#line 225 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:1156 #line 244 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:1156
#include <ctype.h> #include <ctype.h>

View File

@ -123,7 +123,7 @@ namespace yy {
/// Symbol semantic values. /// Symbol semantic values.
union semantic_type union semantic_type
{ {
#line 44 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:372 #line 46 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:372
std::string *str; std::string *str;
Rcl::SearchDataClauseSimple *cl; Rcl::SearchDataClauseSimple *cl;

View File

@ -28,7 +28,9 @@ static void qualify(Rcl::SearchDataClauseDist *, const string &);
static void addSubQuery(WasaParserDriver *d, static void addSubQuery(WasaParserDriver *d,
Rcl::SearchData *sd, Rcl::SearchData *sq) Rcl::SearchData *sd, Rcl::SearchData *sq)
{ {
sd->addClause(new Rcl::SearchDataClauseSub(STD_SHARED_PTR<Rcl::SearchData>(sq))); if (sd && sq)
sd->addClause(
new Rcl::SearchDataClauseSub(STD_SHARED_PTR<Rcl::SearchData>(sq)));
} }
%} %}
@ -54,8 +56,8 @@ static void addSubQuery(WasaParserDriver *d,
%type <sd> query %type <sd> query
%type <str> complexfieldname %type <str> complexfieldname
/* Non operator tokens need precedence because of the possibility of /* Non operator tokens need precedence because of the possibility of
concatenation which needs to have lower prec than OR */ concatenation which needs to have lower prec than OR */
%left <str> WORD %left <str> WORD
%left <str> QUOTED %left <str> QUOTED
%left <str> QUALIFIERS %left <str> QUALIFIERS
@ -68,35 +70,48 @@ static void addSubQuery(WasaParserDriver *d,
topquery: query topquery: query
{ {
LOGP("END PARSING\n"); // It's possible that we end up with no query (e.g.: because just a
d->m_result = $1; // date filter was set, no terms). Allocate an empty query so that we
// have something to set the global criteria on (this will yield a
// Xapian search like <alldocuments> FILTER xxx).
if ($1 == 0)
d->m_result = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
else
d->m_result = $1;
} }
query: query:
query query %prec UCONCAT query query %prec UCONCAT
{ {
LOGP("q: query query\n"); LOGP("q: query query\n");
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang); Rcl::SearchData *sd = 0;
addSubQuery(d, sd, $1); if ($1 || $2) {
addSubQuery(d, sd, $2); sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
addSubQuery(d, sd, $1);
addSubQuery(d, sd, $2);
}
$$ = sd; $$ = sd;
} }
| query AND query | query AND query
{ {
LOGP("q: query AND query\n"); LOGP("q: query AND query\n");
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang); Rcl::SearchData *sd = 0;
addSubQuery(d, sd, $1); if ($1 || $3) {
addSubQuery(d, sd, $3); sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
addSubQuery(d, sd, $1);
addSubQuery(d, sd, $3);
}
$$ = sd; $$ = sd;
} }
| query OR query | query OR query
{ {
LOGP("q: query OR query\n"); LOGP("query: query OR query\n");
Rcl::SearchData *top = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang); Rcl::SearchData *top = 0;
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_OR, d->m_stemlang); if ($1 || $3) {
addSubQuery(d, sd, $1); top = new Rcl::SearchData(Rcl::SCLT_OR, d->m_stemlang);
addSubQuery(d, sd, $3); addSubQuery(d, top, $1);
addSubQuery(d, top, sd); addSubQuery(d, top, $3);
}
$$ = top; $$ = top;
} }
| '(' query ')' | '(' query ')'
@ -109,8 +124,12 @@ fieldexpr %prec UCONCAT
{ {
LOGP("q: fieldexpr\n"); LOGP("q: fieldexpr\n");
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang); Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
d->addClause(sd, $1); if (d->addClause(sd, $1)) {
$$ = sd; $$ = sd;
} else {
delete sd;
$$ = 0;
}
} }
; ;

View File

@ -51,6 +51,19 @@ SearchData *wasaStringToRcl(const RclConfig *config,
return sd; return sd;
} }
// Construct a parser driver from the configuration pointer, the stemming
// language and the auto-suffix string. No parsing happens here: the input
// position and the result pointer start at zero, and the top-level filter
// storage starts out as "nothing set".
WasaParserDriver::WasaParserDriver(const RclConfig *c, const std::string sl,
                                   const std::string& as)
    : m_stemlang(sl),
      m_autosuffs(as),
      m_config(c),
      m_index(0),
      m_result(0),
      m_haveDates(false),
      // size_t(-1) is the "no limit set" marker which parse() compares
      // against before applying a size filter to the result.
      m_maxSize(static_cast<size_t>(-1)),
      m_minSize(static_cast<size_t>(-1))
{
}
// Out-of-line destructor with an intentionally empty body: nothing is
// released explicitly here. Note that this includes m_result, which is not
// deleted -- presumably the caller of parse() takes ownership of the
// returned query (TODO confirm against callers).
WasaParserDriver::~WasaParserDriver()
{
}
SearchData *WasaParserDriver::parse(const std::string& in) SearchData *WasaParserDriver::parse(const std::string& in)
{ {
m_input = in; m_input = in;
@ -67,6 +80,28 @@ SearchData *WasaParserDriver::parse(const std::string& in)
m_result = 0; m_result = 0;
} }
if (m_result == 0)
return m_result;
// Set the top level filters (types, dates, size)
for (vector<string>::const_iterator it = m_filetypes.begin();
it != m_filetypes.end(); it++) {
m_result->addFiletype(*it);
}
for (vector<string>::const_iterator it = m_nfiletypes.begin();
it != m_nfiletypes.end(); it++) {
m_result->remFiletype(*it);
}
if (m_haveDates) {
m_result->setDateSpan(&m_dates);
}
if (m_minSize != (size_t)-1) {
m_result->setMinSize(m_minSize);
}
if (m_maxSize != (size_t)-1) {
m_result->setMaxSize(m_maxSize);
}
//if (m_result) m_result->dump(cout);
return m_result; return m_result;
} }
@ -114,12 +149,12 @@ bool WasaParserDriver::addClause(SearchData *sd,
// MIME types and categories // MIME types and categories
if (!fld.compare("mime") || !fld.compare("format")) { if (!fld.compare("mime") || !fld.compare("format")) {
if (cl->getexclude()) { if (cl->getexclude()) {
sd->remFiletype(cl->gettext()); m_nfiletypes.push_back(cl->gettext());
} else { } else {
sd->addFiletype(cl->gettext()); m_filetypes.push_back(cl->gettext());
} }
delete cl; delete cl;
return true; return false;
} }
if (!fld.compare("rclcat") || !fld.compare("type")) { if (!fld.compare("rclcat") || !fld.compare("type")) {
@ -128,14 +163,14 @@ bool WasaParserDriver::addClause(SearchData *sd,
for (vector<string>::iterator mit = mtypes.begin(); for (vector<string>::iterator mit = mtypes.begin();
mit != mtypes.end(); mit++) { mit != mtypes.end(); mit++) {
if (cl->getexclude()) { if (cl->getexclude()) {
sd->remFiletype(*mit); m_nfiletypes.push_back(*mit);
} else { } else {
sd->addFiletype(*mit); m_filetypes.push_back(*mit);
} }
} }
} }
delete cl; delete cl;
return true; return false;
} }
// Handle "date" spec // Handle "date" spec
@ -150,9 +185,10 @@ bool WasaParserDriver::addClause(SearchData *sd,
} }
LOGDEB(("addClause:: date span: %d-%d-%d/%d-%d-%d\n", LOGDEB(("addClause:: date span: %d-%d-%d/%d-%d-%d\n",
di.y1,di.m1,di.d1, di.y2,di.m2,di.d2)); di.y1,di.m1,di.d1, di.y2,di.m2,di.d2));
sd->setDateSpan(&di); m_haveDates = true;
m_dates = di;
delete cl; delete cl;
return true; return false;
} }
// Handle "size" spec // Handle "size" spec
@ -178,22 +214,21 @@ bool WasaParserDriver::addClause(SearchData *sd,
switch (rel) { switch (rel) {
case SearchDataClause::REL_EQUALS: case SearchDataClause::REL_EQUALS:
sd->setMaxSize(size); m_maxSize = m_minSize = size;
sd->setMinSize(size);
break; break;
case SearchDataClause::REL_LT: case SearchDataClause::REL_LT:
case SearchDataClause::REL_LTE: case SearchDataClause::REL_LTE:
sd->setMaxSize(size); m_maxSize = size;
break; break;
case SearchDataClause::REL_GT: case SearchDataClause::REL_GT:
case SearchDataClause::REL_GTE: case SearchDataClause::REL_GTE:
sd->setMinSize(size); m_minSize = size;
break; break;
default: default:
m_reason = "Bad relation operator with size query. Use > < or ="; m_reason = "Bad relation operator with size query. Use > < or =";
return false; return false;
} }
return true; return false;
} }
if (!fld.compare("dir")) { if (!fld.compare("dir")) {

View File

@ -19,6 +19,9 @@
#include <string> #include <string>
#include <stack> #include <stack>
#include <vector>
#include "smallut.h"
class WasaParserDriver; class WasaParserDriver;
namespace Rcl { namespace Rcl {
@ -35,10 +38,9 @@ class WasaParserDriver {
public: public:
WasaParserDriver(const RclConfig *c, const std::string sl, WasaParserDriver(const RclConfig *c, const std::string sl,
const std::string& as) const std::string& as);
: m_stemlang(sl), m_autosuffs(as), m_config(c), ~WasaParserDriver();
m_index(0), m_result(0) {}
Rcl::SearchData *parse(const std::string&); Rcl::SearchData *parse(const std::string&);
bool addClause(Rcl::SearchData *sd, Rcl::SearchDataClauseSimple* cl); bool addClause(Rcl::SearchData *sd, Rcl::SearchDataClauseSimple* cl);
@ -62,11 +64,23 @@ private:
std::string m_autosuffs; std::string m_autosuffs;
const RclConfig *m_config; const RclConfig *m_config;
// input string.
std::string m_input; std::string m_input;
// Current position in m_input
unsigned int m_index; unsigned int m_index;
// Characters pushed-back, ready for next getchar.
std::stack<int> m_returns; std::stack<int> m_returns;
// Result, set by parser.
Rcl::SearchData *m_result; Rcl::SearchData *m_result;
// Storage for top level filters
std::vector<std::string> m_filetypes;
std::vector<std::string> m_nfiletypes;
bool m_haveDates;
DateInterval m_dates; // Restrict to date interval
size_t m_maxSize;
size_t m_minSize;
std::string m_reason; std::string m_reason;
// Let the quoted string reader store qualifiers in there, simpler // Let the quoted string reader store qualifiers in there, simpler

View File

@ -210,14 +210,35 @@ void SearchData::simplify()
clsubp->getSub()->simplify(); clsubp->getSub()->simplify();
// If this subquery has special attributes, it's not a // If this subquery has special attributes, it's not a
// candidate for collapsing // candidate for collapsing, except if it has no clauses, because
// then, we just pick the attributes.
if (!clsubp->getSub()->m_filetypes.empty() || if (!clsubp->getSub()->m_filetypes.empty() ||
!clsubp->getSub()->m_nfiletypes.empty() || !clsubp->getSub()->m_nfiletypes.empty() ||
clsubp->getSub()->m_haveDates || clsubp->getSub()->m_haveDates ||
clsubp->getSub()->m_maxSize != size_t(-1) || clsubp->getSub()->m_maxSize != size_t(-1) ||
clsubp->getSub()->m_minSize != size_t(-1) || clsubp->getSub()->m_minSize != size_t(-1) ||
clsubp->getSub()->m_haveWildCards) clsubp->getSub()->m_haveWildCards) {
continue; if (!clsubp->getSub()->m_query.empty())
continue;
m_filetypes.insert(m_filetypes.end(),
clsubp->getSub()->m_filetypes.begin(),
clsubp->getSub()->m_filetypes.end());
m_nfiletypes.insert(m_nfiletypes.end(),
clsubp->getSub()->m_nfiletypes.begin(),
clsubp->getSub()->m_nfiletypes.end());
// Inherit the date span from the clause-less subquery, unless this query
// already has one of its own. The flag has to be raised together with the
// copy: m_haveDates is what marks m_dates as being in use (it is the value
// tested just above for the subquery, and right here for this query), so
// copying the interval alone would leave it flagged as absent.
if (clsubp->getSub()->m_haveDates && !m_haveDates) {
    m_dates = clsubp->getSub()->m_dates;
    m_haveDates = true;
}
if (m_maxSize == size_t(-1))
m_maxSize = clsubp->getSub()->m_maxSize;
if (m_minSize == size_t(-1))
m_minSize = clsubp->getSub()->m_minSize;
m_haveWildCards = m_haveWildCards ||
clsubp->getSub()->m_haveWildCards;
// And then let the clauses processing go on, there are
// none anyway, we will just delete the subquery.
}
bool allsametp = true; bool allsametp = true;
for (qlist_it_t it1 = clsubp->getSub()->m_query.begin(); for (qlist_it_t it1 = clsubp->getSub()->m_query.begin();
@ -277,18 +298,22 @@ static const char * tpToString(SClType t)
} }
} }
static string dumptabs;
void SearchData::dump(ostream& o) const void SearchData::dump(ostream& o) const
{ {
o << "SearchData: " << tpToString(m_tp) << " qs " << int(m_query.size()) << o << dumptabs <<
"SearchData: " << tpToString(m_tp) << " qs " << int(m_query.size()) <<
" ft " << m_filetypes.size() << " nft " << m_nfiletypes.size() << " ft " << m_filetypes.size() << " nft " << m_nfiletypes.size() <<
" hd " << m_haveDates << " maxs " << int(m_maxSize) << " mins " << " hd " << m_haveDates << " maxs " << int(m_maxSize) << " mins " <<
int(m_minSize) << " wc " << m_haveWildCards << "\n"; int(m_minSize) << " wc " << m_haveWildCards << "\n";
for (std::vector<SearchDataClause*>::const_iterator it = for (std::vector<SearchDataClause*>::const_iterator it =
m_query.begin(); it != m_query.end(); it++) { m_query.begin(); it != m_query.end(); it++) {
o << dumptabs;
(*it)->dump(o); (*it)->dump(o);
o << "\n"; o << "\n";
} }
o << "\n"; // o << dumptabs << "\n";
} }
void SearchDataClause::dump(ostream& o) const void SearchDataClause::dump(ostream& o) const
@ -341,8 +366,10 @@ void SearchDataClauseDist::dump(ostream& o) const
void SearchDataClauseSub::dump(ostream& o) const void SearchDataClauseSub::dump(ostream& o) const
{ {
o << "ClauseSub {\n"; o << "ClauseSub {\n";
dumptabs += '\t';
m_sub->dump(o); m_sub->dump(o);
o << "}"; dumptabs.erase(dumptabs.size()- 1);
o << dumptabs << "}";
} }
} // Namespace Rcl } // Namespace Rcl