Implemented range queries, based on storing fields in xapian values

This commit is contained in:
Jean-Francois Dockes 2018-01-24 09:43:20 +01:00
parent 26d15dbe4a
commit 595e419d93
20 changed files with 875 additions and 428 deletions

View File

@ -165,6 +165,8 @@ rcldb/rclquery.cpp \
rcldb/rclquery.h \
rcldb/rclquery_p.h \
rcldb/rclterms.cpp \
rcldb/rclvalues.cpp \
rcldb/rclvalues.h \
rcldb/searchdata.cpp \
rcldb/searchdata.h \
rcldb/searchdatatox.cpp \

View File

@ -936,15 +936,15 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
// Build a direct map avoiding all indirections for field to
// prefix translation
// Add direct prefixes from the [prefixes] section
vector<string>tps = m_fields->getNames("prefixes");
for (vector<string>::const_iterator it = tps.begin();
it != tps.end(); it++) {
vector<string> tps = m_fields->getNames("prefixes");
for (const auto& fieldname : tps) {
string val;
m_fields->get(*it, val, "prefixes");
m_fields->get(fieldname, val, "prefixes");
ConfSimple attrs;
FieldTraits ft;
// fieldname = prefix ; attr1=val;attr2=val...
if (!valueSplitAttributes(val, ft.pfx, attrs)) {
LOGERR("readFieldsConfig: bad config line for [" << *it <<
LOGERR("readFieldsConfig: bad config line for [" << fieldname <<
"]: [" << val << "]\n");
return 0;
}
@ -957,21 +957,67 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
ft.pfxonly = stringToBool(tval);
if (attrs.get("noterms", tval))
ft.noterms = stringToBool(tval);
m_fldtotraits[stringtolower(*it)] = ft;
LOGDEB2("readFieldsConfig: [" << *it << "] -> [" << ft.pfx <<
m_fldtotraits[stringtolower(fieldname)] = ft;
LOGDEB2("readFieldsConfig: [" << fieldname << "] -> [" << ft.pfx <<
"] " << ft.wdfinc << " " << ft.boost << "\n");
}
// Values section: fields which are additionally stored in a value slot
// (used by range queries). Each configuration line has the form:
//     fieldname = valueslot ; attr1=val;attr2=val...
// Recognized attributes are "type" ("string" or "int") and "len".
tps = m_fields->getNames("values");
for (const auto& fieldname : tps) {
    string val;
    m_fields->get(fieldname, val, "values");
    ConfSimple attrs;
    string svslot;
    if (!valueSplitAttributes(val, svslot, attrs)) {
        LOGERR("readFieldsConfig: bad value line for [" << fieldname <<
               "]: [" << val << "]\n");
        return false;
    }

    // Parse into a wide signed type first: a plain atoi() + cast to
    // uint32_t would silently turn a negative or oversized number into
    // an unrelated (very large) slot number.
    const long long slotnum = strtoll(svslot.c_str(), nullptr, 10);
    if (slotnum == 0) {
        // Also reached when the slot is not a number at all. The entry
        // is skipped, but this is not fatal for the whole configuration.
        LOGERR("readFieldsConfig: found 0 value slot for [" << fieldname <<
               "]: [" << val << "]\n");
        continue;
    }
    if (slotnum < 0 || slotnum > 0xffffffffLL) {
        // Does not fit in the 32 bits slot number: skip like a 0 slot.
        LOGERR("readFieldsConfig: value slot out of range for [" <<
               fieldname << "]: [" << val << "]\n");
        continue;
    }
    const uint32_t valueslot = static_cast<uint32_t>(slotnum);

    // Value type: defaults to string when no "type" attribute is set.
    string tval;
    FieldTraits::ValueType valuetype{FieldTraits::STR};
    if (attrs.get("type", tval)) {
        if (tval == "string") {
            valuetype = FieldTraits::STR;
        } else if (tval == "int") {
            valuetype = FieldTraits::INT;
        } else {
            LOGERR("readFieldsConfig: bad type for value for " <<
                   fieldname << " : " << tval << "\n");
            return false;
        }
    }

    // Optional length attribute, 0 when absent.
    // NOTE(review): a negative or non-numeric "len" is not rejected here.
    int valuelen{0};
    if (attrs.get("len", tval)) {
        valuelen = atoi(tval.c_str());
    }

    // Find or insert the traits entry for the canonic (lowercased) field
    // name. An entry created by the [prefixes] section is updated in
    // place, so its prefix and other attributes are preserved.
    FieldTraits& traits = m_fldtotraits[stringtolower(fieldname)];
    traits.valueslot = valueslot;
    traits.valuetype = valuetype;
    traits.valuelen = valuelen;
}
// Add prefixes for aliases and build alias-to-canonic map while
// we're at it. Having the aliases in the prefix map avoids an
// additional indirection at index time.
tps = m_fields->getNames("aliases");
for (vector<string>::const_iterator it = tps.begin();
it != tps.end(); it++){
string canonic = stringtolower(*it); // canonic name
for (const auto& fieldname : tps) {
string canonic = stringtolower(fieldname); // canonic name
FieldTraits ft;
map<string, FieldTraits>::const_iterator pit =
m_fldtotraits.find(canonic);
const auto pit = m_fldtotraits.find(canonic);
if (pit != m_fldtotraits.end()) {
ft = pit->second;
}
@ -979,53 +1025,45 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
m_fields->get(canonic, aliases, "aliases");
vector<string> l;
stringToStrings(aliases, l);
for (vector<string>::const_iterator ait = l.begin();
ait != l.end(); ait++) {
for (const auto& alias : l) {
if (pit != m_fldtotraits.end())
m_fldtotraits[stringtolower(*ait)] = ft;
m_aliastocanon[stringtolower(*ait)] = canonic;
m_fldtotraits[stringtolower(alias)] = ft;
m_aliastocanon[stringtolower(alias)] = canonic;
}
}
// Query aliases map
tps = m_fields->getNames("queryaliases");
for (vector<string>::const_iterator it = tps.begin();
it != tps.end(); it++){
string canonic = stringtolower(*it); // canonic name
for (const auto& entry: tps) {
string canonic = stringtolower(entry); // canonic name
string aliases;
m_fields->get(canonic, aliases, "queryaliases");
vector<string> l;
stringToStrings(aliases, l);
for (vector<string>::const_iterator ait = l.begin();
ait != l.end(); ait++) {
m_aliastoqcanon[stringtolower(*ait)] = canonic;
for (const auto& alias : l) {
m_aliastoqcanon[stringtolower(alias)] = canonic;
}
}
#if 0
for (map<string, FieldTraits>::const_iterator it = m_fldtotraits.begin();
it != m_fldtotraits.end(); it++) {
LOGDEB("readFieldsConfig: [" << *it << "] -> [" << it->second.pfx <<
LOGDEB("readFieldsConfig: [" << entry << "] -> [" << it->second.pfx <<
"] " << it->second.wdfinc << " " << it->second.boost << "\n");
}
#endif
vector<string> sl = m_fields->getNames("stored");
if (!sl.empty()) {
for (vector<string>::const_iterator it = sl.begin();
it != sl.end(); it++) {
string fld = fieldCanon(stringtolower(*it));
m_storedFields.insert(fld);
}
for (const auto& fieldname : sl) {
m_storedFields.insert(fieldCanon(stringtolower(fieldname)));
}
// Extended file attribute to field translations
vector<string>xattrs = m_fields->getNames("xattrtofields");
for (vector<string>::const_iterator it = xattrs.begin();
it != xattrs.end(); it++) {
for (const auto& xattr : xattrs) {
string val;
m_fields->get(*it, val, "xattrtofields");
m_xattrtofld[*it] = val;
m_fields->get(xattr, val, "xattrtofields");
m_xattrtofld[xattr] = val;
}
return true;

View File

@ -79,6 +79,10 @@ struct MDReaper {
// Data associated to a indexed field name:
struct FieldTraits {
string pfx; // indexing prefix,
uint32_t valueslot{0};
enum ValueType {STR, INT};
ValueType valuetype{STR};
int valuelen{0};
int wdfinc{1}; // Index time term frequency increment (default 1)
double boost{1.0}; // Query time boost (default 1.0)
bool pfxonly{false}; // Suppress prefix-less indexing

View File

@ -59,6 +59,7 @@ private:
{
currentText = whatclause = "";
text.clear();
text2.clear();
field.clear();
slack = 0;
d = m = y = di.d1 = di.m1 = di.y1 = di.d2 = di.m2 = di.y2 = 0;
@ -69,7 +70,7 @@ private:
// Temporary data while parsing.
QString currentText;
QString whatclause;
string field, text;
string field, text, text2;
int slack;
int d, m, y;
DateInterval di;
@ -120,6 +121,8 @@ bool SDHXMLHandler::endElement(const QString & /* namespaceURI */,
field = base64_decode(qs2utf8s(currentText.trimmed()));
} else if (qName == "T") {
text = base64_decode(qs2utf8s(currentText.trimmed()));
} else if (qName == "T2") {
text2 = base64_decode(qs2utf8s(currentText.trimmed()));
} else if (qName == "S") {
slack = atoi((const char *)currentText.toUtf8());
} else if (qName == "C") {
@ -130,6 +133,9 @@ bool SDHXMLHandler::endElement(const QString & /* namespaceURI */,
} else if (whatclause == "OR") {
c = new SearchDataClauseSimple(SCLT_OR, text, field);
c->setexclude(exclude);
} else if (whatclause == "RG") {
c = new SearchDataClauseRange(text, text2, field);
c->setexclude(exclude);
} else if (whatclause == "EX") {
// Compat with old hist. We don't generate EX (SCLT_EXCL) anymore
// it's replaced with OR + exclude flag

View File

@ -1,8 +1,8 @@
// A Bison parser, made by GNU Bison 3.0.2.
// A Bison parser, made by GNU Bison 3.0.4.
// Locations for Bison parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
// Copyright (C) 2002-2015 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@ -42,7 +42,7 @@
namespace yy {
#line 46 "location.hh" // location.cc:291
#line 46 "location.hh" // location.cc:296
/// Abstract a location.
class location
{
@ -111,36 +111,42 @@ namespace yy {
position end;
};
/// Join two location objects to create a location.
inline location operator+ (location res, const location& end)
/// Join two locations, in place.
inline location& operator+= (location& res, const location& end)
{
res.end = end.end;
return res;
}
/// Change end position in place.
/// Join two locations.
inline location operator+ (location res, const location& end)
{
return res += end;
}
/// Add \a width columns to the end position, in place.
inline location& operator+= (location& res, int width)
{
res.columns (width);
return res;
}
/// Change end position.
/// Add \a width columns to the end position.
inline location operator+ (location res, int width)
{
return res += width;
}
/// Change end position in place.
/// Subtract \a width columns to the end position, in place.
inline location& operator-= (location& res, int width)
{
return res += -width;
}
/// Change end position.
inline location operator- (const location& begin, int width)
/// Subtract \a width columns to the end position.
inline location operator- (location res, int width)
{
return begin + -width;
return res -= width;
}
/// Compare two location objects.
@ -168,8 +174,7 @@ namespace yy {
operator<< (std::basic_ostream<YYChar>& ostr, const location& loc)
{
unsigned int end_col = 0 < loc.end.column ? loc.end.column - 1 : 0;
ostr << loc.begin// << "(" << loc.end << ") "
;
ostr << loc.begin;
if (loc.end.filename
&& (!loc.begin.filename
|| *loc.begin.filename != *loc.end.filename))
@ -183,5 +188,5 @@ namespace yy {
} // yy
#line 187 "location.hh" // location.cc:291
#line 192 "location.hh" // location.cc:296
#endif // !YY_YY_LOCATION_HH_INCLUDED

View File

@ -1,8 +1,8 @@
// A Bison parser, made by GNU Bison 3.0.2.
// A Bison parser, made by GNU Bison 3.0.4.
// Positions for Bison parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
// Copyright (C) 2002-2015 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@ -52,7 +52,7 @@
namespace yy {
#line 56 "position.hh" // location.cc:291
#line 56 "position.hh" // location.cc:296
/// Abstract a position.
class position
{
@ -114,7 +114,7 @@ namespace yy {
}
};
/// Add and assign a position.
/// Add \a width columns, in place.
inline position&
operator+= (position& res, int width)
{
@ -122,21 +122,21 @@ namespace yy {
return res;
}
/// Add two position objects.
/// Add \a width columns.
inline position
operator+ (position res, int width)
{
return res += width;
}
/// Add and assign a position.
/// Subtract \a width columns, in place.
inline position&
operator-= (position& res, int width)
{
return res += -width;
}
/// Add two position objects.
/// Subtract \a width columns.
inline position
operator- (position res, int width)
{
@ -176,5 +176,5 @@ namespace yy {
} // yy
#line 180 "position.hh" // location.cc:291
#line 180 "position.hh" // location.cc:296
#endif // !YY_YY_POSITION_HH_INCLUDED

View File

@ -1,8 +1,8 @@
// A Bison parser, made by GNU Bison 3.0.2.
// A Bison parser, made by GNU Bison 3.0.4.
// Stack handling for Bison parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
// Copyright (C) 2002-2015 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@ -42,7 +42,7 @@
namespace yy {
#line 46 "stack.hh" // stack.hh:133
#line 46 "stack.hh" // stack.hh:132
template <class T, class S = std::vector<T> >
class stack
{
@ -54,12 +54,12 @@ namespace yy {
stack ()
: seq_ ()
{
seq_.reserve (200);
}
stack (unsigned int n)
: seq_ (n)
{
}
{}
inline
T&
@ -136,8 +136,7 @@ namespace yy {
slice (const S& stack, unsigned int range)
: stack_ (stack)
, range_ (range)
{
}
{}
inline
const T&
@ -153,6 +152,6 @@ namespace yy {
} // yy
#line 157 "stack.hh" // stack.hh:133
#line 156 "stack.hh" // stack.hh:132
#endif // !YY_YY_STACK_HH_INCLUDED

View File

@ -1,8 +1,8 @@
// A Bison parser, made by GNU Bison 3.0.2.
// A Bison parser, made by GNU Bison 3.0.4.
// Skeleton implementation for Bison LALR(1) parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
// Copyright (C) 2002-2015 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@ -32,7 +32,7 @@
// First part of user declarations.
#line 1 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:399
#line 1 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:404
#define YYDEBUG 1
#include "autoconfig.h"
@ -48,7 +48,7 @@
using namespace std;
// #define LOG_PARSER
//#define LOG_PARSER
#ifdef LOG_PARSER
#define LOGP(X) {cerr << X;}
#else
@ -69,7 +69,7 @@ static void addSubQuery(WasaParserDriver *d,
}
#line 73 "y.tab.c" // lalr1.cc:399
#line 73 "y.tab.c" // lalr1.cc:404
# ifndef YY_NULLPTR
# if defined __cplusplus && 201103L <= __cplusplus
@ -83,7 +83,7 @@ static void addSubQuery(WasaParserDriver *d,
// User implementation prologue.
#line 87 "y.tab.c" // lalr1.cc:407
#line 87 "y.tab.c" // lalr1.cc:412
#ifndef YY_
@ -160,7 +160,7 @@ static void addSubQuery(WasaParserDriver *d,
#endif // !YYDEBUG
#define yyerrok (yyerrstatus_ = 0)
#define yyclearin (yyempty = true)
#define yyclearin (yyla.clear ())
#define YYACCEPT goto yyacceptlab
#define YYABORT goto yyabortlab
@ -169,7 +169,7 @@ static void addSubQuery(WasaParserDriver *d,
namespace yy {
#line 173 "y.tab.c" // lalr1.cc:474
#line 173 "y.tab.c" // lalr1.cc:479
/* Return YYSTR after stripping away unnecessary quotes and
backslashes, so that it's suitable for yyerror. The heuristic is
@ -273,6 +273,23 @@ namespace yy {
inline
parser::basic_symbol<Base>::~basic_symbol ()
{
clear ();
}
template <typename Base>
inline
void
parser::basic_symbol<Base>::clear ()
{
Base::clear ();
}
template <typename Base>
inline
bool
parser::basic_symbol<Base>::empty () const
{
return Base::type_get () == empty_symbol;
}
template <typename Base>
@ -288,7 +305,7 @@ namespace yy {
// by_type.
inline
parser::by_type::by_type ()
: type (empty)
: type (empty_symbol)
{}
inline
@ -301,12 +318,19 @@ namespace yy {
: type (yytranslate_ (t))
{}
inline
void
parser::by_type::clear ()
{
type = empty_symbol;
}
inline
void
parser::by_type::move (by_type& that)
{
type = that.type;
that.type = empty;
that.clear ();
}
inline
@ -320,7 +344,7 @@ namespace yy {
// by_state.
inline
parser::by_state::by_state ()
: state (empty)
: state (empty_state)
{}
inline
@ -328,12 +352,19 @@ namespace yy {
: state (other.state)
{}
inline
void
parser::by_state::clear ()
{
state = empty_state;
}
inline
void
parser::by_state::move (by_state& that)
{
state = that.state;
that.state = empty;
that.clear ();
}
inline
@ -345,7 +376,10 @@ namespace yy {
parser::symbol_number_type
parser::by_state::type_get () const
{
return state == empty ? 0 : yystos_[state];
if (state == empty_state)
return empty_symbol;
else
return yystos_[state];
}
inline
@ -359,7 +393,7 @@ namespace yy {
{
value = that.value;
// that is emptied.
that.type = empty;
that.type = empty_symbol;
}
inline
@ -386,30 +420,30 @@ namespace yy {
{
case 3: // WORD
#line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599
#line 52 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:614
{delete (yysym.value.str);}
#line 392 "y.tab.c" // lalr1.cc:599
#line 426 "y.tab.c" // lalr1.cc:614
break;
case 4: // QUOTED
#line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599
#line 52 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:614
{delete (yysym.value.str);}
#line 399 "y.tab.c" // lalr1.cc:599
#line 433 "y.tab.c" // lalr1.cc:614
break;
case 5: // QUALIFIERS
#line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599
#line 52 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:614
{delete (yysym.value.str);}
#line 406 "y.tab.c" // lalr1.cc:599
#line 440 "y.tab.c" // lalr1.cc:614
break;
case 22: // complexfieldname
case 23: // complexfieldname
#line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599
#line 52 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:614
{delete (yysym.value.str);}
#line 413 "y.tab.c" // lalr1.cc:599
#line 447 "y.tab.c" // lalr1.cc:614
break;
@ -427,6 +461,10 @@ namespace yy {
std::ostream& yyoutput = yyo;
YYUSE (yyoutput);
symbol_number_type yytype = yysym.type_get ();
// Avoid a (spurious) G++ 4.8 warning about "array subscript is
// below array bounds".
if (yysym.empty ())
std::abort ();
yyo << (yytype < yyntokens_ ? "token" : "nterm")
<< ' ' << yytname_[yytype] << " ("
<< yysym.location << ": ";
@ -511,9 +549,6 @@ namespace yy {
int
parser::parse ()
{
/// Whether yyla contains a lookahead.
bool yyempty = true;
// State.
int yyn;
/// Length of the RHS of the rule being reduced.
@ -565,7 +600,7 @@ namespace yy {
goto yydefault;
// Read a lookahead token.
if (yyempty)
if (yyla.empty ())
{
YYCDEBUG << "Reading a token: ";
try
@ -577,7 +612,6 @@ namespace yy {
error (yyexc);
goto yyerrlab1;
}
yyempty = false;
}
YY_SYMBOL_PRINT ("Next token is", yyla);
@ -597,9 +631,6 @@ namespace yy {
goto yyreduce;
}
// Discard the token being shifted.
yyempty = true;
// Count tokens shifted since error; after three, turn off error status.
if (yyerrstatus_)
--yyerrstatus_;
@ -649,7 +680,7 @@ namespace yy {
switch (yyn)
{
case 2:
#line 72 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
#line 74 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
// It's possible that we end up with no query (e.g.: because just a
// date filter was set, no terms). Allocate an empty query so that we
@ -660,11 +691,11 @@ namespace yy {
else
d->m_result = (yystack_[0].value.sd);
}
#line 664 "y.tab.c" // lalr1.cc:847
#line 695 "y.tab.c" // lalr1.cc:859
break;
case 3:
#line 85 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
#line 87 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("q: query query\n");
Rcl::SearchData *sd = 0;
@ -675,11 +706,11 @@ namespace yy {
}
(yylhs.value.sd) = sd;
}
#line 679 "y.tab.c" // lalr1.cc:847
#line 710 "y.tab.c" // lalr1.cc:859
break;
case 4:
#line 96 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
#line 98 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("q: query AND query\n");
Rcl::SearchData *sd = 0;
@ -690,11 +721,11 @@ namespace yy {
}
(yylhs.value.sd) = sd;
}
#line 694 "y.tab.c" // lalr1.cc:847
#line 725 "y.tab.c" // lalr1.cc:859
break;
case 5:
#line 107 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
#line 109 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("query: query OR query\n");
Rcl::SearchData *top = 0;
@ -705,20 +736,20 @@ namespace yy {
}
(yylhs.value.sd) = top;
}
#line 709 "y.tab.c" // lalr1.cc:847
#line 740 "y.tab.c" // lalr1.cc:859
break;
case 6:
#line 118 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
#line 120 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("q: ( query )\n");
(yylhs.value.sd) = (yystack_[1].value.sd);
}
#line 718 "y.tab.c" // lalr1.cc:847
#line 749 "y.tab.c" // lalr1.cc:859
break;
case 7:
#line 124 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
#line 126 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("q: fieldexpr\n");
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
@ -729,20 +760,20 @@ namespace yy {
(yylhs.value.sd) = 0;
}
}
#line 733 "y.tab.c" // lalr1.cc:847
#line 764 "y.tab.c" // lalr1.cc:859
break;
case 8:
#line 137 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
#line 139 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("fe: simple fieldexpr: " << (yystack_[0].value.cl)->gettext() << endl);
(yylhs.value.cl) = (yystack_[0].value.cl);
}
#line 742 "y.tab.c" // lalr1.cc:847
#line 773 "y.tab.c" // lalr1.cc:859
break;
case 9:
#line 142 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
#line 144 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("fe: " << *(yystack_[2].value.str) << " = " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -750,11 +781,11 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str);
}
#line 754 "y.tab.c" // lalr1.cc:847
#line 785 "y.tab.c" // lalr1.cc:859
break;
case 10:
#line 150 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
#line 152 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("fe: " << *(yystack_[2].value.str) << " : " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -762,23 +793,35 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str);
}
#line 766 "y.tab.c" // lalr1.cc:847
#line 797 "y.tab.c" // lalr1.cc:859
break;
case 11:
#line 158 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
#line 160 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP(cerr << "fe: " << *(yystack_[2].value.str) << " < " << (yystack_[0].value.cl)->gettext() << endl);
LOGP("fe: " << *(yystack_[2].value.str) << " : " << (yystack_[0].value.rg)->gettext() << endl);
(yystack_[0].value.rg)->setfield(*(yystack_[2].value.str));
(yystack_[0].value.rg)->setrel(Rcl::SearchDataClause::REL_CONTAINS);
(yylhs.value.cl) = (yystack_[0].value.rg);
delete (yystack_[2].value.str);
}
#line 809 "y.tab.c" // lalr1.cc:859
break;
case 12:
#line 168 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("fe: " << *(yystack_[2].value.str) << " < " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
(yystack_[0].value.cl)->setrel(Rcl::SearchDataClause::REL_LT);
(yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str);
}
#line 778 "y.tab.c" // lalr1.cc:847
#line 821 "y.tab.c" // lalr1.cc:859
break;
case 12:
#line 166 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
case 13:
#line 176 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("fe: " << *(yystack_[2].value.str) << " <= " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -786,11 +829,11 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str);
}
#line 790 "y.tab.c" // lalr1.cc:847
#line 833 "y.tab.c" // lalr1.cc:859
break;
case 13:
#line 174 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
case 14:
#line 184 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("fe: " << *(yystack_[2].value.str) << " > " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -798,11 +841,11 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str);
}
#line 802 "y.tab.c" // lalr1.cc:847
#line 845 "y.tab.c" // lalr1.cc:859
break;
case 14:
#line 182 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
case 15:
#line 192 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("fe: " << *(yystack_[2].value.str) << " >= " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -810,69 +853,100 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str);
}
#line 814 "y.tab.c" // lalr1.cc:847
#line 857 "y.tab.c" // lalr1.cc:859
break;
case 15:
#line 190 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
case 16:
#line 200 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("fe: - fieldexpr[" << (yystack_[0].value.cl)->gettext() << "]" << endl);
(yystack_[0].value.cl)->setexclude(true);
(yylhs.value.cl) = (yystack_[0].value.cl);
}
#line 824 "y.tab.c" // lalr1.cc:847
#line 867 "y.tab.c" // lalr1.cc:859
break;
case 16:
#line 200 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
case 17:
#line 210 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("cfn: WORD" << endl);
(yylhs.value.str) = (yystack_[0].value.str);
}
#line 833 "y.tab.c" // lalr1.cc:847
#line 876 "y.tab.c" // lalr1.cc:859
break;
case 17:
#line 206 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
case 18:
#line 216 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("cfn: complexfieldname ':' WORD" << endl);
(yylhs.value.str) = new string(*(yystack_[2].value.str) + string(":") + *(yystack_[0].value.str));
delete (yystack_[2].value.str);
delete (yystack_[0].value.str);
}
#line 844 "y.tab.c" // lalr1.cc:847
#line 887 "y.tab.c" // lalr1.cc:859
break;
case 18:
#line 215 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
case 19:
#line 225 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("Range: " << *(yystack_[2].value.str) << string(" .. ") << *(yystack_[0].value.str) << endl);
(yylhs.value.rg) = new Rcl::SearchDataClauseRange(*(yystack_[2].value.str), *(yystack_[0].value.str));
delete (yystack_[2].value.str);
delete (yystack_[0].value.str);
}
#line 898 "y.tab.c" // lalr1.cc:859
break;
case 20:
#line 233 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("Range: " << "" << string(" .. ") << *(yystack_[0].value.str) << endl);
(yylhs.value.rg) = new Rcl::SearchDataClauseRange("", *(yystack_[0].value.str));
delete (yystack_[0].value.str);
}
#line 908 "y.tab.c" // lalr1.cc:859
break;
case 21:
#line 240 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("Range: " << *(yystack_[1].value.str) << string(" .. ") << "" << endl);
(yylhs.value.rg) = new Rcl::SearchDataClauseRange(*(yystack_[1].value.str), "");
delete (yystack_[1].value.str);
}
#line 918 "y.tab.c" // lalr1.cc:859
break;
case 22:
#line 249 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("term[" << *(yystack_[0].value.str) << "]" << endl);
(yylhs.value.cl) = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, *(yystack_[0].value.str));
delete (yystack_[0].value.str);
}
#line 854 "y.tab.c" // lalr1.cc:847
#line 928 "y.tab.c" // lalr1.cc:859
break;
case 19:
#line 221 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
case 23:
#line 255 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
(yylhs.value.cl) = (yystack_[0].value.cl);
}
#line 862 "y.tab.c" // lalr1.cc:847
#line 936 "y.tab.c" // lalr1.cc:859
break;
case 20:
#line 227 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
case 24:
#line 261 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("QUOTED[" << *(yystack_[0].value.str) << "]" << endl);
(yylhs.value.cl) = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *(yystack_[0].value.str), 0);
delete (yystack_[0].value.str);
}
#line 872 "y.tab.c" // lalr1.cc:847
#line 946 "y.tab.c" // lalr1.cc:859
break;
case 21:
#line 233 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847
case 25:
#line 267 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("QUOTED[" << *(yystack_[1].value.str) << "] QUALIFIERS[" << *(yystack_[0].value.str) << "]" << endl);
Rcl::SearchDataClauseDist *cl =
@ -882,11 +956,11 @@ namespace yy {
delete (yystack_[1].value.str);
delete (yystack_[0].value.str);
}
#line 886 "y.tab.c" // lalr1.cc:847
#line 960 "y.tab.c" // lalr1.cc:859
break;
#line 890 "y.tab.c" // lalr1.cc:847
#line 964 "y.tab.c" // lalr1.cc:859
default:
break;
}
@ -914,8 +988,7 @@ namespace yy {
if (!yyerrstatus_)
{
++yynerrs_;
error (yyla.location, yysyntax_error_ (yystack_[0].state,
yyempty ? yyempty_ : yyla.type_get ()));
error (yyla.location, yysyntax_error_ (yystack_[0].state, yyla));
}
@ -928,10 +1001,10 @@ namespace yy {
// Return failure if at end of input.
if (yyla.type_get () == yyeof_)
YYABORT;
else if (!yyempty)
else if (!yyla.empty ())
{
yy_destroy_ ("Error: discarding", yyla);
yyempty = true;
yyla.clear ();
}
}
@ -1007,7 +1080,7 @@ namespace yy {
goto yyreturn;
yyreturn:
if (!yyempty)
if (!yyla.empty ())
yy_destroy_ ("Cleanup: discarding lookahead", yyla);
/* Do not reclaim the symbols of the rule whose action triggered
@ -1027,7 +1100,7 @@ namespace yy {
<< std::endl;
// Do not try to display the values of the reclaimed symbols,
// as their printer might throw an exception.
if (!yyempty)
if (!yyla.empty ())
yy_destroy_ (YY_NULLPTR, yyla);
while (1 < yystack_.size ())
@ -1047,9 +1120,8 @@ namespace yy {
// Generate an error message.
std::string
parser::yysyntax_error_ (state_type yystate, symbol_number_type yytoken) const
parser::yysyntax_error_ (state_type yystate, const symbol_type& yyla) const
{
std::string yyres;
// Number of reported tokens (one for the "unexpected", one per
// "expected").
size_t yycount = 0;
@ -1063,7 +1135,7 @@ namespace yy {
the only way this function was invoked is if the default action
is an error action. In that case, don't check for expected
tokens because there are none.
- The only way there can be no lookahead present (in yytoken) is
- The only way there can be no lookahead present (in yyla) is
if this state is a consistent state with a default action.
Thus, detecting the absence of a lookahead is sufficient to
determine that there is no unexpected or expected token to
@ -1083,8 +1155,9 @@ namespace yy {
token that will not be accepted due to an error action in a
later state.
*/
if (yytoken != yyempty_)
if (!yyla.empty ())
{
int yytoken = yyla.type_get ();
yyarg[yycount++] = yytname_[yytoken];
int yyn = yypact_[yystate];
if (!yy_pact_value_is_default_ (yyn))
@ -1127,6 +1200,7 @@ namespace yy {
#undef YYCASE_
}
std::string yyres;
// Argument number.
size_t yyi = 0;
for (char const* yyp = yyformat; *yyp; ++yyp)
@ -1143,83 +1217,85 @@ namespace yy {
const signed char parser::yypact_ninf_ = -3;
const signed char parser::yytable_ninf_ = -18;
const signed char parser::yytable_ninf_ = -19;
const signed char
parser::yypact_[] =
{
24, 25, 3, 24, 26, 6, 16, -3, 31, -3,
-3, -3, 1, -3, -3, 24, 24, 4, -2, 9,
-2, -2, -2, -2, -3, 4, -3, -3, -3, 37,
-3, -3, -3, -3, -3
31, 32, 3, 31, 33, 6, 14, -3, 38, -3,
-3, -3, 1, -3, -3, 31, 31, 4, -2, 9,
-2, -2, -2, -2, -3, 4, -3, -3, -3, 16,
18, -3, -3, -3, -3, -3, -3, 22, -3, -3
};
const unsigned char
parser::yydefact_[] =
{
0, 18, 20, 0, 0, 0, 2, 7, 0, 8,
19, 21, 0, 15, 1, 0, 0, 3, 0, 0,
0, 0, 0, 0, 6, 4, 5, 18, 9, 18,
10, 12, 11, 14, 13
0, 22, 24, 0, 0, 0, 2, 7, 0, 8,
23, 25, 0, 16, 1, 0, 0, 3, 0, 0,
0, 0, 0, 0, 6, 4, 5, 22, 9, 22,
0, 11, 10, 13, 12, 15, 14, 21, 20, 19
};
const signed char
parser::yypgoto_[] =
{
-3, -3, 0, 13, -3, 36, -3
-3, -3, 0, 34, -3, -3, 37, -3
};
const signed char
parser::yydefgoto_[] =
{
-1, 5, 17, 7, 8, 9, 10
-1, 5, 17, 7, 8, 31, 9, 10
};
const signed char
parser::yytable_[] =
{
6, 27, 2, 12, 1, 2, 14, 15, 11, 3,
4, 16, 29, 2, 16, 25, 26, 13, 24, 1,
2, 0, 15, 0, 3, 4, 16, 1, 2, 1,
2, 0, 3, 4, 0, 4, -16, -16, -16, -16,
-16, -16, 18, 19, 20, 21, 22, 23, -17, -17,
-17, -17, -17, -17, 28, 30, 31, 32, 33, 34
4, 16, 29, 2, 16, 25, 26, 1, 2, 24,
15, 38, 3, 4, 16, 39, 30, -18, -18, -18,
-18, -18, -18, 37, 1, 2, 1, 2, 13, 3,
4, 0, 4, -17, -17, -17, -17, -17, -17, 18,
19, 20, 21, 22, 23, 28, 32, 33, 34, 35,
36
};
const signed char
parser::yycheck_[] =
{
0, 3, 4, 3, 3, 4, 0, 6, 5, 8,
9, 10, 3, 4, 10, 15, 16, 4, 17, 3,
4, -1, 6, -1, 8, 9, 10, 3, 4, 3,
4, -1, 8, 9, -1, 9, 11, 12, 13, 14,
15, 16, 11, 12, 13, 14, 15, 16, 11, 12,
13, 14, 15, 16, 18, 19, 20, 21, 22, 23
9, 10, 3, 4, 10, 15, 16, 3, 4, 18,
6, 3, 8, 9, 10, 3, 17, 11, 12, 13,
14, 15, 16, 17, 3, 4, 3, 4, 4, 8,
9, -1, 9, 11, 12, 13, 14, 15, 16, 11,
12, 13, 14, 15, 16, 18, 19, 20, 21, 22,
23
};
const unsigned char
parser::yystos_[] =
{
0, 3, 4, 8, 9, 19, 20, 21, 22, 23,
24, 5, 20, 21, 0, 6, 10, 20, 11, 12,
13, 14, 15, 16, 17, 20, 20, 3, 23, 3,
23, 23, 23, 23, 23
0, 3, 4, 8, 9, 20, 21, 22, 23, 25,
26, 5, 21, 22, 0, 6, 10, 21, 11, 12,
13, 14, 15, 16, 18, 21, 21, 3, 25, 3,
17, 24, 25, 25, 25, 25, 25, 17, 3, 3
};
const unsigned char
parser::yyr1_[] =
{
0, 18, 19, 20, 20, 20, 20, 20, 21, 21,
21, 21, 21, 21, 21, 21, 22, 22, 23, 23,
24, 24
0, 19, 20, 21, 21, 21, 21, 21, 22, 22,
22, 22, 22, 22, 22, 22, 22, 23, 23, 24,
24, 24, 25, 25, 26, 26
};
const unsigned char
parser::yyr2_[] =
{
0, 2, 1, 2, 3, 3, 3, 1, 1, 3,
3, 3, 3, 3, 3, 2, 1, 3, 1, 1,
1, 2
3, 3, 3, 3, 3, 3, 2, 1, 3, 3,
2, 2, 1, 1, 1, 2
};
@ -1231,17 +1307,17 @@ namespace yy {
{
"$end", "error", "$undefined", "WORD", "QUOTED", "QUALIFIERS", "AND",
"UCONCAT", "'('", "'-'", "OR", "EQUALS", "CONTAINS", "SMALLEREQ",
"SMALLER", "GREATEREQ", "GREATER", "')'", "$accept", "topquery", "query",
"fieldexpr", "complexfieldname", "term", "qualquote", YY_NULLPTR
"SMALLER", "GREATEREQ", "GREATER", "RANGE", "')'", "$accept", "topquery",
"query", "fieldexpr", "complexfieldname", "range", "term", "qualquote", YY_NULLPTR
};
#if YYDEBUG
const unsigned char
const unsigned short int
parser::yyrline_[] =
{
0, 71, 71, 84, 95, 106, 117, 123, 136, 141,
149, 157, 165, 173, 181, 189, 199, 205, 214, 220,
226, 232
0, 73, 73, 86, 97, 108, 119, 125, 138, 143,
151, 159, 167, 175, 183, 191, 199, 209, 215, 224,
232, 239, 248, 254, 260, 266
};
// Print the state stack on the debug stream.
@ -1286,7 +1362,7 @@ namespace yy {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
8, 17, 2, 2, 2, 9, 2, 2, 2, 2,
8, 18, 2, 2, 2, 9, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@ -1308,9 +1384,10 @@ namespace yy {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
5, 6, 7, 10, 11, 12, 13, 14, 15, 16
5, 6, 7, 10, 11, 12, 13, 14, 15, 16,
17
};
const unsigned int user_token_number_max_ = 269;
const unsigned int user_token_number_max_ = 270;
const token_number_type undef_token_ = 2;
if (static_cast<int>(t) <= yyeof_)
@ -1323,8 +1400,8 @@ namespace yy {
} // yy
#line 1327 "y.tab.c" // lalr1.cc:1155
#line 244 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:1156
#line 1404 "y.tab.c" // lalr1.cc:1167
#line 278 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:1168
#include <ctype.h>
@ -1473,7 +1550,7 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *,
return c;
}
// field-term relations
// field-term relations, and ranges
switch (c) {
case '=': return yy::parser::token::EQUALS;
case ':': return yy::parser::token::CONTAINS;
@ -1486,6 +1563,15 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *,
return yy::parser::token::SMALLER;
}
}
case '.': {
int c1 = d->GETCHAR();
if (c1 == '.') {
return yy::parser::token::RANGE;
} else {
d->UNGETCHAR(c1);
break;
}
}
case '>': {
int c1 = d->GETCHAR();
if (c1 == '=') {
@ -1514,6 +1600,16 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *,
//cerr << "Word broken by special char" << endl;
d->UNGETCHAR(c);
break;
} else if (c == '.') {
int c1 = d->GETCHAR();
if (c1 == '.') {
d->UNGETCHAR(c1);
d->UNGETCHAR(c);
break;
} else {
d->UNGETCHAR(c1);
word->push_back(c);
}
} else if (c == 0) {
//cerr << "Word broken by EOF" << endl;
break;

View File

@ -1,8 +1,8 @@
// A Bison parser, made by GNU Bison 3.0.2.
// A Bison parser, made by GNU Bison 3.0.4.
// Skeleton interface for Bison LALR(1) parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
// Copyright (C) 2002-2015 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@ -41,10 +41,11 @@
# define YY_YY_Y_TAB_H_INCLUDED
# include <vector>
# include <cstdlib> // std::abort
# include <iostream>
# include <stdexcept>
# include <string>
# include <vector>
# include "stack.hh"
# include "location.hh"
@ -109,7 +110,7 @@
namespace yy {
#line 113 "y.tab.h" // lalr1.cc:372
#line 114 "y.tab.h" // lalr1.cc:377
@ -123,13 +124,14 @@ namespace yy {
/// Symbol semantic values.
union semantic_type
{
#line 46 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:372
#line 46 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:377
std::string *str;
Rcl::SearchDataClauseRange *rg;
Rcl::SearchDataClauseSimple *cl;
Rcl::SearchData *sd;
#line 133 "y.tab.h" // lalr1.cc:372
#line 135 "y.tab.h" // lalr1.cc:377
};
#else
typedef YYSTYPE semantic_type;
@ -160,16 +162,20 @@ namespace yy {
SMALLEREQ = 266,
SMALLER = 267,
GREATEREQ = 268,
GREATER = 269
GREATER = 269,
RANGE = 270
};
};
/// (External) token type, as returned by yylex.
typedef token::yytokentype token_type;
/// Internal symbol number.
/// Symbol type: an internal symbol number.
typedef int symbol_number_type;
/// The symbol type number to denote an empty symbol.
enum { empty_symbol = -2 };
/// Internal symbol number for tokens (subsumed by symbol_number_type).
typedef unsigned char token_number_type;
@ -200,8 +206,15 @@ namespace yy {
const semantic_type& v,
const location_type& l);
/// Destroy the symbol.
~basic_symbol ();
/// Destroy contents, and record that is empty.
void clear ();
/// Whether empty.
bool empty () const;
/// Destructive move, \a s is emptied into this.
void move (basic_symbol& s);
@ -231,21 +244,23 @@ namespace yy {
/// Constructor from (external) token numbers.
by_type (kind_type t);
/// Record that this symbol is empty.
void clear ();
/// Steal the symbol type from \a that.
void move (by_type& that);
/// The (internal) type number (corresponding to \a type).
/// -1 when this symbol is empty.
/// \a empty when empty.
symbol_number_type type_get () const;
/// The token.
token_type token () const;
enum { empty = 0 };
/// The symbol type.
/// -1 when this symbol is empty.
token_number_type type;
/// \a empty_symbol when empty.
/// An int, not token_number_type, to be able to store empty_symbol.
int type;
};
/// "External" symbols: returned by the scanner.
@ -292,9 +307,9 @@ namespace yy {
/// Generate an error message.
/// \param yystate the state where the error occurred.
/// \param yytoken the lookahead token type, or yyempty_.
/// \param yyla the lookahead token.
virtual std::string yysyntax_error_ (state_type yystate,
symbol_number_type yytoken) const;
const symbol_type& yyla) const;
/// Compute post-reduction state.
/// \param yystate the current state
@ -357,7 +372,7 @@ namespace yy {
static const char* const yytname_[];
#if YYDEBUG
// YYRLINE[YYN] -- Source line where rule number YYN was defined.
static const unsigned char yyrline_[];
static const unsigned short int yyrline_[];
/// Report on the debug stream that the rule \a r is going to be reduced.
virtual void yy_reduce_print_ (int r);
/// Print the state stack on the debug stream.
@ -397,16 +412,21 @@ namespace yy {
/// Copy constructor.
by_state (const by_state& other);
/// Record that this symbol is empty.
void clear ();
/// Steal the symbol type from \a that.
void move (by_state& that);
/// The (internal) type number (corresponding to \a state).
/// "empty" when empty.
/// \a empty_symbol when empty.
symbol_number_type type_get () const;
enum { empty = 0 };
/// The state number used to denote an empty symbol.
enum { empty_state = -1 };
/// The state.
/// \a empty when empty.
state_type state;
};
@ -447,17 +467,16 @@ namespace yy {
/// Pop \a n symbols the three stacks.
void yypop_ (unsigned int n = 1);
// Constants.
/// Constants.
enum
{
yyeof_ = 0,
yylast_ = 59, ///< Last index in yytable_.
yynnts_ = 7, ///< Number of nonterminal symbols.
yyempty_ = -2,
yylast_ = 60, ///< Last index in yytable_.
yynnts_ = 8, ///< Number of nonterminal symbols.
yyfinal_ = 14, ///< Termination state number.
yyterror_ = 1,
yyerrcode_ = 256,
yyntokens_ = 18 ///< Number of tokens.
yyntokens_ = 19 ///< Number of tokens.
};
@ -468,7 +487,7 @@ namespace yy {
} // yy
#line 472 "y.tab.h" // lalr1.cc:372
#line 491 "y.tab.h" // lalr1.cc:377

View File

@ -13,7 +13,7 @@
using namespace std;
// #define LOG_PARSER
//#define LOG_PARSER
#ifdef LOG_PARSER
#define LOGP(X) {cerr << X;}
#else
@ -45,6 +45,7 @@ static void addSubQuery(WasaParserDriver *d,
%union {
std::string *str;
Rcl::SearchDataClauseRange *rg;
Rcl::SearchDataClauseSimple *cl;
Rcl::SearchData *sd;
}
@ -52,6 +53,7 @@ static void addSubQuery(WasaParserDriver *d,
%type <cl> qualquote
%type <cl> fieldexpr
%type <rg> range
%type <cl> term
%type <sd> query
%type <str> complexfieldname
@ -64,7 +66,7 @@ static void addSubQuery(WasaParserDriver *d,
%left AND UCONCAT '(' '-'
%left OR
%token EQUALS CONTAINS SMALLEREQ SMALLER GREATEREQ GREATER
%token EQUALS CONTAINS SMALLEREQ SMALLER GREATEREQ GREATER RANGE
%%
@ -154,9 +156,17 @@ fieldexpr: term
$$ = $3;
delete $1;
}
| complexfieldname CONTAINS range
{
LOGP("fe: " << *$1 << " : " << $3->gettext() << endl);
$3->setfield(*$1);
$3->setrel(Rcl::SearchDataClause::REL_CONTAINS);
$$ = $3;
delete $1;
}
| complexfieldname SMALLER term
{
LOGP(cerr << "fe: " << *$1 << " < " << $3->gettext() << endl);
LOGP("fe: " << *$1 << " < " << $3->gettext() << endl);
$3->setfield(*$1);
$3->setrel(Rcl::SearchDataClause::REL_LT);
$$ = $3;
@ -210,6 +220,30 @@ complexfieldname CONTAINS WORD
delete $3;
}
/* A value range, used as the right-hand side of "field CONTAINS range".
 * Three forms are accepted: both bounds given, only the upper bound
 * (leading RANGE token), or only the lower bound (trailing RANGE token).
 * A missing bound is passed to the clause as an empty string. */
range:
WORD RANGE WORD
{
// Both bounds present
LOGP("Range: " << *$1 << string(" .. ") << *$3 << endl);
$$ = new Rcl::SearchDataClauseRange(*$1, *$3);
// The lexer-allocated word strings were copied into the clause
delete $1;
delete $3;
}
|
RANGE WORD
{
// Upper bound only: empty lower bound
LOGP("Range: " << "" << string(" .. ") << *$2 << endl);
$$ = new Rcl::SearchDataClauseRange("", *$2);
delete $2;
}
|
WORD RANGE
{
// Lower bound only: empty upper bound
LOGP("Range: " << *$1 << string(" .. ") << "" << endl);
$$ = new Rcl::SearchDataClauseRange(*$1, "");
delete $1;
}
;
term:
WORD
{
@ -389,7 +423,7 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *,
return c;
}
// field-term relations
// field-term relations, and ranges
switch (c) {
case '=': return yy::parser::token::EQUALS;
case ':': return yy::parser::token::CONTAINS;
@ -402,6 +436,15 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *,
return yy::parser::token::SMALLER;
}
}
case '.': {
int c1 = d->GETCHAR();
if (c1 == '.') {
return yy::parser::token::RANGE;
} else {
d->UNGETCHAR(c1);
break;
}
}
case '>': {
int c1 = d->GETCHAR();
if (c1 == '=') {
@ -430,6 +473,16 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *,
//cerr << "Word broken by special char" << endl;
d->UNGETCHAR(c);
break;
} else if (c == '.') {
int c1 = d->GETCHAR();
if (c1 == '.') {
d->UNGETCHAR(c1);
d->UNGETCHAR(c);
break;
} else {
d->UNGETCHAR(c1);
word->push_back(c);
}
} else if (c == 0) {
//cerr << "Word broken by EOF" << endl;
break;

View File

@ -182,7 +182,8 @@ bool WasaParserDriver::addClause(SearchData *sd,
delete cl;
return false;
}
LOGDEB("addClause:: date span: " << (di.y1) << "-" << (di.m1) << "-" << (di.d1) << "/" << (di.y2) << "-" << (di.m2) << "-" << (di.d2) << "\n" );
LOGDEB("addClause:: date span: " << di.y1 << "-" << di.m1 << "-"
<< di.d1 << "/" << di.y2 << "-" << di.m2 << "-" << di.d2 << "\n");
m_haveDates = true;
m_dates = di;
delete cl;

View File

@ -50,6 +50,7 @@ using namespace std;
#include "searchdata.h"
#include "rclquery.h"
#include "rclquery_p.h"
#include "rclvalues.h"
#include "md5ut.h"
#include "rclversion.h"
#include "cancelcheck.h"
@ -1489,7 +1490,8 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
for (vector<string>::iterator it = vpath.begin();
it != vpath.end(); it++){
if (it->length() > 230) {
// Just truncate it. May still be useful because of wildcards
// Just truncate it. May still be useful because
// of wildcards
*it = it->substr(0, 230);
}
newdocument.add_posting(wrap_prefix(pathelt_prefix) + *it,
@ -1504,26 +1506,36 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
//
// The order has no importance, and we set a position gap of 100
// between fields to avoid false proximity matches.
map<string, string>::iterator meta_it;
for (meta_it = doc.meta.begin(); meta_it != doc.meta.end(); meta_it++) {
if (!meta_it->second.empty()) {
const FieldTraits *ftp;
// We don't test for an empty prefix here. Some fields are part
// of the internal conf with an empty prefix (ie: abstract).
if (!fieldToTraits(meta_it->first, &ftp)) {
LOGDEB0("Db::add: no prefix for field [" <<
meta_it->first << "], no indexing\n");
continue;
}
LOGDEB0("Db::add: field [" << meta_it->first << "] pfx [" <<
for (const auto& entry: doc.meta) {
if (entry.second.empty()) {
continue;
}
const FieldTraits *ftp{nullptr};
fieldToTraits(entry.first, &ftp);
if (ftp && ftp->valueslot) {
LOGDEB("Adding value: for field " << entry.first << " slot "
<< ftp->valueslot << endl);
add_field_value(newdocument, *ftp, entry.second);
}
// There was an old comment here about not testing for an
// empty prefix, and we indeed did not test. I don't think
// that it makes sense any more (and it was in disagreement
// with the LOG message). Really now: no prefix means no
// indexing.
if (ftp && !ftp->pfx.empty()) {
LOGDEB0("Db::add: field [" << entry.first << "] pfx [" <<
ftp->pfx << "] inc " << ftp->wdfinc << ": [" <<
meta_it->second << "]\n");
entry.second << "]\n");
splitter.setTraits(*ftp);
if (!splitter.text_to_words(meta_it->second)) {
LOGDEB("Db::addOrUpdate: split failed for " <<
meta_it->first << "\n");
if (!splitter.text_to_words(entry.second)) {
LOGDEB("Db::addOrUpdate: split failed for " <<
entry.first << "\n");
}
}
} else {
LOGDEB0("Db::add: no prefix for field [" <<
entry.first << "], no indexing\n");
}
}
// Reset to no prefix and default params
@ -1578,8 +1590,8 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
if (doc.getmeta(Doc::keyfn, &utf8fn) && !utf8fn.empty()) {
string fn;
if (unacmaybefold(utf8fn, fn, "UTF-8", UNACOP_UNACFOLD)) {
// We should truncate after extracting the extension, but this is
// a pathological case anyway
// We should truncate after extracting the extension,
// but this is a pathological case anyway
if (fn.size() > 230)
utf8truncate(fn, 230);
string::size_type pos = fn.rfind('.');
@ -1587,7 +1599,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
newdocument.add_boolean_term(wrap_prefix(fileext_prefix) +
fn.substr(pos + 1));
}
newdocument.add_term(wrap_prefix(unsplitfilename_prefix) + fn, 0);
newdocument.add_term(wrap_prefix(unsplitfilename_prefix) + fn,0);
}
}

View File

@ -197,7 +197,7 @@ class Db::Native {
std::string rawtextMetaKey(Xapian::docid did) {
// Xapian's Olly Betts advises to use a key which will
// sort the same as the docid (which we do), and to
// use Xapian's pack_uint_preserving_sort() which is
// use Xapian's pack.h:pack_uint_preserving_sort() which is
// efficient but hard to read. I'd wager that this
// does not make much of a difference. 10 ascii bytes
// gives us 10 billion docs, which is enough (says I).

87
src/rcldb/rclvalues.cpp Normal file
View File

@ -0,0 +1,87 @@
/* Copyright (C) 2004-2018 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include <string>
#include "xapian.h"
#include "rclconfig.h"
#include "smallut.h"
#include "log.h"
using namespace std;
namespace Rcl {
// Store the text of a field into the Xapian value slot configured for
// it (ft.valueslot). String values are stored unchanged. Integer
// values are first passed through leftzeropad() with a width of
// ft.valuelen, or 10 when no length is configured -- presumably so
// that plain string comparison orders them numerically (confirm
// against the range query code).
void add_field_value(Xapian::Document& xdoc, const FieldTraits& ft,
                     const string& data)
{
    string stored(data);
    if (ft.valuetype == FieldTraits::INT) {
        int width = ft.valuelen ? ft.valuelen : 10;
        leftzeropad(stored, width);
    }
    LOGDEB0("Rcl::add_field_value: slot " << ft.valueslot << " [" <<
            stored << "]\n");
    xdoc.add_value(ft.valueslot, stored);
}
// Convert a user-supplied value for a field into the form used for
// stored values. Only non-empty integer values are transformed: a
// trailing k/m/g/t (either case) is replaced by 3/6/9/12 zeroes, then
// the result goes through leftzeropad() with a width of ft.valuelen,
// or 10 when no length is configured. Anything else is returned as is.
string convert_field_value(const FieldTraits& ft,
                           const string& data)
{
    string converted(data);
    if (ft.valuetype != FieldTraits::INT || converted.empty()) {
        return converted;
    }

    // Decimal multiplier suffix, if any, expressed as the zeroes to append
    const char *zeroes = nullptr;
    switch (converted.back()) {
    case 'k': case 'K':
        zeroes = "000";
        break;
    case 'm': case 'M':
        zeroes = "000000";
        break;
    case 'g': case 'G':
        zeroes = "000000000";
        break;
    case 't': case 'T':
        zeroes = "000000000000";
        break;
    default:
        break;
    }
    if (zeroes != nullptr) {
        // Swap the suffix letter for the corresponding zeroes
        converted.pop_back();
        converted += zeroes;
    }

    int width = ft.valuelen ? ft.valuelen : 10;
    leftzeropad(converted, width);
    return converted;
}
}

29
src/rcldb/rclvalues.h Normal file
View File

@ -0,0 +1,29 @@
#ifndef _RCLVALUES_H_INCLUDED_
#define _RCLVALUES_H_INCLUDED_
/* Copyright (C) 2004-2018 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <string>

// Forward declarations so that this header can be included on its own:
// the prototypes below only use these types by reference.
namespace Xapian {
class Document;
}
struct FieldTraits;

namespace Rcl {
/** Store a field's text into the document value slot given by
 *  ft.valueslot. Integer-typed values are zero-padded on the left
 *  before being stored. */
extern void add_field_value(Xapian::Document& xdoc, const FieldTraits& ft,
                            const std::string& data);
/** Convert a value given by the user into the stored form: for
 *  integer-typed fields, a k/m/g/t suffix is expanded into zeroes and
 *  the result is zero-padded on the left. Other values are returned
 *  unchanged. */
extern std::string convert_field_value(const FieldTraits& ft,
                                       const std::string& data);
}
#endif /* _RCLVALUES_H_INCLUDED_ */

View File

@ -348,6 +348,14 @@ void SearchDataClausePath::dump(ostream& o) const
o << "[" << m_text << "]";
}
// Debug dump of a range clause. Both bounds are printed, separated by
// "..": showing only the lower bound made open-ended ranges such as
// "..x" come out as "[]". An empty bound means that this side of the
// range was not specified.
void SearchDataClauseRange::dump(ostream& o) const
{
    o << "ClauseRange: ";
    if (m_exclude)
        o << " - ";
    o << "[" << gettext() << ".." << gettext2() << "]";
}
void SearchDataClauseDist::dump(ostream& o) const
{
if (m_tp == SCLT_NEAR)

View File

@ -41,9 +41,8 @@ namespace Rcl {
/** Search clause types */
enum SClType {
SCLT_AND,
SCLT_OR, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR, SCLT_PATH,
SCLT_SUB
SCLT_AND, SCLT_OR, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR,
SCLT_PATH, SCLT_RANGE, SCLT_SUB,
};
class SearchDataClause;
@ -79,16 +78,14 @@ class SearchDataClauseDist;
class SearchData {
public:
SearchData(SClType tp, const string& stemlang)
: m_tp(tp), m_stemlang(stemlang)
{
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
m_tp = SCLT_OR;
commoninit();
: m_tp(tp), m_stemlang(stemlang) {
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
m_tp = SCLT_OR;
commoninit();
}
SearchData()
: m_tp(SCLT_AND)
{
commoninit();
: m_tp(SCLT_AND) {
commoninit();
}
~SearchData();
@ -142,21 +139,19 @@ public:
void setDescription(const std::string& d) {m_description = d;}
/** Return an XML version of the contents, for storage in search history
by the GUI */
by the GUI */
string asXML();
void setTp(SClType tp)
{
m_tp = tp;
void setTp(SClType tp) {
m_tp = tp;
}
SClType getTp() {
return m_tp;
}
void setMaxExpand(int max)
{
m_softmaxexpand = max;
void setMaxExpand(int max) {
m_softmaxexpand = max;
}
bool getAutoDiac() {return m_autodiacsens;}
bool getAutoCase() {return m_autocasesens;}
@ -217,8 +212,8 @@ private:
bool expandFileTypes(Rcl::Db &db, std::vector<std::string>& exptps);
bool clausesToQuery(Rcl::Db &db, SClType tp,
std::vector<SearchDataClause*>& query,
string& reason, void *d);
std::vector<SearchDataClause*>& query,
string& reason, void *d);
void commoninit();
/* Copyconst and assignment private and forbidden */
@ -229,9 +224,9 @@ private:
class SearchDataClause {
public:
enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=0x1, SDCM_ANCHORSTART=0x2,
SDCM_ANCHOREND=0x4, SDCM_CASESENS=0x8, SDCM_DIACSENS=0x10,
SDCM_NOTERMS=0x20, // Don't include terms for highlighting
SDCM_NOSYNS = 0x40, // Don't perform synonym expansion
SDCM_ANCHOREND=0x4, SDCM_CASESENS=0x8, SDCM_DIACSENS=0x10,
SDCM_NOTERMS=0x20, // Don't include terms for highlighting
SDCM_NOSYNS = 0x40, // Don't perform synonym expansion
// Aargh special case. pathelts are case/diac-sensitive
// even in a stripped index
SDCM_PATHELT = 0x80,
@ -239,70 +234,57 @@ public:
enum Relation {REL_CONTAINS, REL_EQUALS, REL_LT, REL_LTE, REL_GT, REL_GTE};
SearchDataClause(SClType tp)
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
m_modifiers(SDCM_NONE), m_weight(1.0), m_exclude(false),
m_rel(REL_CONTAINS)
{}
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
m_modifiers(SDCM_NONE), m_weight(1.0), m_exclude(false),
m_rel(REL_CONTAINS) {}
virtual ~SearchDataClause() {}
virtual bool toNativeQuery(Rcl::Db &db, void *) = 0;
bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
virtual std::string getReason() const {return m_reason;}
virtual void getTerms(HighlightData&) const {}
SClType getTp() const
{
return m_tp;
SClType getTp() const {
return m_tp;
}
void setTp(SClType tp) {
m_tp = tp;
}
void setParent(SearchData *p)
{
m_parentSearch = p;
void setParent(SearchData *p) {
m_parentSearch = p;
}
string getStemLang()
{
return (m_modifiers & SDCM_NOSTEMMING) || m_parentSearch == 0 ?
cstr_null : m_parentSearch->getStemLang();
string getStemLang() {
return (m_modifiers & SDCM_NOSTEMMING) || m_parentSearch == 0 ?
cstr_null : m_parentSearch->getStemLang();
}
bool getAutoDiac()
{
return m_parentSearch ? m_parentSearch->getAutoDiac() : false;
bool getAutoDiac() {
return m_parentSearch ? m_parentSearch->getAutoDiac() : false;
}
bool getAutoCase()
{
return m_parentSearch ? m_parentSearch->getAutoCase() : true;
bool getAutoCase() {
return m_parentSearch ? m_parentSearch->getAutoCase() : true;
}
int getMaxExp()
{
return m_parentSearch ? m_parentSearch->getMaxExp() : 10000;
int getMaxExp() {
return m_parentSearch ? m_parentSearch->getMaxExp() : 10000;
}
size_t getMaxCl()
{
return m_parentSearch ? m_parentSearch->getMaxCl() : 100000;
size_t getMaxCl() {
return m_parentSearch ? m_parentSearch->getMaxCl() : 100000;
}
int getSoftMaxExp()
{
return m_parentSearch ? m_parentSearch->getSoftMaxExp() : -1;
int getSoftMaxExp() {
return m_parentSearch ? m_parentSearch->getSoftMaxExp() : -1;
}
virtual void addModifier(Modifier mod)
{
m_modifiers = m_modifiers | mod;
virtual void addModifier(Modifier mod) {
m_modifiers = m_modifiers | mod;
}
virtual unsigned int getmodifiers() {
return m_modifiers;
return m_modifiers;
}
virtual void setWeight(float w)
{
m_weight = w;
virtual void setWeight(float w) {
m_weight = w;
}
virtual bool getexclude() const
{
return m_exclude;
virtual bool getexclude() const {
return m_exclude;
}
virtual void setexclude(bool onoff)
{
m_exclude = onoff;
virtual void setexclude(bool onoff) {
m_exclude = onoff;
}
virtual void setrel(Relation rel) {
m_rel = rel;
@ -322,15 +304,6 @@ protected:
float m_weight;
bool m_exclude;
Relation m_rel;
private:
SearchDataClause(const SearchDataClause&)
{
}
SearchDataClause& operator=(const SearchDataClause&)
{
return *this;
}
};
/**
@ -341,37 +314,30 @@ class TermProcQ;
class SearchDataClauseSimple : public SearchDataClause {
public:
SearchDataClauseSimple(SClType tp, const std::string& txt,
const std::string& fld = std::string())
: SearchDataClause(tp), m_text(txt), m_field(fld), m_curcl(0)
{
m_haveWildCards =
(txt.find_first_of(cstr_minwilds) != std::string::npos);
const std::string& fld = std::string())
: SearchDataClause(tp), m_text(txt), m_field(fld), m_curcl(0) {
m_haveWildCards =
(txt.find_first_of(cstr_minwilds) != std::string::npos);
}
SearchDataClauseSimple(const std::string& txt, SClType tp)
: SearchDataClause(tp), m_text(txt), m_curcl(0)
{
m_haveWildCards =
(txt.find_first_of(cstr_minwilds) != std::string::npos);
: SearchDataClause(tp), m_text(txt), m_curcl(0) {
m_haveWildCards =
(txt.find_first_of(cstr_minwilds) != std::string::npos);
}
virtual ~SearchDataClauseSimple()
{
}
virtual ~SearchDataClauseSimple() {}
/** Translate to Xapian query */
virtual bool toNativeQuery(Rcl::Db &, void *);
virtual void getTerms(HighlightData& hldata) const
{
hldata.append(m_hldata);
virtual void getTerms(HighlightData& hldata) const {
hldata.append(m_hldata);
}
virtual const std::string& gettext()
{
return m_text;
virtual const std::string& gettext() const {
return m_text;
}
virtual const std::string& getfield()
{
return m_field;
virtual const std::string& getfield() const {
return m_field;
}
virtual void setfield(const string& field) {
m_field = field;
@ -384,22 +350,49 @@ protected:
HighlightData m_hldata;
// Current count of Xapian clauses, to check against expansion limit
size_t m_curcl;
bool processUserString(Rcl::Db &db, const string &iq,
std::string &ermsg,
void* pq, int slack = 0, bool useNear = false);
std::string &ermsg,
void* pq, int slack = 0, bool useNear = false);
bool expandTerm(Rcl::Db &db, std::string& ermsg, int mods,
const std::string& term,
std::vector<std::string>& exp,
const std::string& term,
std::vector<std::string>& exp,
std::string& sterm, const std::string& prefix,
std::vector<std::string>* multiwords = 0);
std::vector<std::string>* multiwords = 0);
// After splitting entry on whitespace: process non-phrase element
void processSimpleSpan(Rcl::Db &db, string& ermsg, const string& span,
int mods, void *pq);
int mods, void *pq);
// Process phrase/near element
void processPhraseOrNear(Rcl::Db &db, string& ermsg, TermProcQ *splitData,
int mods, void *pq, bool useNear, int slack);
int mods, void *pq, bool useNear, int slack);
};
/**
 * Range clause: holds two bounds for a field value. The first bound is
 * kept in the inherited m_text (see gettext()), the second one in m_t2
 * (see gettext2()).
 */
class SearchDataClauseRange : public SearchDataClauseSimple {
public:
    /** Build a range clause from its two bounds and an optional field name */
    SearchDataClauseRange(const std::string& t1, const std::string& t2,
                          const std::string& fld = std::string())
        : SearchDataClauseSimple(SCLT_RANGE, t1, fld), m_t2(t2) {
    }

    /** 'Upgrade' a simple clause carrying an eq/gt/lt... relation to a
     *  range. The bounds are separate parameters because either one, or
     *  both, may be set to the original clause text.
     *  NOTE(review): all other state is copied from cl, so the clause
     *  type is the one cl had, not SCLT_RANGE -- confirm this is
     *  intended. */
    SearchDataClauseRange(const SearchDataClauseSimple& cl,
                          const std::string& t1, const std::string& t2)
        : SearchDataClauseSimple(cl), m_t2(t2) {
        m_text = t1;
    }

    virtual ~SearchDataClauseRange() {}

    virtual void dump(ostream& o) const;

    /** Second bound of the range */
    virtual const std::string& gettext2() const {
        return m_t2;
    }

    /** Translate to Xapian query */
    virtual bool toNativeQuery(Rcl::Db &db, void *);

protected:
    // Second bound. The first one is stored in m_text.
    std::string m_t2;
};
/**
* Filename search clause. This is special because term expansion is only
@ -412,15 +405,12 @@ protected:
class SearchDataClauseFilename : public SearchDataClauseSimple {
public:
SearchDataClauseFilename(const std::string& txt)
: SearchDataClauseSimple(txt, SCLT_FILENAME)
{
// File name searches don't count when looking for wild cards.
m_haveWildCards = false;
: SearchDataClauseSimple(txt, SCLT_FILENAME) {
// File name searches don't count when looking for wild cards.
m_haveWildCards = false;
}
virtual ~SearchDataClauseFilename()
{
}
virtual ~SearchDataClauseFilename() {}
virtual bool toNativeQuery(Rcl::Db &, void *);
virtual void dump(ostream& o) const;
@ -450,15 +440,12 @@ public:
class SearchDataClausePath : public SearchDataClauseSimple {
public:
SearchDataClausePath(const std::string& txt, bool excl = false)
: SearchDataClauseSimple(SCLT_PATH, txt, "dir")
{
m_exclude = excl;
m_haveWildCards = false;
: SearchDataClauseSimple(SCLT_PATH, txt, "dir") {
m_exclude = excl;
m_haveWildCards = false;
}
virtual ~SearchDataClausePath()
{
}
virtual ~SearchDataClausePath() {}
virtual bool toNativeQuery(Rcl::Db &, void *);
virtual void dump(ostream& o) const;
@ -471,19 +458,14 @@ public:
class SearchDataClauseDist : public SearchDataClauseSimple {
public:
SearchDataClauseDist(SClType tp, const std::string& txt, int slack,
const std::string& fld = std::string())
: SearchDataClauseSimple(tp, txt, fld), m_slack(slack)
{
}
const std::string& fld = std::string())
: SearchDataClauseSimple(tp, txt, fld), m_slack(slack) {}
virtual ~SearchDataClauseDist()
{
}
virtual ~SearchDataClauseDist() {}
virtual bool toNativeQuery(Rcl::Db &, void *);
virtual int getslack() const
{
return m_slack;
virtual int getslack() const {
return m_slack;
}
virtual void setslack(int slack) {
m_slack = slack;
@ -497,20 +479,16 @@ private:
class SearchDataClauseSub : public SearchDataClause {
public:
SearchDataClauseSub(std::shared_ptr<SearchData> sub)
: SearchDataClause(SCLT_SUB), m_sub(sub)
{
}
virtual bool toNativeQuery(Rcl::Db &db, void *p)
{
bool ret = m_sub->toNativeQuery(db, p);
if (!ret)
m_reason = m_sub->getReason();
return ret;
: SearchDataClause(SCLT_SUB), m_sub(sub) {}
virtual bool toNativeQuery(Rcl::Db &db, void *p) {
bool ret = m_sub->toNativeQuery(db, p);
if (!ret)
m_reason = m_sub->getReason();
return ret;
}
virtual void getTerms(HighlightData& hldata) const
{
m_sub.get()->getTerms(hldata);
virtual void getTerms(HighlightData& hldata) const {
m_sub.get()->getTerms(hldata);
}
virtual std::shared_ptr<SearchData> getSub() {
return m_sub;

View File

@ -47,11 +47,10 @@ using namespace std;
#include "expansiondbs.h"
#include "base64.h"
#include "daterange.h"
#include "rclvalues.h"
namespace Rcl {
typedef vector<SearchDataClause *>::iterator qlist_it_t;
static const int original_term_wqf_booster = 10;
// Expand doc categories and mime type wild card expressions
@ -62,7 +61,7 @@ bool SearchData::expandFileTypes(Db &db, vector<string>& tps)
{
const RclConfig *cfg = db.getConf();
if (!cfg) {
LOGFATAL("Db::expandFileTypes: null configuration!!\n" );
LOGFATAL("Db::expandFileTypes: null configuration!!\n");
return false;
}
vector<string> exptps;
@ -110,15 +109,16 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
string& reason, void *d)
{
Xapian::Query xq;
for (qlist_it_t it = query.begin(); it != query.end(); it++) {
for (auto& clausep : query) {
Xapian::Query nq;
if (!(*it)->toNativeQuery(db, &nq)) {
LOGERR("SearchData::clausesToQuery: toNativeQuery failed: " << ((*it)->getReason()) << "\n" );
reason += (*it)->getReason() + " ";
if (!clausep->toNativeQuery(db, &nq)) {
LOGERR("SearchData::clausesToQuery: toNativeQuery failed: "
<< clausep->getReason() << "\n");
reason += clausep->getReason() + " ";
return false;
}
if (nq.empty()) {
LOGDEB("SearchData::clausesToQuery: skipping empty clause\n" );
LOGDEB("SearchData::clausesToQuery: skipping empty clause\n");
continue;
}
// If this structure is an AND list, must use AND_NOT for excl clauses.
@ -126,7 +126,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
// addClause())
Xapian::Query::op op;
if (tp == SCLT_AND) {
if ((*it)->getexclude()) {
if (clausep->getexclude()) {
op = Xapian::Query::OP_AND_NOT;
} else {
op = Xapian::Query::OP_AND;
@ -143,7 +143,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
xq = Xapian::Query(op, xq, nq);
}
if (int(xq.get_length()) >= getMaxCl()) {
LOGERR("" << (maxXapClauseMsg) << "\n" );
LOGERR("" << maxXapClauseMsg << "\n");
m_reason += maxXapClauseMsg;
if (!o_index_stripchars)
m_reason += maxXapClauseCaseDiacMsg;
@ -151,7 +151,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
}
}
LOGDEB0("SearchData::clausesToQuery: got " << (xq.get_length()) << " clauses\n" );
LOGDEB0("SearchData::clausesToQuery: got " << xq.get_length()<<" clauses\n");
if (xq.empty())
xq = Xapian::Query::MatchAll;
@ -162,7 +162,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
{
LOGDEB("SearchData::toNativeQuery: stemlang [" << (m_stemlang) << "]\n" );
LOGDEB("SearchData::toNativeQuery: stemlang [" << m_stemlang << "]\n");
m_reason.erase();
db.getConf()->getConfParam("maxTermExpand", &m_maxexp);
@ -174,7 +174,8 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
// Xapian query tree
Xapian::Query xq;
if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
LOGERR("SearchData::toNativeQuery: clausesToQuery failed. reason: " << (m_reason) << "\n" );
LOGERR("SearchData::toNativeQuery: clausesToQuery failed. reason: "
<< m_reason << "\n");
return false;
}
@ -183,7 +184,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
if (m_dates.y1 == 0 || m_dates.y2 == 0) {
int minyear = 1970, maxyear = 2100;
if (!db.maxYearSpan(&minyear, &maxyear)) {
LOGERR("Can't retrieve index min/max dates\n" );
LOGERR("Can't retrieve index min/max dates\n");
//whatever, go on.
}
@ -198,16 +199,18 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
m_dates.d2 = 31;
}
}
LOGDEB("Db::toNativeQuery: date interval: " << (m_dates.y1) << "-" << (m_dates.m1) << "-" << (m_dates.d1) << "/" << (m_dates.y2) << "-" << (m_dates.m2) << "-" << (m_dates.d2) << "\n" );
LOGDEB("Db::toNativeQuery: date interval: " << m_dates.y1 <<
"-" << m_dates.m1 << "-" << m_dates.d1 << "/" <<
m_dates.y2 << "-" << m_dates.m2 << "-" << m_dates.d2 << "\n");
Xapian::Query dq = date_range_filter(m_dates.y1, m_dates.m1, m_dates.d1,
m_dates.y2, m_dates.m2, m_dates.d2);
if (dq.empty()) {
LOGINFO("Db::toNativeQuery: date filter is empty\n" );
LOGINFO("Db::toNativeQuery: date filter is empty\n");
}
// If no probabilistic query is provided then promote the daterange
// filter to be THE query instead of filtering an empty query.
if (xq.empty()) {
LOGINFO("Db::toNativeQuery: proba query is empty\n" );
LOGINFO("Db::toNativeQuery: proba query is empty\n");
xq = dq;
} else {
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, dq);
@ -239,7 +242,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
// If no probabilistic query is provided then promote the
// filter to be THE query instead of filtering an empty query.
if (xq.empty()) {
LOGINFO("Db::toNativeQuery: proba query is empty\n" );
LOGINFO("Db::toNativeQuery: proba query is empty\n");
xq = sq;
} else {
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq);
@ -263,7 +266,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
for (vector<string>::iterator it = m_filetypes.begin();
it != m_filetypes.end(); it++) {
string term = wrap_prefix(mimetype_prefix) + *it;
LOGDEB0("Adding file type term: [" << (term) << "]\n" );
LOGDEB0("Adding file type term: [" << term << "]\n");
tq = tq.empty() ? Xapian::Query(term) :
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
}
@ -278,7 +281,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
for (vector<string>::iterator it = m_nfiletypes.begin();
it != m_nfiletypes.end(); it++) {
string term = wrap_prefix(mimetype_prefix) + *it;
LOGDEB0("Adding negative file type term: [" << (term) << "]\n" );
LOGDEB0("Adding negative file type term: [" << term << "]\n");
tq = tq.empty() ? Xapian::Query(term) :
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
}
@ -333,7 +336,8 @@ public:
if (m_lastpos < pos)
m_lastpos = pos;
bool noexpand = be ? m_ts->nostemexp() : true;
LOGDEB1("TermProcQ::takeword: pushing [" << (term) << "] pos " << (pos) << " noexp " << (noexpand) << "\n" );
LOGDEB1("TermProcQ::takeword: pushing [" << term << "] pos " <<
pos << " noexp " << noexpand << "\n");
if (m_terms[pos].size() < term.size()) {
m_terms[pos] = term;
m_nste[pos] = noexpand;
@ -577,13 +581,12 @@ static void prefix_vector(vector<string>& v, const string& prefix)
}
}
void SearchDataClauseSimple::
processSimpleSpan(Rcl::Db &db, string& ermsg,
const string& span,
int mods, void * pq)
void SearchDataClauseSimple::processSimpleSpan(
Rcl::Db &db, string& ermsg, const string& span, int mods, void *pq)
{
vector<Xapian::Query>& pqueries(*(vector<Xapian::Query>*)pq);
LOGDEB0("StringToXapianQ::processSimpleSpan: [" << (span) << "] mods 0x" << ((unsigned int)mods) << "\n" );
LOGDEB0("StringToXapianQ::processSimpleSpan: [" << span << "] mods 0x"
<< (unsigned int)mods << "\n");
vector<string> exp;
string sterm; // dumb version of user term
@ -679,7 +682,7 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg,
vector<bool>::const_iterator nxit = splitData->nostemexps().begin();
for (vector<string>::const_iterator it = splitData->terms().begin();
it != splitData->terms().end(); it++, nxit++) {
LOGDEB0("ProcessPhrase: processing [" << *it << "]\n" );
LOGDEB0("ProcessPhrase: processing [" << *it << "]\n");
// Adjust when we do stem expansion. Not if disabled by
// caller, not inside phrases, and some versions of xapian
// will accept only one OR clause inside NEAR.
@ -695,7 +698,8 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg,
vector<string> exp;
if (!expandTerm(db, ermsg, lmods, *it, exp, sterm, prefix))
return;
LOGDEB0("ProcessPhraseOrNear: exp size " << (exp.size()) << ", exp: " << (stringsToString(exp)) << "\n" );
LOGDEB0("ProcessPhraseOrNear: exp size " << exp.size() << ", exp: " <<
stringsToString(exp) << "\n");
// groups is used for highlighting, we don't want prefixes in there.
vector<string> noprefs;
for (vector<string>::const_iterator it = exp.begin();
@ -721,7 +725,8 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg,
// Generate an appropriate PHRASE/NEAR query with adjusted slack
// For phrases, give a relevance boost like we do for original terms
LOGDEB2("PHRASE/NEAR: alltermcount " << (splitData->alltermcount()) << " lastpos " << (splitData->lastpos()) << "\n" );
LOGDEB2("PHRASE/NEAR: alltermcount " << splitData->alltermcount() <<
" lastpos " << splitData->lastpos() << "\n");
Xapian::Query xq(op, orqueries.begin(), orqueries.end(),
splitData->lastpos() + 1 + slack);
if (op == Xapian::Query::OP_PHRASE)
@ -791,7 +796,8 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
int mods = m_modifiers;
LOGDEB("StringToXapianQ:pUS:: qstr [" << (iq) << "] fld [" << (m_field) << "] mods 0x" << (mods) << " slack " << (slack) << " near " << (useNear) << "\n" );
LOGDEB("StringToXapianQ:pUS:: qstr [" << iq << "] fld [" << m_field <<
"] mods 0x"<<mods<<" slack " << slack << " near " << useNear <<"\n");
ermsg.erase();
m_curcl = 0;
const StopList stops = db.getStopList();
@ -811,7 +817,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
try {
for (vector<string>::iterator it = phrases.begin();
it != phrases.end(); it++) {
LOGDEB0("strToXapianQ: phrase/word: [" << *it << "]\n" );
LOGDEB0("strToXapianQ: phrase/word: [" << *it << "]\n");
// Anchoring modifiers
int amods = stringToMods(*it);
int terminc = amods != 0 ? 1 : 0;
@ -849,7 +855,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
slack += tpq.lastpos() - int(tpq.terms().size()) + 1;
LOGDEB0("strToXapianQ: termcount: " << (tpq.terms().size()) << "\n" );
LOGDEB0("strToXapianQ: termcount: " << tpq.terms().size() << "\n");
switch (tpq.terms().size() + terminc) {
case 0:
continue;// ??
@ -884,7 +890,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
ermsg = "Caught unknown exception";
}
if (!ermsg.empty()) {
LOGERR("stringToXapianQueries: " << (ermsg) << "\n" );
LOGERR("stringToXapianQueries: " << ermsg << "\n");
return false;
}
return true;
@ -893,7 +899,35 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
// Translate a simple OR or AND search clause.
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
{
LOGDEB("SearchDataClauseSimple::toNativeQuery: fld [" << (m_field) << "] val [" << (m_text) << "] stemlang [" << (getStemLang()) << "]\n" );
LOGDEB("SearchDataClauseSimple::toNativeQuery: fld [" << m_field <<
"] val [" << m_text << "] stemlang [" << getStemLang() << "]\n");
// Transform (in)equalities into a range query
switch (getrel()) {
case REL_EQUALS:
{
SearchDataClauseRange cl(*this, gettext(), gettext());
bool ret = cl.toNativeQuery(db, p);
m_reason = cl.getReason();
return ret;
}
case REL_LT: case REL_LTE:
{
SearchDataClauseRange cl(*this, "", gettext());
bool ret = cl.toNativeQuery(db, p);
m_reason = cl.getReason();
return ret;
}
case REL_GT: case REL_GTE:
{
SearchDataClauseRange cl(*this, gettext(), "");
bool ret = cl.toNativeQuery(db, p);
m_reason = cl.getReason();
return ret;
}
default:
break;
}
Xapian::Query *qp = (Xapian::Query *)p;
*qp = Xapian::Query();
@ -903,7 +937,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
case SCLT_AND: op = Xapian::Query::OP_AND; break;
case SCLT_OR: op = Xapian::Query::OP_OR; break;
default:
LOGERR("SearchDataClauseSimple: bad m_tp " << (m_tp) << "\n" );
LOGERR("SearchDataClauseSimple: bad m_tp " << m_tp << "\n");
m_reason = "Internal error";
return false;
}
@ -912,7 +946,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
if (!processUserString(db, m_text, m_reason, &pqueries))
return false;
if (pqueries.empty()) {
LOGERR("SearchDataClauseSimple: resolved to null query\n" );
LOGERR("SearchDataClauseSimple: resolved to null query\n");
m_reason = string("Resolved to null query. Term too long ? : [" +
m_text + string("]"));
return false;
@ -925,6 +959,58 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
return true;
}
// Translate a range clause into a Xapian value query. This only works
// if a Xapian value slot was attributed to the field in the configuration.
//
// @param db index object, used to retrieve the field traits (value slot).
// @param p  actually a Xapian::Query*, receives the resulting query. It is
//           reset to an empty query on entry, and again if building the
//           value query fails.
// @return true on success. On failure, m_reason is set to an explanation.
//
// An empty lower bound (m_text) produces an OP_VALUE_LE query on m_t2, an
// empty upper bound (m_t2) produces an OP_VALUE_GE query on m_text, and
// two non-empty bounds produce an OP_VALUE_RANGE query.
bool SearchDataClauseRange::toNativeQuery(Rcl::Db &db, void *p)
{
    LOGDEB("SearchDataClauseRange::toNativeQuery: " << m_field <<
           " :[" << m_text << ".." << m_t2 << "]\n");

    Xapian::Query *qp = static_cast<Xapian::Query *>(p);
    *qp = Xapian::Query();

    // We need a field name and at least one of the two bounds.
    if (m_field.empty() || (m_text.empty() && m_t2.empty())) {
        m_reason = "Range clause needs a field and a value";
        return false;
    }

    // Get the value number for the field from the configuration
    const FieldTraits *ftp = nullptr;
    if (!db.fieldToTraits(m_field, &ftp, true) || ftp == nullptr) {
        m_reason = string("field ") + m_field + " not found in configuration";
        return false;
    }
    // Slot 0 is taken to mean that no slot was configured for the field.
    if (ftp->valueslot == 0) {
        m_reason = string("No value slot specified in configuration for field ")
            + m_field;
        return false;
    }
    LOGDEB("SearchDataClauseRange: value slot " << ftp->valueslot << "\n");

    // Build Xapian VALUE query.
    string errstr;
    try {
        if (m_text.empty()) {
            // Upper bound only: value <= m_t2
            *qp = Xapian::Query(Xapian::Query::OP_VALUE_LE,
                                ftp->valueslot, convert_field_value(*ftp, m_t2));
        } else if (m_t2.empty()) {
            // Lower bound only: value >= m_text
            *qp = Xapian::Query(Xapian::Query::OP_VALUE_GE, ftp->valueslot,
                                convert_field_value(*ftp, m_text));
        } else {
            // Both bounds set
            *qp = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, ftp->valueslot,
                                convert_field_value(*ftp, m_text),
                                convert_field_value(*ftp, m_t2));
        }
    }
    XCATCHERROR(errstr);
    // A non-empty errstr means that the try block above failed. Report the
    // actual error text both in the log and in the reason returned to the
    // caller, else the failure can't be diagnosed.
    if (!errstr.empty()) {
        LOGERR("SearchDataClauseRange: range query creation failed for slot "
               << ftp->valueslot << ": " << errstr << "\n");
        m_reason = string("Range query creation failed: ") + errstr;
        *qp = Xapian::Query();
        return false;
    }
    return true;
}
// Translate a FILENAME search clause. This always comes
// from a "filename" search from the gui or recollq. A query language
// "filename:"-prefixed field will not go through here, but through
@ -1018,7 +1104,7 @@ bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
// Translate NEAR or PHRASE clause.
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
{
LOGDEB("SearchDataClauseDist::toNativeQuery\n" );
LOGDEB("SearchDataClauseDist::toNativeQuery\n");
Xapian::Query *qp = (Xapian::Query *)p;
*qp = Xapian::Query();
@ -1037,7 +1123,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
if (!processUserString(db, s, m_reason, &pqueries, m_slack, useNear))
return false;
if (pqueries.empty()) {
LOGERR("SearchDataClauseDist: resolved to null query\n" );
LOGERR("SearchDataClauseDist: resolved to null query\n");
m_reason = string("Resolved to null query. Term too long ? : [" +
m_text + string("]"));
return false;

View File

@ -42,6 +42,7 @@ static string tpToString(SClType tp)
case SCLT_FILENAME: return "FN";
case SCLT_PHRASE: return "PH";
case SCLT_NEAR: return "NE";
case SCLT_RANGE: return "RG";
case SCLT_SUB: return "SU"; // Unsupported actually
default: return "UN";
}
@ -101,6 +102,15 @@ string SearchData::asXML()
endl;
}
os << "<T>" << base64_encode(cl->gettext()) << "</T>" << endl;
if (cl->getTp() == SCLT_RANGE) {
SearchDataClauseRange *clr =
dynamic_cast<SearchDataClauseRange*>(cl);
const string& t = clr->gettext2();
if (!t.empty()) {
os << "<T2>" << base64_encode(clr->gettext2()) <<
"</T2>" << endl;
}
}
if (cl->getTp() == SCLT_NEAR || cl->getTp() == SCLT_PHRASE) {
SearchDataClauseDist *cld =
dynamic_cast<SearchDataClauseDist*>(cl);

View File

@ -65,6 +65,20 @@ rclbes = XB ; noterms = 1
# *** USE XY for beginning your local prefixes *** ie:
# myfield = XYMYPREF
[values]
###########
## Fields which will be stored in Xapian values, authorizing range query
## processing.
# Entries are specified as 'fieldname = valueslot;[px=val1;py=val2...]'.
# Xapian value slots are 32-bit numbers. Numbers below 1000 are reserved
# by Recoll or Xapian. Numbers above are available for user configuration.
# Values have types, which can be 'int' or 'string' at the moment. ints have
# an additional 'len' attribute, which specifies the padding size used for
# sorting (leading zeroes: all xapian sorting is text-based). 10 is fine
# for an unsigned 32-bit integer.
# myfield = 1001; type=int; len = 10
# mystrfield = 1002; type = string
[stored]
############################
# Some fields are stored in the document data record inside the index and