Implemented range queries, based on storing fields in xapian values

This commit is contained in:
Jean-Francois Dockes 2018-01-24 09:43:20 +01:00
parent 26d15dbe4a
commit 595e419d93
20 changed files with 875 additions and 428 deletions

View File

@ -165,6 +165,8 @@ rcldb/rclquery.cpp \
rcldb/rclquery.h \ rcldb/rclquery.h \
rcldb/rclquery_p.h \ rcldb/rclquery_p.h \
rcldb/rclterms.cpp \ rcldb/rclterms.cpp \
rcldb/rclvalues.cpp \
rcldb/rclvalues.h \
rcldb/searchdata.cpp \ rcldb/searchdata.cpp \
rcldb/searchdata.h \ rcldb/searchdata.h \
rcldb/searchdatatox.cpp \ rcldb/searchdatatox.cpp \

View File

@ -936,15 +936,15 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
// Build a direct map avoiding all indirections for field to // Build a direct map avoiding all indirections for field to
// prefix translation // prefix translation
// Add direct prefixes from the [prefixes] section // Add direct prefixes from the [prefixes] section
vector<string>tps = m_fields->getNames("prefixes"); vector<string> tps = m_fields->getNames("prefixes");
for (vector<string>::const_iterator it = tps.begin(); for (const auto& fieldname : tps) {
it != tps.end(); it++) {
string val; string val;
m_fields->get(*it, val, "prefixes"); m_fields->get(fieldname, val, "prefixes");
ConfSimple attrs; ConfSimple attrs;
FieldTraits ft; FieldTraits ft;
// fieldname = prefix ; attr1=val;attr2=val...
if (!valueSplitAttributes(val, ft.pfx, attrs)) { if (!valueSplitAttributes(val, ft.pfx, attrs)) {
LOGERR("readFieldsConfig: bad config line for [" << *it << LOGERR("readFieldsConfig: bad config line for [" << fieldname <<
"]: [" << val << "]\n"); "]: [" << val << "]\n");
return 0; return 0;
} }
@ -957,21 +957,67 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
ft.pfxonly = stringToBool(tval); ft.pfxonly = stringToBool(tval);
if (attrs.get("noterms", tval)) if (attrs.get("noterms", tval))
ft.noterms = stringToBool(tval); ft.noterms = stringToBool(tval);
m_fldtotraits[stringtolower(*it)] = ft; m_fldtotraits[stringtolower(fieldname)] = ft;
LOGDEB2("readFieldsConfig: [" << *it << "] -> [" << ft.pfx << LOGDEB2("readFieldsConfig: [" << fieldname << "] -> [" << ft.pfx <<
"] " << ft.wdfinc << " " << ft.boost << "\n"); "] " << ft.wdfinc << " " << ft.boost << "\n");
} }
// Values section
tps = m_fields->getNames("values");
for (const auto& fieldname : tps) {
string canonic = stringtolower(fieldname); // canonic name
string val;
m_fields->get(fieldname, val, "values");
ConfSimple attrs;
string svslot;
// fieldname = valueslot ; attr1=val;attr2=val...
if (!valueSplitAttributes(val, svslot, attrs)) {
LOGERR("readFieldsConfig: bad value line for [" << fieldname <<
"]: [" << val << "]\n");
return 0;
}
uint32_t valueslot = uint32_t(atoi(svslot.c_str()));
if (valueslot == 0) {
LOGERR("readFieldsConfig: found 0 value slot for [" << fieldname <<
"]: [" << val << "]\n");
continue;
}
string tval;
FieldTraits::ValueType valuetype{FieldTraits::STR};
if (attrs.get("type", tval)) {
if (tval == "string") {
valuetype = FieldTraits::STR;
} else if (tval == "int") {
valuetype = FieldTraits::INT;
} else {
LOGERR("readFieldsConfig: bad type for value for " <<
fieldname << " : " << tval << endl);
return 0;
}
}
int valuelen{0};
if (attrs.get("len", tval)) {
valuelen = atoi(tval.c_str());
}
// Find or insert traits entry
const auto pit =
m_fldtotraits.insert(
pair<string, FieldTraits>(canonic, FieldTraits())).first;
pit->second.valueslot = valueslot;
pit->second.valuetype = valuetype;
pit->second.valuelen = valuelen;
}
// Add prefixes for aliases and build alias-to-canonic map while // Add prefixes for aliases and build alias-to-canonic map while
// we're at it. Having the aliases in the prefix map avoids an // we're at it. Having the aliases in the prefix map avoids an
// additional indirection at index time. // additional indirection at index time.
tps = m_fields->getNames("aliases"); tps = m_fields->getNames("aliases");
for (vector<string>::const_iterator it = tps.begin(); for (const auto& fieldname : tps) {
it != tps.end(); it++){ string canonic = stringtolower(fieldname); // canonic name
string canonic = stringtolower(*it); // canonic name
FieldTraits ft; FieldTraits ft;
map<string, FieldTraits>::const_iterator pit = const auto pit = m_fldtotraits.find(canonic);
m_fldtotraits.find(canonic);
if (pit != m_fldtotraits.end()) { if (pit != m_fldtotraits.end()) {
ft = pit->second; ft = pit->second;
} }
@ -979,53 +1025,45 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
m_fields->get(canonic, aliases, "aliases"); m_fields->get(canonic, aliases, "aliases");
vector<string> l; vector<string> l;
stringToStrings(aliases, l); stringToStrings(aliases, l);
for (vector<string>::const_iterator ait = l.begin(); for (const auto& alias : l) {
ait != l.end(); ait++) {
if (pit != m_fldtotraits.end()) if (pit != m_fldtotraits.end())
m_fldtotraits[stringtolower(*ait)] = ft; m_fldtotraits[stringtolower(alias)] = ft;
m_aliastocanon[stringtolower(*ait)] = canonic; m_aliastocanon[stringtolower(alias)] = canonic;
} }
} }
// Query aliases map // Query aliases map
tps = m_fields->getNames("queryaliases"); tps = m_fields->getNames("queryaliases");
for (vector<string>::const_iterator it = tps.begin(); for (const auto& entry: tps) {
it != tps.end(); it++){ string canonic = stringtolower(entry); // canonic name
string canonic = stringtolower(*it); // canonic name
string aliases; string aliases;
m_fields->get(canonic, aliases, "queryaliases"); m_fields->get(canonic, aliases, "queryaliases");
vector<string> l; vector<string> l;
stringToStrings(aliases, l); stringToStrings(aliases, l);
for (vector<string>::const_iterator ait = l.begin(); for (const auto& alias : l) {
ait != l.end(); ait++) { m_aliastoqcanon[stringtolower(alias)] = canonic;
m_aliastoqcanon[stringtolower(*ait)] = canonic;
} }
} }
#if 0 #if 0
for (map<string, FieldTraits>::const_iterator it = m_fldtotraits.begin(); for (map<string, FieldTraits>::const_iterator it = m_fldtotraits.begin();
it != m_fldtotraits.end(); it++) { it != m_fldtotraits.end(); it++) {
LOGDEB("readFieldsConfig: [" << *it << "] -> [" << it->second.pfx << LOGDEB("readFieldsConfig: [" << entry << "] -> [" << it->second.pfx <<
"] " << it->second.wdfinc << " " << it->second.boost << "\n"); "] " << it->second.wdfinc << " " << it->second.boost << "\n");
} }
#endif #endif
vector<string> sl = m_fields->getNames("stored"); vector<string> sl = m_fields->getNames("stored");
if (!sl.empty()) { for (const auto& fieldname : sl) {
for (vector<string>::const_iterator it = sl.begin(); m_storedFields.insert(fieldCanon(stringtolower(fieldname)));
it != sl.end(); it++) {
string fld = fieldCanon(stringtolower(*it));
m_storedFields.insert(fld);
}
} }
// Extended file attribute to field translations // Extended file attribute to field translations
vector<string>xattrs = m_fields->getNames("xattrtofields"); vector<string>xattrs = m_fields->getNames("xattrtofields");
for (vector<string>::const_iterator it = xattrs.begin(); for (const auto& xattr : xattrs) {
it != xattrs.end(); it++) {
string val; string val;
m_fields->get(*it, val, "xattrtofields"); m_fields->get(xattr, val, "xattrtofields");
m_xattrtofld[*it] = val; m_xattrtofld[xattr] = val;
} }
return true; return true;

View File

@ -79,6 +79,10 @@ struct MDReaper {
// Data associated to a indexed field name: // Data associated to a indexed field name:
struct FieldTraits { struct FieldTraits {
string pfx; // indexing prefix, string pfx; // indexing prefix,
uint32_t valueslot{0};
enum ValueType {STR, INT};
ValueType valuetype{STR};
int valuelen{0};
int wdfinc{1}; // Index time term frequency increment (default 1) int wdfinc{1}; // Index time term frequency increment (default 1)
double boost{1.0}; // Query time boost (default 1.0) double boost{1.0}; // Query time boost (default 1.0)
bool pfxonly{false}; // Suppress prefix-less indexing bool pfxonly{false}; // Suppress prefix-less indexing

View File

@ -59,6 +59,7 @@ private:
{ {
currentText = whatclause = ""; currentText = whatclause = "";
text.clear(); text.clear();
text2.clear();
field.clear(); field.clear();
slack = 0; slack = 0;
d = m = y = di.d1 = di.m1 = di.y1 = di.d2 = di.m2 = di.y2 = 0; d = m = y = di.d1 = di.m1 = di.y1 = di.d2 = di.m2 = di.y2 = 0;
@ -69,7 +70,7 @@ private:
// Temporary data while parsing. // Temporary data while parsing.
QString currentText; QString currentText;
QString whatclause; QString whatclause;
string field, text; string field, text, text2;
int slack; int slack;
int d, m, y; int d, m, y;
DateInterval di; DateInterval di;
@ -120,6 +121,8 @@ bool SDHXMLHandler::endElement(const QString & /* namespaceURI */,
field = base64_decode(qs2utf8s(currentText.trimmed())); field = base64_decode(qs2utf8s(currentText.trimmed()));
} else if (qName == "T") { } else if (qName == "T") {
text = base64_decode(qs2utf8s(currentText.trimmed())); text = base64_decode(qs2utf8s(currentText.trimmed()));
} else if (qName == "T2") {
text2 = base64_decode(qs2utf8s(currentText.trimmed()));
} else if (qName == "S") { } else if (qName == "S") {
slack = atoi((const char *)currentText.toUtf8()); slack = atoi((const char *)currentText.toUtf8());
} else if (qName == "C") { } else if (qName == "C") {
@ -130,6 +133,9 @@ bool SDHXMLHandler::endElement(const QString & /* namespaceURI */,
} else if (whatclause == "OR") { } else if (whatclause == "OR") {
c = new SearchDataClauseSimple(SCLT_OR, text, field); c = new SearchDataClauseSimple(SCLT_OR, text, field);
c->setexclude(exclude); c->setexclude(exclude);
} else if (whatclause == "RG") {
c = new SearchDataClauseRange(text, text2, field);
c->setexclude(exclude);
} else if (whatclause == "EX") { } else if (whatclause == "EX") {
// Compat with old hist. We don't generate EX (SCLT_EXCL) anymore // Compat with old hist. We don't generate EX (SCLT_EXCL) anymore
// it's replaced with OR + exclude flag // it's replaced with OR + exclude flag

View File

@ -1,8 +1,8 @@
// A Bison parser, made by GNU Bison 3.0.2. // A Bison parser, made by GNU Bison 3.0.4.
// Locations for Bison parsers in C++ // Locations for Bison parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc. // Copyright (C) 2002-2015 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify // This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by // it under the terms of the GNU General Public License as published by
@ -42,7 +42,7 @@
namespace yy { namespace yy {
#line 46 "location.hh" // location.cc:291 #line 46 "location.hh" // location.cc:296
/// Abstract a location. /// Abstract a location.
class location class location
{ {
@ -111,36 +111,42 @@ namespace yy {
position end; position end;
}; };
/// Join two location objects to create a location. /// Join two locations, in place.
inline location operator+ (location res, const location& end) inline location& operator+= (location& res, const location& end)
{ {
res.end = end.end; res.end = end.end;
return res; return res;
} }
/// Change end position in place. /// Join two locations.
inline location operator+ (location res, const location& end)
{
return res += end;
}
/// Add \a width columns to the end position, in place.
inline location& operator+= (location& res, int width) inline location& operator+= (location& res, int width)
{ {
res.columns (width); res.columns (width);
return res; return res;
} }
/// Change end position. /// Add \a width columns to the end position.
inline location operator+ (location res, int width) inline location operator+ (location res, int width)
{ {
return res += width; return res += width;
} }
/// Change end position in place. /// Subtract \a width columns to the end position, in place.
inline location& operator-= (location& res, int width) inline location& operator-= (location& res, int width)
{ {
return res += -width; return res += -width;
} }
/// Change end position. /// Subtract \a width columns to the end position.
inline location operator- (const location& begin, int width) inline location operator- (location res, int width)
{ {
return begin + -width; return res -= width;
} }
/// Compare two location objects. /// Compare two location objects.
@ -168,8 +174,7 @@ namespace yy {
operator<< (std::basic_ostream<YYChar>& ostr, const location& loc) operator<< (std::basic_ostream<YYChar>& ostr, const location& loc)
{ {
unsigned int end_col = 0 < loc.end.column ? loc.end.column - 1 : 0; unsigned int end_col = 0 < loc.end.column ? loc.end.column - 1 : 0;
ostr << loc.begin// << "(" << loc.end << ") " ostr << loc.begin;
;
if (loc.end.filename if (loc.end.filename
&& (!loc.begin.filename && (!loc.begin.filename
|| *loc.begin.filename != *loc.end.filename)) || *loc.begin.filename != *loc.end.filename))
@ -183,5 +188,5 @@ namespace yy {
} // yy } // yy
#line 187 "location.hh" // location.cc:291 #line 192 "location.hh" // location.cc:296
#endif // !YY_YY_LOCATION_HH_INCLUDED #endif // !YY_YY_LOCATION_HH_INCLUDED

View File

@ -1,8 +1,8 @@
// A Bison parser, made by GNU Bison 3.0.2. // A Bison parser, made by GNU Bison 3.0.4.
// Positions for Bison parsers in C++ // Positions for Bison parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc. // Copyright (C) 2002-2015 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify // This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by // it under the terms of the GNU General Public License as published by
@ -52,7 +52,7 @@
namespace yy { namespace yy {
#line 56 "position.hh" // location.cc:291 #line 56 "position.hh" // location.cc:296
/// Abstract a position. /// Abstract a position.
class position class position
{ {
@ -114,7 +114,7 @@ namespace yy {
} }
}; };
/// Add and assign a position. /// Add \a width columns, in place.
inline position& inline position&
operator+= (position& res, int width) operator+= (position& res, int width)
{ {
@ -122,21 +122,21 @@ namespace yy {
return res; return res;
} }
/// Add two position objects. /// Add \a width columns.
inline position inline position
operator+ (position res, int width) operator+ (position res, int width)
{ {
return res += width; return res += width;
} }
/// Add and assign a position. /// Subtract \a width columns, in place.
inline position& inline position&
operator-= (position& res, int width) operator-= (position& res, int width)
{ {
return res += -width; return res += -width;
} }
/// Add two position objects. /// Subtract \a width columns.
inline position inline position
operator- (position res, int width) operator- (position res, int width)
{ {
@ -176,5 +176,5 @@ namespace yy {
} // yy } // yy
#line 180 "position.hh" // location.cc:291 #line 180 "position.hh" // location.cc:296
#endif // !YY_YY_POSITION_HH_INCLUDED #endif // !YY_YY_POSITION_HH_INCLUDED

View File

@ -1,8 +1,8 @@
// A Bison parser, made by GNU Bison 3.0.2. // A Bison parser, made by GNU Bison 3.0.4.
// Stack handling for Bison parsers in C++ // Stack handling for Bison parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc. // Copyright (C) 2002-2015 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify // This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by // it under the terms of the GNU General Public License as published by
@ -42,7 +42,7 @@
namespace yy { namespace yy {
#line 46 "stack.hh" // stack.hh:133 #line 46 "stack.hh" // stack.hh:132
template <class T, class S = std::vector<T> > template <class T, class S = std::vector<T> >
class stack class stack
{ {
@ -54,12 +54,12 @@ namespace yy {
stack () stack ()
: seq_ () : seq_ ()
{ {
seq_.reserve (200);
} }
stack (unsigned int n) stack (unsigned int n)
: seq_ (n) : seq_ (n)
{ {}
}
inline inline
T& T&
@ -136,8 +136,7 @@ namespace yy {
slice (const S& stack, unsigned int range) slice (const S& stack, unsigned int range)
: stack_ (stack) : stack_ (stack)
, range_ (range) , range_ (range)
{ {}
}
inline inline
const T& const T&
@ -153,6 +152,6 @@ namespace yy {
} // yy } // yy
#line 157 "stack.hh" // stack.hh:133 #line 156 "stack.hh" // stack.hh:132
#endif // !YY_YY_STACK_HH_INCLUDED #endif // !YY_YY_STACK_HH_INCLUDED

View File

@ -1,8 +1,8 @@
// A Bison parser, made by GNU Bison 3.0.2. // A Bison parser, made by GNU Bison 3.0.4.
// Skeleton implementation for Bison LALR(1) parsers in C++ // Skeleton implementation for Bison LALR(1) parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc. // Copyright (C) 2002-2015 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify // This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by // it under the terms of the GNU General Public License as published by
@ -32,7 +32,7 @@
// First part of user declarations. // First part of user declarations.
#line 1 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:399 #line 1 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:404
#define YYDEBUG 1 #define YYDEBUG 1
#include "autoconfig.h" #include "autoconfig.h"
@ -48,7 +48,7 @@
using namespace std; using namespace std;
// #define LOG_PARSER //#define LOG_PARSER
#ifdef LOG_PARSER #ifdef LOG_PARSER
#define LOGP(X) {cerr << X;} #define LOGP(X) {cerr << X;}
#else #else
@ -69,7 +69,7 @@ static void addSubQuery(WasaParserDriver *d,
} }
#line 73 "y.tab.c" // lalr1.cc:399 #line 73 "y.tab.c" // lalr1.cc:404
# ifndef YY_NULLPTR # ifndef YY_NULLPTR
# if defined __cplusplus && 201103L <= __cplusplus # if defined __cplusplus && 201103L <= __cplusplus
@ -83,7 +83,7 @@ static void addSubQuery(WasaParserDriver *d,
// User implementation prologue. // User implementation prologue.
#line 87 "y.tab.c" // lalr1.cc:407 #line 87 "y.tab.c" // lalr1.cc:412
#ifndef YY_ #ifndef YY_
@ -160,7 +160,7 @@ static void addSubQuery(WasaParserDriver *d,
#endif // !YYDEBUG #endif // !YYDEBUG
#define yyerrok (yyerrstatus_ = 0) #define yyerrok (yyerrstatus_ = 0)
#define yyclearin (yyempty = true) #define yyclearin (yyla.clear ())
#define YYACCEPT goto yyacceptlab #define YYACCEPT goto yyacceptlab
#define YYABORT goto yyabortlab #define YYABORT goto yyabortlab
@ -169,7 +169,7 @@ static void addSubQuery(WasaParserDriver *d,
namespace yy { namespace yy {
#line 173 "y.tab.c" // lalr1.cc:474 #line 173 "y.tab.c" // lalr1.cc:479
/* Return YYSTR after stripping away unnecessary quotes and /* Return YYSTR after stripping away unnecessary quotes and
backslashes, so that it's suitable for yyerror. The heuristic is backslashes, so that it's suitable for yyerror. The heuristic is
@ -273,6 +273,23 @@ namespace yy {
inline inline
parser::basic_symbol<Base>::~basic_symbol () parser::basic_symbol<Base>::~basic_symbol ()
{ {
clear ();
}
/// Reset this symbol by forwarding to the clear() member of the Base
/// template parameter. Called from ~basic_symbol().
template <typename Base>
inline
void
parser::basic_symbol<Base>::clear ()
{
Base::clear ();
}
template <typename Base>
inline
bool
parser::basic_symbol<Base>::empty () const
{
return Base::type_get () == empty_symbol;
} }
template <typename Base> template <typename Base>
@ -288,7 +305,7 @@ namespace yy {
// by_type. // by_type.
inline inline
parser::by_type::by_type () parser::by_type::by_type ()
: type (empty) : type (empty_symbol)
{} {}
inline inline
@ -301,12 +318,19 @@ namespace yy {
: type (yytranslate_ (t)) : type (yytranslate_ (t))
{} {}
/// Mark this by_type as holding no symbol: its type is reset to
/// empty_symbol, the same value the default constructor uses.
inline
void
parser::by_type::clear ()
{
type = empty_symbol;
}
inline inline
void void
parser::by_type::move (by_type& that) parser::by_type::move (by_type& that)
{ {
type = that.type; type = that.type;
that.type = empty; that.clear ();
} }
inline inline
@ -320,7 +344,7 @@ namespace yy {
// by_state. // by_state.
inline inline
parser::by_state::by_state () parser::by_state::by_state ()
: state (empty) : state (empty_state)
{} {}
inline inline
@ -328,12 +352,19 @@ namespace yy {
: state (other.state) : state (other.state)
{} {}
/// Mark this by_state as holding no state: its state is reset to
/// empty_state, the same value the default constructor uses.
inline
void
parser::by_state::clear ()
{
state = empty_state;
}
inline inline
void void
parser::by_state::move (by_state& that) parser::by_state::move (by_state& that)
{ {
state = that.state; state = that.state;
that.state = empty; that.clear ();
} }
inline inline
@ -345,7 +376,10 @@ namespace yy {
parser::symbol_number_type parser::symbol_number_type
parser::by_state::type_get () const parser::by_state::type_get () const
{ {
return state == empty ? 0 : yystos_[state]; if (state == empty_state)
return empty_symbol;
else
return yystos_[state];
} }
inline inline
@ -359,7 +393,7 @@ namespace yy {
{ {
value = that.value; value = that.value;
// that is emptied. // that is emptied.
that.type = empty; that.type = empty_symbol;
} }
inline inline
@ -386,30 +420,30 @@ namespace yy {
{ {
case 3: // WORD case 3: // WORD
#line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599 #line 52 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:614
{delete (yysym.value.str);} {delete (yysym.value.str);}
#line 392 "y.tab.c" // lalr1.cc:599 #line 426 "y.tab.c" // lalr1.cc:614
break; break;
case 4: // QUOTED case 4: // QUOTED
#line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599 #line 52 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:614
{delete (yysym.value.str);} {delete (yysym.value.str);}
#line 399 "y.tab.c" // lalr1.cc:599 #line 433 "y.tab.c" // lalr1.cc:614
break; break;
case 5: // QUALIFIERS case 5: // QUALIFIERS
#line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599 #line 52 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:614
{delete (yysym.value.str);} {delete (yysym.value.str);}
#line 406 "y.tab.c" // lalr1.cc:599 #line 440 "y.tab.c" // lalr1.cc:614
break; break;
case 22: // complexfieldname case 23: // complexfieldname
#line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599 #line 52 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:614
{delete (yysym.value.str);} {delete (yysym.value.str);}
#line 413 "y.tab.c" // lalr1.cc:599 #line 447 "y.tab.c" // lalr1.cc:614
break; break;
@ -427,6 +461,10 @@ namespace yy {
std::ostream& yyoutput = yyo; std::ostream& yyoutput = yyo;
YYUSE (yyoutput); YYUSE (yyoutput);
symbol_number_type yytype = yysym.type_get (); symbol_number_type yytype = yysym.type_get ();
// Avoid a (spurious) G++ 4.8 warning about "array subscript is
// below array bounds".
if (yysym.empty ())
std::abort ();
yyo << (yytype < yyntokens_ ? "token" : "nterm") yyo << (yytype < yyntokens_ ? "token" : "nterm")
<< ' ' << yytname_[yytype] << " (" << ' ' << yytname_[yytype] << " ("
<< yysym.location << ": "; << yysym.location << ": ";
@ -511,9 +549,6 @@ namespace yy {
int int
parser::parse () parser::parse ()
{ {
/// Whether yyla contains a lookahead.
bool yyempty = true;
// State. // State.
int yyn; int yyn;
/// Length of the RHS of the rule being reduced. /// Length of the RHS of the rule being reduced.
@ -565,7 +600,7 @@ namespace yy {
goto yydefault; goto yydefault;
// Read a lookahead token. // Read a lookahead token.
if (yyempty) if (yyla.empty ())
{ {
YYCDEBUG << "Reading a token: "; YYCDEBUG << "Reading a token: ";
try try
@ -577,7 +612,6 @@ namespace yy {
error (yyexc); error (yyexc);
goto yyerrlab1; goto yyerrlab1;
} }
yyempty = false;
} }
YY_SYMBOL_PRINT ("Next token is", yyla); YY_SYMBOL_PRINT ("Next token is", yyla);
@ -597,9 +631,6 @@ namespace yy {
goto yyreduce; goto yyreduce;
} }
// Discard the token being shifted.
yyempty = true;
// Count tokens shifted since error; after three, turn off error status. // Count tokens shifted since error; after three, turn off error status.
if (yyerrstatus_) if (yyerrstatus_)
--yyerrstatus_; --yyerrstatus_;
@ -649,7 +680,7 @@ namespace yy {
switch (yyn) switch (yyn)
{ {
case 2: case 2:
#line 72 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 74 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
// It's possible that we end up with no query (e.g.: because just a // It's possible that we end up with no query (e.g.: because just a
// date filter was set, no terms). Allocate an empty query so that we // date filter was set, no terms). Allocate an empty query so that we
@ -660,11 +691,11 @@ namespace yy {
else else
d->m_result = (yystack_[0].value.sd); d->m_result = (yystack_[0].value.sd);
} }
#line 664 "y.tab.c" // lalr1.cc:847 #line 695 "y.tab.c" // lalr1.cc:859
break; break;
case 3: case 3:
#line 85 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 87 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("q: query query\n"); LOGP("q: query query\n");
Rcl::SearchData *sd = 0; Rcl::SearchData *sd = 0;
@ -675,11 +706,11 @@ namespace yy {
} }
(yylhs.value.sd) = sd; (yylhs.value.sd) = sd;
} }
#line 679 "y.tab.c" // lalr1.cc:847 #line 710 "y.tab.c" // lalr1.cc:859
break; break;
case 4: case 4:
#line 96 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 98 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("q: query AND query\n"); LOGP("q: query AND query\n");
Rcl::SearchData *sd = 0; Rcl::SearchData *sd = 0;
@ -690,11 +721,11 @@ namespace yy {
} }
(yylhs.value.sd) = sd; (yylhs.value.sd) = sd;
} }
#line 694 "y.tab.c" // lalr1.cc:847 #line 725 "y.tab.c" // lalr1.cc:859
break; break;
case 5: case 5:
#line 107 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 109 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("query: query OR query\n"); LOGP("query: query OR query\n");
Rcl::SearchData *top = 0; Rcl::SearchData *top = 0;
@ -705,20 +736,20 @@ namespace yy {
} }
(yylhs.value.sd) = top; (yylhs.value.sd) = top;
} }
#line 709 "y.tab.c" // lalr1.cc:847 #line 740 "y.tab.c" // lalr1.cc:859
break; break;
case 6: case 6:
#line 118 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 120 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("q: ( query )\n"); LOGP("q: ( query )\n");
(yylhs.value.sd) = (yystack_[1].value.sd); (yylhs.value.sd) = (yystack_[1].value.sd);
} }
#line 718 "y.tab.c" // lalr1.cc:847 #line 749 "y.tab.c" // lalr1.cc:859
break; break;
case 7: case 7:
#line 124 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 126 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("q: fieldexpr\n"); LOGP("q: fieldexpr\n");
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang); Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
@ -729,20 +760,20 @@ namespace yy {
(yylhs.value.sd) = 0; (yylhs.value.sd) = 0;
} }
} }
#line 733 "y.tab.c" // lalr1.cc:847 #line 764 "y.tab.c" // lalr1.cc:859
break; break;
case 8: case 8:
#line 137 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 139 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("fe: simple fieldexpr: " << (yystack_[0].value.cl)->gettext() << endl); LOGP("fe: simple fieldexpr: " << (yystack_[0].value.cl)->gettext() << endl);
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
} }
#line 742 "y.tab.c" // lalr1.cc:847 #line 773 "y.tab.c" // lalr1.cc:859
break; break;
case 9: case 9:
#line 142 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 144 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("fe: " << *(yystack_[2].value.str) << " = " << (yystack_[0].value.cl)->gettext() << endl); LOGP("fe: " << *(yystack_[2].value.str) << " = " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -750,11 +781,11 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
} }
#line 754 "y.tab.c" // lalr1.cc:847 #line 785 "y.tab.c" // lalr1.cc:859
break; break;
case 10: case 10:
#line 150 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 152 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("fe: " << *(yystack_[2].value.str) << " : " << (yystack_[0].value.cl)->gettext() << endl); LOGP("fe: " << *(yystack_[2].value.str) << " : " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -762,23 +793,35 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
} }
#line 766 "y.tab.c" // lalr1.cc:847 #line 797 "y.tab.c" // lalr1.cc:859
break; break;
case 11: case 11:
#line 158 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 160 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP(cerr << "fe: " << *(yystack_[2].value.str) << " < " << (yystack_[0].value.cl)->gettext() << endl); LOGP("fe: " << *(yystack_[2].value.str) << " : " << (yystack_[0].value.rg)->gettext() << endl);
(yystack_[0].value.rg)->setfield(*(yystack_[2].value.str));
(yystack_[0].value.rg)->setrel(Rcl::SearchDataClause::REL_CONTAINS);
(yylhs.value.cl) = (yystack_[0].value.rg);
delete (yystack_[2].value.str);
}
#line 809 "y.tab.c" // lalr1.cc:859
break;
case 12:
#line 168 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("fe: " << *(yystack_[2].value.str) << " < " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
(yystack_[0].value.cl)->setrel(Rcl::SearchDataClause::REL_LT); (yystack_[0].value.cl)->setrel(Rcl::SearchDataClause::REL_LT);
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
} }
#line 778 "y.tab.c" // lalr1.cc:847 #line 821 "y.tab.c" // lalr1.cc:859
break; break;
case 12: case 13:
#line 166 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 176 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("fe: " << *(yystack_[2].value.str) << " <= " << (yystack_[0].value.cl)->gettext() << endl); LOGP("fe: " << *(yystack_[2].value.str) << " <= " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -786,11 +829,11 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
} }
#line 790 "y.tab.c" // lalr1.cc:847 #line 833 "y.tab.c" // lalr1.cc:859
break; break;
case 13: case 14:
#line 174 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 184 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("fe: " << *(yystack_[2].value.str) << " > " << (yystack_[0].value.cl)->gettext() << endl); LOGP("fe: " << *(yystack_[2].value.str) << " > " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -798,11 +841,11 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
} }
#line 802 "y.tab.c" // lalr1.cc:847 #line 845 "y.tab.c" // lalr1.cc:859
break; break;
case 14: case 15:
#line 182 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 192 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("fe: " << *(yystack_[2].value.str) << " >= " << (yystack_[0].value.cl)->gettext() << endl); LOGP("fe: " << *(yystack_[2].value.str) << " >= " << (yystack_[0].value.cl)->gettext() << endl);
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str));
@ -810,69 +853,100 @@ namespace yy {
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
} }
#line 814 "y.tab.c" // lalr1.cc:847 #line 857 "y.tab.c" // lalr1.cc:859
break; break;
case 15: case 16:
#line 190 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 200 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("fe: - fieldexpr[" << (yystack_[0].value.cl)->gettext() << "]" << endl); LOGP("fe: - fieldexpr[" << (yystack_[0].value.cl)->gettext() << "]" << endl);
(yystack_[0].value.cl)->setexclude(true); (yystack_[0].value.cl)->setexclude(true);
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
} }
#line 824 "y.tab.c" // lalr1.cc:847 #line 867 "y.tab.c" // lalr1.cc:859
break; break;
case 16: case 17:
#line 200 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 210 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("cfn: WORD" << endl); LOGP("cfn: WORD" << endl);
(yylhs.value.str) = (yystack_[0].value.str); (yylhs.value.str) = (yystack_[0].value.str);
} }
#line 833 "y.tab.c" // lalr1.cc:847 #line 876 "y.tab.c" // lalr1.cc:859
break; break;
case 17: case 18:
#line 206 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 216 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("cfn: complexfieldname ':' WORD" << endl); LOGP("cfn: complexfieldname ':' WORD" << endl);
(yylhs.value.str) = new string(*(yystack_[2].value.str) + string(":") + *(yystack_[0].value.str)); (yylhs.value.str) = new string(*(yystack_[2].value.str) + string(":") + *(yystack_[0].value.str));
delete (yystack_[2].value.str); delete (yystack_[2].value.str);
delete (yystack_[0].value.str); delete (yystack_[0].value.str);
} }
#line 844 "y.tab.c" // lalr1.cc:847 #line 887 "y.tab.c" // lalr1.cc:859
break; break;
case 18: case 19:
#line 215 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 225 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("Range: " << *(yystack_[2].value.str) << string(" .. ") << *(yystack_[0].value.str) << endl);
(yylhs.value.rg) = new Rcl::SearchDataClauseRange(*(yystack_[2].value.str), *(yystack_[0].value.str));
delete (yystack_[2].value.str);
delete (yystack_[0].value.str);
}
#line 898 "y.tab.c" // lalr1.cc:859
break;
case 20:
#line 233 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("Range: " << "" << string(" .. ") << *(yystack_[0].value.str) << endl);
(yylhs.value.rg) = new Rcl::SearchDataClauseRange("", *(yystack_[0].value.str));
delete (yystack_[0].value.str);
}
#line 908 "y.tab.c" // lalr1.cc:859
break;
case 21:
#line 240 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{
LOGP("Range: " << *(yystack_[1].value.str) << string(" .. ") << "" << endl);
(yylhs.value.rg) = new Rcl::SearchDataClauseRange(*(yystack_[1].value.str), "");
delete (yystack_[1].value.str);
}
#line 918 "y.tab.c" // lalr1.cc:859
break;
case 22:
#line 249 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("term[" << *(yystack_[0].value.str) << "]" << endl); LOGP("term[" << *(yystack_[0].value.str) << "]" << endl);
(yylhs.value.cl) = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, *(yystack_[0].value.str)); (yylhs.value.cl) = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, *(yystack_[0].value.str));
delete (yystack_[0].value.str); delete (yystack_[0].value.str);
} }
#line 854 "y.tab.c" // lalr1.cc:847 #line 928 "y.tab.c" // lalr1.cc:859
break; break;
case 19: case 23:
#line 221 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 255 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
(yylhs.value.cl) = (yystack_[0].value.cl); (yylhs.value.cl) = (yystack_[0].value.cl);
} }
#line 862 "y.tab.c" // lalr1.cc:847 #line 936 "y.tab.c" // lalr1.cc:859
break; break;
case 20: case 24:
#line 227 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 261 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("QUOTED[" << *(yystack_[0].value.str) << "]" << endl); LOGP("QUOTED[" << *(yystack_[0].value.str) << "]" << endl);
(yylhs.value.cl) = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *(yystack_[0].value.str), 0); (yylhs.value.cl) = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *(yystack_[0].value.str), 0);
delete (yystack_[0].value.str); delete (yystack_[0].value.str);
} }
#line 872 "y.tab.c" // lalr1.cc:847 #line 946 "y.tab.c" // lalr1.cc:859
break; break;
case 21: case 25:
#line 233 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 #line 267 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859
{ {
LOGP("QUOTED[" << *(yystack_[1].value.str) << "] QUALIFIERS[" << *(yystack_[0].value.str) << "]" << endl); LOGP("QUOTED[" << *(yystack_[1].value.str) << "] QUALIFIERS[" << *(yystack_[0].value.str) << "]" << endl);
Rcl::SearchDataClauseDist *cl = Rcl::SearchDataClauseDist *cl =
@ -882,11 +956,11 @@ namespace yy {
delete (yystack_[1].value.str); delete (yystack_[1].value.str);
delete (yystack_[0].value.str); delete (yystack_[0].value.str);
} }
#line 886 "y.tab.c" // lalr1.cc:847 #line 960 "y.tab.c" // lalr1.cc:859
break; break;
#line 890 "y.tab.c" // lalr1.cc:847 #line 964 "y.tab.c" // lalr1.cc:859
default: default:
break; break;
} }
@ -914,8 +988,7 @@ namespace yy {
if (!yyerrstatus_) if (!yyerrstatus_)
{ {
++yynerrs_; ++yynerrs_;
error (yyla.location, yysyntax_error_ (yystack_[0].state, error (yyla.location, yysyntax_error_ (yystack_[0].state, yyla));
yyempty ? yyempty_ : yyla.type_get ()));
} }
@ -928,10 +1001,10 @@ namespace yy {
// Return failure if at end of input. // Return failure if at end of input.
if (yyla.type_get () == yyeof_) if (yyla.type_get () == yyeof_)
YYABORT; YYABORT;
else if (!yyempty) else if (!yyla.empty ())
{ {
yy_destroy_ ("Error: discarding", yyla); yy_destroy_ ("Error: discarding", yyla);
yyempty = true; yyla.clear ();
} }
} }
@ -1007,7 +1080,7 @@ namespace yy {
goto yyreturn; goto yyreturn;
yyreturn: yyreturn:
if (!yyempty) if (!yyla.empty ())
yy_destroy_ ("Cleanup: discarding lookahead", yyla); yy_destroy_ ("Cleanup: discarding lookahead", yyla);
/* Do not reclaim the symbols of the rule whose action triggered /* Do not reclaim the symbols of the rule whose action triggered
@ -1027,7 +1100,7 @@ namespace yy {
<< std::endl; << std::endl;
// Do not try to display the values of the reclaimed symbols, // Do not try to display the values of the reclaimed symbols,
// as their printer might throw an exception. // as their printer might throw an exception.
if (!yyempty) if (!yyla.empty ())
yy_destroy_ (YY_NULLPTR, yyla); yy_destroy_ (YY_NULLPTR, yyla);
while (1 < yystack_.size ()) while (1 < yystack_.size ())
@ -1047,9 +1120,8 @@ namespace yy {
// Generate an error message. // Generate an error message.
std::string std::string
parser::yysyntax_error_ (state_type yystate, symbol_number_type yytoken) const parser::yysyntax_error_ (state_type yystate, const symbol_type& yyla) const
{ {
std::string yyres;
// Number of reported tokens (one for the "unexpected", one per // Number of reported tokens (one for the "unexpected", one per
// "expected"). // "expected").
size_t yycount = 0; size_t yycount = 0;
@ -1063,7 +1135,7 @@ namespace yy {
the only way this function was invoked is if the default action the only way this function was invoked is if the default action
is an error action. In that case, don't check for expected is an error action. In that case, don't check for expected
tokens because there are none. tokens because there are none.
- The only way there can be no lookahead present (in yytoken) is - The only way there can be no lookahead present (in yyla) is
if this state is a consistent state with a default action. if this state is a consistent state with a default action.
Thus, detecting the absence of a lookahead is sufficient to Thus, detecting the absence of a lookahead is sufficient to
determine that there is no unexpected or expected token to determine that there is no unexpected or expected token to
@ -1083,8 +1155,9 @@ namespace yy {
token that will not be accepted due to an error action in a token that will not be accepted due to an error action in a
later state. later state.
*/ */
if (yytoken != yyempty_) if (!yyla.empty ())
{ {
int yytoken = yyla.type_get ();
yyarg[yycount++] = yytname_[yytoken]; yyarg[yycount++] = yytname_[yytoken];
int yyn = yypact_[yystate]; int yyn = yypact_[yystate];
if (!yy_pact_value_is_default_ (yyn)) if (!yy_pact_value_is_default_ (yyn))
@ -1127,6 +1200,7 @@ namespace yy {
#undef YYCASE_ #undef YYCASE_
} }
std::string yyres;
// Argument number. // Argument number.
size_t yyi = 0; size_t yyi = 0;
for (char const* yyp = yyformat; *yyp; ++yyp) for (char const* yyp = yyformat; *yyp; ++yyp)
@ -1143,83 +1217,85 @@ namespace yy {
const signed char parser::yypact_ninf_ = -3; const signed char parser::yypact_ninf_ = -3;
const signed char parser::yytable_ninf_ = -18; const signed char parser::yytable_ninf_ = -19;
const signed char const signed char
parser::yypact_[] = parser::yypact_[] =
{ {
24, 25, 3, 24, 26, 6, 16, -3, 31, -3, 31, 32, 3, 31, 33, 6, 14, -3, 38, -3,
-3, -3, 1, -3, -3, 24, 24, 4, -2, 9, -3, -3, 1, -3, -3, 31, 31, 4, -2, 9,
-2, -2, -2, -2, -3, 4, -3, -3, -3, 37, -2, -2, -2, -2, -3, 4, -3, -3, -3, 16,
-3, -3, -3, -3, -3 18, -3, -3, -3, -3, -3, -3, 22, -3, -3
}; };
const unsigned char const unsigned char
parser::yydefact_[] = parser::yydefact_[] =
{ {
0, 18, 20, 0, 0, 0, 2, 7, 0, 8, 0, 22, 24, 0, 0, 0, 2, 7, 0, 8,
19, 21, 0, 15, 1, 0, 0, 3, 0, 0, 23, 25, 0, 16, 1, 0, 0, 3, 0, 0,
0, 0, 0, 0, 6, 4, 5, 18, 9, 18, 0, 0, 0, 0, 6, 4, 5, 22, 9, 22,
10, 12, 11, 14, 13 0, 11, 10, 13, 12, 15, 14, 21, 20, 19
}; };
const signed char const signed char
parser::yypgoto_[] = parser::yypgoto_[] =
{ {
-3, -3, 0, 13, -3, 36, -3 -3, -3, 0, 34, -3, -3, 37, -3
}; };
const signed char const signed char
parser::yydefgoto_[] = parser::yydefgoto_[] =
{ {
-1, 5, 17, 7, 8, 9, 10 -1, 5, 17, 7, 8, 31, 9, 10
}; };
const signed char const signed char
parser::yytable_[] = parser::yytable_[] =
{ {
6, 27, 2, 12, 1, 2, 14, 15, 11, 3, 6, 27, 2, 12, 1, 2, 14, 15, 11, 3,
4, 16, 29, 2, 16, 25, 26, 13, 24, 1, 4, 16, 29, 2, 16, 25, 26, 1, 2, 24,
2, 0, 15, 0, 3, 4, 16, 1, 2, 1, 15, 38, 3, 4, 16, 39, 30, -18, -18, -18,
2, 0, 3, 4, 0, 4, -16, -16, -16, -16, -18, -18, -18, 37, 1, 2, 1, 2, 13, 3,
-16, -16, 18, 19, 20, 21, 22, 23, -17, -17, 4, 0, 4, -17, -17, -17, -17, -17, -17, 18,
-17, -17, -17, -17, 28, 30, 31, 32, 33, 34 19, 20, 21, 22, 23, 28, 32, 33, 34, 35,
36
}; };
const signed char const signed char
parser::yycheck_[] = parser::yycheck_[] =
{ {
0, 3, 4, 3, 3, 4, 0, 6, 5, 8, 0, 3, 4, 3, 3, 4, 0, 6, 5, 8,
9, 10, 3, 4, 10, 15, 16, 4, 17, 3, 9, 10, 3, 4, 10, 15, 16, 3, 4, 18,
4, -1, 6, -1, 8, 9, 10, 3, 4, 3, 6, 3, 8, 9, 10, 3, 17, 11, 12, 13,
4, -1, 8, 9, -1, 9, 11, 12, 13, 14, 14, 15, 16, 17, 3, 4, 3, 4, 4, 8,
15, 16, 11, 12, 13, 14, 15, 16, 11, 12, 9, -1, 9, 11, 12, 13, 14, 15, 16, 11,
13, 14, 15, 16, 18, 19, 20, 21, 22, 23 12, 13, 14, 15, 16, 18, 19, 20, 21, 22,
23
}; };
const unsigned char const unsigned char
parser::yystos_[] = parser::yystos_[] =
{ {
0, 3, 4, 8, 9, 19, 20, 21, 22, 23, 0, 3, 4, 8, 9, 20, 21, 22, 23, 25,
24, 5, 20, 21, 0, 6, 10, 20, 11, 12, 26, 5, 21, 22, 0, 6, 10, 21, 11, 12,
13, 14, 15, 16, 17, 20, 20, 3, 23, 3, 13, 14, 15, 16, 18, 21, 21, 3, 25, 3,
23, 23, 23, 23, 23 17, 24, 25, 25, 25, 25, 25, 17, 3, 3
}; };
const unsigned char const unsigned char
parser::yyr1_[] = parser::yyr1_[] =
{ {
0, 18, 19, 20, 20, 20, 20, 20, 21, 21, 0, 19, 20, 21, 21, 21, 21, 21, 22, 22,
21, 21, 21, 21, 21, 21, 22, 22, 23, 23, 22, 22, 22, 22, 22, 22, 22, 23, 23, 24,
24, 24 24, 24, 25, 25, 26, 26
}; };
const unsigned char const unsigned char
parser::yyr2_[] = parser::yyr2_[] =
{ {
0, 2, 1, 2, 3, 3, 3, 1, 1, 3, 0, 2, 1, 2, 3, 3, 3, 1, 1, 3,
3, 3, 3, 3, 3, 2, 1, 3, 1, 1, 3, 3, 3, 3, 3, 3, 2, 1, 3, 3,
1, 2 2, 2, 1, 1, 1, 2
}; };
@ -1231,17 +1307,17 @@ namespace yy {
{ {
"$end", "error", "$undefined", "WORD", "QUOTED", "QUALIFIERS", "AND", "$end", "error", "$undefined", "WORD", "QUOTED", "QUALIFIERS", "AND",
"UCONCAT", "'('", "'-'", "OR", "EQUALS", "CONTAINS", "SMALLEREQ", "UCONCAT", "'('", "'-'", "OR", "EQUALS", "CONTAINS", "SMALLEREQ",
"SMALLER", "GREATEREQ", "GREATER", "')'", "$accept", "topquery", "query", "SMALLER", "GREATEREQ", "GREATER", "RANGE", "')'", "$accept", "topquery",
"fieldexpr", "complexfieldname", "term", "qualquote", YY_NULLPTR "query", "fieldexpr", "complexfieldname", "range", "term", "qualquote", YY_NULLPTR
}; };
#if YYDEBUG #if YYDEBUG
const unsigned char const unsigned short int
parser::yyrline_[] = parser::yyrline_[] =
{ {
0, 71, 71, 84, 95, 106, 117, 123, 136, 141, 0, 73, 73, 86, 97, 108, 119, 125, 138, 143,
149, 157, 165, 173, 181, 189, 199, 205, 214, 220, 151, 159, 167, 175, 183, 191, 199, 209, 215, 224,
226, 232 232, 239, 248, 254, 260, 266
}; };
// Print the state stack on the debug stream. // Print the state stack on the debug stream.
@ -1286,7 +1362,7 @@ namespace yy {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
8, 17, 2, 2, 2, 9, 2, 2, 2, 2, 8, 18, 2, 2, 2, 9, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@ -1308,9 +1384,10 @@ namespace yy {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 1, 2, 3, 4, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
5, 6, 7, 10, 11, 12, 13, 14, 15, 16 5, 6, 7, 10, 11, 12, 13, 14, 15, 16,
17
}; };
const unsigned int user_token_number_max_ = 269; const unsigned int user_token_number_max_ = 270;
const token_number_type undef_token_ = 2; const token_number_type undef_token_ = 2;
if (static_cast<int>(t) <= yyeof_) if (static_cast<int>(t) <= yyeof_)
@ -1323,8 +1400,8 @@ namespace yy {
} // yy } // yy
#line 1327 "y.tab.c" // lalr1.cc:1155 #line 1404 "y.tab.c" // lalr1.cc:1167
#line 244 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:1156 #line 278 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:1168
#include <ctype.h> #include <ctype.h>
@ -1473,7 +1550,7 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *,
return c; return c;
} }
// field-term relations // field-term relations, and ranges
switch (c) { switch (c) {
case '=': return yy::parser::token::EQUALS; case '=': return yy::parser::token::EQUALS;
case ':': return yy::parser::token::CONTAINS; case ':': return yy::parser::token::CONTAINS;
@ -1486,6 +1563,15 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *,
return yy::parser::token::SMALLER; return yy::parser::token::SMALLER;
} }
} }
case '.': {
int c1 = d->GETCHAR();
if (c1 == '.') {
return yy::parser::token::RANGE;
} else {
d->UNGETCHAR(c1);
break;
}
}
case '>': { case '>': {
int c1 = d->GETCHAR(); int c1 = d->GETCHAR();
if (c1 == '=') { if (c1 == '=') {
@ -1514,6 +1600,16 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *,
//cerr << "Word broken by special char" << endl; //cerr << "Word broken by special char" << endl;
d->UNGETCHAR(c); d->UNGETCHAR(c);
break; break;
} else if (c == '.') {
int c1 = d->GETCHAR();
if (c1 == '.') {
d->UNGETCHAR(c1);
d->UNGETCHAR(c);
break;
} else {
d->UNGETCHAR(c1);
word->push_back(c);
}
} else if (c == 0) { } else if (c == 0) {
//cerr << "Word broken by EOF" << endl; //cerr << "Word broken by EOF" << endl;
break; break;

View File

@ -1,8 +1,8 @@
// A Bison parser, made by GNU Bison 3.0.2. // A Bison parser, made by GNU Bison 3.0.4.
// Skeleton interface for Bison LALR(1) parsers in C++ // Skeleton interface for Bison LALR(1) parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc. // Copyright (C) 2002-2015 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify // This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by // it under the terms of the GNU General Public License as published by
@ -41,10 +41,11 @@
# define YY_YY_Y_TAB_H_INCLUDED # define YY_YY_Y_TAB_H_INCLUDED
# include <vector> # include <cstdlib> // std::abort
# include <iostream> # include <iostream>
# include <stdexcept> # include <stdexcept>
# include <string> # include <string>
# include <vector>
# include "stack.hh" # include "stack.hh"
# include "location.hh" # include "location.hh"
@ -109,7 +110,7 @@
namespace yy { namespace yy {
#line 113 "y.tab.h" // lalr1.cc:372 #line 114 "y.tab.h" // lalr1.cc:377
@ -123,13 +124,14 @@ namespace yy {
/// Symbol semantic values. /// Symbol semantic values.
union semantic_type union semantic_type
{ {
#line 46 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:372 #line 46 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:377
std::string *str; std::string *str;
Rcl::SearchDataClauseRange *rg;
Rcl::SearchDataClauseSimple *cl; Rcl::SearchDataClauseSimple *cl;
Rcl::SearchData *sd; Rcl::SearchData *sd;
#line 133 "y.tab.h" // lalr1.cc:372 #line 135 "y.tab.h" // lalr1.cc:377
}; };
#else #else
typedef YYSTYPE semantic_type; typedef YYSTYPE semantic_type;
@ -160,16 +162,20 @@ namespace yy {
SMALLEREQ = 266, SMALLEREQ = 266,
SMALLER = 267, SMALLER = 267,
GREATEREQ = 268, GREATEREQ = 268,
GREATER = 269 GREATER = 269,
RANGE = 270
}; };
}; };
/// (External) token type, as returned by yylex. /// (External) token type, as returned by yylex.
typedef token::yytokentype token_type; typedef token::yytokentype token_type;
/// Internal symbol number. /// Symbol type: an internal symbol number.
typedef int symbol_number_type; typedef int symbol_number_type;
/// The symbol type number to denote an empty symbol.
enum { empty_symbol = -2 };
/// Internal symbol number for tokens (subsumed by symbol_number_type). /// Internal symbol number for tokens (subsumed by symbol_number_type).
typedef unsigned char token_number_type; typedef unsigned char token_number_type;
@ -200,8 +206,15 @@ namespace yy {
const semantic_type& v, const semantic_type& v,
const location_type& l); const location_type& l);
/// Destroy the symbol.
~basic_symbol (); ~basic_symbol ();
/// Destroy contents, and record that is empty.
void clear ();
/// Whether empty.
bool empty () const;
/// Destructive move, \a s is emptied into this. /// Destructive move, \a s is emptied into this.
void move (basic_symbol& s); void move (basic_symbol& s);
@ -231,21 +244,23 @@ namespace yy {
/// Constructor from (external) token numbers. /// Constructor from (external) token numbers.
by_type (kind_type t); by_type (kind_type t);
/// Record that this symbol is empty.
void clear ();
/// Steal the symbol type from \a that. /// Steal the symbol type from \a that.
void move (by_type& that); void move (by_type& that);
/// The (internal) type number (corresponding to \a type). /// The (internal) type number (corresponding to \a type).
/// -1 when this symbol is empty. /// \a empty when empty.
symbol_number_type type_get () const; symbol_number_type type_get () const;
/// The token. /// The token.
token_type token () const; token_type token () const;
enum { empty = 0 };
/// The symbol type. /// The symbol type.
/// -1 when this symbol is empty. /// \a empty_symbol when empty.
token_number_type type; /// An int, not token_number_type, to be able to store empty_symbol.
int type;
}; };
/// "External" symbols: returned by the scanner. /// "External" symbols: returned by the scanner.
@ -292,9 +307,9 @@ namespace yy {
/// Generate an error message. /// Generate an error message.
/// \param yystate the state where the error occurred. /// \param yystate the state where the error occurred.
/// \param yytoken the lookahead token type, or yyempty_. /// \param yyla the lookahead token.
virtual std::string yysyntax_error_ (state_type yystate, virtual std::string yysyntax_error_ (state_type yystate,
symbol_number_type yytoken) const; const symbol_type& yyla) const;
/// Compute post-reduction state. /// Compute post-reduction state.
/// \param yystate the current state /// \param yystate the current state
@ -357,7 +372,7 @@ namespace yy {
static const char* const yytname_[]; static const char* const yytname_[];
#if YYDEBUG #if YYDEBUG
// YYRLINE[YYN] -- Source line where rule number YYN was defined. // YYRLINE[YYN] -- Source line where rule number YYN was defined.
static const unsigned char yyrline_[]; static const unsigned short int yyrline_[];
/// Report on the debug stream that the rule \a r is going to be reduced. /// Report on the debug stream that the rule \a r is going to be reduced.
virtual void yy_reduce_print_ (int r); virtual void yy_reduce_print_ (int r);
/// Print the state stack on the debug stream. /// Print the state stack on the debug stream.
@ -397,16 +412,21 @@ namespace yy {
/// Copy constructor. /// Copy constructor.
by_state (const by_state& other); by_state (const by_state& other);
/// Record that this symbol is empty.
void clear ();
/// Steal the symbol type from \a that. /// Steal the symbol type from \a that.
void move (by_state& that); void move (by_state& that);
/// The (internal) type number (corresponding to \a state). /// The (internal) type number (corresponding to \a state).
/// "empty" when empty. /// \a empty_symbol when empty.
symbol_number_type type_get () const; symbol_number_type type_get () const;
enum { empty = 0 }; /// The state number used to denote an empty symbol.
enum { empty_state = -1 };
/// The state. /// The state.
/// \a empty when empty.
state_type state; state_type state;
}; };
@ -447,17 +467,16 @@ namespace yy {
/// Pop \a n symbols the three stacks. /// Pop \a n symbols the three stacks.
void yypop_ (unsigned int n = 1); void yypop_ (unsigned int n = 1);
// Constants. /// Constants.
enum enum
{ {
yyeof_ = 0, yyeof_ = 0,
yylast_ = 59, ///< Last index in yytable_. yylast_ = 60, ///< Last index in yytable_.
yynnts_ = 7, ///< Number of nonterminal symbols. yynnts_ = 8, ///< Number of nonterminal symbols.
yyempty_ = -2,
yyfinal_ = 14, ///< Termination state number. yyfinal_ = 14, ///< Termination state number.
yyterror_ = 1, yyterror_ = 1,
yyerrcode_ = 256, yyerrcode_ = 256,
yyntokens_ = 18 ///< Number of tokens. yyntokens_ = 19 ///< Number of tokens.
}; };
@ -468,7 +487,7 @@ namespace yy {
} // yy } // yy
#line 472 "y.tab.h" // lalr1.cc:372 #line 491 "y.tab.h" // lalr1.cc:377

View File

@ -13,7 +13,7 @@
using namespace std; using namespace std;
// #define LOG_PARSER //#define LOG_PARSER
#ifdef LOG_PARSER #ifdef LOG_PARSER
#define LOGP(X) {cerr << X;} #define LOGP(X) {cerr << X;}
#else #else
@ -45,6 +45,7 @@ static void addSubQuery(WasaParserDriver *d,
%union { %union {
std::string *str; std::string *str;
Rcl::SearchDataClauseRange *rg;
Rcl::SearchDataClauseSimple *cl; Rcl::SearchDataClauseSimple *cl;
Rcl::SearchData *sd; Rcl::SearchData *sd;
} }
@ -52,6 +53,7 @@ static void addSubQuery(WasaParserDriver *d,
%type <cl> qualquote %type <cl> qualquote
%type <cl> fieldexpr %type <cl> fieldexpr
%type <rg> range
%type <cl> term %type <cl> term
%type <sd> query %type <sd> query
%type <str> complexfieldname %type <str> complexfieldname
@ -64,7 +66,7 @@ static void addSubQuery(WasaParserDriver *d,
%left AND UCONCAT '(' '-' %left AND UCONCAT '(' '-'
%left OR %left OR
%token EQUALS CONTAINS SMALLEREQ SMALLER GREATEREQ GREATER %token EQUALS CONTAINS SMALLEREQ SMALLER GREATEREQ GREATER RANGE
%% %%
@ -154,9 +156,17 @@ fieldexpr: term
$$ = $3; $$ = $3;
delete $1; delete $1;
} }
| complexfieldname CONTAINS range
{
LOGP("fe: " << *$1 << " : " << $3->gettext() << endl);
$3->setfield(*$1);
$3->setrel(Rcl::SearchDataClause::REL_CONTAINS);
$$ = $3;
delete $1;
}
| complexfieldname SMALLER term | complexfieldname SMALLER term
{ {
LOGP(cerr << "fe: " << *$1 << " < " << $3->gettext() << endl); LOGP("fe: " << *$1 << " < " << $3->gettext() << endl);
$3->setfield(*$1); $3->setfield(*$1);
$3->setrel(Rcl::SearchDataClause::REL_LT); $3->setrel(Rcl::SearchDataClause::REL_LT);
$$ = $3; $$ = $3;
@ -210,6 +220,30 @@ complexfieldname CONTAINS WORD
delete $3; delete $3;
} }
/* range: a value interval, written with the RANGE token (the lexer returns
 * RANGE for ".."). Each alternative builds a Rcl::SearchDataClauseRange from
 * the two bounds, then deletes the WORD strings it consumed. A missing bound
 * is passed to the clause as an empty string. */
range:
WORD RANGE WORD
{
// Both bounds present: $1 is the first bound, $3 the second.
LOGP("Range: " << *$1 << string(" .. ") << *$3 << endl);
$$ = new Rcl::SearchDataClauseRange(*$1, *$3);
// The clause has been constructed from the strings: release them.
delete $1;
delete $3;
}
|
RANGE WORD
{
// No first bound: an empty string is passed in its place.
LOGP("Range: " << "" << string(" .. ") << *$2 << endl);
$$ = new Rcl::SearchDataClauseRange("", *$2);
delete $2;
}
|
WORD RANGE
{
// No second bound: an empty string is passed in its place.
LOGP("Range: " << *$1 << string(" .. ") << "" << endl);
$$ = new Rcl::SearchDataClauseRange(*$1, "");
delete $1;
}
;
term: term:
WORD WORD
{ {
@ -389,7 +423,7 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *,
return c; return c;
} }
// field-term relations // field-term relations, and ranges
switch (c) { switch (c) {
case '=': return yy::parser::token::EQUALS; case '=': return yy::parser::token::EQUALS;
case ':': return yy::parser::token::CONTAINS; case ':': return yy::parser::token::CONTAINS;
@ -402,6 +436,15 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *,
return yy::parser::token::SMALLER; return yy::parser::token::SMALLER;
} }
} }
case '.': {
int c1 = d->GETCHAR();
if (c1 == '.') {
return yy::parser::token::RANGE;
} else {
d->UNGETCHAR(c1);
break;
}
}
case '>': { case '>': {
int c1 = d->GETCHAR(); int c1 = d->GETCHAR();
if (c1 == '=') { if (c1 == '=') {
@ -430,6 +473,16 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *,
//cerr << "Word broken by special char" << endl; //cerr << "Word broken by special char" << endl;
d->UNGETCHAR(c); d->UNGETCHAR(c);
break; break;
} else if (c == '.') {
int c1 = d->GETCHAR();
if (c1 == '.') {
d->UNGETCHAR(c1);
d->UNGETCHAR(c);
break;
} else {
d->UNGETCHAR(c1);
word->push_back(c);
}
} else if (c == 0) { } else if (c == 0) {
//cerr << "Word broken by EOF" << endl; //cerr << "Word broken by EOF" << endl;
break; break;

View File

@ -182,7 +182,8 @@ bool WasaParserDriver::addClause(SearchData *sd,
delete cl; delete cl;
return false; return false;
} }
LOGDEB("addClause:: date span: " << (di.y1) << "-" << (di.m1) << "-" << (di.d1) << "/" << (di.y2) << "-" << (di.m2) << "-" << (di.d2) << "\n" ); LOGDEB("addClause:: date span: " << di.y1 << "-" << di.m1 << "-"
<< di.d1 << "/" << di.y2 << "-" << di.m2 << "-" << di.d2 << "\n");
m_haveDates = true; m_haveDates = true;
m_dates = di; m_dates = di;
delete cl; delete cl;

View File

@ -50,6 +50,7 @@ using namespace std;
#include "searchdata.h" #include "searchdata.h"
#include "rclquery.h" #include "rclquery.h"
#include "rclquery_p.h" #include "rclquery_p.h"
#include "rclvalues.h"
#include "md5ut.h" #include "md5ut.h"
#include "rclversion.h" #include "rclversion.h"
#include "cancelcheck.h" #include "cancelcheck.h"
@ -1489,7 +1490,8 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
for (vector<string>::iterator it = vpath.begin(); for (vector<string>::iterator it = vpath.begin();
it != vpath.end(); it++){ it != vpath.end(); it++){
if (it->length() > 230) { if (it->length() > 230) {
// Just truncate it. May still be useful because of wildcards // Just truncate it. May still be useful because
// of wildcards
*it = it->substr(0, 230); *it = it->substr(0, 230);
} }
newdocument.add_posting(wrap_prefix(pathelt_prefix) + *it, newdocument.add_posting(wrap_prefix(pathelt_prefix) + *it,
@ -1504,25 +1506,35 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
// //
// The order has no importance, and we set a position gap of 100 // The order has no importance, and we set a position gap of 100
// between fields to avoid false proximity matches. // between fields to avoid false proximity matches.
map<string, string>::iterator meta_it; for (const auto& entry: doc.meta) {
for (meta_it = doc.meta.begin(); meta_it != doc.meta.end(); meta_it++) { if (entry.second.empty()) {
if (!meta_it->second.empty()) {
const FieldTraits *ftp;
// We don't test for an empty prefix here. Some fields are part
// of the internal conf with an empty prefix (ie: abstract).
if (!fieldToTraits(meta_it->first, &ftp)) {
LOGDEB0("Db::add: no prefix for field [" <<
meta_it->first << "], no indexing\n");
continue; continue;
} }
LOGDEB0("Db::add: field [" << meta_it->first << "] pfx [" << const FieldTraits *ftp{nullptr};
ftp->pfx << "] inc " << ftp->wdfinc << ": [" << fieldToTraits(entry.first, &ftp);
meta_it->second << "]\n"); if (ftp && ftp->valueslot) {
splitter.setTraits(*ftp); LOGDEB("Adding value: for field " << entry.first << " slot "
if (!splitter.text_to_words(meta_it->second)) { << ftp->valueslot << endl);
LOGDEB("Db::addOrUpdate: split failed for " << add_field_value(newdocument, *ftp, entry.second);
meta_it->first << "\n");
} }
// There was an old comment here about not testing for an
// empty prefix, and we indeed did not test. I don't think
// that it makes sense any more (and it was in disagreement
// with the LOG message). Really now: no prefix, no
// indexing.
if (ftp && !ftp->pfx.empty()) {
LOGDEB0("Db::add: field [" << entry.first << "] pfx [" <<
ftp->pfx << "] inc " << ftp->wdfinc << ": [" <<
entry.second << "]\n");
splitter.setTraits(*ftp);
if (!splitter.text_to_words(entry.second)) {
LOGDEB("Db::addOrUpdate: split failed for " <<
entry.first << "\n");
}
} else {
LOGDEB0("Db::add: no prefix for field [" <<
entry.first << "], no indexing\n");
} }
} }
@ -1578,8 +1590,8 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
if (doc.getmeta(Doc::keyfn, &utf8fn) && !utf8fn.empty()) { if (doc.getmeta(Doc::keyfn, &utf8fn) && !utf8fn.empty()) {
string fn; string fn;
if (unacmaybefold(utf8fn, fn, "UTF-8", UNACOP_UNACFOLD)) { if (unacmaybefold(utf8fn, fn, "UTF-8", UNACOP_UNACFOLD)) {
// We should truncate after extracting the extension, but this is // We should truncate after extracting the extension,
// a pathological case anyway // but this is a pathological case anyway
if (fn.size() > 230) if (fn.size() > 230)
utf8truncate(fn, 230); utf8truncate(fn, 230);
string::size_type pos = fn.rfind('.'); string::size_type pos = fn.rfind('.');
@ -1587,7 +1599,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
newdocument.add_boolean_term(wrap_prefix(fileext_prefix) + newdocument.add_boolean_term(wrap_prefix(fileext_prefix) +
fn.substr(pos + 1)); fn.substr(pos + 1));
} }
newdocument.add_term(wrap_prefix(unsplitfilename_prefix) + fn, 0); newdocument.add_term(wrap_prefix(unsplitfilename_prefix) + fn,0);
} }
} }

View File

@ -197,7 +197,7 @@ class Db::Native {
std::string rawtextMetaKey(Xapian::docid did) { std::string rawtextMetaKey(Xapian::docid did) {
// Xapian's Olly Betts advises to use a key which will // Xapian's Olly Betts advises to use a key which will
// sort the same as the docid (which we do), and to // sort the same as the docid (which we do), and to
// use Xapian's pack_uint_preserving_sort() which is // use Xapian's pack.h:pack_uint_preserving_sort() which is
// efficient but hard to read. I'd wager that this // efficient but hard to read. I'd wager that this
// does not make much of a difference. 10 ascii bytes // does not make much of a difference. 10 ascii bytes
// gives us 10 billion docs, which is enough (says I). // gives us 10 billion docs, which is enough (says I).

87
src/rcldb/rclvalues.cpp Normal file
View File

@ -0,0 +1,87 @@
/* Copyright (C) 2004-2018 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include <string>
#include "xapian.h"
#include "rclconfig.h"
#include "smallut.h"
#include "log.h"
using namespace std;
namespace Rcl {
/**
 * Store a field's data into the Xapian value slot configured for it.
 *
 * @param xdoc Xapian document receiving the value.
 * @param ft   traits of the field: valuetype selects the formatting,
 *             valuelen the padding width, valueslot the target slot.
 * @param data field data to store.
 *
 * STR data is stored unchanged. INT data is passed through leftzeropad()
 * with a width of ft.valuelen, or 10 when valuelen is zero.
 */
void add_field_value(Xapian::Document& xdoc, const FieldTraits& ft,
                     const string& data)
{
    string stored(data);
    if (ft.valuetype == FieldTraits::INT) {
        // Fall back to 10 characters when no length was configured.
        const int width = ft.valuelen ? ft.valuelen : 10;
        leftzeropad(stored, width);
    }
    LOGDEB0("Rcl::add_field_value: slot " << ft.valueslot << " [" <<
            stored << "]\n");
    xdoc.add_value(ft.valueslot, stored);
}
/**
 * Format user-supplied data the same way add_field_value() formats
 * stored data, so that the result can be compared to stored values.
 *
 * @param ft   traits of the field (valuetype and valuelen are used).
 * @param data value text as entered in the query.
 * @return the converted value. STR data and empty data are returned
 *         unchanged. For INT data, a trailing k/m/g/t suffix (either
 *         case) is replaced by 3/6/9/12 zeroes, then the result goes
 *         through leftzeropad() with a width of ft.valuelen, or 10 when
 *         valuelen is zero.
 */
string convert_field_value(const FieldTraits& ft,
                           const string& data)
{
    string value(data);
    if (ft.valuetype != FieldTraits::INT || value.empty()) {
        return value;
    }

    // Number of zeroes standing for the multiplier suffix, if any.
    string::size_type nzeroes = 0;
    switch (value.back()) {
    case 'k': case 'K': nzeroes = 3; break;
    case 'm': case 'M': nzeroes = 6; break;
    case 'g': case 'G': nzeroes = 9; break;
    case 't': case 'T': nzeroes = 12; break;
    default: break;
    }
    if (nzeroes != 0) {
        // Replace the suffix character with its zeroes.
        value.pop_back();
        value.append(nzeroes, '0');
    }

    const int width = ft.valuelen ? ft.valuelen : 10;
    leftzeropad(value, width);
    return value;
}
}

29
src/rcldb/rclvalues.h Normal file
View File

@ -0,0 +1,29 @@
#ifndef _RCLVALUES_H_INCLUDED_
#define _RCLVALUES_H_INCLUDED_
/* Copyright (C) 2004-2018 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <string>
namespace Rcl {
// Store the data for a field into the Xapian document value slot
// designated by ft.valueslot. The data is first formatted according to
// ft.valuetype: STR values are stored as is, INT values are left-padded
// with zeroes to ft.valuelen characters (10 if valuelen is 0).
extern void add_field_value(Xapian::Document& xdoc, const FieldTraits& ft,
const std::string& data);
// Convert a value coming from a query to the format used by
// add_field_value(). For INT fields, a trailing k/m/g/t suffix (either
// case) is expanded to the corresponding number of zeroes before padding.
// Returns the converted value.
extern std::string convert_field_value(const FieldTraits& ft,
const std::string& data);
}
#endif /* _RCLVALUES_H_INCLUDED_ */

View File

@ -348,6 +348,14 @@ void SearchDataClausePath::dump(ostream& o) const
o << "[" << m_text << "]"; o << "[" << m_text << "]";
} }
// Print a one-line description of the range clause for debugging: the
// clause type, an exclusion marker if the clause is negated, then the
// text returned by gettext() between brackets.
void SearchDataClauseRange::dump(ostream& o) const
{
    const char *exclmark = m_exclude ? " - " : "";
    o << "ClauseRange: " << exclmark << "[" << gettext() << "]";
}
void SearchDataClauseDist::dump(ostream& o) const void SearchDataClauseDist::dump(ostream& o) const
{ {
if (m_tp == SCLT_NEAR) if (m_tp == SCLT_NEAR)

View File

@ -41,9 +41,8 @@ namespace Rcl {
/** Search clause types */ /** Search clause types */
enum SClType { enum SClType {
SCLT_AND, SCLT_AND, SCLT_OR, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR,
SCLT_OR, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR, SCLT_PATH, SCLT_PATH, SCLT_RANGE, SCLT_SUB,
SCLT_SUB
}; };
class SearchDataClause; class SearchDataClause;
@ -79,15 +78,13 @@ class SearchDataClauseDist;
class SearchData { class SearchData {
public: public:
SearchData(SClType tp, const string& stemlang) SearchData(SClType tp, const string& stemlang)
: m_tp(tp), m_stemlang(stemlang) : m_tp(tp), m_stemlang(stemlang) {
{
if (m_tp != SCLT_OR && m_tp != SCLT_AND) if (m_tp != SCLT_OR && m_tp != SCLT_AND)
m_tp = SCLT_OR; m_tp = SCLT_OR;
commoninit(); commoninit();
} }
SearchData() SearchData()
: m_tp(SCLT_AND) : m_tp(SCLT_AND) {
{
commoninit(); commoninit();
} }
@ -145,8 +142,7 @@ public:
by the GUI */ by the GUI */
string asXML(); string asXML();
void setTp(SClType tp) void setTp(SClType tp) {
{
m_tp = tp; m_tp = tp;
} }
@ -154,8 +150,7 @@ public:
return m_tp; return m_tp;
} }
void setMaxExpand(int max) void setMaxExpand(int max) {
{
m_softmaxexpand = max; m_softmaxexpand = max;
} }
bool getAutoDiac() {return m_autodiacsens;} bool getAutoDiac() {return m_autodiacsens;}
@ -241,67 +236,54 @@ public:
SearchDataClause(SClType tp) SearchDataClause(SClType tp)
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0), : m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
m_modifiers(SDCM_NONE), m_weight(1.0), m_exclude(false), m_modifiers(SDCM_NONE), m_weight(1.0), m_exclude(false),
m_rel(REL_CONTAINS) m_rel(REL_CONTAINS) {}
{}
virtual ~SearchDataClause() {} virtual ~SearchDataClause() {}
virtual bool toNativeQuery(Rcl::Db &db, void *) = 0; virtual bool toNativeQuery(Rcl::Db &db, void *) = 0;
bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;} bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
virtual std::string getReason() const {return m_reason;} virtual std::string getReason() const {return m_reason;}
virtual void getTerms(HighlightData&) const {} virtual void getTerms(HighlightData&) const {}
SClType getTp() const SClType getTp() const {
{
return m_tp; return m_tp;
} }
void setTp(SClType tp) { void setTp(SClType tp) {
m_tp = tp; m_tp = tp;
} }
void setParent(SearchData *p) void setParent(SearchData *p) {
{
m_parentSearch = p; m_parentSearch = p;
} }
string getStemLang() string getStemLang() {
{
return (m_modifiers & SDCM_NOSTEMMING) || m_parentSearch == 0 ? return (m_modifiers & SDCM_NOSTEMMING) || m_parentSearch == 0 ?
cstr_null : m_parentSearch->getStemLang(); cstr_null : m_parentSearch->getStemLang();
} }
bool getAutoDiac() bool getAutoDiac() {
{
return m_parentSearch ? m_parentSearch->getAutoDiac() : false; return m_parentSearch ? m_parentSearch->getAutoDiac() : false;
} }
bool getAutoCase() bool getAutoCase() {
{
return m_parentSearch ? m_parentSearch->getAutoCase() : true; return m_parentSearch ? m_parentSearch->getAutoCase() : true;
} }
int getMaxExp() int getMaxExp() {
{
return m_parentSearch ? m_parentSearch->getMaxExp() : 10000; return m_parentSearch ? m_parentSearch->getMaxExp() : 10000;
} }
size_t getMaxCl() size_t getMaxCl() {
{
return m_parentSearch ? m_parentSearch->getMaxCl() : 100000; return m_parentSearch ? m_parentSearch->getMaxCl() : 100000;
} }
int getSoftMaxExp() int getSoftMaxExp() {
{
return m_parentSearch ? m_parentSearch->getSoftMaxExp() : -1; return m_parentSearch ? m_parentSearch->getSoftMaxExp() : -1;
} }
virtual void addModifier(Modifier mod) virtual void addModifier(Modifier mod) {
{
m_modifiers = m_modifiers | mod; m_modifiers = m_modifiers | mod;
} }
virtual unsigned int getmodifiers() { virtual unsigned int getmodifiers() {
return m_modifiers; return m_modifiers;
} }
virtual void setWeight(float w) virtual void setWeight(float w) {
{
m_weight = w; m_weight = w;
} }
virtual bool getexclude() const virtual bool getexclude() const {
{
return m_exclude; return m_exclude;
} }
virtual void setexclude(bool onoff) virtual void setexclude(bool onoff) {
{
m_exclude = onoff; m_exclude = onoff;
} }
virtual void setrel(Relation rel) { virtual void setrel(Relation rel) {
@ -322,15 +304,6 @@ protected:
float m_weight; float m_weight;
bool m_exclude; bool m_exclude;
Relation m_rel; Relation m_rel;
private:
SearchDataClause(const SearchDataClause&)
{
}
SearchDataClause& operator=(const SearchDataClause&)
{
return *this;
}
}; };
/** /**
@ -342,35 +315,28 @@ class SearchDataClauseSimple : public SearchDataClause {
public: public:
SearchDataClauseSimple(SClType tp, const std::string& txt, SearchDataClauseSimple(SClType tp, const std::string& txt,
const std::string& fld = std::string()) const std::string& fld = std::string())
: SearchDataClause(tp), m_text(txt), m_field(fld), m_curcl(0) : SearchDataClause(tp), m_text(txt), m_field(fld), m_curcl(0) {
{
m_haveWildCards = m_haveWildCards =
(txt.find_first_of(cstr_minwilds) != std::string::npos); (txt.find_first_of(cstr_minwilds) != std::string::npos);
} }
SearchDataClauseSimple(const std::string& txt, SClType tp) SearchDataClauseSimple(const std::string& txt, SClType tp)
: SearchDataClause(tp), m_text(txt), m_curcl(0) : SearchDataClause(tp), m_text(txt), m_curcl(0) {
{
m_haveWildCards = m_haveWildCards =
(txt.find_first_of(cstr_minwilds) != std::string::npos); (txt.find_first_of(cstr_minwilds) != std::string::npos);
} }
virtual ~SearchDataClauseSimple() virtual ~SearchDataClauseSimple() {}
{
}
/** Translate to Xapian query */ /** Translate to Xapian query */
virtual bool toNativeQuery(Rcl::Db &, void *); virtual bool toNativeQuery(Rcl::Db &, void *);
virtual void getTerms(HighlightData& hldata) const virtual void getTerms(HighlightData& hldata) const {
{
hldata.append(m_hldata); hldata.append(m_hldata);
} }
virtual const std::string& gettext() virtual const std::string& gettext() const {
{
return m_text; return m_text;
} }
virtual const std::string& getfield() virtual const std::string& getfield() const {
{
return m_field; return m_field;
} }
virtual void setfield(const string& field) { virtual void setfield(const string& field) {
@ -384,6 +350,7 @@ protected:
HighlightData m_hldata; HighlightData m_hldata;
// Current count of Xapian clauses, to check against expansion limit // Current count of Xapian clauses, to check against expansion limit
size_t m_curcl; size_t m_curcl;
bool processUserString(Rcl::Db &db, const string &iq, bool processUserString(Rcl::Db &db, const string &iq,
std::string &ermsg, std::string &ermsg,
void* pq, int slack = 0, bool useNear = false); void* pq, int slack = 0, bool useNear = false);
@ -400,6 +367,32 @@ protected:
int mods, void *pq, bool useNear, int slack); int mods, void *pq, bool useNear, int slack);
}; };
/**
 * Range search clause on a field value. The first bound is kept in the
 * inherited m_text and the second one in m_t2. Either may be empty.
 */
class SearchDataClauseRange : public SearchDataClauseSimple {
public:
    /** Build a range clause for field fld, with bounds t1 and t2. */
    SearchDataClauseRange(const std::string& t1, const std::string& t2,
                          const std::string& fld = std::string())
        : SearchDataClauseSimple(SCLT_RANGE, t1, fld), m_t2(t2) {}

    /**
     * 'Upgrade' a simple clause having an eq/gt/lt... relation to a
     * range. The clause is copied, then its text is replaced by t1.
     * t1, t2, or both can be set to the original text, which is why
     * they are passed as separate parameters.
     */
    SearchDataClauseRange(const SearchDataClauseSimple& cl,
                          const std::string& t1, const std::string& t2)
        : SearchDataClauseSimple(cl), m_t2(t2) {
        m_text = t1;
    }

    virtual ~SearchDataClauseRange() {}

    virtual void dump(ostream& o) const;

    /** Second bound of the range (the first one is gettext()). */
    virtual const std::string& gettext2() const { return m_t2; }

    /** Translate to Xapian query */
    virtual bool toNativeQuery(Rcl::Db &db, void *);

protected:
    // Second bound of the range
    std::string m_t2;
};
/** /**
* Filename search clause. This is special because term expansion is only * Filename search clause. This is special because term expansion is only
@ -412,15 +405,12 @@ protected:
class SearchDataClauseFilename : public SearchDataClauseSimple { class SearchDataClauseFilename : public SearchDataClauseSimple {
public: public:
SearchDataClauseFilename(const std::string& txt) SearchDataClauseFilename(const std::string& txt)
: SearchDataClauseSimple(txt, SCLT_FILENAME) : SearchDataClauseSimple(txt, SCLT_FILENAME) {
{
// File name searches don't count when looking for wild cards. // File name searches don't count when looking for wild cards.
m_haveWildCards = false; m_haveWildCards = false;
} }
virtual ~SearchDataClauseFilename() virtual ~SearchDataClauseFilename() {}
{
}
virtual bool toNativeQuery(Rcl::Db &, void *); virtual bool toNativeQuery(Rcl::Db &, void *);
virtual void dump(ostream& o) const; virtual void dump(ostream& o) const;
@ -450,15 +440,12 @@ public:
class SearchDataClausePath : public SearchDataClauseSimple { class SearchDataClausePath : public SearchDataClauseSimple {
public: public:
SearchDataClausePath(const std::string& txt, bool excl = false) SearchDataClausePath(const std::string& txt, bool excl = false)
: SearchDataClauseSimple(SCLT_PATH, txt, "dir") : SearchDataClauseSimple(SCLT_PATH, txt, "dir") {
{
m_exclude = excl; m_exclude = excl;
m_haveWildCards = false; m_haveWildCards = false;
} }
virtual ~SearchDataClausePath() virtual ~SearchDataClausePath() {}
{
}
virtual bool toNativeQuery(Rcl::Db &, void *); virtual bool toNativeQuery(Rcl::Db &, void *);
virtual void dump(ostream& o) const; virtual void dump(ostream& o) const;
@ -472,17 +459,12 @@ class SearchDataClauseDist : public SearchDataClauseSimple {
public: public:
SearchDataClauseDist(SClType tp, const std::string& txt, int slack, SearchDataClauseDist(SClType tp, const std::string& txt, int slack,
const std::string& fld = std::string()) const std::string& fld = std::string())
: SearchDataClauseSimple(tp, txt, fld), m_slack(slack) : SearchDataClauseSimple(tp, txt, fld), m_slack(slack) {}
{
}
virtual ~SearchDataClauseDist() virtual ~SearchDataClauseDist() {}
{
}
virtual bool toNativeQuery(Rcl::Db &, void *); virtual bool toNativeQuery(Rcl::Db &, void *);
virtual int getslack() const virtual int getslack() const {
{
return m_slack; return m_slack;
} }
virtual void setslack(int slack) { virtual void setslack(int slack) {
@ -497,19 +479,15 @@ private:
class SearchDataClauseSub : public SearchDataClause { class SearchDataClauseSub : public SearchDataClause {
public: public:
SearchDataClauseSub(std::shared_ptr<SearchData> sub) SearchDataClauseSub(std::shared_ptr<SearchData> sub)
: SearchDataClause(SCLT_SUB), m_sub(sub) : SearchDataClause(SCLT_SUB), m_sub(sub) {}
{ virtual bool toNativeQuery(Rcl::Db &db, void *p) {
}
virtual bool toNativeQuery(Rcl::Db &db, void *p)
{
bool ret = m_sub->toNativeQuery(db, p); bool ret = m_sub->toNativeQuery(db, p);
if (!ret) if (!ret)
m_reason = m_sub->getReason(); m_reason = m_sub->getReason();
return ret; return ret;
} }
virtual void getTerms(HighlightData& hldata) const virtual void getTerms(HighlightData& hldata) const {
{
m_sub.get()->getTerms(hldata); m_sub.get()->getTerms(hldata);
} }
virtual std::shared_ptr<SearchData> getSub() { virtual std::shared_ptr<SearchData> getSub() {

View File

@ -47,11 +47,10 @@ using namespace std;
#include "expansiondbs.h" #include "expansiondbs.h"
#include "base64.h" #include "base64.h"
#include "daterange.h" #include "daterange.h"
#include "rclvalues.h"
namespace Rcl { namespace Rcl {
typedef vector<SearchDataClause *>::iterator qlist_it_t;
static const int original_term_wqf_booster = 10; static const int original_term_wqf_booster = 10;
// Expand doc categories and mime type wild card expressions // Expand doc categories and mime type wild card expressions
@ -62,7 +61,7 @@ bool SearchData::expandFileTypes(Db &db, vector<string>& tps)
{ {
const RclConfig *cfg = db.getConf(); const RclConfig *cfg = db.getConf();
if (!cfg) { if (!cfg) {
LOGFATAL("Db::expandFileTypes: null configuration!!\n" ); LOGFATAL("Db::expandFileTypes: null configuration!!\n");
return false; return false;
} }
vector<string> exptps; vector<string> exptps;
@ -110,15 +109,16 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
string& reason, void *d) string& reason, void *d)
{ {
Xapian::Query xq; Xapian::Query xq;
for (qlist_it_t it = query.begin(); it != query.end(); it++) { for (auto& clausep : query) {
Xapian::Query nq; Xapian::Query nq;
if (!(*it)->toNativeQuery(db, &nq)) { if (!clausep->toNativeQuery(db, &nq)) {
LOGERR("SearchData::clausesToQuery: toNativeQuery failed: " << ((*it)->getReason()) << "\n" ); LOGERR("SearchData::clausesToQuery: toNativeQuery failed: "
reason += (*it)->getReason() + " "; << clausep->getReason() << "\n");
reason += clausep->getReason() + " ";
return false; return false;
} }
if (nq.empty()) { if (nq.empty()) {
LOGDEB("SearchData::clausesToQuery: skipping empty clause\n" ); LOGDEB("SearchData::clausesToQuery: skipping empty clause\n");
continue; continue;
} }
// If this structure is an AND list, must use AND_NOT for excl clauses. // If this structure is an AND list, must use AND_NOT for excl clauses.
@ -126,7 +126,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
// addClause()) // addClause())
Xapian::Query::op op; Xapian::Query::op op;
if (tp == SCLT_AND) { if (tp == SCLT_AND) {
if ((*it)->getexclude()) { if (clausep->getexclude()) {
op = Xapian::Query::OP_AND_NOT; op = Xapian::Query::OP_AND_NOT;
} else { } else {
op = Xapian::Query::OP_AND; op = Xapian::Query::OP_AND;
@ -143,7 +143,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
xq = Xapian::Query(op, xq, nq); xq = Xapian::Query(op, xq, nq);
} }
if (int(xq.get_length()) >= getMaxCl()) { if (int(xq.get_length()) >= getMaxCl()) {
LOGERR("" << (maxXapClauseMsg) << "\n" ); LOGERR("" << maxXapClauseMsg << "\n");
m_reason += maxXapClauseMsg; m_reason += maxXapClauseMsg;
if (!o_index_stripchars) if (!o_index_stripchars)
m_reason += maxXapClauseCaseDiacMsg; m_reason += maxXapClauseCaseDiacMsg;
@ -151,7 +151,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
} }
} }
LOGDEB0("SearchData::clausesToQuery: got " << (xq.get_length()) << " clauses\n" ); LOGDEB0("SearchData::clausesToQuery: got " << xq.get_length()<<" clauses\n");
if (xq.empty()) if (xq.empty())
xq = Xapian::Query::MatchAll; xq = Xapian::Query::MatchAll;
@ -162,7 +162,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
bool SearchData::toNativeQuery(Rcl::Db &db, void *d) bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
{ {
LOGDEB("SearchData::toNativeQuery: stemlang [" << (m_stemlang) << "]\n" ); LOGDEB("SearchData::toNativeQuery: stemlang [" << m_stemlang << "]\n");
m_reason.erase(); m_reason.erase();
db.getConf()->getConfParam("maxTermExpand", &m_maxexp); db.getConf()->getConfParam("maxTermExpand", &m_maxexp);
@ -174,7 +174,8 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
// Xapian query tree // Xapian query tree
Xapian::Query xq; Xapian::Query xq;
if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) { if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
LOGERR("SearchData::toNativeQuery: clausesToQuery failed. reason: " << (m_reason) << "\n" ); LOGERR("SearchData::toNativeQuery: clausesToQuery failed. reason: "
<< m_reason << "\n");
return false; return false;
} }
@ -183,7 +184,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
if (m_dates.y1 == 0 || m_dates.y2 == 0) { if (m_dates.y1 == 0 || m_dates.y2 == 0) {
int minyear = 1970, maxyear = 2100; int minyear = 1970, maxyear = 2100;
if (!db.maxYearSpan(&minyear, &maxyear)) { if (!db.maxYearSpan(&minyear, &maxyear)) {
LOGERR("Can't retrieve index min/max dates\n" ); LOGERR("Can't retrieve index min/max dates\n");
//whatever, go on. //whatever, go on.
} }
@ -198,16 +199,18 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
m_dates.d2 = 31; m_dates.d2 = 31;
} }
} }
LOGDEB("Db::toNativeQuery: date interval: " << (m_dates.y1) << "-" << (m_dates.m1) << "-" << (m_dates.d1) << "/" << (m_dates.y2) << "-" << (m_dates.m2) << "-" << (m_dates.d2) << "\n" ); LOGDEB("Db::toNativeQuery: date interval: " << m_dates.y1 <<
"-" << m_dates.m1 << "-" << m_dates.d1 << "/" <<
m_dates.y2 << "-" << m_dates.m2 << "-" << m_dates.d2 << "\n");
Xapian::Query dq = date_range_filter(m_dates.y1, m_dates.m1, m_dates.d1, Xapian::Query dq = date_range_filter(m_dates.y1, m_dates.m1, m_dates.d1,
m_dates.y2, m_dates.m2, m_dates.d2); m_dates.y2, m_dates.m2, m_dates.d2);
if (dq.empty()) { if (dq.empty()) {
LOGINFO("Db::toNativeQuery: date filter is empty\n" ); LOGINFO("Db::toNativeQuery: date filter is empty\n");
} }
// If no probabilistic query is provided then promote the daterange // If no probabilistic query is provided then promote the daterange
// filter to be THE query instead of filtering an empty query. // filter to be THE query instead of filtering an empty query.
if (xq.empty()) { if (xq.empty()) {
LOGINFO("Db::toNativeQuery: proba query is empty\n" ); LOGINFO("Db::toNativeQuery: proba query is empty\n");
xq = dq; xq = dq;
} else { } else {
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, dq); xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, dq);
@ -239,7 +242,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
// If no probabilistic query is provided then promote the // If no probabilistic query is provided then promote the
// filter to be THE query instead of filtering an empty query. // filter to be THE query instead of filtering an empty query.
if (xq.empty()) { if (xq.empty()) {
LOGINFO("Db::toNativeQuery: proba query is empty\n" ); LOGINFO("Db::toNativeQuery: proba query is empty\n");
xq = sq; xq = sq;
} else { } else {
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq); xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq);
@ -263,7 +266,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
for (vector<string>::iterator it = m_filetypes.begin(); for (vector<string>::iterator it = m_filetypes.begin();
it != m_filetypes.end(); it++) { it != m_filetypes.end(); it++) {
string term = wrap_prefix(mimetype_prefix) + *it; string term = wrap_prefix(mimetype_prefix) + *it;
LOGDEB0("Adding file type term: [" << (term) << "]\n" ); LOGDEB0("Adding file type term: [" << term << "]\n");
tq = tq.empty() ? Xapian::Query(term) : tq = tq.empty() ? Xapian::Query(term) :
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term)); Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
} }
@ -278,7 +281,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
for (vector<string>::iterator it = m_nfiletypes.begin(); for (vector<string>::iterator it = m_nfiletypes.begin();
it != m_nfiletypes.end(); it++) { it != m_nfiletypes.end(); it++) {
string term = wrap_prefix(mimetype_prefix) + *it; string term = wrap_prefix(mimetype_prefix) + *it;
LOGDEB0("Adding negative file type term: [" << (term) << "]\n" ); LOGDEB0("Adding negative file type term: [" << term << "]\n");
tq = tq.empty() ? Xapian::Query(term) : tq = tq.empty() ? Xapian::Query(term) :
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term)); Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
} }
@ -333,7 +336,8 @@ public:
if (m_lastpos < pos) if (m_lastpos < pos)
m_lastpos = pos; m_lastpos = pos;
bool noexpand = be ? m_ts->nostemexp() : true; bool noexpand = be ? m_ts->nostemexp() : true;
LOGDEB1("TermProcQ::takeword: pushing [" << (term) << "] pos " << (pos) << " noexp " << (noexpand) << "\n" ); LOGDEB1("TermProcQ::takeword: pushing [" << term << "] pos " <<
pos << " noexp " << noexpand << "\n");
if (m_terms[pos].size() < term.size()) { if (m_terms[pos].size() < term.size()) {
m_terms[pos] = term; m_terms[pos] = term;
m_nste[pos] = noexpand; m_nste[pos] = noexpand;
@ -577,13 +581,12 @@ static void prefix_vector(vector<string>& v, const string& prefix)
} }
} }
void SearchDataClauseSimple:: void SearchDataClauseSimple::processSimpleSpan(
processSimpleSpan(Rcl::Db &db, string& ermsg, Rcl::Db &db, string& ermsg, const string& span, int mods, void *pq)
const string& span,
int mods, void * pq)
{ {
vector<Xapian::Query>& pqueries(*(vector<Xapian::Query>*)pq); vector<Xapian::Query>& pqueries(*(vector<Xapian::Query>*)pq);
LOGDEB0("StringToXapianQ::processSimpleSpan: [" << (span) << "] mods 0x" << ((unsigned int)mods) << "\n" ); LOGDEB0("StringToXapianQ::processSimpleSpan: [" << span << "] mods 0x"
<< (unsigned int)mods << "\n");
vector<string> exp; vector<string> exp;
string sterm; // dumb version of user term string sterm; // dumb version of user term
@ -679,7 +682,7 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg,
vector<bool>::const_iterator nxit = splitData->nostemexps().begin(); vector<bool>::const_iterator nxit = splitData->nostemexps().begin();
for (vector<string>::const_iterator it = splitData->terms().begin(); for (vector<string>::const_iterator it = splitData->terms().begin();
it != splitData->terms().end(); it++, nxit++) { it != splitData->terms().end(); it++, nxit++) {
LOGDEB0("ProcessPhrase: processing [" << *it << "]\n" ); LOGDEB0("ProcessPhrase: processing [" << *it << "]\n");
// Adjust when we do stem expansion. Not if disabled by // Adjust when we do stem expansion. Not if disabled by
// caller, not inside phrases, and some versions of xapian // caller, not inside phrases, and some versions of xapian
// will accept only one OR clause inside NEAR. // will accept only one OR clause inside NEAR.
@ -695,7 +698,8 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg,
vector<string> exp; vector<string> exp;
if (!expandTerm(db, ermsg, lmods, *it, exp, sterm, prefix)) if (!expandTerm(db, ermsg, lmods, *it, exp, sterm, prefix))
return; return;
LOGDEB0("ProcessPhraseOrNear: exp size " << (exp.size()) << ", exp: " << (stringsToString(exp)) << "\n" ); LOGDEB0("ProcessPhraseOrNear: exp size " << exp.size() << ", exp: " <<
stringsToString(exp) << "\n");
// groups is used for highlighting, we don't want prefixes in there. // groups is used for highlighting, we don't want prefixes in there.
vector<string> noprefs; vector<string> noprefs;
for (vector<string>::const_iterator it = exp.begin(); for (vector<string>::const_iterator it = exp.begin();
@ -721,7 +725,8 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg,
// Generate an appropriate PHRASE/NEAR query with adjusted slack // Generate an appropriate PHRASE/NEAR query with adjusted slack
// For phrases, give a relevance boost like we do for original terms // For phrases, give a relevance boost like we do for original terms
LOGDEB2("PHRASE/NEAR: alltermcount " << (splitData->alltermcount()) << " lastpos " << (splitData->lastpos()) << "\n" ); LOGDEB2("PHRASE/NEAR: alltermcount " << splitData->alltermcount() <<
" lastpos " << splitData->lastpos() << "\n");
Xapian::Query xq(op, orqueries.begin(), orqueries.end(), Xapian::Query xq(op, orqueries.begin(), orqueries.end(),
splitData->lastpos() + 1 + slack); splitData->lastpos() + 1 + slack);
if (op == Xapian::Query::OP_PHRASE) if (op == Xapian::Query::OP_PHRASE)
@ -791,7 +796,8 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq); vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
int mods = m_modifiers; int mods = m_modifiers;
LOGDEB("StringToXapianQ:pUS:: qstr [" << (iq) << "] fld [" << (m_field) << "] mods 0x" << (mods) << " slack " << (slack) << " near " << (useNear) << "\n" ); LOGDEB("StringToXapianQ:pUS:: qstr [" << iq << "] fld [" << m_field <<
"] mods 0x"<<mods<<" slack " << slack << " near " << useNear <<"\n");
ermsg.erase(); ermsg.erase();
m_curcl = 0; m_curcl = 0;
const StopList stops = db.getStopList(); const StopList stops = db.getStopList();
@ -811,7 +817,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
try { try {
for (vector<string>::iterator it = phrases.begin(); for (vector<string>::iterator it = phrases.begin();
it != phrases.end(); it++) { it != phrases.end(); it++) {
LOGDEB0("strToXapianQ: phrase/word: [" << *it << "]\n" ); LOGDEB0("strToXapianQ: phrase/word: [" << *it << "]\n");
// Anchoring modifiers // Anchoring modifiers
int amods = stringToMods(*it); int amods = stringToMods(*it);
int terminc = amods != 0 ? 1 : 0; int terminc = amods != 0 ? 1 : 0;
@ -849,7 +855,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
slack += tpq.lastpos() - int(tpq.terms().size()) + 1; slack += tpq.lastpos() - int(tpq.terms().size()) + 1;
LOGDEB0("strToXapianQ: termcount: " << (tpq.terms().size()) << "\n" ); LOGDEB0("strToXapianQ: termcount: " << tpq.terms().size() << "\n");
switch (tpq.terms().size() + terminc) { switch (tpq.terms().size() + terminc) {
case 0: case 0:
continue;// ?? continue;// ??
@ -884,7 +890,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
ermsg = "Caught unknown exception"; ermsg = "Caught unknown exception";
} }
if (!ermsg.empty()) { if (!ermsg.empty()) {
LOGERR("stringToXapianQueries: " << (ermsg) << "\n" ); LOGERR("stringToXapianQueries: " << ermsg << "\n");
return false; return false;
} }
return true; return true;
@ -893,7 +899,35 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
// Translate a simple OR or AND search clause. // Translate a simple OR or AND search clause.
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p) bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
{ {
LOGDEB("SearchDataClauseSimple::toNativeQuery: fld [" << (m_field) << "] val [" << (m_text) << "] stemlang [" << (getStemLang()) << "]\n" ); LOGDEB("SearchDataClauseSimple::toNativeQuery: fld [" << m_field <<
"] val [" << m_text << "] stemlang [" << getStemLang() << "]\n");
// Transform (in)equalities into a range query
switch (getrel()) {
case REL_EQUALS:
{
SearchDataClauseRange cl(*this, gettext(), gettext());
bool ret = cl.toNativeQuery(db, p);
m_reason = cl.getReason();
return ret;
}
case REL_LT: case REL_LTE:
{
SearchDataClauseRange cl(*this, "", gettext());
bool ret = cl.toNativeQuery(db, p);
m_reason = cl.getReason();
return ret;
}
case REL_GT: case REL_GTE:
{
SearchDataClauseRange cl(*this, gettext(), "");
bool ret = cl.toNativeQuery(db, p);
m_reason = cl.getReason();
return ret;
}
default:
break;
}
Xapian::Query *qp = (Xapian::Query *)p; Xapian::Query *qp = (Xapian::Query *)p;
*qp = Xapian::Query(); *qp = Xapian::Query();
@ -903,7 +937,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
case SCLT_AND: op = Xapian::Query::OP_AND; break; case SCLT_AND: op = Xapian::Query::OP_AND; break;
case SCLT_OR: op = Xapian::Query::OP_OR; break; case SCLT_OR: op = Xapian::Query::OP_OR; break;
default: default:
LOGERR("SearchDataClauseSimple: bad m_tp " << (m_tp) << "\n" ); LOGERR("SearchDataClauseSimple: bad m_tp " << m_tp << "\n");
m_reason = "Internal error"; m_reason = "Internal error";
return false; return false;
} }
@ -912,7 +946,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
if (!processUserString(db, m_text, m_reason, &pqueries)) if (!processUserString(db, m_text, m_reason, &pqueries))
return false; return false;
if (pqueries.empty()) { if (pqueries.empty()) {
LOGERR("SearchDataClauseSimple: resolved to null query\n" ); LOGERR("SearchDataClauseSimple: resolved to null query\n");
m_reason = string("Resolved to null query. Term too long ? : [" + m_reason = string("Resolved to null query. Term too long ? : [" +
m_text + string("]")); m_text + string("]"));
return false; return false;
@ -925,6 +959,58 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
return true; return true;
} }
// Translate a range clause into a Xapian value query. This only works
// if a Xapian value slot was attributed to the field.
//
// @param db index object, used to retrieve the traits of the field.
// @param p  pointer to the Xapian::Query receiving the result. It is
//           reset to an empty query on entry and is empty on failure.
// @return true on success. On failure, returns false with m_reason set.
//
// An empty m_text produces a "value <= m_t2" query, an empty m_t2 a
// "value >= m_text" query, and two non-empty bounds a closed range.
bool SearchDataClauseRange::toNativeQuery(Rcl::Db &db, void *p)
{
    LOGDEB("SearchDataClauseRange::toNativeQuery: " << m_field <<
           " :[" << m_text << ".." << m_t2 << "]\n");
    Xapian::Query *qp = static_cast<Xapian::Query *>(p);
    *qp = Xapian::Query();

    if (m_field.empty() || (m_text.empty() && m_t2.empty())) {
        m_reason = "Range clause needs a field and a value";
        return false;
    }

    // Get the value number for the field from the configuration
    const FieldTraits *ftp;
    if (!db.fieldToTraits(m_field, &ftp, true)) {
        m_reason = string("field ") + m_field + " not found in configuration";
        return false;
    }
    if (ftp->valueslot == 0) {
        m_reason = string("No value slot specified in configuration for field ")
            + m_field;
        return false;
    }
    LOGDEB("SearchDataClauseRange: value slot " << ftp->valueslot << "\n");

    // Build Xapian VALUE query. The bounds are converted to the format
    // used for the stored values before being handed to Xapian.
    string errstr;
    try {
        if (m_text.empty()) {
            *qp = Xapian::Query(Xapian::Query::OP_VALUE_LE,
                                ftp->valueslot,
                                convert_field_value(*ftp, m_t2));
        } else if (m_t2.empty()) {
            *qp = Xapian::Query(Xapian::Query::OP_VALUE_GE,
                                ftp->valueslot,
                                convert_field_value(*ftp, m_text));
        } else {
            *qp = Xapian::Query(Xapian::Query::OP_VALUE_RANGE,
                                ftp->valueslot,
                                convert_field_value(*ftp, m_text),
                                convert_field_value(*ftp, m_t2));
        }
    }
    XCATCHERROR(errstr);
    if (!errstr.empty()) {
        // Report the captured exception text: without it, neither the
        // log nor the reason says why the query could not be built.
        LOGERR("SearchDataClauseRange: range query creation failed for slot "<<
               ftp->valueslot << ": " << errstr << "\n");
        m_reason = string("Range query creation failed: ") + errstr;
        *qp = Xapian::Query();
        return false;
    }
    return true;
}
// Translate a FILENAME search clause. This always comes // Translate a FILENAME search clause. This always comes
// from a "filename" search from the gui or recollq. A query language // from a "filename" search from the gui or recollq. A query language
// "filename:"-prefixed field will not go through here, but through // "filename:"-prefixed field will not go through here, but through
@ -1018,7 +1104,7 @@ bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
// Translate NEAR or PHRASE clause. // Translate NEAR or PHRASE clause.
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p) bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
{ {
LOGDEB("SearchDataClauseDist::toNativeQuery\n" ); LOGDEB("SearchDataClauseDist::toNativeQuery\n");
Xapian::Query *qp = (Xapian::Query *)p; Xapian::Query *qp = (Xapian::Query *)p;
*qp = Xapian::Query(); *qp = Xapian::Query();
@ -1037,7 +1123,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
if (!processUserString(db, s, m_reason, &pqueries, m_slack, useNear)) if (!processUserString(db, s, m_reason, &pqueries, m_slack, useNear))
return false; return false;
if (pqueries.empty()) { if (pqueries.empty()) {
LOGERR("SearchDataClauseDist: resolved to null query\n" ); LOGERR("SearchDataClauseDist: resolved to null query\n");
m_reason = string("Resolved to null query. Term too long ? : [" + m_reason = string("Resolved to null query. Term too long ? : [" +
m_text + string("]")); m_text + string("]"));
return false; return false;

View File

@ -42,6 +42,7 @@ static string tpToString(SClType tp)
case SCLT_FILENAME: return "FN"; case SCLT_FILENAME: return "FN";
case SCLT_PHRASE: return "PH"; case SCLT_PHRASE: return "PH";
case SCLT_NEAR: return "NE"; case SCLT_NEAR: return "NE";
case SCLT_RANGE: return "RG";
case SCLT_SUB: return "SU"; // Unsupported actually case SCLT_SUB: return "SU"; // Unsupported actually
default: return "UN"; default: return "UN";
} }
@ -101,6 +102,15 @@ string SearchData::asXML()
endl; endl;
} }
os << "<T>" << base64_encode(cl->gettext()) << "</T>" << endl; os << "<T>" << base64_encode(cl->gettext()) << "</T>" << endl;
if (cl->getTp() == SCLT_RANGE) {
SearchDataClauseRange *clr =
dynamic_cast<SearchDataClauseRange*>(cl);
const string& t = clr->gettext2();
if (!t.empty()) {
os << "<T2>" << base64_encode(clr->gettext2()) <<
"</T2>" << endl;
}
}
if (cl->getTp() == SCLT_NEAR || cl->getTp() == SCLT_PHRASE) { if (cl->getTp() == SCLT_NEAR || cl->getTp() == SCLT_PHRASE) {
SearchDataClauseDist *cld = SearchDataClauseDist *cld =
dynamic_cast<SearchDataClauseDist*>(cl); dynamic_cast<SearchDataClauseDist*>(cl);

View File

@ -65,6 +65,20 @@ rclbes = XB ; noterms = 1
# *** USE XY for beginning your local prefixes *** ie: # *** USE XY for beginning your local prefixes *** ie:
# myfield = XYMYPREF # myfield = XYMYPREF
[values]
###########
## Fields which will be stored in Xapian values, which enables range query
## processing on them.
# Entries are specified as 'fieldname = valueslot;[px=val1;py=val2...]'.
# Xapian value slots are 32-bit numbers. Numbers below 1000 are reserved
# by Recoll or Xapian. Numbers above are available for user configuration.
# Values have types, which can be 'int' or 'string' at the moment. ints have
# an additional 'len' attribute, which specifies the padding size used for
# sorting (leading zeroes: all xapian sorting is text-based). 10 is fine
# for an unsigned 32-bit integer.
# myfield = 1001; type=int; len = 10
# mystrfield = 1002; type = string
[stored] [stored]
############################ ############################
# Some fields are stored in the document data record inside the index and # Some fields are stored in the document data record inside the index and