From 595e419d934c6a973aa36e5f2b13157f7e4b53e6 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Wed, 24 Jan 2018 09:43:20 +0100 Subject: [PATCH] Implemented range queries, based on storing fields in xapian values --- src/Makefile.am | 2 + src/common/rclconfig.cpp | 104 ++++++---- src/common/rclconfig.h | 6 +- src/qtgui/xmltosd.cpp | 8 +- src/query/location.hh | 33 ++-- src/query/position.hh | 16 +- src/query/stack.hh | 15 +- src/query/wasaparse.cpp | 366 +++++++++++++++++++++++------------- src/query/wasaparse.hpp | 67 ++++--- src/query/wasaparse.ypp | 61 +++++- src/query/wasaparseaux.cpp | 3 +- src/rcldb/rcldb.cpp | 54 +++--- src/rcldb/rcldb_p.h | 2 +- src/rcldb/rclvalues.cpp | 87 +++++++++ src/rcldb/rclvalues.h | 29 +++ src/rcldb/searchdata.cpp | 8 + src/rcldb/searchdata.h | 256 ++++++++++++------------- src/rcldb/searchdatatox.cpp | 162 ++++++++++++---- src/rcldb/searchdataxml.cpp | 10 + src/sampleconf/fields | 14 ++ 20 files changed, 875 insertions(+), 428 deletions(-) create mode 100644 src/rcldb/rclvalues.cpp create mode 100644 src/rcldb/rclvalues.h diff --git a/src/Makefile.am b/src/Makefile.am index 75128525..e337b4bf 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -165,6 +165,8 @@ rcldb/rclquery.cpp \ rcldb/rclquery.h \ rcldb/rclquery_p.h \ rcldb/rclterms.cpp \ +rcldb/rclvalues.cpp \ +rcldb/rclvalues.h \ rcldb/searchdata.cpp \ rcldb/searchdata.h \ rcldb/searchdatatox.cpp \ diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index 87651030..54f368ee 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -936,15 +936,15 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc) // Build a direct map avoiding all indirections for field to // prefix translation // Add direct prefixes from the [prefixes] section - vectortps = m_fields->getNames("prefixes"); - for (vector::const_iterator it = tps.begin(); - it != tps.end(); it++) { + vector tps = m_fields->getNames("prefixes"); + for (const auto& fieldname : tps) { 
string val; - m_fields->get(*it, val, "prefixes"); + m_fields->get(fieldname, val, "prefixes"); ConfSimple attrs; FieldTraits ft; + // fieldname = prefix ; attr1=val;attr2=val... if (!valueSplitAttributes(val, ft.pfx, attrs)) { - LOGERR("readFieldsConfig: bad config line for [" << *it << + LOGERR("readFieldsConfig: bad config line for [" << fieldname << "]: [" << val << "]\n"); return 0; } @@ -957,21 +957,67 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc) ft.pfxonly = stringToBool(tval); if (attrs.get("noterms", tval)) ft.noterms = stringToBool(tval); - m_fldtotraits[stringtolower(*it)] = ft; - LOGDEB2("readFieldsConfig: [" << *it << "] -> [" << ft.pfx << + m_fldtotraits[stringtolower(fieldname)] = ft; + LOGDEB2("readFieldsConfig: [" << fieldname << "] -> [" << ft.pfx << "] " << ft.wdfinc << " " << ft.boost << "\n"); } + // Values section + tps = m_fields->getNames("values"); + for (const auto& fieldname : tps) { + string canonic = stringtolower(fieldname); // canonic name + string val; + m_fields->get(fieldname, val, "values"); + ConfSimple attrs; + string svslot; + // fieldname = valueslot ; attr1=val;attr2=val... 
+ if (!valueSplitAttributes(val, svslot, attrs)) { + LOGERR("readFieldsConfig: bad value line for [" << fieldname << + "]: [" << val << "]\n"); + return 0; + } + uint32_t valueslot = uint32_t(atoi(svslot.c_str())); + if (valueslot == 0) { + LOGERR("readFieldsConfig: found 0 value slot for [" << fieldname << + "]: [" << val << "]\n"); + continue; + } + + string tval; + FieldTraits::ValueType valuetype{FieldTraits::STR}; + if (attrs.get("type", tval)) { + if (tval == "string") { + valuetype = FieldTraits::STR; + } else if (tval == "int") { + valuetype = FieldTraits::INT; + } else { + LOGERR("readFieldsConfig: bad type for value for " << + fieldname << " : " << tval << endl); + return 0; + } + } + int valuelen{0}; + if (attrs.get("len", tval)) { + valuelen = atoi(tval.c_str()); + } + + // Find or insert traits entry + const auto pit = + m_fldtotraits.insert( + pair(canonic, FieldTraits())).first; + pit->second.valueslot = valueslot; + pit->second.valuetype = valuetype; + pit->second.valuelen = valuelen; + } + // Add prefixes for aliases and build alias-to-canonic map while // we're at it. Having the aliases in the prefix map avoids an // additional indirection at index time. 
tps = m_fields->getNames("aliases"); - for (vector::const_iterator it = tps.begin(); - it != tps.end(); it++){ - string canonic = stringtolower(*it); // canonic name + for (const auto& fieldname : tps) { + string canonic = stringtolower(fieldname); // canonic name FieldTraits ft; - map::const_iterator pit = - m_fldtotraits.find(canonic); + const auto pit = m_fldtotraits.find(canonic); if (pit != m_fldtotraits.end()) { ft = pit->second; } @@ -979,53 +1025,45 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc) m_fields->get(canonic, aliases, "aliases"); vector l; stringToStrings(aliases, l); - for (vector::const_iterator ait = l.begin(); - ait != l.end(); ait++) { + for (const auto& alias : l) { if (pit != m_fldtotraits.end()) - m_fldtotraits[stringtolower(*ait)] = ft; - m_aliastocanon[stringtolower(*ait)] = canonic; + m_fldtotraits[stringtolower(alias)] = ft; + m_aliastocanon[stringtolower(alias)] = canonic; } } // Query aliases map tps = m_fields->getNames("queryaliases"); - for (vector::const_iterator it = tps.begin(); - it != tps.end(); it++){ - string canonic = stringtolower(*it); // canonic name + for (const auto& entry: tps) { + string canonic = stringtolower(entry); // canonic name string aliases; m_fields->get(canonic, aliases, "queryaliases"); vector l; stringToStrings(aliases, l); - for (vector::const_iterator ait = l.begin(); - ait != l.end(); ait++) { - m_aliastoqcanon[stringtolower(*ait)] = canonic; + for (const auto& alias : l) { + m_aliastoqcanon[stringtolower(alias)] = canonic; } } #if 0 for (map::const_iterator it = m_fldtotraits.begin(); it != m_fldtotraits.end(); it++) { - LOGDEB("readFieldsConfig: [" << *it << "] -> [" << it->second.pfx << + LOGDEB("readFieldsConfig: [" << entry << "] -> [" << it->second.pfx << "] " << it->second.wdfinc << " " << it->second.boost << "\n"); } #endif vector sl = m_fields->getNames("stored"); - if (!sl.empty()) { - for (vector::const_iterator it = sl.begin(); - it != sl.end(); it++) { - string fld = 
fieldCanon(stringtolower(*it)); - m_storedFields.insert(fld); - } + for (const auto& fieldname : sl) { + m_storedFields.insert(fieldCanon(stringtolower(fieldname))); } // Extended file attribute to field translations vectorxattrs = m_fields->getNames("xattrtofields"); - for (vector::const_iterator it = xattrs.begin(); - it != xattrs.end(); it++) { + for (const auto& xattr : xattrs) { string val; - m_fields->get(*it, val, "xattrtofields"); - m_xattrtofld[*it] = val; + m_fields->get(xattr, val, "xattrtofields"); + m_xattrtofld[xattr] = val; } return true; diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index 744fbb93..90be088f 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -78,7 +78,11 @@ struct MDReaper { // Data associated to a indexed field name: struct FieldTraits { - string pfx; // indexing prefix, + string pfx; // indexing prefix, + uint32_t valueslot{0}; + enum ValueType {STR, INT}; + ValueType valuetype{STR}; + int valuelen{0}; int wdfinc{1}; // Index time term frequency increment (default 1) double boost{1.0}; // Query time boost (default 1.0) bool pfxonly{false}; // Suppress prefix-less indexing diff --git a/src/qtgui/xmltosd.cpp b/src/qtgui/xmltosd.cpp index 5b18ae0e..f7a90a75 100644 --- a/src/qtgui/xmltosd.cpp +++ b/src/qtgui/xmltosd.cpp @@ -59,6 +59,7 @@ private: { currentText = whatclause = ""; text.clear(); + text2.clear(); field.clear(); slack = 0; d = m = y = di.d1 = di.m1 = di.y1 = di.d2 = di.m2 = di.y2 = 0; @@ -69,7 +70,7 @@ private: // Temporary data while parsing. 
QString currentText; QString whatclause; - string field, text; + string field, text, text2; int slack; int d, m, y; DateInterval di; @@ -120,6 +121,8 @@ bool SDHXMLHandler::endElement(const QString & /* namespaceURI */, field = base64_decode(qs2utf8s(currentText.trimmed())); } else if (qName == "T") { text = base64_decode(qs2utf8s(currentText.trimmed())); + } else if (qName == "T2") { + text2 = base64_decode(qs2utf8s(currentText.trimmed())); } else if (qName == "S") { slack = atoi((const char *)currentText.toUtf8()); } else if (qName == "C") { @@ -130,6 +133,9 @@ bool SDHXMLHandler::endElement(const QString & /* namespaceURI */, } else if (whatclause == "OR") { c = new SearchDataClauseSimple(SCLT_OR, text, field); c->setexclude(exclude); + } else if (whatclause == "RG") { + c = new SearchDataClauseRange(text, text2, field); + c->setexclude(exclude); } else if (whatclause == "EX") { // Compat with old hist. We don't generete EX (SCLT_EXCL) anymore // it's replaced with OR + exclude flag diff --git a/src/query/location.hh b/src/query/location.hh index 7b708fd4..8f642596 100644 --- a/src/query/location.hh +++ b/src/query/location.hh @@ -1,8 +1,8 @@ -// A Bison parser, made by GNU Bison 3.0.2. +// A Bison parser, made by GNU Bison 3.0.4. // Locations for Bison parsers in C++ -// Copyright (C) 2002-2013 Free Software Foundation, Inc. +// Copyright (C) 2002-2015 Free Software Foundation, Inc. // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -42,7 +42,7 @@ namespace yy { -#line 46 "location.hh" // location.cc:291 +#line 46 "location.hh" // location.cc:296 /// Abstract a location. class location { @@ -111,36 +111,42 @@ namespace yy { position end; }; - /// Join two location objects to create a location. - inline location operator+ (location res, const location& end) + /// Join two locations, in place. 
+ inline location& operator+= (location& res, const location& end) { res.end = end.end; return res; } - /// Change end position in place. + /// Join two locations. + inline location operator+ (location res, const location& end) + { + return res += end; + } + + /// Add \a width columns to the end position, in place. inline location& operator+= (location& res, int width) { res.columns (width); return res; } - /// Change end position. + /// Add \a width columns to the end position. inline location operator+ (location res, int width) { return res += width; } - /// Change end position in place. + /// Subtract \a width columns to the end position, in place. inline location& operator-= (location& res, int width) { return res += -width; } - /// Change end position. - inline location operator- (const location& begin, int width) + /// Subtract \a width columns to the end position. + inline location operator- (location res, int width) { - return begin + -width; + return res -= width; } /// Compare two location objects. @@ -168,8 +174,7 @@ namespace yy { operator<< (std::basic_ostream& ostr, const location& loc) { unsigned int end_col = 0 < loc.end.column ? loc.end.column - 1 : 0; - ostr << loc.begin// << "(" << loc.end << ") " -; + ostr << loc.begin; if (loc.end.filename && (!loc.begin.filename || *loc.begin.filename != *loc.end.filename)) @@ -183,5 +188,5 @@ namespace yy { } // yy -#line 187 "location.hh" // location.cc:291 +#line 192 "location.hh" // location.cc:296 #endif // !YY_YY_LOCATION_HH_INCLUDED diff --git a/src/query/position.hh b/src/query/position.hh index 107d8e11..5cd394ac 100644 --- a/src/query/position.hh +++ b/src/query/position.hh @@ -1,8 +1,8 @@ -// A Bison parser, made by GNU Bison 3.0.2. +// A Bison parser, made by GNU Bison 3.0.4. // Positions for Bison parsers in C++ -// Copyright (C) 2002-2013 Free Software Foundation, Inc. +// Copyright (C) 2002-2015 Free Software Foundation, Inc. 
// This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -52,7 +52,7 @@ namespace yy { -#line 56 "position.hh" // location.cc:291 +#line 56 "position.hh" // location.cc:296 /// Abstract a position. class position { @@ -114,7 +114,7 @@ namespace yy { } }; - /// Add and assign a position. + /// Add \a width columns, in place. inline position& operator+= (position& res, int width) { @@ -122,21 +122,21 @@ namespace yy { return res; } - /// Add two position objects. + /// Add \a width columns. inline position operator+ (position res, int width) { return res += width; } - /// Add and assign a position. + /// Subtract \a width columns, in place. inline position& operator-= (position& res, int width) { return res += -width; } - /// Add two position objects. + /// Subtract \a width columns. inline position operator- (position res, int width) { @@ -176,5 +176,5 @@ namespace yy { } // yy -#line 180 "position.hh" // location.cc:291 +#line 180 "position.hh" // location.cc:296 #endif // !YY_YY_POSITION_HH_INCLUDED diff --git a/src/query/stack.hh b/src/query/stack.hh index 87d8f3ef..bf8486e3 100644 --- a/src/query/stack.hh +++ b/src/query/stack.hh @@ -1,8 +1,8 @@ -// A Bison parser, made by GNU Bison 3.0.2. +// A Bison parser, made by GNU Bison 3.0.4. // Stack handling for Bison parsers in C++ -// Copyright (C) 2002-2013 Free Software Foundation, Inc. +// Copyright (C) 2002-2015 Free Software Foundation, Inc. 
// This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -42,7 +42,7 @@ namespace yy { -#line 46 "stack.hh" // stack.hh:133 +#line 46 "stack.hh" // stack.hh:132 template > class stack { @@ -54,12 +54,12 @@ namespace yy { stack () : seq_ () { + seq_.reserve (200); } stack (unsigned int n) : seq_ (n) - { - } + {} inline T& @@ -136,8 +136,7 @@ namespace yy { slice (const S& stack, unsigned int range) : stack_ (stack) , range_ (range) - { - } + {} inline const T& @@ -153,6 +152,6 @@ namespace yy { } // yy -#line 157 "stack.hh" // stack.hh:133 +#line 156 "stack.hh" // stack.hh:132 #endif // !YY_YY_STACK_HH_INCLUDED diff --git a/src/query/wasaparse.cpp b/src/query/wasaparse.cpp index 81293f3e..9e9a4098 100644 --- a/src/query/wasaparse.cpp +++ b/src/query/wasaparse.cpp @@ -1,8 +1,8 @@ -// A Bison parser, made by GNU Bison 3.0.2. +// A Bison parser, made by GNU Bison 3.0.4. // Skeleton implementation for Bison LALR(1) parsers in C++ -// Copyright (C) 2002-2013 Free Software Foundation, Inc. +// Copyright (C) 2002-2015 Free Software Foundation, Inc. // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -32,7 +32,7 @@ // First part of user declarations. 
-#line 1 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:399 +#line 1 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:404 #define YYDEBUG 1 #include "autoconfig.h" @@ -48,7 +48,7 @@ using namespace std; -// #define LOG_PARSER +//#define LOG_PARSER #ifdef LOG_PARSER #define LOGP(X) {cerr << X;} #else @@ -69,7 +69,7 @@ static void addSubQuery(WasaParserDriver *d, } -#line 73 "y.tab.c" // lalr1.cc:399 +#line 73 "y.tab.c" // lalr1.cc:404 # ifndef YY_NULLPTR # if defined __cplusplus && 201103L <= __cplusplus @@ -83,7 +83,7 @@ static void addSubQuery(WasaParserDriver *d, // User implementation prologue. -#line 87 "y.tab.c" // lalr1.cc:407 +#line 87 "y.tab.c" // lalr1.cc:412 #ifndef YY_ @@ -160,7 +160,7 @@ static void addSubQuery(WasaParserDriver *d, #endif // !YYDEBUG #define yyerrok (yyerrstatus_ = 0) -#define yyclearin (yyempty = true) +#define yyclearin (yyla.clear ()) #define YYACCEPT goto yyacceptlab #define YYABORT goto yyabortlab @@ -169,7 +169,7 @@ static void addSubQuery(WasaParserDriver *d, namespace yy { -#line 173 "y.tab.c" // lalr1.cc:474 +#line 173 "y.tab.c" // lalr1.cc:479 /* Return YYSTR after stripping away unnecessary quotes and backslashes, so that it's suitable for yyerror. The heuristic is @@ -273,6 +273,23 @@ namespace yy { inline parser::basic_symbol::~basic_symbol () { + clear (); + } + + template + inline + void + parser::basic_symbol::clear () + { + Base::clear (); + } + + template + inline + bool + parser::basic_symbol::empty () const + { + return Base::type_get () == empty_symbol; } template @@ -288,7 +305,7 @@ namespace yy { // by_type. 
inline parser::by_type::by_type () - : type (empty) + : type (empty_symbol) {} inline @@ -301,12 +318,19 @@ namespace yy { : type (yytranslate_ (t)) {} + inline + void + parser::by_type::clear () + { + type = empty_symbol; + } + inline void parser::by_type::move (by_type& that) { type = that.type; - that.type = empty; + that.clear (); } inline @@ -320,7 +344,7 @@ namespace yy { // by_state. inline parser::by_state::by_state () - : state (empty) + : state (empty_state) {} inline @@ -328,12 +352,19 @@ namespace yy { : state (other.state) {} + inline + void + parser::by_state::clear () + { + state = empty_state; + } + inline void parser::by_state::move (by_state& that) { state = that.state; - that.state = empty; + that.clear (); } inline @@ -345,7 +376,10 @@ namespace yy { parser::symbol_number_type parser::by_state::type_get () const { - return state == empty ? 0 : yystos_[state]; + if (state == empty_state) + return empty_symbol; + else + return yystos_[state]; } inline @@ -359,7 +393,7 @@ namespace yy { { value = that.value; // that is emptied. 
- that.type = empty; + that.type = empty_symbol; } inline @@ -386,30 +420,30 @@ namespace yy { { case 3: // WORD -#line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599 +#line 52 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:614 {delete (yysym.value.str);} -#line 392 "y.tab.c" // lalr1.cc:599 +#line 426 "y.tab.c" // lalr1.cc:614 break; case 4: // QUOTED -#line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599 +#line 52 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:614 {delete (yysym.value.str);} -#line 399 "y.tab.c" // lalr1.cc:599 +#line 433 "y.tab.c" // lalr1.cc:614 break; case 5: // QUALIFIERS -#line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599 +#line 52 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:614 {delete (yysym.value.str);} -#line 406 "y.tab.c" // lalr1.cc:599 +#line 440 "y.tab.c" // lalr1.cc:614 break; - case 22: // complexfieldname + case 23: // complexfieldname -#line 51 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:599 +#line 52 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:614 {delete (yysym.value.str);} -#line 413 "y.tab.c" // lalr1.cc:599 +#line 447 "y.tab.c" // lalr1.cc:614 break; @@ -427,6 +461,10 @@ namespace yy { std::ostream& yyoutput = yyo; YYUSE (yyoutput); symbol_number_type yytype = yysym.type_get (); + // Avoid a (spurious) G++ 4.8 warning about "array subscript is + // below array bounds". + if (yysym.empty ()) + std::abort (); yyo << (yytype < yyntokens_ ? "token" : "nterm") << ' ' << yytname_[yytype] << " (" << yysym.location << ": "; @@ -511,9 +549,6 @@ namespace yy { int parser::parse () { - /// Whether yyla contains a lookahead. - bool yyempty = true; - // State. int yyn; /// Length of the RHS of the rule being reduced. 
@@ -565,7 +600,7 @@ namespace yy { goto yydefault; // Read a lookahead token. - if (yyempty) + if (yyla.empty ()) { YYCDEBUG << "Reading a token: "; try @@ -577,7 +612,6 @@ namespace yy { error (yyexc); goto yyerrlab1; } - yyempty = false; } YY_SYMBOL_PRINT ("Next token is", yyla); @@ -597,9 +631,6 @@ namespace yy { goto yyreduce; } - // Discard the token being shifted. - yyempty = true; - // Count tokens shifted since error; after three, turn off error status. if (yyerrstatus_) --yyerrstatus_; @@ -649,7 +680,7 @@ namespace yy { switch (yyn) { case 2: -#line 72 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 +#line 74 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { // It's possible that we end up with no query (e.g.: because just a // date filter was set, no terms). Allocate an empty query so that we @@ -660,11 +691,11 @@ namespace yy { else d->m_result = (yystack_[0].value.sd); } -#line 664 "y.tab.c" // lalr1.cc:847 +#line 695 "y.tab.c" // lalr1.cc:859 break; case 3: -#line 85 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 +#line 87 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("q: query query\n"); Rcl::SearchData *sd = 0; @@ -675,11 +706,11 @@ namespace yy { } (yylhs.value.sd) = sd; } -#line 679 "y.tab.c" // lalr1.cc:847 +#line 710 "y.tab.c" // lalr1.cc:859 break; case 4: -#line 96 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 +#line 98 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("q: query AND query\n"); Rcl::SearchData *sd = 0; @@ -690,11 +721,11 @@ namespace yy { } (yylhs.value.sd) = sd; } -#line 694 "y.tab.c" // lalr1.cc:847 +#line 725 "y.tab.c" // lalr1.cc:859 break; case 5: -#line 107 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 +#line 109 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // 
lalr1.cc:859 { LOGP("query: query OR query\n"); Rcl::SearchData *top = 0; @@ -705,20 +736,20 @@ namespace yy { } (yylhs.value.sd) = top; } -#line 709 "y.tab.c" // lalr1.cc:847 +#line 740 "y.tab.c" // lalr1.cc:859 break; case 6: -#line 118 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 +#line 120 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("q: ( query )\n"); (yylhs.value.sd) = (yystack_[1].value.sd); } -#line 718 "y.tab.c" // lalr1.cc:847 +#line 749 "y.tab.c" // lalr1.cc:859 break; case 7: -#line 124 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 +#line 126 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("q: fieldexpr\n"); Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang); @@ -729,20 +760,20 @@ namespace yy { (yylhs.value.sd) = 0; } } -#line 733 "y.tab.c" // lalr1.cc:847 +#line 764 "y.tab.c" // lalr1.cc:859 break; case 8: -#line 137 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 +#line 139 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("fe: simple fieldexpr: " << (yystack_[0].value.cl)->gettext() << endl); (yylhs.value.cl) = (yystack_[0].value.cl); } -#line 742 "y.tab.c" // lalr1.cc:847 +#line 773 "y.tab.c" // lalr1.cc:859 break; case 9: -#line 142 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 +#line 144 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("fe: " << *(yystack_[2].value.str) << " = " << (yystack_[0].value.cl)->gettext() << endl); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); @@ -750,11 +781,11 @@ namespace yy { (yylhs.value.cl) = (yystack_[0].value.cl); delete (yystack_[2].value.str); } -#line 754 "y.tab.c" // lalr1.cc:847 +#line 785 "y.tab.c" // lalr1.cc:859 break; case 10: -#line 150 
"/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 +#line 152 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("fe: " << *(yystack_[2].value.str) << " : " << (yystack_[0].value.cl)->gettext() << endl); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); @@ -762,23 +793,35 @@ namespace yy { (yylhs.value.cl) = (yystack_[0].value.cl); delete (yystack_[2].value.str); } -#line 766 "y.tab.c" // lalr1.cc:847 +#line 797 "y.tab.c" // lalr1.cc:859 break; case 11: -#line 158 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 +#line 160 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { - LOGP(cerr << "fe: " << *(yystack_[2].value.str) << " < " << (yystack_[0].value.cl)->gettext() << endl); + LOGP("fe: " << *(yystack_[2].value.str) << " : " << (yystack_[0].value.rg)->gettext() << endl); + (yystack_[0].value.rg)->setfield(*(yystack_[2].value.str)); + (yystack_[0].value.rg)->setrel(Rcl::SearchDataClause::REL_CONTAINS); + (yylhs.value.cl) = (yystack_[0].value.rg); + delete (yystack_[2].value.str); +} +#line 809 "y.tab.c" // lalr1.cc:859 + break; + + case 12: +#line 168 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 + { + LOGP("fe: " << *(yystack_[2].value.str) << " < " << (yystack_[0].value.cl)->gettext() << endl); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); (yystack_[0].value.cl)->setrel(Rcl::SearchDataClause::REL_LT); (yylhs.value.cl) = (yystack_[0].value.cl); delete (yystack_[2].value.str); } -#line 778 "y.tab.c" // lalr1.cc:847 +#line 821 "y.tab.c" // lalr1.cc:859 break; - case 12: -#line 166 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 + case 13: +#line 176 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("fe: " << *(yystack_[2].value.str) << " <= " << (yystack_[0].value.cl)->gettext() << endl); 
(yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); @@ -786,11 +829,11 @@ namespace yy { (yylhs.value.cl) = (yystack_[0].value.cl); delete (yystack_[2].value.str); } -#line 790 "y.tab.c" // lalr1.cc:847 +#line 833 "y.tab.c" // lalr1.cc:859 break; - case 13: -#line 174 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 + case 14: +#line 184 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("fe: " << *(yystack_[2].value.str) << " > " << (yystack_[0].value.cl)->gettext() << endl); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); @@ -798,11 +841,11 @@ namespace yy { (yylhs.value.cl) = (yystack_[0].value.cl); delete (yystack_[2].value.str); } -#line 802 "y.tab.c" // lalr1.cc:847 +#line 845 "y.tab.c" // lalr1.cc:859 break; - case 14: -#line 182 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 + case 15: +#line 192 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("fe: " << *(yystack_[2].value.str) << " >= " << (yystack_[0].value.cl)->gettext() << endl); (yystack_[0].value.cl)->setfield(*(yystack_[2].value.str)); @@ -810,69 +853,100 @@ namespace yy { (yylhs.value.cl) = (yystack_[0].value.cl); delete (yystack_[2].value.str); } -#line 814 "y.tab.c" // lalr1.cc:847 +#line 857 "y.tab.c" // lalr1.cc:859 break; - case 15: -#line 190 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 + case 16: +#line 200 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("fe: - fieldexpr[" << (yystack_[0].value.cl)->gettext() << "]" << endl); (yystack_[0].value.cl)->setexclude(true); (yylhs.value.cl) = (yystack_[0].value.cl); } -#line 824 "y.tab.c" // lalr1.cc:847 +#line 867 "y.tab.c" // lalr1.cc:859 break; - case 16: -#line 200 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 + case 17: +#line 210 
"/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("cfn: WORD" << endl); (yylhs.value.str) = (yystack_[0].value.str); } -#line 833 "y.tab.c" // lalr1.cc:847 +#line 876 "y.tab.c" // lalr1.cc:859 break; - case 17: -#line 206 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 + case 18: +#line 216 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("cfn: complexfieldname ':' WORD" << endl); (yylhs.value.str) = new string(*(yystack_[2].value.str) + string(":") + *(yystack_[0].value.str)); delete (yystack_[2].value.str); delete (yystack_[0].value.str); } -#line 844 "y.tab.c" // lalr1.cc:847 +#line 887 "y.tab.c" // lalr1.cc:859 break; - case 18: -#line 215 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 + case 19: +#line 225 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 + { + LOGP("Range: " << *(yystack_[2].value.str) << string(" .. ") << *(yystack_[0].value.str) << endl); + (yylhs.value.rg) = new Rcl::SearchDataClauseRange(*(yystack_[2].value.str), *(yystack_[0].value.str)); + delete (yystack_[2].value.str); + delete (yystack_[0].value.str); +} +#line 898 "y.tab.c" // lalr1.cc:859 + break; + + case 20: +#line 233 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 + { + LOGP("Range: " << "" << string(" .. ") << *(yystack_[0].value.str) << endl); + (yylhs.value.rg) = new Rcl::SearchDataClauseRange("", *(yystack_[0].value.str)); + delete (yystack_[0].value.str); +} +#line 908 "y.tab.c" // lalr1.cc:859 + break; + + case 21: +#line 240 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 + { + LOGP("Range: " << *(yystack_[1].value.str) << string(" .. 
") << "" << endl); + (yylhs.value.rg) = new Rcl::SearchDataClauseRange(*(yystack_[1].value.str), ""); + delete (yystack_[1].value.str); +} +#line 918 "y.tab.c" // lalr1.cc:859 + break; + + case 22: +#line 249 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("term[" << *(yystack_[0].value.str) << "]" << endl); (yylhs.value.cl) = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, *(yystack_[0].value.str)); delete (yystack_[0].value.str); } -#line 854 "y.tab.c" // lalr1.cc:847 +#line 928 "y.tab.c" // lalr1.cc:859 break; - case 19: -#line 221 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 + case 23: +#line 255 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { (yylhs.value.cl) = (yystack_[0].value.cl); } -#line 862 "y.tab.c" // lalr1.cc:847 +#line 936 "y.tab.c" // lalr1.cc:859 break; - case 20: -#line 227 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 + case 24: +#line 261 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("QUOTED[" << *(yystack_[0].value.str) << "]" << endl); (yylhs.value.cl) = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *(yystack_[0].value.str), 0); delete (yystack_[0].value.str); } -#line 872 "y.tab.c" // lalr1.cc:847 +#line 946 "y.tab.c" // lalr1.cc:859 break; - case 21: -#line 233 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:847 + case 25: +#line 267 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:859 { LOGP("QUOTED[" << *(yystack_[1].value.str) << "] QUALIFIERS[" << *(yystack_[0].value.str) << "]" << endl); Rcl::SearchDataClauseDist *cl = @@ -882,11 +956,11 @@ namespace yy { delete (yystack_[1].value.str); delete (yystack_[0].value.str); } -#line 886 "y.tab.c" // lalr1.cc:847 +#line 960 "y.tab.c" // lalr1.cc:859 break; -#line 890 "y.tab.c" // lalr1.cc:847 +#line 964 "y.tab.c" // lalr1.cc:859 default: break; } @@ -914,8 
+988,7 @@ namespace yy { if (!yyerrstatus_) { ++yynerrs_; - error (yyla.location, yysyntax_error_ (yystack_[0].state, - yyempty ? yyempty_ : yyla.type_get ())); + error (yyla.location, yysyntax_error_ (yystack_[0].state, yyla)); } @@ -928,10 +1001,10 @@ namespace yy { // Return failure if at end of input. if (yyla.type_get () == yyeof_) YYABORT; - else if (!yyempty) + else if (!yyla.empty ()) { yy_destroy_ ("Error: discarding", yyla); - yyempty = true; + yyla.clear (); } } @@ -1007,7 +1080,7 @@ namespace yy { goto yyreturn; yyreturn: - if (!yyempty) + if (!yyla.empty ()) yy_destroy_ ("Cleanup: discarding lookahead", yyla); /* Do not reclaim the symbols of the rule whose action triggered @@ -1027,7 +1100,7 @@ namespace yy { << std::endl; // Do not try to display the values of the reclaimed symbols, // as their printer might throw an exception. - if (!yyempty) + if (!yyla.empty ()) yy_destroy_ (YY_NULLPTR, yyla); while (1 < yystack_.size ()) @@ -1047,9 +1120,8 @@ namespace yy { // Generate an error message. std::string - parser::yysyntax_error_ (state_type yystate, symbol_number_type yytoken) const + parser::yysyntax_error_ (state_type yystate, const symbol_type& yyla) const { - std::string yyres; // Number of reported tokens (one for the "unexpected", one per // "expected"). size_t yycount = 0; @@ -1063,7 +1135,7 @@ namespace yy { the only way this function was invoked is if the default action is an error action. In that case, don't check for expected tokens because there are none. - - The only way there can be no lookahead present (in yytoken) is + - The only way there can be no lookahead present (in yyla) is if this state is a consistent state with a default action. Thus, detecting the absence of a lookahead is sufficient to determine that there is no unexpected or expected token to @@ -1083,8 +1155,9 @@ namespace yy { token that will not be accepted due to an error action in a later state. 
*/ - if (yytoken != yyempty_) + if (!yyla.empty ()) { + int yytoken = yyla.type_get (); yyarg[yycount++] = yytname_[yytoken]; int yyn = yypact_[yystate]; if (!yy_pact_value_is_default_ (yyn)) @@ -1127,6 +1200,7 @@ namespace yy { #undef YYCASE_ } + std::string yyres; // Argument number. size_t yyi = 0; for (char const* yyp = yyformat; *yyp; ++yyp) @@ -1143,83 +1217,85 @@ namespace yy { const signed char parser::yypact_ninf_ = -3; - const signed char parser::yytable_ninf_ = -18; + const signed char parser::yytable_ninf_ = -19; const signed char parser::yypact_[] = { - 24, 25, 3, 24, 26, 6, 16, -3, 31, -3, - -3, -3, 1, -3, -3, 24, 24, 4, -2, 9, - -2, -2, -2, -2, -3, 4, -3, -3, -3, 37, - -3, -3, -3, -3, -3 + 31, 32, 3, 31, 33, 6, 14, -3, 38, -3, + -3, -3, 1, -3, -3, 31, 31, 4, -2, 9, + -2, -2, -2, -2, -3, 4, -3, -3, -3, 16, + 18, -3, -3, -3, -3, -3, -3, 22, -3, -3 }; const unsigned char parser::yydefact_[] = { - 0, 18, 20, 0, 0, 0, 2, 7, 0, 8, - 19, 21, 0, 15, 1, 0, 0, 3, 0, 0, - 0, 0, 0, 0, 6, 4, 5, 18, 9, 18, - 10, 12, 11, 14, 13 + 0, 22, 24, 0, 0, 0, 2, 7, 0, 8, + 23, 25, 0, 16, 1, 0, 0, 3, 0, 0, + 0, 0, 0, 0, 6, 4, 5, 22, 9, 22, + 0, 11, 10, 13, 12, 15, 14, 21, 20, 19 }; const signed char parser::yypgoto_[] = { - -3, -3, 0, 13, -3, 36, -3 + -3, -3, 0, 34, -3, -3, 37, -3 }; const signed char parser::yydefgoto_[] = { - -1, 5, 17, 7, 8, 9, 10 + -1, 5, 17, 7, 8, 31, 9, 10 }; const signed char parser::yytable_[] = { 6, 27, 2, 12, 1, 2, 14, 15, 11, 3, - 4, 16, 29, 2, 16, 25, 26, 13, 24, 1, - 2, 0, 15, 0, 3, 4, 16, 1, 2, 1, - 2, 0, 3, 4, 0, 4, -16, -16, -16, -16, - -16, -16, 18, 19, 20, 21, 22, 23, -17, -17, - -17, -17, -17, -17, 28, 30, 31, 32, 33, 34 + 4, 16, 29, 2, 16, 25, 26, 1, 2, 24, + 15, 38, 3, 4, 16, 39, 30, -18, -18, -18, + -18, -18, -18, 37, 1, 2, 1, 2, 13, 3, + 4, 0, 4, -17, -17, -17, -17, -17, -17, 18, + 19, 20, 21, 22, 23, 28, 32, 33, 34, 35, + 36 }; const signed char parser::yycheck_[] = { 0, 3, 4, 3, 3, 4, 0, 6, 5, 8, - 9, 10, 3, 4, 10, 15, 16, 4, 17, 3, - 
4, -1, 6, -1, 8, 9, 10, 3, 4, 3, - 4, -1, 8, 9, -1, 9, 11, 12, 13, 14, - 15, 16, 11, 12, 13, 14, 15, 16, 11, 12, - 13, 14, 15, 16, 18, 19, 20, 21, 22, 23 + 9, 10, 3, 4, 10, 15, 16, 3, 4, 18, + 6, 3, 8, 9, 10, 3, 17, 11, 12, 13, + 14, 15, 16, 17, 3, 4, 3, 4, 4, 8, + 9, -1, 9, 11, 12, 13, 14, 15, 16, 11, + 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, + 23 }; const unsigned char parser::yystos_[] = { - 0, 3, 4, 8, 9, 19, 20, 21, 22, 23, - 24, 5, 20, 21, 0, 6, 10, 20, 11, 12, - 13, 14, 15, 16, 17, 20, 20, 3, 23, 3, - 23, 23, 23, 23, 23 + 0, 3, 4, 8, 9, 20, 21, 22, 23, 25, + 26, 5, 21, 22, 0, 6, 10, 21, 11, 12, + 13, 14, 15, 16, 18, 21, 21, 3, 25, 3, + 17, 24, 25, 25, 25, 25, 25, 17, 3, 3 }; const unsigned char parser::yyr1_[] = { - 0, 18, 19, 20, 20, 20, 20, 20, 21, 21, - 21, 21, 21, 21, 21, 21, 22, 22, 23, 23, - 24, 24 + 0, 19, 20, 21, 21, 21, 21, 21, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 23, 23, 24, + 24, 24, 25, 25, 26, 26 }; const unsigned char parser::yyr2_[] = { 0, 2, 1, 2, 3, 3, 3, 1, 1, 3, - 3, 3, 3, 3, 3, 2, 1, 3, 1, 1, - 1, 2 + 3, 3, 3, 3, 3, 3, 2, 1, 3, 3, + 2, 2, 1, 1, 1, 2 }; @@ -1231,17 +1307,17 @@ namespace yy { { "$end", "error", "$undefined", "WORD", "QUOTED", "QUALIFIERS", "AND", "UCONCAT", "'('", "'-'", "OR", "EQUALS", "CONTAINS", "SMALLEREQ", - "SMALLER", "GREATEREQ", "GREATER", "')'", "$accept", "topquery", "query", - "fieldexpr", "complexfieldname", "term", "qualquote", YY_NULLPTR + "SMALLER", "GREATEREQ", "GREATER", "RANGE", "')'", "$accept", "topquery", + "query", "fieldexpr", "complexfieldname", "range", "term", "qualquote", YY_NULLPTR }; #if YYDEBUG - const unsigned char + const unsigned short int parser::yyrline_[] = { - 0, 71, 71, 84, 95, 106, 117, 123, 136, 141, - 149, 157, 165, 173, 181, 189, 199, 205, 214, 220, - 226, 232 + 0, 73, 73, 86, 97, 108, 119, 125, 138, 143, + 151, 159, 167, 175, 183, 191, 199, 209, 215, 224, + 232, 239, 248, 254, 260, 266 }; // Print the state stack on the debug stream. 
@@ -1286,7 +1362,7 @@ namespace yy { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 8, 17, 2, 2, 2, 9, 2, 2, 2, 2, + 8, 18, 2, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -1308,9 +1384,10 @@ namespace yy { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, - 5, 6, 7, 10, 11, 12, 13, 14, 15, 16 + 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, + 17 }; - const unsigned int user_token_number_max_ = 269; + const unsigned int user_token_number_max_ = 270; const token_number_type undef_token_ = 2; if (static_cast(t) <= yyeof_) @@ -1323,8 +1400,8 @@ namespace yy { } // yy -#line 1327 "y.tab.c" // lalr1.cc:1155 -#line 244 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:1156 +#line 1404 "y.tab.c" // lalr1.cc:1167 +#line 278 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:1168 #include @@ -1473,7 +1550,7 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *, return c; } - // field-term relations + // field-term relations, and ranges switch (c) { case '=': return yy::parser::token::EQUALS; case ':': return yy::parser::token::CONTAINS; @@ -1486,6 +1563,15 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *, return yy::parser::token::SMALLER; } } + case '.': { + int c1 = d->GETCHAR(); + if (c1 == '.') { + return yy::parser::token::RANGE; + } else { + d->UNGETCHAR(c1); + break; + } + } case '>': { int c1 = d->GETCHAR(); if (c1 == '=') { @@ -1514,6 +1600,16 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *, //cerr << "Word broken by special char" << endl; d->UNGETCHAR(c); break; + } else if (c == '.') { + int c1 = d->GETCHAR(); + if (c1 == '.') { + d->UNGETCHAR(c1); + d->UNGETCHAR(c); + break; + } else { + d->UNGETCHAR(c1); + word->push_back(c); + } } else if (c == 0) { //cerr << "Word broken by EOF" << endl; 
break; diff --git a/src/query/wasaparse.hpp b/src/query/wasaparse.hpp index 186bc4f5..2acdcc42 100644 --- a/src/query/wasaparse.hpp +++ b/src/query/wasaparse.hpp @@ -1,8 +1,8 @@ -// A Bison parser, made by GNU Bison 3.0.2. +// A Bison parser, made by GNU Bison 3.0.4. // Skeleton interface for Bison LALR(1) parsers in C++ -// Copyright (C) 2002-2013 Free Software Foundation, Inc. +// Copyright (C) 2002-2015 Free Software Foundation, Inc. // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -41,10 +41,11 @@ # define YY_YY_Y_TAB_H_INCLUDED -# include +# include // std::abort # include # include # include +# include # include "stack.hh" # include "location.hh" @@ -109,7 +110,7 @@ namespace yy { -#line 113 "y.tab.h" // lalr1.cc:372 +#line 114 "y.tab.h" // lalr1.cc:377 @@ -123,13 +124,14 @@ namespace yy { /// Symbol semantic values. union semantic_type { - #line 46 "/y/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:372 + #line 46 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:377 std::string *str; + Rcl::SearchDataClauseRange *rg; Rcl::SearchDataClauseSimple *cl; Rcl::SearchData *sd; -#line 133 "y.tab.h" // lalr1.cc:372 +#line 135 "y.tab.h" // lalr1.cc:377 }; #else typedef YYSTYPE semantic_type; @@ -160,16 +162,20 @@ namespace yy { SMALLEREQ = 266, SMALLER = 267, GREATEREQ = 268, - GREATER = 269 + GREATER = 269, + RANGE = 270 }; }; /// (External) token type, as returned by yylex. typedef token::yytokentype token_type; - /// Internal symbol number. + /// Symbol type: an internal symbol number. typedef int symbol_number_type; + /// The symbol type number to denote an empty symbol. + enum { empty_symbol = -2 }; + /// Internal symbol number for tokens (subsumed by symbol_number_type). 
typedef unsigned char token_number_type; @@ -200,8 +206,15 @@ namespace yy { const semantic_type& v, const location_type& l); + /// Destroy the symbol. ~basic_symbol (); + /// Destroy contents, and record that is empty. + void clear (); + + /// Whether empty. + bool empty () const; + /// Destructive move, \a s is emptied into this. void move (basic_symbol& s); @@ -231,21 +244,23 @@ namespace yy { /// Constructor from (external) token numbers. by_type (kind_type t); + /// Record that this symbol is empty. + void clear (); + /// Steal the symbol type from \a that. void move (by_type& that); /// The (internal) type number (corresponding to \a type). - /// -1 when this symbol is empty. + /// \a empty when empty. symbol_number_type type_get () const; /// The token. token_type token () const; - enum { empty = 0 }; - /// The symbol type. - /// -1 when this symbol is empty. - token_number_type type; + /// \a empty_symbol when empty. + /// An int, not token_number_type, to be able to store empty_symbol. + int type; }; /// "External" symbols: returned by the scanner. @@ -292,9 +307,9 @@ namespace yy { /// Generate an error message. /// \param yystate the state where the error occurred. - /// \param yytoken the lookahead token type, or yyempty_. + /// \param yyla the lookahead token. virtual std::string yysyntax_error_ (state_type yystate, - symbol_number_type yytoken) const; + const symbol_type& yyla) const; /// Compute post-reduction state. /// \param yystate the current state @@ -357,7 +372,7 @@ namespace yy { static const char* const yytname_[]; #if YYDEBUG // YYRLINE[YYN] -- Source line where rule number YYN was defined. - static const unsigned char yyrline_[]; + static const unsigned short int yyrline_[]; /// Report on the debug stream that the rule \a r is going to be reduced. virtual void yy_reduce_print_ (int r); /// Print the state stack on the debug stream. @@ -397,16 +412,21 @@ namespace yy { /// Copy constructor. 
by_state (const by_state& other); + /// Record that this symbol is empty. + void clear (); + /// Steal the symbol type from \a that. void move (by_state& that); /// The (internal) type number (corresponding to \a state). - /// "empty" when empty. + /// \a empty_symbol when empty. symbol_number_type type_get () const; - enum { empty = 0 }; + /// The state number used to denote an empty symbol. + enum { empty_state = -1 }; /// The state. + /// \a empty when empty. state_type state; }; @@ -447,17 +467,16 @@ namespace yy { /// Pop \a n symbols the three stacks. void yypop_ (unsigned int n = 1); - // Constants. + /// Constants. enum { yyeof_ = 0, - yylast_ = 59, ///< Last index in yytable_. - yynnts_ = 7, ///< Number of nonterminal symbols. - yyempty_ = -2, + yylast_ = 60, ///< Last index in yytable_. + yynnts_ = 8, ///< Number of nonterminal symbols. yyfinal_ = 14, ///< Termination state number. yyterror_ = 1, yyerrcode_ = 256, - yyntokens_ = 18 ///< Number of tokens. + yyntokens_ = 19 ///< Number of tokens. 
}; @@ -468,7 +487,7 @@ namespace yy { } // yy -#line 472 "y.tab.h" // lalr1.cc:372 +#line 491 "y.tab.h" // lalr1.cc:377 diff --git a/src/query/wasaparse.ypp b/src/query/wasaparse.ypp index 3f373cf9..418bc604 100644 --- a/src/query/wasaparse.ypp +++ b/src/query/wasaparse.ypp @@ -13,7 +13,7 @@ using namespace std; -// #define LOG_PARSER +//#define LOG_PARSER #ifdef LOG_PARSER #define LOGP(X) {cerr << X;} #else @@ -45,6 +45,7 @@ static void addSubQuery(WasaParserDriver *d, %union { std::string *str; + Rcl::SearchDataClauseRange *rg; Rcl::SearchDataClauseSimple *cl; Rcl::SearchData *sd; } @@ -52,6 +53,7 @@ static void addSubQuery(WasaParserDriver *d, %type qualquote %type fieldexpr +%type range %type term %type query %type complexfieldname @@ -64,7 +66,7 @@ static void addSubQuery(WasaParserDriver *d, %left AND UCONCAT '(' '-' %left OR -%token EQUALS CONTAINS SMALLEREQ SMALLER GREATEREQ GREATER +%token EQUALS CONTAINS SMALLEREQ SMALLER GREATEREQ GREATER RANGE %% @@ -154,9 +156,17 @@ fieldexpr: term $$ = $3; delete $1; } +| complexfieldname CONTAINS range +{ + LOGP("fe: " << *$1 << " : " << $3->gettext() << endl); + $3->setfield(*$1); + $3->setrel(Rcl::SearchDataClause::REL_CONTAINS); + $$ = $3; + delete $1; +} | complexfieldname SMALLER term { - LOGP(cerr << "fe: " << *$1 << " < " << $3->gettext() << endl); + LOGP("fe: " << *$1 << " < " << $3->gettext() << endl); $3->setfield(*$1); $3->setrel(Rcl::SearchDataClause::REL_LT); $$ = $3; @@ -210,6 +220,30 @@ complexfieldname CONTAINS WORD delete $3; } +range: +WORD RANGE WORD +{ + LOGP("Range: " << *$1 << string(" .. ") << *$3 << endl); + $$ = new Rcl::SearchDataClauseRange(*$1, *$3); + delete $1; + delete $3; +} +| +RANGE WORD +{ + LOGP("Range: " << "" << string(" .. ") << *$2 << endl); + $$ = new Rcl::SearchDataClauseRange("", *$2); + delete $2; +} +| +WORD RANGE +{ + LOGP("Range: " << *$1 << string(" .. 
") << "" << endl); + $$ = new Rcl::SearchDataClauseRange(*$1, ""); + delete $1; +} +; + term: WORD { @@ -389,7 +423,7 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *, return c; } - // field-term relations + // field-term relations, and ranges switch (c) { case '=': return yy::parser::token::EQUALS; case ':': return yy::parser::token::CONTAINS; @@ -402,6 +436,15 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *, return yy::parser::token::SMALLER; } } + case '.': { + int c1 = d->GETCHAR(); + if (c1 == '.') { + return yy::parser::token::RANGE; + } else { + d->UNGETCHAR(c1); + break; + } + } case '>': { int c1 = d->GETCHAR(); if (c1 == '=') { @@ -430,6 +473,16 @@ int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *, //cerr << "Word broken by special char" << endl; d->UNGETCHAR(c); break; + } else if (c == '.') { + int c1 = d->GETCHAR(); + if (c1 == '.') { + d->UNGETCHAR(c1); + d->UNGETCHAR(c); + break; + } else { + d->UNGETCHAR(c1); + word->push_back(c); + } } else if (c == 0) { //cerr << "Word broken by EOF" << endl; break; diff --git a/src/query/wasaparseaux.cpp b/src/query/wasaparseaux.cpp index 85a9d19f..db76dda7 100644 --- a/src/query/wasaparseaux.cpp +++ b/src/query/wasaparseaux.cpp @@ -182,7 +182,8 @@ bool WasaParserDriver::addClause(SearchData *sd, delete cl; return false; } - LOGDEB("addClause:: date span: " << (di.y1) << "-" << (di.m1) << "-" << (di.d1) << "/" << (di.y2) << "-" << (di.m2) << "-" << (di.d2) << "\n" ); + LOGDEB("addClause:: date span: " << di.y1 << "-" << di.m1 << "-" + << di.d1 << "/" << di.y2 << "-" << di.m2 << "-" << di.d2 << "\n"); m_haveDates = true; m_dates = di; delete cl; diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 9c480a7a..a2f4b8ee 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -50,6 +50,7 @@ using namespace std; #include "searchdata.h" #include "rclquery.h" #include "rclquery_p.h" +#include "rclvalues.h" #include "md5ut.h" 
#include "rclversion.h" #include "cancelcheck.h" @@ -1489,7 +1490,8 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) for (vector::iterator it = vpath.begin(); it != vpath.end(); it++){ if (it->length() > 230) { - // Just truncate it. May still be useful because of wildcards + // Just truncate it. May still be useful because + // of wildcards *it = it->substr(0, 230); } newdocument.add_posting(wrap_prefix(pathelt_prefix) + *it, @@ -1504,26 +1506,36 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) // // The order has no importance, and we set a position gap of 100 // between fields to avoid false proximity matches. - map::iterator meta_it; - for (meta_it = doc.meta.begin(); meta_it != doc.meta.end(); meta_it++) { - if (!meta_it->second.empty()) { - const FieldTraits *ftp; - // We don't test for an empty prefix here. Some fields are part - // of the internal conf with an empty prefix (ie: abstract). - if (!fieldToTraits(meta_it->first, &ftp)) { - LOGDEB0("Db::add: no prefix for field [" << - meta_it->first << "], no indexing\n"); - continue; - } - LOGDEB0("Db::add: field [" << meta_it->first << "] pfx [" << + for (const auto& entry: doc.meta) { + if (entry.second.empty()) { + continue; + } + const FieldTraits *ftp{nullptr}; + fieldToTraits(entry.first, &ftp); + if (ftp && ftp->valueslot) { + LOGDEB("Adding value: for field " << entry.first << " slot " + << ftp->valueslot << endl); + add_field_value(newdocument, *ftp, entry.second); + } + + // There was an old comment here about not testing for + // empty prefix, and we indeed did not test. I don't think + // that it makes sense any more (and was in disagreement + // with the LOG message. Really now: no prefix: no + // indexing. 
+ if (ftp && !ftp->pfx.empty()) { + LOGDEB0("Db::add: field [" << entry.first << "] pfx [" << ftp->pfx << "] inc " << ftp->wdfinc << ": [" << - meta_it->second << "]\n"); + entry.second << "]\n"); splitter.setTraits(*ftp); - if (!splitter.text_to_words(meta_it->second)) { - LOGDEB("Db::addOrUpdate: split failed for " << - meta_it->first << "\n"); + if (!splitter.text_to_words(entry.second)) { + LOGDEB("Db::addOrUpdate: split failed for " << + entry.first << "\n"); } - } + } else { + LOGDEB0("Db::add: no prefix for field [" << + entry.first << "], no indexing\n"); + } } // Reset to no prefix and default params @@ -1578,8 +1590,8 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) if (doc.getmeta(Doc::keyfn, &utf8fn) && !utf8fn.empty()) { string fn; if (unacmaybefold(utf8fn, fn, "UTF-8", UNACOP_UNACFOLD)) { - // We should truncate after extracting the extension, but this is - // a pathological case anyway + // We should truncate after extracting the extension, + // but this is a pathological case anyway if (fn.size() > 230) utf8truncate(fn, 230); string::size_type pos = fn.rfind('.'); @@ -1587,7 +1599,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) newdocument.add_boolean_term(wrap_prefix(fileext_prefix) + fn.substr(pos + 1)); } - newdocument.add_term(wrap_prefix(unsplitfilename_prefix) + fn, 0); + newdocument.add_term(wrap_prefix(unsplitfilename_prefix) + fn,0); } } diff --git a/src/rcldb/rcldb_p.h b/src/rcldb/rcldb_p.h index 49650f48..1db35780 100644 --- a/src/rcldb/rcldb_p.h +++ b/src/rcldb/rcldb_p.h @@ -197,7 +197,7 @@ class Db::Native { std::string rawtextMetaKey(Xapian::docid did) { // Xapian's Olly Betts avises to use a key which will // sort the same as the docid (which we do), and to - // use Xapian's pack_uint_preserving_sort() which is + // use Xapian's pack.h:pack_uint_preserving_sort() which is // efficient but hard to read. I'd wager that this // does not make much of a difference. 
10 ascii bytes // gives us 10 billion docs, which is enough (says I). diff --git a/src/rcldb/rclvalues.cpp b/src/rcldb/rclvalues.cpp new file mode 100644 index 00000000..fb218111 --- /dev/null +++ b/src/rcldb/rclvalues.cpp @@ -0,0 +1,87 @@ +/* Copyright (C) 2004-2018 J.F.Dockes + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#include "autoconfig.h" + +#include + +#include "xapian.h" + +#include "rclconfig.h" +#include "smallut.h" +#include "log.h" + +using namespace std; + +namespace Rcl { + +void add_field_value(Xapian::Document& xdoc, const FieldTraits& ft, + const string& data) +{ + string ndata{data}; + + switch (ft.valuetype) { + case FieldTraits::STR: + break; + case FieldTraits::INT: + { + int len = ft.valuelen ? 
ft.valuelen : 10; + leftzeropad(ndata, len); + } + } + LOGDEB0("Rcl::add_field_value: slot " << ft.valueslot << " [" << + ndata << "]\n"); + xdoc.add_value(ft.valueslot, ndata); +} + + +string convert_field_value(const FieldTraits& ft, + const string& data) +{ + string ndata(data); + switch (ft.valuetype) { + case FieldTraits::STR: + break; + case FieldTraits::INT: + { + if (ndata.empty()) + break; + + // Apply suffixes + char c = ndata.back(); + string zeroes; + switch(c) { + case 'k':case 'K': zeroes = "000";break; + case 'm':case 'M': zeroes = "000000";break; + case 'g':case 'G': zeroes = "000000000";break; + case 't':case 'T': zeroes = "000000000000";break; + default: break; + } + if (!zeroes.empty()) { + ndata.pop_back(); + ndata += zeroes; + } + int len = ft.valuelen ? ft.valuelen : 10; + leftzeropad(ndata, len); + } + } + + return ndata; +} + +} + + diff --git a/src/rcldb/rclvalues.h b/src/rcldb/rclvalues.h new file mode 100644 index 00000000..f8bc7576 --- /dev/null +++ b/src/rcldb/rclvalues.h @@ -0,0 +1,29 @@ +#ifndef _RCLVALUES_H_INCLUDED_ +#define _RCLVALUES_H_INCLUDED_ +/* Copyright (C) 2004-2018 J.F.Dockes + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ +#include + +namespace Rcl { + +extern void add_field_value(Xapian::Document& xdoc, const FieldTraits& ft, + const std::string& data); +extern std::string convert_field_value(const FieldTraits& ft, + const std::string& data); +} + +#endif /* _RCLVALUES_H_INCLUDED_ */ diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index 8520d925..a46ce04d 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -348,6 +348,14 @@ void SearchDataClausePath::dump(ostream& o) const o << "[" << m_text << "]"; } +void SearchDataClauseRange::dump(ostream& o) const +{ + o << "ClauseRange: "; + if (m_exclude) + o << " - "; + o << "[" << gettext() << "]"; +} + void SearchDataClauseDist::dump(ostream& o) const { if (m_tp == SCLT_NEAR) diff --git a/src/rcldb/searchdata.h b/src/rcldb/searchdata.h index 5f7d9cbf..a6f649fa 100644 --- a/src/rcldb/searchdata.h +++ b/src/rcldb/searchdata.h @@ -41,9 +41,8 @@ namespace Rcl { /** Search clause types */ enum SClType { - SCLT_AND, - SCLT_OR, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR, SCLT_PATH, - SCLT_SUB + SCLT_AND, SCLT_OR, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR, + SCLT_PATH, SCLT_RANGE, SCLT_SUB, }; class SearchDataClause; @@ -79,16 +78,14 @@ class SearchDataClauseDist; class SearchData { public: SearchData(SClType tp, const string& stemlang) - : m_tp(tp), m_stemlang(stemlang) - { - if (m_tp != SCLT_OR && m_tp != SCLT_AND) - m_tp = SCLT_OR; - commoninit(); + : m_tp(tp), m_stemlang(stemlang) { + if (m_tp != SCLT_OR && m_tp != SCLT_AND) + m_tp = SCLT_OR; + commoninit(); } SearchData() - : m_tp(SCLT_AND) - { - commoninit(); + : m_tp(SCLT_AND) { + commoninit(); } ~SearchData(); @@ -110,7 +107,7 @@ public: * user terms in order will have higher relevance. This must be called * before toNativeQuery(). 
* @param threshold: don't use terms more frequent than the value - * (proportion of docs where they occur) + * (proportion of docs where they occur) */ bool maybeAddAutoPhrase(Rcl::Db &db, double threshold); @@ -142,21 +139,19 @@ public: void setDescription(const std::string& d) {m_description = d;} /** Return an XML version of the contents, for storage in search history - by the GUI */ + by the GUI */ string asXML(); - void setTp(SClType tp) - { - m_tp = tp; + void setTp(SClType tp) { + m_tp = tp; } SClType getTp() { return m_tp; } - void setMaxExpand(int max) - { - m_softmaxexpand = max; + void setMaxExpand(int max) { + m_softmaxexpand = max; } bool getAutoDiac() {return m_autodiacsens;} bool getAutoCase() {return m_autocasesens;} @@ -217,8 +212,8 @@ private: bool expandFileTypes(Rcl::Db &db, std::vector& exptps); bool clausesToQuery(Rcl::Db &db, SClType tp, - std::vector& query, - string& reason, void *d); + std::vector& query, + string& reason, void *d); void commoninit(); /* Copyconst and assignment private and forbidden */ @@ -229,9 +224,9 @@ private: class SearchDataClause { public: enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=0x1, SDCM_ANCHORSTART=0x2, - SDCM_ANCHOREND=0x4, SDCM_CASESENS=0x8, SDCM_DIACSENS=0x10, - SDCM_NOTERMS=0x20, // Don't include terms for highlighting - SDCM_NOSYNS = 0x40, // Don't perform synonym expansion + SDCM_ANCHOREND=0x4, SDCM_CASESENS=0x8, SDCM_DIACSENS=0x10, + SDCM_NOTERMS=0x20, // Don't include terms for highlighting + SDCM_NOSYNS = 0x40, // Don't perform synonym expansion // Aargh special case. 
pathelts are case/diac-sensitive // even in a stripped index SDCM_PATHELT = 0x80, @@ -239,70 +234,57 @@ public: enum Relation {REL_CONTAINS, REL_EQUALS, REL_LT, REL_LTE, REL_GT, REL_GTE}; SearchDataClause(SClType tp) - : m_tp(tp), m_parentSearch(0), m_haveWildCards(0), - m_modifiers(SDCM_NONE), m_weight(1.0), m_exclude(false), - m_rel(REL_CONTAINS) - {} + : m_tp(tp), m_parentSearch(0), m_haveWildCards(0), + m_modifiers(SDCM_NONE), m_weight(1.0), m_exclude(false), + m_rel(REL_CONTAINS) {} virtual ~SearchDataClause() {} virtual bool toNativeQuery(Rcl::Db &db, void *) = 0; bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;} virtual std::string getReason() const {return m_reason;} virtual void getTerms(HighlightData&) const {} - SClType getTp() const - { - return m_tp; + SClType getTp() const { + return m_tp; } void setTp(SClType tp) { m_tp = tp; } - void setParent(SearchData *p) - { - m_parentSearch = p; + void setParent(SearchData *p) { + m_parentSearch = p; } - string getStemLang() - { - return (m_modifiers & SDCM_NOSTEMMING) || m_parentSearch == 0 ? - cstr_null : m_parentSearch->getStemLang(); + string getStemLang() { + return (m_modifiers & SDCM_NOSTEMMING) || m_parentSearch == 0 ? + cstr_null : m_parentSearch->getStemLang(); } - bool getAutoDiac() - { - return m_parentSearch ? m_parentSearch->getAutoDiac() : false; + bool getAutoDiac() { + return m_parentSearch ? m_parentSearch->getAutoDiac() : false; } - bool getAutoCase() - { - return m_parentSearch ? m_parentSearch->getAutoCase() : true; + bool getAutoCase() { + return m_parentSearch ? m_parentSearch->getAutoCase() : true; } - int getMaxExp() - { - return m_parentSearch ? m_parentSearch->getMaxExp() : 10000; + int getMaxExp() { + return m_parentSearch ? m_parentSearch->getMaxExp() : 10000; } - size_t getMaxCl() - { - return m_parentSearch ? m_parentSearch->getMaxCl() : 100000; + size_t getMaxCl() { + return m_parentSearch ? 
m_parentSearch->getMaxCl() : 100000; } - int getSoftMaxExp() - { - return m_parentSearch ? m_parentSearch->getSoftMaxExp() : -1; + int getSoftMaxExp() { + return m_parentSearch ? m_parentSearch->getSoftMaxExp() : -1; } - virtual void addModifier(Modifier mod) - { - m_modifiers = m_modifiers | mod; + virtual void addModifier(Modifier mod) { + m_modifiers = m_modifiers | mod; } virtual unsigned int getmodifiers() { - return m_modifiers; + return m_modifiers; } - virtual void setWeight(float w) - { - m_weight = w; + virtual void setWeight(float w) { + m_weight = w; } - virtual bool getexclude() const - { - return m_exclude; + virtual bool getexclude() const { + return m_exclude; } - virtual void setexclude(bool onoff) - { - m_exclude = onoff; + virtual void setexclude(bool onoff) { + m_exclude = onoff; } virtual void setrel(Relation rel) { m_rel = rel; @@ -322,15 +304,6 @@ protected: float m_weight; bool m_exclude; Relation m_rel; - -private: - SearchDataClause(const SearchDataClause&) - { - } - SearchDataClause& operator=(const SearchDataClause&) - { - return *this; - } }; /** @@ -341,37 +314,30 @@ class TermProcQ; class SearchDataClauseSimple : public SearchDataClause { public: SearchDataClauseSimple(SClType tp, const std::string& txt, - const std::string& fld = std::string()) - : SearchDataClause(tp), m_text(txt), m_field(fld), m_curcl(0) - { - m_haveWildCards = - (txt.find_first_of(cstr_minwilds) != std::string::npos); + const std::string& fld = std::string()) + : SearchDataClause(tp), m_text(txt), m_field(fld), m_curcl(0) { + m_haveWildCards = + (txt.find_first_of(cstr_minwilds) != std::string::npos); } SearchDataClauseSimple(const std::string& txt, SClType tp) - : SearchDataClause(tp), m_text(txt), m_curcl(0) - { - m_haveWildCards = - (txt.find_first_of(cstr_minwilds) != std::string::npos); + : SearchDataClause(tp), m_text(txt), m_curcl(0) { + m_haveWildCards = + (txt.find_first_of(cstr_minwilds) != std::string::npos); } - virtual ~SearchDataClauseSimple() - { - 
} + virtual ~SearchDataClauseSimple() {} /** Translate to Xapian query */ virtual bool toNativeQuery(Rcl::Db &, void *); - virtual void getTerms(HighlightData& hldata) const - { - hldata.append(m_hldata); + virtual void getTerms(HighlightData& hldata) const { + hldata.append(m_hldata); } - virtual const std::string& gettext() - { - return m_text; + virtual const std::string& gettext() const { + return m_text; } - virtual const std::string& getfield() - { - return m_field; + virtual const std::string& getfield() const { + return m_field; } virtual void setfield(const string& field) { m_field = field; @@ -384,22 +350,49 @@ protected: HighlightData m_hldata; // Current count of Xapian clauses, to check against expansion limit size_t m_curcl; + bool processUserString(Rcl::Db &db, const string &iq, - std::string &ermsg, - void* pq, int slack = 0, bool useNear = false); + std::string &ermsg, + void* pq, int slack = 0, bool useNear = false); bool expandTerm(Rcl::Db &db, std::string& ermsg, int mods, - const std::string& term, - std::vector& exp, + const std::string& term, + std::vector& exp, std::string& sterm, const std::string& prefix, - std::vector* multiwords = 0); + std::vector* multiwords = 0); // After splitting entry on whitespace: process non-phrase element void processSimpleSpan(Rcl::Db &db, string& ermsg, const string& span, - int mods, void *pq); + int mods, void *pq); // Process phrase/near element void processPhraseOrNear(Rcl::Db &db, string& ermsg, TermProcQ *splitData, - int mods, void *pq, bool useNear, int slack); + int mods, void *pq, bool useNear, int slack); }; +class SearchDataClauseRange : public SearchDataClauseSimple { +public: + SearchDataClauseRange(const std::string& t1, const std::string& t2, + const std::string& fld = std::string()) + : SearchDataClauseSimple(SCLT_RANGE, t1, fld), m_t2(t2) {} + + // This is for 'upgrading' a clauseSimple with eq/gt/lt... rel to + // a range. 
Either of t1 or t2 or both can be set to the original + // text, which is why they are passed as separate parameters + SearchDataClauseRange(const SearchDataClauseSimple& cl, + const std::string& t1, const std::string& t2) + : SearchDataClauseSimple(cl) { + m_text = t1; + m_t2 = t2; + } + virtual ~SearchDataClauseRange() {} + + virtual void dump(ostream& o) const; + virtual const std::string& gettext2() const { + return m_t2; + } + virtual bool toNativeQuery(Rcl::Db &db, void *); + +protected: + std::string m_t2; +}; /** * Filename search clause. This is special because term expansion is only @@ -412,15 +405,12 @@ protected: class SearchDataClauseFilename : public SearchDataClauseSimple { public: SearchDataClauseFilename(const std::string& txt) - : SearchDataClauseSimple(txt, SCLT_FILENAME) - { - // File name searches don't count when looking for wild cards. - m_haveWildCards = false; + : SearchDataClauseSimple(txt, SCLT_FILENAME) { + // File name searches don't count when looking for wild cards. 
+ m_haveWildCards = false; } - virtual ~SearchDataClauseFilename() - { - } + virtual ~SearchDataClauseFilename() {} virtual bool toNativeQuery(Rcl::Db &, void *); virtual void dump(ostream& o) const; @@ -450,15 +440,12 @@ public: class SearchDataClausePath : public SearchDataClauseSimple { public: SearchDataClausePath(const std::string& txt, bool excl = false) - : SearchDataClauseSimple(SCLT_PATH, txt, "dir") - { - m_exclude = excl; - m_haveWildCards = false; + : SearchDataClauseSimple(SCLT_PATH, txt, "dir") { + m_exclude = excl; + m_haveWildCards = false; } - virtual ~SearchDataClausePath() - { - } + virtual ~SearchDataClausePath() {} virtual bool toNativeQuery(Rcl::Db &, void *); virtual void dump(ostream& o) const; @@ -471,19 +458,14 @@ public: class SearchDataClauseDist : public SearchDataClauseSimple { public: SearchDataClauseDist(SClType tp, const std::string& txt, int slack, - const std::string& fld = std::string()) - : SearchDataClauseSimple(tp, txt, fld), m_slack(slack) - { - } + const std::string& fld = std::string()) + : SearchDataClauseSimple(tp, txt, fld), m_slack(slack) {} - virtual ~SearchDataClauseDist() - { - } + virtual ~SearchDataClauseDist() {} virtual bool toNativeQuery(Rcl::Db &, void *); - virtual int getslack() const - { - return m_slack; + virtual int getslack() const { + return m_slack; } virtual void setslack(int slack) { m_slack = slack; @@ -497,20 +479,16 @@ private: class SearchDataClauseSub : public SearchDataClause { public: SearchDataClauseSub(std::shared_ptr sub) - : SearchDataClause(SCLT_SUB), m_sub(sub) - { - } - virtual bool toNativeQuery(Rcl::Db &db, void *p) - { - bool ret = m_sub->toNativeQuery(db, p); - if (!ret) - m_reason = m_sub->getReason(); - return ret; + : SearchDataClause(SCLT_SUB), m_sub(sub) {} + virtual bool toNativeQuery(Rcl::Db &db, void *p) { + bool ret = m_sub->toNativeQuery(db, p); + if (!ret) + m_reason = m_sub->getReason(); + return ret; } - virtual void getTerms(HighlightData& hldata) const - { - 
m_sub.get()->getTerms(hldata); + virtual void getTerms(HighlightData& hldata) const { + m_sub.get()->getTerms(hldata); } virtual std::shared_ptr getSub() { return m_sub; diff --git a/src/rcldb/searchdatatox.cpp b/src/rcldb/searchdatatox.cpp index 940eeb83..8d39883d 100644 --- a/src/rcldb/searchdatatox.cpp +++ b/src/rcldb/searchdatatox.cpp @@ -47,11 +47,10 @@ using namespace std; #include "expansiondbs.h" #include "base64.h" #include "daterange.h" +#include "rclvalues.h" namespace Rcl { -typedef vector::iterator qlist_it_t; - static const int original_term_wqf_booster = 10; // Expand doc categories and mime type wild card expressions @@ -62,7 +61,7 @@ bool SearchData::expandFileTypes(Db &db, vector& tps) { const RclConfig *cfg = db.getConf(); if (!cfg) { - LOGFATAL("Db::expandFileTypes: null configuration!!\n" ); + LOGFATAL("Db::expandFileTypes: null configuration!!\n"); return false; } vector exptps; @@ -110,15 +109,16 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp, string& reason, void *d) { Xapian::Query xq; - for (qlist_it_t it = query.begin(); it != query.end(); it++) { + for (auto& clausep : query) { Xapian::Query nq; - if (!(*it)->toNativeQuery(db, &nq)) { - LOGERR("SearchData::clausesToQuery: toNativeQuery failed: " << ((*it)->getReason()) << "\n" ); - reason += (*it)->getReason() + " "; + if (!clausep->toNativeQuery(db, &nq)) { + LOGERR("SearchData::clausesToQuery: toNativeQuery failed: " + << clausep->getReason() << "\n"); + reason += clausep->getReason() + " "; return false; } if (nq.empty()) { - LOGDEB("SearchData::clausesToQuery: skipping empty clause\n" ); + LOGDEB("SearchData::clausesToQuery: skipping empty clause\n"); continue; } // If this structure is an AND list, must use AND_NOT for excl clauses. 
@@ -126,7 +126,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp, // addClause()) Xapian::Query::op op; if (tp == SCLT_AND) { - if ((*it)->getexclude()) { + if (clausep->getexclude()) { op = Xapian::Query::OP_AND_NOT; } else { op = Xapian::Query::OP_AND; @@ -143,7 +143,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp, xq = Xapian::Query(op, xq, nq); } if (int(xq.get_length()) >= getMaxCl()) { - LOGERR("" << (maxXapClauseMsg) << "\n" ); + LOGERR("" << maxXapClauseMsg << "\n"); m_reason += maxXapClauseMsg; if (!o_index_stripchars) m_reason += maxXapClauseCaseDiacMsg; @@ -151,7 +151,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp, } } - LOGDEB0("SearchData::clausesToQuery: got " << (xq.get_length()) << " clauses\n" ); + LOGDEB0("SearchData::clausesToQuery: got " << xq.get_length()<<" clauses\n"); if (xq.empty()) xq = Xapian::Query::MatchAll; @@ -162,7 +162,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp, bool SearchData::toNativeQuery(Rcl::Db &db, void *d) { - LOGDEB("SearchData::toNativeQuery: stemlang [" << (m_stemlang) << "]\n" ); + LOGDEB("SearchData::toNativeQuery: stemlang [" << m_stemlang << "]\n"); m_reason.erase(); db.getConf()->getConfParam("maxTermExpand", &m_maxexp); @@ -174,7 +174,8 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d) // Xapian query tree Xapian::Query xq; if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) { - LOGERR("SearchData::toNativeQuery: clausesToQuery failed. reason: " << (m_reason) << "\n" ); + LOGERR("SearchData::toNativeQuery: clausesToQuery failed. reason: " + << m_reason << "\n"); return false; } @@ -183,7 +184,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d) if (m_dates.y1 == 0 || m_dates.y2 == 0) { int minyear = 1970, maxyear = 2100; if (!db.maxYearSpan(&minyear, &maxyear)) { - LOGERR("Can't retrieve index min/max dates\n" ); + LOGERR("Can't retrieve index min/max dates\n"); //whatever, go on. 
} @@ -198,16 +199,18 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d) m_dates.d2 = 31; } } - LOGDEB("Db::toNativeQuery: date interval: " << (m_dates.y1) << "-" << (m_dates.m1) << "-" << (m_dates.d1) << "/" << (m_dates.y2) << "-" << (m_dates.m2) << "-" << (m_dates.d2) << "\n" ); + LOGDEB("Db::toNativeQuery: date interval: " << m_dates.y1 << + "-" << m_dates.m1 << "-" << m_dates.d1 << "/" << + m_dates.y2 << "-" << m_dates.m2 << "-" << m_dates.d2 << "\n"); Xapian::Query dq = date_range_filter(m_dates.y1, m_dates.m1, m_dates.d1, m_dates.y2, m_dates.m2, m_dates.d2); if (dq.empty()) { - LOGINFO("Db::toNativeQuery: date filter is empty\n" ); + LOGINFO("Db::toNativeQuery: date filter is empty\n"); } // If no probabilistic query is provided then promote the daterange // filter to be THE query instead of filtering an empty query. if (xq.empty()) { - LOGINFO("Db::toNativeQuery: proba query is empty\n" ); + LOGINFO("Db::toNativeQuery: proba query is empty\n"); xq = dq; } else { xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, dq); @@ -239,7 +242,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d) // If no probabilistic query is provided then promote the // filter to be THE query instead of filtering an empty query. if (xq.empty()) { - LOGINFO("Db::toNativeQuery: proba query is empty\n" ); + LOGINFO("Db::toNativeQuery: proba query is empty\n"); xq = sq; } else { xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq); @@ -263,7 +266,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d) for (vector::iterator it = m_filetypes.begin(); it != m_filetypes.end(); it++) { string term = wrap_prefix(mimetype_prefix) + *it; - LOGDEB0("Adding file type term: [" << (term) << "]\n" ); + LOGDEB0("Adding file type term: [" << term << "]\n"); tq = tq.empty() ? 
Xapian::Query(term) : Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term)); } @@ -278,7 +281,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d) for (vector::iterator it = m_nfiletypes.begin(); it != m_nfiletypes.end(); it++) { string term = wrap_prefix(mimetype_prefix) + *it; - LOGDEB0("Adding negative file type term: [" << (term) << "]\n" ); + LOGDEB0("Adding negative file type term: [" << term << "]\n"); tq = tq.empty() ? Xapian::Query(term) : Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term)); } @@ -333,7 +336,8 @@ public: if (m_lastpos < pos) m_lastpos = pos; bool noexpand = be ? m_ts->nostemexp() : true; - LOGDEB1("TermProcQ::takeword: pushing [" << (term) << "] pos " << (pos) << " noexp " << (noexpand) << "\n" ); + LOGDEB1("TermProcQ::takeword: pushing [" << term << "] pos " << + pos << " noexp " << noexpand << "\n"); if (m_terms[pos].size() < term.size()) { m_terms[pos] = term; m_nste[pos] = noexpand; @@ -577,13 +581,12 @@ static void prefix_vector(vector& v, const string& prefix) } } -void SearchDataClauseSimple:: -processSimpleSpan(Rcl::Db &db, string& ermsg, - const string& span, - int mods, void * pq) +void SearchDataClauseSimple::processSimpleSpan( + Rcl::Db &db, string& ermsg, const string& span, int mods, void *pq) { vector& pqueries(*(vector*)pq); - LOGDEB0("StringToXapianQ::processSimpleSpan: [" << (span) << "] mods 0x" << ((unsigned int)mods) << "\n" ); + LOGDEB0("StringToXapianQ::processSimpleSpan: [" << span << "] mods 0x" + << (unsigned int)mods << "\n"); vector exp; string sterm; // dumb version of user term @@ -679,7 +682,7 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg, vector::const_iterator nxit = splitData->nostemexps().begin(); for (vector::const_iterator it = splitData->terms().begin(); it != splitData->terms().end(); it++, nxit++) { - LOGDEB0("ProcessPhrase: processing [" << *it << "]\n" ); + LOGDEB0("ProcessPhrase: processing [" << *it << "]\n"); // Adjust when we do stem 
expansion. Not if disabled by // caller, not inside phrases, and some versions of xapian // will accept only one OR clause inside NEAR. @@ -695,7 +698,8 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg, vector exp; if (!expandTerm(db, ermsg, lmods, *it, exp, sterm, prefix)) return; - LOGDEB0("ProcessPhraseOrNear: exp size " << (exp.size()) << ", exp: " << (stringsToString(exp)) << "\n" ); + LOGDEB0("ProcessPhraseOrNear: exp size " << exp.size() << ", exp: " << + stringsToString(exp) << "\n"); // groups is used for highlighting, we don't want prefixes in there. vector noprefs; for (vector::const_iterator it = exp.begin(); @@ -721,7 +725,8 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg, // Generate an appropriate PHRASE/NEAR query with adjusted slack // For phrases, give a relevance boost like we do for original terms - LOGDEB2("PHRASE/NEAR: alltermcount " << (splitData->alltermcount()) << " lastpos " << (splitData->lastpos()) << "\n" ); + LOGDEB2("PHRASE/NEAR: alltermcount " << splitData->alltermcount() << + " lastpos " << splitData->lastpos() << "\n"); Xapian::Query xq(op, orqueries.begin(), orqueries.end(), splitData->lastpos() + 1 + slack); if (op == Xapian::Query::OP_PHRASE) @@ -791,7 +796,8 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq, vector &pqueries(*(vector*)pq); int mods = m_modifiers; - LOGDEB("StringToXapianQ:pUS:: qstr [" << (iq) << "] fld [" << (m_field) << "] mods 0x" << (mods) << " slack " << (slack) << " near " << (useNear) << "\n" ); + LOGDEB("StringToXapianQ:pUS:: qstr [" << iq << "] fld [" << m_field << + "] mods 0x"<::iterator it = phrases.begin(); it != phrases.end(); it++) { - LOGDEB0("strToXapianQ: phrase/word: [" << *it << "]\n" ); + LOGDEB0("strToXapianQ: phrase/word: [" << *it << "]\n"); // Anchoring modifiers int amods = stringToMods(*it); int terminc = amods != 0 ? 
1 : 0; @@ -849,7 +855,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq, slack += tpq.lastpos() - int(tpq.terms().size()) + 1; - LOGDEB0("strToXapianQ: termcount: " << (tpq.terms().size()) << "\n" ); + LOGDEB0("strToXapianQ: termcount: " << tpq.terms().size() << "\n"); switch (tpq.terms().size() + terminc) { case 0: continue;// ?? @@ -884,7 +890,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq, ermsg = "Caught unknown exception"; } if (!ermsg.empty()) { - LOGERR("stringToXapianQueries: " << (ermsg) << "\n" ); + LOGERR("stringToXapianQueries: " << ermsg << "\n"); return false; } return true; @@ -893,8 +899,36 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq, // Translate a simple OR or AND search clause. bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p) { - LOGDEB("SearchDataClauseSimple::toNativeQuery: fld [" << (m_field) << "] val [" << (m_text) << "] stemlang [" << (getStemLang()) << "]\n" ); + LOGDEB("SearchDataClauseSimple::toNativeQuery: fld [" << m_field << + "] val [" << m_text << "] stemlang [" << getStemLang() << "]\n"); + // Transform (in)equalities into a range query + switch (getrel()) { + case REL_EQUALS: + { + SearchDataClauseRange cl(*this, gettext(), gettext()); + bool ret = cl.toNativeQuery(db, p); + m_reason = cl.getReason(); + return ret; + } + case REL_LT: case REL_LTE: + { + SearchDataClauseRange cl(*this, "", gettext()); + bool ret = cl.toNativeQuery(db, p); + m_reason = cl.getReason(); + return ret; + } + case REL_GT: case REL_GTE: + { + SearchDataClauseRange cl(*this, gettext(), ""); + bool ret = cl.toNativeQuery(db, p); + m_reason = cl.getReason(); + return ret; + } + default: + break; + } + Xapian::Query *qp = (Xapian::Query *)p; *qp = Xapian::Query(); @@ -903,7 +937,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p) case SCLT_AND: op = Xapian::Query::OP_AND; break; case SCLT_OR: op = Xapian::Query::OP_OR; 
break; default: - LOGERR("SearchDataClauseSimple: bad m_tp " << (m_tp) << "\n" ); + LOGERR("SearchDataClauseSimple: bad m_tp " << m_tp << "\n"); m_reason = "Internal error"; return false; } @@ -912,7 +946,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p) if (!processUserString(db, m_text, m_reason, &pqueries)) return false; if (pqueries.empty()) { - LOGERR("SearchDataClauseSimple: resolved to null query\n" ); + LOGERR("SearchDataClauseSimple: resolved to null query\n"); m_reason = string("Resolved to null query. Term too long ? : [" + m_text + string("]")); return false; @@ -925,6 +959,58 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p) return true; } +// Translate a range clause. This only works if a Xapian value slot +// was attributed to the field. +bool SearchDataClauseRange::toNativeQuery(Rcl::Db &db, void *p) +{ + LOGDEB("SearchDataClauseRange::toNativeQuery: " << m_field << + " :[" << m_text << ".." << m_t2 << "]\n"); + Xapian::Query *qp = (Xapian::Query *)p; + *qp = Xapian::Query(); + + if (m_field.empty() || (m_text.empty() && m_t2.empty())) { + m_reason = "Range clause needs a field and a value"; + return false; + } + + // Get the value number for the field from the configuration + const FieldTraits *ftp; + if (!db.fieldToTraits(m_field, &ftp, true)) { + m_reason = string("field ") + m_field + " not found in configuration"; + return false; + } + if (ftp->valueslot == 0) { + m_reason = string("No value slot specified in configuration for field ") + + m_field; + return false; + } + LOGDEB("SearchDataClauseRange: value slot " << ftp->valueslot << endl); + // Build Xapian VALUE query. 
+ string errstr; + try { + if (m_text.empty()) { + *qp = Xapian::Query(Xapian::Query::OP_VALUE_LE, + ftp->valueslot, convert_field_value(*ftp, m_t2)); + } else if (m_t2.empty()) { + *qp = Xapian::Query(Xapian::Query::OP_VALUE_GE, ftp->valueslot, + convert_field_value(*ftp, m_text)); + } else { + *qp = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, ftp->valueslot, + convert_field_value(*ftp, m_text), + convert_field_value(*ftp, m_t2)); + } + } + XCATCHERROR(errstr); + if (!errstr.empty()) { + LOGERR("SearchDataClauseRange: range query creation failed for slot "<< + ftp->valueslot << endl); + m_reason = "Range query creation failed\n"; + *qp = Xapian::Query(); + return false; + } + return true; +} + // Translate a FILENAME search clause. This always comes // from a "filename" search from the gui or recollq. A query language // "filename:"-prefixed field will not go through here, but through @@ -1018,7 +1104,7 @@ bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p) // Translate NEAR or PHRASE clause. bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p) { - LOGDEB("SearchDataClauseDist::toNativeQuery\n" ); + LOGDEB("SearchDataClauseDist::toNativeQuery\n"); Xapian::Query *qp = (Xapian::Query *)p; *qp = Xapian::Query(); @@ -1037,7 +1123,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p) if (!processUserString(db, s, m_reason, &pqueries, m_slack, useNear)) return false; if (pqueries.empty()) { - LOGERR("SearchDataClauseDist: resolved to null query\n" ); + LOGERR("SearchDataClauseDist: resolved to null query\n"); m_reason = string("Resolved to null query. Term too long ? 
: [" + m_text + string("]")); return false; diff --git a/src/rcldb/searchdataxml.cpp b/src/rcldb/searchdataxml.cpp index 409d319e..2ee907be 100644 --- a/src/rcldb/searchdataxml.cpp +++ b/src/rcldb/searchdataxml.cpp @@ -42,6 +42,7 @@ static string tpToString(SClType tp) case SCLT_FILENAME: return "FN"; case SCLT_PHRASE: return "PH"; case SCLT_NEAR: return "NE"; + case SCLT_RANGE: return "RG"; case SCLT_SUB: return "SU"; // Unsupported actually default: return "UN"; } @@ -101,6 +102,15 @@ string SearchData::asXML() endl; } os << "" << base64_encode(cl->gettext()) << "" << endl; + if (cl->getTp() == SCLT_RANGE) { + SearchDataClauseRange *clr = + dynamic_cast(cl); + const string& t = clr->gettext2(); + if (!t.empty()) { + os << "" << base64_encode(clr->gettext2()) << + "" << endl; + } + } if (cl->getTp() == SCLT_NEAR || cl->getTp() == SCLT_PHRASE) { SearchDataClauseDist *cld = dynamic_cast(cl); diff --git a/src/sampleconf/fields b/src/sampleconf/fields index 4250fb2f..a673bf78 100644 --- a/src/sampleconf/fields +++ b/src/sampleconf/fields @@ -65,6 +65,20 @@ rclbes = XB ; noterms = 1 # *** USE XY for beginning your local prefixes *** ie: # myfield = XYMYPREF +[values] +########### +## Fields which will be stored in Xapian values, enabling range query +## processing. +# Entries are specified as 'fieldname = valueslot;[px=val1;py=val2...]'. +# Xapian value slots are 32-bit numbers. Numbers below 1000 are reserved +# by Recoll or Xapian. Numbers above are available for user configuration. +# Values have types, which can be 'int' or 'string' at the moment. ints have +# an additional 'len' attribute, which specifies the padding size used for +# sorting (leading zeroes: all Xapian sorting is text-based). 10 is fine +# for an unsigned 32-bit integer. +# myfield = 1001; type=int; len = 10 +# mystrfield = 1002; type = string + [stored] ############################ # Some fields are stored in the document data record inside the index and