Search: change query processing a bit so that we can use OP_FILTER for path selection
This commit is contained in:
parent
16467900bd
commit
bf46e6ca0e
@ -1 +1 @@
|
||||
1.32.8
|
||||
1.33.0
|
||||
|
||||
@ -125,7 +125,7 @@
|
||||
#define PACKAGE_NAME "Recoll"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "Recoll 1.32.8"
|
||||
#define PACKAGE_STRING "Recoll 1.33.0"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "recoll"
|
||||
@ -134,7 +134,7 @@
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "1.32.8"
|
||||
#define PACKAGE_VERSION "1.33.0"
|
||||
|
||||
/* putenv parameter is const */
|
||||
/* #undef PUTENV_ARG_CONST */
|
||||
|
||||
@ -118,7 +118,7 @@
|
||||
#define PACKAGE_NAME "Recoll"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "Recoll 1.32.8"
|
||||
#define PACKAGE_STRING "Recoll 1.33.0"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "recoll"
|
||||
@ -127,7 +127,7 @@
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "1.32.8"
|
||||
#define PACKAGE_VERSION "1.33.0"
|
||||
|
||||
/* putenv parameter is const */
|
||||
/* #undef PUTENV_ARG_CONST */
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2006 J.F.Dockes
|
||||
/* Copyright (C) 2006-2022 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -50,14 +50,11 @@ using namespace std;
|
||||
|
||||
namespace Rcl {
|
||||
|
||||
typedef vector<SearchDataClause *>::iterator qlist_it_t;
|
||||
typedef vector<SearchDataClause *>::const_iterator qlist_cit_t;
|
||||
|
||||
SearchData::~SearchData()
|
||||
{
|
||||
LOGDEB0("SearchData::~SearchData\n" );
|
||||
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)
|
||||
delete *it;
|
||||
LOGDEB0("SearchData::~SearchData\n");
|
||||
for (auto& clausep : m_query)
|
||||
delete clausep;
|
||||
}
|
||||
|
||||
// This is called by the GUI simple search if the option is set: add
|
||||
@ -66,44 +63,42 @@ SearchData::~SearchData()
|
||||
// We remove very common terms from the query to avoid performance issues.
|
||||
bool SearchData::maybeAddAutoPhrase(Rcl::Db& db, double freqThreshold)
|
||||
{
|
||||
LOGDEB0("SearchData::maybeAddAutoPhrase()\n" );
|
||||
LOGDEB0("SearchData::maybeAddAutoPhrase()\n");
|
||||
// cerr << "BEFORE SIMPLIFY\n"; dump(cerr);
|
||||
simplify();
|
||||
// cerr << "AFTER SIMPLIFY\n"; dump(cerr);
|
||||
|
||||
if (!m_query.size()) {
|
||||
LOGDEB2("SearchData::maybeAddAutoPhrase: empty query\n" );
|
||||
if (m_query.empty()) {
|
||||
LOGDEB2("SearchData::maybeAddAutoPhrase: empty query\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
string field;
|
||||
auto clp0 = dynamic_cast<SearchDataClauseSimple*>(*m_query.begin());
|
||||
if (clp0)
|
||||
field = clp0->getfield();
|
||||
vector<string> words;
|
||||
// Walk the clause list. If any clause is not of AND type, or if we find a
|
||||
// non-simple clause or differing field names, bail out.
|
||||
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) {
|
||||
SClType tp = (*it)->m_tp;
|
||||
for (auto& clausep : m_query) {
|
||||
SClType tp = clausep->m_tp;
|
||||
if (tp != SCLT_AND) {
|
||||
LOGDEB2("SearchData::maybeAddAutoPhrase: wrong tp " << (tp) << "\n" );
|
||||
LOGDEB2("SearchData::maybeAddAutoPhrase: wrong tp " << tp << "\n");
|
||||
return false;
|
||||
}
|
||||
SearchDataClauseSimple *clp =
|
||||
dynamic_cast<SearchDataClauseSimple*>(*it);
|
||||
auto clp = dynamic_cast<SearchDataClauseSimple*>(clausep);
|
||||
if (clp == 0) {
|
||||
LOGDEB2("SearchData::maybeAddAutoPhrase: dyncast failed\n" );
|
||||
LOGDEB2("SearchData::maybeAddAutoPhrase: other than clauseSimple in query.\n");
|
||||
return false;
|
||||
}
|
||||
if (it == m_query.begin()) {
|
||||
field = clp->getfield();
|
||||
} else {
|
||||
if (clp->getfield().compare(field)) {
|
||||
LOGDEB2("SearchData::maybeAddAutoPhrase: diff. fields\n" );
|
||||
return false;
|
||||
}
|
||||
if (clp->getfield().compare(field)) {
|
||||
LOGDEB2("SearchData::maybeAddAutoPhrase: diff. fields\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// If there are wildcards or quotes in there, bail out
|
||||
if (clp->gettext().find_first_of("\"*[?") != string::npos) {
|
||||
LOGDEB2("SearchData::maybeAddAutoPhrase: wildcards\n" );
|
||||
LOGDEB2("SearchData::maybeAddAutoPhrase: wildcards\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -124,16 +119,15 @@ bool SearchData::maybeAddAutoPhrase(Rcl::Db& db, double freqThreshold)
|
||||
if (!doccnt)
|
||||
doccnt = 1;
|
||||
string swords;
|
||||
for (vector<string>::iterator it = words.begin();
|
||||
it != words.end(); it++) {
|
||||
double freq = double(db.termDocCnt(*it)) / doccnt;
|
||||
for (const auto& word : words) {
|
||||
double freq = double(db.termDocCnt(word)) / doccnt;
|
||||
if (freq < freqThreshold) {
|
||||
if (!swords.empty())
|
||||
swords.append(1, ' ');
|
||||
swords += *it;
|
||||
swords += word;
|
||||
} else {
|
||||
LOGDEB0("SearchData::Autophrase: [" << *it << "] too frequent ("
|
||||
<< (100 * freq) << " %" << ")\n" );
|
||||
LOGDEB0("SearchData::Autophrase: [" << word << "] too frequent ("
|
||||
<< (100 * freq) << " %" << ")\n");
|
||||
slack++;
|
||||
}
|
||||
}
|
||||
@ -141,7 +135,7 @@ bool SearchData::maybeAddAutoPhrase(Rcl::Db& db, double freqThreshold)
|
||||
// We can't make a phrase with a single word :)
|
||||
int nwords = TextSplit::countWords(swords);
|
||||
if (nwords <= 1) {
|
||||
LOGDEB2("SearchData::maybeAddAutoPhrase: ended with 1 word\n" );
|
||||
LOGDEB2("SearchData::maybeAddAutoPhrase: ended with 1 word\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -149,8 +143,7 @@ bool SearchData::maybeAddAutoPhrase(Rcl::Db& db, double freqThreshold)
|
||||
// an actual user-entered phrase
|
||||
slack += 1 + nwords / 3;
|
||||
|
||||
m_autophrase = std::shared_ptr<SearchDataClauseDist>(
|
||||
new SearchDataClauseDist(SCLT_PHRASE, swords, slack, field));
|
||||
m_autophrase = make_shared<SearchDataClauseDist>(SCLT_PHRASE, swords, slack, field);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -158,7 +151,7 @@ bool SearchData::maybeAddAutoPhrase(Rcl::Db& db, double freqThreshold)
|
||||
bool SearchData::addClause(SearchDataClause* cl)
|
||||
{
|
||||
if (m_tp == SCLT_OR && cl->getexclude()) {
|
||||
LOGERR("SearchData::addClause: cant add EXCL to OR list\n" );
|
||||
LOGERR("SearchData::addClause: cant add EXCL to OR list\n");
|
||||
m_reason = "No Negative (AND_NOT) clauses allowed in OR queries";
|
||||
return false;
|
||||
}
|
||||
@ -172,27 +165,30 @@ bool SearchData::addClause(SearchDataClause* cl)
|
||||
// There can't be a subclause in a filename search: no possible need to recurse
|
||||
bool SearchData::fileNameOnly()
|
||||
{
|
||||
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)
|
||||
if (!(*it)->isFileName())
|
||||
for (const auto& clausep : m_query) {
|
||||
if (!clausep->isFileName())
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// The query language creates a lot of subqueries. See if we can merge them.
|
||||
void SearchData::simplify()
|
||||
{
|
||||
LOGDEB0("SearchData::simplify()\n");
|
||||
for (unsigned int i = 0; i < m_query.size(); i++) {
|
||||
if (m_query[i]->m_tp != SCLT_SUB)
|
||||
continue;
|
||||
// Is it this dynamic_cast that crashes??
|
||||
SearchDataClauseSub *clsubp =
|
||||
dynamic_cast<SearchDataClauseSub*>(m_query[i]);
|
||||
if (clsubp == 0) {
|
||||
|
||||
auto clsubp = dynamic_cast<SearchDataClauseSub*>(m_query[i]);
|
||||
if (nullptr == clsubp) {
|
||||
// ??
|
||||
continue;
|
||||
}
|
||||
if (clsubp->getSub()->m_tp != m_tp)
|
||||
if (clsubp->getSub()->m_tp != m_tp) {
|
||||
LOGDEB0("Not simplifying because sub has differing m_tp\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
clsubp->getSub()->simplify();
|
||||
|
||||
@ -211,8 +207,8 @@ void SearchData::simplify()
|
||||
clsubp->getSub()->m_filetypes.begin(),
|
||||
clsubp->getSub()->m_filetypes.end());
|
||||
m_nfiletypes.insert(m_nfiletypes.end(),
|
||||
clsubp->getSub()->m_nfiletypes.begin(),
|
||||
clsubp->getSub()->m_nfiletypes.end());
|
||||
clsubp->getSub()->m_nfiletypes.begin(),
|
||||
clsubp->getSub()->m_nfiletypes.end());
|
||||
if (clsubp->getSub()->m_haveDates && !m_haveDates) {
|
||||
m_dates = clsubp->getSub()->m_dates;
|
||||
}
|
||||
@ -220,34 +216,17 @@ void SearchData::simplify()
|
||||
m_maxSize = clsubp->getSub()->m_maxSize;
|
||||
if (m_minSize == -1)
|
||||
m_minSize = clsubp->getSub()->m_minSize;
|
||||
m_haveWildCards = m_haveWildCards ||
|
||||
clsubp->getSub()->m_haveWildCards;
|
||||
m_haveWildCards = m_haveWildCards || clsubp->getSub()->m_haveWildCards;
|
||||
// And then let the clauses processing go on, there are
|
||||
// none anyway, we will just delete the subquery.
|
||||
}
|
||||
|
||||
|
||||
bool allsametp = true;
|
||||
for (qlist_it_t it1 = clsubp->getSub()->m_query.begin();
|
||||
it1 != clsubp->getSub()->m_query.end(); it1++) {
|
||||
// We want all AND or OR clause, and same as our conjunction
|
||||
if (((*it1)->getTp() != SCLT_AND && (*it1)->getTp() != SCLT_OR) ||
|
||||
(*it1)->getTp() != m_tp) {
|
||||
allsametp = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!allsametp)
|
||||
continue;
|
||||
|
||||
// All ok: delete the clause_sub, and insert the queries from
|
||||
// its searchdata in its place
|
||||
// Delete the clause_sub, and insert the queries from its searchdata in its place
|
||||
m_query.erase(m_query.begin() + i);
|
||||
m_query.insert(m_query.begin() + i,
|
||||
clsubp->getSub()->m_query.begin(),
|
||||
m_query.insert(m_query.begin() + i, clsubp->getSub()->m_query.begin(),
|
||||
clsubp->getSub()->m_query.end());
|
||||
for (unsigned int j = i;
|
||||
j < i + clsubp->getSub()->m_query.size(); j++) {
|
||||
for (unsigned int j = i; j < i + clsubp->getSub()->m_query.size(); j++) {
|
||||
m_query[j]->setParent(this);
|
||||
}
|
||||
i += int(clsubp->getSub()->m_query.size()) - 1;
|
||||
@ -262,11 +241,10 @@ void SearchData::simplify()
|
||||
// Extract terms and groups for highlighting
|
||||
void SearchData::getTerms(HighlightData &hld) const
|
||||
{
|
||||
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++) {
|
||||
if (!((*it)->getmodifiers() & SearchDataClause::SDCM_NOTERMS) &&
|
||||
!(*it)->getexclude()) {
|
||||
(*it)->getTerms(hld);
|
||||
}
|
||||
for (const auto& clausep : m_query) {
|
||||
if (!(clausep->getModifiers() & SearchDataClause::SDCM_NOTERMS) && !clausep->getexclude()) {
|
||||
clausep->getTerms(hld);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -294,10 +272,9 @@ void SearchData::dump(ostream& o) const
|
||||
" ft " << m_filetypes.size() << " nft " << m_nfiletypes.size() <<
|
||||
" hd " << m_haveDates << " maxs " << m_maxSize << " mins " <<
|
||||
m_minSize << " wc " << m_haveWildCards << "\n";
|
||||
for (std::vector<SearchDataClause*>::const_iterator it =
|
||||
m_query.begin(); it != m_query.end(); it++) {
|
||||
for (const auto& clausep : m_query) {
|
||||
o << dumptabs;
|
||||
(*it)->dump(o);
|
||||
clausep->dump(o);
|
||||
o << "\n";
|
||||
}
|
||||
// o << dumptabs << "\n";
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2004 J.F.Dockes
|
||||
/* Copyright (C) 2004-2022 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -18,7 +18,7 @@
|
||||
#define _SEARCHDATA_H_INCLUDED_
|
||||
|
||||
/**
|
||||
* Structures to hold data coming almost directly from the gui
|
||||
* Structures to hold data coming almost directly from the GUI
|
||||
* and handle its translation to Xapian queries.
|
||||
* This is not generic code, it reflects the choices made for the user
|
||||
* interface, and it also knows some specifics of recoll's usage of Xapian
|
||||
@ -27,9 +27,9 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <ostream>
|
||||
#include <memory>
|
||||
|
||||
#include "rcldb.h"
|
||||
#include <memory>
|
||||
#include "smallut.h"
|
||||
#include "cstr.h"
|
||||
#include "hldata.h"
|
||||
@ -238,6 +238,7 @@ public:
|
||||
// Aargh special case. pathelts are case/diac-sensitive
|
||||
// even in a stripped index
|
||||
SDCM_PATHELT = 0x80,
|
||||
SDCM_FILTER = 0x100,
|
||||
};
|
||||
enum Relation {REL_CONTAINS, REL_EQUALS, REL_LT, REL_LTE, REL_GT, REL_GTE};
|
||||
|
||||
@ -285,7 +286,7 @@ public:
|
||||
virtual void addModifier(Modifier mod) {
|
||||
m_modifiers = m_modifiers | mod;
|
||||
}
|
||||
virtual unsigned int getmodifiers() {
|
||||
virtual unsigned int getModifiers() {
|
||||
return m_modifiers;
|
||||
}
|
||||
virtual void setWeight(float w) {
|
||||
@ -419,6 +420,7 @@ public:
|
||||
: SearchDataClauseSimple(txt, SCLT_FILENAME) {
|
||||
// File name searches don't count when looking for wild cards.
|
||||
m_haveWildCards = false;
|
||||
addModifier(SDCM_FILTER);
|
||||
}
|
||||
|
||||
virtual ~SearchDataClauseFilename() {}
|
||||
@ -454,6 +456,7 @@ public:
|
||||
: SearchDataClauseSimple(SCLT_PATH, txt, "dir") {
|
||||
m_exclude = excl;
|
||||
m_haveWildCards = false;
|
||||
addModifier(SDCM_FILTER);
|
||||
}
|
||||
|
||||
virtual ~SearchDataClausePath() {}
|
||||
|
||||
@ -65,7 +65,6 @@ bool SearchData::expandFileTypes(Db &db, vector<string>& tps)
|
||||
return false;
|
||||
}
|
||||
vector<string> exptps;
|
||||
|
||||
for (const auto& mtype : tps) {
|
||||
if (cfg->isMimeCategory(mtype)) {
|
||||
vector<string> ctps;
|
||||
@ -106,6 +105,14 @@ bool SearchData::clausesToQuery(
|
||||
{
|
||||
Xapian::Query xq;
|
||||
for (auto& clausep : query) {
|
||||
#if 0
|
||||
string txt;
|
||||
auto clp = dynamic_cast<SearchDataClauseSimple*>(clausep);
|
||||
if (clp)
|
||||
txt = clp->gettext();
|
||||
LOGINF("Clause: tp: " << clausep->getTp() << " txt: [" << txt << "] mods: " <<
|
||||
std::hex << clausep->getModifiers() << std::dec << "\n");
|
||||
#endif
|
||||
Xapian::Query nq;
|
||||
if (!clausep->toNativeQuery(db, &nq)) {
|
||||
LOGERR("SearchData::clausesToQuery: toNativeQuery failed: "
|
||||
@ -114,7 +121,7 @@ bool SearchData::clausesToQuery(
|
||||
return false;
|
||||
}
|
||||
if (nq.empty()) {
|
||||
LOGDEB("SearchData::clausesToQuery: skipping empty clause\n");
|
||||
LOGDEB0("SearchData::clausesToQuery: skipping empty clause\n");
|
||||
continue;
|
||||
}
|
||||
// If this structure is an AND list, must use AND_NOT for excl clauses.
|
||||
@ -125,7 +132,11 @@ bool SearchData::clausesToQuery(
|
||||
if (clausep->getexclude()) {
|
||||
op = Xapian::Query::OP_AND_NOT;
|
||||
} else {
|
||||
op = Xapian::Query::OP_AND;
|
||||
if (clausep->getModifiers() & SearchDataClause::SDCM_FILTER) {
|
||||
op = Xapian::Query::OP_FILTER;
|
||||
} else {
|
||||
op = Xapian::Query::OP_AND;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
op = Xapian::Query::OP_OR;
|
||||
@ -166,12 +177,12 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
||||
db.getConf()->getConfParam("autocasesens", &m_autocasesens);
|
||||
db.getConf()->getConfParam("autodiacsens", &m_autodiacsens);
|
||||
|
||||
simplify();
|
||||
// Walk the clause list translating each in turn and building the
|
||||
// Xapian query tree
|
||||
Xapian::Query xq;
|
||||
if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
|
||||
LOGERR("SearchData::toNativeQuery: clausesToQuery failed. reason: "
|
||||
<< m_reason << "\n");
|
||||
LOGERR("SearchData::toNativeQuery: clausesToQuery failed. reason: " << m_reason << "\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -231,8 +242,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
||||
leftzeropad(minvalue, 12);
|
||||
string maxvalue(max);
|
||||
leftzeropad(maxvalue, 12);
|
||||
sq = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, VALUE_SIZE,
|
||||
minvalue, maxvalue);
|
||||
sq = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, VALUE_SIZE, minvalue, maxvalue);
|
||||
}
|
||||
|
||||
// If no probabilistic query is provided then promote the
|
||||
@ -249,8 +259,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
||||
if (m_autophrase) {
|
||||
Xapian::Query apq;
|
||||
if (m_autophrase->toNativeQuery(db, &apq)) {
|
||||
xq = xq.empty() ? apq :
|
||||
Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, apq);
|
||||
xq = xq.empty() ? apq : Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, apq);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
Query: (A AND B) OR (C AND D) -> Recoll query: Query(((a AND b) OR (c AND d)))
|
||||
Query: (A OR B) AND (C OR D) -> Recoll query: Query(((a OR b) AND (c OR d)))
|
||||
Query: -the B -> Recoll query: Query(((<alldocuments> AND_NOT the) AND b))
|
||||
Query: A -B -> Recoll query: Query((a AND (<alldocuments> AND_NOT b)))
|
||||
Query: A -B -> Recoll query: Query((a AND_NOT b))
|
||||
Query: mime:text/plain -> Recoll query: Query((<alldocuments> FILTER Ttext/plain))
|
||||
Query: size>10k -> Recoll query: Query((<alldocuments> FILTER VALUE_GE 2 000000010000))
|
||||
Query: date:3000-01-01 -> Recoll query: Query((<alldocuments> FILTER D30000101))
|
||||
@ -23,5 +23,5 @@
|
||||
Query: A OR B date:3000-01-01 -> Recoll query: Query(((a OR b) FILTER D30000101))
|
||||
Query: A OR B AND date:3000-01-01 -> Recoll query: Query(((a OR b) FILTER D30000101))
|
||||
Query: title:A B -> Recoll query: Query((Sa AND b))
|
||||
Query: title:A -B -> Recoll query: Query((Sa AND (<alldocuments> AND_NOT b)))
|
||||
Query: A -title:B -> Recoll query: Query((a AND (<alldocuments> AND_NOT Sb)))
|
||||
Query: title:A -B -> Recoll query: Query((Sa AND_NOT b))
|
||||
Query: A -title:B -> Recoll query: Query((a AND_NOT Sb))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user