The way we added the autophrase to the query (OR) created false matches. Use AND_MAYBE instead.
This commit is contained in:
parent
243ac82526
commit
e7b3042310
@ -250,7 +250,7 @@ SearchData_addclause(recoll_SearchDataObject* self, PyObject *args,
|
||||
case 'S':
|
||||
if (strcasecmp(tp, "sub"))
|
||||
goto defaultcase;
|
||||
cl = new Rcl::SearchDataClauseSub(Rcl::SCLT_SUB, sub->sd);
|
||||
cl = new Rcl::SearchDataClauseSub(sub->sd);
|
||||
break;
|
||||
defaultcase:
|
||||
default:
|
||||
|
||||
@ -161,7 +161,7 @@ bool DocSequenceDb::setFiltSpec(const DocSeqFiltSpec &fs)
|
||||
m_fsdata = RefCntr<Rcl::SearchData>(
|
||||
new Rcl::SearchData(Rcl::SCLT_AND, m_sdata->getStemLang()));
|
||||
Rcl::SearchDataClauseSub *cl =
|
||||
new Rcl::SearchDataClauseSub(Rcl::SCLT_SUB, m_sdata);
|
||||
new Rcl::SearchDataClauseSub(m_sdata);
|
||||
m_fsdata->addClause(cl);
|
||||
|
||||
for (unsigned int i = 0; i < fs.crits.size(); i++) {
|
||||
@ -181,8 +181,8 @@ bool DocSequenceDb::setFiltSpec(const DocSeqFiltSpec &fs)
|
||||
fs.values[i], reason);
|
||||
if (sd) {
|
||||
Rcl::SearchDataClauseSub *cl1 =
|
||||
new Rcl::SearchDataClauseSub(Rcl::SCLT_SUB,
|
||||
RefCntr<Rcl::SearchData>(sd));
|
||||
new Rcl::SearchDataClauseSub(
|
||||
RefCntr<Rcl::SearchData>(sd));
|
||||
m_fsdata->addClause(cl1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -241,8 +241,7 @@ static Rcl::SearchData *wasaQueryToRcl(const RclConfig *config,
|
||||
continue;
|
||||
}
|
||||
nclause =
|
||||
new Rcl::SearchDataClauseSub(Rcl::SCLT_SUB,
|
||||
RefCntr<Rcl::SearchData>(sub));
|
||||
new Rcl::SearchDataClauseSub(RefCntr<Rcl::SearchData>(sub));
|
||||
if (nclause == 0) {
|
||||
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
||||
reason = "Out of memory";
|
||||
|
||||
@ -60,11 +60,18 @@ void SearchData::commoninit()
|
||||
m_maxSize = size_t(-1);
|
||||
m_minSize = size_t(-1);
|
||||
m_haveWildCards = false;
|
||||
m_softmaxexpand = -1;
|
||||
m_autodiacsens = false;
|
||||
m_autocasesens = true;
|
||||
m_maxexp = 10000;
|
||||
m_maxcl = 100000;
|
||||
m_softmaxexpand = -1;
|
||||
}
|
||||
|
||||
// Destructor: the clause pointers stored in m_query are deleted here,
// one by one.
SearchData::~SearchData()
{
    LOGDEB0(("SearchData::~SearchData\n"));
    qlist_it_t clause = m_query.begin();
    while (clause != m_query.end()) {
        delete *clause;
        ++clause;
    }
}
|
||||
|
||||
// Expand categories and mime type wild card exps Categories are
|
||||
@ -259,6 +266,15 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
||||
}
|
||||
}
|
||||
|
||||
// Add the autophrase if any
|
||||
if (m_autophrase.isNotNull()) {
|
||||
Xapian::Query apq;
|
||||
if (m_autophrase->toNativeQuery(db, &apq)) {
|
||||
xq = xq.empty() ? apq :
|
||||
Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, apq);
|
||||
}
|
||||
}
|
||||
|
||||
// Add the file type filtering clause if any
|
||||
if (!m_filetypes.empty()) {
|
||||
expandFileTypes(db, m_filetypes);
|
||||
@ -376,27 +392,8 @@ bool SearchData::maybeAddAutoPhrase(Rcl::Db& db, double freqThreshold)
|
||||
// an actual user-entered phrase
|
||||
slack += 1 + nwords / 3;
|
||||
|
||||
SearchDataClauseDist *nclp =
|
||||
new SearchDataClauseDist(SCLT_PHRASE, swords, slack, field);
|
||||
|
||||
// If the toplevel conjunction is an OR, just OR the phrase, else
|
||||
// deepen the tree.
|
||||
if (m_tp == SCLT_OR) {
|
||||
addClause(nclp);
|
||||
} else {
|
||||
// My type is AND. Change it to OR and insert two queries, one
|
||||
// being the original query as a subquery, the other the
|
||||
// phrase.
|
||||
SearchData *sd = new SearchData(m_tp, m_stemlang);
|
||||
sd->m_query = m_query;
|
||||
sd->m_stemlang = m_stemlang;
|
||||
m_tp = SCLT_OR;
|
||||
m_query.clear();
|
||||
SearchDataClauseSub *oq =
|
||||
new SearchDataClauseSub(SCLT_OR, RefCntr<SearchData>(sd));
|
||||
addClause(oq);
|
||||
addClause(nclp);
|
||||
}
|
||||
m_autophrase = RefCntr<SearchDataClauseDist>(
|
||||
new SearchDataClauseDist(SCLT_PHRASE, swords, slack, field));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -414,22 +411,6 @@ bool SearchData::addClause(SearchDataClause* cl)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Make me all new
|
||||
void SearchData::erase()
|
||||
{
|
||||
LOGDEB0(("SearchData::erase\n"));
|
||||
m_tp = SCLT_AND;
|
||||
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)
|
||||
delete *it;
|
||||
m_query.clear();
|
||||
m_filetypes.clear();
|
||||
m_description.erase();
|
||||
m_reason.erase();
|
||||
m_haveDates = false;
|
||||
m_minSize = size_t(-1);
|
||||
m_maxSize = size_t(-1);
|
||||
}
|
||||
|
||||
// Am I a file name only search ? This is to turn off term highlighting
|
||||
bool SearchData::fileNameOnly()
|
||||
{
|
||||
|
||||
@ -46,6 +46,7 @@ enum SClType {
|
||||
};
|
||||
|
||||
class SearchDataClause;
|
||||
class SearchDataClauseDist;
|
||||
|
||||
/**
|
||||
A SearchData object represents a Recoll user query, for translation
|
||||
@ -89,13 +90,7 @@ public:
|
||||
commoninit();
|
||||
}
|
||||
|
||||
~SearchData()
|
||||
{
|
||||
erase();
|
||||
}
|
||||
|
||||
/** Make pristine */
|
||||
void erase();
|
||||
~SearchData();
|
||||
|
||||
/** Is there anything but a file name search in here ? */
|
||||
bool fileNameOnly();
|
||||
@ -175,7 +170,10 @@ private:
|
||||
std::vector<std::string> m_filetypes;
|
||||
// Excluded set of file types if not empty
|
||||
std::vector<std::string> m_nfiletypes;
|
||||
|
||||
// Autophrase if set. Can't be part of the normal chain because
|
||||
// it uses OP_AND_MAYBE
|
||||
RefCntr<SearchDataClauseDist> m_autophrase;
|
||||
//
|
||||
bool m_haveDates;
|
||||
DateInterval m_dates; // Restrict to date interval
|
||||
size_t m_maxSize;
|
||||
@ -461,8 +459,8 @@ private:
|
||||
/** Subquery */
|
||||
class SearchDataClauseSub : public SearchDataClause {
|
||||
public:
|
||||
SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub)
|
||||
: SearchDataClause(tp), m_sub(sub)
|
||||
SearchDataClauseSub(RefCntr<SearchData> sub)
|
||||
: SearchDataClause(SCLT_SUB), m_sub(sub)
|
||||
{
|
||||
}
|
||||
virtual bool toNativeQuery(Rcl::Db &db, void *p)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user