The way we added the autophrase to the query (OR) created false matches. Use AND_MAYBE instead.

This commit is contained in:
Jean-Francois Dockes 2013-05-26 15:26:41 +02:00
parent 243ac82526
commit e7b3042310
5 changed files with 32 additions and 54 deletions

View File

@ -250,7 +250,7 @@ SearchData_addclause(recoll_SearchDataObject* self, PyObject *args,
case 'S':
if (strcasecmp(tp, "sub"))
goto defaultcase;
cl = new Rcl::SearchDataClauseSub(Rcl::SCLT_SUB, sub->sd);
cl = new Rcl::SearchDataClauseSub(sub->sd);
break;
defaultcase:
default:

View File

@ -161,7 +161,7 @@ bool DocSequenceDb::setFiltSpec(const DocSeqFiltSpec &fs)
m_fsdata = RefCntr<Rcl::SearchData>(
new Rcl::SearchData(Rcl::SCLT_AND, m_sdata->getStemLang()));
Rcl::SearchDataClauseSub *cl =
new Rcl::SearchDataClauseSub(Rcl::SCLT_SUB, m_sdata);
new Rcl::SearchDataClauseSub(m_sdata);
m_fsdata->addClause(cl);
for (unsigned int i = 0; i < fs.crits.size(); i++) {
@ -181,8 +181,8 @@ bool DocSequenceDb::setFiltSpec(const DocSeqFiltSpec &fs)
fs.values[i], reason);
if (sd) {
Rcl::SearchDataClauseSub *cl1 =
new Rcl::SearchDataClauseSub(Rcl::SCLT_SUB,
RefCntr<Rcl::SearchData>(sd));
new Rcl::SearchDataClauseSub(
RefCntr<Rcl::SearchData>(sd));
m_fsdata->addClause(cl1);
}
}

View File

@ -241,8 +241,7 @@ static Rcl::SearchData *wasaQueryToRcl(const RclConfig *config,
continue;
}
nclause =
new Rcl::SearchDataClauseSub(Rcl::SCLT_SUB,
RefCntr<Rcl::SearchData>(sub));
new Rcl::SearchDataClauseSub(RefCntr<Rcl::SearchData>(sub));
if (nclause == 0) {
LOGERR(("wasaQueryToRcl: out of memory\n"));
reason = "Out of memory";

View File

@ -60,11 +60,18 @@ void SearchData::commoninit()
m_maxSize = size_t(-1);
m_minSize = size_t(-1);
m_haveWildCards = false;
m_softmaxexpand = -1;
m_autodiacsens = false;
m_autocasesens = true;
m_maxexp = 10000;
m_maxcl = 100000;
m_softmaxexpand = -1;
}
// Destructor: logs the call, then deletes every clause pointer held in
// m_query.
SearchData::~SearchData()
{
    LOGDEB0(("SearchData::~SearchData\n"));
    qlist_it_t clause = m_query.begin();
    while (clause != m_query.end()) {
        delete *clause;
        ++clause;
    }
}
// Expand categories and mime type wild card exps Categories are
@ -259,6 +266,15 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
}
}
// Add the autophrase if any
if (m_autophrase.isNotNull()) {
Xapian::Query apq;
if (m_autophrase->toNativeQuery(db, &apq)) {
xq = xq.empty() ? apq :
Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, apq);
}
}
// Add the file type filtering clause if any
if (!m_filetypes.empty()) {
expandFileTypes(db, m_filetypes);
@ -376,27 +392,8 @@ bool SearchData::maybeAddAutoPhrase(Rcl::Db& db, double freqThreshold)
// an actual user-entered phrase
slack += 1 + nwords / 3;
SearchDataClauseDist *nclp =
new SearchDataClauseDist(SCLT_PHRASE, swords, slack, field);
// If the toplevel conjunction is an OR, just OR the phrase, else
// deepen the tree.
if (m_tp == SCLT_OR) {
addClause(nclp);
} else {
// My type is AND. Change it to OR and insert two queries, one
// being the original query as a subquery, the other the
// phrase.
SearchData *sd = new SearchData(m_tp, m_stemlang);
sd->m_query = m_query;
sd->m_stemlang = m_stemlang;
m_tp = SCLT_OR;
m_query.clear();
SearchDataClauseSub *oq =
new SearchDataClauseSub(SCLT_OR, RefCntr<SearchData>(sd));
addClause(oq);
addClause(nclp);
}
m_autophrase = RefCntr<SearchDataClauseDist>(
new SearchDataClauseDist(SCLT_PHRASE, swords, slack, field));
return true;
}
@ -414,22 +411,6 @@ bool SearchData::addClause(SearchDataClause* cl)
return true;
}
// Make me all new
void SearchData::erase()
{
LOGDEB0(("SearchData::erase\n"));
m_tp = SCLT_AND;
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)
delete *it;
m_query.clear();
m_filetypes.clear();
m_description.erase();
m_reason.erase();
m_haveDates = false;
m_minSize = size_t(-1);
m_maxSize = size_t(-1);
}
// Am I a file name only search ? This is to turn off term highlighting
bool SearchData::fileNameOnly()
{

View File

@ -46,6 +46,7 @@ enum SClType {
};
class SearchDataClause;
class SearchDataClauseDist;
/**
A SearchData object represents a Recoll user query, for translation
@ -89,13 +90,7 @@ public:
commoninit();
}
~SearchData()
{
erase();
}
/** Make pristine */
void erase();
~SearchData();
/** Is there anything but a file name search in here ? */
bool fileNameOnly();
@ -175,7 +170,10 @@ private:
std::vector<std::string> m_filetypes;
// Excluded set of file types if not empty
std::vector<std::string> m_nfiletypes;
// Autophrase if set. Can't be part of the normal chain because
// it uses OP_AND_MAYBE
RefCntr<SearchDataClauseDist> m_autophrase;
//
bool m_haveDates;
DateInterval m_dates; // Restrict to date interval
size_t m_maxSize;
@ -461,8 +459,8 @@ private:
/** Subquery */
class SearchDataClauseSub : public SearchDataClause {
public:
SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub)
: SearchDataClause(tp), m_sub(sub)
SearchDataClauseSub(RefCntr<SearchData> sub)
: SearchDataClause(SCLT_SUB), m_sub(sub)
{
}
virtual bool toNativeQuery(Rcl::Db &db, void *p)