Search: allow setting weights on terms, e.g. "important"2.5
This commit is contained in:
parent
72fe512c5a
commit
91f277ec26
@ -84,17 +84,16 @@ void WasaQuery::describe(string &desc) const
|
|||||||
desc.erase(desc.length() - 1);
|
desc.erase(desc.length() - 1);
|
||||||
desc += ")";
|
desc += ")";
|
||||||
if (m_modifiers != 0) {
|
if (m_modifiers != 0) {
|
||||||
if (m_modifiers & WQM_BOOST) desc += "BOOST|";
|
if (m_modifiers & WQM_BOOST) desc += "BOOST|";
|
||||||
if (m_modifiers & WQM_CASESENS) desc += "CASESENS|";
|
if (m_modifiers & WQM_CASESENS) desc += "CASESENS|";
|
||||||
if (m_modifiers & WQM_DIACSENS) desc += "DIACSENS|";
|
if (m_modifiers & WQM_DIACSENS) desc += "DIACSENS|";
|
||||||
|
if (m_modifiers & WQM_FUZZY) desc += "FUZZY|";
|
||||||
if (m_modifiers & WQM_NOSTEM) desc += "NOSTEM|";
|
if (m_modifiers & WQM_NOSTEM) desc += "NOSTEM|";
|
||||||
if (m_modifiers & WQM_BOOST) desc += "BOOST|";
|
if (m_modifiers & WQM_PHRASESLACK) desc += "PHRASESLACK|";
|
||||||
if (m_modifiers & WQM_PROX) desc += "PROX|";
|
if (m_modifiers & WQM_PROX) desc += "PROX|";
|
||||||
|
if (m_modifiers & WQM_REGEX) desc += "REGEX|";
|
||||||
if (m_modifiers & WQM_SLOPPY) desc += "SLOPPY|";
|
if (m_modifiers & WQM_SLOPPY) desc += "SLOPPY|";
|
||||||
if (m_modifiers & WQM_WORDS) desc += "WORDS|";
|
if (m_modifiers & WQM_WORDS) desc += "WORDS|";
|
||||||
if (m_modifiers & WQM_PHRASESLACK) desc += "PHRASESLACK|";
|
|
||||||
if (m_modifiers & WQM_REGEX) desc += "REGEX|";
|
|
||||||
if (m_modifiers & WQM_FUZZY) desc += "FUZZY|";
|
|
||||||
if (desc.length() > 0 && desc[desc.length()-1] == '|')
|
if (desc.length() > 0 && desc[desc.length()-1] == '|')
|
||||||
desc = desc.substr(0, desc.length()-1);
|
desc = desc.substr(0, desc.length()-1);
|
||||||
}
|
}
|
||||||
@ -132,7 +131,7 @@ static const char * parserExpr =
|
|||||||
"(\"" //9
|
"(\"" //9
|
||||||
"([^\"]+)" //10 "A quoted term"
|
"([^\"]+)" //10 "A quoted term"
|
||||||
"\")"
|
"\")"
|
||||||
"([a-zA-Z0-9]*)" //11 modifiers
|
"([bcCdDeflLoprsw.0-9]*)" //11 modifiers
|
||||||
"|"
|
"|"
|
||||||
"([^[:space:]\"]+)" //12 ANormalTerm
|
"([^[:space:]\"]+)" //12 ANormalTerm
|
||||||
")"
|
")"
|
||||||
@ -152,7 +151,7 @@ static const char *matchNames[] = {
|
|||||||
/* 8*/ "",
|
/* 8*/ "",
|
||||||
/* 9*/ "",
|
/* 9*/ "",
|
||||||
/*10*/ "QUOTEDTERM",
|
/*10*/ "QUOTEDTERM",
|
||||||
/*11*/ "MODIIFIERS",
|
/*11*/ "MODIFIERS",
|
||||||
/*12*/ "TERM",
|
/*12*/ "TERM",
|
||||||
};
|
};
|
||||||
#define NMATCH (sizeof(matchNames) / sizeof(char *))
|
#define NMATCH (sizeof(matchNames) / sizeof(char *))
|
||||||
@ -328,12 +327,18 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
|
|||||||
unsigned int mods = 0;
|
unsigned int mods = 0;
|
||||||
for (unsigned int i = 0; i < strlen(match); i++) {
|
for (unsigned int i = 0; i < strlen(match); i++) {
|
||||||
switch (match[i]) {
|
switch (match[i]) {
|
||||||
case 'b': mods |= WasaQuery::WQM_BOOST; break;
|
case 'b':
|
||||||
|
mods |= WasaQuery::WQM_BOOST;
|
||||||
|
nclause->m_weight = 10.0;
|
||||||
|
break;
|
||||||
case 'c': break;
|
case 'c': break;
|
||||||
case 'C': mods |= WasaQuery::WQM_CASESENS; break;
|
case 'C': mods |= WasaQuery::WQM_CASESENS; break;
|
||||||
case 'd': break;
|
case 'd': break;
|
||||||
case 'D': mods |= WasaQuery::WQM_DIACSENS; break;
|
case 'D': mods |= WasaQuery::WQM_DIACSENS; break;
|
||||||
case 'e': mods |= WasaQuery::WQM_CASESENS | WasaQuery::WQM_DIACSENS | WasaQuery::WQM_NOSTEM; break;
|
case 'e': mods |= WasaQuery::WQM_CASESENS |
|
||||||
|
WasaQuery::WQM_DIACSENS |
|
||||||
|
WasaQuery::WQM_NOSTEM;
|
||||||
|
break;
|
||||||
case 'f': mods |= WasaQuery::WQM_FUZZY; break;
|
case 'f': mods |= WasaQuery::WQM_FUZZY; break;
|
||||||
case 'l': mods |= WasaQuery::WQM_NOSTEM; break;
|
case 'l': mods |= WasaQuery::WQM_NOSTEM; break;
|
||||||
case 'L': break;
|
case 'L': break;
|
||||||
@ -342,6 +347,19 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
|
|||||||
case 'r': mods |= WasaQuery::WQM_REGEX; break;
|
case 'r': mods |= WasaQuery::WQM_REGEX; break;
|
||||||
case 's': mods |= WasaQuery::WQM_SLOPPY; break;
|
case 's': mods |= WasaQuery::WQM_SLOPPY; break;
|
||||||
case 'w': mods |= WasaQuery::WQM_WORDS; break;
|
case 'w': mods |= WasaQuery::WQM_WORDS; break;
|
||||||
|
case '.':case '0':case '1':case '2':case '3':case '4':
|
||||||
|
case '5':case '6':case '7':case '8':case '9':
|
||||||
|
{
|
||||||
|
int n;
|
||||||
|
float factor;
|
||||||
|
if (sscanf(match+i, "%f %n", &factor, &n)) {
|
||||||
|
nclause->m_weight = factor;
|
||||||
|
DPRINT((stderr, "Got factor %.2f len %d\n",
|
||||||
|
factor, n));
|
||||||
|
}
|
||||||
|
if (n)
|
||||||
|
i += n-1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nclause->m_modifiers = WasaQuery::Modifier(mods);
|
nclause->m_modifiers = WasaQuery::Modifier(mods);
|
||||||
|
|||||||
@ -63,7 +63,7 @@ public:
|
|||||||
typedef vector<WasaQuery*> subqlist_t;
|
typedef vector<WasaQuery*> subqlist_t;
|
||||||
|
|
||||||
WasaQuery()
|
WasaQuery()
|
||||||
: m_op(OP_NULL), m_modifiers(0)
|
: m_op(OP_NULL), m_modifiers(0), m_weight(1.0)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
~WasaQuery();
|
~WasaQuery();
|
||||||
@ -86,6 +86,7 @@ public:
|
|||||||
vector<WasaQuery*> m_subs;
|
vector<WasaQuery*> m_subs;
|
||||||
|
|
||||||
unsigned int m_modifiers;
|
unsigned int m_modifiers;
|
||||||
|
float m_weight;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -101,7 +101,8 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
|
|||||||
|
|
||||||
// Filtering on location
|
// Filtering on location
|
||||||
if (!stringicmp("dir", (*it)->m_fieldspec)) {
|
if (!stringicmp("dir", (*it)->m_fieldspec)) {
|
||||||
sdata->setTopdir((*it)->m_value, (*it)->m_op == WasaQuery::OP_EXCL);
|
sdata->setTopdir((*it)->m_value, (*it)->m_op == WasaQuery::OP_EXCL,
|
||||||
|
(*it)->m_weight);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -174,6 +175,8 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
|
|||||||
if (mods & WasaQuery::WQM_NOSTEM) {
|
if (mods & WasaQuery::WQM_NOSTEM) {
|
||||||
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
||||||
}
|
}
|
||||||
|
if ((*it)->m_weight != 1.0)
|
||||||
|
nclause->setWeight((*it)->m_weight);
|
||||||
sdata->addClause(nclause);
|
sdata->addClause(nclause);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -203,6 +206,8 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
|
|||||||
}
|
}
|
||||||
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM)
|
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM)
|
||||||
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
||||||
|
if ((*it)->m_weight != 1.0)
|
||||||
|
nclause->setWeight((*it)->m_weight);
|
||||||
sdata->addClause(nclause);
|
sdata->addClause(nclause);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|||||||
@ -278,7 +278,8 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
|||||||
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq);
|
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add the directory filtering clause
|
// Add the directory filtering clause. This is a phrase of terms
|
||||||
|
// prefixed with the pathelt prefix XP
|
||||||
if (!m_topdir.empty()) {
|
if (!m_topdir.empty()) {
|
||||||
vector<string> vpath;
|
vector<string> vpath;
|
||||||
stringToTokens(m_topdir, vpath, "/");
|
stringToTokens(m_topdir, vpath, "/");
|
||||||
@ -288,10 +289,21 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
|||||||
it != vpath.end(); it++){
|
it != vpath.end(); it++){
|
||||||
pvpath.push_back(pathelt_prefix + *it);
|
pvpath.push_back(pathelt_prefix + *it);
|
||||||
}
|
}
|
||||||
xq = Xapian::Query(m_topdirexcl ?
|
Xapian::Query::op tdop;
|
||||||
Xapian::Query::OP_AND_NOT:Xapian::Query::OP_FILTER,
|
if (m_topdirweight == 1.0) {
|
||||||
xq, Xapian::Query(Xapian::Query::OP_PHRASE,
|
tdop = m_topdirexcl ?
|
||||||
pvpath.begin(), pvpath.end()));
|
Xapian::Query::OP_AND_NOT : Xapian::Query::OP_FILTER;
|
||||||
|
} else {
|
||||||
|
tdop = m_topdirexcl ?
|
||||||
|
Xapian::Query::OP_AND_NOT : Xapian::Query::OP_AND_MAYBE;
|
||||||
|
}
|
||||||
|
Xapian::Query tdq = Xapian::Query(Xapian::Query::OP_PHRASE,
|
||||||
|
pvpath.begin(), pvpath.end());
|
||||||
|
if (m_topdirweight != 1.0)
|
||||||
|
tdq = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT,
|
||||||
|
tdq, m_topdirweight);
|
||||||
|
|
||||||
|
xq = Xapian::Query(tdop, xq, tdq);
|
||||||
}
|
}
|
||||||
|
|
||||||
*((Xapian::Query *)d) = xq;
|
*((Xapian::Query *)d) = xq;
|
||||||
@ -847,8 +859,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
|||||||
(m_parentSearch == 0 && !m_haveWildCards);
|
(m_parentSearch == 0 && !m_haveWildCards);
|
||||||
|
|
||||||
StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm);
|
StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm);
|
||||||
if (!tr.processUserString(m_text, m_reason, pqueries,
|
if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList()))
|
||||||
db.getStopList()))
|
|
||||||
return false;
|
return false;
|
||||||
if (pqueries.empty()) {
|
if (pqueries.empty()) {
|
||||||
LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
|
LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
|
||||||
@ -858,6 +869,9 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
|||||||
tr.getUTerms(m_uterms);
|
tr.getUTerms(m_uterms);
|
||||||
//listVector("SearchDataClauseSimple: Uterms: ", m_uterms);
|
//listVector("SearchDataClauseSimple: Uterms: ", m_uterms);
|
||||||
*qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
|
*qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
|
||||||
|
if (m_weight != 1.0) {
|
||||||
|
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -887,6 +901,9 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
|
|||||||
more.end());
|
more.end());
|
||||||
*qp = qp->empty() ? tq : Xapian::Query(Xapian::Query::OP_AND, *qp, tq);
|
*qp = qp->empty() ? tq : Xapian::Query(Xapian::Query::OP_AND, *qp, tq);
|
||||||
}
|
}
|
||||||
|
if (m_weight != 1.0) {
|
||||||
|
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -932,6 +949,9 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
|||||||
tr.getTerms(m_terms, m_groups);
|
tr.getTerms(m_terms, m_groups);
|
||||||
tr.getUTerms(m_uterms);
|
tr.getUTerms(m_uterms);
|
||||||
*qp = *pqueries.begin();
|
*qp = *pqueries.begin();
|
||||||
|
if (m_weight != 1.0) {
|
||||||
|
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -73,8 +73,8 @@ class SearchDataClause;
|
|||||||
class SearchData {
|
class SearchData {
|
||||||
public:
|
public:
|
||||||
SearchData(SClType tp)
|
SearchData(SClType tp)
|
||||||
: m_tp(tp), m_topdirexcl(false), m_haveDates(false),
|
: m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0),
|
||||||
m_haveWildCards(false)
|
m_haveDates(false), m_haveWildCards(false)
|
||||||
{
|
{
|
||||||
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
|
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
|
||||||
m_tp = SCLT_OR;
|
m_tp = SCLT_OR;
|
||||||
@ -104,10 +104,11 @@ public:
|
|||||||
bool maybeAddAutoPhrase();
|
bool maybeAddAutoPhrase();
|
||||||
|
|
||||||
/** Set/get top subdirectory for filtering results */
|
/** Set/get top subdirectory for filtering results */
|
||||||
void setTopdir(const string& t, bool excl = false)
|
void setTopdir(const string& t, bool excl = false, float w = 1.0)
|
||||||
{
|
{
|
||||||
m_topdir = t;
|
m_topdir = t;
|
||||||
m_topdirexcl = excl;
|
m_topdirexcl = excl;
|
||||||
|
m_topdirweight = w;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Set date span for filtering results */
|
/** Set date span for filtering results */
|
||||||
@ -147,6 +148,7 @@ private:
|
|||||||
vector<string> m_nfiletypes; // Unwanted file types
|
vector<string> m_nfiletypes; // Unwanted file types
|
||||||
string m_topdir; // Restrict to subtree.
|
string m_topdir; // Restrict to subtree.
|
||||||
bool m_topdirexcl; // Invert meaning
|
bool m_topdirexcl; // Invert meaning
|
||||||
|
float m_topdirweight; // affect weight instead of filter
|
||||||
bool m_haveDates;
|
bool m_haveDates;
|
||||||
DateInterval m_dates; // Restrict to date interval
|
DateInterval m_dates; // Restrict to date interval
|
||||||
// Printable expanded version of the complete query, retrieved/set
|
// Printable expanded version of the complete query, retrieved/set
|
||||||
@ -167,7 +169,7 @@ public:
|
|||||||
|
|
||||||
SearchDataClause(SClType tp)
|
SearchDataClause(SClType tp)
|
||||||
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
|
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
|
||||||
m_modifiers(SDCM_NONE)
|
m_modifiers(SDCM_NONE), m_weight(1.0)
|
||||||
{}
|
{}
|
||||||
virtual ~SearchDataClause() {}
|
virtual ~SearchDataClause() {}
|
||||||
virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0;
|
virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0;
|
||||||
@ -180,7 +182,7 @@ public:
|
|||||||
SClType getTp() {return m_tp;}
|
SClType getTp() {return m_tp;}
|
||||||
void setParent(SearchData *p) {m_parentSearch = p;}
|
void setParent(SearchData *p) {m_parentSearch = p;}
|
||||||
virtual void setModifiers(Modifier mod) {m_modifiers = mod;}
|
virtual void setModifiers(Modifier mod) {m_modifiers = mod;}
|
||||||
|
virtual void setWeight(float w) {m_weight = w;}
|
||||||
friend class SearchData;
|
friend class SearchData;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@ -189,6 +191,7 @@ protected:
|
|||||||
SearchData *m_parentSearch;
|
SearchData *m_parentSearch;
|
||||||
bool m_haveWildCards;
|
bool m_haveWildCards;
|
||||||
Modifier m_modifiers;
|
Modifier m_modifiers;
|
||||||
|
float m_weight;
|
||||||
private:
|
private:
|
||||||
SearchDataClause(const SearchDataClause&) {}
|
SearchDataClause(const SearchDataClause&) {}
|
||||||
SearchDataClause& operator=(const SearchDataClause&) {
|
SearchDataClause& operator=(const SearchDataClause&) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user