Search: allow setting weights on terms, e.g. "important"2.5
This commit is contained in:
parent
72fe512c5a
commit
91f277ec26
@ -84,17 +84,16 @@ void WasaQuery::describe(string &desc) const
|
||||
desc.erase(desc.length() - 1);
|
||||
desc += ")";
|
||||
if (m_modifiers != 0) {
|
||||
if (m_modifiers & WQM_BOOST) desc += "BOOST|";
|
||||
if (m_modifiers & WQM_BOOST) desc += "BOOST|";
|
||||
if (m_modifiers & WQM_CASESENS) desc += "CASESENS|";
|
||||
if (m_modifiers & WQM_DIACSENS) desc += "DIACSENS|";
|
||||
if (m_modifiers & WQM_FUZZY) desc += "FUZZY|";
|
||||
if (m_modifiers & WQM_NOSTEM) desc += "NOSTEM|";
|
||||
if (m_modifiers & WQM_BOOST) desc += "BOOST|";
|
||||
if (m_modifiers & WQM_PHRASESLACK) desc += "PHRASESLACK|";
|
||||
if (m_modifiers & WQM_PROX) desc += "PROX|";
|
||||
if (m_modifiers & WQM_REGEX) desc += "REGEX|";
|
||||
if (m_modifiers & WQM_SLOPPY) desc += "SLOPPY|";
|
||||
if (m_modifiers & WQM_WORDS) desc += "WORDS|";
|
||||
if (m_modifiers & WQM_PHRASESLACK) desc += "PHRASESLACK|";
|
||||
if (m_modifiers & WQM_REGEX) desc += "REGEX|";
|
||||
if (m_modifiers & WQM_FUZZY) desc += "FUZZY|";
|
||||
if (desc.length() > 0 && desc[desc.length()-1] == '|')
|
||||
desc = desc.substr(0, desc.length()-1);
|
||||
}
|
||||
@ -132,7 +131,7 @@ static const char * parserExpr =
|
||||
"(\"" //9
|
||||
"([^\"]+)" //10 "A quoted term"
|
||||
"\")"
|
||||
"([a-zA-Z0-9]*)" //11 modifiers
|
||||
"([bcCdDeflLoprsw.0-9]*)" //11 modifiers
|
||||
"|"
|
||||
"([^[:space:]\"]+)" //12 ANormalTerm
|
||||
")"
|
||||
@ -152,7 +151,7 @@ static const char *matchNames[] = {
|
||||
/* 8*/ "",
|
||||
/* 9*/ "",
|
||||
/*10*/ "QUOTEDTERM",
|
||||
/*11*/ "MODIIFIERS",
|
||||
/*11*/ "MODIFIERS",
|
||||
/*12*/ "TERM",
|
||||
};
|
||||
#define NMATCH (sizeof(matchNames) / sizeof(char *))
|
||||
@ -328,12 +327,18 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
|
||||
unsigned int mods = 0;
|
||||
for (unsigned int i = 0; i < strlen(match); i++) {
|
||||
switch (match[i]) {
|
||||
case 'b': mods |= WasaQuery::WQM_BOOST; break;
|
||||
case 'b':
|
||||
mods |= WasaQuery::WQM_BOOST;
|
||||
nclause->m_weight = 10.0;
|
||||
break;
|
||||
case 'c': break;
|
||||
case 'C': mods |= WasaQuery::WQM_CASESENS; break;
|
||||
case 'd': break;
|
||||
case 'D': mods |= WasaQuery::WQM_DIACSENS; break;
|
||||
case 'e': mods |= WasaQuery::WQM_CASESENS | WasaQuery::WQM_DIACSENS | WasaQuery::WQM_NOSTEM; break;
|
||||
case 'e': mods |= WasaQuery::WQM_CASESENS |
|
||||
WasaQuery::WQM_DIACSENS |
|
||||
WasaQuery::WQM_NOSTEM;
|
||||
break;
|
||||
case 'f': mods |= WasaQuery::WQM_FUZZY; break;
|
||||
case 'l': mods |= WasaQuery::WQM_NOSTEM; break;
|
||||
case 'L': break;
|
||||
@ -342,6 +347,19 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
|
||||
case 'r': mods |= WasaQuery::WQM_REGEX; break;
|
||||
case 's': mods |= WasaQuery::WQM_SLOPPY; break;
|
||||
case 'w': mods |= WasaQuery::WQM_WORDS; break;
|
||||
case '.':case '0':case '1':case '2':case '3':case '4':
|
||||
case '5':case '6':case '7':case '8':case '9':
|
||||
{
|
||||
int n;
|
||||
float factor;
|
||||
if (sscanf(match+i, "%f %n", &factor, &n)) {
|
||||
nclause->m_weight = factor;
|
||||
DPRINT((stderr, "Got factor %.2f len %d\n",
|
||||
factor, n));
|
||||
}
|
||||
if (n)
|
||||
i += n-1;
|
||||
}
|
||||
}
|
||||
}
|
||||
nclause->m_modifiers = WasaQuery::Modifier(mods);
|
||||
|
||||
@ -63,7 +63,7 @@ public:
|
||||
typedef vector<WasaQuery*> subqlist_t;
|
||||
|
||||
WasaQuery()
|
||||
: m_op(OP_NULL), m_modifiers(0)
|
||||
: m_op(OP_NULL), m_modifiers(0), m_weight(1.0)
|
||||
{}
|
||||
|
||||
~WasaQuery();
|
||||
@ -86,6 +86,7 @@ public:
|
||||
vector<WasaQuery*> m_subs;
|
||||
|
||||
unsigned int m_modifiers;
|
||||
float m_weight;
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@ -101,7 +101,8 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
|
||||
|
||||
// Filtering on location
|
||||
if (!stringicmp("dir", (*it)->m_fieldspec)) {
|
||||
sdata->setTopdir((*it)->m_value, (*it)->m_op == WasaQuery::OP_EXCL);
|
||||
sdata->setTopdir((*it)->m_value, (*it)->m_op == WasaQuery::OP_EXCL,
|
||||
(*it)->m_weight);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -174,6 +175,8 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
|
||||
if (mods & WasaQuery::WQM_NOSTEM) {
|
||||
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
||||
}
|
||||
if ((*it)->m_weight != 1.0)
|
||||
nclause->setWeight((*it)->m_weight);
|
||||
sdata->addClause(nclause);
|
||||
}
|
||||
break;
|
||||
@ -203,6 +206,8 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
|
||||
}
|
||||
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM)
|
||||
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
||||
if ((*it)->m_weight != 1.0)
|
||||
nclause->setWeight((*it)->m_weight);
|
||||
sdata->addClause(nclause);
|
||||
break;
|
||||
|
||||
|
||||
@ -278,7 +278,8 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
||||
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq);
|
||||
}
|
||||
|
||||
// Add the directory filtering clause
|
||||
// Add the directory filtering clause. This is a phrase of terms
|
||||
// prefixed with the pathelt prefix XP
|
||||
if (!m_topdir.empty()) {
|
||||
vector<string> vpath;
|
||||
stringToTokens(m_topdir, vpath, "/");
|
||||
@ -288,10 +289,21 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
||||
it != vpath.end(); it++){
|
||||
pvpath.push_back(pathelt_prefix + *it);
|
||||
}
|
||||
xq = Xapian::Query(m_topdirexcl ?
|
||||
Xapian::Query::OP_AND_NOT:Xapian::Query::OP_FILTER,
|
||||
xq, Xapian::Query(Xapian::Query::OP_PHRASE,
|
||||
pvpath.begin(), pvpath.end()));
|
||||
Xapian::Query::op tdop;
|
||||
if (m_topdirweight == 1.0) {
|
||||
tdop = m_topdirexcl ?
|
||||
Xapian::Query::OP_AND_NOT : Xapian::Query::OP_FILTER;
|
||||
} else {
|
||||
tdop = m_topdirexcl ?
|
||||
Xapian::Query::OP_AND_NOT : Xapian::Query::OP_AND_MAYBE;
|
||||
}
|
||||
Xapian::Query tdq = Xapian::Query(Xapian::Query::OP_PHRASE,
|
||||
pvpath.begin(), pvpath.end());
|
||||
if (m_topdirweight != 1.0)
|
||||
tdq = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT,
|
||||
tdq, m_topdirweight);
|
||||
|
||||
xq = Xapian::Query(tdop, xq, tdq);
|
||||
}
|
||||
|
||||
*((Xapian::Query *)d) = xq;
|
||||
@ -847,8 +859,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
||||
(m_parentSearch == 0 && !m_haveWildCards);
|
||||
|
||||
StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm);
|
||||
if (!tr.processUserString(m_text, m_reason, pqueries,
|
||||
db.getStopList()))
|
||||
if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList()))
|
||||
return false;
|
||||
if (pqueries.empty()) {
|
||||
LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
|
||||
@ -858,6 +869,9 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
||||
tr.getUTerms(m_uterms);
|
||||
//listVector("SearchDataClauseSimple: Uterms: ", m_uterms);
|
||||
*qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
|
||||
if (m_weight != 1.0) {
|
||||
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -887,6 +901,9 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
|
||||
more.end());
|
||||
*qp = qp->empty() ? tq : Xapian::Query(Xapian::Query::OP_AND, *qp, tq);
|
||||
}
|
||||
if (m_weight != 1.0) {
|
||||
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -932,6 +949,9 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
||||
tr.getTerms(m_terms, m_groups);
|
||||
tr.getUTerms(m_uterms);
|
||||
*qp = *pqueries.begin();
|
||||
if (m_weight != 1.0) {
|
||||
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@ -73,8 +73,8 @@ class SearchDataClause;
|
||||
class SearchData {
|
||||
public:
|
||||
SearchData(SClType tp)
|
||||
: m_tp(tp), m_topdirexcl(false), m_haveDates(false),
|
||||
m_haveWildCards(false)
|
||||
: m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0),
|
||||
m_haveDates(false), m_haveWildCards(false)
|
||||
{
|
||||
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
|
||||
m_tp = SCLT_OR;
|
||||
@ -104,10 +104,11 @@ public:
|
||||
bool maybeAddAutoPhrase();
|
||||
|
||||
/** Set/get top subdirectory for filtering results */
|
||||
void setTopdir(const string& t, bool excl = false)
|
||||
void setTopdir(const string& t, bool excl = false, float w = 1.0)
|
||||
{
|
||||
m_topdir = t;
|
||||
m_topdirexcl = excl;
|
||||
m_topdirweight = w;
|
||||
}
|
||||
|
||||
/** Set date span for filtering results */
|
||||
@ -147,6 +148,7 @@ private:
|
||||
vector<string> m_nfiletypes; // Unwanted file types
|
||||
string m_topdir; // Restrict to subtree.
|
||||
bool m_topdirexcl; // Invert meaning
|
||||
float m_topdirweight; // affect weight instead of filter
|
||||
bool m_haveDates;
|
||||
DateInterval m_dates; // Restrict to date interval
|
||||
// Printable expanded version of the complete query, retrieved/set
|
||||
@ -167,7 +169,7 @@ public:
|
||||
|
||||
SearchDataClause(SClType tp)
|
||||
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
|
||||
m_modifiers(SDCM_NONE)
|
||||
m_modifiers(SDCM_NONE), m_weight(1.0)
|
||||
{}
|
||||
virtual ~SearchDataClause() {}
|
||||
virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0;
|
||||
@ -180,7 +182,7 @@ public:
|
||||
SClType getTp() {return m_tp;}
|
||||
void setParent(SearchData *p) {m_parentSearch = p;}
|
||||
virtual void setModifiers(Modifier mod) {m_modifiers = mod;}
|
||||
|
||||
virtual void setWeight(float w) {m_weight = w;}
|
||||
friend class SearchData;
|
||||
|
||||
protected:
|
||||
@ -189,6 +191,7 @@ protected:
|
||||
SearchData *m_parentSearch;
|
||||
bool m_haveWildCards;
|
||||
Modifier m_modifiers;
|
||||
float m_weight;
|
||||
private:
|
||||
SearchDataClause(const SearchDataClause&) {}
|
||||
SearchDataClause& operator=(const SearchDataClause&) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user