Search: allow setting weights on terms, e.g.: "important"2.5

This commit is contained in:
Jean-Francois Dockes 2011-05-30 14:03:01 +02:00
parent 72fe512c5a
commit 91f277ec26
5 changed files with 70 additions and 23 deletions

View File

@ -84,17 +84,16 @@ void WasaQuery::describe(string &desc) const
desc.erase(desc.length() - 1);
desc += ")";
if (m_modifiers != 0) {
if (m_modifiers & WQM_BOOST) desc += "BOOST|";
if (m_modifiers & WQM_BOOST) desc += "BOOST|";
if (m_modifiers & WQM_CASESENS) desc += "CASESENS|";
if (m_modifiers & WQM_DIACSENS) desc += "DIACSENS|";
if (m_modifiers & WQM_FUZZY) desc += "FUZZY|";
if (m_modifiers & WQM_NOSTEM) desc += "NOSTEM|";
if (m_modifiers & WQM_BOOST) desc += "BOOST|";
if (m_modifiers & WQM_PHRASESLACK) desc += "PHRASESLACK|";
if (m_modifiers & WQM_PROX) desc += "PROX|";
if (m_modifiers & WQM_REGEX) desc += "REGEX|";
if (m_modifiers & WQM_SLOPPY) desc += "SLOPPY|";
if (m_modifiers & WQM_WORDS) desc += "WORDS|";
if (m_modifiers & WQM_PHRASESLACK) desc += "PHRASESLACK|";
if (m_modifiers & WQM_REGEX) desc += "REGEX|";
if (m_modifiers & WQM_FUZZY) desc += "FUZZY|";
if (desc.length() > 0 && desc[desc.length()-1] == '|')
desc = desc.substr(0, desc.length()-1);
}
@ -132,7 +131,7 @@ static const char * parserExpr =
"(\"" //9
"([^\"]+)" //10 "A quoted term"
"\")"
"([a-zA-Z0-9]*)" //11 modifiers
"([bcCdDeflLoprsw.0-9]*)" //11 modifiers
"|"
"([^[:space:]\"]+)" //12 ANormalTerm
")"
@ -152,7 +151,7 @@ static const char *matchNames[] = {
/* 8*/ "",
/* 9*/ "",
/*10*/ "QUOTEDTERM",
/*11*/ "MODIIFIERS",
/*11*/ "MODIFIERS",
/*12*/ "TERM",
};
#define NMATCH (sizeof(matchNames) / sizeof(char *))
@ -328,12 +327,18 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
unsigned int mods = 0;
for (unsigned int i = 0; i < strlen(match); i++) {
switch (match[i]) {
case 'b': mods |= WasaQuery::WQM_BOOST; break;
case 'b':
mods |= WasaQuery::WQM_BOOST;
nclause->m_weight = 10.0;
break;
case 'c': break;
case 'C': mods |= WasaQuery::WQM_CASESENS; break;
case 'd': break;
case 'D': mods |= WasaQuery::WQM_DIACSENS; break;
case 'e': mods |= WasaQuery::WQM_CASESENS | WasaQuery::WQM_DIACSENS | WasaQuery::WQM_NOSTEM; break;
case 'e': mods |= WasaQuery::WQM_CASESENS |
WasaQuery::WQM_DIACSENS |
WasaQuery::WQM_NOSTEM;
break;
case 'f': mods |= WasaQuery::WQM_FUZZY; break;
case 'l': mods |= WasaQuery::WQM_NOSTEM; break;
case 'L': break;
@ -342,6 +347,19 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
case 'r': mods |= WasaQuery::WQM_REGEX; break;
case 's': mods |= WasaQuery::WQM_SLOPPY; break;
case 'w': mods |= WasaQuery::WQM_WORDS; break;
case '.':case '0':case '1':case '2':case '3':case '4':
case '5':case '6':case '7':case '8':case '9':
{
    // Numeric modifier: a weight factor for the clause, as in
    // "important"2.5. Parse a float starting at the current
    // position and skip over the characters it used.
    //
    // n must start at 0: sscanf() leaves it untouched when the
    // float conversion fails (e.g. a lone "."), and it is read
    // below to advance the index.
    int n = 0;
    float factor = 1.0;
    // sscanf() returns the number of successful conversions (%n is
    // not counted) or EOF, which is non-zero: compare against 1
    // rather than testing the result for truth.
    if (sscanf(match+i, "%f %n", &factor, &n) == 1) {
        nclause->m_weight = factor;
        DPRINT((stderr, "Got factor %.2f len %d\n",
                factor, n));
    }
    // The enclosing for loop increments i once more, hence n-1.
    if (n > 0)
        i += n-1;
    break;
}
}
}
nclause->m_modifiers = WasaQuery::Modifier(mods);

View File

@ -63,7 +63,7 @@ public:
typedef vector<WasaQuery*> subqlist_t;
WasaQuery()
: m_op(OP_NULL), m_modifiers(0)
: m_op(OP_NULL), m_modifiers(0), m_weight(1.0)
{}
~WasaQuery();
@ -86,6 +86,7 @@ public:
vector<WasaQuery*> m_subs;
unsigned int m_modifiers;
float m_weight;
};
/**

View File

@ -101,7 +101,8 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
// Filtering on location
if (!stringicmp("dir", (*it)->m_fieldspec)) {
sdata->setTopdir((*it)->m_value, (*it)->m_op == WasaQuery::OP_EXCL);
sdata->setTopdir((*it)->m_value, (*it)->m_op == WasaQuery::OP_EXCL,
(*it)->m_weight);
continue;
}
@ -174,6 +175,8 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
if (mods & WasaQuery::WQM_NOSTEM) {
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
}
if ((*it)->m_weight != 1.0)
nclause->setWeight((*it)->m_weight);
sdata->addClause(nclause);
}
break;
@ -203,6 +206,8 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
}
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM)
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
if ((*it)->m_weight != 1.0)
nclause->setWeight((*it)->m_weight);
sdata->addClause(nclause);
break;

View File

@ -278,7 +278,8 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq);
}
// Add the directory filtering clause
// Add the directory filtering clause. This is a phrase of terms
// prefixed with the pathelt prefix XP
if (!m_topdir.empty()) {
vector<string> vpath;
stringToTokens(m_topdir, vpath, "/");
@ -288,10 +289,21 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
it != vpath.end(); it++){
pvpath.push_back(pathelt_prefix + *it);
}
xq = Xapian::Query(m_topdirexcl ?
Xapian::Query::OP_AND_NOT:Xapian::Query::OP_FILTER,
xq, Xapian::Query(Xapian::Query::OP_PHRASE,
pvpath.begin(), pvpath.end()));
Xapian::Query::op tdop;
if (m_topdirweight == 1.0) {
tdop = m_topdirexcl ?
Xapian::Query::OP_AND_NOT : Xapian::Query::OP_FILTER;
} else {
tdop = m_topdirexcl ?
Xapian::Query::OP_AND_NOT : Xapian::Query::OP_AND_MAYBE;
}
Xapian::Query tdq = Xapian::Query(Xapian::Query::OP_PHRASE,
pvpath.begin(), pvpath.end());
if (m_topdirweight != 1.0)
tdq = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT,
tdq, m_topdirweight);
xq = Xapian::Query(tdop, xq, tdq);
}
*((Xapian::Query *)d) = xq;
@ -847,8 +859,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
(m_parentSearch == 0 && !m_haveWildCards);
StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm);
if (!tr.processUserString(m_text, m_reason, pqueries,
db.getStopList()))
if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList()))
return false;
if (pqueries.empty()) {
LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
@ -858,6 +869,9 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
tr.getUTerms(m_uterms);
//listVector("SearchDataClauseSimple: Uterms: ", m_uterms);
*qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
if (m_weight != 1.0) {
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
}
return true;
}
@ -887,6 +901,9 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
more.end());
*qp = qp->empty() ? tq : Xapian::Query(Xapian::Query::OP_AND, *qp, tq);
}
if (m_weight != 1.0) {
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
}
return true;
}
@ -932,6 +949,9 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
tr.getTerms(m_terms, m_groups);
tr.getUTerms(m_uterms);
*qp = *pqueries.begin();
if (m_weight != 1.0) {
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
}
return true;
}

View File

@ -73,8 +73,8 @@ class SearchDataClause;
class SearchData {
public:
SearchData(SClType tp)
: m_tp(tp), m_topdirexcl(false), m_haveDates(false),
m_haveWildCards(false)
: m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0),
m_haveDates(false), m_haveWildCards(false)
{
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
m_tp = SCLT_OR;
@ -104,10 +104,11 @@ public:
bool maybeAddAutoPhrase();
/** Set/get top subdirectory for filtering results */
void setTopdir(const string& t, bool excl = false)
void setTopdir(const string& t, bool excl = false, float w = 1.0)
{
m_topdir = t;
m_topdirexcl = excl;
m_topdirweight = w;
}
/** Set date span for filtering results */
@ -147,6 +148,7 @@ private:
vector<string> m_nfiletypes; // Unwanted file types
string m_topdir; // Restrict to subtree.
bool m_topdirexcl; // Invert meaning
float m_topdirweight; // affect weight instead of filter
bool m_haveDates;
DateInterval m_dates; // Restrict to date interval
// Printable expanded version of the complete query, retrieved/set
@ -167,7 +169,7 @@ public:
SearchDataClause(SClType tp)
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
m_modifiers(SDCM_NONE)
m_modifiers(SDCM_NONE), m_weight(1.0)
{}
virtual ~SearchDataClause() {}
virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0;
@ -180,7 +182,7 @@ public:
SClType getTp() {return m_tp;}
void setParent(SearchData *p) {m_parentSearch = p;}
virtual void setModifiers(Modifier mod) {m_modifiers = mod;}
virtual void setWeight(float w) {m_weight = w;}
friend class SearchData;
protected:
@ -189,6 +191,7 @@ protected:
SearchData *m_parentSearch;
bool m_haveWildCards;
Modifier m_modifiers;
float m_weight;
private:
SearchDataClause(const SearchDataClause&) {}
SearchDataClause& operator=(const SearchDataClause&) {