diff --git a/src/query/wasastringtoquery.cpp b/src/query/wasastringtoquery.cpp index 88407457..91b64fb2 100644 --- a/src/query/wasastringtoquery.cpp +++ b/src/query/wasastringtoquery.cpp @@ -84,17 +84,16 @@ void WasaQuery::describe(string &desc) const desc.erase(desc.length() - 1); desc += ")"; if (m_modifiers != 0) { - if (m_modifiers & WQM_BOOST) desc += "BOOST|"; + if (m_modifiers & WQM_BOOST) desc += "BOOST|"; if (m_modifiers & WQM_CASESENS) desc += "CASESENS|"; if (m_modifiers & WQM_DIACSENS) desc += "DIACSENS|"; + if (m_modifiers & WQM_FUZZY) desc += "FUZZY|"; if (m_modifiers & WQM_NOSTEM) desc += "NOSTEM|"; - if (m_modifiers & WQM_BOOST) desc += "BOOST|"; + if (m_modifiers & WQM_PHRASESLACK) desc += "PHRASESLACK|"; if (m_modifiers & WQM_PROX) desc += "PROX|"; + if (m_modifiers & WQM_REGEX) desc += "REGEX|"; if (m_modifiers & WQM_SLOPPY) desc += "SLOPPY|"; if (m_modifiers & WQM_WORDS) desc += "WORDS|"; - if (m_modifiers & WQM_PHRASESLACK) desc += "PHRASESLACK|"; - if (m_modifiers & WQM_REGEX) desc += "REGEX|"; - if (m_modifiers & WQM_FUZZY) desc += "FUZZY|"; if (desc.length() > 0 && desc[desc.length()-1] == '|') desc = desc.substr(0, desc.length()-1); } @@ -132,7 +131,7 @@ static const char * parserExpr = "(\"" //9 "([^\"]+)" //10 "A quoted term" "\")" - "([a-zA-Z0-9]*)" //11 modifiers + "([bcCdDeflLoprsw.0-9]*)" //11 modifiers "|" "([^[:space:]\"]+)" //12 ANormalTerm ")" @@ -152,7 +151,7 @@ static const char *matchNames[] = { /* 8*/ "", /* 9*/ "", /*10*/ "QUOTEDTERM", - /*11*/ "MODIIFIERS", + /*11*/ "MODIFIERS", /*12*/ "TERM", }; #define NMATCH (sizeof(matchNames) / sizeof(char *)) @@ -328,12 +327,18 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason) unsigned int mods = 0; for (unsigned int i = 0; i < strlen(match); i++) { switch (match[i]) { - case 'b': mods |= WasaQuery::WQM_BOOST; break; + case 'b': + mods |= WasaQuery::WQM_BOOST; + nclause->m_weight = 10.0; + break; case 'c': break; case 'C': mods |= WasaQuery::WQM_CASESENS; break; case 'd': break; case 'D': mods |= WasaQuery::WQM_DIACSENS; break; - case 'e': mods |= WasaQuery::WQM_CASESENS | WasaQuery::WQM_DIACSENS | WasaQuery::WQM_NOSTEM; break; + case 'e': mods |= WasaQuery::WQM_CASESENS | + WasaQuery::WQM_DIACSENS | + WasaQuery::WQM_NOSTEM; + break; case 'f': mods |= WasaQuery::WQM_FUZZY; break; case 'l': mods |= WasaQuery::WQM_NOSTEM; break; case 'L': break; @@ -342,6 +347,19 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason) case 'r': mods |= WasaQuery::WQM_REGEX; break; case 's': mods |= WasaQuery::WQM_SLOPPY; break; case 'w': mods |= WasaQuery::WQM_WORDS; break; + case '.':case '0':case '1':case '2':case '3':case '4': + case '5':case '6':case '7':case '8':case '9': + { + int n; + float factor; + if (sscanf(match+i, "%f %n", &factor, &n)) { + nclause->m_weight = factor; + DPRINT((stderr, "Got factor %.2f len %d\n", + factor, n)); + } + if (n) + i += n-1; + } } } nclause->m_modifiers = WasaQuery::Modifier(mods); diff --git a/src/query/wasastringtoquery.h b/src/query/wasastringtoquery.h index 11cae9a4..baa30dcf 100644 --- a/src/query/wasastringtoquery.h +++ b/src/query/wasastringtoquery.h @@ -63,7 +63,7 @@ public: typedef vector subqlist_t; WasaQuery() - : m_op(OP_NULL), m_modifiers(0) + : m_op(OP_NULL), m_modifiers(0), m_weight(1.0) {} ~WasaQuery(); @@ -86,6 +86,7 @@ public: vector m_subs; unsigned int m_modifiers; + float m_weight; }; /** diff --git a/src/query/wasatorcl.cpp b/src/query/wasatorcl.cpp index eacf4340..6cfc3876 100644 --- a/src/query/wasatorcl.cpp +++ b/src/query/wasatorcl.cpp @@ -101,7 +101,8 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa, // Filtering on location if (!stringicmp("dir", (*it)->m_fieldspec)) { - sdata->setTopdir((*it)->m_value, (*it)->m_op == WasaQuery::OP_EXCL); + sdata->setTopdir((*it)->m_value, (*it)->m_op == WasaQuery::OP_EXCL, + (*it)->m_weight); continue; } @@ -174,6 +175,8 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa, if (mods & WasaQuery::WQM_NOSTEM) { nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING); } + if ((*it)->m_weight != 1.0) + nclause->setWeight((*it)->m_weight); sdata->addClause(nclause); } break; @@ -203,6 +206,8 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa, } if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM) nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING); + if ((*it)->m_weight != 1.0) + nclause->setWeight((*it)->m_weight); sdata->addClause(nclause); break; diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index 1a6a8946..e9aab438 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -278,7 +278,8 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d) xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq); } - // Add the directory filtering clause + // Add the directory filtering clause. This is a phrase of terms + // prefixed with the pathelt prefix XP if (!m_topdir.empty()) { vector vpath; stringToTokens(m_topdir, vpath, "/"); @@ -288,10 +289,21 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d) it != vpath.end(); it++){ pvpath.push_back(pathelt_prefix + *it); } - xq = Xapian::Query(m_topdirexcl ? - Xapian::Query::OP_AND_NOT:Xapian::Query::OP_FILTER, - xq, Xapian::Query(Xapian::Query::OP_PHRASE, - pvpath.begin(), pvpath.end())); + Xapian::Query::op tdop; + if (m_topdirweight == 1.0) { + tdop = m_topdirexcl ? + Xapian::Query::OP_AND_NOT : Xapian::Query::OP_FILTER; + } else { + tdop = m_topdirexcl ? + Xapian::Query::OP_AND_NOT : Xapian::Query::OP_AND_MAYBE; + } + Xapian::Query tdq = Xapian::Query(Xapian::Query::OP_PHRASE, + pvpath.begin(), pvpath.end()); + if (m_topdirweight != 1.0) + tdq = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, + tdq, m_topdirweight); + + xq = Xapian::Query(tdop, xq, tdq); } *((Xapian::Query *)d) = xq; @@ -847,8 +859,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p, (m_parentSearch == 0 && !m_haveWildCards); StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm); - if (!tr.processUserString(m_text, m_reason, pqueries, - db.getStopList())) + if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList())) return false; if (pqueries.empty()) { LOGERR(("SearchDataClauseSimple: resolved to null query\n")); @@ -858,6 +869,9 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p, tr.getUTerms(m_uterms); //listVector("SearchDataClauseSimple: Uterms: ", m_uterms); *qp = Xapian::Query(op, pqueries.begin(), pqueries.end()); + if (m_weight != 1.0) { + *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight); + } return true; } @@ -887,6 +901,9 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p, more.end()); *qp = qp->empty() ? tq : Xapian::Query(Xapian::Query::OP_AND, *qp, tq); } + if (m_weight != 1.0) { + *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight); + } return true; } @@ -932,6 +949,9 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p, tr.getTerms(m_terms, m_groups); tr.getUTerms(m_uterms); *qp = *pqueries.begin(); + if (m_weight != 1.0) { + *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight); + } return true; } diff --git a/src/rcldb/searchdata.h b/src/rcldb/searchdata.h index cb4e3bbb..95879c56 100644 --- a/src/rcldb/searchdata.h +++ b/src/rcldb/searchdata.h @@ -73,8 +73,8 @@ class SearchDataClause; class SearchData { public: SearchData(SClType tp) - : m_tp(tp), m_topdirexcl(false), m_haveDates(false), - m_haveWildCards(false) + : m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0), + m_haveDates(false), m_haveWildCards(false) { if (m_tp != SCLT_OR && m_tp != SCLT_AND) m_tp = SCLT_OR; @@ -104,10 +104,11 @@ public: bool maybeAddAutoPhrase(); /** Set/get top subdirectory for filtering results */ - void setTopdir(const string& t, bool excl = false) + void setTopdir(const string& t, bool excl = false, float w = 1.0) { m_topdir = t; m_topdirexcl = excl; + m_topdirweight = w; } /** Set date span for filtering results */ @@ -147,6 +148,7 @@ private: vector m_nfiletypes; // Unwanted file types string m_topdir; // Restrict to subtree. bool m_topdirexcl; // Invert meaning + float m_topdirweight; // affect weight instead of filter bool m_haveDates; DateInterval m_dates; // Restrict to date interval // Printable expanded version of the complete query, retrieved/set @@ -167,7 +169,7 @@ public: SearchDataClause(SClType tp) : m_tp(tp), m_parentSearch(0), m_haveWildCards(0), - m_modifiers(SDCM_NONE) + m_modifiers(SDCM_NONE), m_weight(1.0) {} virtual ~SearchDataClause() {} virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0; @@ -180,7 +182,7 @@ public: SClType getTp() {return m_tp;} void setParent(SearchData *p) {m_parentSearch = p;} virtual void setModifiers(Modifier mod) {m_modifiers = mod;} - + virtual void setWeight(float w) {m_weight = w;} friend class SearchData; protected: @@ -189,6 +191,7 @@ protected: SearchData *m_parentSearch; bool m_haveWildCards; Modifier m_modifiers; + float m_weight; private: SearchDataClause(const SearchDataClause&) {} SearchDataClause& operator=(const SearchDataClause&) {