diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 2c2b90ff..82a18bd5 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1329,11 +1329,7 @@ bool Db::purgeFile(const string &udi, bool *existed) // File name wild card expansion. This is a specialisation ot termMatch bool Db::filenameWildExp(const string& fnexp, list& names) { - string pattern; - if (!unacmaybefold(fnexp, pattern, "UTF-8", true)) { - LOGERR(("Db::filenameWildExp: unac error for [%s]\n", fnexp.c_str())); - return false; - } + string pattern = fnexp; names.clear(); // If pattern is not quoted, and has no wildcards, we add * at @@ -1350,12 +1346,12 @@ bool Db::filenameWildExp(const string& fnexp, list& names) return false; for (list::const_iterator it = entries.begin(); it != entries.end(); it++) - names.push_back("XSFN"+it->term); + names.push_back(it->term); if (names.empty()) { // Build an impossible query: we know its impossible because we // control the prefixes! - names.push_back("XIMPOSSIBLE"); + names.push_back("XNONENoMatchingTerms"); } return true; } @@ -1398,6 +1394,16 @@ bool Db::stemExpand(const string &lang, const string &term, return true; } +/** Add prefix to all strings in list */ +static void addPrefix(list& terms, const string& prefix) +{ + if (prefix.empty()) + return; + for (list::iterator it = terms.begin(); + it != terms.end(); it++) + it->term.insert(0, prefix); +} + // Characters that can begin a wildcard or regexp expression. We use skipto // to begin the allterms search with terms that begin with the portion of // the input string prior to these chars. @@ -1409,7 +1415,9 @@ bool Db::termMatch(MatchType typ, const string &lang, const string &root, list& res, int max, - const string& field) + const string& field, + string *prefixp + ) { if (!m_ndb || !m_ndb->m_isopen) return false; @@ -1428,6 +1436,12 @@ bool Db::termMatch(MatchType typ, const string &lang, string prefix; if (!field.empty()) { (void)fieldToPrefix(field, prefix); + if (prefix.empty()) { + LOGDEB(("Db::termMatch: field is not indexed (no prefix): [%s]\n", + field.c_str())); + } + if (prefixp) + *prefixp = prefix; } if (typ == ET_STEM) { @@ -1443,6 +1457,8 @@ bool Db::termMatch(MatchType typ, const string &lang, return false; LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str())); } + if (!prefix.empty()) + addPrefix(res, prefix); } else { regex_t reg; int errcode; @@ -1493,7 +1509,7 @@ bool Db::termMatch(MatchType typ, const string &lang, continue; } // Do we want stem expansion here? We don't do it for now - res.push_back(TermMatchEntry(term, it.get_termfreq())); + res.push_back(TermMatchEntry(*it, it.get_termfreq())); ++n; } m_reason.erase(); diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 7b82156a..fb6792ea 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -158,7 +158,9 @@ class Db { enum MatchType {ET_WILD, ET_REGEXP, ET_STEM}; bool termMatch(MatchType typ, const string &lang, const string &s, list& result, int max = -1, - const string& field = ""); + const string& field = "", + string *prefix = 0 + ); /** Special filename wildcard to XSFN terms expansion. internal/searchdata use only */ diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index 918b8a2a..ac1088af 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -240,9 +240,9 @@ class wsQData : public TextSplitCB { // translating. class StringToXapianQ { public: - StringToXapianQ(Db& db, const string& prefix, + StringToXapianQ(Db& db, const string& field, const string &stmlng, bool boostUser) - : m_db(db), m_prefix(prefix), m_stemlang(stmlng), + : m_db(db), m_field(field), m_stemlang(stmlng), m_doBoostUserTerms(boostUser) { } @@ -267,7 +267,7 @@ public: private: void expandTerm(bool dont, const string& term, list& exp, - string& sterm); + string& sterm, string *prefix); // After splitting entry on whitespace: process non-phrase element void processSimpleSpan(const string& span, bool nostemexp, list &pqueries); // Process phrase/near element @@ -276,7 +276,7 @@ private: bool useNear, int slack); Db& m_db; - const string& m_prefix; + const string& m_field; const string& m_stemlang; bool m_doBoostUserTerms; // Single terms and phrases resulting from breaking up text; @@ -309,9 +309,9 @@ static void listVector(const string& what, const vector&l) * @param sterm output original input term if there were no wildcards */ void StringToXapianQ::expandTerm(bool nostemexp, - const string& term, - list& exp, - string &sterm) + const string& term, + list& exp, + string &sterm, string *prefix) { LOGDEB2(("expandTerm: term [%s] stemlang [%s] nostemexp %d\n", term.c_str(), m_stemlang.c_str(), nostemexp)); @@ -336,11 +336,13 @@ void StringToXapianQ::expandTerm(bool nostemexp, } else { list l; if (haswild) { - m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, l); + m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, l, -1, m_field, + prefix); } else { sterm = term; m_uterms.push_back(sterm); - m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, l); + m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, l, -1, m_field, + prefix); } for (list::const_iterator it = l.begin(); it != l.end(); it++) { @@ -384,23 +386,14 @@ void multiply_groups(vector >::const_iterator vvit, } } -/** Add prefix to all strings in list */ -static void addPrefix(list& terms, const string& prefix) -{ - if (prefix.empty()) - return; - for (list::iterator it = terms.begin(); it != terms.end(); it++) - it->insert(0, prefix); -} - void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp, list &pqueries) { list exp; string sterm; // dumb version of user term - expandTerm(nostemexp, span, exp, sterm); + string prefix; + expandTerm(nostemexp, span, exp, sterm, &prefix); m_terms.insert(m_terms.end(), exp.begin(), exp.end()); - addPrefix(exp, m_prefix); // Push either term or OR of stem-expanded set Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end()); @@ -412,7 +405,7 @@ void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp, if (m_doBoostUserTerms && !sterm.empty()) { xq = Xapian::Query(Xapian::Query::OP_OR, xq, - Xapian::Query(m_prefix+sterm, + Xapian::Query(prefix+sterm, original_term_wqf_booster)); } pqueries.push_back(xq); @@ -443,9 +436,9 @@ void StringToXapianQ::processPhraseOrNear(wsQData *splitData, string sterm; listexp; - expandTerm(nostemexp, *it, exp, sterm); + string prefix; + expandTerm(nostemexp, *it, exp, sterm, &prefix); groups.push_back(vector(exp.begin(), exp.end())); - addPrefix(exp, m_prefix); orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, exp.begin(), exp.end())); #ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF @@ -597,9 +590,6 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p, LOGERR(("SearchDataClauseSimple: bad m_tp %d\n", m_tp)); return false; } - string prefix; - if (!m_field.empty()) - db.fieldToPrefix(m_field, prefix); list pqueries; // We normally boost the original term in the stem expansion list. Don't @@ -608,7 +598,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p, (m_parentSearch && !m_parentSearch->haveWildCards()) || (m_parentSearch == 0 && !m_haveWildCards); - StringToXapianQ tr(db, prefix, l_stemlang, doBoostUserTerm); + StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm); if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList())) return false; @@ -623,7 +613,8 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p, return true; } -// Translate a FILENAME search clause. +// Translate a FILENAME search clause. Actually this is now mostly +// a "filename" field search. bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p, const string&) { @@ -660,10 +651,6 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p, list pqueries; Xapian::Query nq; - string prefix; - if (!m_field.empty()) - db.fieldToPrefix(m_field, prefix); - // We normally boost the original term in the stem expansion list. Don't // do it if there are wildcards anywhere, this would skew the results. bool doBoostUserTerm = @@ -680,7 +667,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p, } string s = string("\"") + m_text + string("\""); bool useNear = (m_tp == SCLT_NEAR); - StringToXapianQ tr(db, prefix, l_stemlang, doBoostUserTerm); + StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm); if (!tr.processUserString(s, m_reason, pqueries, db.getStopList(), m_slack, useNear)) return false;