diff --git a/.hgignore b/.hgignore index a41ef2f4..03c81f25 100644 --- a/.hgignore +++ b/.hgignore @@ -38,6 +38,7 @@ src/doc/user/rcl.program.api.html src/doc/user/rcl.program.fields.html src/doc/user/rcl.program.html src/doc/user/rcl.search.anchorwild.html +src/doc/user/rcl.search.casediac.html src/doc/user/rcl.search.commandline.html src/doc/user/rcl.search.complex.html src/doc/user/rcl.search.custom.html diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index d98ff157..bac800a2 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1449,7 +1449,7 @@ bool Db::filenameWildExp(const string& fnexp, vector& names) LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str())); TermMatchResult result; - if (!termMatch(ET_WILD, string(), pattern, result, 1000, + if (!termMatch(ET_WILD, string(), pattern, result, -1, unsplitFilenameFieldName)) return false; for (vector::const_iterator it = result.entries.begin(); @@ -1506,7 +1506,7 @@ public: }; bool Db::stemExpand(const string &langs, const string &term, - TermMatchResult& result, int max) + TermMatchResult& result) { if (m_ndb == 0 || m_ndb->m_isopen == false) return false; @@ -1518,7 +1518,9 @@ bool Db::stemExpand(const string &langs, const string &term, return true; } -/** Add prefix to all strings in list */ +/** Add prefix to all strings in list. + * @param prefix already wrapped prefix + */ static void addPrefix(vector& terms, const string& prefix) { if (prefix.empty()) @@ -1579,7 +1581,7 @@ bool Db::termMatch(MatchType typ, const string &lang, res.prefix = prefix; if (typ == ET_STEM) { - if (!stemExpand(lang, root, res, max)) + if (!stemExpand(lang, root, res)) return false; for (vector::iterator it = res.entries.begin(); it != res.entries.end(); it++) { @@ -1623,7 +1625,7 @@ bool Db::termMatch(MatchType typ, const string &lang, Xapian::TermIterator it = xdb.allterms_begin(); if (!is.empty()) it.skip_to(is.c_str()); - for (int n = 0; it != xdb.allterms_end(); it++) { + for (int rcnt = 0; it != xdb.allterms_end(); it++) { // If we're beyond the terms matching the initial // string, end if (!is.empty() && (*it).find(is) != 0) @@ -1645,7 +1647,14 @@ bool Db::termMatch(MatchType typ, const string &lang, res.entries.push_back(TermMatchEntry(*it, xdb.get_collection_freq(*it), it.get_termfreq())); - ++n; + + // The problem with truncating here is that this is done + // alphabetically and we may not keep the most frequent + // terms. OTOH, not doing it may stall the program if + // we are walking the whole term list. We compromise + // by cutting at 2*max + if (max > 0 && ++rcnt >= 2*max) + break; } m_reason.erase(); break; @@ -1676,6 +1685,7 @@ bool Db::termMatch(MatchType typ, const string &lang, TermMatchCmpByWcf wcmp; sort(res.entries.begin(), res.entries.end(), wcmp); if (max > 0) { + // Would need a small max and big stem expansion... res.entries.resize(MIN(res.entries.size(), (unsigned int)max)); } return true; diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 1fce7eaa..6f1d630f 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -406,7 +406,7 @@ private: // Reinitialize when adding/removing additional dbs bool adjustdbs(); bool stemExpand(const string &lang, const string &s, - TermMatchResult& result, int max = -1); + TermMatchResult& result); // Flush when idxflushmb is reached bool maybeflush(off_t moretext); diff --git a/src/rcldb/rclquery.h b/src/rcldb/rclquery.h index 150a819a..a9e9d893 100644 --- a/src/rcldb/rclquery.h +++ b/src/rcldb/rclquery.h @@ -19,11 +19,6 @@ #include #include -#ifndef NO_NAMESPACES -using std::string; -using std::vector; -#endif - #include "refcntr.h" #include "searchdata.h" @@ -43,18 +38,18 @@ enum abstract_result { // Snippet entry for makeDocAbstract class Snippet { public: - Snippet(int page, const string& snip) + Snippet(int page, const std::string& snip) : page(page), snippet(snip) { } - Snippet& setTerm(const string& trm) + Snippet& setTerm(const std::string& trm) { term = trm; return *this; } int page; - string term; - string snippet; + std::string term; + std::string snippet; }; @@ -71,11 +66,11 @@ class Query { ~Query(); /** Get explanation about last error */ - string getReason() const; + std::string getReason() const; /** Choose sort order. Must be called before setQuery */ - void setSortBy(const string& fld, bool ascending = true); - const string& getSortBy() const {return m_sortField;} + void setSortBy(const std::string& fld, bool ascending = true); + const std::string& getSortBy() const {return m_sortField;} bool getSortAscending() const {return m_sortAscending;} /** Return or filter results with identical content checksum */ @@ -94,26 +89,26 @@ class Query { bool getDoc(int i, Doc &doc); /** Get possibly expanded list of query terms */ - bool getQueryTerms(vector& terms); + bool getQueryTerms(std::vector& terms); /** Return a list of terms which matched for a specific result document */ - bool getMatchTerms(const Doc& doc, vector& terms); - bool getMatchTerms(unsigned long xdocid, vector& terms); + bool getMatchTerms(const Doc& doc, std::vector& terms); + bool getMatchTerms(unsigned long xdocid, std::vector& terms); /** Build synthetic abstract for document, extracting chunks relevant for * the input query. This uses index data only (no access to the file) */ // Abstract return as one string - bool makeDocAbstract(Doc &doc, string& abstract); + bool makeDocAbstract(Doc &doc, std::string& abstract); // Returned as a snippets vector - bool makeDocAbstract(Doc &doc, vector& abstract); + bool makeDocAbstract(Doc &doc, std::vector& abstract); // Returned as a vector of pair page is 0 if unknown - abstract_result makeDocAbstract(Doc &doc, vector& abst, + abstract_result makeDocAbstract(Doc &doc, std::vector& abst, int maxoccs= -1, int ctxwords = -1); /** Retrieve detected page breaks positions */ int getFirstMatchPage(Doc &doc, std::string& term); /** Expand query to look for documents like the one passed in */ - vector expand(const Doc &doc); + std::vector expand(const Doc &doc); /** Return the Db we're set for */ Db *whatDb(); @@ -123,10 +118,10 @@ class Query { Native *m_nq; private: - string m_reason; // Error explanation + std::string m_reason; // Error explanation Db *m_db; void *m_sorter; - string m_sortField; + std::string m_sortField; bool m_sortAscending; bool m_collapseDuplicates; int m_resCnt; diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index 675b535a..577960bb 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -1095,8 +1095,9 @@ bool StringToXapianQ::processUserString(const string &iq, bool useNear ) { - LOGDEB(("StringToXapianQ:: qstr [%s] mods 0x%x slack %d near %d\n", - iq.c_str(), mods, slack, useNear)); + LOGDEB(("StringToXapianQ:pUS:: qstr [%s] fld [%s] mods 0x%x " + "slack %d near %d\n", + iq.c_str(), m_field.c_str(), mods, slack, useNear)); ermsg.erase(); const StopList stops = m_db.getStopList();