diff --git a/src/query/recollq.cpp b/src/query/recollq.cpp index c2b8bf46..154d29da 100644 --- a/src/query/recollq.cpp +++ b/src/query/recollq.cpp @@ -123,6 +123,7 @@ static char usage [] = " -T : use the parameter (Thesaurus) for word expansion.\n" " -i : additional index, several can be given.\n" " -e use url encoding (%xx) for urls.\n" +" -E use exact result count instead of lower bound estimate.\n" " -F : output exactly these fields for each result.\n" " The field values are encoded in base64, output in one line and \n" " separated by one space character. This is the recommended format \n" @@ -142,39 +143,40 @@ Usage(void) static int op_flags; #define OPT_A 0x1 -// gui: -a same +// GUI: -a same #define OPT_a 0x2 #define OPT_b 0x4 #define OPT_C 0x8 -// gui: -c same +// GUI: -c same #define OPT_c 0x10 #define OPT_D 0x20 #define OPT_d 0x40 #define OPT_e 0x80 #define OPT_F 0x100 -// gui: -f same +// GUI: -f same #define OPT_f 0x200 -// gui uses -h for help. us: usage +// GUI uses -h for help. us: usage #define OPT_i 0x400 -// gui uses -L to set language of messages -// gui: -l same +// GUI uses -L to set language of messages +// GUI: -l same #define OPT_l 0x800 #define OPT_m 0x1000 #define OPT_N 0x2000 #define OPT_n 0x4000 -// gui: -o same +// GUI: -o same #define OPT_o 0x8000 #define OPT_P 0x10000 #define OPT_Q 0x20000 -// gui: -q same +// GUI: -q same #define OPT_q 0x40000 #define OPT_S 0x80000 #define OPT_s 0x100000 #define OPT_T 0x200000 -// gui: -t use command line, us: ignored +// GUI: -t use command line, us: ignored #define OPT_t 0x400000 -// gui uses -v : show version. Us: usage -// gui uses -w : open minimized +// GUI uses -v : show version. 
Us: usage +// GUI uses -w : open minimized +#define OPT_E 0x800000 int recollq(RclConfig **cfp, int argc, char **argv) { @@ -212,6 +214,7 @@ int recollq(RclConfig **cfp, int argc, char **argv) argc--; goto b1; case 'd': op_flags |= OPT_d; break; case 'D': op_flags |= OPT_D; break; + case 'E': op_flags |= OPT_E; break; case 'e': op_flags |= OPT_e; break; case 'f': op_flags |= OPT_f; break; case 'F': op_flags |= OPT_F; if (argc < 2) Usage(); @@ -366,7 +369,12 @@ endopts: cerr << "Query setup failed: " << query.getReason() << endl; return(1); } - int cnt = query.getResCnt(); + int cnt; + if (op_flags & OPT_E) { + cnt = query.getResCnt(-1, true); + } else { + cnt = query.getResCnt(); + } if (!(op_flags & OPT_b)) { cout << "Recoll query: " << rq->getDescription() << endl; if (firstres == 0) { diff --git a/src/rcldb/rclquery.cpp b/src/rcldb/rclquery.cpp index 690729f7..9052853c 100644 --- a/src/rcldb/rclquery.cpp +++ b/src/rcldb/rclquery.cpp @@ -169,15 +169,12 @@ void Query::setSortBy(const string& fld, bool ascending) { (m_sortAscending ? "ascending" : "descending") << "\n"); } -//#define ISNULL(X) (X).isNull() -#define ISNULL(X) !(X) - // Prepare query out of user search data bool Query::setQuery(std::shared_ptr sdata) { LOGDEB("Query::setQuery:\n"); - if (!m_db || ISNULL(m_nq)) { + if (!m_db || !m_nq) { LOGERR("Query::setQuery: not initialised!\n"); return false; } @@ -247,7 +244,7 @@ bool Query::setQuery(std::shared_ptr sdata) bool Query::getQueryTerms(vector& terms) { - if (ISNULL(m_nq)) + if (!m_nq) return false; terms.clear(); @@ -336,30 +333,37 @@ static const int qquantum = 50; // Get estimated result count for query. 
Xapian actually does most of // the search job in there, this can be long -int Query::getResCnt() +int Query::getResCnt(int checkatleast, bool useestimate) { - if (ISNULL(m_nq) || !m_nq->xenquire) { + if (!m_db || !m_nq || !m_nq->xenquire) { LOGERR("Query::getResCnt: no query opened\n"); return -1; } + LOGDEB0("Query::getResCnt: checkatleast " << checkatleast << " estimate " << + useestimate << "\n"); + if (m_resCnt >= 0) return m_resCnt; - m_resCnt = -1; if (m_nq->xmset.size() <= 0) { Chrono chron; - - XAPTRY(m_nq->xmset = - m_nq->xenquire->get_mset(0, qquantum, 1000); - m_resCnt = m_nq->xmset.get_matches_lower_bound(), + XAPTRY(if (checkatleast == -1) + checkatleast = m_db->docCnt(); + m_nq->xmset = m_nq->xenquire->get_mset(0, qquantum, checkatleast), m_db->m_ndb->xrdb, m_reason); - - LOGDEB("Query::getResCnt: "<get_mset: exception: " << m_reason << "\n"); + return -1; + } + LOGDEB("Query::getResCnt: get_mset: " << chron.millis() << " mS\n"); + } + + if (useestimate) { + m_resCnt = m_nq->xmset.get_matches_estimated(); } else { m_resCnt = m_nq->xmset.get_matches_lower_bound(); } + LOGDEB("Query::getResCnt: " << m_resCnt << "\n"); return m_resCnt; } @@ -374,7 +378,7 @@ int Query::getResCnt() bool Query::getDoc(int xapi, Doc &doc, bool fetchtext) { LOGDEB1("Query::getDoc: xapian enquire index " << xapi << "\n"); - if (ISNULL(m_nq) || !m_nq->xenquire) { + if (!m_nq || !m_nq->xenquire) { LOGERR("Query::getDoc: no query opened\n"); return false; } @@ -457,7 +461,7 @@ vector Query::expand(const Doc &doc) { LOGDEB("Rcl::Query::expand()\n"); vector res; - if (ISNULL(m_nq) || !m_nq->xenquire) { + if (!m_nq || !m_nq->xenquire) { LOGERR("Query::expand: no query opened\n"); return res; } diff --git a/src/rcldb/rclquery.h b/src/rcldb/rclquery.h index b1fd6255..30656b51 100644 --- a/src/rcldb/rclquery.h +++ b/src/rcldb/rclquery.h @@ -90,8 +90,15 @@ public: */ bool setQuery(std::shared_ptr q); - /** Get results count for current query */ - int getResCnt(); + + /** Get results 
count for current query. + * + * @param checkatleast checkatleast parameter to get_mset(). Use -1 for + * full scan. + * @param useestimate Use get_matches_estimated() if true, else + * get_matches_lower_bound(). + */ + int getResCnt(int checkatleast=1000, bool useestimate=false); /** Get document at rank i in current query results. */ bool getDoc(int i, Doc &doc, bool fetchtext = false);