recollq: add option to obtain exact result count

This commit is contained in:
Jean-Francois Dockes 2019-11-28 16:13:27 +01:00
parent a1138dd9e5
commit f42338c026
3 changed files with 50 additions and 31 deletions

View File

@ -123,6 +123,7 @@ static char usage [] =
" -T <synonyms file>: use the parameter (Thesaurus) for word expansion.\n"
" -i <dbdir> : additional index, several can be given.\n"
" -e use url encoding (%xx) for urls.\n"
" -E use exact result count instead of lower bound estimate.\n"
" -F <field name list> : output exactly these fields for each result.\n"
" The field values are encoded in base64, output in one line and \n"
" separated by one space character. This is the recommended format \n"
@ -142,39 +143,40 @@ Usage(void)
// Command line option flags. Each OPT_x below is a distinct bit; the option
// parsing switch in recollq() ORs the matching bit into op_flags when it
// meets the option letter (e.g. case 'E': op_flags |= OPT_E).
// The interspersed comments record how the GUI uses the same letters.
static int op_flags;
#define OPT_A 0x1
// gui: -a same
// GUI: -a same
#define OPT_a 0x2
#define OPT_b 0x4
#define OPT_C 0x8
// gui: -c same
// GUI: -c same
#define OPT_c 0x10
#define OPT_D 0x20
#define OPT_d 0x40
#define OPT_e 0x80
#define OPT_F 0x100
// gui: -f same
// GUI: -f same
#define OPT_f 0x200
// gui uses -h for help. us: usage
// GUI uses -h for help. us: usage
#define OPT_i 0x400
// gui uses -L to set language of messages
// gui: -l same
// GUI uses -L to set language of messages
// GUI: -l same
#define OPT_l 0x800
#define OPT_m 0x1000
#define OPT_N 0x2000
#define OPT_n 0x4000
// gui: -o same
// GUI: -o same
#define OPT_o 0x8000
#define OPT_P 0x10000
#define OPT_Q 0x20000
// gui: -q same
// GUI: -q same
#define OPT_q 0x40000
#define OPT_S 0x80000
#define OPT_s 0x100000
#define OPT_T 0x200000
// gui: -t use command line, us: ignored
// GUI: -t use command line, us: ignored
#define OPT_t 0x400000
// gui uses -v : show version. Us: usage
// gui uses -w : open minimized
// GUI uses -v : show version. Us: usage
// GUI uses -w : open minimized
// -E: use exact result count instead of lower bound estimate. Defined last,
// out of alphabetical order, on the next free bit.
#define OPT_E 0x800000
int recollq(RclConfig **cfp, int argc, char **argv)
{
@ -212,6 +214,7 @@ int recollq(RclConfig **cfp, int argc, char **argv)
argc--; goto b1;
case 'd': op_flags |= OPT_d; break;
case 'D': op_flags |= OPT_D; break;
case 'E': op_flags |= OPT_E; break;
case 'e': op_flags |= OPT_e; break;
case 'f': op_flags |= OPT_f; break;
case 'F': op_flags |= OPT_F; if (argc < 2) Usage();
@ -366,7 +369,12 @@ endopts:
cerr << "Query setup failed: " << query.getReason() << endl;
return(1);
}
int cnt = query.getResCnt();
int cnt;
if (op_flags & OPT_E) {
cnt = query.getResCnt(-1, true);
} else {
cnt = query.getResCnt();
}
if (!(op_flags & OPT_b)) {
cout << "Recoll query: " << rq->getDescription() << endl;
if (firstres == 0) {

View File

@ -169,15 +169,12 @@ void Query::setSortBy(const string& fld, bool ascending) {
(m_sortAscending ? "ascending" : "descending") << "\n");
}
// Null test helper: expands to the logical negation of its argument. The
// commented-out variant called an isNull() method on the argument instead.
//#define ISNULL(X) (X).isNull()
#define ISNULL(X) !(X)
// Prepare query out of user search data
bool Query::setQuery(std::shared_ptr<SearchData> sdata)
{
LOGDEB("Query::setQuery:\n");
if (!m_db || ISNULL(m_nq)) {
if (!m_db || !m_nq) {
LOGERR("Query::setQuery: not initialised!\n");
return false;
}
@ -247,7 +244,7 @@ bool Query::setQuery(std::shared_ptr<SearchData> sdata)
bool Query::getQueryTerms(vector<string>& terms)
{
if (ISNULL(m_nq))
if (!m_nq)
return false;
terms.clear();
@ -336,30 +333,37 @@ static const int qquantum = 50;
// Get the result count for the current query. Xapian actually does most of
// the search job in there, this can be long.
//
// checkatleast is passed through as the third argument of get_mset(); a
// value of -1 is replaced by m_db->docCnt() first. useestimate selects
// get_matches_estimated() over get_matches_lower_bound() for the returned
// value.
//
// Returns the count, or -1 if no query is opened or if the get_mset() call
// left a message in m_reason (presumably XAPTRY stores the text of a caught
// Xapian exception there -- confirm against the macro definition).
//
// NOTE(review): an already computed m_resCnt (>= 0) is returned as is, and
// get_mset() only runs while xmset is empty, so checkatleast / useestimate
// look like they only take effect on the first call for a given query --
// confirm that no caller expects to change them afterwards.
int Query::getResCnt()
int Query::getResCnt(int checkatleast, bool useestimate)
{
if (ISNULL(m_nq) || !m_nq->xenquire) {
if (!m_db || !m_nq || !m_nq->xenquire) {
LOGERR("Query::getResCnt: no query opened\n");
return -1;
}
LOGDEB0("Query::getResCnt: checkatleast " << checkatleast << " estimate " <<
useestimate << "\n");
// Reuse the stored count if there is one.
if (m_resCnt >= 0)
return m_resCnt;
m_resCnt = -1;
// Only run the match if no result set is currently held.
if (m_nq->xmset.size() <= 0) {
Chrono chron;
XAPTRY(m_nq->xmset =
m_nq->xenquire->get_mset(0, qquantum, 1000);
m_resCnt = m_nq->xmset.get_matches_lower_bound(),
XAPTRY(if (checkatleast == -1)
checkatleast = m_db->docCnt();
m_nq->xmset = m_nq->xenquire->get_mset(0, qquantum, checkatleast),
m_db->m_ndb->xrdb, m_reason);
LOGDEB("Query::getResCnt: "<<m_resCnt<<" "<< chron.millis() << " mS\n");
if (!m_reason.empty())
if (!m_reason.empty()) {
LOGERR("xenquire->get_mset: exception: " << m_reason << "\n");
return -1;
}
LOGDEB("Query::getResCnt: get_mset: " << chron.millis() << " mS\n");
}
// Choose which of the two Xapian counts to store and report.
if (useestimate) {
m_resCnt = m_nq->xmset.get_matches_estimated();
} else {
m_resCnt = m_nq->xmset.get_matches_lower_bound();
}
LOGDEB("Query::getResCnt: " << m_resCnt << "\n");
return m_resCnt;
}
@ -374,7 +378,7 @@ int Query::getResCnt()
bool Query::getDoc(int xapi, Doc &doc, bool fetchtext)
{
LOGDEB1("Query::getDoc: xapian enquire index " << xapi << "\n");
if (ISNULL(m_nq) || !m_nq->xenquire) {
if (!m_nq || !m_nq->xenquire) {
LOGERR("Query::getDoc: no query opened\n");
return false;
}
@ -457,7 +461,7 @@ vector<string> Query::expand(const Doc &doc)
{
LOGDEB("Rcl::Query::expand()\n");
vector<string> res;
if (ISNULL(m_nq) || !m_nq->xenquire) {
if (!m_nq || !m_nq->xenquire) {
LOGERR("Query::expand: no query opened\n");
return res;
}

View File

@ -90,8 +90,15 @@ public:
*/
bool setQuery(std::shared_ptr<SearchData> q);
/** Get results count for current query */
int getResCnt();
/** Get results count for current query.
*
* @param checkatleast checkatleast parameter to get_mset(). Use -1 for
* full scan (the index document count is then used). Defaults to 1000.
* @param useestimate Use get_matches_estimated() if true, else
* get_matches_lower_bound(). Defaults to false.
* @return the result count, or -1 if no query is opened or the
* get_mset() call failed.
*/
int getResCnt(int checkatleast=1000, bool useestimate=false);
/** Get document at rank i in current query results. */
bool getDoc(int i, Doc &doc, bool fetchtext = false);