search language: implemented filtering on file size

2012-03-07 17:08:22 +01:00 · 2012-03-07 17:08:22 +01:00 · 7ddbbb1ee8
commit 7ddbbb1ee8
parent 85166c93b2
8 changed files with 113 additions and 16 deletions
--- a/src/doc/user/usermanual.sgml
+++ b/src/doc/user/usermanual.sgml
@ -2125,6 +2125,17 @@ text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
        <filename>/usr/local/share/doc</filename> </para>
        </listitem>
        <listitem><para><literal>size</literal> for filtering the
        results on file size. Example:
        <literal>size&lt;10000</literal>. You can use
        <literal>&lt;</literal>, <literal>&gt;</literal> or
        <literal>=</literal> as operators. You can specify a range like the
        following: <literal>size&gt;100 size&lt;1000</literal>. The usual
        <literal>k/K, m/M, g/G, t/T</literal> can be used as (decimal)
        multipliers. Ex: <literal>size&gt;1k</literal> to search for files
        bigger than 1000 bytes.</para>
        </listitem>
        <listitem><para><literal>date</literal> for searching or filtering
        on dates. The syntax for the argument is based on the ISO8601
        standard for dates and time intervals. Only dates are supported, no
--- a/src/query/wasastringtoquery.cpp
+++ b/src/query/wasastringtoquery.cpp
@ -139,7 +139,7 @@ static const char * parserExpr =
      "("                            //5
        "([[:alpha:]][[:alnum:]:]*)" //6 Field spec: ie: "dc:title:letitre"
        "[[:space:]]*"
-        "(:)"            //7 Relation
+        "(:|=|>|<)"            //7 Relation
        "[[:space:]]*)?"
      "("                            //8
        "(\""                        //9
--- a/src/query/wasatorcl.cpp
+++ b/src/query/wasatorcl.cpp
@ -125,6 +125,46 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
 	    continue;
 	} 
 	// Handle "size" spec
 	if (!stringicmp("size", (*it)->m_fieldspec)) {
 	    // A size clause is a filter on the whole search, it is only
 	    // accepted as a plain leaf clause.
 	    if ((*it)->m_op != WasaQuery::OP_LEAF) {
 		reason = "Negative size filtering not supported";
 		return 0;
 	    }

 	    // Parse the numeric part. cp is left pointing at the first
 	    // character which is not part of the number.
 	    const char *sizestr = (*it)->m_value.c_str();
 	    char *cp;
 	    long long parsed = strtoll(sizestr, &cp, 10);
 	    // cp == sizestr: no digits at all (empty value, or suffix only).
 	    // A negative value would wrap around to a huge number once
 	    // stored in a size_t.
 	    if (cp == sizestr || parsed < 0) {
 		reason = string("Bad size value: ") + (*it)->m_value;
 		return 0;
 	    }

 	    // Optional decimal multiplier suffix. This is kept in integer
 	    // arithmetic: multiplying by a floating point constant loses
 	    // precision for big values and the conversion back to size_t is
 	    // undefined if the result does not fit.
 	    unsigned long long mult = 1;
 	    if (*cp != 0) {
 		switch (*cp) {
 		case 'k': case 'K': mult = 1000ULL; break;
 		case 'm': case 'M': mult = 1000000ULL; break;
 		case 'g': case 'G': mult = 1000000000ULL; break;
 		case 't': case 'T': mult = 1000000000000ULL; break;
 		default: 
 		    reason = string("Bad multiplier suffix: ") + *cp;
 		    return 0;
 		}
 	    }

 	    // size_t(-1) is the value that SearchData uses for "bound not
 	    // set", so the biggest acceptable size is one less than that.
 	    const unsigned long long maxsize = 
 		(unsigned long long)(size_t(-1)) - 1;
 	    if ((unsigned long long)parsed > maxsize / mult) {
 		reason = string("Size value too big: ") + (*it)->m_value;
 		return 0;
 	    }
 	    size_t size = size_t((unsigned long long)parsed * mult);

 	    // Translate the relation into bounds on the search data. The
 	    // strict and non-strict variants are handled identically here.
 	    switch ((*it)->m_rel) {
 	    case WasaQuery::REL_EQUALS:
 		sdata->setMaxSize(size);
 		sdata->setMinSize(size);
 		break;
 	    case WasaQuery::REL_LT:
 	    case WasaQuery::REL_LTE:
 		sdata->setMaxSize(size);
 		break;
 	    case WasaQuery::REL_GT: 
 	    case WasaQuery::REL_GTE:
 		sdata->setMinSize(size);
 		break;
 	    default:
 		reason = "Bad relation operator with size query. Use > < or =";
 		return 0;
 	    }
 	    continue;
 	} 
 	// "Regular" processing follows:
 	switch ((*it)->m_op) {
 	case WasaQuery::OP_NULL:
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -1257,8 +1257,13 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
    if (doc.fbytes.empty())
 	doc.fbytes = doc.pcbytes;
-    if (!doc.fbytes.empty())
+
    if (!doc.fbytes.empty()) {
 	// The data record gets the size string as it is at this point.
 	RECORD_APPEND(record, Doc::keyfs, doc.fbytes);
 	// The value slot gets the size left-zero-padded to 12 characters.
 	// Note that the padding is applied to doc.fbytes itself. The size
 	// filter in SearchData::toNativeQuery() pads its bounds the same
 	// way before comparing against this slot.
 	leftzeropad(doc.fbytes, 12);
 	newdocument.add_value(VALUE_SIZE, doc.fbytes);
    }
    if (!doc.pcbytes.empty())
 	RECORD_APPEND(record, Doc::keypcs, doc.pcbytes);
    char sizebuf[30]; 
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@ -57,6 +57,19 @@ class RclConfig;
 namespace Rcl {
 #endif
 // Xapian document value slot numbers.
 // Omega compatible values. We leave a hole for future omega values. Not sure 
 // it makes any sense to keep any level of omega compat given that the index
 // is incompatible anyway.
 enum value_slot {
    // Omega-compatible values:
    VALUE_LASTMOD = 0,	// 4 byte big endian value - seconds since 1970.
    VALUE_MD5 = 1,	// 16 byte MD5 checksum of original document.
    VALUE_SIZE = 2,     // File size in bytes, as a decimal string left-padded
                        // with zeros to 12 characters (see Db::addOrUpdate())
    // Recoll only:
    VALUE_SIG = 10      // Doc sig as chosen by app (ex: mtime+size)
 };
 class SearchData;
 class TermIter;
 class Query;
--- a/src/rcldb/rcldb_p.h
+++ b/src/rcldb/rcldb_p.h
@ -25,19 +25,6 @@
 namespace Rcl {
 // Omega compatible values. We leave a hole for future omega values. Not sure 
 // it makes any sense to keep any level of omega compat given that the index
 // is incompatible anyway.
 enum value_slot {
    // Omega-compatible values:
    VALUE_LASTMOD = 0,	// 4 byte big endian value - seconds since 1970.
    VALUE_MD5 = 1,	// 16 byte MD5 checksum of original document.
    VALUE_SIZE = 2,     // sortable_serialise(<file size in bytes>)
    // Recoll only:
    VALUE_SIG = 10      // Doc sig as chosen by app (ex: mtime+size
 };
 // Generic Xapian exception catching code. We do this quite often,
 // and I have no idea how to do this except for a macro
 #define XCATCHERROR(MSG) \
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@ -255,6 +255,39 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
        }
    }
    if (m_minSize != size_t(-1) || m_maxSize != size_t(-1)) {
 	// size_t(-1) means that the bound is not set. At least one of the
 	// two is set when we get here.
 	const bool havemin = m_minSize != size_t(-1);
 	const bool havemax = m_maxSize != size_t(-1);

 	// The bounds are compared as strings: format them as decimal
 	// numbers left-padded with zeros to 12 characters.
 	char numbuf[50];
 	string minvalue, maxvalue;
 	if (havemin) {
 	    sprintf(numbuf, "%lld", (long long)m_minSize);
 	    minvalue = numbuf;
 	    leftzeropad(minvalue, 12);
 	}
 	if (havemax) {
 	    sprintf(numbuf, "%lld", (long long)m_maxSize);
 	    maxvalue = numbuf;
 	    leftzeropad(maxvalue, 12);
 	}

 	// Both bounds: range query. Else one-sided comparison.
 	Xapian::Query sq;
 	if (havemin && havemax) {
 	    sq = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, VALUE_SIZE, 
 			       minvalue, maxvalue);
 	} else if (havemin) {
 	    sq = Xapian::Query(Xapian::Query::OP_VALUE_GE, VALUE_SIZE, 
 			       minvalue);
 	} else {
 	    sq = Xapian::Query(Xapian::Query::OP_VALUE_LE, VALUE_SIZE, 
 			       maxvalue);
 	}

        // With no probabilistic query to filter, the size clause becomes
        // the whole query. Else it is applied as a filter.
        if (!xq.empty()) {
            xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq);
        } else {
            LOGINFO(("Db::toNativeQuery: proba query is empty\n"));
            xq = sq;
        }
    }
    // Add the file type filtering clause if any
    if (!m_filetypes.empty()) {
 	expandFileTypes(db.getConf(), m_filetypes);
@ -452,6 +485,8 @@ void SearchData::erase() {
    m_description.erase();
    m_reason.erase();
    m_haveDates = false;
    m_minSize = size_t(-1);
    m_maxSize = size_t(-1);
 }
 // Am I a file name only search ? This is to turn off term highlighting
--- a/src/rcldb/searchdata.h
+++ b/src/rcldb/searchdata.h
@ -75,7 +75,8 @@ class SearchData {
 public:
    SearchData(SClType tp) 
        : m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0), 
-	  m_haveDates(false), m_haveWildCards(false) 
+	  m_haveDates(false), m_maxSize(size_t(-1)),
 	  m_minSize(size_t(-1)), m_haveWildCards(false) 
    {
 	if (m_tp != SCLT_OR && m_tp != SCLT_AND) 
 	    m_tp = SCLT_OR;
@ -114,6 +115,9 @@ public:
 	m_topdirweight = w;
    }
    /** Set the lower bound for filtering results on file size */
    void setMinSize(size_t size) 
    {
	m_minSize = size;
    }
    /** Set the upper bound for filtering results on file size */
    void setMaxSize(size_t size) 
    {
	m_maxSize = size;
    }
    /** Set date span for filtering results */
    void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
@ -154,6 +158,8 @@ private:
    float                     m_topdirweight; // affect weight instead of filter
    bool                      m_haveDates;
    DateInterval              m_dates; // Restrict to date interval
    size_t                    m_maxSize; // Upper file size bound, size_t(-1) if unset
    size_t                    m_minSize; // Lower file size bound, size_t(-1) if unset
    // Printable expanded version of the complete query, retrieved/set
    // from rcldb after the Xapian::setQuery() call
    string m_description;