diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml index 275564c1..80797e7b 100644 --- a/src/doc/user/usermanual.sgml +++ b/src/doc/user/usermanual.sgml @@ -2125,6 +2125,17 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r /usr/local/share/doc + size for filtering the + results on file size. Example: + size<10000. You can use + <, > or + = as operators. You can specify a range like the + following: size>100 size<1000. The usual + k/K, m/M, g/G, t/T can be used as (decimal) + multipliers. Ex: size>1k to search for files + bigger than 1000 bytes. + + date for searching or filtering on dates. The syntax for the argument is based on the ISO8601 standard for dates and time intervals. Only dates are supported, no diff --git a/src/query/wasastringtoquery.cpp b/src/query/wasastringtoquery.cpp index 2938ee00..a2c1a3a3 100644 --- a/src/query/wasastringtoquery.cpp +++ b/src/query/wasastringtoquery.cpp @@ -139,7 +139,7 @@ static const char * parserExpr = "(" //5 "([[:alpha:]][[:alnum:]:]*)" //6 Field spec: ie: "dc:title:letitre" "[[:space:]]*" - "(:)" //7 Relation + "(:|=|>|<)" //7 Relation "[[:space:]]*)?" 
"(" //8 "(\"" //9 diff --git a/src/query/wasatorcl.cpp b/src/query/wasatorcl.cpp index bc4f7c7a..f7f51a32 100644 --- a/src/query/wasatorcl.cpp +++ b/src/query/wasatorcl.cpp @@ -125,6 +125,46 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa, continue; } + // Handle "size" spec + if (!stringicmp("size", (*it)->m_fieldspec)) { + if ((*it)->m_op != WasaQuery::OP_LEAF) { + reason = "Negative size filtering not supported"; + return 0; + } + char *cp; + size_t size = strtoll((*it)->m_value.c_str(), &cp, 10); + if (*cp != 0) { + switch (*cp) { + case 'k': case 'K': size *= 1E3;break; + case 'm': case 'M': size *= 1E6;break; + case 'g': case 'G': size *= 1E9;break; + case 't': case 'T': size *= 1E12;break; + default: + reason = string("Bad multiplier suffix: ") + *cp; + return 0; + } + } + + switch ((*it)->m_rel) { + case WasaQuery::REL_EQUALS: + sdata->setMaxSize(size); + sdata->setMinSize(size); + break; + case WasaQuery::REL_LT: + case WasaQuery::REL_LTE: + sdata->setMaxSize(size); + break; + case WasaQuery::REL_GT: + case WasaQuery::REL_GTE: + sdata->setMinSize(size); + break; + default: + reason = "Bad relation operator with size query. 
Use > < or ="; + return 0; + } + continue; + } + // "Regular" processing follows: switch ((*it)->m_op) { case WasaQuery::OP_NULL: diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index c83c043f..a4351395 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1257,8 +1257,13 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, if (doc.fbytes.empty()) doc.fbytes = doc.pcbytes; - if (!doc.fbytes.empty()) + + if (!doc.fbytes.empty()) { RECORD_APPEND(record, Doc::keyfs, doc.fbytes); + leftzeropad(doc.fbytes, 12); + newdocument.add_value(VALUE_SIZE, doc.fbytes); + } + if (!doc.pcbytes.empty()) RECORD_APPEND(record, Doc::keypcs, doc.pcbytes); char sizebuf[30]; diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 8adffa48..26d1adc3 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -57,6 +57,19 @@ class RclConfig; namespace Rcl { #endif +// Omega compatible values. We leave a hole for future omega values. Not sure +// it makes any sense to keep any level of omega compat given that the index +// is incompatible anyway. +enum value_slot { + // Omega-compatible values: + VALUE_LASTMOD = 0, // 4 byte big endian value - seconds since 1970. + VALUE_MD5 = 1, // 16 byte MD5 checksum of original document. + VALUE_SIZE = 2, // sortable_serialise() + + // Recoll only: + VALUE_SIG = 10 // Doc sig as chosen by app (ex: mtime+size +}; + class SearchData; class TermIter; class Query; diff --git a/src/rcldb/rcldb_p.h b/src/rcldb/rcldb_p.h index 23db4fcf..ad229780 100644 --- a/src/rcldb/rcldb_p.h +++ b/src/rcldb/rcldb_p.h @@ -25,19 +25,6 @@ namespace Rcl { -// Omega compatible values. We leave a hole for future omega values. Not sure -// it makes any sense to keep any level of omega compat given that the index -// is incompatible anyway. -enum value_slot { - // Omega-compatible values: - VALUE_LASTMOD = 0, // 4 byte big endian value - seconds since 1970. - VALUE_MD5 = 1, // 16 byte MD5 checksum of original document. 
- VALUE_SIZE = 2, // sortable_serialise() - - // Recoll only: - VALUE_SIG = 10 // Doc sig as chosen by app (ex: mtime+size -}; - // Generic Xapian exception catching code. We do this quite often, // and I have no idea how to do this except for a macro #define XCATCHERROR(MSG) \ diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index afc9578a..9b366d11 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -255,6 +255,39 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d) } } + + if (m_minSize != size_t(-1) || m_maxSize != size_t(-1)) { + Xapian::Query sq; + char min[50], max[50]; + sprintf(min, "%lld", (long long)m_minSize); + sprintf(max, "%lld", (long long)m_maxSize); + if (m_minSize == size_t(-1)) { + string value(max); + leftzeropad(value, 12); + sq = Xapian::Query(Xapian::Query::OP_VALUE_LE, VALUE_SIZE, value); + } else if (m_maxSize == size_t(-1)) { + string value(min); + leftzeropad(value, 12); + sq = Xapian::Query(Xapian::Query::OP_VALUE_GE, VALUE_SIZE, value); + } else { + string minvalue(min); + leftzeropad(minvalue, 12); + string maxvalue(max); + leftzeropad(maxvalue, 12); + sq = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, VALUE_SIZE, + minvalue, maxvalue); + } + + // If no probabilistic query is provided then promote the + // filter to be THE query instead of filtering an empty query. + if (xq.empty()) { + LOGINFO(("Db::toNativeQuery: proba query is empty\n")); + xq = sq; + } else { + xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq); + } + } + // Add the file type filtering clause if any if (!m_filetypes.empty()) { expandFileTypes(db.getConf(), m_filetypes); @@ -452,6 +485,8 @@ void SearchData::erase() { m_description.erase(); m_reason.erase(); m_haveDates = false; + m_minSize = size_t(-1); + m_maxSize = size_t(-1); } // Am I a file name only search ? 
This is to turn off term highlighting diff --git a/src/rcldb/searchdata.h b/src/rcldb/searchdata.h index cf390d1c..d872778c 100644 --- a/src/rcldb/searchdata.h +++ b/src/rcldb/searchdata.h @@ -75,7 +75,8 @@ class SearchData { public: SearchData(SClType tp) : m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0), - m_haveDates(false), m_haveWildCards(false) + m_haveDates(false), m_maxSize(size_t(-1)), + m_minSize(size_t(-1)), m_haveWildCards(false) { if (m_tp != SCLT_OR && m_tp != SCLT_AND) m_tp = SCLT_OR; @@ -114,6 +115,9 @@ public: m_topdirweight = w; } + void setMinSize(size_t size) {m_minSize = size;} + void setMaxSize(size_t size) {m_maxSize = size;} + /** Set date span for filtering results */ void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;} @@ -154,6 +158,8 @@ private: float m_topdirweight; // affect weight instead of filter bool m_haveDates; DateInterval m_dates; // Restrict to date interval + size_t m_maxSize; + size_t m_minSize; // Printable expanded version of the complete query, retrieved/set // from rcldb after the Xapian::setQuery() call string m_description;