search language: implemented filtering on file size

This commit is contained in:
Jean-Francois Dockes 2012-03-07 17:08:22 +01:00
parent 85166c93b2
commit 7ddbbb1ee8
8 changed files with 113 additions and 16 deletions

View File

@ -2125,6 +2125,17 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
<filename>/usr/local/share/doc</filename> </para> <filename>/usr/local/share/doc</filename> </para>
</listitem> </listitem>
<listitem><para><literal>size</literal> for filtering the
results on file size. Example:
<literal>size&lt;10000</literal>. You can use
<literal>&lt;</literal>, <literal>&gt;</literal> or
<literal>=</literal> as operators. You can specify a range like the
following: <literal>size&gt;100 size&lt;1000</literal>. The usual
<literal>k/K, m/M, g/G, t/T</literal> suffixes can be used as (decimal)
multipliers. Ex: <literal>size&gt;1k</literal> to search for files
bigger than 1000 bytes.</para>
</listitem>
<listitem><para><literal>date</literal> for searching or filtering <listitem><para><literal>date</literal> for searching or filtering
on dates. The syntax for the argument is based on the ISO8601 on dates. The syntax for the argument is based on the ISO8601
standard for dates and time intervals. Only dates are supported, no standard for dates and time intervals. Only dates are supported, no

View File

@ -139,7 +139,7 @@ static const char * parserExpr =
"(" //5 "(" //5
"([[:alpha:]][[:alnum:]:]*)" //6 Field spec: ie: "dc:title:letitre" "([[:alpha:]][[:alnum:]:]*)" //6 Field spec: ie: "dc:title:letitre"
"[[:space:]]*" "[[:space:]]*"
"(:)" //7 Relation "(:|=|>|<)" //7 Relation
"[[:space:]]*)?" "[[:space:]]*)?"
"(" //8 "(" //8
"(\"" //9 "(\"" //9

View File

@ -125,6 +125,46 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
continue; continue;
} }
// Handle "size" spec
if (!stringicmp("size", (*it)->m_fieldspec)) {
if ((*it)->m_op != WasaQuery::OP_LEAF) {
reason = "Negative size filtering not supported";
return 0;
}
char *cp;
size_t size = strtoll((*it)->m_value.c_str(), &cp, 10);
if (*cp != 0) {
switch (*cp) {
case 'k': case 'K': size *= 1E3;break;
case 'm': case 'M': size *= 1E6;break;
case 'g': case 'G': size *= 1E9;break;
case 't': case 'T': size *= 1E12;break;
default:
reason = string("Bad multiplier suffix: ") + *cp;
return 0;
}
}
switch ((*it)->m_rel) {
case WasaQuery::REL_EQUALS:
sdata->setMaxSize(size);
sdata->setMinSize(size);
break;
case WasaQuery::REL_LT:
case WasaQuery::REL_LTE:
sdata->setMaxSize(size);
break;
case WasaQuery::REL_GT:
case WasaQuery::REL_GTE:
sdata->setMinSize(size);
break;
default:
reason = "Bad relation operator with size query. Use > < or =";
return 0;
}
continue;
}
// "Regular" processing follows: // "Regular" processing follows:
switch ((*it)->m_op) { switch ((*it)->m_op) {
case WasaQuery::OP_NULL: case WasaQuery::OP_NULL:

View File

@ -1257,8 +1257,13 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
if (doc.fbytes.empty()) if (doc.fbytes.empty())
doc.fbytes = doc.pcbytes; doc.fbytes = doc.pcbytes;
if (!doc.fbytes.empty())
if (!doc.fbytes.empty()) {
RECORD_APPEND(record, Doc::keyfs, doc.fbytes); RECORD_APPEND(record, Doc::keyfs, doc.fbytes);
leftzeropad(doc.fbytes, 12);
newdocument.add_value(VALUE_SIZE, doc.fbytes);
}
if (!doc.pcbytes.empty()) if (!doc.pcbytes.empty())
RECORD_APPEND(record, Doc::keypcs, doc.pcbytes); RECORD_APPEND(record, Doc::keypcs, doc.pcbytes);
char sizebuf[30]; char sizebuf[30];

View File

@ -57,6 +57,19 @@ class RclConfig;
namespace Rcl { namespace Rcl {
#endif #endif
// Slot numbers for the values attached to each Xapian document in the index.
// Omega compatible values. We leave a hole for future omega values. Not sure
// it makes any sense to keep any level of omega compat given that the index
// is incompatible anyway.
enum value_slot {
// Omega-compatible values:
VALUE_LASTMOD = 0, // 4 byte big endian value - seconds since 1970.
VALUE_MD5 = 1, // 16 byte MD5 checksum of original document.
VALUE_SIZE = 2, // sortable_serialise(<file size in bytes>)
// NOTE(review): the omega format above does not match what this change
// stores. Db::addOrUpdate() writes the decimal size string left-zero-padded
// to 12 characters into VALUE_SIZE, and SearchData::toNativeQuery() builds
// its size filters against the same padded form. Confirm which format is
// intended.
// Recoll only:
VALUE_SIG = 10 // Doc sig as chosen by app (ex: mtime+size)
};
class SearchData; class SearchData;
class TermIter; class TermIter;
class Query; class Query;

View File

@ -25,19 +25,6 @@
namespace Rcl { namespace Rcl {
// Omega compatible values. We leave a hole for future omega values. Not sure
// it makes any sense to keep any level of omega compat given that the index
// is incompatible anyway.
enum value_slot {
// Omega-compatible values:
VALUE_LASTMOD = 0, // 4 byte big endian value - seconds since 1970.
VALUE_MD5 = 1, // 16 byte MD5 checksum of original document.
VALUE_SIZE = 2, // sortable_serialise(<file size in bytes>)
// Recoll only:
VALUE_SIG = 10 // Doc sig as chosen by app (ex: mtime+size
};
// Generic Xapian exception catching code. We do this quite often, // Generic Xapian exception catching code. We do this quite often,
// and I have no idea how to do this except for a macro // and I have no idea how to do this except for a macro
#define XCATCHERROR(MSG) \ #define XCATCHERROR(MSG) \

View File

@ -255,6 +255,39 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
} }
} }
if (m_minSize != size_t(-1) || m_maxSize != size_t(-1)) {
Xapian::Query sq;
char min[50], max[50];
sprintf(min, "%lld", (long long)m_minSize);
sprintf(max, "%lld", (long long)m_maxSize);
if (m_minSize == size_t(-1)) {
string value(max);
leftzeropad(value, 12);
sq = Xapian::Query(Xapian::Query::OP_VALUE_LE, VALUE_SIZE, value);
} else if (m_maxSize == size_t(-1)) {
string value(min);
leftzeropad(value, 12);
sq = Xapian::Query(Xapian::Query::OP_VALUE_GE, VALUE_SIZE, value);
} else {
string minvalue(min);
leftzeropad(minvalue, 12);
string maxvalue(max);
leftzeropad(maxvalue, 12);
sq = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, VALUE_SIZE,
minvalue, maxvalue);
}
// If no probabilistic query is provided then promote the
// filter to be THE query instead of filtering an empty query.
if (xq.empty()) {
LOGINFO(("Db::toNativeQuery: proba query is empty\n"));
xq = sq;
} else {
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq);
}
}
// Add the file type filtering clause if any // Add the file type filtering clause if any
if (!m_filetypes.empty()) { if (!m_filetypes.empty()) {
expandFileTypes(db.getConf(), m_filetypes); expandFileTypes(db.getConf(), m_filetypes);
@ -452,6 +485,8 @@ void SearchData::erase() {
m_description.erase(); m_description.erase();
m_reason.erase(); m_reason.erase();
m_haveDates = false; m_haveDates = false;
m_minSize = size_t(-1);
m_maxSize = size_t(-1);
} }
// Am I a file name only search ? This is to turn off term highlighting // Am I a file name only search ? This is to turn off term highlighting

View File

@ -75,7 +75,8 @@ class SearchData {
public: public:
SearchData(SClType tp) SearchData(SClType tp)
: m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0), : m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0),
m_haveDates(false), m_haveWildCards(false) m_haveDates(false), m_maxSize(size_t(-1)),
m_minSize(size_t(-1)), m_haveWildCards(false)
{ {
if (m_tp != SCLT_OR && m_tp != SCLT_AND) if (m_tp != SCLT_OR && m_tp != SCLT_AND)
m_tp = SCLT_OR; m_tp = SCLT_OR;
@ -114,6 +115,9 @@ public:
m_topdirweight = w; m_topdirweight = w;
} }
/** Set the lower bound for filtering results on file size.
 *  size_t(-1) (the value set by the constructor and by erase()) means
 *  "no lower bound". */
void setMinSize(size_t size) {m_minSize = size;}
/** Set the upper bound for filtering results on file size.
 *  size_t(-1) (the value set by the constructor and by erase()) means
 *  "no upper bound". */
void setMaxSize(size_t size) {m_maxSize = size;}
/** Set date span for filtering results */ /** Set date span for filtering results */
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;} void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
@ -154,6 +158,8 @@ private:
float m_topdirweight; // affect weight instead of filter float m_topdirweight; // affect weight instead of filter
bool m_haveDates; bool m_haveDates;
DateInterval m_dates; // Restrict to date interval DateInterval m_dates; // Restrict to date interval
size_t m_maxSize;
size_t m_minSize;
// Printable expanded version of the complete query, retrieved/set // Printable expanded version of the complete query, retrieved/set
// from rcldb after the Xapian::setQuery() call // from rcldb after the Xapian::setQuery() call
string m_description; string m_description;