search language: implemented filtering on file size

This commit is contained in:
Jean-Francois Dockes 2012-03-07 17:08:22 +01:00
parent 85166c93b2
commit 7ddbbb1ee8
8 changed files with 113 additions and 16 deletions

View File

@ -2125,6 +2125,17 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
<filename>/usr/local/share/doc</filename> </para>
</listitem>
<listitem><para><literal>size</literal> for filtering the
results on file size. Example:
<literal>size&lt;10000</literal>. You can use
<literal>&lt;</literal>, <literal>&gt;</literal> or
<literal>=</literal> as operators. You can specify a range like the
following: <literal>size&gt;100 size&lt;1000</literal>. The usual
<literal>k/K, m/M, g/G, t/T</literal> suffixes can be used as (decimal)
multipliers. Example: <literal>size&gt;1k</literal> to search for files
bigger than 1000 bytes.</para>
</listitem>
<listitem><para><literal>date</literal> for searching or filtering
on dates. The syntax for the argument is based on the ISO8601
standard for dates and time intervals. Only dates are supported, no

View File

@ -139,7 +139,7 @@ static const char * parserExpr =
"(" //5
"([[:alpha:]][[:alnum:]:]*)" //6 Field spec: ie: "dc:title:letitre"
"[[:space:]]*"
"(:)" //7 Relation
"(:|=|>|<)" //7 Relation
"[[:space:]]*)?"
"(" //8
"(\"" //9

View File

@ -125,6 +125,46 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
continue;
}
// Handle "size" spec: "size<N", "size>N" and "size=N" do not create a
// search clause, they set size limits on the search data instead.
if (!stringicmp("size", (*it)->m_fieldspec)) {
    if ((*it)->m_op != WasaQuery::OP_LEAF) {
        reason = "Negative size filtering not supported";
        return 0;
    }

    // SearchData uses size_t(-1) to mean "no limit set", so the biggest
    // limit we can accept is one less than that.
    const unsigned long long maxsize = size_t(-1) - 1;

    const char *cp = (*it)->m_value.c_str();
    // Like strtoll() did: tolerate leading white space and an explicit '+'
    while (*cp == ' ' || *cp == '\t')
        cp++;
    if (*cp == '+')
        cp++;

    // Parse the digits by hand, in integer arithmetic. This rejects
    // negative or missing values and detects overflow, instead of
    // silently wrapping around as strtoll() stored into a size_t would.
    const char *digits = cp;
    unsigned long long value = 0;
    for (; *cp >= '0' && *cp <= '9'; cp++) {
        const unsigned long long digit = *cp - '0';
        if (value > (maxsize - digit) / 10) {
            reason = string("Size value too big: ") + (*it)->m_value;
            return 0;
        }
        value = value * 10 + digit;
    }
    if (cp == digits) {
        reason = string("Bad size value: ") + (*it)->m_value;
        return 0;
    }

    // Optional one-letter decimal multiplier. Integer constants are used
    // (not 1E3 etc.) so that the product is exact and can be range-checked.
    unsigned long long mult = 1;
    if (*cp != 0) {
        switch (*cp) {
        case 'k': case 'K': mult = 1000ULL; break;
        case 'm': case 'M': mult = 1000000ULL; break;
        case 'g': case 'G': mult = 1000000000ULL; break;
        case 't': case 'T': mult = 1000000000000ULL; break;
        default:
            reason = string("Bad multiplier suffix: ") + *cp;
            return 0;
        }
        // Nothing may follow the suffix letter
        if (cp[1] != 0) {
            reason = string("Bad multiplier suffix: ") + cp;
            return 0;
        }
    }
    if (value > maxsize / mult) {
        reason = string("Size value too big: ") + (*it)->m_value;
        return 0;
    }
    const size_t size = size_t(value * mult);

    // Note: the strict and non-strict relations are handled the same
    // way here, the limits are passed on unchanged.
    switch ((*it)->m_rel) {
    case WasaQuery::REL_EQUALS:
        sdata->setMaxSize(size);
        sdata->setMinSize(size);
        break;
    case WasaQuery::REL_LT:
    case WasaQuery::REL_LTE:
        sdata->setMaxSize(size);
        break;
    case WasaQuery::REL_GT:
    case WasaQuery::REL_GTE:
        sdata->setMinSize(size);
        break;
    default:
        reason = "Bad relation operator with size query. Use > < or =";
        return 0;
    }
    continue;
}
// "Regular" processing follows:
switch ((*it)->m_op) {
case WasaQuery::OP_NULL:

View File

@ -1257,8 +1257,13 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
if (doc.fbytes.empty())
doc.fbytes = doc.pcbytes;
if (!doc.fbytes.empty())
if (!doc.fbytes.empty()) {
RECORD_APPEND(record, Doc::keyfs, doc.fbytes);
leftzeropad(doc.fbytes, 12);
newdocument.add_value(VALUE_SIZE, doc.fbytes);
}
if (!doc.pcbytes.empty())
RECORD_APPEND(record, Doc::keypcs, doc.pcbytes);
char sizebuf[30];

View File

@ -57,6 +57,19 @@ class RclConfig;
namespace Rcl {
#endif
// Omega compatible values. We leave a hole for future omega values. Not sure
// it makes any sense to keep any level of omega compat given that the index
// is incompatible anyway.
// Slot numbers used when storing and querying Xapian document values.
enum value_slot {
// Omega-compatible values:
VALUE_LASTMOD = 0, // 4 byte big endian value - seconds since 1970.
VALUE_MD5 = 1, // 16 byte MD5 checksum of original document.
VALUE_SIZE = 2, // File size in bytes, stored as a decimal string passed
                // through leftzeropad(.., 12) (not sortable_serialise())
// Recoll only:
VALUE_SIG = 10 // Doc sig as chosen by app (ex: mtime+size)
};
class SearchData;
class TermIter;
class Query;

View File

@ -25,19 +25,6 @@
namespace Rcl {
// Omega compatible values. We leave a hole for future omega values. Not sure
// it makes any sense to keep any level of omega compat given that the index
// is incompatible anyway.
// Slot numbers used when storing and querying Xapian document values.
enum value_slot {
// Omega-compatible values:
VALUE_LASTMOD = 0, // 4 byte big endian value - seconds since 1970.
VALUE_MD5 = 1, // 16 byte MD5 checksum of original document.
VALUE_SIZE = 2, // File size in bytes, stored as a decimal string passed
                // through leftzeropad(.., 12) (not sortable_serialise())
// Recoll only:
VALUE_SIG = 10 // Doc sig as chosen by app (ex: mtime+size)
};
// Generic Xapian exception catching code. We do this quite often,
// and I have no idea how to do this except for a macro
#define XCATCHERROR(MSG) \

View File

@ -255,6 +255,39 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
}
}
if (m_minSize != size_t(-1) || m_maxSize != size_t(-1)) {
    // Size filtering. A limit equal to size_t(-1) means "not set". The
    // bounds are formatted like the indexed VALUE_SIZE slot: decimal,
    // passed through leftzeropad(.., 12).
    // NOTE(review): the values are compared as strings, so a bound with
    // more than 12 digits presumably does not order correctly - confirm.
    const bool haveLower = (m_minSize != size_t(-1));
    const bool haveUpper = (m_maxSize != size_t(-1));

    char numbuf[50];
    string lower;
    if (haveLower) {
        sprintf(numbuf, "%lld", (long long)m_minSize);
        lower = numbuf;
        leftzeropad(lower, 12);
    }
    string upper;
    if (haveUpper) {
        sprintf(numbuf, "%lld", (long long)m_maxSize);
        upper = numbuf;
        leftzeropad(upper, 12);
    }

    Xapian::Query sizeq;
    if (haveLower && haveUpper) {
        // Both limits set: closed range
        sizeq = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, VALUE_SIZE,
                              lower, upper);
    } else if (haveUpper) {
        // Upper limit only
        sizeq = Xapian::Query(Xapian::Query::OP_VALUE_LE, VALUE_SIZE, upper);
    } else {
        // Lower limit only
        sizeq = Xapian::Query(Xapian::Query::OP_VALUE_GE, VALUE_SIZE, lower);
    }

    // With no probabilistic query to filter, the size clause becomes
    // the whole query. Else it is applied as a filter on the results.
    if (xq.empty()) {
        LOGINFO(("Db::toNativeQuery: proba query is empty\n"));
        xq = sizeq;
    } else {
        xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sizeq);
    }
}
// Add the file type filtering clause if any
if (!m_filetypes.empty()) {
expandFileTypes(db.getConf(), m_filetypes);
@ -452,6 +485,8 @@ void SearchData::erase() {
m_description.erase();
m_reason.erase();
m_haveDates = false;
m_minSize = size_t(-1);
m_maxSize = size_t(-1);
}
// Am I a file name only search ? This is to turn off term highlighting

View File

@ -75,7 +75,8 @@ class SearchData {
public:
SearchData(SClType tp)
: m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0),
m_haveDates(false), m_haveWildCards(false)
m_haveDates(false), m_maxSize(size_t(-1)),
m_minSize(size_t(-1)), m_haveWildCards(false)
{
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
m_tp = SCLT_OR;
@ -114,6 +115,9 @@ public:
m_topdirweight = w;
}
void setMinSize(size_t size) {m_minSize = size;}
void setMaxSize(size_t size) {m_maxSize = size;}
/** Set date span for filtering results */
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
@ -154,6 +158,8 @@ private:
float m_topdirweight; // affect weight instead of filter
bool m_haveDates;
DateInterval m_dates; // Restrict to date interval
size_t m_maxSize;
size_t m_minSize;
// Printable expanded version of the complete query, retrieved/set
// from rcldb after the Xapian::setQuery() call
string m_description;