search language: implemented filtering on file size
This commit is contained in:
parent
85166c93b2
commit
7ddbbb1ee8
@ -2125,6 +2125,17 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
|
||||
<filename>/usr/local/share/doc</filename> </para>
|
||||
</listitem>
|
||||
|
||||
<listitem><para><literal>size</literal> for filtering the
|
||||
results on file size. Example:
|
||||
<literal>size<10000</literal>. You can use
|
||||
<literal><</literal>, <literal>></literal> or
|
||||
<literal>=</literal> as operators. You can specify a range like the
|
||||
following: <literal>size>100 size<1000</literal>. The usual
|
||||
<literal>k/K, m/M, g/G, t/T</literal> can be used as (decimal)
|
||||
multipliers. Ex: <literal>size>1k</literal> to search for files
|
||||
bigger than 1000 bytes.</para>
|
||||
</listitem>
|
||||
|
||||
<listitem><para><literal>date</literal> for searching or filtering
|
||||
on dates. The syntax for the argument is based on the ISO8601
|
||||
standard for dates and time intervals. Only dates are supported, no
|
||||
|
||||
@ -139,7 +139,7 @@ static const char * parserExpr =
|
||||
"(" //5
|
||||
"([[:alpha:]][[:alnum:]:]*)" //6 Field spec: ie: "dc:title:letitre"
|
||||
"[[:space:]]*"
|
||||
"(:)" //7 Relation
|
||||
"(:|=|>|<)" //7 Relation
|
||||
"[[:space:]]*)?"
|
||||
"(" //8
|
||||
"(\"" //9
|
||||
|
||||
@ -125,6 +125,46 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle "size" spec
|
||||
if (!stringicmp("size", (*it)->m_fieldspec)) {
|
||||
if ((*it)->m_op != WasaQuery::OP_LEAF) {
|
||||
reason = "Negative size filtering not supported";
|
||||
return 0;
|
||||
}
|
||||
char *cp;
|
||||
size_t size = strtoll((*it)->m_value.c_str(), &cp, 10);
|
||||
if (*cp != 0) {
|
||||
switch (*cp) {
|
||||
case 'k': case 'K': size *= 1E3;break;
|
||||
case 'm': case 'M': size *= 1E6;break;
|
||||
case 'g': case 'G': size *= 1E9;break;
|
||||
case 't': case 'T': size *= 1E12;break;
|
||||
default:
|
||||
reason = string("Bad multiplier suffix: ") + *cp;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
switch ((*it)->m_rel) {
|
||||
case WasaQuery::REL_EQUALS:
|
||||
sdata->setMaxSize(size);
|
||||
sdata->setMinSize(size);
|
||||
break;
|
||||
case WasaQuery::REL_LT:
|
||||
case WasaQuery::REL_LTE:
|
||||
sdata->setMaxSize(size);
|
||||
break;
|
||||
case WasaQuery::REL_GT:
|
||||
case WasaQuery::REL_GTE:
|
||||
sdata->setMinSize(size);
|
||||
break;
|
||||
default:
|
||||
reason = "Bad relation operator with size query. Use > < or =";
|
||||
return 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// "Regular" processing follows:
|
||||
switch ((*it)->m_op) {
|
||||
case WasaQuery::OP_NULL:
|
||||
|
||||
@ -1257,8 +1257,13 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||
|
||||
if (doc.fbytes.empty())
|
||||
doc.fbytes = doc.pcbytes;
|
||||
if (!doc.fbytes.empty())
|
||||
|
||||
if (!doc.fbytes.empty()) {
|
||||
RECORD_APPEND(record, Doc::keyfs, doc.fbytes);
|
||||
leftzeropad(doc.fbytes, 12);
|
||||
newdocument.add_value(VALUE_SIZE, doc.fbytes);
|
||||
}
|
||||
|
||||
if (!doc.pcbytes.empty())
|
||||
RECORD_APPEND(record, Doc::keypcs, doc.pcbytes);
|
||||
char sizebuf[30];
|
||||
|
||||
@ -57,6 +57,19 @@ class RclConfig;
|
||||
namespace Rcl {
|
||||
#endif
|
||||
|
||||
// Omega compatible values. We leave a hole for future omega values. Not sure
|
||||
// it makes any sense to keep any level of omega compat given that the index
|
||||
// is incompatible anyway.
|
||||
enum value_slot {
|
||||
// Omega-compatible values:
|
||||
VALUE_LASTMOD = 0, // 4 byte big endian value - seconds since 1970.
|
||||
VALUE_MD5 = 1, // 16 byte MD5 checksum of original document.
|
||||
VALUE_SIZE = 2, // sortable_serialise(<file size in bytes>)
// NOTE(review): the indexer appears to store the decimal size string after leftzeropad(..., 12) in this slot, not sortable_serialise() output - confirm.
|
||||
|
||||
// Recoll only:
|
||||
VALUE_SIG = 10 // Doc sig as chosen by app (ex: mtime+size)
|
||||
};
|
||||
|
||||
class SearchData;
|
||||
class TermIter;
|
||||
class Query;
|
||||
|
||||
@ -25,19 +25,6 @@
|
||||
|
||||
namespace Rcl {
|
||||
|
||||
// Omega compatible values. We leave a hole for future omega values. Not sure
|
||||
// it makes any sense to keep any level of omega compat given that the index
|
||||
// is incompatible anyway.
|
||||
enum value_slot {
|
||||
// Omega-compatible values:
|
||||
VALUE_LASTMOD = 0, // 4 byte big endian value - seconds since 1970.
|
||||
VALUE_MD5 = 1, // 16 byte MD5 checksum of original document.
|
||||
VALUE_SIZE = 2, // sortable_serialise(<file size in bytes>)
// NOTE(review): the indexer appears to store the decimal size string after leftzeropad(..., 12) in this slot, not sortable_serialise() output - confirm.
|
||||
|
||||
// Recoll only:
|
||||
VALUE_SIG = 10 // Doc sig as chosen by app (ex: mtime+size)
|
||||
};
|
||||
|
||||
// Generic Xapian exception catching code. We do this quite often,
|
||||
// and I have no idea how to do this except for a macro
|
||||
#define XCATCHERROR(MSG) \
|
||||
|
||||
@ -255,6 +255,39 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (m_minSize != size_t(-1) || m_maxSize != size_t(-1)) {
|
||||
Xapian::Query sq;
|
||||
char min[50], max[50];
|
||||
sprintf(min, "%lld", (long long)m_minSize);
|
||||
sprintf(max, "%lld", (long long)m_maxSize);
|
||||
if (m_minSize == size_t(-1)) {
|
||||
string value(max);
|
||||
leftzeropad(value, 12);
|
||||
sq = Xapian::Query(Xapian::Query::OP_VALUE_LE, VALUE_SIZE, value);
|
||||
} else if (m_maxSize == size_t(-1)) {
|
||||
string value(min);
|
||||
leftzeropad(value, 12);
|
||||
sq = Xapian::Query(Xapian::Query::OP_VALUE_GE, VALUE_SIZE, value);
|
||||
} else {
|
||||
string minvalue(min);
|
||||
leftzeropad(minvalue, 12);
|
||||
string maxvalue(max);
|
||||
leftzeropad(maxvalue, 12);
|
||||
sq = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, VALUE_SIZE,
|
||||
minvalue, maxvalue);
|
||||
}
|
||||
|
||||
// If no probabilistic query is provided then promote the
|
||||
// filter to be THE query instead of filtering an empty query.
|
||||
if (xq.empty()) {
|
||||
LOGINFO(("Db::toNativeQuery: proba query is empty\n"));
|
||||
xq = sq;
|
||||
} else {
|
||||
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq);
|
||||
}
|
||||
}
|
||||
|
||||
// Add the file type filtering clause if any
|
||||
if (!m_filetypes.empty()) {
|
||||
expandFileTypes(db.getConf(), m_filetypes);
|
||||
@ -452,6 +485,8 @@ void SearchData::erase() {
|
||||
m_description.erase();
|
||||
m_reason.erase();
|
||||
m_haveDates = false;
|
||||
m_minSize = size_t(-1);
|
||||
m_maxSize = size_t(-1);
|
||||
}
|
||||
|
||||
// Am I a file name only search ? This is to turn off term highlighting
|
||||
|
||||
@ -75,7 +75,8 @@ class SearchData {
|
||||
public:
|
||||
SearchData(SClType tp)
|
||||
: m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0),
|
||||
m_haveDates(false), m_haveWildCards(false)
|
||||
m_haveDates(false), m_maxSize(size_t(-1)),
|
||||
m_minSize(size_t(-1)), m_haveWildCards(false)
|
||||
{
|
||||
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
|
||||
m_tp = SCLT_OR;
|
||||
@ -114,6 +115,9 @@ public:
|
||||
m_topdirweight = w;
|
||||
}
|
||||
|
||||
void setMinSize(size_t size) {m_minSize = size;}
|
||||
void setMaxSize(size_t size) {m_maxSize = size;}
|
||||
|
||||
/** Set date span for filtering results */
|
||||
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
|
||||
|
||||
@ -154,6 +158,8 @@ private:
|
||||
float m_topdirweight; // affect weight instead of filter
|
||||
bool m_haveDates;
|
||||
DateInterval m_dates; // Restrict to date interval
|
||||
size_t m_maxSize;
|
||||
size_t m_minSize;
|
||||
// Printable expanded version of the complete query, retrieved/set
|
||||
// from rcldb after the Xapian::setQuery() call
|
||||
string m_description;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user