From ce9e9e4d009a27516849889f99660e957bda93e7 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sun, 15 May 2011 09:29:24 +0200 Subject: [PATCH] query: support negative mime and catg clauses: -mime:text/plain --- src/query/wasatorcl.cpp | 20 +++++++++---- src/rcldb/searchdata.cpp | 64 ++++++++++++++++++++++++++++++---------- src/rcldb/searchdata.h | 6 ++++ src/sampleconf/mimeconf | 4 +-- 4 files changed, 70 insertions(+), 24 deletions(-) diff --git a/src/query/wasatorcl.cpp b/src/query/wasatorcl.cpp index 52f834d4..eacf4340 100644 --- a/src/query/wasatorcl.cpp +++ b/src/query/wasatorcl.cpp @@ -61,11 +61,14 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa, if (!stringicmp("mime", (*it)->m_fieldspec) || !stringicmp("format", (*it)->m_fieldspec)) { - if ((*it)->m_op != WasaQuery::OP_LEAF) { - reason = "Negative mime/format clauses not supported yet"; + if ((*it)->m_op == WasaQuery::OP_LEAF) { + sdata->addFiletype((*it)->m_value); + } else if ((*it)->m_op == WasaQuery::OP_EXCL) { + sdata->remFiletype((*it)->m_value); + } else { + reason = "internal error: mime clause neither leaf nor excl??"; return 0; } - sdata->addFiletype((*it)->m_value); continue; } @@ -73,8 +76,10 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa, // categories like "audio", "presentation", etc.
if (!stringicmp("rclcat", (*it)->m_fieldspec) || !stringicmp("type", (*it)->m_fieldspec)) { - if ((*it)->m_op != WasaQuery::OP_LEAF) { - reason = "Negative rclcat/type clauses not supported yet"; + if ((*it)->m_op != WasaQuery::OP_LEAF && + (*it)->m_op != WasaQuery::OP_EXCL) { + reason = "internal error: rclcat/type clause neither leaf " + "nor excl??"; return 0; } list mtypes; @@ -82,7 +87,10 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa, && !mtypes.empty()) { for (list::iterator mit = mtypes.begin(); mit != mtypes.end(); mit++) { - sdata->addFiletype(*mit); + if ((*it)->m_op == WasaQuery::OP_LEAF) + sdata->addFiletype(*mit); + else + sdata->remFiletype(*mit); } } else { reason = "Unknown rclcat/type value: no mime types found"; diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index 8628cbb0..1a6a8946 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -17,6 +17,7 @@ // Handle translation from rcl's SearchData structures to Xapian Queries #include +#include #include #include @@ -135,6 +136,35 @@ date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2) return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end()); } +// Expand mime categories and fnmatch() wildcards in tps, in place; false if cfg is null +bool SearchData::expandFileTypes(RclConfig *cfg, vector& tps) +{ + if (!cfg) { + LOGFATAL(("SearchData::expandFileTypes: null configuration!!\n")); + return false; + } + vector exptps; + list alltypes = cfg->getAllMimeTypes(); + + for (vector::iterator it = tps.begin(); it != tps.end(); it++) { + if (cfg->isMimeCategory(*it)) { + listtps; + cfg->getMimeCatTypes(*it, tps); + exptps.insert(exptps.end(), tps.begin(), tps.end()); + } else { + for (list::const_iterator ait = alltypes.begin(); + ait != alltypes.end(); ait++) { + if (fnmatch(it->c_str(), ait->c_str(), FNM_CASEFOLD) + == 0) { + exptps.push_back(*ait); + } + } + } + } + tps = exptps; + return true; +} + bool SearchData::toNativeQuery(Rcl::Db &db, void *d) {
Xapian::Query xq; @@ -220,24 +250,11 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d) // Add the file type filtering clause if any if (!m_filetypes.empty()) { - vector exptps; - exptps.reserve(m_filetypes.size()); - // Expand categories - RclConfig *cfg = db.getConf(); - for (vector::iterator it = m_filetypes.begin(); - it != m_filetypes.end(); it++) { - if (cfg && cfg->isMimeCategory(*it)) { - listtps; - cfg->getMimeCatTypes(*it, tps); - exptps.insert(exptps.end(), tps.begin(), tps.end()); - } else { - exptps.push_back(*it); - } - } + expandFileTypes(db.getConf(), m_filetypes); Xapian::Query tq; - for (vector::iterator it = exptps.begin(); - it != exptps.end(); it++) { + for (vector::iterator it = m_filetypes.begin(); + it != m_filetypes.end(); it++) { string term = "T" + *it; LOGDEB0(("Adding file type term: [%s]\n", term.c_str())); tq = tq.empty() ? Xapian::Query(term) : @@ -246,6 +263,21 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d) xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq); } + // Add the neg file type filtering clause if any + if (!m_nfiletypes.empty()) { + expandFileTypes(db.getConf(), m_nfiletypes); + + Xapian::Query tq; + for (vector::iterator it = m_nfiletypes.begin(); + it != m_nfiletypes.end(); it++) { + string term = "T" + *it; + LOGDEB0(("Adding negative file type term: [%s]\n", term.c_str())); + tq = tq.empty() ? Xapian::Query(term) : + Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term)); + } + xq = xq.empty() ? 
tq : Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq); + } + // Add the directory filtering clause if (!m_topdir.empty()) { vector vpath; diff --git a/src/rcldb/searchdata.h b/src/rcldb/searchdata.h index d2ac10ae..cb4e3bbb 100644 --- a/src/rcldb/searchdata.h +++ b/src/rcldb/searchdata.h @@ -31,6 +31,8 @@ #include "refcntr.h" #include "smallut.h" +class RclConfig; + #ifndef NO_NAMESPACES using std::vector; using std::string; @@ -113,6 +115,8 @@ public: /** Add file type for filtering results */ void addFiletype(const string& ft) {m_filetypes.push_back(ft);} + /** Add file type to not wanted list */ + void remFiletype(const string& ft) {m_nfiletypes.push_back(ft);} void setStemlang(const string& lang = "english") {m_stemlang = lang;} @@ -140,6 +144,7 @@ private: SClType m_tp; // Only SCLT_AND or SCLT_OR here vector m_query; vector m_filetypes; // Restrict to filetypes if set. + vector m_nfiletypes; // Unwanted file types string m_topdir; // Restrict to subtree. bool m_topdirexcl; // Invert meaning bool m_haveDates; @@ -150,6 +155,7 @@ private: string m_reason; bool m_haveWildCards; string m_stemlang; + bool expandFileTypes(RclConfig *cfg, vector& exptps); /* Copyconst and assignment private and forbidden */ SearchData(const SearchData &) {} SearchData& operator=(const SearchData&) {return *this;}; diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf index a01bf763..6521d3d6 100644 --- a/src/sampleconf/mimeconf +++ b/src/sampleconf/mimeconf @@ -239,6 +239,7 @@ media = \ image/gif \ image/jpeg \ image/png \ + image/svg+xml \ image/tiff \ video/mp2p \ video/mp2t \ @@ -256,5 +257,4 @@ other = application/vnd.sun.xml.draw \ application/vnd.sun.xml.draw.template \ application/vnd.sun.xml.math \ application/x-fsdirectory \ - application/zip \ - image/svg+xml \ + application/zip