implemented filtering on file subtree
This commit is contained in:
parent
ce740a26ad
commit
9fb52e83ec
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.8 2005-10-17 13:36:53 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.9 2005-10-19 14:14:17 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
|
||||
@ -81,12 +81,15 @@ static ConfSimple::WalkerCode mtypesWalker(void *l,
|
||||
return ConfSimple::WALK_CONTINUE;
|
||||
}
|
||||
|
||||
#include "idfile.h"
|
||||
std::list<string> RclConfig::getAllMimeTypes()
|
||||
{
|
||||
std::list <string> lst;
|
||||
std::list<string> lst;
|
||||
if (mimemap == 0)
|
||||
return lst;
|
||||
mimemap->sortwalk(mtypesWalker, &lst);
|
||||
std::list<string> l1 = idFileAllTypes();
|
||||
lst.insert(lst.end(), l1.begin(), l1.end());
|
||||
lst.sort();
|
||||
lst.unique();
|
||||
return lst;
|
||||
|
||||
@ -22,13 +22,13 @@ using std::string;
|
||||
#include "rclconfig.h"
|
||||
#include "debuglog.h"
|
||||
|
||||
|
||||
extern RclConfig *rclconfig;
|
||||
|
||||
// Constructor/initialization
|
||||
void advsearch::init()
|
||||
{
|
||||
list<string> types = rclconfig->getAllMimeTypes();
|
||||
|
||||
QStringList ql;
|
||||
for (list<string>::iterator it = types.begin(); it != types.end(); it++) {
|
||||
ql.append(it->c_str());
|
||||
|
||||
@ -304,13 +304,14 @@
|
||||
<slot>clearqPB_clicked()</slot>
|
||||
<slot>listPrevPB_clicked()</slot>
|
||||
<slot>listNextPB_clicked()</slot>
|
||||
<slot>advSearchPB_clicked()</slot>
|
||||
<slot>previewClosed( Preview * w )</slot>
|
||||
<slot>advSearchPB_clicked()</slot>
|
||||
<slot>startAdvSearch( Rcl::AdvSearchData sdata )</slot>
|
||||
</slots>
|
||||
<functions>
|
||||
<function access="private">init()</function>
|
||||
<function access="private" returnType="bool">eventFilter( QObject * target, QEvent * event )</function>
|
||||
<function returnType="bool">close( bool alsoDelete )</function>
|
||||
</functions>
|
||||
<pixmapinproject/>
|
||||
<layoutdefaults spacing="6" margin="11"/>
|
||||
|
||||
@ -50,6 +50,16 @@ void RecollMain::init()
|
||||
asearchform = 0;
|
||||
}
|
||||
|
||||
// We also want to get rid of the advanced search form when we exit
|
||||
// (it's not our children so that it's not systematically created over
|
||||
// the main form).
|
||||
bool RecollMain::close( bool alsoDelete )
|
||||
{
|
||||
if (asearchform)
|
||||
delete asearchform;
|
||||
return QWidget::close(alsoDelete);
|
||||
}
|
||||
|
||||
// We want to catch ^Q everywhere to mean quit.
|
||||
bool RecollMain::eventFilter( QObject * target, QEvent * event )
|
||||
{
|
||||
@ -65,6 +75,8 @@ bool RecollMain::eventFilter( QObject * target, QEvent * event )
|
||||
void RecollMain::fileExit()
|
||||
{
|
||||
LOGDEB1(("RecollMain: fileExit\n"));
|
||||
if (asearchform)
|
||||
delete asearchform;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
@ -349,8 +361,8 @@ void RecollMain::listNextPB_clicked()
|
||||
if (i == 0) {
|
||||
reslistTE->append("<qt><head></head><body><p>");
|
||||
char line[80];
|
||||
sprintf(line, "<p><b>Displaying results %d-%d out of %d</b><br>",
|
||||
reslist_winfirst+1, reslist_winfirst+last, resCnt);
|
||||
sprintf(line, "<p><b>Displaying results starting at index %d (maximum set size %d)</b><br>",
|
||||
reslist_winfirst+1, resCnt);
|
||||
reslistTE->append(line);
|
||||
}
|
||||
|
||||
@ -422,7 +434,7 @@ void RecollMain::previewClosed(Preview *w)
|
||||
void RecollMain::advSearchPB_clicked()
|
||||
{
|
||||
if (asearchform == 0) {
|
||||
asearchform = new advsearch(this, "Advanced search", FALSE,
|
||||
asearchform = new advsearch(0, "Advanced search", FALSE,
|
||||
WStyle_Customize | WStyle_NormalBorder |
|
||||
WStyle_Title | WStyle_SysMenu);
|
||||
asearchform->setSizeGripEnabled(FALSE);
|
||||
@ -455,3 +467,5 @@ void RecollMain::startAdvSearch(Rcl::AdvSearchData sdata)
|
||||
curPreview = 0;
|
||||
listNextPB_clicked();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.29 2005-10-19 10:21:47 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.30 2005-10-19 14:14:17 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
@ -42,8 +42,7 @@ class Native {
|
||||
Xapian::Enquire *enquire;
|
||||
Xapian::MSet mset;
|
||||
|
||||
Native() : isopen(false), iswritable(false), enquire(0) {
|
||||
}
|
||||
Native() : isopen(false), iswritable(false), enquire(0) { }
|
||||
~Native() {
|
||||
delete enquire;
|
||||
}
|
||||
@ -388,7 +387,6 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
|
||||
{
|
||||
if (pdata == 0)
|
||||
@ -611,35 +609,9 @@ bool Rcl::Db::purge()
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
#include <vector>
|
||||
|
||||
class wsQData : public TextSplitCB {
|
||||
public:
|
||||
vector<string> terms;
|
||||
string catterms() {
|
||||
string s;
|
||||
for (unsigned int i=0;i<terms.size();i++) {
|
||||
s += "[" + terms[i] + "] ";
|
||||
}
|
||||
return s;
|
||||
}
|
||||
bool takeword(const std::string &term, int , int, int) {
|
||||
LOGDEB1(("wsQData::takeword: %s\n", term.c_str()));
|
||||
terms.push_back(term);
|
||||
return true;
|
||||
}
|
||||
void dumball() {
|
||||
for (vector<string>::iterator it=terms.begin(); it !=terms.end();it++){
|
||||
string dumb;
|
||||
Rcl::dumb_string(*it, dumb);
|
||||
*it = dumb;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Expand term to list of all terms which stem to the same term.
|
||||
/**
|
||||
* Expand term to list of all terms which stem to the same term.
|
||||
*/
|
||||
static list<string> stemexpand(Native *ndb, string term, const string& lang)
|
||||
{
|
||||
list<string> explist;
|
||||
@ -687,10 +659,38 @@ static list<string> stemexpand(Native *ndb, string term, const string& lang)
|
||||
return explist;
|
||||
}
|
||||
|
||||
|
||||
class wsQData : public TextSplitCB {
|
||||
public:
|
||||
vector<string> terms;
|
||||
string catterms() {
|
||||
string s;
|
||||
for (unsigned int i=0;i<terms.size();i++) {
|
||||
s += "[" + terms[i] + "] ";
|
||||
}
|
||||
return s;
|
||||
}
|
||||
bool takeword(const std::string &term, int , int, int) {
|
||||
LOGDEB1(("wsQData::takeword: %s\n", term.c_str()));
|
||||
terms.push_back(term);
|
||||
return true;
|
||||
}
|
||||
void dumball() {
|
||||
for (vector<string>::iterator it=terms.begin(); it !=terms.end();it++){
|
||||
string dumb;
|
||||
Rcl::dumb_string(*it, dumb);
|
||||
*it = dumb;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
///
|
||||
// Turn string into possibly complex xapian query. There is little
|
||||
// interpretation done on the string (no +term -term or filename:term
|
||||
// stuff). We just separate words and phrases, and interpret
|
||||
// capitalized terms as wanting no stem expansion
|
||||
//
|
||||
static void stringToXapianQueries(const string &iq,
|
||||
const string& stemlang,
|
||||
Native *ndb,
|
||||
@ -762,6 +762,7 @@ static void stringToXapianQueries(const string &iq,
|
||||
}
|
||||
}
|
||||
|
||||
// Prepare query out of simple query string
|
||||
bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
||||
const string& stemlang)
|
||||
{
|
||||
@ -771,6 +772,8 @@ bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
||||
if (!ndb)
|
||||
return false;
|
||||
|
||||
asdata.erase();
|
||||
dbindices.clear();
|
||||
list<Xapian::Query> pqueries;
|
||||
stringToXapianQueries(iqstring, stemlang, ndb, pqueries, opts);
|
||||
ndb->query = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
||||
@ -782,6 +785,7 @@ bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Prepare query out of "advanced search" data
|
||||
bool Rcl::Db::setQuery(AdvSearchData &sdata, const string& stemlang)
|
||||
{
|
||||
LOGDEB(("Rcl::Db::setQuery: adv:\n"));
|
||||
@ -797,10 +801,12 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, const string& stemlang)
|
||||
if (!sdata.topdir.empty())
|
||||
LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
|
||||
|
||||
asdata = sdata;
|
||||
dbindices.clear();
|
||||
|
||||
Native *ndb = (Native *)pdata;
|
||||
if (!ndb)
|
||||
return false;
|
||||
|
||||
list<Xapian::Query> pqueries;
|
||||
Xapian::Query xq;
|
||||
|
||||
@ -896,25 +902,91 @@ int Rcl::Db::getResCnt()
|
||||
return ndb->mset.get_matches_lower_bound();
|
||||
}
|
||||
|
||||
// This class (friend to RclDb) exists so that we can have functions that
|
||||
// access private RclDb data and have Xapian-specific parameters (so that we
|
||||
// don't want them to appear in the public rcldb.h).
|
||||
class Rcl::DbPops {
|
||||
public:
|
||||
static bool filterMatch(Rcl::Db *rdb, Xapian::Document &xdoc) {
|
||||
// Parse xapian document's data and populate doc fields
|
||||
string data = xdoc.get_data();
|
||||
ConfSimple parms(&data);
|
||||
|
||||
// The only filtering for now is on file path (subtree)
|
||||
string url;
|
||||
parms.get(string("url"), url);
|
||||
url = url.substr(7);
|
||||
if (url.find(rdb->asdata.topdir) == 0)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// Get document at rank i in query (i is the index in the whole result
|
||||
// set, as in the enquire class). We check if the current mset has the
|
||||
// doc, else ask for another one. We use msets of qquantum documents. Don't
|
||||
// know if the whole thing makes sense at all but it seems to work.
|
||||
bool Rcl::Db::getDoc(int i, Doc &doc, int *percent)
|
||||
//
|
||||
// If there is a postquery filter (ie: file names), we have to
|
||||
// maintain a correspondence from the sequential external index
|
||||
// sequence to the internal Xapian hole-y one (the holes being the documents
|
||||
// that don't match the filter).
|
||||
bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
|
||||
{
|
||||
LOGDEB1(("Rcl::Db::getDoc: %d\n", i));
|
||||
const int qquantum = 30;
|
||||
LOGDEB1(("Rcl::Db::getDoc: exti %d\n", exti));
|
||||
Native *ndb = (Native *)pdata;
|
||||
if (!ndb || !ndb->enquire) {
|
||||
LOGERR(("Rcl::Db::getDoc: no query opened\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
// For now the only post-query filter is on dir subtree
|
||||
bool postqfilter = !asdata.topdir.empty();
|
||||
LOGDEB1(("Topdir %s postqflt %d\n", asdata.topdir.c_str(), postqfilter));
|
||||
|
||||
int xapi;
|
||||
if (postqfilter) {
|
||||
// There is a postquery filter, does this fall in already known area ?
|
||||
if (exti >= (int)dbindices.size()) {
|
||||
// Have to fetch xapian docs and filter until we get
|
||||
// enough or fail
|
||||
dbindices.reserve(exti+1);
|
||||
// First xapian doc we fetch is the one after last stored
|
||||
int first = dbindices.size() > 0 ? dbindices.back() + 1 : 0;
|
||||
// Loop until we get enough docs
|
||||
while (exti >= (int)dbindices.size()) {
|
||||
LOGDEB(("Rcl::Db::getDoc: fetching %d starting at %d\n",
|
||||
qquantum, first));
|
||||
ndb->mset = ndb->enquire->get_mset(first, qquantum);
|
||||
if (ndb->mset.empty()) {
|
||||
LOGDEB(("Rcl::Db::getDoc: got empty mset\n"));
|
||||
return false;
|
||||
}
|
||||
first = ndb->mset.get_firstitem();
|
||||
for (unsigned int i = 0; i < ndb->mset.size() ; i++) {
|
||||
LOGDEB(("Rcl::Db::getDoc: [%d]\n", i));
|
||||
Xapian::Document xdoc = ndb->mset[i].get_document();
|
||||
if (Rcl::DbPops::filterMatch(this, xdoc)) {
|
||||
dbindices.push_back(first + i);
|
||||
}
|
||||
}
|
||||
first = first + ndb->mset.size();
|
||||
}
|
||||
}
|
||||
xapi = dbindices[exti];
|
||||
} else {
|
||||
xapi = exti;
|
||||
}
|
||||
|
||||
|
||||
// From there on, we work with a xapian enquire item number. Fetch it
|
||||
int first = ndb->mset.get_firstitem();
|
||||
int last = first + ndb->mset.size() -1;
|
||||
|
||||
if (!(i >= first && i <= last)) {
|
||||
LOGDEB1(("Fetching for first %d, count 10\n", i));
|
||||
ndb->mset = ndb->enquire->get_mset(i, 10);
|
||||
if (!(xapi >= first && xapi <= last)) {
|
||||
LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
|
||||
ndb->mset = ndb->enquire->get_mset(xapi, qquantum);
|
||||
if (ndb->mset.empty())
|
||||
return false;
|
||||
first = ndb->mset.get_firstitem();
|
||||
@ -926,9 +998,9 @@ bool Rcl::Db::getDoc(int i, Doc &doc, int *percent)
|
||||
first, last,
|
||||
ndb->mset.get_matches_lower_bound()));
|
||||
|
||||
Xapian::Document xdoc = ndb->mset[i-first].get_document();
|
||||
Xapian::Document xdoc = ndb->mset[xapi-first].get_document();
|
||||
if (percent)
|
||||
*percent = ndb->mset.convert_to_percent(ndb->mset[i-first]);
|
||||
*percent = ndb->mset.convert_to_percent(ndb->mset[xapi-first]);
|
||||
|
||||
// Parse xapian document's data and populate doc fields
|
||||
string data = xdoc.get_data();
|
||||
|
||||
@ -1,13 +1,14 @@
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.14 2005-10-19 10:21:47 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rcldb.h,v 1.15 2005-10-19 14:14:17 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
|
||||
#include <vector>
|
||||
#ifndef NO_NAMESPACES
|
||||
using std::string;
|
||||
using std::list;
|
||||
using std::vector;
|
||||
#endif
|
||||
|
||||
// rcldb defines an interface for a 'real' text database. The current
|
||||
@ -71,14 +72,24 @@ class AdvSearchData {
|
||||
string nowords;
|
||||
list<string> filetypes; // restrict to types. Empty if inactive
|
||||
string topdir; // restrict to subtree. Empty if inactive
|
||||
|
||||
// Clear all the search criteria fields
void erase() {
    allwords.erase();
    phrase.erase();
    orwords.erase();
    nowords.erase();
    filetypes.clear();
    topdir.erase();
}
|
||||
};
|
||||
|
||||
class DbPops;
|
||||
|
||||
/**
|
||||
* Wrapper class for the native database.
|
||||
*/
|
||||
class Db {
|
||||
void *pdata;
|
||||
Doc curdoc;
|
||||
AdvSearchData asdata;
|
||||
vector<int> dbindices; // In case there is a postq filter: sequence of
|
||||
// db indices that match
|
||||
void *pdata; // Pointer to private data. We don't want db(ie
|
||||
// xapian)-specific defs to show in here
|
||||
public:
|
||||
Db();
|
||||
~Db();
|
||||
@ -104,10 +115,16 @@ class Db {
|
||||
|
||||
// Get document at rank i. This is probably vastly inferior to the type
|
||||
// of interface in Xapian, but we have to start with something simple
|
||||
// to experiment with the GUI
|
||||
// to experiment with the GUI. i is sequential from 0 to some value
|
||||
bool getDoc(int i, Doc &doc, int *percent = 0);
|
||||
// Get results count
|
||||
int getResCnt();
|
||||
|
||||
friend class Rcl::DbPops;
|
||||
private:
|
||||
/* Copy constructor and assignment are private and forbidden */
|
||||
Db(const Db &) {}
|
||||
Db & operator=(const Db &) {return *this;};
|
||||
};
|
||||
|
||||
// Unaccent and lowercase data.
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: idfile.cpp,v 1.1 2005-04-07 09:05:39 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: idfile.cpp,v 1.2 2005-10-19 14:14:17 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
#ifndef TEST_IDFILE
|
||||
#include <unistd.h> // for access(2)
|
||||
@ -8,10 +8,18 @@ static char rcsid[] = "@(#$Id: idfile.cpp,v 1.1 2005-04-07 09:05:39 dockes Exp $
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
||||
#include "idfile.h"
|
||||
#include "debuglog.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Return all the mime types known to this module
std::list<string> idFileAllTypes()
{
    static const char *const knownTypes[] = {
        "text/x-mail",
        "message/rfc822"
    };
    const unsigned int count = sizeof(knownTypes) / sizeof(knownTypes[0]);
    return std::list<string>(knownTypes, knownTypes + count);
}
|
||||
|
||||
// Mail headers we compare to:
|
||||
static const char *mailhs[] = {"From: ", "Received: ", "Message-Id: ", "To: ",
|
||||
|
||||
@ -1,12 +1,16 @@
|
||||
#ifndef _IDFILE_H_INCLUDED_
|
||||
#define _IDFILE_H_INCLUDED_
|
||||
/* @(#$Id: idfile.h,v 1.1 2005-04-07 09:05:39 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: idfile.h,v 1.2 2005-10-19 14:14:17 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
|
||||
// Return mime type for file or empty string. The system's file utility does
|
||||
// a bad job on mail folders. idFile only looks for mail file types for now,
|
||||
// but this may change
|
||||
extern std::string idFile(const char *fn);
|
||||
|
||||
// Return all types known to us
|
||||
extern std::list<std::string> idFileAllTypes();
|
||||
|
||||
#endif /* _IDFILE_H_INCLUDED_ */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user