GUI directory side filter: compute the directories from the index, not the FS tree, so that things work with external indexes
This commit is contained in:
parent
be758e2c94
commit
2ebb0a689d
@ -9,6 +9,7 @@
|
||||
#include "fstreewalk.h"
|
||||
#include "idxmodel.h"
|
||||
|
||||
#undef USE_TREEWALK
|
||||
|
||||
class WalkerCB : public FsTreeWalkerCB {
|
||||
public:
|
||||
@ -45,6 +46,7 @@ FsTreeWalker::Status WalkerCB::processone(
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
if (flg == FsTreeWalker::FtwDirEnter) {
|
||||
//std::cerr << "ENTER: " << path << "\n";
|
||||
if (m_model->columnCount(m_indexes.top()) == 0) {
|
||||
if (!m_model->insertColumn(0, m_indexes.top()))
|
||||
return FsTreeWalker::FtwError;
|
||||
@ -70,6 +72,7 @@ FsTreeWalker::Status WalkerCB::processone(
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
#ifdef USE_TREEWALK
|
||||
static void populateDir(RclConfig *config, const std::string& topstr, IdxTreeModel *model,
|
||||
const QModelIndex& index, const std::string& path, int depth)
|
||||
{
|
||||
@ -81,19 +84,70 @@ static void populateDir(RclConfig *config, const std::string& topstr, IdxTreeMod
|
||||
walker.walk(path, cb);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Assemble an absolute path from the leading components of a tokenized path.
//
// @param path the path components, without separators.
// @param lst index of the last component to include. A value beyond the end of path is clamped
//     to the last element, a negative value selects no component at all.
// @return the selected components, each preceded by a '/', or "/" if the result would be empty.
std::string toksToPath(std::vector<std::string>& path, int lst)
{
    // Clamp the upper bound so that a bad index can never read beyond the end of the vector.
    const int npath = int(path.size());
    const int last = lst < npath ? lst : npath - 1;

    std::string out;
    for (int i = 0; i <= last; i++) {
        // Two appends instead of out += "/" + path[i]: avoids building a temporary string.
        out += '/';
        out += path[i];
    }
    if (out.empty())
        out = "/";
    return out;
}
|
||||
|
||||
// Process a sorted list of directory paths, generating a sequence of enter/exit calls equivalent to
|
||||
// what would happen for a recursive tree walk of the original tree.
|
||||
static void treelist(const std::string& top, const std::vector<std::string>& lst, WalkerCB &cb)
|
||||
{
|
||||
if (lst.empty()) {
|
||||
return;
|
||||
}
|
||||
std::vector<std::string> curpath;
|
||||
stringToTokens(top, curpath, "/");
|
||||
std::cerr << "top " << top << " TOP len is " << curpath.size() << "\n";
|
||||
for (const auto& dir : lst) {
|
||||
// std::cerr << "DIR: " << dir << "\n";
|
||||
std::vector<std::string> npath;
|
||||
// Compute the new directory stack
|
||||
stringToTokens(dir, npath, "/");
|
||||
// Walk the stacks until we find a differing entry, and then unwind the old stack to the new
|
||||
// base, and issue enter calls for new entries over the base.
|
||||
int i = 0;
|
||||
for (; i < int(std::min(curpath.size(), npath.size())); i++) {
|
||||
if (npath[i] != curpath[i] && int(curpath.size()) > 0) {
|
||||
// Differing at i, unwind old stack and break the main loop
|
||||
for (int j = int(curpath.size()) - 1; j >= i; j--) {
|
||||
//std::cerr << "Exiting " << toksToPath(curpath, j) << "\n";
|
||||
cb.processone(toksToPath(curpath, j), nullptr, FsTreeWalker::FtwDirReturn);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Callbacks for new entries above the base.
|
||||
for (int j = i; j < int(npath.size()); j++) {
|
||||
std::cerr << "Entering " << toksToPath(npath, j) << "\n";
|
||||
cb.processone(toksToPath(npath, j), nullptr, FsTreeWalker::FtwDirEnter);
|
||||
}
|
||||
curpath.swap(npath);
|
||||
}
|
||||
}
|
||||
#endif // USE_TREEWALK
|
||||
|
||||
void IdxTreeModel::populate()
|
||||
{
|
||||
QModelIndex index = this->index(0,0);
|
||||
auto topdirs = m_config->getTopdirs();
|
||||
|
||||
auto prefix = commonprefix(topdirs);
|
||||
|
||||
if (this->columnCount(index) == 0) {
|
||||
if (!this->insertColumn(0, index))
|
||||
return;
|
||||
}
|
||||
|
||||
int row = 0;
|
||||
|
||||
#ifdef USE_TREEWALK
|
||||
auto topdirs = m_config->getTopdirs();
|
||||
auto prefix = commonprefix(topdirs);
|
||||
for (const auto& topdir : topdirs) {
|
||||
const QModelIndex child = this->index(row, 0, index);
|
||||
std::string topdisp;
|
||||
@ -106,4 +160,13 @@ void IdxTreeModel::populate()
|
||||
++row;
|
||||
}
|
||||
sort(0, Qt::AscendingOrder);
|
||||
#else
|
||||
std::vector<std::string> thedirs;
|
||||
std::string prefix;
|
||||
rcldb->dirlist(m_depth, prefix, thedirs);
|
||||
const QModelIndex child = this->index(row, 0, index);
|
||||
FsTreeWalker walker;
|
||||
WalkerCB cb(m_config, prefix == "/" ? std::string() : prefix, walker, this, child);
|
||||
treelist(path_getfather(prefix), thedirs, cb);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -433,6 +433,16 @@ public:
|
||||
usage. Inserts the types at the end of the parameter */
|
||||
bool getAllDbMimeTypes(std::vector<std::string>&);
|
||||
|
||||
/** Compute a list of all the directories containing indexed documents, down to a given depth
|
||||
|
||||
@param depth depth below a possible computed common prefix, that is, if all
|
||||
directories are relative to /home/you, a depth of 2 would get you /home/you/1/2 but
|
||||
not /home/you/1/2/3
|
||||
@param[out] commonprefix common prefix path for the list. May be "/".
|
||||
@param[out] dirs the computed list (full paths including the prefix).
|
||||
*/
|
||||
bool dirlist(int depth, std::string& commonprefix, std::vector<std::string>& dirs);
|
||||
|
||||
/** Wildcard expansion specific to file names. Internal/sdata use only */
|
||||
bool filenameWildExp(const string& exp, vector<string>& names, int max);
|
||||
|
||||
|
||||
@ -469,6 +469,91 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Compute list of directories in index at given depth under the common root (which we also compute)
|
||||
// This is used for the GUI directory side filter tree.
|
||||
//
|
||||
// This is more complicated than it seems because there could be extra_dbs so we can't use topdirs
|
||||
// (which we did with an fstreewalk() initially), and also inode/directory might be excluded from
|
||||
// the index (for example by an onlymimetypes parameter).
|
||||
//
|
||||
// We look at all the paths, compute a common prefix, and truncate at the given depth under the
|
||||
// prefix and insert into an std::unordered_set for deduplication.
|
||||
//
|
||||
// unordered_set was the (slightly) fastest of "insert all then sort and truncate", std::set,
|
||||
// std::unordered_set. Other approaches may exist, for example, by skipping same prefix in the list
|
||||
// (which is sorted). Did not try, as the current approach is reasonably fast.
|
||||
//
|
||||
// This is admittedly horrible, and might be too slow on very big indexes, or actually fail if the
|
||||
// requested depth is such that we reach the max term length and the terms are
|
||||
// truncated-hashed. We'd need to look at the doc data for the full URLs, but this would be much
|
||||
// slower.
|
||||
//
|
||||
// Still I have no other idea of how to do this, other than disable the side filter if directories
|
||||
// are not indexed?
|
||||
//
|
||||
// We could use less memory by not computing a full list and walking the index twice instead (we
|
||||
// need two passes in any case because of the common root computation).
|
||||
//
|
||||
|
||||
bool Db::dirlist(int depth, std::string& root, std::vector<std::string>& dirs)
|
||||
{
|
||||
// Build a full list of filesystem paths.
|
||||
Xapian::Database xdb = m_ndb->xrdb;
|
||||
auto prefix = wrap_prefix("Q");
|
||||
std::vector<std::string> listall;
|
||||
for (int tries = 0; tries < 2; tries++) {
|
||||
try {
|
||||
Xapian::TermIterator it = xdb.allterms_begin();
|
||||
it.skip_to(prefix.c_str());
|
||||
for (; it != xdb.allterms_end(); it++) {
|
||||
string ixterm{*it};
|
||||
// If we're beyond the Q terms end
|
||||
if (ixterm.find(prefix) != 0)
|
||||
break;
|
||||
ixterm = strip_prefix(ixterm);
|
||||
// Skip non-paths like Web entries etc.
|
||||
if (!path_isabsolute(ixterm))
|
||||
continue;
|
||||
// Skip subdocs
|
||||
auto pos = ixterm.find_first_of('|');
|
||||
if (pos < ixterm.size() - 1)
|
||||
continue;
|
||||
listall.push_back(ixterm);
|
||||
}
|
||||
break;
|
||||
} catch (const Xapian::DatabaseModifiedError &e) {
|
||||
m_reason = e.get_msg();
|
||||
xdb.reopen();
|
||||
continue;
|
||||
} XCATCHERROR(m_reason);
|
||||
break;
|
||||
}
|
||||
if (!m_reason.empty()) {
|
||||
LOGERR("Db::dirlist: " << m_reason << "\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
root = commonprefix(listall);
|
||||
std::unordered_set<std::string> unics;
|
||||
for (auto& entry : listall) {
|
||||
string::size_type pos = root.size();
|
||||
for (int i = 0; i < depth; i++) {
|
||||
auto npos = entry.find("/", pos+1);
|
||||
if (npos == std::string::npos) {
|
||||
break;
|
||||
}
|
||||
pos = npos;
|
||||
}
|
||||
entry.erase(pos);
|
||||
unics.insert(entry);
|
||||
}
|
||||
|
||||
dirs.clear();
|
||||
dirs.insert(dirs.begin(), unics.begin(), unics.end());
|
||||
sort(dirs.begin(), dirs.end());
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Term list walking. */
|
||||
class TermIter {
|
||||
public:
|
||||
|
||||
@ -39,7 +39,7 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
|
||||
$(DEFS)
|
||||
|
||||
noinst_PROGRAMS = plaintorich textsplit fstreewalk rclconfig hldata unac mbox \
|
||||
circache wipedir mimetype fileudi x11mon trqrstore ecrontab
|
||||
circache wipedir mimetype fileudi x11mon trqrstore ecrontab rcldb
|
||||
|
||||
ecrontab_SOURCES = trecrontab.cpp
|
||||
ecrontab_LDADD = ../librecoll.la
|
||||
@ -65,6 +65,9 @@ mimetype_LDADD = ../librecoll.la
|
||||
rclconfig_SOURCES = trrclconfig.cpp
|
||||
rclconfig_LDADD = ../librecoll.la
|
||||
|
||||
rcldb_SOURCES = trrcldb.cpp
|
||||
rcldb_LDADD = ../librecoll.la
|
||||
|
||||
textsplit_SOURCES = trtextsplit.cpp
|
||||
textsplit_LDADD = ../librecoll.la
|
||||
|
||||
|
||||
131
src/testmains/trrcldb.cpp
Normal file
131
src/testmains/trrcldb.cpp
Normal file
@ -0,0 +1,131 @@
|
||||
/* Copyright (C) 2022-2022 J.F.Dockes
|
||||
*
|
||||
* License: GPL 2.1
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2.1 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <getopt.h>
|
||||
#include <malloc.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "rclinit.h"
|
||||
#include "rclconfig.h"
|
||||
#include "rcldb.h"
|
||||
#include "pathut.h"
|
||||
#include "smallut.h"
|
||||
#include "rclutil.h"
|
||||
#include "log.h"
|
||||
|
||||
static std::map<std::string, int> options {
|
||||
{"dirlist", 0},
|
||||
};
|
||||
|
||||
static char *thisprog;
|
||||
static void Usage(FILE *fp = stderr)
|
||||
{
|
||||
string sopts;
|
||||
for (const auto& opt: options) {
|
||||
sopts += "--" + opt.first + "\n";
|
||||
}
|
||||
fprintf(fp, "%s: usage: %s\n%s", thisprog, thisprog, sopts.c_str());
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
thisprog = *argv;
|
||||
std::vector<struct option> long_options;
|
||||
|
||||
for (auto& entry : options) {
|
||||
struct option opt;
|
||||
opt.name = entry.first.c_str();
|
||||
opt.has_arg = 0;
|
||||
opt.flag = &entry.second;
|
||||
opt.val = 1;
|
||||
long_options.push_back(opt);
|
||||
}
|
||||
long_options.push_back({0, 0, 0, 0});
|
||||
|
||||
std::string confdir;
|
||||
std::string *argcnf{nullptr};
|
||||
int opt;
|
||||
while ((opt = getopt_long(argc, argv, "c:", &long_options[0], nullptr)) != -1) {
|
||||
switch (opt) {
|
||||
case 'c':
|
||||
confdir = optarg;
|
||||
argcnf = &confdir;
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
Usage();
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
for (const auto& e : options) {
|
||||
std::cerr << e.first << " -> " << e.second << "\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
if (options["dirlist"]) {
|
||||
std::string reason;
|
||||
RclConfig *rclconfig = recollinit(0, 0, 0, reason, argcnf);
|
||||
if (!rclconfig || !rclconfig->ok()) {
|
||||
std::cerr << "Recoll init failed: " << reason << "\n";
|
||||
return 1;
|
||||
}
|
||||
Rcl::Db rcldb(rclconfig);
|
||||
if (!rcldb.open(Rcl::Db::DbRO)) {
|
||||
LOGERR("db open error\n");
|
||||
return 1;
|
||||
}
|
||||
const char *cp;
|
||||
if ((cp = getenv("RECOLL_EXTRA_DBS")) != 0) {
|
||||
vector<string> dbl;
|
||||
stringToTokens(cp, dbl, ":");
|
||||
for (const auto& path : dbl) {
|
||||
string dbdir = path_canon(path);
|
||||
path_catslash(dbdir);
|
||||
bool stripped;
|
||||
if (!Rcl::Db::testDbDir(dbdir, &stripped)) {
|
||||
LOGERR("Not a xapian index: [" << dbdir << "]\n");
|
||||
return 1;
|
||||
}
|
||||
if (!rcldb.addQueryDb(dbdir)) {
|
||||
LOGERR("Can't add " << dbdir << " as extra index\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string prefix;
|
||||
std::vector<std::string> dirs;
|
||||
rcldb.dirlist(1, prefix, dirs);
|
||||
sort(dirs.begin(), dirs.end());
|
||||
std::cout << "Prefix " << prefix << " dirs :\n";
|
||||
for (const auto& dir : dirs) {
|
||||
std::cout << dir << "\n";
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
std::cerr << "No operation set\n";
|
||||
Usage();
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user