diff --git a/src/testmains/trfstreewalk.cpp b/src/testmains/trfstreewalk.cpp
new file mode 100644
index 00000000..dec3e715
--- /dev/null
+++ b/src/testmains/trfstreewalk.cpp
@@ -0,0 +1,219 @@
+/* Copyright (C) 2017-2019 J.F.Dockes
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include "fstreewalk.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#include <iostream>
+
+#include "rclinit.h"
+#include "rclconfig.h"
+
+using namespace std;
+
+// Bitmask of the OPT_* values below: one bit per option seen on the command line.
+static int op_flags;
+#define OPT_MOINS 0x1
+#define OPT_p 0x2
+#define OPT_P 0x4
+#define OPT_r 0x8
+#define OPT_c 0x10
+#define OPT_b 0x20
+#define OPT_d 0x40
+#define OPT_m 0x80
+#define OPT_L 0x100
+#define OPT_w 0x200
+#define OPT_M 0x400
+#define OPT_D 0x800
+#define OPT_k 0x1000
+
+// Walker callback: prints the paths reported by the walker on stdout.
+class myCB : public FsTreeWalkerCB {
+ public:
+    FsTreeWalker::Status processone(const string &path,
+                                    const struct stat * /*st*/,
+                                    FsTreeWalker::CbFlag flg) override
+    {
+        if (flg == FsTreeWalker::FtwDirEnter) {
+            // With -r the directory path is printed without decoration.
+            if (op_flags & OPT_r)
+                cout << path << endl;
+            else
+                cout << "[Entering " << path << "]" << endl;
+        } else if (flg == FsTreeWalker::FtwDirReturn) {
+            cout << "[Returning to " << path << "]" << endl;
+        } else if (flg == FsTreeWalker::FtwRegular) {
+            cout << path << endl;
+        }
+        // Other flag values print nothing; the status is always FtwOk.
+        return FsTreeWalker::FtwOk;
+    }
+};
+
+static const char *thisprog;
+
+// Note that breadth first sorting is relatively expensive: less inode
+// locality, more disk usage (and also more user memory usage, does
+// not appear here). Some typical results on a real tree with 2.6
+// million entries (220MB of name data)
+// Recoll 1.13
+// time trfstreewalk / > /data/tmp/old
+// real 13m32.839s user 0m4.443s sys 0m31.128s
+//
+// Recoll 1.14
+// time trfstreewalk / > /data/tmp/nat;
+// real 13m28.685s user 0m4.430s sys 0m31.083s
+// time trfstreewalk -d / > /data/tmp/depth;
+// real 13m30.051s user 0m4.140s sys 0m33.862s
+// time trfstreewalk -m / > /data/tmp/mixed;
+// real 14m53.245s user 0m4.244s sys 0m34.494s
+// time trfstreewalk -b / > /data/tmp/breadth;
+// real 17m10.585s user 0m4.532s sys 0m35.033s
+
+static const char usage [] =
+"trfstreewalk [-p pattern] [-P ignpath] [-r] [-c] [-L] [-b|-d|-m] [-w]\n"
+"             [-M depth] [-D] [-k] topdir\n"
+" -p pattern : add pattern to the skipped names\n"
+" -P ignpath : add ignpath to the skipped paths\n"
+" -r : norecurse\n"
+" -c : no path canonification\n"
+" -L : follow symbolic links\n"
+" -b : use breadth first walk\n"
+" -d : use almost depth first (dir files, then subdirs)\n"
+" -m : use breadth up to 4 deep then switch to -d\n"
+" -w : unset default FNM_PATHNAME when using fnmatch() to match skipped paths\n"
+" -M depth : limit depth (works with -b/m/d)\n"
+" -D : skip dotfiles\n"
+" -k : like du\n"
+;
+
+// Print the usage message on stderr and exit with status 1.
+static void
+Usage(void)
+{
+    fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
+    exit(1);
+}
+
+// Parse the command line, then either print the size of the topdir tree (-k)
+// or walk topdir and print the entries reported to the callback.
+int main(int argc, const char **argv)
+{
+    vector<string> patterns;
+    vector<string> paths;
+    int maxdepth = -1;
+
+    thisprog = argv[0];
+    argc--; argv++;
+    while (argc > 0 && **argv == '-') {
+        (*argv)++;
+        if (!(**argv))
+            /* Case of a lone "-" argument, as in "adb - core" */
+            Usage();
+        while (**argv)
+            switch (*(*argv)++) {
+            case 'b': op_flags |= OPT_b; break;
+            case 'c': op_flags |= OPT_c; break;
+            case 'd': op_flags |= OPT_d; break;
+            case 'D': op_flags |= OPT_D; break;
+            case 'k': op_flags |= OPT_k; break;
+            case 'L': op_flags |= OPT_L; break;
+            case 'm': op_flags |= OPT_m; break;
+            case 'M': op_flags |= OPT_M; if (argc < 2) Usage();
+                {
+                    // Reject a non-numeric depth instead of letting it
+                    // silently become 0, as atoi() would.
+                    const char *depthArg = *(++argv);
+                    char *end = nullptr;
+                    const long depth = strtol(depthArg, &end, 10);
+                    if (end == depthArg || *end != '\0')
+                        Usage();
+                    maxdepth = static_cast<int>(depth);
+                }
+                argc--;
+                goto b1;
+            case 'p': op_flags |= OPT_p; if (argc < 2) Usage();
+                patterns.push_back(*(++argv));
+                argc--;
+                goto b1;
+            case 'P': op_flags |= OPT_P; if (argc < 2) Usage();
+                paths.push_back(*(++argv));
+                argc--;
+                goto b1;
+            case 'r': op_flags |= OPT_r; break;
+            case 'w': op_flags |= OPT_w; break;
+            default: Usage(); break;
+            }
+    b1: argc--; argv++;
+    }
+
+    if (argc != 1)
+        Usage();
+    string topdir = *argv++;argc--;
+
+    if (op_flags & OPT_k) {
+        int64_t bytes = fsTreeBytes(topdir);
+        if (bytes < 0) {
+            cerr << "fsTreeBytes failed\n";
+            return 1;
+        } else {
+            cout << bytes / 1024 << "\t" << topdir << endl;
+            return 0;
+        }
+    }
+
+    int opt = 0;
+    if (op_flags & OPT_r)
+        opt |= FsTreeWalker::FtwNoRecurse;
+    if (op_flags & OPT_c)
+        opt |= FsTreeWalker::FtwNoCanon;
+    if (op_flags & OPT_L)
+        opt |= FsTreeWalker::FtwFollow;
+    if (op_flags & OPT_D)
+        opt |= FsTreeWalker::FtwSkipDotFiles;
+
+    // Only one traversal flag is set: -b is tested first, then -d, then -m.
+    if (op_flags & OPT_b)
+        opt |= FsTreeWalker::FtwTravBreadth;
+    else if (op_flags & OPT_d)
+        opt |= FsTreeWalker::FtwTravFilesThenDirs;
+    else if (op_flags & OPT_m)
+        opt |= FsTreeWalker::FtwTravBreadthThenDepth;
+
+    string reason;
+    if (!recollinit(0, 0, reason)) {
+        fprintf(stderr, "Init failed: %s\n", reason.c_str());
+        exit(1);
+    }
+    if (op_flags & OPT_w) {
+        FsTreeWalker::setNoFnmPathname();
+    }
+    FsTreeWalker walker;
+    walker.setOpts(opt);
+    walker.setMaxDepth(maxdepth);
+    walker.setSkippedNames(patterns);
+    walker.setSkippedPaths(paths);
+    myCB cb;
+    walker.walk(topdir, cb);
+    // Diagnostics go to stderr so that they do not get mixed into the path
+    // listing written on stdout.
+    if (walker.getErrCnt() > 0)
+        cerr << walker.getReason();
+    return 0;
+}
diff --git a/src/utils/conftree.h b/src/utils/conftree.h
index cc53d237..462935c9 100644
--- a/src/utils/conftree.h
+++ b/src/utils/conftree.h
@@ -90,12 +90,12 @@ public:
     virtual ~ConfNull() {};
     virtual int get(const std::string& name, std::string& value,
                     const std::string& sk = std::string()) const = 0;
-    virtual bool hasNameAnywhere(const std::string& nm) const = 0;
     virtual int set(const std::string& nm, const std::string& val,
                     const std::string& sk = std::string()) = 0;
     virtual
bool ok() const = 0; virtual std::vector getNames(const std::string& sk, const char* = 0)const = 0; + virtual bool hasNameAnywhere(const std::string& nm) const = 0; virtual int erase(const std::string&, const std::string&) = 0; virtual int eraseKey(const std::string&) = 0; virtual void showall() const {}; @@ -140,13 +140,13 @@ public: virtual ~ConfSimple() {}; /** Origin file changed. Only makes sense if we read the data from a file */ - virtual bool sourceChanged() const; + virtual bool sourceChanged() const override; /** * Decide if we actually rewrite the backing-store after modifying the * tree. */ - virtual bool holdWrites(bool on) { + virtual bool holdWrites(bool on) override { m_holdWrites = on; if (on == false) { return write(); @@ -158,16 +158,13 @@ public: /** Clear, then reparse from string */ void reparse(const std::string& in); - /** Clear all content */ - int clear(); - /** * Get string value for named parameter, from specified subsection (looks * in global space if sk is empty). 
* @return 0 if name not found, 1 else */ virtual int get(const std::string& name, std::string& value, - const std::string& sk = std::string()) const; + const std::string& sk = std::string()) const override; /** * Get integer value for named parameter, from specified subsection (looks @@ -183,7 +180,7 @@ public: * @return 0 for error, 1 else */ virtual int set(const std::string& nm, const std::string& val, - const std::string& sk = std::string()); + const std::string& sk = std::string()) override; /** * Set value for named integer parameter in specified subsection (or global) * @return 0 for error, 1 else @@ -194,12 +191,15 @@ public: /** * Remove name and value from config */ - virtual int erase(const std::string& name, const std::string& sk); + virtual int erase(const std::string& name, const std::string& sk) override; /** * Erase all names under given subkey (and subkey itself) */ - virtual int eraseKey(const std::string& sk); + virtual int eraseKey(const std::string& sk) override; + + /** Clear all content */ + virtual int clear(); virtual StatusCode getStatus() const; virtual bool ok() const { @@ -220,7 +220,7 @@ public: void *clidata) const; /** Print all values to stdout */ - virtual void showall() const; + virtual void showall() const override; /** Return all names in given submap. */ virtual std::vector getNames(const std::string& sk, @@ -233,10 +233,10 @@ public: /** * Return all subkeys */ - virtual std::vector getSubKeys(bool) const { + virtual std::vector getSubKeys(bool) const override { return getSubKeys(); } - virtual std::vector getSubKeys() const; + virtual std::vector getSubKeys() const override; /** Return subkeys in file order. 
BEWARE: only for the original from the * file: the data is not duplicated to further copies */ @@ -418,7 +418,7 @@ public: return *this; } - virtual bool sourceChanged() const { + virtual bool sourceChanged() const override { typename std::vector::const_iterator it; for (it = m_confs.begin(); it != m_confs.end(); it++) { if ((*it)->sourceChanged()) { @@ -430,9 +430,8 @@ public: virtual int get(const std::string& name, std::string& value, const std::string& sk, bool shallow) const { - typename std::vector::const_iterator it; - for (it = m_confs.begin(); it != m_confs.end(); it++) { - if ((*it)->get(name, value, sk)) { + for (const auto& conf : m_confs) { + if (conf->get(name, value, sk)) { return true; } if (shallow) { @@ -441,8 +440,9 @@ public: } return false; } + virtual int get(const std::string& name, std::string& value, - const std::string& sk) const { + const std::string& sk) const override { return get(name, value, sk, false); }