From 291b0c32d4c52d27f07c34c95093460ae5646393 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sun, 21 Mar 2021 17:17:37 +0100 Subject: [PATCH] fstreewalk: add option to only call back for skipped names/paths (diags) --- src/testmains/trfstreewalk.cpp | 8 ++++ src/utils/fstreewalk.cpp | 82 ++++++++++++++++++++++------------ src/utils/fstreewalk.h | 49 +++++++++++--------- 3 files changed, 88 insertions(+), 51 deletions(-) diff --git a/src/testmains/trfstreewalk.cpp b/src/testmains/trfstreewalk.cpp index 0562f558..51e2edd7 100644 --- a/src/testmains/trfstreewalk.cpp +++ b/src/testmains/trfstreewalk.cpp @@ -44,6 +44,7 @@ static int op_flags; #define OPT_k 0x1000 #define OPT_y 0x2000 #define OPT_s 0x4000 +#define OPT_S 0x8000 class myCB : public FsTreeWalkerCB { public: @@ -64,6 +65,8 @@ public: } } else if (flg == FsTreeWalker::FtwRegular) { cout << path << endl; + } else if (flg == FsTreeWalker::FtwSkipped) { + cout << "SKIPPED: " << path << endl; } return FsTreeWalker::FtwOk; } @@ -105,6 +108,7 @@ static char usage [] = " -s : don't print dir change info\n" " -w : unset default FNM_PATHNAME when using fnmatch() to match skipped paths\n" " -y : add onlyNames entry\n" + " -S : only print skipped files and directories\n"; ; static void Usage(void) @@ -150,6 +154,7 @@ int main(int argc, const char **argv) goto b1; case 'r': op_flags |= OPT_r; break; case 's': op_flags |= OPT_s; break; + case 'S': op_flags |= OPT_S; break; case 'w': op_flags |= OPT_w; break; case 'y': op_flags |= OPT_y; if (argc < 2) Usage(); onlynames.push_back(*(++argv)); @@ -184,6 +189,8 @@ int main(int argc, const char **argv) opt |= FsTreeWalker::FtwFollow; if (op_flags & OPT_D) opt |= FsTreeWalker::FtwSkipDotFiles; + if (op_flags & OPT_S) + opt |= FsTreeWalker::FtwOnlySkipped; if (op_flags & OPT_b) opt |= FsTreeWalker::FtwTravBreadth; @@ -192,6 +199,7 @@ int main(int argc, const char **argv) else if (op_flags & OPT_m) opt |= FsTreeWalker::FtwTravBreadthThenDepth; + string reason; if 
(!recollinit(0, 0, 0, reason)) { fprintf(stderr, "Init failed: %s\n", reason.c_str()); diff --git a/src/utils/fstreewalk.cpp b/src/utils/fstreewalk.cpp index 9e468ff4..c36a98e9 100644 --- a/src/utils/fstreewalk.cpp +++ b/src/utils/fstreewalk.cpp @@ -218,11 +218,7 @@ bool FsTreeWalker::inSkippedPaths(const string& path, bool ckparents) static inline int slashcount(const string& p) { - int n = 0; - for (unsigned int i = 0; i < p.size(); i++) - if (p[i] == '/') - n++; - return n; + return std::count(p.begin(), p.end(), '/'); } FsTreeWalker::Status FsTreeWalker::walk(const string& _top, FsTreeWalkerCB& cb) @@ -302,9 +298,11 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top, FsTreeWalkerCB& cb) data->logsyserr("stat", nfather); return errno == ENOENT ? FtwOk : FtwError; } - if ((status = cb.processone(nfather, &st, FtwDirReturn)) & - (FtwStop|FtwError)) { - return status; + if (!(data->options & FtwOnlySkipped)) { + if ((status = cb.processone(nfather, &st, FtwDirReturn)) & + (FtwStop|FtwError)) { + return status; + } } } @@ -333,12 +331,18 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top, // Tell user to process the top entry itself if (stp->pst_type == PathStat::PST_DIR) { - if ((status = cb.processone(top, stp, FtwDirEnter)) & - (FtwStop|FtwError)) { - return status; + if (!(data->options & FtwOnlySkipped)) { + if ((status = cb.processone(top, stp, FtwDirEnter)) & + (FtwStop|FtwError)) { + return status; + } } } else if (stp->pst_type == PathStat::PST_REGULAR) { - return cb.processone(top, stp, FtwRegular); + if (!(data->options & FtwOnlySkipped)) { + return cb.processone(top, stp, FtwRegular); + } else { + return status; + } } else { return status; } @@ -406,10 +410,31 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top, // Skipped file names match ? 
if (!data->skippedNames.empty()) { - if (inSkippedNames(dname)) + if (inSkippedNames(dname)) { + if (data->options & FtwOnlySkipped) { + cb.processone(path_cat(top, dname), nullptr, FtwSkipped); + } continue; + } } + fn = path_cat(top, dname); + + // Skipped file paths match ? + if (!data->skippedPaths.empty()) { + // We do not check the ancestors. This means that you can have + // a topdirs member under a skippedPath, to index a portion of + // an ignored area. This is the way it had always worked, but + // this was broken by 1.13.00 and the systematic use of + // FNM_LEADING_DIR + if (inSkippedPaths(fn, false)) { + if (data->options & FtwOnlySkipped) { + cb.processone(fn, nullptr, FtwSkipped); + } + continue; + } + } + int statret = path_fileprops(fn.c_str(), &st, data->options&FtwFollow); if (statret == -1) { data->logsyserr("stat", fn); @@ -424,22 +449,17 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top, continue; } - if (!data->skippedPaths.empty()) { - // We do not check the ancestors. This means that you can have - // a topdirs member under a skippedPath, to index a portion of - // an ignored area. 
This is the way it had always worked, but - // this was broken by 1.13.00 and the systematic use of - // FNM_LEADING_DIR - if (inSkippedPaths(fn, false)) - continue; - } if (st.pst_type == PathStat::PST_DIR) { if (!o_nowalkfn.empty() && path_exists(path_cat(fn, o_nowalkfn))) { continue; } if (data->options & FtwNoRecurse) { - status = cb.processone(fn, &st, FtwDirEnter); + if (!(data->options & FtwOnlySkipped)) { + status = cb.processone(fn, &st, FtwDirEnter); + } else { + status = FtwOk; + } } else { if (data->options & FtwTravNatural) { status = iwalk(fn, &st, cb); @@ -461,9 +481,11 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top, if (status & (FtwStop|FtwError)) goto out; if (!(data->options & FtwNoRecurse)) - if ((status = cb.processone(top, &st, FtwDirReturn)) - & (FtwStop|FtwError)) - goto out; + if (!(data->options & FtwOnlySkipped)) { + if ((status = cb.processone(top, &st, FtwDirReturn)) + & (FtwStop|FtwError)) + goto out; + } } else if (st.pst_type == PathStat::PST_REGULAR || st.pst_type == PathStat::PST_SYMLINK) { // Filtering patterns match ? @@ -471,9 +493,11 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top, if (!inOnlyNames(dname)) continue; } - if ((status = cb.processone(fn, &st, FtwRegular)) & - (FtwStop|FtwError)) { - goto out; + if (!(data->options & FtwOnlySkipped)) { + if ((status = cb.processone(fn, &st, FtwRegular)) & + (FtwStop|FtwError)) { + goto out; + } } } // We ignore other file types (devices etc...) 
diff --git a/src/utils/fstreewalk.h b/src/utils/fstreewalk.h index 7396554a..e1d26acb 100644 --- a/src/utils/fstreewalk.h +++ b/src/utils/fstreewalk.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2004 J.F.Dockes +/* Copyright (C) 2004-2021 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -25,7 +25,7 @@ struct PathStat; class FsTreeWalkerCB; /** - * Class implementing a unix directory recursive walk. + * Class implementing a Unix directory recursive walk. * * A user-defined function object is called for every file or * directory. Patterns to be ignored can be set before starting the @@ -33,7 +33,7 @@ class FsTreeWalkerCB; * on subdirectories. */ class FsTreeWalker { - public: +public: // Global option to use FNM_PATHNAME when matching paths (for // skippedPaths). // We initially used FNM_PATHNAME, and we can't change it now @@ -42,7 +42,7 @@ class FsTreeWalker { // a value to the config file (skippedPathsNoFnmPathname) static bool o_useFnmPathname; static void setNoFnmPathname() { - o_useFnmPathname = false; + o_useFnmPathname = false; } // Global option to observe a "nowalk" file, which makes us treat @@ -56,22 +56,27 @@ class FsTreeWalker { // Flags for call to processone(). FtwDirEnter is used when // entering a directory. FtwDirReturn is used when returning to it // after processing a subdirectory. - enum CbFlag {FtwRegular, FtwDirEnter, FtwDirReturn}; + enum CbFlag {FtwRegular, FtwDirEnter, FtwDirReturn, FtwSkipped}; enum Status {FtwOk=0, FtwError=1, FtwStop=2, - FtwStatAll = FtwError|FtwStop}; + FtwStatAll = FtwError|FtwStop}; enum Options {FtwOptNone = 0, FtwNoRecurse = 1, FtwFollow = 2, FtwNoCanon = 4, FtwSkipDotFiles = 8, - // Tree walking options. 
Natural is close to depth first: process - // directory entries as we see them, recursing into subdirectories at - // once - // Breadth means we process all files and dirs at a given directory level - // before going deeper. - // - // FilesThenDirs is close to Natural, except that we process all files in a - // given directory before going deeper: allows keeping only a single - // directory open - // We don't do pure depth first (process subdirs before files), this does - // not appear to make any sense. + // Only call back for skipped files and directories, + // for getting a list of skipped stuff. We don't + // descend into skipped directories. + // ** The callback will receive a null struct stat pointer.** + FtwOnlySkipped = 0x10, + // Tree walking options. Natural is close to depth first: process + // directory entries as we see them, recursing into subdirectories at + // once + // Breadth means we process all files and dirs at a given directory level + // before going deeper. + // + // FilesThenDirs is close to Natural, except that we process all files in a + // given directory before going deeper: allows keeping only a single + // directory open + // We don't do pure depth first (process subdirs before files), this does + // not appear to make any sense. FtwTravNatural = 0x10000, FtwTravBreadth = 0x20000, FtwTravFilesThenDirs = 0x40000, FtwTravBreadthThenDepth = 0x80000 @@ -107,26 +112,26 @@ class FsTreeWalker { bool setOnlyNames(const std::vector &patterns); /** Same for skipped paths: this are paths, not names, under which we - do not descend (ie: /home/me/.recoll) */ + do not descend (ie: /home/me/.recoll) */ bool addSkippedPath(const std::string &path); /** Set the ignored paths list */ bool setSkippedPaths(const std::vector &patterns); /** Test if path/name should be skipped. 
This can be used independently of - * an actual tree walk */ + * an actual tree walk */ bool inSkippedPaths(const std::string& path, bool ckparents = false); bool inSkippedNames(const std::string& name); bool inOnlyNames(const std::string& name); - private: +private: Status iwalk(const std::string &dir, struct PathStat *stp, FsTreeWalkerCB& cb); class Internal; - Internal *data; + Internal *data; }; class FsTreeWalkerCB { - public: +public: virtual ~FsTreeWalkerCB() {} // Only st_mtime, st_ctime, st_size, st_mode (filetype bits: dir/reg/lnk), virtual FsTreeWalker::Status