fstreewalk: add option to only call back for skipped names/paths (diags)

This commit is contained in:
Jean-Francois Dockes 2021-03-21 17:17:37 +01:00
parent ad9e721fab
commit 291b0c32d4
3 changed files with 88 additions and 51 deletions

View File

@ -44,6 +44,7 @@ static int op_flags;
#define OPT_k 0x1000
#define OPT_y 0x2000
#define OPT_s 0x4000
#define OPT_S 0x8000
class myCB : public FsTreeWalkerCB {
public:
@ -64,6 +65,8 @@ public:
}
} else if (flg == FsTreeWalker::FtwRegular) {
cout << path << endl;
} else if (flg == FsTreeWalker::FtwSkipped) {
cout << "SKIPPED: " << path << endl;
}
return FsTreeWalker::FtwOk;
}
@ -105,6 +108,7 @@ static char usage [] =
" -s : don't print dir change info\n"
" -w : unset default FNM_PATHNAME when using fnmatch() to match skipped paths\n"
" -y <pattern> : add onlyNames entry\n"
" -S : only print skipped files and directories\n";
;
static void
Usage(void)
@ -150,6 +154,7 @@ int main(int argc, const char **argv)
goto b1;
case 'r': op_flags |= OPT_r; break;
case 's': op_flags |= OPT_s; break;
case 'S': op_flags |= OPT_S; break;
case 'w': op_flags |= OPT_w; break;
case 'y': op_flags |= OPT_y; if (argc < 2) Usage();
onlynames.push_back(*(++argv));
@ -184,6 +189,8 @@ int main(int argc, const char **argv)
opt |= FsTreeWalker::FtwFollow;
if (op_flags & OPT_D)
opt |= FsTreeWalker::FtwSkipDotFiles;
if (op_flags & OPT_S)
opt |= FsTreeWalker::FtwOnlySkipped;
if (op_flags & OPT_b)
opt |= FsTreeWalker::FtwTravBreadth;
@ -192,6 +199,7 @@ int main(int argc, const char **argv)
else if (op_flags & OPT_m)
opt |= FsTreeWalker::FtwTravBreadthThenDepth;
string reason;
if (!recollinit(0, 0, 0, reason)) {
fprintf(stderr, "Init failed: %s\n", reason.c_str());

View File

@ -218,11 +218,7 @@ bool FsTreeWalker::inSkippedPaths(const string& path, bool ckparents)
// Count the '/' characters in a path string.
//
// @param p the path to examine; may be empty.
// @return the number of '/' characters found in p (0 for an empty string).
static inline int slashcount(const string& p)
{
    // std::count returns the iterator difference type; the function's
    // declared return type is int, so narrow explicitly.
    return static_cast<int>(std::count(p.begin(), p.end(), '/'));
}
FsTreeWalker::Status FsTreeWalker::walk(const string& _top, FsTreeWalkerCB& cb)
@ -302,9 +298,11 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top, FsTreeWalkerCB& cb)
data->logsyserr("stat", nfather);
return errno == ENOENT ? FtwOk : FtwError;
}
if ((status = cb.processone(nfather, &st, FtwDirReturn)) &
(FtwStop|FtwError)) {
return status;
if (!(data->options & FtwOnlySkipped)) {
if ((status = cb.processone(nfather, &st, FtwDirReturn)) &
(FtwStop|FtwError)) {
return status;
}
}
}
@ -333,12 +331,18 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
// Tell user to process the top entry itself
if (stp->pst_type == PathStat::PST_DIR) {
if ((status = cb.processone(top, stp, FtwDirEnter)) &
(FtwStop|FtwError)) {
return status;
if (!(data->options & FtwOnlySkipped)) {
if ((status = cb.processone(top, stp, FtwDirEnter)) &
(FtwStop|FtwError)) {
return status;
}
}
} else if (stp->pst_type == PathStat::PST_REGULAR) {
return cb.processone(top, stp, FtwRegular);
if (!(data->options & FtwOnlySkipped)) {
return cb.processone(top, stp, FtwRegular);
} else {
return status;
}
} else {
return status;
}
@ -406,10 +410,31 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
// Skipped file names match ?
if (!data->skippedNames.empty()) {
if (inSkippedNames(dname))
if (inSkippedNames(dname)) {
if (data->options & FtwOnlySkipped) {
cb.processone(path_cat(top, dname), nullptr, FtwSkipped);
}
continue;
}
}
fn = path_cat(top, dname);
// Skipped file paths match ?
if (!data->skippedPaths.empty()) {
// We do not check the ancestors. This means that you can have
// a topdirs member under a skippedPath, to index a portion of
// an ignored area. This is the way it had always worked, but
// this was broken by 1.13.00 and the systematic use of
// FNM_LEADING_DIR
if (inSkippedPaths(fn, false)) {
if (data->options & FtwOnlySkipped) {
cb.processone(fn, nullptr, FtwSkipped);
}
continue;
}
}
int statret = path_fileprops(fn.c_str(), &st, data->options&FtwFollow);
if (statret == -1) {
data->logsyserr("stat", fn);
@ -424,22 +449,17 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
continue;
}
if (!data->skippedPaths.empty()) {
// We do not check the ancestors. This means that you can have
// a topdirs member under a skippedPath, to index a portion of
// an ignored area. This is the way it had always worked, but
// this was broken by 1.13.00 and the systematic use of
// FNM_LEADING_DIR
if (inSkippedPaths(fn, false))
continue;
}
if (st.pst_type == PathStat::PST_DIR) {
if (!o_nowalkfn.empty() && path_exists(path_cat(fn, o_nowalkfn))) {
continue;
}
if (data->options & FtwNoRecurse) {
status = cb.processone(fn, &st, FtwDirEnter);
if (!(data->options & FtwOnlySkipped)) {
status = cb.processone(fn, &st, FtwDirEnter);
} else {
status = FtwOk;
}
} else {
if (data->options & FtwTravNatural) {
status = iwalk(fn, &st, cb);
@ -461,9 +481,11 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
if (status & (FtwStop|FtwError))
goto out;
if (!(data->options & FtwNoRecurse))
if ((status = cb.processone(top, &st, FtwDirReturn))
& (FtwStop|FtwError))
goto out;
if (!(data->options & FtwOnlySkipped)) {
if ((status = cb.processone(top, &st, FtwDirReturn))
& (FtwStop|FtwError))
goto out;
}
} else if (st.pst_type == PathStat::PST_REGULAR ||
st.pst_type == PathStat::PST_SYMLINK) {
// Filtering patterns match ?
@ -471,9 +493,11 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
if (!inOnlyNames(dname))
continue;
}
if ((status = cb.processone(fn, &st, FtwRegular)) &
(FtwStop|FtwError)) {
goto out;
if (!(data->options & FtwOnlySkipped)) {
if ((status = cb.processone(fn, &st, FtwRegular)) &
(FtwStop|FtwError)) {
goto out;
}
}
}
// We ignore other file types (devices etc...)

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2004 J.F.Dockes
/* Copyright (C) 2004-2021 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -25,7 +25,7 @@ struct PathStat;
class FsTreeWalkerCB;
/**
* Class implementing a unix directory recursive walk.
* Class implementing a Unix directory recursive walk.
*
* A user-defined function object is called for every file or
* directory. Patterns to be ignored can be set before starting the
@ -33,7 +33,7 @@ class FsTreeWalkerCB;
* on subdirectories.
*/
class FsTreeWalker {
public:
public:
// Global option to use FNM_PATHNAME when matching paths (for
// skippedPaths).
// We initially used FNM_PATHNAME, and we can't change it now
@ -42,7 +42,7 @@ class FsTreeWalker {
// a value to the config file (skippedPathsNoFnmPathname)
static bool o_useFnmPathname;
static void setNoFnmPathname() {
o_useFnmPathname = false;
o_useFnmPathname = false;
}
// Global option to observe a "nowalk" file, which makes us treat
@ -56,22 +56,27 @@ class FsTreeWalker {
// Flags for call to processone(). FtwDirEnter is used when
// entering a directory. FtwDirReturn is used when returning to it
// after processing a subdirectory.
enum CbFlag {FtwRegular, FtwDirEnter, FtwDirReturn};
enum CbFlag {FtwRegular, FtwDirEnter, FtwDirReturn, FtwSkipped};
enum Status {FtwOk=0, FtwError=1, FtwStop=2,
FtwStatAll = FtwError|FtwStop};
FtwStatAll = FtwError|FtwStop};
enum Options {FtwOptNone = 0, FtwNoRecurse = 1, FtwFollow = 2,
FtwNoCanon = 4, FtwSkipDotFiles = 8,
// Tree walking options. Natural is close to depth first: process
// directory entries as we see them, recursing into subdirectories at
// once
// Breadth means we process all files and dirs at a given directory level
// before going deeper.
//
// FilesThenDirs is close to Natural, except that we process all files in a
// given directory before going deeper: allows keeping only a single
// directory open
// We don't do pure depth first (process subdirs before files), this does
// not appear to make any sense.
// Only call back for skipped files and directories,
// for getting a list of skipped stuff. We don't
// descend into skipped directories.
// ** The callback will receive a null struct stat pointer.**
FtwOnlySkipped = 0x10,
// Tree walking options. Natural is close to depth first: process
// directory entries as we see them, recursing into subdirectories at
// once
// Breadth means we process all files and dirs at a given directory level
// before going deeper.
//
// FilesThenDirs is close to Natural, except that we process all files in a
// given directory before going deeper: allows keeping only a single
// directory open
// We don't do pure depth first (process subdirs before files), this does
// not appear to make any sense.
FtwTravNatural = 0x10000, FtwTravBreadth = 0x20000,
FtwTravFilesThenDirs = 0x40000,
FtwTravBreadthThenDepth = 0x80000
@ -107,26 +112,26 @@ class FsTreeWalker {
bool setOnlyNames(const std::vector<std::string> &patterns);
/** Same for skipped paths: these are paths, not names, under which we
do not descend (ie: /home/me/.recoll) */
do not descend (ie: /home/me/.recoll) */
bool addSkippedPath(const std::string &path);
/** Set the ignored paths list */
bool setSkippedPaths(const std::vector<std::string> &patterns);
/** Test if path/name should be skipped. This can be used independently of
* an actual tree walk */
* an actual tree walk */
bool inSkippedPaths(const std::string& path, bool ckparents = false);
bool inSkippedNames(const std::string& name);
bool inOnlyNames(const std::string& name);
private:
private:
Status iwalk(const std::string &dir, struct PathStat *stp,
FsTreeWalkerCB& cb);
class Internal;
Internal *data;
Internal *data;
};
class FsTreeWalkerCB {
public:
public:
virtual ~FsTreeWalkerCB() {}
// Only st_mtime, st_ctime, st_size, st_mode (filetype bits: dir/reg/lnk),
virtual FsTreeWalker::Status