Implement different tree traversal options in fstreewalk, including breadth first and mixed.
This commit is contained in:
parent
ce17860264
commit
771c5cb336
@ -38,11 +38,18 @@ static char rcsid[] = "@(#$Id: fstreewalk.cpp,v 1.15 2007-12-13 06:58:22 dockes
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
#endif /* NO_NAMESPACES */
|
#endif /* NO_NAMESPACES */
|
||||||
|
|
||||||
|
const int FsTreeWalker::FtwTravMask = FtwTravNatural|
|
||||||
|
FtwTravBreadth|FtwTravFilesThenDirs|FtwTravBreadthThenDepth;
|
||||||
|
|
||||||
class FsTreeWalker::Internal {
|
class FsTreeWalker::Internal {
|
||||||
int options;
|
int options;
|
||||||
|
int depthswitch;
|
||||||
stringstream reason;
|
stringstream reason;
|
||||||
list<string> skippedNames;
|
list<string> skippedNames;
|
||||||
list<string> skippedPaths;
|
list<string> skippedPaths;
|
||||||
|
// When doing Breadth or FilesThenDirs traversal, we keep a list
|
||||||
|
// of directory paths to be processed, and we do not recurse.
|
||||||
|
list<string> dirs;
|
||||||
int errors;
|
int errors;
|
||||||
void logsyserr(const char *call, const string ¶m)
|
void logsyserr(const char *call, const string ¶m)
|
||||||
{
|
{
|
||||||
@ -58,6 +65,7 @@ FsTreeWalker::FsTreeWalker(int opts)
|
|||||||
data = new Internal;
|
data = new Internal;
|
||||||
if (data) {
|
if (data) {
|
||||||
data->options = opts;
|
data->options = opts;
|
||||||
|
data->depthswitch = 4;
|
||||||
data->errors = 0;
|
data->errors = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -67,10 +75,11 @@ FsTreeWalker::~FsTreeWalker()
|
|||||||
delete data;
|
delete data;
|
||||||
}
|
}
|
||||||
|
|
||||||
void FsTreeWalker::setOpts(Options opts)
|
void FsTreeWalker::setOpts(Options opts, int depthswitch)
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data) {
|
||||||
data->options = opts;
|
data->options = opts;
|
||||||
|
data->depthswitch = depthswitch;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -152,20 +161,109 @@ bool FsTreeWalker::inSkippedPaths(const string& path, bool ckparents)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int slashcount(const string& p)
|
||||||
|
{
|
||||||
|
int n = 0;
|
||||||
|
for (unsigned int i = 0; i < p.size(); i++)
|
||||||
|
if (p[i] == '/')
|
||||||
|
n++;
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
|
FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
|
||||||
FsTreeWalkerCB& cb)
|
FsTreeWalkerCB& cb)
|
||||||
{
|
{
|
||||||
string top = (data->options & FtwNoCanon) ? _top : path_canon(_top);
|
string top = (data->options & FtwNoCanon) ? _top : path_canon(_top);
|
||||||
|
|
||||||
|
if ((data->options & FtwTravMask) == 0) {
|
||||||
|
data->options |= FtwTravNatural;
|
||||||
|
}
|
||||||
|
|
||||||
|
int basedepth = slashcount(top); // Only used for breadthThenDepth
|
||||||
|
|
||||||
struct stat st;
|
struct stat st;
|
||||||
int statret;
|
|
||||||
// We always follow symlinks at this point. Makes more sense.
|
// We always follow symlinks at this point. Makes more sense.
|
||||||
statret = stat(top.c_str(), &st);
|
if (stat(top.c_str(), &st) == -1) {
|
||||||
if (statret == -1) {
|
|
||||||
data->logsyserr("stat", top);
|
data->logsyserr("stat", top);
|
||||||
return FtwError;
|
return FtwError;
|
||||||
}
|
}
|
||||||
return iwalk(top, &st, cb);
|
|
||||||
|
// Recursive version, using the call stack to store state. iwalk
|
||||||
|
// will process files and recursively descend into subdirs in
|
||||||
|
// physical order of the current directory.
|
||||||
|
if ((data->options & FtwTravMask) == FtwTravNatural) {
|
||||||
|
return iwalk(top, &st, cb);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Breadth first of filesThenDirs semi-depth first order
|
||||||
|
// Managing lists of directories to be visited later, in breadth or
|
||||||
|
// depth order. Null marker are inserted in the list to indicate
|
||||||
|
// father directory changes (avoids computing parents all the time).
|
||||||
|
data->dirs.push_back(top);
|
||||||
|
Status status;
|
||||||
|
while (!data->dirs.empty()) {
|
||||||
|
string dir, nfather;
|
||||||
|
if (data->options & (FtwTravBreadth|FtwTravBreadthThenDepth)) {
|
||||||
|
// Breadth first, pop and process an older dir at the
|
||||||
|
// front of the list. This will add any child dirs at the
|
||||||
|
// back
|
||||||
|
dir = data->dirs.front();
|
||||||
|
data->dirs.pop_front();
|
||||||
|
if (dir == "") {
|
||||||
|
// Father change marker.
|
||||||
|
if (data->dirs.empty())
|
||||||
|
break;
|
||||||
|
dir = data->dirs.front();
|
||||||
|
data->dirs.pop_front();
|
||||||
|
nfather = path_getfather(dir);
|
||||||
|
if (data->options & FtwTravBreadthThenDepth) {
|
||||||
|
// Check if new depth warrants switch to depth first
|
||||||
|
// traversal (will happen on next loop iteration).
|
||||||
|
int curdepth = slashcount(dir) - basedepth;
|
||||||
|
if (curdepth >= data->depthswitch) {
|
||||||
|
//fprintf(stderr, "SWITCHING TO DEPTH FIRST\n");
|
||||||
|
data->options &= ~FtwTravMask;
|
||||||
|
data->options |= FtwTravFilesThenDirs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Depth first, pop and process latest dir
|
||||||
|
dir = data->dirs.back();
|
||||||
|
data->dirs.pop_back();
|
||||||
|
if (dir == "") {
|
||||||
|
// Father change marker.
|
||||||
|
if (data->dirs.empty())
|
||||||
|
break;
|
||||||
|
dir = data->dirs.back();
|
||||||
|
data->dirs.pop_back();
|
||||||
|
nfather = path_getfather(dir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If changing parent directory, advise our user.
|
||||||
|
if (!nfather.empty()) {
|
||||||
|
if (stat(nfather.c_str(), &st) == -1) {
|
||||||
|
data->logsyserr("stat", nfather);
|
||||||
|
return FtwError;
|
||||||
|
}
|
||||||
|
if ((status = cb.processone(nfather, &st, FtwDirReturn)) &
|
||||||
|
(FtwStop|FtwError)) {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stat(dir.c_str(), &st) == -1) {
|
||||||
|
data->logsyserr("stat", dir);
|
||||||
|
return FtwError;
|
||||||
|
}
|
||||||
|
// iwalk will not recurse in this case, just process file entries
|
||||||
|
// and append subdir entries to the list.
|
||||||
|
status = iwalk(dir, &st, cb);
|
||||||
|
if (status != FtwOk)
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
return FtwOk;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note that the 'norecurse' flag is handled as part of the directory read.
|
// Note that the 'norecurse' flag is handled as part of the directory read.
|
||||||
@ -176,8 +274,9 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
|
|||||||
FsTreeWalkerCB& cb)
|
FsTreeWalkerCB& cb)
|
||||||
{
|
{
|
||||||
Status status = FtwOk;
|
Status status = FtwOk;
|
||||||
|
bool nullpush = false;
|
||||||
|
|
||||||
// Handle the parameter
|
// Tell user to process the top entry itself
|
||||||
if (S_ISDIR(stp->st_mode)) {
|
if (S_ISDIR(stp->st_mode)) {
|
||||||
if ((status = cb.processone(top, stp, FtwDirEnter)) &
|
if ((status = cb.processone(top, stp, FtwDirEnter)) &
|
||||||
(FtwStop|FtwError)) {
|
(FtwStop|FtwError)) {
|
||||||
@ -205,6 +304,8 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
|
|||||||
|
|
||||||
struct dirent *ent;
|
struct dirent *ent;
|
||||||
while ((ent = readdir(d)) != 0) {
|
while ((ent = readdir(d)) != 0) {
|
||||||
|
string fn;
|
||||||
|
struct stat st;
|
||||||
// Skip . and ..
|
// Skip . and ..
|
||||||
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
|
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
|
||||||
continue;
|
continue;
|
||||||
@ -212,55 +313,60 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
|
|||||||
// Skipped file names match ?
|
// Skipped file names match ?
|
||||||
if (!data->skippedNames.empty()) {
|
if (!data->skippedNames.empty()) {
|
||||||
if (inSkippedNames(ent->d_name))
|
if (inSkippedNames(ent->d_name))
|
||||||
goto skip;
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
string fn = path_cat(top, ent->d_name);
|
|
||||||
|
|
||||||
struct stat st;
|
|
||||||
int statret = (data->options & FtwFollow) ? stat(fn.c_str(), &st) :
|
|
||||||
lstat(fn.c_str(), &st);
|
|
||||||
if (statret == -1) {
|
|
||||||
data->logsyserr("stat", fn);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
if (!data->skippedPaths.empty()) {
|
|
||||||
// We do not check the ancestors. This means that you can have
|
|
||||||
// a topdirs member under a skippedPath, to index a portion of
|
|
||||||
// an ignored area. This is the way it had always worked, but
|
|
||||||
// this was broken by 1.13.00 and the systematic use of
|
|
||||||
// FNM_LEADING_DIR
|
|
||||||
if (inSkippedPaths(fn, false))
|
|
||||||
goto skip;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (S_ISDIR(st.st_mode)) {
|
|
||||||
if (data->options & FtwNoRecurse) {
|
|
||||||
status = cb.processone(fn, &st, FtwDirEnter);
|
|
||||||
} else {
|
|
||||||
status = iwalk(fn, &st, cb);
|
|
||||||
}
|
|
||||||
if (status & (FtwStop|FtwError))
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
// No DirReturn call when not recursing
|
|
||||||
if (!(data->options & FtwNoRecurse))
|
|
||||||
if ((status = cb.processone(top, &st, FtwDirReturn))
|
|
||||||
& (FtwStop|FtwError))
|
|
||||||
goto out;
|
|
||||||
} else if (S_ISREG(st.st_mode)) {
|
|
||||||
if ((status = cb.processone(fn, &st, FtwRegular)) &
|
|
||||||
(FtwStop|FtwError)) {
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
skip: ;
|
fn = path_cat(top, ent->d_name);
|
||||||
|
int statret = (data->options & FtwFollow) ? stat(fn.c_str(), &st) :
|
||||||
|
lstat(fn.c_str(), &st);
|
||||||
|
if (statret == -1) {
|
||||||
|
data->logsyserr("stat", fn);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!data->skippedPaths.empty()) {
|
||||||
|
// We do not check the ancestors. This means that you can have
|
||||||
|
// a topdirs member under a skippedPath, to index a portion of
|
||||||
|
// an ignored area. This is the way it had always worked, but
|
||||||
|
// this was broken by 1.13.00 and the systematic use of
|
||||||
|
// FNM_LEADING_DIR
|
||||||
|
if (inSkippedPaths(fn, false))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// We skip other file types (devices etc...)
|
if (S_ISDIR(st.st_mode)) {
|
||||||
}
|
if (data->options & FtwNoRecurse) {
|
||||||
|
status = cb.processone(fn, &st, FtwDirEnter);
|
||||||
|
} else {
|
||||||
|
if (data->options & FtwTravNatural) {
|
||||||
|
status = iwalk(fn, &st, cb);
|
||||||
|
} else {
|
||||||
|
// If first subdir, push marker to separate
|
||||||
|
// from entries for other dir. This is to help
|
||||||
|
// with generating DirReturn callbacks
|
||||||
|
if (!nullpush) {
|
||||||
|
if (!data->dirs.empty() && data->dirs.back() != "")
|
||||||
|
data->dirs.push_back("");
|
||||||
|
nullpush = true;
|
||||||
|
}
|
||||||
|
data->dirs.push_back(fn);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Note: only recursive case gets here.
|
||||||
|
if (status & (FtwStop|FtwError))
|
||||||
|
goto out;
|
||||||
|
if (!(data->options & FtwNoRecurse))
|
||||||
|
if ((status = cb.processone(top, &st, FtwDirReturn))
|
||||||
|
& (FtwStop|FtwError))
|
||||||
|
goto out;
|
||||||
|
} else if (S_ISREG(st.st_mode)) {
|
||||||
|
if ((status = cb.processone(fn, &st, FtwRegular)) &
|
||||||
|
(FtwStop|FtwError)) {
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// We ignore other file types (devices etc...)
|
||||||
|
} // readdir loop
|
||||||
|
|
||||||
out:
|
out:
|
||||||
if (d)
|
if (d)
|
||||||
@ -283,6 +389,9 @@ static int op_flags;
|
|||||||
#define OPT_P 0x4
|
#define OPT_P 0x4
|
||||||
#define OPT_r 0x8
|
#define OPT_r 0x8
|
||||||
#define OPT_c 0x10
|
#define OPT_c 0x10
|
||||||
|
#define OPT_b 0x20
|
||||||
|
#define OPT_d 0x40
|
||||||
|
#define OPT_m 0x80
|
||||||
|
|
||||||
class myCB : public FsTreeWalkerCB {
|
class myCB : public FsTreeWalkerCB {
|
||||||
public:
|
public:
|
||||||
@ -306,10 +415,25 @@ class myCB : public FsTreeWalkerCB {
|
|||||||
|
|
||||||
static const char *thisprog;
|
static const char *thisprog;
|
||||||
|
|
||||||
|
// Note that breadth first sorting is relatively expensive: less inode
|
||||||
|
// locality, more disk usage (and also more user memory usage, does not appear
|
||||||
|
// here):
|
||||||
|
// y$ time trfstreewalk -d / 2>&1 > /tmp/alld
|
||||||
|
// real 15m32.520s user 0m4.815s sys 0m33.014s
|
||||||
|
// y$ time trfstreewalk -b / 2>&1 > /tmp/allb
|
||||||
|
// real 17m3.380s user 0m4.658s sys 0m34.473s
|
||||||
|
// y$ wc /tmp/allb /tmp/alld
|
||||||
|
// 2618155 3208127 221619857 /tmp/allb
|
||||||
|
// 2618154 3208126 221619847 /tmp/alld
|
||||||
|
// (diff due to changes in /proc)
|
||||||
|
|
||||||
static char usage [] =
|
static char usage [] =
|
||||||
"trfstreewalk [-p pattern] [-P ignpath] [-r] [-c] topdir\n"
|
"trfstreewalk [-p pattern] [-P ignpath] [-r] [-c] topdir\n"
|
||||||
" -r : norecurse\n"
|
" -r : norecurse\n"
|
||||||
" -c : no path canonification\n"
|
" -c : no path canonification\n"
|
||||||
|
" -b : use breadth first walk\n"
|
||||||
|
" -d : use almost depth first (dir files, then subdirs)\n"
|
||||||
|
" -m : use breadth up to 4 deep then switch to -d\n"
|
||||||
;
|
;
|
||||||
static void
|
static void
|
||||||
Usage(void)
|
Usage(void)
|
||||||
@ -332,8 +456,11 @@ int main(int argc, const char **argv)
|
|||||||
Usage();
|
Usage();
|
||||||
while (**argv)
|
while (**argv)
|
||||||
switch (*(*argv)++) {
|
switch (*(*argv)++) {
|
||||||
case 'r': op_flags |= OPT_r; break;
|
case 'b': op_flags |= OPT_b; break;
|
||||||
case 'c': op_flags |= OPT_c; break;
|
case 'c': op_flags |= OPT_c; break;
|
||||||
|
case 'd': op_flags |= OPT_d; break;
|
||||||
|
case 'm': op_flags |= OPT_m; break;
|
||||||
|
case 'r': op_flags |= OPT_r; break;
|
||||||
case 'p': op_flags |= OPT_p; if (argc < 2) Usage();
|
case 'p': op_flags |= OPT_p; if (argc < 2) Usage();
|
||||||
patterns.push_back(*(++argv));
|
patterns.push_back(*(++argv));
|
||||||
argc--;
|
argc--;
|
||||||
@ -356,6 +483,14 @@ int main(int argc, const char **argv)
|
|||||||
opt |= FsTreeWalker::FtwNoRecurse;
|
opt |= FsTreeWalker::FtwNoRecurse;
|
||||||
if (op_flags & OPT_c)
|
if (op_flags & OPT_c)
|
||||||
opt |= FsTreeWalker::FtwNoCanon;
|
opt |= FsTreeWalker::FtwNoCanon;
|
||||||
|
|
||||||
|
if (op_flags & OPT_b)
|
||||||
|
opt |= FsTreeWalker::FtwTravBreadth;
|
||||||
|
else if (op_flags & OPT_d)
|
||||||
|
opt |= FsTreeWalker::FtwTravFilesThenDirs;
|
||||||
|
else if (op_flags & OPT_m)
|
||||||
|
opt |= FsTreeWalker::FtwTravBreadthThenDepth;
|
||||||
|
|
||||||
FsTreeWalker walker(opt);
|
FsTreeWalker walker(opt);
|
||||||
walker.setSkippedNames(patterns);
|
walker.setSkippedNames(patterns);
|
||||||
walker.setSkippedPaths(paths);
|
walker.setSkippedPaths(paths);
|
||||||
|
|||||||
@ -46,12 +46,27 @@ class FsTreeWalker {
|
|||||||
enum Status {FtwOk=0, FtwError=1, FtwStop=2,
|
enum Status {FtwOk=0, FtwError=1, FtwStop=2,
|
||||||
FtwStatAll = FtwError|FtwStop};
|
FtwStatAll = FtwError|FtwStop};
|
||||||
enum Options {FtwOptNone = 0, FtwNoRecurse = 1, FtwFollow = 2,
|
enum Options {FtwOptNone = 0, FtwNoRecurse = 1, FtwFollow = 2,
|
||||||
FtwNoCanon = 4};
|
FtwNoCanon = 4,
|
||||||
|
// Tree walking options. Natural is close to depth first: process
|
||||||
FsTreeWalker(int opts = FtwOptNone);
|
// directory entries as we see them, recursing into subdirectories at
|
||||||
|
// once
|
||||||
|
// Breadth means we process all files and dirs at a given directory level
|
||||||
|
// before going deeper.
|
||||||
|
//
|
||||||
|
// FilesThenDirs is close to Natural, except that we process all files in a
|
||||||
|
// given directory before going deeper: allows keeping only a single
|
||||||
|
// directory open
|
||||||
|
// We don't do pure depth first (process subdirs before files), this does
|
||||||
|
// not appear to make any sense.
|
||||||
|
FtwTravNatural = 0x10000, FtwTravBreadth = 0x20000,
|
||||||
|
FtwTravFilesThenDirs = 0x40000,
|
||||||
|
FtwTravBreadthThenDepth = 0x80000
|
||||||
|
};
|
||||||
|
static const int FtwTravMask;
|
||||||
|
FsTreeWalker(int opts = FtwTravNatural);
|
||||||
~FsTreeWalker();
|
~FsTreeWalker();
|
||||||
|
|
||||||
void setOpts(Options opts);
|
void setOpts(Options opts, int depthswitch = 4);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Begin file system walk.
|
* Begin file system walk.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user