/* Copyright (C) 2004-2019 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "autoconfig.h"

// NOTE(review): the targets of the system includes were lost when this file
// was extracted. They are reconstructed from the names used below (opendir,
// errno, fnmatch, strerror/strcmp, find/count, stringstream, vector, deque,
// set) -- confirm against the repository copy.
#include <stdio.h>
#include <dirent.h>
#include <errno.h>
#include <fnmatch.h>
#include "safesysstat.h"

#include <cstring>
#include <algorithm>
#include <sstream>
#include <vector>
#include <deque>
#include <set>

#include "cstr.h"
#include "log.h"
#include "pathut.h"
#include "fstreewalk.h"
#include "transcode.h"

using namespace std;

bool FsTreeWalker::o_useFnmPathname = true;
string FsTreeWalker::o_nowalkfn;

const int FsTreeWalker::FtwTravMask = FtwTravNatural |
    FtwTravBreadth | FtwTravFilesThenDirs | FtwTravBreadthThenDepth;

#ifndef _WIN32
// dev/ino means nothing on Windows. It seems that FileId could replace it
// but we only use this for cycle detection which we just disable.

// Identity of a directory as a (device, inode) pair, ordered so that it can
// be used as a std::set key.
class DirId {
public:
    dev_t dev;
    ino_t ino;
    DirId(dev_t d, ino_t i) : dev(d), ino(i) {}
    bool operator<(const DirId& r) const {
        return dev < r.dev || (dev == r.dev && ino < r.ino);
    }
};
#endif

// Private state of a FsTreeWalker.
class FsTreeWalker::Internal {
public:
    explicit Internal(int opts)
        : options(opts), depthswitch(4), maxdepth(-1), basedepth(0),
          errors(0) {
    }
    // Traversal option flags (Ftw* values).
    int options;
    // Depth at which BreadthThenDepth traversal switches to depth first.
    int depthswitch;
    // Maximum depth relative to the walk top. Negative: no limit.
    int maxdepth;
    // Number of '/' in the top path of the current walk().
    int basedepth;
    // Accumulated error messages, see logsyserr() and getReason().
    stringstream reason;
    vector<string> skippedNames;
    vector<string> onlyNames;
    vector<string> skippedPaths;
    // When doing Breadth or FilesThenDirs traversal, we keep a list
    // of directory paths to be processed, and we do not recurse.
    deque<string> dirs;
    int errors;
#ifndef _WIN32
    // Directories already entered (only maintained when FtwFollow is set).
    set<DirId> donedirs;
#endif

    // Record a failed system call in the error text and bump the error count.
    // errno is captured on entry, before any formatting can alter it. It is
    // restored before returning and also returned, so that callers can base
    // decisions on the value which the failed call actually set.
    int logsyserr(const char *call, const string& param) {
        const int err = errno;
        errors++;
        reason << call << "(" << param << ") : " << err << " : " <<
            strerror(err) << endl;
        errno = err;
        return err;
    }
};

FsTreeWalker::FsTreeWalker(int opts)
{
    data = new Internal(opts);
}

FsTreeWalker::~FsTreeWalker()
{
    delete data;
}

// Replace the option flags.
void FsTreeWalker::setOpts(int opts)
{
    if (data) {
        data->options = opts;
    }
}

// Return the current option flags, or 0 if there is no internal state.
int FsTreeWalker::getOpts()
{
    if (data) {
        return data->options;
    } else {
        return 0;
    }
}

// Set the depth at which BreadthThenDepth traversal goes depth first.
void FsTreeWalker::setDepthSwitch(int ds)
{
    if (data) {
        data->depthswitch = ds;
    }
}

// Set the maximum traversal depth. A negative value disables the limit.
void FsTreeWalker::setMaxDepth(int md)
{
    if (data) {
        data->maxdepth = md;
    }
}

// Return the accumulated error text. This also clears the text and resets
// the error count.
string FsTreeWalker::getReason()
{
    string reason = data->reason.str();
    data->reason.str(string());
    data->errors = 0;
    return reason;
}

// Return the number of errors logged since the last getReason() call.
int FsTreeWalker::getErrCnt()
{
    return data->errors;
}

// Add a name pattern to the skip list, unless already present. Always
// returns true.
bool FsTreeWalker::addSkippedName(const string& pattern)
{
    auto& names = data->skippedNames;
    if (find(names.begin(), names.end(), pattern) == names.end())
        names.push_back(pattern);
    return true;
}

// Replace the list of skipped name patterns. Always returns true.
bool FsTreeWalker::setSkippedNames(const vector<string>& patterns)
{
    data->skippedNames = patterns;
    return true;
}

// Return true if name matches one of the skipped name patterns.
bool FsTreeWalker::inSkippedNames(const string& name)
{
    for (const auto& pattern : data->skippedNames) {
        if (fnmatch(pattern.c_str(), name.c_str(), 0) == 0) {
            return true;
        }
    }
    return false;
}

// Replace the list of "only these names" patterns. Always returns true.
bool FsTreeWalker::setOnlyNames(const vector<string>& patterns)
{
    data->onlyNames = patterns;
    return true;
}

// Return true if name matches one of the onlyNames patterns, or if no such
// patterns are set.
bool FsTreeWalker::inOnlyNames(const string& name)
{
    if (data->onlyNames.empty()) {
        // Not set: all match
        return true;
    }
    for (const auto& pattern : data->onlyNames) {
        if (fnmatch(pattern.c_str(), name.c_str(), 0) == 0) {
            return true;
        }
    }
    return false;
}

// Add a path pattern to the skip list, unless already present. The path is
// canonized first, except if FtwNoCanon is set. Always returns true.
bool FsTreeWalker::addSkippedPath(const string& ipath)
{
    string path = (data->options & FtwNoCanon) ? ipath : path_canon(ipath);
    auto& paths = data->skippedPaths;
    if (find(paths.begin(), paths.end(), path) == paths.end())
        paths.push_back(path);
    return true;
}

// Replace the list of skipped path patterns. The paths are canonized, except
// if FtwNoCanon is set. Always returns true.
bool FsTreeWalker::setSkippedPaths(const vector<string>& paths)
{
    data->skippedPaths = paths;
    if (!(data->options & FtwNoCanon)) {
        for (auto& path : data->skippedPaths) {
            path = path_canon(path);
        }
    }
    return true;
}

// Return true if path matches one of the skipped path patterns. If ckparents
// is set, a pattern matching an ancestor of path also counts as a match.
bool FsTreeWalker::inSkippedPaths(const string& path, bool ckparents)
{
    int fnmflags = o_useFnmPathname ? FNM_PATHNAME : 0;
#ifdef FNM_LEADING_DIR
    if (ckparents)
        fnmflags |= FNM_LEADING_DIR;
#endif

    for (const auto& pattern : data->skippedPaths) {
#ifndef FNM_LEADING_DIR
        // No FNM_LEADING_DIR: test the path and then each ancestor in turn.
        if (ckparents) {
            string mpath = path;
            while (mpath.length() > 2) {
                if (fnmatch(pattern.c_str(), mpath.c_str(), fnmflags) == 0)
                    return true;
                mpath = path_getfather(mpath);
            }
        } else
#endif /* FNM_LEADING_DIR */
        if (fnmatch(pattern.c_str(), path.c_str(), fnmflags) == 0) {
            return true;
        }
    }
    return false;
}

// Count the '/' characters in p. Used as a cheap measure of path depth.
static inline int slashcount(const string& p)
{
    return static_cast<int>(count(p.begin(), p.end(), '/'));
}

// Walk the tree rooted at _top, calling cb.processone() for the entries.
// Returns FtwOk if a stat() fails with ENOENT (the entry may just have gone
// away), FtwError for other stat() failures, else the status resulting from
// the traversal and the callbacks.
FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
                                        FsTreeWalkerCB& cb)
{
    string top = (data->options & FtwNoCanon) ? _top : path_canon(_top);

    if ((data->options & FtwTravMask) == 0) {
        data->options |= FtwTravNatural;
    }

    data->basedepth = slashcount(top); // Only used for breadthxx
    struct stat st;
    // We always follow symlinks at this point. Makes more sense.
    if (path_fileprops(top, &st) == -1) {
        // Note that we do not return an error if the stat call
        // fails with ENOENT. A temp file may have gone away.
        // Use the errno value captured by logsyserr(): the global may
        // not survive the logging.
        const int err = data->logsyserr("stat", top);
        return err == ENOENT ? FtwOk : FtwError;
    }

    // Recursive version, using the call stack to store state. iwalk
    // will process files and recursively descend into subdirs in
    // physical order of the current directory.
    if ((data->options & FtwTravMask) == FtwTravNatural) {
        return iwalk(top, &st, cb);
    }

    // Breadth first or filesThenDirs semi-depth first order
    // Managing queues of directories to be visited later, in breadth or
    // depth order. Null marker are inserted in the queue to indicate
    // father directory changes (avoids computing parents all the time).
    data->dirs.push_back(top);
    Status status = FtwOk;
    while (!data->dirs.empty()) {
        string dir, nfather;
        if (data->options & (FtwTravBreadth | FtwTravBreadthThenDepth)) {
            // Breadth first, pop and process an older dir at the
            // front of the queue. This will add any child dirs at the
            // back
            dir = data->dirs.front();
            data->dirs.pop_front();
            if (dir.empty()) {
                // Father change marker.
                if (data->dirs.empty())
                    break;
                dir = data->dirs.front();
                data->dirs.pop_front();
                nfather = path_getfather(dir);
                if (data->options & FtwTravBreadthThenDepth) {
                    // Check if new depth warrants switch to depth first
                    // traversal (will happen on next loop iteration).
                    int curdepth = slashcount(dir) - data->basedepth;
                    if (curdepth >= data->depthswitch) {
                        //fprintf(stderr, "SWITCHING TO DEPTH FIRST\n");
                        data->options &= ~FtwTravMask;
                        data->options |= FtwTravFilesThenDirs;
                    }
                }
            }
        } else {
            // Depth first, pop and process latest dir
            dir = data->dirs.back();
            data->dirs.pop_back();
            if (dir.empty()) {
                // Father change marker.
                if (data->dirs.empty())
                    break;
                dir = data->dirs.back();
                data->dirs.pop_back();
                nfather = path_getfather(dir);
            }
        }

        // If changing parent directory, advise our user.
        if (!nfather.empty()) {
            if (path_fileprops(nfather, &st) == -1) {
                const int err = data->logsyserr("stat", nfather);
                return err == ENOENT ? FtwOk : FtwError;
            }
            if ((status = cb.processone(nfather, &st, FtwDirReturn)) &
                (FtwStop | FtwError)) {
                return status;
            }
        }

        if (path_fileprops(dir, &st) == -1) {
            const int err = data->logsyserr("stat", dir);
            return err == ENOENT ? FtwOk : FtwError;
        }
        // iwalk will not recurse in this case, just process file entries
        // and append subdir entries to the queue.
        status = iwalk(dir, &st, cb);
        if (status != FtwOk)
            return status;
    }
    return FtwOk;
}

#ifdef _WIN32
#define DIRENT _wdirent
#define DIRHDL _WDIR
#define OPENDIR _wopendir
#define CLOSEDIR _wclosedir
#define READDIR _wreaddir
#else
#define DIRENT dirent
#define DIRHDL DIR
#define OPENDIR opendir
#define CLOSEDIR closedir
#define READDIR readdir
#endif

// Process one entry (top, already stat-ed into *stp). A regular file is just
// passed to the callback. For a directory, the entries are read and either
// processed recursively (FtwTravNatural) or, for the subdirectories in the
// other traversal modes, queued in data->dirs for walk() to process later.
//
// Note that the 'norecurse' flag is handled as part of the directory read.
// This means that we always go into the top 'walk()' parameter if it is a
// directory, even if norecurse is set. Bug or Feature ?
FsTreeWalker::Status FsTreeWalker::iwalk(const string& top,
                                         struct stat *stp,
                                         FsTreeWalkerCB& cb)
{
    Status status = FtwOk;
    bool nullpush = false;

    // Tell user to process the top entry itself
    if (S_ISDIR(stp->st_mode)) {
        if ((status = cb.processone(top, stp, FtwDirEnter)) &
            (FtwStop | FtwError)) {
            return status;
        }
    } else if (S_ISREG(stp->st_mode)) {
        return cb.processone(top, stp, FtwRegular);
    } else {
        return status;
    }

    int curdepth = slashcount(top) - data->basedepth;
    if (data->maxdepth >= 0 && curdepth >= data->maxdepth) {
        LOGDEB1("FsTreeWalker::iwalk: Maxdepth reached: [" << (top) << "]\n" );
        return status;
    }

    // This is a directory, read it and process entries:

#ifndef _WIN32
    // Detect if directory already seen. This could just be several
    // symlinks pointing to the same place (if FtwFollow is set), it
    // could also be some other kind of cycle. In any case, there is
    // no point in entering again.
    // For now, we'll ignore the "other kind of cycle" part and only monitor
    // this if FtwFollow is set
    if (data->options & FtwFollow) {
        // insert() reports whether the id was already there: no need for
        // a separate lookup.
        if (!data->donedirs.insert(DirId(stp->st_dev, stp->st_ino)).second) {
            LOGINFO("Not processing [" << top <<
                    "] (already seen as other path)\n");
            return status;
        }
    }
#endif

    SYSPATH(top, systop);
    DIRHDL *d = OPENDIR(systop);
    if (nullptr == d) {
        // Decide on the errno value from the failed opendir, as captured
        // by logsyserr(), not on whatever the global holds after logging.
        int err = data->logsyserr("opendir", top);
#ifdef _WIN32
        int rc = GetLastError();
        LOGERR("opendir failed: LastError " << rc << endl);
        if (rc == ERROR_NETNAME_DELETED) {
            // 64: share disconnected.
            // Not too sure of the errno in this case.
            // Make sure it's not one of the permissible ones
            err = ENODEV;
        }
#endif
        switch (err) {
        case EPERM:
        case EACCES:
        case ENOENT:
#ifdef _WIN32
            // We get this quite a lot, don't know why. To be checked.
        case EINVAL:
#endif
            // No error set: indexing will continue in other directories
            goto out;
        default:
            status = FtwError;
            goto out;
        }
    }

    // Must stay without an initializer: the 'goto out' above jump over
    // this declaration.
    struct DIRENT *ent;
    // errno is reset before each readdir() call, so that an error can be
    // told from end of directory when the loop ends.
    while (errno = 0, ((ent = READDIR(d)) != 0)) {
        string fn;
        struct stat st;
#ifdef _WIN32
        string sdname;
        if (!wchartoutf8(ent->d_name, sdname)) {
            LOGERR("wchartoutf8 failed in " << top << endl);
            continue;
        }
        const char *dname = sdname.c_str();
#else
        const char *dname = ent->d_name;
#endif
        // Maybe skip dotfiles
        if ((data->options & FtwSkipDotFiles) && dname[0] == '.')
            continue;
        // Skip . and ..
        if (!strcmp(dname, ".") || !strcmp(dname, ".."))
            continue;

        // Skipped file names match ?
        if (!data->skippedNames.empty()) {
            if (inSkippedNames(dname))
                continue;
        }

        fn = path_cat(top, dname);
        int statret = path_fileprops(fn.c_str(), &st,
                                     data->options & FtwFollow);
        if (statret == -1) {
            data->logsyserr("stat", fn);
#ifdef _WIN32
            int rc = GetLastError();
            LOGERR("stat failed: LastError " << rc << endl);
            if (rc == ERROR_NETNAME_DELETED) {
                status = FtwError;
                goto out;
            }
#endif
            continue;
        }

        if (!data->skippedPaths.empty()) {
            // We do not check the ancestors. This means that you can have
            // a topdirs member under a skippedPath, to index a portion of
            // an ignored area. This is the way it had always worked, but
            // this was broken by 1.13.00 and the systematic use of
            // FNM_LEADING_DIR
            if (inSkippedPaths(fn, false))
                continue;
        }

        if (S_ISDIR(st.st_mode)) {
            // Skip directories containing the 'nowalk' marker file.
            if (!o_nowalkfn.empty() && path_exists(path_cat(fn, o_nowalkfn))) {
                continue;
            }
            if (data->options & FtwNoRecurse) {
                status = cb.processone(fn, &st, FtwDirEnter);
            } else {
                if (data->options & FtwTravNatural) {
                    status = iwalk(fn, &st, cb);
                } else {
                    // If first subdir, push marker to separate
                    // from entries for other dir. This is to help
                    // with generating DirReturn callbacks
                    if (!nullpush) {
                        if (!data->dirs.empty() &&
                            !data->dirs.back().empty())
                            data->dirs.push_back(cstr_null);
                        nullpush = true;
                    }
                    data->dirs.push_back(fn);
                    continue;
                }
            }
            // Note: only recursive case gets here.
            if (status & (FtwStop | FtwError))
                goto out;
            // NOTE(review): the callback for 'top' receives the stat data
            // of the child directory (st), not of 'top' (stp). Kept as is,
            // confirm whether any callback uses the stat on FtwDirReturn.
            if (!(data->options & FtwNoRecurse))
                if ((status = cb.processone(top, &st, FtwDirReturn)) &
                    (FtwStop | FtwError))
                    goto out;
        } else if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
            // Filtering patterns match ?
            if (!data->onlyNames.empty()) {
                if (!inOnlyNames(dname))
                    continue;
            }
            if ((status = cb.processone(fn, &st, FtwRegular)) &
                (FtwStop | FtwError)) {
                goto out;
            }
        }
        // We ignore other file types (devices etc...)
    } // readdir loop
    if (errno) {
        // Actual readdir error, not eof.
        data->logsyserr("readdir", top);
#ifdef _WIN32
        int rc = GetLastError();
        LOGERR("Readdir failed: LastError " << rc << endl);
        if (rc == ERROR_NETNAME_DELETED) {
            status = FtwError;
            goto out;
        }
#endif
    }

out:
    if (d)
        CLOSEDIR(d);
    return status;
}

// Compute the total size in bytes of the tree under topdir: allocated blocks
// times 512 on Unix-like systems, sum of the file sizes on Windows.
// Returns -1 if the walk does not return FtwOk.
int64_t fsTreeBytes(const string& topdir)
{
    class bytesCB : public FsTreeWalkerCB {
    public:
        FsTreeWalker::Status processone(const string &path,
                                        const struct stat *st,
                                        FsTreeWalker::CbFlag flg) {
            if (flg == FsTreeWalker::FtwDirEnter ||
                flg == FsTreeWalker::FtwRegular) {
#ifdef _WIN32
                totalbytes += st->st_size;
#else
                // Widen before multiplying: the block count type may be
                // narrower than 64 bits.
                totalbytes += static_cast<int64_t>(st->st_blocks) * 512;
#endif
            }
            return FsTreeWalker::FtwOk;
        }
        int64_t totalbytes{0};
    };
    FsTreeWalker walker;
    bytesCB cb;
    FsTreeWalker::Status status = walker.walk(topdir, cb);
    if (status != FsTreeWalker::FtwOk) {
        LOGERR("fsTreeBytes: walker failed: " << walker.getReason() << endl);
        return -1;
    }
    return cb.totalbytes;
}