Allow defining a file name which causes directory skip if present
This commit is contained in:
parent
d538ba3190
commit
c93581201a
@ -397,6 +397,11 @@ bool RclConfig::updateMainConfig()
|
|||||||
if (getConfParam("skippedPathsFnmPathname", &bvalue) && bvalue == false) {
|
if (getConfParam("skippedPathsFnmPathname", &bvalue) && bvalue == false) {
|
||||||
FsTreeWalker::setNoFnmPathname();
|
FsTreeWalker::setNoFnmPathname();
|
||||||
}
|
}
|
||||||
|
string nowalkfn;
|
||||||
|
getConfParam("nowalkfn", nowalkfn);
|
||||||
|
if (!nowalkfn.empty()) {
|
||||||
|
FsTreeWalker::setNoWalkFn(nowalkfn);
|
||||||
|
}
|
||||||
|
|
||||||
static int m_index_stripchars_init = 0;
|
static int m_index_stripchars_init = 0;
|
||||||
if (!m_index_stripchars_init) {
|
if (!m_index_stripchars_init) {
|
||||||
|
|||||||
@ -11,11 +11,10 @@ $HOME). You can use symbolic links in the list, they will be followed,
|
|||||||
independently of the value of the followLinks variable.</para></listitem></varlistentry>
|
independently of the value of the followLinks variable.</para></listitem></varlistentry>
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONITORDIRS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONITORDIRS">
|
||||||
<term><varname>monitordirs</varname></term>
|
<term><varname>monitordirs</varname></term>
|
||||||
<listitem><para>(1.24) Space-separated list of
|
<listitem><para>Space-separated list of files or directories to monitor for
|
||||||
files or directories to monitor for updates. When running
|
updates. When running the real-time indexer, this allows monitoring only a
|
||||||
the real-time indexer, this allows monitoring only a subset of the whole
|
subset of the whole indexed area. The elements must be included in the
|
||||||
indexed area. The elements must be included in the tree defined by the
|
tree defined by the 'topdirs' members.</para></listitem></varlistentry>
|
||||||
'topdirs' members.</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDNAMES">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDNAMES">
|
||||||
<term><varname>skippedNames</varname></term>
|
<term><varname>skippedNames</varname></term>
|
||||||
<listitem><para>Files and directories which should be ignored.
|
<listitem><para>Files and directories which should be ignored.
|
||||||
@ -78,6 +77,10 @@ this.</para></listitem></varlistentry>
|
|||||||
<listitem><para>Set to 0 to
|
<listitem><para>Set to 0 to
|
||||||
override use of FNM_PATHNAME for matching skipped
|
override use of FNM_PATHNAME for matching skipped
|
||||||
paths. </para></listitem></varlistentry>
|
paths. </para></listitem></varlistentry>
|
||||||
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOWALKFN">
|
||||||
|
<term><varname>nowalkfn</varname></term>
|
||||||
|
<listitem><para>File name which will cause its parent directory to be skipped. Any directory containing a file with this name will be skipped as
|
||||||
|
if it was part of the skippedPaths list. Ex: .recoll-noindex</para></listitem></varlistentry>
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMSKIPPEDPATHS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMSKIPPEDPATHS">
|
||||||
<term><varname>daemSkippedPaths</varname></term>
|
<term><varname>daemSkippedPaths</varname></term>
|
||||||
<listitem><para>skippedPaths equivalent specific to
|
<listitem><para>skippedPaths equivalent specific to
|
||||||
|
|||||||
@ -114,6 +114,13 @@ skippedPaths = /media
|
|||||||
# paths.</brief><descr></descr></var>
|
# paths.</brief><descr></descr></var>
|
||||||
#skippedPathsFnmPathname = 1
|
#skippedPathsFnmPathname = 1
|
||||||
|
|
||||||
|
# <var name="nowalkfn" type="string">
|
||||||
|
#
|
||||||
|
# <brief>File name which will cause its parent directory to be skipped.</brief>
|
||||||
|
# <descr>Any directory containing a file with this name will be skipped as
|
||||||
|
# if it was part of the skippedPaths list. Ex: .recoll-noindex</descr></var>
|
||||||
|
#nowalkfn = .recoll-noindex
|
||||||
|
|
||||||
# <var name="daemSkippedPaths" type="string">
|
# <var name="daemSkippedPaths" type="string">
|
||||||
#
|
#
|
||||||
# <brief>skippedPaths equivalent specific to
|
# <brief>skippedPaths equivalent specific to
|
||||||
|
|||||||
@ -37,7 +37,14 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
|
|||||||
-D_GNU_SOURCE \
|
-D_GNU_SOURCE \
|
||||||
$(DEFS)
|
$(DEFS)
|
||||||
|
|
||||||
noinst_PROGRAMS = textsplit
|
noinst_PROGRAMS = textsplit utf8iter fstreewalk
|
||||||
|
|
||||||
textsplit_SOURCES = trtextsplit.cpp
|
textsplit_SOURCES = trtextsplit.cpp
|
||||||
textsplit_LDADD = ../librecoll.la
|
textsplit_LDADD = ../librecoll.la
|
||||||
|
|
||||||
|
utf8iter_SOURCES = trutf8iter.cpp
|
||||||
|
utf8iter_LDADD = ../librecoll.la
|
||||||
|
|
||||||
|
fstreewalk_SOURCES = trfstreewalk.cpp
|
||||||
|
fstreewalk_LDADD = ../librecoll.la
|
||||||
|
|
||||||
|
|||||||
@ -17,8 +17,6 @@
|
|||||||
|
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#ifndef TEST_FSTREEWALK
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <dirent.h>
|
#include <dirent.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
@ -41,6 +39,7 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
bool FsTreeWalker::o_useFnmPathname = true;
|
bool FsTreeWalker::o_useFnmPathname = true;
|
||||||
|
string FsTreeWalker::o_nowalkfn;
|
||||||
|
|
||||||
const int FsTreeWalker::FtwTravMask = FtwTravNatural|
|
const int FsTreeWalker::FtwTravMask = FtwTravNatural|
|
||||||
FtwTravBreadth|FtwTravFilesThenDirs|FtwTravBreadthThenDepth;
|
FtwTravBreadth|FtwTravFilesThenDirs|FtwTravBreadthThenDepth;
|
||||||
@ -63,8 +62,7 @@ public:
|
|||||||
class FsTreeWalker::Internal {
|
class FsTreeWalker::Internal {
|
||||||
public:
|
public:
|
||||||
Internal(int opts)
|
Internal(int opts)
|
||||||
: options(opts), depthswitch(4), maxdepth(-1), errors(0)
|
: options(opts), depthswitch(4), maxdepth(-1), errors(0) {
|
||||||
{
|
|
||||||
}
|
}
|
||||||
int options;
|
int options;
|
||||||
int depthswitch;
|
int depthswitch;
|
||||||
@ -80,8 +78,7 @@ public:
|
|||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
set<DirId> donedirs;
|
set<DirId> donedirs;
|
||||||
#endif
|
#endif
|
||||||
void logsyserr(const char *call, const string &param)
|
void logsyserr(const char *call, const string &param) {
|
||||||
{
|
|
||||||
errors++;
|
errors++;
|
||||||
reason << call << "(" << param << ") : " << errno << " : " <<
|
reason << call << "(" << param << ") : " << errno << " : " <<
|
||||||
strerror(errno) << endl;
|
strerror(errno) << endl;
|
||||||
@ -436,6 +433,9 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (S_ISDIR(st.st_mode)) {
|
if (S_ISDIR(st.st_mode)) {
|
||||||
|
if (!o_nowalkfn.empty() && path_exists(path_cat(fn, o_nowalkfn))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (data->options & FtwNoRecurse) {
|
if (data->options & FtwNoRecurse) {
|
||||||
status = cb.processone(fn, &st, FtwDirEnter);
|
status = cb.processone(fn, &st, FtwDirEnter);
|
||||||
} else {
|
} else {
|
||||||
@ -506,185 +506,3 @@ int64_t fsTreeBytes(const string& topdir)
|
|||||||
}
|
}
|
||||||
return cb.totalbytes;
|
return cb.totalbytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else // TEST_FSTREEWALK
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
#include "rclinit.h"
|
|
||||||
#include "rclconfig.h"
|
|
||||||
#include "fstreewalk.h"
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
static int op_flags;
|
|
||||||
#define OPT_MOINS 0x1
|
|
||||||
#define OPT_p 0x2
|
|
||||||
#define OPT_P 0x4
|
|
||||||
#define OPT_r 0x8
|
|
||||||
#define OPT_c 0x10
|
|
||||||
#define OPT_b 0x20
|
|
||||||
#define OPT_d 0x40
|
|
||||||
#define OPT_m 0x80
|
|
||||||
#define OPT_L 0x100
|
|
||||||
#define OPT_w 0x200
|
|
||||||
#define OPT_M 0x400
|
|
||||||
#define OPT_D 0x800
|
|
||||||
#define OPT_k 0x1000
|
|
||||||
class myCB : public FsTreeWalkerCB {
|
|
||||||
public:
|
|
||||||
FsTreeWalker::Status processone(const string &path,
|
|
||||||
const struct stat *st,
|
|
||||||
FsTreeWalker::CbFlag flg)
|
|
||||||
{
|
|
||||||
if (flg == FsTreeWalker::FtwDirEnter) {
|
|
||||||
if (op_flags & OPT_r)
|
|
||||||
cout << path << endl;
|
|
||||||
else
|
|
||||||
cout << "[Entering " << path << "]" << endl;
|
|
||||||
} else if (flg == FsTreeWalker::FtwDirReturn) {
|
|
||||||
cout << "[Returning to " << path << "]" << endl;
|
|
||||||
} else if (flg == FsTreeWalker::FtwRegular) {
|
|
||||||
cout << path << endl;
|
|
||||||
}
|
|
||||||
return FsTreeWalker::FtwOk;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
static const char *thisprog;
|
|
||||||
|
|
||||||
// Note that breadth first sorting is relatively expensive: less inode
|
|
||||||
// locality, more disk usage (and also more user memory usage, does
|
|
||||||
// not appear here). Some typical results on a real tree with 2.6
|
|
||||||
// million entries (220MB of name data)
|
|
||||||
// Recoll 1.13
|
|
||||||
// time trfstreewalk / > /data/tmp/old
|
|
||||||
// real 13m32.839s user 0m4.443s sys 0m31.128s
|
|
||||||
//
|
|
||||||
// Recoll 1.14
|
|
||||||
// time trfstreewalk / > /data/tmp/nat;
|
|
||||||
// real 13m28.685s user 0m4.430s sys 0m31.083s
|
|
||||||
// time trfstreewalk -d / > /data/tmp/depth;
|
|
||||||
// real 13m30.051s user 0m4.140s sys 0m33.862s
|
|
||||||
// time trfstreewalk -m / > /data/tmp/mixed;
|
|
||||||
// real 14m53.245s user 0m4.244s sys 0m34.494s
|
|
||||||
// time trfstreewalk -b / > /data/tmp/breadth;
|
|
||||||
// real 17m10.585s user 0m4.532s sys 0m35.033s
|
|
||||||
|
|
||||||
static char usage [] =
|
|
||||||
"trfstreewalk [-p pattern] [-P ignpath] [-r] [-c] [-L] topdir\n"
|
|
||||||
" -r : norecurse\n"
|
|
||||||
" -c : no path canonification\n"
|
|
||||||
" -L : follow symbolic links\n"
|
|
||||||
" -b : use breadth first walk\n"
|
|
||||||
" -d : use almost depth first (dir files, then subdirs)\n"
|
|
||||||
" -m : use breadth up to 4 deep then switch to -d\n"
|
|
||||||
" -w : unset default FNM_PATHNAME when using fnmatch() to match skipped paths\n"
|
|
||||||
" -M <depth>: limit depth (works with -b/m/d)\n"
|
|
||||||
" -D : skip dotfiles\n"
|
|
||||||
"-k : like du\n"
|
|
||||||
;
|
|
||||||
static void
|
|
||||||
Usage(void)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, const char **argv)
|
|
||||||
{
|
|
||||||
vector<string> patterns;
|
|
||||||
vector<string> paths;
|
|
||||||
int maxdepth = -1;
|
|
||||||
|
|
||||||
thisprog = argv[0];
|
|
||||||
argc--; argv++;
|
|
||||||
while (argc > 0 && **argv == '-') {
|
|
||||||
(*argv)++;
|
|
||||||
if (!(**argv))
|
|
||||||
/* Case of a lone "-" argument, as in "adb - core" */
|
|
||||||
Usage();
|
|
||||||
while (**argv)
|
|
||||||
switch (*(*argv)++) {
|
|
||||||
case 'b': op_flags |= OPT_b; break;
|
|
||||||
case 'c': op_flags |= OPT_c; break;
|
|
||||||
case 'd': op_flags |= OPT_d; break;
|
|
||||||
case 'D': op_flags |= OPT_D; break;
|
|
||||||
case 'k': op_flags |= OPT_k; break;
|
|
||||||
case 'L': op_flags |= OPT_L; break;
|
|
||||||
case 'm': op_flags |= OPT_m; break;
|
|
||||||
case 'M': op_flags |= OPT_M; if (argc < 2) Usage();
|
|
||||||
maxdepth = atoi(*(++argv));
|
|
||||||
argc--;
|
|
||||||
goto b1;
|
|
||||||
case 'p': op_flags |= OPT_p; if (argc < 2) Usage();
|
|
||||||
patterns.push_back(*(++argv));
|
|
||||||
argc--;
|
|
||||||
goto b1;
|
|
||||||
case 'P': op_flags |= OPT_P; if (argc < 2) Usage();
|
|
||||||
paths.push_back(*(++argv));
|
|
||||||
argc--;
|
|
||||||
goto b1;
|
|
||||||
case 'r': op_flags |= OPT_r; break;
|
|
||||||
case 'w': op_flags |= OPT_w; break;
|
|
||||||
default: Usage(); break;
|
|
||||||
}
|
|
||||||
b1: argc--; argv++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (argc != 1)
|
|
||||||
Usage();
|
|
||||||
string topdir = *argv++;argc--;
|
|
||||||
|
|
||||||
if (op_flags & OPT_k) {
|
|
||||||
int64_t bytes = fsTreeBytes(topdir);
|
|
||||||
if (bytes < 0) {
|
|
||||||
cerr << "fsTreeBytes failed\n";
|
|
||||||
return 1;
|
|
||||||
} else {
|
|
||||||
cout << bytes / 1024 << "\t" << topdir << endl;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int opt = 0;
|
|
||||||
if (op_flags & OPT_r)
|
|
||||||
opt |= FsTreeWalker::FtwNoRecurse;
|
|
||||||
if (op_flags & OPT_c)
|
|
||||||
opt |= FsTreeWalker::FtwNoCanon;
|
|
||||||
if (op_flags & OPT_L)
|
|
||||||
opt |= FsTreeWalker::FtwFollow;
|
|
||||||
if (op_flags & OPT_D)
|
|
||||||
opt |= FsTreeWalker::FtwSkipDotFiles;
|
|
||||||
|
|
||||||
if (op_flags & OPT_b)
|
|
||||||
opt |= FsTreeWalker::FtwTravBreadth;
|
|
||||||
else if (op_flags & OPT_d)
|
|
||||||
opt |= FsTreeWalker::FtwTravFilesThenDirs;
|
|
||||||
else if (op_flags & OPT_m)
|
|
||||||
opt |= FsTreeWalker::FtwTravBreadthThenDepth;
|
|
||||||
|
|
||||||
string reason;
|
|
||||||
if (!recollinit(0, 0, reason)) {
|
|
||||||
fprintf(stderr, "Init failed: %s\n", reason.c_str());
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
if (op_flags & OPT_w) {
|
|
||||||
FsTreeWalker::setNoFnmPathname();
|
|
||||||
}
|
|
||||||
FsTreeWalker walker;
|
|
||||||
walker.setOpts(opt);
|
|
||||||
walker.setMaxDepth(maxdepth);
|
|
||||||
walker.setSkippedNames(patterns);
|
|
||||||
walker.setSkippedPaths(paths);
|
|
||||||
myCB cb;
|
|
||||||
walker.walk(topdir, cb);
|
|
||||||
if (walker.getErrCnt() > 0)
|
|
||||||
cout << walker.getReason();
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // TEST_FSTREEWALK
|
|
||||||
|
|||||||
@ -49,6 +49,15 @@ class FsTreeWalker {
|
|||||||
o_useFnmPathname = false;
|
o_useFnmPathname = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Global option to observe a "nowalk" file, which makes us treat
|
||||||
|
// directories as if they were in skippedPaths if the file exists
|
||||||
|
// inside the directory.
|
||||||
|
static std::string o_nowalkfn;
|
||||||
|
static void setNoWalkFn(const std::string& nowalkfn)
|
||||||
|
{
|
||||||
|
o_nowalkfn = nowalkfn;
|
||||||
|
}
|
||||||
|
|
||||||
// Flags for call to processone(). FtwDirEnter is used when
|
// Flags for call to processone(). FtwDirEnter is used when
|
||||||
// entering a directory. FtwDirReturn is used when returning to it
|
// entering a directory. FtwDirReturn is used when returning to it
|
||||||
// after processing a subdirectory.
|
// after processing a subdirectory.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user