From 3759c0b52d8b5fd13cc8a3c0fa2b69209672503e Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Wed, 30 Nov 2011 16:36:51 +0100 Subject: [PATCH] index: add skippedPathsFnmPathname variable to enable disabling the use of FNM_PATHNAME while matching in skippedPaths. Closes issue #67 --- src/common/rclconfig.cpp | 17 +++++++++++++---- src/doc/user/usermanual.sgml | 19 +++++++++++++------ src/sampleconf/recoll.conf.in | 6 ++++++ src/utils/Makefile | 2 +- src/utils/circache.cpp | 2 +- src/utils/fstreewalk.cpp | 14 ++++++++++++-- src/utils/fstreewalk.h | 12 ++++++++++++ website/index.html.en | 30 ++++++++++++------------------ 8 files changed, 70 insertions(+), 32 deletions(-) diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index 35b043f7..9b429d4f 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -43,6 +43,7 @@ #include "smallut.h" #include "textsplit.h" #include "readfile.h" +#include "fstreewalk.h" #ifndef NO_NAMESPACES using namespace std; @@ -200,6 +201,12 @@ bool RclConfig::updateMainConfig() m_rmtstate.init(this, 0, "indexedmimetypes"); return false; } + delete m_conf; + m_conf = newconf; + m_skpnstate.init(this, m_conf, "skippedNames"); + m_rmtstate.init(this, m_conf, "indexedmimetypes"); + + setKeyDir(cstr_null); bool nocjk = false; if (getConfParam("nocjk", &nocjk) && nocjk == true) { @@ -218,10 +225,12 @@ bool RclConfig::updateMainConfig() TextSplit::noNumbers(); } - delete m_conf; - m_conf = newconf; - m_skpnstate.init(this, m_conf, "skippedNames"); - m_rmtstate.init(this, m_conf, "indexedmimetypes"); + bool fnmpathname = true; + if (getConfParam("skippedPathsFnmPathname", &fnmpathname) + && fnmpathname == false) { + FsTreeWalker::setNoFnmPathname(); + } + return true; } diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml index a3e258a4..1303cec2 100644 --- a/src/doc/user/usermanual.sgml +++ b/src/doc/user/usermanual.sgml @@ -3441,12 +3441,19 @@ skippedNames = #* bin CVS Cache cache* 
caughtspam tmp .thumbnails .svn \ skippedPaths = ~/somedir/∗.txt - The values in the *skippedPaths - variables are currently matched with - fnmatch(3), with the FNM_PATHNAME and - FNM_LEADING_DIR flags. This means that '/' characters must - be matched explicitely, which is probably - unfortunate. + + + + + skippedPathsFnmPathname + The values in the + *skippedPaths variables are matched by + default with fnmatch(3), with the + FNM_PATHNAME and FNM_LEADING_DIR flags. This means that '/' + characters must be matched explicitly. You can set + skippedPathsFnmPathname to 0 to disable + the use of FNM_PATHNAME (meaning that /*/dir3 will match + /dir1/dir2/dir3). diff --git a/src/sampleconf/recoll.conf.in b/src/sampleconf/recoll.conf.in index 12fea8a8..d36fefd7 100644 --- a/src/sampleconf/recoll.conf.in +++ b/src/sampleconf/recoll.conf.in @@ -29,6 +29,12 @@ skippedNames = #* bin CVS Cache cache* caughtspam tmp .thumbnails .svn \ # not set, the daemon uses skippedPaths. #daemSkippedPaths = +# Recoll uses FNM_PATHNAME by default when matching skipped paths, which +# means that /dir1/dir2/dir3 is not matched by */dir3. Can't change the +# default now, but you can set the following variable to 0 to disable the +# use of FNM_PATHNAME (see fnmatch(3) man page) +# skippedPathsFnmPathname = 1 + # Option to follow symbolic links. 
We normally don't, to avoid duplicated # indexing (in any case, no effort is made to identify or avoid multiple # indexing of linked files) diff --git a/src/utils/Makefile b/src/utils/Makefile index 9132777f..3562f242 100644 --- a/src/utils/Makefile +++ b/src/utils/Makefile @@ -11,7 +11,7 @@ $(BIGLIB): force cd $(depth)/lib;$(MAKE) force: -FSTREEWALK_OBJS= trfstreewalk.o fstreewalk.o $(BIGLIB) +FSTREEWALK_OBJS= trfstreewalk.o $(BIGLIB) trfstreewalk : $(FSTREEWALK_OBJS) $(CXX) -o trfstreewalk $(FSTREEWALK_OBJS) $(LIBICONV) \ $(LIBSYS) diff --git a/src/utils/circache.cpp b/src/utils/circache.cpp index 9b6fc749..eeefcef0 100644 --- a/src/utils/circache.cpp +++ b/src/utils/circache.cpp @@ -1301,7 +1301,7 @@ int main(int argc, char **argv) exit(1); } string udi; - make_udi(fn, cstr_null, udi); + make_udi(fn, "", udi); sprintf(dic, "#whatever...\nmimetype = text/plain\nudi=%s\n", udi.c_str()); string sdic; diff --git a/src/utils/fstreewalk.cpp b/src/utils/fstreewalk.cpp index 9b916741..35a48eaf 100644 --- a/src/utils/fstreewalk.cpp +++ b/src/utils/fstreewalk.cpp @@ -40,6 +40,8 @@ using namespace std; #endif /* NO_NAMESPACES */ +bool FsTreeWalker::o_useFnmPathname = true; + const int FsTreeWalker::FtwTravMask = FtwTravNatural| FtwTravBreadth|FtwTravFilesThenDirs|FtwTravBreadthThenDepth; @@ -153,7 +155,7 @@ bool FsTreeWalker::setSkippedPaths(const list &paths) } bool FsTreeWalker::inSkippedPaths(const string& path, bool ckparents) { - int fnmflags = FNM_PATHNAME; + int fnmflags = o_useFnmPathname ? 
FNM_PATHNAME : 0; #ifdef FNM_LEADING_DIR if (ckparents) fnmflags |= FNM_LEADING_DIR; @@ -420,6 +422,8 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top, #include +#include "rclinit.h" +#include "rclconfig.h" #include "fstreewalk.h" using namespace std; @@ -542,7 +546,13 @@ int main(int argc, const char **argv) opt |= FsTreeWalker::FtwTravFilesThenDirs; else if (op_flags & OPT_m) opt |= FsTreeWalker::FtwTravBreadthThenDepth; - + + string reason; + if (!recollinit(0, 0, reason)) { + fprintf(stderr, "Init failed: %s\n", reason.c_str()); + exit(1); + } + FsTreeWalker walker(opt); walker.setSkippedNames(patterns); walker.setSkippedPaths(paths); diff --git a/src/utils/fstreewalk.h b/src/utils/fstreewalk.h index 75e1197e..b6a8baf5 100644 --- a/src/utils/fstreewalk.h +++ b/src/utils/fstreewalk.h @@ -38,6 +38,18 @@ struct stat; */ class FsTreeWalker { public: + // Global option to use FNM_PATHNAME when matching paths (for + // skippedPaths). + // We initially used FNM_PATHNAME, and we can't change it now + // (because of all the config files around). So add global option + // to not use the flag, which can be set from rclconfig by adding + // a value to the config file (skippedPathsFnmPathname = 0) + static bool o_useFnmPathname; + static void setNoFnmPathname() + { + o_useFnmPathname = false; + } + // Flags for call to processone(). FtwDirEnter is used when // entering a directory. FtwDirReturn is used when returning to it // after processing a subdirectory. diff --git a/website/index.html.en b/website/index.html.en index e6a251fc..40d46743 100644 --- a/website/index.html.en +++ b/website/index.html.en @@ -1,18 +1,13 @@ - - RECOLL: a personal text search system for - Unix/Linux + RECOLL: a personal text search system for Unix/Linux - - + + - + @@ -41,31 +36,30 @@ a personal full text search tool for Unix/Linux.

It is based on the very strong Xapian back-end, for which it - provides a feature-rich yet easy to use front-end with a Qt - graphical interface.

+ "http://www.xapian.org">Xapian search engine library, for + which it provides a powerful text extraction layer and a + complete, yet easy to use, Qt graphical interface.

Recoll is free, open source, and licensed under the GPL. The current version is - 1.16.2 + 1.16.2 (Release notes).

Recoll will index an MS-Word document stored as an attachment to an e-mail message inside a Thunderbird folder archived in a Zip file - (and more...). It will also let you open a copy of the file - without any fuss. There is little that will remain hidden on + (and more...). It will also help you search for it with a + friendly and powerful interface, and let you open a copy of the file + without much fuss. There is little that will remain hidden on your disk. More details …

-

-

If you have problems with Recoll, documentation and support are available.

Recoll user ? Maybe there are still a few useful - tricks that you don't know about. A quick look at + search tricks that you don't know about. A quick look at the search tips might prove useful ! Also the