index: add skippedPathsFnmPathname variable to enable disabling the use of FNM_PATHNAME while matching in skippedPaths. Closes issue #67

This commit is contained in:
Jean-Francois Dockes 2011-11-30 16:36:51 +01:00
parent 2afc769c38
commit 3759c0b52d
8 changed files with 70 additions and 32 deletions

View File

@ -43,6 +43,7 @@
#include "smallut.h" #include "smallut.h"
#include "textsplit.h" #include "textsplit.h"
#include "readfile.h" #include "readfile.h"
#include "fstreewalk.h"
#ifndef NO_NAMESPACES #ifndef NO_NAMESPACES
using namespace std; using namespace std;
@ -200,6 +201,12 @@ bool RclConfig::updateMainConfig()
m_rmtstate.init(this, 0, "indexedmimetypes"); m_rmtstate.init(this, 0, "indexedmimetypes");
return false; return false;
} }
delete m_conf;
m_conf = newconf;
m_skpnstate.init(this, m_conf, "skippedNames");
m_rmtstate.init(this, m_conf, "indexedmimetypes");
setKeyDir(cstr_null); setKeyDir(cstr_null);
bool nocjk = false; bool nocjk = false;
if (getConfParam("nocjk", &nocjk) && nocjk == true) { if (getConfParam("nocjk", &nocjk) && nocjk == true) {
@ -218,10 +225,12 @@ bool RclConfig::updateMainConfig()
TextSplit::noNumbers(); TextSplit::noNumbers();
} }
delete m_conf; bool fnmpathname = true;
m_conf = newconf; if (getConfParam("skippedPathsFnmPathname", &fnmpathname)
m_skpnstate.init(this, m_conf, "skippedNames"); && fnmpathname == false) {
m_rmtstate.init(this, m_conf, "indexedmimetypes"); FsTreeWalker::setNoFnmPathname();
}
return true; return true;
} }

View File

@ -3441,12 +3441,19 @@ skippedNames = #* bin CVS Cache cache* caughtspam tmp .thumbnails .svn \
<programlisting> <programlisting>
skippedPaths = ~/somedir/&lowast;.txt skippedPaths = ~/somedir/&lowast;.txt
</programlisting> </programlisting>
<para>The values in the <literal>*skippedPaths</literal> </listitem>
variables are currently matched with </varlistentry>
<literal>fnmatch(3)</literal>, with the FNM_PATHNAME and
FNM_LEADING_DIR flags. This means that '/' characters must <varlistentry id="rcl.install.config.recollconf.skippedpathsfnmpathname">
be matched explicitely, which is probably <term><literal>skippedPathsFnmPathname</literal></term>
unfortunate.</para> <listitem><para>The values in the
<literal>*skippedPaths</literal> variables are matched by
default with <literal>fnmatch(3)</literal>, with the
FNM_PATHNAME and FNM_LEADING_DIR flags. This means that '/'
characters must be matched explicitly. You can set
<literal>skippedPathsFnmPathname</literal> to 0 to disable
the use of FNM_PATHNAME (meaning that /*/dir3 will match
/dir1/dir2/dir3).</para>
</listitem> </listitem>
</varlistentry> </varlistentry>

View File

@ -29,6 +29,12 @@ skippedNames = #* bin CVS Cache cache* caughtspam tmp .thumbnails .svn \
# not set, the daemon uses skippedPaths. # not set, the daemon uses skippedPaths.
#daemSkippedPaths = #daemSkippedPaths =
# Recoll uses FNM_PATHNAME by default when matching skipped paths, which
# means that /dir1/dir2/dir3 is not matched by */dir3. Can't change the
# default now, but you can set the following variable to 0 to disable the
# use of FNM_PATHNAME (see fnmatch(3) man page)
# skippedPathsFnmPathname = 1
# Option to follow symbolic links. We normally don't, to avoid duplicated # Option to follow symbolic links. We normally don't, to avoid duplicated
# indexing (in any case, no effort is made to identify or avoid multiple # indexing (in any case, no effort is made to identify or avoid multiple
# indexing of linked files) # indexing of linked files)

View File

@ -11,7 +11,7 @@ $(BIGLIB): force
cd $(depth)/lib;$(MAKE) cd $(depth)/lib;$(MAKE)
force: force:
FSTREEWALK_OBJS= trfstreewalk.o fstreewalk.o $(BIGLIB) FSTREEWALK_OBJS= trfstreewalk.o $(BIGLIB)
trfstreewalk : $(FSTREEWALK_OBJS) trfstreewalk : $(FSTREEWALK_OBJS)
$(CXX) -o trfstreewalk $(FSTREEWALK_OBJS) $(LIBICONV) \ $(CXX) -o trfstreewalk $(FSTREEWALK_OBJS) $(LIBICONV) \
$(LIBSYS) $(LIBSYS)

View File

@ -1301,7 +1301,7 @@ int main(int argc, char **argv)
exit(1); exit(1);
} }
string udi; string udi;
make_udi(fn, cstr_null, udi); make_udi(fn, "", udi);
sprintf(dic, "#whatever...\nmimetype = text/plain\nudi=%s\n", sprintf(dic, "#whatever...\nmimetype = text/plain\nudi=%s\n",
udi.c_str()); udi.c_str());
string sdic; string sdic;

View File

@ -40,6 +40,8 @@
using namespace std; using namespace std;
#endif /* NO_NAMESPACES */ #endif /* NO_NAMESPACES */
bool FsTreeWalker::o_useFnmPathname = true;
const int FsTreeWalker::FtwTravMask = FtwTravNatural| const int FsTreeWalker::FtwTravMask = FtwTravNatural|
FtwTravBreadth|FtwTravFilesThenDirs|FtwTravBreadthThenDepth; FtwTravBreadth|FtwTravFilesThenDirs|FtwTravBreadthThenDepth;
@ -153,7 +155,7 @@ bool FsTreeWalker::setSkippedPaths(const list<string> &paths)
} }
bool FsTreeWalker::inSkippedPaths(const string& path, bool ckparents) bool FsTreeWalker::inSkippedPaths(const string& path, bool ckparents)
{ {
int fnmflags = FNM_PATHNAME; int fnmflags = o_useFnmPathname ? FNM_PATHNAME : 0;
#ifdef FNM_LEADING_DIR #ifdef FNM_LEADING_DIR
if (ckparents) if (ckparents)
fnmflags |= FNM_LEADING_DIR; fnmflags |= FNM_LEADING_DIR;
@ -420,6 +422,8 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
#include <iostream> #include <iostream>
#include "rclinit.h"
#include "rclconfig.h"
#include "fstreewalk.h" #include "fstreewalk.h"
using namespace std; using namespace std;
@ -542,7 +546,13 @@ int main(int argc, const char **argv)
opt |= FsTreeWalker::FtwTravFilesThenDirs; opt |= FsTreeWalker::FtwTravFilesThenDirs;
else if (op_flags & OPT_m) else if (op_flags & OPT_m)
opt |= FsTreeWalker::FtwTravBreadthThenDepth; opt |= FsTreeWalker::FtwTravBreadthThenDepth;
string reason;
if (!recollinit(0, 0, reason)) {
fprintf(stderr, "Init failed: %s\n", reason.c_str());
exit(1);
}
FsTreeWalker walker(opt); FsTreeWalker walker(opt);
walker.setSkippedNames(patterns); walker.setSkippedNames(patterns);
walker.setSkippedPaths(paths); walker.setSkippedPaths(paths);

View File

@ -38,6 +38,18 @@ struct stat;
*/ */
class FsTreeWalker { class FsTreeWalker {
public: public:
// Global option to use FNM_PATHNAME when matching paths (for
// skippedPaths).
// We initially used FNM_PATHNAME, and we can't change the default now
// (because of all the config files around). So add a global option
// to not use the flag. It is cleared from rclconfig when the
// configuration sets skippedPathsFnmPathname to 0.
// Being static, the value is shared by all FsTreeWalker instances.
static bool o_useFnmPathname;
// Turn off the use of FNM_PATHNAME for skippedPaths matching.
// NOTE(review): no matching call to turn the flag back on is visible
// here -- confirm whether one is needed (e.g. on configuration reload).
static void setNoFnmPathname()
{
o_useFnmPathname = false;
}
// Flags for call to processone(). FtwDirEnter is used when // Flags for call to processone(). FtwDirEnter is used when
// entering a directory. FtwDirReturn is used when returning to it // entering a directory. FtwDirReturn is used when returning to it
// after processing a subdirectory. // after processing a subdirectory.

View File

@ -1,18 +1,13 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html> <html>
<head> <head>
<title>RECOLL: a personal text search system for <title>RECOLL: a personal text search system for Unix/Linux</title>
Unix/Linux</title>
<meta name="generator" content="HTML Tidy, see www.w3.org"> <meta name="generator" content="HTML Tidy, see www.w3.org">
<meta name="Author" content="Jean-Francois Dockes"> <meta name="Author" content="Jean-Francois Dockes">
<meta name="Description" content= <meta name="Description" content="Recoll is a personal desktop full text search application for Unix, Linux and Mac OS X, based on the Xapian search engine library.">
"recoll is a simple full-text search system for unix and linux based on the powerful and mature xapian engine"> <meta name="Keywords" content="full text search,fulltext,full text,search,desktop search,unix,linux,open source,free">
<meta name="Keywords" content=
"full text search,fulltext,desktop search,unix,linux,solaris,open source,free">
<meta http-equiv="Content-language" content="en"> <meta http-equiv="Content-language" content="en">
<meta http-equiv="content-type" content= <meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
"text/html; charset=iso-8859-1">
<meta name="robots" content="All,Index,Follow"> <meta name="robots" content="All,Index,Follow">
<link type="text/css" rel="stylesheet" href="styles/style.css"> <link type="text/css" rel="stylesheet" href="styles/style.css">
</head> </head>
@ -41,31 +36,30 @@
a personal full text search tool for Unix/Linux.</h1> a personal full text search tool for Unix/Linux.</h1>
<p>It is based on the very strong <a href= <p>It is based on the very strong <a href=
"http://www.xapian.org">Xapian</a> back-end, for which it "http://www.xapian.org">Xapian</a> search engine library, for
provides a feature-rich yet easy to use front-end with a Qt which it provides a powerful text extraction layer and a
graphical interface.</p> complete, yet easy to use, Qt graphical interface.</p>
<p><span class="application">Recoll</span> is free, open source, <p><span class="application">Recoll</span> is free, open source,
and licensed under the GPL. The current version is and licensed under the GPL. The current version is
<a class="important" href="download.html">1.16.2</a> <a href="download.html">1.16.2</a>
(<a href="release-1.16.html">Release notes</a>).</p> (<a href="release-1.16.html">Release notes</a>).</p>
<p class="remark">Recoll will index an <b>MS-Word</b> document stored as <p class="remark">Recoll will index an <b>MS-Word</b> document stored as
an <b>attachment</b> to an <b>e-mail message</b> inside an <b>attachment</b> to an <b>e-mail message</b> inside
a <b>Thunderbird folder</b> archived in a <b>Zip file</b> a <b>Thunderbird folder</b> archived in a <b>Zip file</b>
(and more...). It will also let you open a copy of the file (and more...). It will also help you search for it with a
without any fuss. There is little that will remain hidden on friendly and powerful interface, and let you open a copy of the file
without much fuss. There is little that will remain hidden on
your disk. your disk.
<a class="important" href="features.html">More details &hellip;</a></p> <a class="important" href="features.html">More details &hellip;</a></p>
<p></p>
<p>If you have problems with <p>If you have problems with
Recoll, <a href="doc.html">documentation</a> Recoll, <a href="doc.html">documentation</a>
and <a href="support.html">support</a> are available.</p> and <a href="support.html">support</a> are available.</p>
<p><b><i>Recoll user ?</i></b> Maybe there are still a few useful <p><b><i>Recoll user ?</i></b> Maybe there are still a few useful
tricks that you don't know about. A quick look at search tricks that you don't know about. A quick look at
the <a href="usermanual/rcl.search.tips.html">search the <a href="usermanual/rcl.search.tips.html">search
tips</a> might prove useful ! Also the tips</a> might prove useful ! Also the
<a href="http://bitbucket.org/medoc/recoll/wiki/FaqsAndHowTos"> <a href="http://bitbucket.org/medoc/recoll/wiki/FaqsAndHowTos">