Moved mimemap variable recoll_noindex to recoll.conf noContentSuffixes

This commit is contained in:
Jean-Francois Dockes 2015-03-25 11:48:59 +01:00
parent 34e1a25d31
commit e0517a7d13
6 changed files with 155 additions and 39 deletions

View File

@ -114,6 +114,7 @@ void RclConfig::zeroMe() {
m_stopsuffixes = 0;
m_maxsufflen = 0;
m_oldstpsuffstate.init(0);
m_stpsuffstate.init(0);
m_skpnstate.init(0);
m_rmtstate.init(0);
@ -130,7 +131,8 @@ bool RclConfig::isDefaultConfig() const
}
RclConfig::RclConfig(const string *argcnf)
: m_stpsuffstate(this, "recoll_noindex"),
: m_oldstpsuffstate(this, "recoll_noindex"),
m_stpsuffstate(this, "noContentSuffixes"),
m_skpnstate(this, "skippedNames"),
m_rmtstate(this, "indexedmimetypes"),
m_xmtstate(this, "excludedmimetypes"),
@ -282,7 +284,8 @@ RclConfig::RclConfig(const string *argcnf)
m_ok = true;
setKeyDir(cstr_null);
m_stpsuffstate.init(mimemap);
m_oldstpsuffstate.init(mimemap);
m_stpsuffstate.init(m_conf);
m_skpnstate.init(m_conf);
m_rmtstate.init(m_conf);
m_xmtstate.init(m_conf);
@ -605,17 +608,24 @@ typedef multiset<SfString, SuffCmp> SuffixStore;
bool RclConfig::inStopSuffixes(const string& fni)
{
LOGDEB2(("RclConfig::inStopSuffixes(%s)\n", fni.c_str()));
// Beware: needrecompute() needs to be called always. 2nd test stays back.
if (m_stpsuffstate.needrecompute() || m_stopsuffixes == 0) {
// Beware: both needrecompute() need to be called always hence the
// bizarre way we do things
bool needrecompute = m_stpsuffstate.needrecompute();
needrecompute = needrecompute || m_oldstpsuffstate.needrecompute();
if (needrecompute || m_stopsuffixes == 0) {
// Need to initialize the suffixes
delete STOPSUFFIXES;
if ((m_stopsuffixes = new SuffixStore) == 0) {
LOGERR(("RclConfig::inStopSuffixes: out of memory\n"));
return false;
}
list<string> stoplist;
vector<string> stoplist;
stringToStrings(m_stpsuffstate.savedvalue, stoplist);
for (list<string>::const_iterator it = stoplist.begin();
vector<string> ostoplist;
stringToStrings(m_oldstpsuffstate.savedvalue, ostoplist);
stoplist.resize(stoplist.size() + ostoplist.size());
stoplist.insert(stoplist.end(), ostoplist.begin(), ostoplist.end());
for (vector<string>::const_iterator it = stoplist.begin();
it != stoplist.end(); it++) {
STOPSUFFIXES->insert(SfString(stringtolower(*it)));
if (m_maxsufflen < it->length())
@ -1461,7 +1471,8 @@ void RclConfig::initFrom(const RclConfig& r)
m_maxsufflen = r.m_maxsufflen;
m_defcharset = r.m_defcharset;
m_stpsuffstate.init(mimemap);
m_oldstpsuffstate.init(mimemap);
m_stpsuffstate.init(m_conf);
m_skpnstate.init(m_conf);
m_rmtstate.init(m_conf);
m_xmtstate.init(m_conf);

View File

@ -81,7 +81,8 @@ class RclConfig {
RclConfig(const string *argcnf = 0);
RclConfig(const RclConfig &r)
: m_stpsuffstate(this, "recoll_noindex"),
: m_oldstpsuffstate(this, "recoll_noindex"),
m_stpsuffstate(this, "noContentSuffixes"),
m_skpnstate(this, "skippedNames"),
m_rmtstate(this, "indexedmimetypes"),
m_xmtstate(this, "excludedmimetypes"),
@ -334,6 +335,7 @@ class RclConfig {
void *m_stopsuffixes;
unsigned int m_maxsufflen;
ParamStale m_oldstpsuffstate; // Values from user mimemap, now obsolete
ParamStale m_stpsuffstate;
ParamStale m_skpnstate;

View File

@ -3,7 +3,7 @@
<!ENTITY RCL "<application>Recoll</application>">
<!ENTITY RCLAPPS "<ulink url='http://www.recoll.org/features.html'>http://www.recoll.org/features.html</ulink>">
<!ENTITY RCLVERSION "1.20">
<!ENTITY RCLVERSION "1.21">
<!ENTITY XAP "<application>Xapian</application>">
<!ENTITY WIKI "http://bitbucket.org/medoc/recoll/wiki/">
]>
@ -22,7 +22,7 @@
</author>
<copyright>
<year>2005-2014</year>
<year>2005-2015</year>
<holder role="mailto:jfd@recoll.org">Jean-Francois Dockes</holder>
</copyright>
<abstract>
@ -274,6 +274,14 @@
aspects of the indexing processes and configuration, with links
to detailed sections.</para>
<para>Depending on your data, temporary files may be needed during
indexing, some of them possibly quite big. You can use the
<envar>RECOLL_TMPDIR</envar> or <envar>TMPDIR</envar> environment
variables to determine where they are created (the default is to
use <filename>/tmp</filename>). Using <envar>TMPDIR</envar> has
the nice property that it may also be taken into account by
auxiliary commands executed by <command>recollindex</command>.</para>
<sect2 id="RCL.INDEXING.INTRODUCTION.MODES">
<title>Indexing modes</title>
@ -5122,6 +5130,73 @@ except:
</itemizedlist>
</para>
<sect2 id="RCL.INSTALL.CONFIG.ENVIR">
<title>Environment variables</title>
<variablelist>
<varlistentry>
<term><varname>RECOLL_CONFDIR</varname></term>
<listitem><para>Defines the main configuration
directory.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RECOLL_TMPDIR, TMPDIR</varname></term>
<listitem><para>Locations for temporary files, in this order
of priority. The default if none of these is set is to use
<filename>/tmp</filename>. Big temporary files may be created
during indexing, mostly for decompressing, and also for
processing, e.g. email attachments.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RECOLL_CONFTOP, RECOLL_CONFMID</varname></term>
<listitem><para>Allow adding configuration directories with
priorities below and above the user directory (see the
Configuration overview section above for details).</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RECOLL_EXTRA_DBS,
RECOLL_ACTIVE_EXTRA_DBS</varname></term>
<listitem><para>
Help for setting up external indexes. See <link
linkend="RCL.SEARCH.GUI.MULTIDB">this paragraph</link> for
explanations.
</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RECOLL_DATADIR</varname></term>
<listitem><para>Defines replacement for the default location
of Recoll data files, normally found in, e.g.,
<filename>/usr/share/recoll</filename>.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RECOLL_FILTERSDIR</varname></term>
<listitem><para>Defines replacement for the default location
of Recoll filters, normally found in, e.g.,
<filename>/usr/share/recoll/filters</filename>.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>ASPELL_PROG</varname></term>
<listitem><para><command>aspell</command> program to use for
creating the spelling dictionary. The result has to be
compatible with the <filename>libaspell</filename> which &RCL;
is using.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>VARNAME</varname></term>
<listitem><para>Blabla</para></listitem>
</varlistentry>
</variablelist>
</sect2>
<sect2 id="RCL.INSTALL.CONFIG.RECOLLCONF">
<title>The main configuration file, recoll.conf</title>
@ -5188,12 +5263,29 @@ skippedNames = #* bin CVS Cache cache* caughtspam tmp .thumbnails .svn \
<para>Not even the file names are indexed for patterns
in this list. See the
<varname>recoll_noindex</varname> variable in
<filename>mimemap</filename> for an alternative
<varname>noContentSuffixes</varname> variable for an alternative
approach which indexes the file names.</para>
</listitem>
</varlistentry>
<varlistentry><term><varname>noContentSuffixes</varname></term>
<listitem><para>This is a list of file name endings (not
wildcard expressions, nor dot-delimited suffixes). Only the
names of matching files will be indexed (no attempt at MIME
type identification, no decompression, no content
indexing). This can be redefined for
subdirectories, and edited from the GUI. The default value is:
<programlisting>
noContentSuffixes = .md5 .map \
.o .lib .dll .a .sys .exe .com \
.mpp .mpt .vsd \
.img .img.gz .img.bz2 .img.xz .image .image.gz .image.bz2 .image.xz \
.dat .bak .rdf .log.gz .log .db .msf .pid \
,v ~ #
</programlisting>
</para></listitem>
</varlistentry>
<varlistentry><term><varname>skippedPaths</varname> and
<varname>daemSkippedPaths</varname> </term>
<listitem>
@ -6049,21 +6141,14 @@ x-my-tag = mailmytag
should be handled specially, which is possible because they
are usually all located in one place.</para>
<para><filename>mimemap</filename> also has a
<varname>recoll_noindex</varname> variable which is a list of
suffixes. Matching files will be skipped (which avoids
unnecessary decompressions or <command>file</command>
executions). This is partially redundant with
<varname>skippedNames</varname> in the main configuration
file, with a few differences: it will not affect directories,
it cannot be made dependant on the file-system location (it is
a configuration-wide parameter), and the file names will still
be indexed (not even the file names are indexed for patterns
in <varname>skippedNames</varname>.
<varname>recoll_noindex</varname> is used mostly for things
known to be unindexable by a given &RCL; version. Having it
there avoids cluttering the more user-oriented and locally
customized <varname>skippedNames</varname>.</para>
<para>The <varname>recoll_noindex</varname>
<filename>mimemap</filename> variable has been moved to
<filename>recoll.conf</filename> and renamed to
<varname>noContentSuffixes</varname>, while keeping the same
function, as of &RCL; version 1.21. For older &RCL; versions,
see the documentation for <varname>noContentSuffixes</varname>
but use <varname>recoll_noindex</varname> in
<filename>mimemap</filename>.</para>
</sect2>

View File

@ -454,6 +454,17 @@ ConfSubPanelW::ConfSubPanelW(QWidget *parent, ConfNull *config,
m_widgets.push_back(eexcm);
gl1->addWidget(eexcm, gridy, 0);
ConfParamSLW *encs = new ConfParamSLW(
m_groupbox,
ConfLink(new ConfLinkRclRep(config, "noContentSuffixes", &m_sk)),
QObject::tr("Ignored endings"),
QObject::tr("These are file name endings for files which will be "
"indexed by content only \n(no MIME type identification "
"attempt, no decompression, no content indexing."));
encs->setFsEncoding(true);
m_widgets.push_back(encs);
gl1->addWidget(encs, gridy++, 1);
vector<string> args;
args.push_back("-l");
ExecCmd ex;
@ -484,7 +495,7 @@ ConfSubPanelW::ConfSubPanelW(QWidget *parent, ConfNull *config,
"and the value from the NLS environnement is used."
), charsets);
m_widgets.push_back(e21);
gl1->addWidget(e21, gridy++, 1);
gl1->addWidget(e21, gridy++, 0);
ConfParamBoolW *e3 = new ConfParamBoolW(
m_groupbox,

View File

@ -164,17 +164,10 @@
.mht = application/x-mimehtml
.mhtml = application/x-mimehtml
# A list of suffixes (name endings) that we don't want to touch at all.
# Having these explicitely listed speeds things up a bit by avoiding
# unneeded decompression or 'file' calls. File names still get indexed if
# indexallfilenames is set (so this is different from skippedNames). It's a
# bit unconsistent to have it listed among the suffix translations, but no
# problem in practice.
recoll_noindex = .md5 .map \
.o .lib .dll .a .sys .exe .com \
.mpp .mpt .vsd \
.dat .bak .rdf .log.gz .log .db .msf .pid \
,v ~ #
# Note: recoll_noindex has been obsoleted and moved to recoll.conf as
# noContentSuffixes. recoll_noindex from your personal mimemap file is
# still taken into account for now, but you should move its contents to the
# new recoll.conf variable.
# Special handling of .txt files inside ~/.gaim and ~/.purple directories
[~/.gaim]

View File

@ -31,6 +31,20 @@ skippedNames = #* bin CVS Cache cache* .cache caughtspam tmp \
# Explicitly adding /media/xxx to the topdirs will override this.
skippedPaths = /media
# List of suffixes for which we don't try mime type identification (and
# don't uncompress or index content obviously). This complements the now
# obsoleted mimemap recoll_noindex list, which will go away in a future
# release (the move from mimemap to recoll.conf allows editing the list
# through the GUI). This is different from skippedNames because these are
# name ending matches only (not wildcard patterns), and the file name
# itself gets indexed normally.
noContentSuffixes = .md5 .map \
.o .lib .dll .a .sys .exe .com \
.mpp .mpt .vsd \
.img .img.gz .img.bz2 .img.xz .image .image.gz .image.bz2 .image.xz \
.dat .bak .rdf .log.gz .log .db .msf .pid \
,v ~ #
# Same for real time indexing. The idea here is that there is stuff that
# you might want to initially index but not monitor. If daemSkippedPaths is
# not set, the daemon uses skippedPaths.