Moved mimemap variable recoll_noindex to recoll.conf noContentSuffixes
This commit is contained in:
parent
34e1a25d31
commit
e0517a7d13
@ -114,6 +114,7 @@ void RclConfig::zeroMe() {
|
||||
m_stopsuffixes = 0;
|
||||
m_maxsufflen = 0;
|
||||
|
||||
m_oldstpsuffstate.init(0);
|
||||
m_stpsuffstate.init(0);
|
||||
m_skpnstate.init(0);
|
||||
m_rmtstate.init(0);
|
||||
@ -130,7 +131,8 @@ bool RclConfig::isDefaultConfig() const
|
||||
}
|
||||
|
||||
RclConfig::RclConfig(const string *argcnf)
|
||||
: m_stpsuffstate(this, "recoll_noindex"),
|
||||
: m_oldstpsuffstate(this, "recoll_noindex"),
|
||||
m_stpsuffstate(this, "noContentSuffixes"),
|
||||
m_skpnstate(this, "skippedNames"),
|
||||
m_rmtstate(this, "indexedmimetypes"),
|
||||
m_xmtstate(this, "excludedmimetypes"),
|
||||
@ -282,7 +284,8 @@ RclConfig::RclConfig(const string *argcnf)
|
||||
m_ok = true;
|
||||
setKeyDir(cstr_null);
|
||||
|
||||
m_stpsuffstate.init(mimemap);
|
||||
m_oldstpsuffstate.init(mimemap);
|
||||
m_stpsuffstate.init(m_conf);
|
||||
m_skpnstate.init(m_conf);
|
||||
m_rmtstate.init(m_conf);
|
||||
m_xmtstate.init(m_conf);
|
||||
@ -605,17 +608,24 @@ typedef multiset<SfString, SuffCmp> SuffixStore;
|
||||
bool RclConfig::inStopSuffixes(const string& fni)
|
||||
{
|
||||
LOGDEB2(("RclConfig::inStopSuffixes(%s)\n", fni.c_str()));
|
||||
// Beware: needrecompute() needs to be called always. 2nd test stays back.
|
||||
if (m_stpsuffstate.needrecompute() || m_stopsuffixes == 0) {
|
||||
// Beware: both needrecompute() need to be called always hence the
|
||||
// bizarre way we do things
|
||||
bool needrecompute = m_stpsuffstate.needrecompute();
|
||||
needrecompute = needrecompute || m_oldstpsuffstate.needrecompute();
|
||||
if (needrecompute || m_stopsuffixes == 0) {
|
||||
// Need to initialize the suffixes
|
||||
delete STOPSUFFIXES;
|
||||
if ((m_stopsuffixes = new SuffixStore) == 0) {
|
||||
LOGERR(("RclConfig::inStopSuffixes: out of memory\n"));
|
||||
return false;
|
||||
}
|
||||
list<string> stoplist;
|
||||
vector<string> stoplist;
|
||||
stringToStrings(m_stpsuffstate.savedvalue, stoplist);
|
||||
for (list<string>::const_iterator it = stoplist.begin();
|
||||
vector<string> ostoplist;
|
||||
stringToStrings(m_oldstpsuffstate.savedvalue, ostoplist);
|
||||
stoplist.resize(stoplist.size() + ostoplist.size());
|
||||
stoplist.insert(stoplist.end(), ostoplist.begin(), ostoplist.end());
|
||||
for (vector<string>::const_iterator it = stoplist.begin();
|
||||
it != stoplist.end(); it++) {
|
||||
STOPSUFFIXES->insert(SfString(stringtolower(*it)));
|
||||
if (m_maxsufflen < it->length())
|
||||
@ -1461,7 +1471,8 @@ void RclConfig::initFrom(const RclConfig& r)
|
||||
m_maxsufflen = r.m_maxsufflen;
|
||||
m_defcharset = r.m_defcharset;
|
||||
|
||||
m_stpsuffstate.init(mimemap);
|
||||
m_oldstpsuffstate.init(mimemap);
|
||||
m_stpsuffstate.init(m_conf);
|
||||
m_skpnstate.init(m_conf);
|
||||
m_rmtstate.init(m_conf);
|
||||
m_xmtstate.init(m_conf);
|
||||
|
||||
@ -81,7 +81,8 @@ class RclConfig {
|
||||
RclConfig(const string *argcnf = 0);
|
||||
|
||||
RclConfig(const RclConfig &r)
|
||||
: m_stpsuffstate(this, "recoll_noindex"),
|
||||
: m_oldstpsuffstate(this, "recoll_noindex"),
|
||||
m_stpsuffstate(this, "noContentSuffixes"),
|
||||
m_skpnstate(this, "skippedNames"),
|
||||
m_rmtstate(this, "indexedmimetypes"),
|
||||
m_xmtstate(this, "excludedmimetypes"),
|
||||
@ -334,6 +335,7 @@ class RclConfig {
|
||||
|
||||
void *m_stopsuffixes;
|
||||
unsigned int m_maxsufflen;
|
||||
ParamStale m_oldstpsuffstate; // Values from user mimemap, now obsolete
|
||||
ParamStale m_stpsuffstate;
|
||||
|
||||
ParamStale m_skpnstate;
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
|
||||
<!ENTITY RCL "<application>Recoll</application>">
|
||||
<!ENTITY RCLAPPS "<ulink url='http://www.recoll.org/features.html'>http://www.recoll.org/features.html</ulink>">
|
||||
<!ENTITY RCLVERSION "1.20">
|
||||
<!ENTITY RCLVERSION "1.21">
|
||||
<!ENTITY XAP "<application>Xapian</application>">
|
||||
<!ENTITY WIKI "http://bitbucket.org/medoc/recoll/wiki/">
|
||||
]>
|
||||
@ -22,7 +22,7 @@
|
||||
</author>
|
||||
|
||||
<copyright>
|
||||
<year>2005-2014</year>
|
||||
<year>2005-2015</year>
|
||||
<holder role="mailto:jfd@recoll.org">Jean-Francois Dockes</holder>
|
||||
</copyright>
|
||||
<abstract>
|
||||
@ -274,6 +274,14 @@
|
||||
aspects of the indexing processes and configuration, with links
|
||||
to detailed sections.</para>
|
||||
|
||||
<para>Depending on your data, temporary files may be needed during
|
||||
indexing, some of them possibly quite big. You can use the
|
||||
<envar>RECOLL_TMPDIR</envar> or <envar>TMPDIR</envar> environment
|
||||
variables to determine where they are created (the default is to
|
||||
use <filename>/tmp</filename>). Using <envar>TMPDIR</envar> has
|
||||
the nice property that it may also be taken into account by
|
||||
auxiliary commands executed by <command>recollindex</command>.</para>
|
||||
|
||||
<sect2 id="RCL.INDEXING.INTRODUCTION.MODES">
|
||||
<title>Indexing modes</title>
|
||||
|
||||
@ -5122,6 +5130,73 @@ except:
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<sect2 id="RCL.INSTALL.CONFIG.ENVIR">
|
||||
<title>Environment variables</title>
|
||||
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><varname>RECOLL_CONFDIR</varname></term>
|
||||
<listitem><para>Defines the main configuration
|
||||
directory.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>RECOLL_TMPDIR, TMPDIR</varname></term>
|
||||
<listitem><para>Locations for temporary files, in this order
|
||||
of priority. The default if none of these is set is to use
|
||||
<filename>/tmp</filename>. Big temporary files may be created
|
||||
during indexing, mostly for decompressing, and also for
|
||||
processing, e.g. email attachments.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>RECOLL_CONFTOP, RECOLL_CONFMID</varname></term>
|
||||
<listitem><para>Allow adding configuration directories with
|
||||
priorities below and above the user directory (see the
|
||||
Configuration overview section above for details).</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>RECOLL_EXTRA_DBS,
|
||||
RECOLL_ACTIVE_EXTRA_DBS</varname></term>
|
||||
<listitem><para>
|
||||
Help for setting up external indexes. See <link
|
||||
linkend="RCL.SEARCH.GUI.MULTIDB">this paragraph</link> for
|
||||
explanations.
|
||||
</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>RECOLL_DATADIR</varname></term>
|
||||
<listitem><para>Defines replacement for the default location
|
||||
of Recoll data files, normally found in, e.g.,
|
||||
<filename>/usr/share/recoll</filename>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>RECOLL_FILTERSDIR</varname></term>
|
||||
<listitem><para>Defines replacement for the default location
|
||||
of Recoll filters, normally found in, e.g.,
|
||||
<filename>/usr/share/recoll/filters</filename>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>ASPELL_PROG</varname></term>
|
||||
<listitem><para><command>aspell</command> program to use for
|
||||
creating the spelling dictionary. The result has to be
|
||||
compatible with the <filename>libaspell</filename> which &RCL;
|
||||
is using.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>VARNAME</varname></term>
|
||||
<listitem><para>Blabla</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="RCL.INSTALL.CONFIG.RECOLLCONF">
|
||||
<title>The main configuration file, recoll.conf</title>
|
||||
|
||||
@ -5188,12 +5263,29 @@ skippedNames = #* bin CVS Cache cache* caughtspam tmp .thumbnails .svn \
|
||||
|
||||
<para>Not even the file names are indexed for patterns
|
||||
in this list. See the
|
||||
<varname>recoll_noindex</varname> variable in
|
||||
<filename>mimemap</filename> for an alternative
|
||||
<varname>noContentSuffixes</varname> variable for an alternative
|
||||
approach which indexes the file names.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry><term><varname>noContentSuffixes</varname></term>
|
||||
<listitem><para>This is a list of file name endings (not
|
||||
wildcard expressions, nor dot-delimited suffixes). Only the
|
||||
names of matching files will be indexed (no attempt at MIME
|
||||
type identification, no decompression, no content
|
||||
indexing). This can be redefined for
|
||||
subdirectories, and edited from the GUI. The default value is:
|
||||
<programlisting>
|
||||
noContentSuffixes = .md5 .map \
|
||||
.o .lib .dll .a .sys .exe .com \
|
||||
.mpp .mpt .vsd \
|
||||
.img .img.gz .img.bz2 .img.xz .image .image.gz .image.bz2 .image.xz \
|
||||
.dat .bak .rdf .log.gz .log .db .msf .pid \
|
||||
,v ~ #
|
||||
</programlisting>
|
||||
</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry><term><varname>skippedPaths</varname> and
|
||||
<varname>daemSkippedPaths</varname> </term>
|
||||
<listitem>
|
||||
@ -6049,21 +6141,14 @@ x-my-tag = mailmytag
|
||||
should be handled specially, which is possible because they
|
||||
are usually all located in one place.</para>
|
||||
|
||||
<para><filename>mimemap</filename> also has a
|
||||
<varname>recoll_noindex</varname> variable which is a list of
|
||||
suffixes. Matching files will be skipped (which avoids
|
||||
unnecessary decompressions or <command>file</command>
|
||||
executions). This is partially redundant with
|
||||
<varname>skippedNames</varname> in the main configuration
|
||||
file, with a few differences: it will not affect directories,
|
||||
it cannot be made dependent on the file-system location (it is
|
||||
a configuration-wide parameter), and the file names will still
|
||||
be indexed (not even the file names are indexed for patterns
|
||||
in <varname>skippedNames</varname>).
|
||||
<varname>recoll_noindex</varname> is used mostly for things
|
||||
known to be unindexable by a given &RCL; version. Having it
|
||||
there avoids cluttering the more user-oriented and locally
|
||||
customized <varname>skippedNames</varname>.</para>
|
||||
<para>The <varname>recoll_noindex</varname>
|
||||
<filename>mimemap</filename> variable has been moved to
|
||||
<filename>recoll.conf</filename> and renamed to
|
||||
<varname>noContentSuffixes</varname>, while keeping the same
|
||||
function, as of &RCL; version 1.21. For older &RCL; versions,
|
||||
see the documentation for <varname>noContentSuffixes</varname>
|
||||
but use <varname>recoll_noindex</varname> in
|
||||
<filename>mimemap</filename>.</para>
|
||||
|
||||
</sect2>
|
||||
|
||||
|
||||
@ -454,6 +454,17 @@ ConfSubPanelW::ConfSubPanelW(QWidget *parent, ConfNull *config,
|
||||
m_widgets.push_back(eexcm);
|
||||
gl1->addWidget(eexcm, gridy, 0);
|
||||
|
||||
ConfParamSLW *encs = new ConfParamSLW(
|
||||
m_groupbox,
|
||||
ConfLink(new ConfLinkRclRep(config, "noContentSuffixes", &m_sk)),
|
||||
QObject::tr("Ignored endings"),
|
||||
QObject::tr("These are file name endings for files which will be "
|
||||
"indexed by name only \n(no MIME type identification "
|
||||
"attempt, no decompression, no content indexing)."));
|
||||
encs->setFsEncoding(true);
|
||||
m_widgets.push_back(encs);
|
||||
gl1->addWidget(encs, gridy++, 1);
|
||||
|
||||
vector<string> args;
|
||||
args.push_back("-l");
|
||||
ExecCmd ex;
|
||||
@ -484,7 +495,7 @@ ConfSubPanelW::ConfSubPanelW(QWidget *parent, ConfNull *config,
|
||||
"and the value from the NLS environnement is used."
|
||||
), charsets);
|
||||
m_widgets.push_back(e21);
|
||||
gl1->addWidget(e21, gridy++, 1);
|
||||
gl1->addWidget(e21, gridy++, 0);
|
||||
|
||||
ConfParamBoolW *e3 = new ConfParamBoolW(
|
||||
m_groupbox,
|
||||
|
||||
@ -164,17 +164,10 @@
|
||||
.mht = application/x-mimehtml
|
||||
.mhtml = application/x-mimehtml
|
||||
|
||||
# A list of suffixes (name endings) that we don't want to touch at all.
|
||||
# Having these explicitly listed speeds things up a bit by avoiding
|
||||
# unneeded decompression or 'file' calls. File names still get indexed if
|
||||
# indexallfilenames is set (so this is different from skippedNames). It's a
|
||||
# bit inconsistent to have it listed among the suffix translations, but no
|
||||
# problem in practice.
|
||||
recoll_noindex = .md5 .map \
|
||||
.o .lib .dll .a .sys .exe .com \
|
||||
.mpp .mpt .vsd \
|
||||
.dat .bak .rdf .log.gz .log .db .msf .pid \
|
||||
,v ~ #
|
||||
# Note: recoll_noindex has been obsoleted and moved to recoll.conf as
|
||||
# noContentSuffixes. recoll_noindex from your personal mimemap file is
|
||||
# still taken into account for now, but you should move its contents to the
|
||||
# new recoll.conf variable.
|
||||
|
||||
# Special handling of .txt files inside ~/.gaim and ~/.purple directories
|
||||
[~/.gaim]
|
||||
|
||||
@ -31,6 +31,20 @@ skippedNames = #* bin CVS Cache cache* .cache caughtspam tmp \
|
||||
# Explicitly adding /media/xxx to the topdirs will override this.
|
||||
skippedPaths = /media
|
||||
|
||||
# List of suffixes for which we don't try mime type identification (and
|
||||
# don't uncompress or index content obviously). This complements the now
|
||||
# obsoleted mimemap recoll_noindex list, which will go away in a future
|
||||
# release (the move from mimemap to recoll.conf allows editing the list
|
||||
# through the GUI). This is different from skippedNames because these are
|
||||
# name ending matches only (not wildcard patterns), and the file name
|
||||
# itself gets indexed normally.
|
||||
noContentSuffixes = .md5 .map \
|
||||
.o .lib .dll .a .sys .exe .com \
|
||||
.mpp .mpt .vsd \
|
||||
.img .img.gz .img.bz2 .img.xz .image .image.gz .image.bz2 .image.xz \
|
||||
.dat .bak .rdf .log.gz .log .db .msf .pid \
|
||||
,v ~ #
|
||||
|
||||
# Same for real time indexing. The idea here is that there is stuff that
|
||||
# you might want to initially index but not monitor. If daemSkippedPaths is
|
||||
# not set, the daemon uses skippedPaths.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user