# Recoll default main configuration file
# The XML tags in the comments are used to help produce the documentation
# from the sample/reference file, and not at all at run time, where
# comments are just comments. Edit at will.
# This typically lives in $prefix/share/recoll/examples and provides
# default values. You can override selected parameters by adding assignments
# to ~/.recoll/recoll.conf (or $RECOLL_CONFDIR/recoll.conf)
#
# Most of the important values in this file can be set from the GUI
# configuration menus, which may be an easier approach than direct editing.
# Parameters affecting what documents we index
# Space-separated list of files or
# directories to recursively index. Defaults to ~ (indexes
# $HOME). You can use symbolic links in the list, they will be followed,
# independently of the value of the followLinks variable.
topdirs = ~
# Wildcard expressions for
# names of files and directories that we should ignore.
# White space separated list of wildcard patterns (simple
# ones, not paths, must contain no / ), which will be tested against file
# and directory names. The list in the default configuration does not
# exclude hidden directories (names beginning with a dot), which means that
# it may index quite a few things that you do not want. On the other hand,
# email user agents like Thunderbird usually store messages in hidden
# directories, and you probably want this indexed. One possible solution is
# to have '.*' in 'skippedNames', and add things like '~/.thunderbird'
# '~/.evolution' to 'topdirs'. Not even the file names are indexed for
# patterns in this list, see the 'noContentSuffixes' variable for an
# alternative approach which indexes the file names. Can be redefined for
# any subtree.
skippedNames = #* bin CVS Cache cache* .cache caughtspam tmp \
.thumbnails .svn \
*~ .beagle .git .hg .bzr loop.ps .xsession-errors \
.recoll* xapiandb recollrc recoll.conf
# List of name endings (not
# necessarily dot-separated suffixes) for which we don't try MIME type
# identification, and don't uncompress or index content. Only
# the names will be indexed. This complements the now obsoleted mimemap
# recoll_noindex list, which will go away in a future release (the move
# from mimemap to recoll.conf allows editing the list through the
# GUI). This is different from skippedNames because these are name ending
# matches only (not wildcard patterns), and the file name itself gets
# indexed normally. This can be redefined for subdirectories.
noContentSuffixes = .md5 .map \
.o .lib .dll .a .sys .exe .com \
.mpp .mpt .vsd \
.img .img.gz .img.bz2 .img.xz .image .image.gz .image.bz2 .image.xz \
.dat .bak .rdf .log.gz .log .db .msf .pid \
,v ~ #
# Space-separated list of
# wildcard expressions for paths we shouldn't go into. Can
# contain files and directories. The database and configuration directories
# will automatically be added. The expressions are matched using 'fnmatch(3)'
# with the FNM_PATHNAME flag set by default. This means that '/' characters
# must be matched explicitly. You can set 'skippedPathsFnmPathname' to 0
# to disable the use of FNM_PATHNAME (meaning that '/*/dir3' will match
# '/dir1/dir2/dir3'). The default value contains the usual mount point for
# removable media to remind people that it is a bad idea to
# naively have recoll work on these (esp. with the monitor: media gets
# indexed on mount, all data gets erased on unmount). Typically the
# presence of '/media' is mostly a reminder, it would only have effect for
# someone who is indexing '/'. Explicitly adding '/media/xxx' to the
# topdirs will override this.
skippedPaths = /media
# Set to 0 to
# override use of FNM_PATHNAME for matching skipped
# paths.
#skippedPathsFnmPathname = 1
# skippedPaths equivalent specific to
# real time indexing. This enables having parts of the tree
# which are initially indexed but not monitored. If daemSkippedPaths is
# not set, the daemon uses skippedPaths.
#daemSkippedPaths =
# Space-separated list of
# wildcard expressions for names that should be ignored
# inside zip archives. This is used directly by the zip
# handler, and has a function similar to skippedNames, but
# works independently. Can be redefined for subdirectories. Supported by
# recoll 1.20 and newer. See
# https://bitbucket.org/medoc/recoll/wiki/Filtering%20out%20Zip%20archive%20members
#
#zipSkippedNames =
# Follow symbolic links during
# indexing. The default is to ignore symbolic links to avoid
# multiple indexing of linked files. No effort is made to avoid duplication
# when this option is set to true. This option can be set individually for
# each of the 'topdirs' members by using sections. It can not be changed
# below the 'topdirs' level. Links in the 'topdirs' list itself are always
# followed.
#followLinks = 0
# Restrictive list of
# indexed mime types. Normally not set (in which case all
# supported types are indexed). If it is set,
# only the types from the list will have their contents indexed. The names
# will be indexed anyway if indexallfilenames is set (default). MIME
# type names should be taken from the mimemap file. Can be redefined for
# subtrees.
#indexedmimetypes =
# List of excluded MIME
# types. Lets you exclude some types from indexing. Can be
# redefined for subtrees.
#excludedmimetypes =
# Size limit for compressed
# files. We need to decompress these in a
# temporary directory for identification, which can be wasteful in some
# cases. Limit the waste. Negative means no limit. 0 results in no
# processing of any compressed file.
compressedfilemaxkbs = 50000
# Size limit for text
# files. Mostly for skipping monster
# logs.
textfilemaxmbs = 20
# Index the file names of
# unprocessed files. Index the names of files the contents of
# which we don't index because of an excluded or unsupported MIME
# type.
indexallfilenames = 1
# Use a system command
# for file MIME type guessing as a final step in file type
# identification. This is generally useful, but will usually
# cause the indexing of many bogus 'text' files. See 'systemfilecommand'
# for the command used.
usesystemfilecommand = 1
# Command used to guess
# MIME types if the internal methods fail. This should be a
# "file -i" workalike. The file path will be added as a last parameter to
# the command line. 'xdg-mime' works better than the traditional 'file'
# command, and is now the configured default (with a hard-coded fallback to
# 'file').
systemfilecommand = xdg-mime query filetype
# Decide if we process the
# Web queue. The queue is a directory where the Recoll Web
# browser plugins create the copies of visited pages.
processwebqueue = 0
# Page size for text
# files. If this is set, text/plain files will be divided
# into documents of approximately this size. Will reduce memory usage at
# index time and help with loading data in the preview window at query
# time. Particularly useful with very big files, such as application or
# system logs.
textfilepagekbs = 1000
# Size limit for archive
# members. This is passed to the filters in the environment
# as RECOLL_FILTER_MAXMEMBERKB.
membermaxkbs = 50000
# Parameters affecting how we generate terms
# Changing some of these parameters will imply a full
# reindex. Also, when using multiple indexes, it may not make sense
# to search indexes that don't share the values for these parameters,
# because they usually affect both search and index operations.
# Decide if we store
# character case and diacritics in the index. If we do,
# searches sensitive to case and diacritics can be performed, but the index
# will be bigger, and some marginal weirdness may sometimes occur. The
# default is a stripped index. When using multiple indexes for a search,
# this parameter must be defined identically for all. Changing the value
# implies an index reset.
indexStripChars = 1
# Decides if terms will be
# generated for numbers. For example "123", "1.5e6",
# 192.168.1.4, would not be indexed if nonumbers is set ("value123" would
# still be). Numbers are often quite interesting to search for, and this
# should probably not be set except for special situations, e.g., scientific
# documents with huge amounts of numbers in them, where setting nonumbers
# will reduce the index size. This can only be set for a whole index, not
# for a subtree.
#nonumbers = 0
# Determines if we index
# 'coworker' also when the input is 'co-worker'. This is new
# in version 1.22, and on by default. Setting the variable to off allows
# restoring the previous behaviour.
#dehyphenate = 1
# Decides if specific east asian
# (Chinese Korean Japanese) characters/word splitting is turned
# off. This will save a small amount of cpu if you have no CJK
# documents. If your document base does include such text but you are not
# interested in searching it, setting nocjk may be a
# significant time and space saver.
#nocjk = 0
# This lets you adjust the size of
# n-grams used for indexing CJK text. The default value of 2 is
# probably appropriate in most cases. A value of 3 would allow more precision
# and efficiency on longer words, but the index will be approximately twice
# as large.
#cjkngramlen = 2
# Languages for
# which to create stemming expansion data. Stemmer names can
# be found on http://www.xapian.org, or by executing 'recollindex -l', or
# this can also be set from a list in the GUI.
indexstemminglanguages = english
# Default character
# set. This is used for files which do not contain a
# character set definition (e.g.: text/plain). Values found inside files,
# e.g. a 'charset' tag in HTML documents, will override it. If this is not
# set, the default character set is the one defined by the NLS environment
# ($LC_ALL, $LC_CTYPE, $LANG), or ultimately iso-8859-1 (cp-1252 in fact).
# If for some reason you want a general default which does not match your
# LANG and is not 8859-1, use this variable. This can be redefined for any
# sub-directory.
#defaultcharset = iso-8859-1
# A list of characters,
# encoded in UTF-8, which should be handled specially
# when converting text to unaccented lowercase. For
# example, in Swedish, the letter a with diaeresis has full alphabet
# citizenship and should not be turned into an a.
# Each element in the space-separated list has the special character as
# first element and the translation following. The handling of both the
# lowercase and upper-case versions of a character should be specified, as
# membership in the list will turn off both standard accent and case
# processing. The value is global and affects both indexing and querying.
# Because the first character of each element is the one being translated,
# the last three entries of the lists below must begin with the
# single-character ligatures U+FB00 (ff), U+FB01 (fi) and U+FB02 (fl).
# Writing them with a plain ASCII 'f' would redefine the letter 'f' itself.
# Examples:
# Swedish:
# unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl åå Åå
# German:
# unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl
# In French, you probably want to decompose oe and ae and nobody would type
# a German ß
# unac_except_trans = ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl
# Reasonable default for all until someone protests. These decompositions
# are not performed by unac, but I can't imagine someone typing the composed
# forms in a search.
# unac_except_trans = ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl
unac_except_trans = ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl
# Overrides the default
# character set for email messages which don't specify
# one. This is mainly useful for readpst (libpst) dumps,
# which are utf-8 but do not say so.
#maildefcharset=
# Set fields on all files
# (usually of a specific fs area). Syntax is the usual:
# name = value ; attr1 = val1 ; [...]
# value is empty so this needs an initial semi-colon. This is useful, e.g.,
# for setting the rclaptg field for application selection inside
# mimeview.
#[/some/app/directory]
#localfields = ; rclaptg = someapp; otherfield = somevalue
# Use mtime instead of
# ctime to test if a file has been modified. The time is used
# in addition to the size, which is always used.
# Setting this can reduce re-indexing on systems where extended attributes
# are used (by some other application), but not indexed, because changing
# extended attributes only affects ctime.
# Notes:
# - This may prevent detection of change in some marginal file rename cases
# (the target would need to have the same size and mtime).
# - You should probably also set noxattrfields to 1 in this case, except if
# you still prefer to perform xattr indexing, for example if the local
# file update pattern makes it of value (as in general, there is a risk
# for pure extended attributes updates without file modification to go
# undetected). Perform a full index reset after changing this.
#
testmodifusemtime = 0
# Disable extended attributes
# conversion to metadata fields. This probably needs to be
# set if testmodifusemtime is set.
noxattrfields = 0
# Define commands to
# gather external metadata, e.g. tmsu tags.
# There can be several entries, separated by semi-colons, each defining
# which field name the data goes into and the command to use. Don't forget the
# initial semi-colon. All the field names must be different. You can use
# aliases in the "field" file if necessary.
# As a not too pretty hack conceded to convenience, any field name
# beginning with "rclmulti" will be taken as an indication that the command
# returns multiple field values inside a text blob formatted as a recoll
# configuration file ("fieldname = fieldvalue" lines). The rclmultixx name
# will be ignored, and field names and values will be parsed from the data.
#
#[/some/area/of/the/fs]
#metadatacmds = ; tags = tmsu tags %f; rclmulti1 = cmdOutputsConf %f
# Parameters affecting where and how we store things
# Top directory for Recoll
# data. Recoll data directories are normally located relative
# to the configuration directory (e.g. ~/.recoll/xapiandb,
# ~/.recoll/mboxcache). If 'cachedir' is set, the directories are stored under
# the specified value instead (e.g. if cachedir is ~/.cache/recoll, the
# default dbdir would be ~/.cache/recoll/xapiandb). This affects dbdir,
# webcachedir, mboxcachedir, aspellDicDir, which can still be individually
# specified to override cachedir. Note that if you have multiple
# configurations, each must have a different cachedir, there is no
# automatic computation of a subpath under cachedir.
#cachedir = ~/.cache/recoll
# Maximum file system occupation
# over which we stop indexing. The value is a percentage,
# corresponding to what the "Capacity" df output column shows. The default
# value is 0, meaning no checking.
maxfsoccuppc = 0
# Xapian database directory
# location. This will be created on first indexing. If the
# value is not an absolute path, it will be interpreted as relative to
# cachedir if set, or the configuration directory (-c argument or
# $RECOLL_CONFDIR). If nothing is specified, the default is then
# ~/.recoll/xapiandb/
dbdir = xapiandb
# Name of the scratch file where
# the indexer process updates its status. Default:
# idxstatus.txt inside the configuration directory
#idxstatusfile = idxstatus.txt
#
#
# Directory location for storing mbox message offsets cache
# files. This is normally 'mboxcache' under cachedir if set,
# or else under the configuration directory, but it may be useful to share
# a directory between different configurations.
#mboxcachedir = mboxcache
#
#
# Minimum mbox file size over which we cache the offsets.
# There is really no sense in caching offsets for small files. The
# default is 5 MB.
#mboxcacheminmbs = 5
#
#
# Directory where we store the archived web pages.
# This is only used by the web history indexing code
# Default: cachedir/webcache if cachedir is set, else
# $RECOLL_CONFDIR/webcache
webcachedir = webcache
#
# Maximum size in MB of the Web archive.
# This is only used by the web history indexing code.
# Default: 100 MB.
# Reducing the size will not physically truncate the file.
webcachemaxmbs = 100
#
#
# The path to the Web indexing queue. This is
# hard-coded in the plugin as ~/.recollweb/ToIndex so there should be no
# need or possibility to change it.
#webqueuedir = ~/.recollweb/ToIndex
#
#
# Aspell dictionary storage directory location. The
# aspell dictionary (aspdict.(lang).rws) is normally stored in the
# directory specified by cachedir if set, or under the configuration
# directory.
#aspellDicDir =
#
#
# Directory location for executable input handlers. If
# RECOLL_FILTERSDIR is set in the environment, we use it instead. Defaults
# to $prefix/share/recoll/filters. Can be redefined for
# subdirectories.
#filtersdir = /path/to/my/filters
#
#
# Directory location for icons. The only reason to
# change this would be if you want to change the icons displayed in the
# result list. Defaults to $prefix/share/recoll/images
#iconsdir = /path/to/my/icons
# Parameters affecting indexing performance and resource
# usage
#
#
# Threshold (megabytes of new data) where we flush from memory to disk
# index.
# Setting this allows some control over memory usage by the indexer
# process. A value of 0 means no explicit flushing, which lets Xapian
# perform its own thing, meaning flushing every XAPIAN_FLUSH_THRESHOLD
# documents created, modified or deleted. XAPIAN_FLUSH_THRESHOLD is an
# environment variable. As memory usage depends on average document size,
# not only document count, this is not very useful.
# The default value of 10 MB may be a bit low. If you are looking for
# maximum speed, you may want to experiment with values between 20 and
# 80. In my experience, values beyond 100 are always counterproductive. If
# you find otherwise, please drop me a note.
idxflushmb = 10
#
#
# Maximum external filter execution time in
# seconds. Default 1200 (20 min). Set to 0 for no limit. This
# is mainly to avoid infinite loops in postscript files
# (loop.ps)
filtermaxseconds = 1200
#
#
# Maximum virtual memory space for filter processes
# (setrlimit(RLIMIT_AS)), in megabytes. Note that this
# includes any mapped libs (there is no reliable Linux way to limit the
# data space only), so we need to be a bit generous here. Anything over
# 2000 will be ignored on 32-bit machines.
filtermaxmbytes = 2000
#
#
# Stage input queues configuration. There are three
# internal queues in the indexing pipeline stages (file data extraction,
# terms generation, index update). This parameter defines the queue depths
# for each stage (three integer values). If a value of -1 is given for a
# given stage, no queue is used, and the thread will go on performing the
# next stage. In practise, deep queues have not been shown to increase
# performance. Default: a value of 0 for the first queue tells Recoll to
# perform autoconfiguration based on the detected number of CPUs (no need
# for the two other values in this case). Use thrQSizes = -1 -1 -1 to
# disable multithreading entirely.
thrQSizes = 0
#
#
# Number of threads used for each indexing stage. The
# three stages are: file data extraction, terms generation, index
# update. The use of the counts is also controlled by some special values
# in thrQSizes: if the first queue depth is 0, all counts are ignored
# (autoconfigured); if a value of -1 is used for a queue depth, the
# corresponding thread count is ignored. It makes no sense to use a value
# other than 1 for the last stage because updating the Xapian index is
# necessarily single-threaded (and protected by a mutex).
#thrTCounts = 4 2 1
# Miscellaneous parameters
#
#
# Debug log verbosity 1-6. 2 is errors/warnings
# only, 3 is information like document updates, 4 is quite verbose and 6 very
# verbose.
loglevel = 3
#
#
# Debug log destination. Use 'stderr' (default) to write to the
# console.
logfilename = stderr
#
#
# Override loglevel for the indexer.
#idxloglevel = 3
#
#
# Override logfilename for the indexer.
#idxlogfilename = stderr
#
#
# Override loglevel for the indexer in real time
# mode. The default is to use the idx... values if set, else
# the log... values.
#daemloglevel = 3
#
#
# Override logfilename for the indexer in real time
# mode. The default is to use the idx... values if set, else
# the log... values.
#daemlogfilename = /dev/null
#
#
# Indexing process current directory. The input
# handlers sometimes leave temporary files in the current directory, so it
# makes sense to have recollindex chdir to some temporary directory. Three
# possible types of values:
# - (literal) tmp : go to temp dir as set by environment (RECOLL_TMPDIR else
# TMPDIR else /tmp)
# - Empty: stay where started
# - Absolute path value: go there.
idxrundir = tmp
#
#
# Script used to heuristically check if we need to retry indexing
# files which previously failed. The default script checks
# the modified dates on /usr/bin and /usr/local/bin. A relative path will
# be looked up in the filters dirs, then in the path. Use an absolute path
# to do otherwise.
checkneedretryindexscript = rclcheckneedretry.sh
#
#
# Additional places to search for helper executables.
# This is only used on Windows for now.
#recollhelperpath = c:/someprog/bin;c:/someotherprog/bin
#
#
# Length of abstracts we store while indexing.
# Recoll stores an abstract for each indexed file.
# The text can come from an actual 'abstract' section in the
# document or will just be the beginning of the document. It is stored in
# the index so that it can be displayed inside the result lists without
# decoding the original file. The idxabsmlen parameter
# defines the size of the stored abstract. The default value is 250
# bytes. The search interface gives you the choice to display this stored
# text or a synthetic abstract built by extracting text around the search
# terms. If you always prefer the synthetic abstract, you can reduce this
# value and save a little space.
#idxabsmlen = 250
#
#
# Truncation length of stored metadata fields. This
# does not affect indexing (the whole field is processed anyway), just the
# amount of data stored in the index for the purpose of displaying fields
# inside result lists or previews. The default value is 150 bytes which
# may be too low if you have custom fields.
#idxmetastoredlen = 150
#
#
# Language definitions to use when creating the aspell
# dictionary. The value must match a set of aspell language
# definition files. You can type "aspell dicts" to see a list. The default
# if this is not set is to use the NLS environment to guess the
# value.
#aspellLanguage = en
#
#
# Additional parameter to aspell dictionary creation
# command. Some aspell packages may need an additional option
# (e.g. on Debian Jessie). See Debian bug 772415.
#aspellAddCreateParam = --local-data-dir=/usr/lib/aspell
#
#
# Set this to have a look at aspell dictionary creation
# errors. There are always many, so this is mostly for
# debugging.
#aspellKeepStderr = 1
#
#
# Disable aspell use. The aspell dictionary generation
# takes time, and some combinations of aspell version, language, and local
# terms, result in aspell crashing, so it sometimes makes sense to just
# disable the thing.
#noaspell = 1
#
#
# Seconds between auxiliary databases updates (stemdb,
# aspell). The default is one hour.
#monauxinterval = 3600
#
#
# Minimum interval (seconds) between processings of the indexing
# queue. The real time monitor does not process each event
# when it comes in, but lets the queue accumulate, to diminish overhead and
# to aggregate multiple events to the same file. Default: 30 seconds.
#monixinterval = 30
#
#
# Timing parameters for the real time indexing.
# Definitions for files which get a longer delay before reindexing
# is allowed. This is for fast-changing files, that should only be
# reindexed once in a while. A list of wildcardPattern:seconds pairs. The
# patterns are matched with fnmatch(pattern, path, 0). You can quote entries
# containing white space with double quotes (quote the whole entry, not the
# pattern). The default is empty. Example:
# mondelaypatterns = *.log:20 "*with spaces.*:30"
#mondelaypatterns = *.log:20 "*with spaces.*:30"
#
#
# ionice class for the real time indexing process.
# On platforms where this is supported, the default value is
# 3.
#monioniceclass = 3
#
#
# ionice class parameter for the real time indexing process.
# Applies on platforms where this is supported. The default is
# empty.
#monioniceclassdata =
# Query-time parameters (no impact on the index)
#
#
# Auto-trigger diacritics sensitivity (raw index only).
# If the index is not stripped, decide if we automatically trigger
# diacritics sensitivity if the search term has accented characters (not in
# unac_except_trans). Else you need to use the query language and the "D"
# modifier to specify diacritics sensitivity. Default is no.
autodiacsens = 0
#
#
# Auto-trigger case sensitivity (raw index only). If
# the index is not stripped (see indexStripChars), decide if we
# automatically trigger character case sensitivity if the search term has
# upper-case characters in any but the first position. Else you need to use
# the query language and the "C" modifier to specify character-case
# sensitivity. Default is yes.
autocasesens = 1
# Maximum query expansion count
# for a single term (e.g.: when using wildcards). This only
# affects queries, not indexing. We used to not limit this at all (except
# for filenames where the limit was too low at 1000), but it is
# unreasonable with a big index. Default 10000.
maxTermExpand = 10000
# Maximum number of clauses
# we add to a single Xapian query. This only affects queries,
# not indexing. In some cases, the result of term expansion can be
# multiplicative, and we want to avoid eating all the memory. Default
# 50000.
maxXapianClauses = 50000
#
#
# Maximum number of positions we walk while populating a snippet for the
# result list. The default of 1,000,000 may be insufficient
# for big documents, the consequence would be snippets with possibly
# meaning-altering missing words.
snippetMaxPosWalk = 1000000
# Parameters for the PDF input script
#
#
# Attempt OCR of PDF files with no text content if both tesseract and
# pdftoppm are installed. The default is off because OCR is so
# very slow.
#pdfocr = 0
#
#
# Enable PDF attachment extraction by executing pdftk (if
# available). This is
# normally disabled, because it does slow down PDF indexing a bit even if
# not one attachment is ever found.
#pdfattach = 0
# Parameters set for specific locations
# You could specify different parameters for a subdirectory like this:
#[~/hungariandocs/plain]
#defaultcharset = iso-8859-2
#
# Follow symbolic links when indexing under /usr/share/man, overriding the
# followLinks default of 0 for this location.
# NOTE(review): the followLinks description states that the value can not be
# changed below the 'topdirs' level, so this presumably only takes effect
# when /usr/share/man is itself a 'topdirs' member - confirm.
[/usr/share/man]
followLinks = 1
#
#
# Enable thunderbird/mozilla-seamonkey mbox format quirks.
# Set this for the directory where the email mbox files are
# stored. The same 'tbird' value is applied to both locations below.
[~/.thunderbird]
mhmboxquirks = tbird
[~/.mozilla]
mhmboxquirks = tbird
# pidgin / purple directories for irc chats have names beginning with #,
# which the default skippedNames list would ignore (it contains the '#*'
# pattern). skippedNames is redefined as empty for this subtree so that
# these directories are not skipped.
[~/.purple]
skippedNames =