# @(#$Id: recoll.conf.in,v 1.22 2008-10-15 08:30:18 dockes Exp $  (C) 2004 J.F.Dockes
#
# Recoll default configuration file. This typically lives in
# @prefix@/share/recoll/examples and provides default values. You can
# override selected parameters by adding assignments to
# ~/.recoll/recoll.conf (or $RECOLL_CONFDIR/recoll.conf).
#
# Almost all values in this file can be set from the GUI configuration menus,
# which may be an easier approach than direct editing.
#

# Space-separated list of directories to index. Next line indexes $HOME
topdirs = ~

# Wildcard expressions for names of files and directories that we should
# ignore. If you need to index mozilla/thunderbird mail folders, don't put
# ".*" in there (as was the case with an older sample config).
# These are simple names, not paths (must contain no / )
skippedNames = #* bin CVS  Cache cache* .cache caughtspam tmp \
     .thumbnails .svn \
     *~ .beagle .git .hg .bzr loop.ps .xsession-errors \
     .recoll* xapiandb recollrc recoll.conf

# Wildcard expressions for paths we shouldn't go into. The database and
# configuration directories will be added in there, else the default value
# is empty
# skippedPaths = 
# Same for real time indexing. The idea here is that there is stuff that
# you might want to initially index but not monitor. If daemSkippedPaths is
# not set, the daemon uses skippedPaths.
#daemSkippedPaths = 

# Recoll uses FNM_PATHNAME by default when matching skipped paths, which
# means that /dir1/dir2/dir3 is not matched by */dir3. Can't change the
# default now, but you can set the following variable to 0 to disable the
# use of FNM_PATHNAME (see fnmatch(3) man page)
# skippedPathsFnmPathname = 1

# Option to follow symbolic links. We normally don't, to avoid duplicated
# indexing (in any case, no effort is made to identify or avoid multiple
# indexing of linked files)
# followLinks = 0

# Debug messages. 2 is errors/warnings only. 3 information like doc
# updates, 4 is quite verbose and 6 very verbose
loglevel = 3
logfilename = stderr

# Specific versions of log file name and level for the indexing daemon. The 
# default is to use the above values.
# daemloglevel = 3
# daemlogfilename = /dev/null

# Decide if we store character case and diacritics in the index. If we do, 
# searches sensitive to case and diacritics can be performed, but the index
# will be bigger, and some marginal weirdness may sometimes occur. We
# default to a stripped index for now.
indexStripChars = 1

# IF the index is not stripped. Decide if we automatically trigger
# diacritics sensitivity if the search term has accented characters (not in
# unac_except_trans). Else you need to use the query language and the "D"
# modifier to specify diacritics sensitivity. Default is no.
autodiacsens = 0

# IF the index is not stripped. Decide if we automatically trigger
# character case sensitivity if the search term has upper-case characters
# in any but the first position. Else you need to use the query language
# and the "C" modifier to specify character-case sensitivity. Default is
# yes.
autocasesens = 1

# Languages for which to build stemming databases at the end of
# indexing. Stemmer names can be found on http://www.xapian.org 
# The flag to perform stem expansion at query time is now set from the GUI
indexstemminglanguages = english 

# Default character set. Values found inside files, ie content tag in html
# documents, will override this. It can be specified per directory (see
# below). Used when converting to utf-8 (internal storage format), so it
# may be quite important for pure text files.
# The default used to be set to iso8859-1, but we now take it from the NLS
# environment (LC_ALL/LC_CTYPE/LANG). The ultimate hardwired default is
# still 8859-1. If for some reason you want a general default which doesn't
# match your LANG and is not 8859-1, set it here.
# defaultcharset = iso-8859-1

# A list of characters, encoded in UTF-8, which should be handled specially
# when converting text to unaccented lowercase. For example, in Swedish,
# the letter a with diaeresis has full alphabet citizenship and should not
# be turned into an a. 
# Each element in the space-separated list has the special character as
# first element and the translation following. The handling of both the
# lowercase and upper-case versions of a character should be specified, as
# membership in the list will turn off both standard accent and case
# processing. Examples: 
# Swedish:
# unac_except_trans =  åå Åå ää Ää öö Öö
# German:
# unac_except_trans = Ää Öö Üü ää öö üü ßss
# In French, you probably want to decompose oe and ae
# unac_except_trans = œoe Œoe æae Æae
# Actually, this seems a reasonable default for all until someone
# protests. These decompositions are not performed by unac, but I
# can't imagine someone typing the composed forms in a search.
# Translations are written in lowercase, including for the upper-case
# characters, because being in this list turns off case processing.
unac_except_trans = ßss œoe Œoe æae Æae ﬁfi ﬂfl

# Maximum expansion count for a single term (ie: when using wildcards).
# We used to not limit this at all (except for filenames where the limit
# was too low at 1000), but it is unreasonable with a big index. 
# Default 10 000
maxTermExpand = 10000

# Maximum number of clauses we add to a single Xapian query. In some cases,
# the result of term expansion can be multiplicative, and we want to avoid
# eating all the memory. Default 100 000
maxXapianClauses = 100000

# Where to store the database (directory). This may be an absolute path,
# else it is taken as relative to the configuration directory (-c argument
# or $RECOLL_CONFDIR). 
# If nothing is specified, the default is then ~/.recoll/xapiandb/
dbdir = xapiandb

# Maximum file system occupation before we stop indexing. The default value
# is 0, meaning no checking. The value is a percentage, corresponding to
# what the "Capacity" df output column shows.
maxfsoccuppc = 0

# Threshold (megabytes of new data) where we flush from memory to disk
# index. Setting this (ie to 10) can help control memory usage. 
#
# A value of 0 means no explicit flushing, which lets Xapian perform its
# own thing, meaning flushing every XAPIAN_FLUSH_THRESHOLD documents
# created, modified or deleted. XAPIAN_FLUSH_THRESHOLD is an environment
# variable. As memory usage depends on average document size, not only
# document count, this is not very useful.
idxflushmb = 10

# Place to search for executable filters. If RECOLL_FILTERSDIR is set in
# the environment, we use it instead
filtersdir = @prefix@/share/recoll/filters

# Place to search for icons. The only reason to change this would be if you
# want to change the icons displayed in the result list
iconsdir = @prefix@/share/recoll/images

# Should we use the system's 'file -i' command as a final step in file type
# identification? This may be useful, but will usually cause the
# indexing of many bogus 'text' files
usesystemfilecommand = 1

# Should we index the file names of files with mime types we don't
# know? (we can otherwise just ignore them)
indexallfilenames = 1

# A restrictive list of indexed mime types. Normally not set. If it is set,
# only the types from the list will have their contents indexed (the names
# will be indexed anyway if indexallfilenames is set as by default). Mime
# type names should be taken from the mimemap file.
#
# indexedmimetypes = text/html application/pdf

#
# Size limit for archive members. This is passed to the filters in the
# environment as RECOLL_FILTER_MAXMEMBERKB
# 
membermaxkbs = 50000

# Size limit for compressed files. We need to decompress these in a
# temporary directory for identification, which can be wasteful in some
# cases. Limit the waste. Negative means no limit. 0 results in no
# processing of any compressed file
compressedfilemaxkbs = -1

# Size limit for text files. This is for skipping monster logs
textfilemaxmbs = 20

# Page size for text files. If this is set, text/plain files will be
# divided into documents of approximately this size. May be useful to
# access pieces of big text files which would be problematic to load as one
# piece into the preview window. Might be useful for big logs
textfilepagekbs = 1000

# Maximum external filter execution time. Default 20 min. This is mainly
# to avoid infinite loops in postscript files (loop.ps)
filtermaxseconds = 1200

# Length of abstracts we store while indexing. Longer will make for a
# bigger db
# idxabsmlen = 250

# Language definitions to use when creating the aspell dictionary. 
# The value must match a set of aspell language definition files. 
# You can type "aspell config"  to see where these are installed.
# The default if this is not set is to use the NLS environment to guess the
# value
# aspellLanguage = en

# Disabling aspell use. The aspell dictionary generation takes some time,
# and some combinations of aspell version, language, and local terms,
# result in aspell dumping core each time. You can disable the aspell
# dictionary generation by setting the following variable:
# noaspell = 1

# Timing parameters for the real time mode:
#
# Seconds between auxiliary databases updates (stemdb, aspell):
# monauxinterval = 3600
#
# Resting time (seconds) during which we let the queue accumulate, in hope
# that events to the same file will merge, before we start indexing:
# monixinterval = 30
#
# Definitions for files which get a longer delay before reindexing is
# allowed. This is for fast-changing files, that should only be reindexed
# once in a while. A list of wildcardPattern:seconds pairs. The patterns
# are matched with fnmatch(pattern, path, 0). You can quote entries containing
# white space with double quotes. The default is empty, here follows an
# example:
# mondelaypatterns = *.log:20  "*with spaces.*:30"

# ionice class for monitor (on platforms where this is supported)
# monioniceclass = 3
# ionice class param for monitor (on platforms where this is supported)
# monioniceclassdata = 

# If this is set, process the directory where Beagle Web browser plugins
# copy visited pages for indexing. Of course, Beagle MUST NOT be running,
# else things will behave strangely. 
processbeaglequeue = 0
# The path to the Beagle indexing queue. This is hard-coded in the Beagle
# plugin as ~/.beagle/ToIndex so there should be no need to change it. 
#beaglequeuedir = ~/.beagle/ToIndex
# This is only used by the Beagle web browser plugin indexing code, and
# defines where the cache for visited pages will live. Default:
# $RECOLL_CONFDIR/webcache 
webcachedir = webcache
# This is only used by the Beagle web browser plugin indexing code, and
# defines the maximum size for the web page cache. Default: 40 MB. 
webcachemaxmbs = 40

# The directory where mbox message offsets cache files are held. This is
# normally $RECOLL_CONFDIR/mboxcache, but it may be useful to share a
# directory between different configurations. 
#mboxcachedir = mboxcache

# The minimum mbox file size over which we cache the offsets. There is
# really no sense in caching offsets for small files. The default is 5 MB.
#mboxcacheminmbs = 5

# Maximum number of positions we walk while populating a snippet for the
# result list. The default of 1 000 000 may be insufficient for big
# documents, the consequence would be snippets with possibly
# meaning-altering missing words. 
snippetMaxPosWalk = 1000000

# You could specify different parameters for a subdirectory like this:
#[~/hungariandocs/plain]
#defaultcharset = iso-8859-2

# You can set fields on all files of a specific fs area. (rclaptg can be
# used for application selection inside mimeview.)
#[/some/app/directory]
#localfields = rclaptg = someapp; otherfield = somevalue

# Use app tag to enable using gnu info to open info files (as the subnodes
# are indexed as html, we'd otherwise use firefox on a temp file). Set this
# on some known info storage places
[/usr/share/info]
localfields = rclaptg=gnuinfo
[/usr/local/share/info]
localfields = rclaptg=gnuinfo
[/usr/local/info]
localfields = rclaptg=gnuinfo

# Enable thunderbird mbox format quirks where appropriate
[~/.thunderbird]
mhmboxquirks = tbird

# pidgin / purple directories for irc chats have names beginning with #
[~/.purple]
skippedNames =