893 lines
40 KiB
Plaintext
893 lines
40 KiB
Plaintext
# <filetitle>Recoll main configuration file, recoll.conf</filetitle>
|
|
|
|
# The XML tags in the comments are used to help produce the documentation
|
|
# from the sample/reference file, and not at all at run time, where
|
|
# comments are just comments. Edit at will.
|
|
|
|
# This typically lives in $prefix/share/recoll/examples and provides
|
|
# default values. You can override selected parameters by adding assignments
|
|
# to ~/.recoll/recoll.conf (or $RECOLL_CONFDIR/recoll.conf)
|
|
#
|
|
# Most of the important values in this file can be set from the GUI
|
|
# configuration menus, which may be an easier approach than direct editing.
|
|
|
|
# <grouptitle id="WHATDOCS">Parameters affecting what documents we
|
|
# index</grouptitle>
|
|
|
|
# <var name="topdirs" type="string"><brief>Space-separated list of files or
|
|
# directories to recursively index.</brief><descr>Default to ~ (indexes
|
|
# $HOME). You can use symbolic links in the list, they will be followed,
|
|
# independently of the value of the followLinks variable.</descr></var>
|
|
topdirs = ~
|
|
|
|
# <var name="monitordirs" type="string"><brief>(1.25) Space-separated list of
|
|
# files or directories to monitor for updates.</brief><descr>When running
|
|
# the real-time indexer, this allows monitoring only a subset of the whole
|
|
# indexed area. The elements must be included in the tree defined by the
|
|
# 'topdirs' members.</descr></var>
|
|
#monitordirs=
|
|
|
|
# <var name="skippedNames" type="string">
|
|
#
|
|
# <brief>Files and directories which should be ignored.</brief> <descr>
|
|
# White space separated list of wildcard patterns (simple ones, not paths,
|
|
# must contain no / ), which will be tested against file and directory
|
|
# names. The list in the default configuration does not exclude hidden
|
|
# directories (names beginning with a dot), which means that it may index
|
|
# quite a few things that you do not want. On the other hand, email user
|
|
# agents like Thunderbird usually store messages in hidden directories, and
|
|
# you probably want this indexed. One possible solution is to have ".*" in
|
|
# "skippedNames", and add things like "~/.thunderbird" "~/.evolution" to
|
|
# "topdirs". Not even the file names are indexed for patterns in this
|
|
# list, see the "noContentSuffixes" variable for an alternative approach
|
|
# which indexes the file names. Can be redefined for any
|
|
# subtree.</descr></var>
|
|
skippedNames = #* CVS Cache cache* .cache caughtspam tmp \
|
|
.thumbnails .svn \
|
|
*~ .beagle .git .hg .bzr loop.ps .xsession-errors \
|
|
.recoll* xapiandb recollrc recoll.conf
|
|
|
|
# <var name="skippedNames-" type="string">
|
|
#
|
|
# <brief>List of patterns to remove from the default skippedNames
|
|
# list.</brief><descr></descr></var>
|
|
skippedNames- =
|
|
|
|
# <var name="skippedNames+" type="string">
|
|
#
|
|
# <brief>List of patterns to add to the default skippedNames
|
|
# list.</brief><descr></descr></var>
|
|
skippedNames+ =
|
|
|
|
# <var name="noContentSuffixes" type="string">
|
|
#
|
|
# <brief>List of name endings (not necessarily dot-separated suffixes) for
|
|
# which we don't try MIME type identification, and don't uncompress or
|
|
# index content.</brief><descr>Only the names will be indexed. This
|
|
# complements the now obsoleted recoll_noindex list from the mimemap file,
|
|
# which will go away in a future release (the move from mimemap to
|
|
# recoll.conf allows editing the list through the GUI). This is different
|
|
# from skippedNames because these are name ending matches only (not
|
|
# wildcard patterns), and the file name itself gets indexed normally. This
|
|
# can be redefined for subdirectories.</descr></var>
|
|
noContentSuffixes = .md5 .map \
|
|
.o .lib .dll .a .sys .exe .com \
|
|
.mpp .mpt .vsd \
|
|
.img .img.gz .img.bz2 .img.xz .image .image.gz .image.bz2 .image.xz \
|
|
.dat .bak .rdf .log.gz .log .db .msf .pid \
|
|
,v ~ #
|
|
|
|
# <var name="noContentSuffixes-" type="string">
|
|
#
|
|
# <brief>List of name endings to remove from the default noContentSuffixes
|
|
# list.</brief><descr></descr></var>
|
|
noContentSuffixes- =
|
|
|
|
# <var name="noContentSuffixes+" type="string">
|
|
#
|
|
# <brief>List of name endings to add to the default noContentSuffixes
|
|
# list.</brief><descr></descr></var>
|
|
noContentSuffixes+ =
|
|
|
|
|
|
# <var name="skippedPaths" type="string">
|
|
#
|
|
# <brief>Absolute paths we should not go into.</brief>
|
|
# <descr>Space-separated list of wildcard expressions for absolute
|
|
# filesystem paths. Must be defined at the top level of the configuration
|
|
# file, not in a subsection. Can contain files and directories. The database and
|
|
# configuration directories will automatically be added. The expressions
|
|
# are matched using 'fnmatch(3)' with the FNM_PATHNAME flag set by
|
|
# default. This means that '/' characters must be matched explicitly. You
|
|
# can set 'skippedPathsFnmPathname' to 0 to disable the use of FNM_PATHNAME
|
|
# (meaning that '/*/dir3' will match '/dir1/dir2/dir3'). The default value
|
|
# contains the usual mount point for removable media to remind you that it
|
|
# is a bad idea to have Recoll work on these (esp. with the monitor: media
|
|
# gets indexed on mount, all data gets erased on unmount). Explicitly
|
|
# adding '/media/xxx' to the 'topdirs' variable will override
|
|
# this.</descr></var>
|
|
skippedPaths = /media
|
|
|
|
# <var name="skippedPathsFnmPathname" type="bool"><brief>Set to 0 to
|
|
# override use of FNM_PATHNAME for matching skipped
|
|
# paths.</brief><descr></descr></var>
|
|
#skippedPathsFnmPathname = 1
|
|
|
|
# <var name="daemSkippedPaths" type="string">
|
|
#
|
|
# <brief>skippedPaths equivalent specific to
|
|
# real time indexing.</brief><descr>This enables having parts of the tree
|
|
# which are initially indexed but not monitored. If daemSkippedPaths is
|
|
# not set, the daemon uses skippedPaths.</descr></var>
|
|
#daemSkippedPaths =
|
|
|
|
|
|
# <var name="zipUseSkippedNames" type="bool">
|
|
#
|
|
# <brief>Use skippedNames inside Zip archives.</brief><descr>Fetched
|
|
# directly by the rclzip handler. Skip the patterns defined by skippedNames
|
|
# inside Zip archives. Can be redefined for subdirectories.
|
|
# See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
|
|
# </descr></var>
|
|
#zipUseSkippedNames = 0
|
|
|
|
# <var name="zipSkippedNames" type="string">
|
|
#
|
|
# <brief>Space-separated list of wildcard expressions for names that should
|
|
# be ignored inside zip archives.</brief><descr>This is used directly by
|
|
# the zip handler. If zipUseSkippedNames is not set, zipSkippedNames
|
|
# defines the patterns to be skipped inside archives. If zipUseSkippedNames
|
|
# is set, the two lists are concatenated and used. Can be redefined for
|
|
# subdirectories.
|
|
# See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
|
|
# </descr></var>
|
|
#zipSkippedNames =
|
|
|
|
# <var name="followLinks" type="bool"><brief>Follow symbolic links during
|
|
# indexing.</brief><descr>The default is to ignore symbolic links to avoid
|
|
# multiple indexing of linked files. No effort is made to avoid duplication
|
|
# when this option is set to true. This option can be set individually for
|
|
# each of the 'topdirs' members by using sections. It can not be changed
|
|
# below the 'topdirs' level. Links in the 'topdirs' list itself are always
|
|
# followed.</descr></var>
|
|
#followLinks = 0
|
|
|
|
# <var name="indexedmimetypes" type="string"><brief>Restrictive list of
|
|
# indexed mime types.</brief><descr>Normally not set (in which case all
|
|
# supported types are indexed). If it is set, only the types from the list
|
|
# will have their contents indexed. The names will be indexed anyway if
|
|
# indexallfilenames is set (default). MIME type names should be taken from
|
|
# the mimemap file (the values may be different from xdg-mime or file -i
|
|
# output in some cases). Can be redefined for subtrees.</descr></var>
|
|
#indexedmimetypes =
|
|
|
|
# <var name="excludedmimetypes" type="string"><brief>List of excluded MIME
|
|
# types.</brief><descr>Lets you exclude some types from indexing. MIME type
|
|
# names should be taken from the mimemap file (the values may be different
|
|
# from xdg-mime or file -i output in some cases). Can be redefined for
|
|
# subtrees.</descr></var>
|
|
#excludedmimetypes =
|
|
|
|
# <var name="nomd5types" type="string"><brief>Don't compute md5 for
|
|
# these types.</brief><descr>md5 checksums are used only for deduplicating
|
|
# results, and can be very expensive to compute on multimedia or other big
|
|
# files. This list lets you turn off md5 computation for selected types. It
|
|
# is global (no redefinition for subtrees). At the moment, it only has an
|
|
# effect for external handlers (exec and execm). The file types can be
|
|
# specified by listing either MIME types (e.g. audio/mpeg) or handler names
|
|
# (e.g. rclaudio).</descr></var>
|
|
nomd5types = rclaudio
|
|
|
|
# <var name="compressedfilemaxkbs" type="int"><brief>Size limit for compressed
|
|
# files.</brief><descr>We need to decompress these in a
|
|
# temporary directory for identification, which can be wasteful in some
|
|
# cases. Limit the waste. Negative means no limit. 0 results in no
|
|
# processing of any compressed file. Default 50 MB.</descr></var>
|
|
compressedfilemaxkbs = 50000
|
|
|
|
# <var name="textfilemaxmbs" type="int"><brief>Size limit for text
|
|
# files.</brief><descr>Mostly for skipping monster
|
|
# logs. Default 20 MB.</descr></var>
|
|
textfilemaxmbs = 20
|
|
|
|
# <var name="indexallfilenames" type="bool"><brief>Index the file names of
|
|
# unprocessed files</brief><descr>Index the names of files the contents of
|
|
# which we don't index because of an excluded or unsupported MIME
|
|
# type.</descr></var>
|
|
indexallfilenames = 1
|
|
|
|
# <var name="usesystemfilecommand" type="bool"><brief>Use a system command
|
|
# for file MIME type guessing as a final step in file type
|
|
# identification</brief><descr>This is generally useful, but will usually
|
|
# cause the indexing of many bogus 'text' files. See 'systemfilecommand'
|
|
# for the command used.</descr></var>
|
|
usesystemfilecommand = 1
|
|
|
|
# <var name="systemfilecommand" type="string"><brief>Command used to guess
|
|
# MIME types if the internal methods fail</brief><descr>This should be a
|
|
# "file -i" workalike. The file path will be added as a last parameter to
|
|
# the command line. 'xdg-mime' works better than the traditional 'file'
|
|
# command, and is now the configured default (with a hard-coded fallback to
|
|
# 'file')</descr></var>
|
|
systemfilecommand = xdg-mime query filetype
|
|
|
|
# <var name="processwebqueue" type="bool"><brief>Decide if we process the
|
|
# Web queue.</brief><descr>The queue is a directory where the Recoll Web
|
|
# browser plugins create the copies of visited pages.</descr></var>
|
|
processwebqueue = 0
|
|
|
|
# <var name="textfilepagekbs" type="int"><brief>Page size for text
|
|
# files.</brief><descr>If this is set, text/plain files will be divided
|
|
# into documents of approximately this size. Will reduce memory usage at
|
|
# index time and help with loading data in the preview window at query
|
|
# time. Particularly useful with very big files, such as application or
|
|
# system logs. Also see textfilemaxmbs and
|
|
# compressedfilemaxkbs.</descr></var>
|
|
textfilepagekbs = 1000
|
|
|
|
# <var name="membermaxkbs" type="int"><brief>Size limit for archive
|
|
# members.</brief><descr>This is passed to the filters in the environment
|
|
# as RECOLL_FILTER_MAXMEMBERKB.</descr></var>
|
|
membermaxkbs = 50000
|
|
|
|
|
|
|
|
# <grouptitle id="TERMS">Parameters affecting how we generate
|
|
# terms and organize the index</grouptitle>
|
|
|
|
# Changing some of these parameters will imply a full
|
|
# reindex. Also, when using multiple indexes, it may not make sense
|
|
# to search indexes that don't share the values for these parameters,
|
|
# because they usually affect both search and index operations.
|
|
|
|
|
|
# <var name="indexStripChars" type="bool"><brief>Decide if we store
|
|
# character case and diacritics in the index.</brief><descr>If we do,
|
|
# searches sensitive to case and diacritics can be performed, but the index
|
|
# will be bigger, and some marginal weirdness may sometimes occur. The
|
|
# default is a stripped index. When using multiple indexes for a search,
|
|
# this parameter must be defined identically for all. Changing the value
|
|
# implies an index reset.</descr></var>
|
|
indexStripChars = 1
|
|
|
|
# <var name="indexStoreDocText" type="bool"><brief>Decide if we store the
|
|
# documents' text content in the index.</brief><descr>Storing the text
|
|
# allows extracting snippets from it at query time, instead of building
|
|
# them from index position data.
|
|
#
|
|
# Newer Xapian index formats have rendered our use of positions list
|
|
# unacceptably slow in some cases. The last Xapian index format with good
|
|
# performance for the old method is Chert, which is default for 1.2, still
|
|
# supported but not default in 1.4 and will be dropped in 1.6.
|
|
#
|
|
# The stored document text is translated from its original format to UTF-8
|
|
# plain text, but not stripped of upper-case, diacritics, or punctuation
|
|
# signs. Storing it increases the index size by 10-20% typically, but also
|
|
# allows for nicer snippets, so it may be worth enabling it even if not
|
|
# strictly needed for performance if you can afford the space.
|
|
#
|
|
# The variable only has an effect when creating an index, meaning that the
|
|
# xapiandb directory must not exist yet. Its exact effect depends on the
|
|
# Xapian version.
|
|
#
|
|
# For Xapian 1.4, if the variable is set to 0, the Chert format will be
|
|
# used, and the text will not be stored. If the variable is 1, Glass will
|
|
# be used, and the text stored.
|
|
#
|
|
# For Xapian 1.2, and for versions 1.5 and newer, the index format is
|
|
# always the default, but the variable controls if the text is stored or
|
|
# not, and the abstract generation method. With Xapian 1.5 and later, and
|
|
# the variable set to 0, abstract generation may be very slow, but this
|
|
# setting may still be useful to save space if you do not use abstract
|
|
# generation at all.
|
|
# </descr></var>
|
|
indexStoreDocText = 1
|
|
|
|
# <var name="nonumbers" type="bool"><brief>Decides if terms will be
|
|
# generated for numbers.</brief><descr>For example "123", "1.5e6",
|
|
# 192.168.1.4, would not be indexed if nonumbers is set ("value123" would
|
|
# still be). Numbers are often quite interesting to search for, and this
|
|
# should probably not be set except for special situations, i.e., scientific
|
|
# documents with huge amounts of numbers in them, where setting nonumbers
|
|
# will reduce the index size. This can only be set for a whole index, not
|
|
# for a subtree.</descr></var>
|
|
#nonumbers = 0
|
|
|
|
# <var name="dehyphenate" type="bool"><brief>Determines if we index
|
|
# 'coworker' also when the input is 'co-worker'.</brief><descr>This is new
|
|
# in version 1.22, and on by default. Setting the variable to off allows
|
|
# restoring the previous behaviour.</descr></var>
|
|
#dehyphenate = 1
|
|
|
|
# <var name="nocjk" type="bool"><brief>Decides if specific East Asian
|
|
# (Chinese Korean Japanese) characters/word splitting is turned
|
|
# off.</brief><descr>This will save a small amount of CPU if you have no CJK
|
|
# documents. If your document base does include such text but you are not
|
|
# interested in searching it, setting nocjk may be a
|
|
# significant time and space saver.</descr></var>
|
|
#nocjk = 0
|
|
|
|
# <var name="cjkngramlen" type="int"><brief>This lets you adjust the size of
|
|
# n-grams used for indexing CJK text.</brief><descr>The default value of 2 is
|
|
# probably appropriate in most cases. A value of 3 would allow more precision
|
|
# and efficiency on longer words, but the index will be approximately twice
|
|
# as large.</descr></var>
|
|
#cjkngramlen = 2
|
|
|
|
# <var name="indexstemminglanguages" type="string">
|
|
#
|
|
# <brief>Languages for which to create stemming expansion
|
|
# data.</brief><descr>Stemmer names can be found by executing 'recollindex
|
|
# -l', or this can also be set from a list in the GUI.</descr></var>
|
|
indexstemminglanguages = english
|
|
|
|
# <var name="defaultcharset" type="string"><brief>Default character
|
|
# set.</brief><descr>This is used for files which do not contain a
|
|
# character set definition (e.g.: text/plain). Values found inside files,
|
|
# e.g. a 'charset' tag in HTML documents, will override it. If this is not
|
|
# set, the default character set is the one defined by the NLS environment
|
|
# ($LC_ALL, $LC_CTYPE, $LANG), or ultimately iso-8859-1 (cp-1252 in fact).
|
|
# If for some reason you want a general default which does not match your
|
|
# LANG and is not 8859-1, use this variable. This can be redefined for any
|
|
# sub-directory.</descr></var>
|
|
#defaultcharset = iso-8859-1
|
|
|
|
# <var name="unac_except_trans" type="string"><brief>A list of characters,
|
|
# encoded in UTF-8, which should be handled specially
|
|
# when converting text to unaccented lowercase.</brief><descr>For
|
|
# example, in Swedish, the letter a with diaeresis has full alphabet
|
|
# citizenship and should not be turned into an a.
|
|
# Each element in the space-separated list has the special character as
|
|
# first element and the translation following. The handling of both the
|
|
# lowercase and upper-case versions of a character should be specified, as
|
|
# membership in the list will turn off both standard accent and case
|
|
# processing. The value is global and affects both indexing and querying.
|
|
# Examples:
|
|
# Swedish:
|
|
# unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ffff fifi flfl åå Åå
|
|
# . German:
|
|
# unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ffff fifi flfl
|
|
# In French, you probably want to decompose oe and ae and nobody would type
|
|
# a German ß
|
|
# unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl
|
|
# . The default for all until someone protests follows. These decompositions
|
|
# are not performed by unac, but it is unlikely that someone would type the
|
|
# composed forms in a search.
|
|
# unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl</descr></var>
|
|
unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl
|
|
|
|
# <var name="maildefcharset" type="string"><brief>Overrides the default
|
|
# character set for email messages which don't specify
|
|
# one.</brief><descr>This is mainly useful for readpst (libpst) dumps,
|
|
# which are utf-8 but do not say so.</descr></var>
|
|
#maildefcharset=
|
|
|
|
# <var name="localfields" type="string"><brief>Set fields on all files
|
|
# (usually of a specific fs area).</brief><descr>Syntax is the usual:
|
|
# name = value ; attr1 = val1 ; [...]
|
|
# value is empty so this needs an initial semi-colon. This is useful, e.g.,
|
|
# for setting the rclaptg field for application selection inside
|
|
# mimeview.</descr></var>
|
|
#[/some/app/directory]
|
|
#localfields = ; rclaptg = someapp; otherfield = somevalue
|
|
|
|
# <var name="testmodifusemtime" type="bool"><brief>Use mtime instead of
|
|
# ctime to test if a file has been modified.</brief><descr>The time is used
|
|
# in addition to the size, which is always used.
|
|
# Setting this can reduce re-indexing on systems where extended attributes
|
|
# are used (by some other application), but not indexed, because changing
|
|
# extended attributes only affects ctime.
|
|
# Notes:
|
|
# - This may prevent detection of change in some marginal file rename cases
|
|
# (the target would need to have the same size and mtime).
|
|
# - You should probably also set noxattrfields to 1 in this case, except if
|
|
# you still prefer to perform xattr indexing, for example if the local
|
|
# file update pattern makes it of value (as in general, there is a risk
|
|
# for pure extended attributes updates without file modification to go
|
|
# undetected). Perform a full index reset after changing this.
|
|
# </descr></var>
|
|
testmodifusemtime = 0
|
|
|
|
# <var name="noxattrfields" type="bool"><brief>Disable extended attributes
|
|
# conversion to metadata fields.</brief><descr>This probably needs to be
|
|
# set if testmodifusemtime is set.</descr></var>
|
|
noxattrfields = 0
|
|
|
|
# <var name="metadatacmds" type="string"><brief>Define commands to
|
|
# gather external metadata, e.g. tmsu tags.</brief><descr>
|
|
# There can be several entries, separated by semi-colons, each defining
|
|
# which field name the data goes into and the command to use. Don't forget the
|
|
# initial semi-colon. All the field names must be different. You can use
|
|
# aliases in the "field" file if necessary.
|
|
# As a not too pretty hack conceded to convenience, any field name
|
|
# beginning with "rclmulti" will be taken as an indication that the command
|
|
# returns multiple field values inside a text blob formatted as a recoll
|
|
# configuration file ("fieldname = fieldvalue" lines). The rclmultixx name
|
|
# will be ignored, and field names and values will be parsed from the data.
|
|
# Example: metadatacmds = ; tags = tmsu tags %f; rclmulti1 = cmdOutputsConf %f
|
|
# </descr></var>
|
|
#[/some/area/of/the/fs]
|
|
#metadatacmds = ; tags = tmsu tags %f; rclmulti1 = cmdOutputsConf %f
|
|
|
|
|
|
|
|
|
|
# <grouptitle id="STORE">Parameters affecting where and how we store
|
|
# things</grouptitle>
|
|
|
|
# <var name="cachedir" type="dfn">
|
|
#
|
|
# <brief>Top directory for Recoll data.</brief><descr>Recoll data
|
|
# directories are normally located relative to the configuration directory
|
|
# (e.g. ~/.recoll/xapiandb, ~/.recoll/mboxcache). If 'cachedir' is set, the
|
|
# directories are stored under the specified value instead (e.g. if
|
|
# cachedir is ~/.cache/recoll, the default dbdir would be
|
|
# ~/.cache/recoll/xapiandb). This affects dbdir, webcachedir,
|
|
# mboxcachedir, aspellDicDir, which can still be individually specified to
|
|
# override cachedir. Note that if you have multiple configurations, each
|
|
# must have a different cachedir, there is no automatic computation of a
|
|
# subpath under cachedir.</descr></var>
|
|
#cachedir = ~/.cache/recoll
|
|
|
|
# <var name="maxfsoccuppc" type="int"><brief>Maximum file system occupation
|
|
# over which we stop indexing.</brief><descr>The value is a percentage,
|
|
# corresponding to what the "Capacity" df output column shows. The default
|
|
# value is 0, meaning no checking.</descr></var>
|
|
maxfsoccuppc = 0
|
|
|
|
# <var name="dbdir" type="dfn"><brief>Xapian database directory
|
|
# location.</brief><descr>This will be created on first indexing. If the
|
|
# value is not an absolute path, it will be interpreted as relative to
|
|
# cachedir if set, or the configuration directory (-c argument or
|
|
# $RECOLL_CONFDIR). If nothing is specified, the default is then
|
|
# ~/.recoll/xapiandb/</descr></var>
|
|
dbdir = xapiandb
|
|
|
|
# <var name="idxstatusfile" type="fn">
|
|
#
|
|
# <brief>Name of the scratch file where the indexer process updates its
|
|
# status.</brief><descr>Default: idxstatus.txt inside the configuration
|
|
# directory.</descr></var>
|
|
#idxstatusfile = idxstatus.txt
|
|
|
|
# <var name="mboxcachedir" type="dfn">
|
|
#
|
|
# <brief>Directory location for storing mbox message offsets cache
|
|
# files.</brief><descr>This is normally 'mboxcache' under cachedir if set,
|
|
# or else under the configuration directory, but it may be useful to share
|
|
# a directory between different configurations.</descr></var>
|
|
#mboxcachedir = mboxcache
|
|
|
|
# <var name="mboxcacheminmbs" type="int">
|
|
#
|
|
# <brief>Minimum mbox file size over which we cache the offsets.</brief>
|
|
# <descr>There is really no sense in caching offsets for small files. The
|
|
# default is 5 MB.</descr></var>
|
|
#mboxcacheminmbs = 5
|
|
|
|
# <var name="webcachedir" type="dfn">
|
|
#
|
|
# <brief>Directory where we store the archived web pages.</brief>
|
|
# <descr>This is only used by the web history indexing code.
|
|
# Default: cachedir/webcache if cachedir is set, else
|
|
# $RECOLL_CONFDIR/webcache</descr></var>
|
|
webcachedir = webcache
|
|
|
|
# <var name="webcachemaxmbs" type="int">
|
|
# <brief>Maximum size in MB of the Web archive.</brief>
|
|
# <descr>This is only used by the web history indexing code.
|
|
# Default: 40 MB.
|
|
# Reducing the size will not physically truncate the file.</descr></var>
|
|
webcachemaxmbs = 40
|
|
|
|
# <var name="webqueuedir" type="fn">
|
|
#
|
|
# <brief>The path to the Web indexing queue.</brief><descr>This used to be
|
|
# hard-coded in the old plugin as ~/.recollweb/ToIndex so there would be no
|
|
# need or possibility to change it, but the WebExtensions plugin now downloads
|
|
# the files to the user Downloads directory, and a script moves them to
|
|
# webqueuedir. The script reads this value from the config so it has become
|
|
# possible to change it.</descr></var>
|
|
#webqueuedir = ~/.recollweb/ToIndex
|
|
|
|
# <var name="webdownloadsdir" type="fn">
|
|
#
|
|
# <brief>The path to browser downloads directory.</brief><descr>This is
|
|
# where the new browser add-on extension has to create the files. They are
|
|
# then moved by a script to webqueuedir.</descr></var>
|
|
#webdownloadsdir = ~/Downloads
|
|
|
|
# <var name="aspellDicDir" type="dfn">
|
|
#
|
|
# <brief>Aspell dictionary storage directory location.</brief> <descr>The
|
|
# aspell dictionary (aspdict.(lang).rws) is normally stored in the
|
|
# directory specified by cachedir if set, or under the configuration
|
|
# directory.</descr></var>
|
|
#aspellDicDir =
|
|
|
|
# <var name="filtersdir" type="dfn">
|
|
#
|
|
# <brief>Directory location for executable input handlers.</brief><descr>If
|
|
# RECOLL_FILTERSDIR is set in the environment, we use it instead. Defaults
|
|
# to $prefix/share/recoll/filters. Can be redefined for
|
|
# subdirectories.</descr></var>
|
|
#filtersdir = /path/to/my/filters
|
|
|
|
# <var name="iconsdir" type="dfn">
|
|
#
|
|
# <brief>Directory location for icons.</brief><descr>The only reason to
|
|
# change this would be if you want to change the icons displayed in the
|
|
# result list. Defaults to $prefix/share/recoll/images</descr></var>
|
|
#iconsdir = /path/to/my/icons
|
|
|
|
# <grouptitle id="PERFS">Parameters affecting indexing performance and
|
|
# resource usage</grouptitle>
|
|
|
|
# <var name="idxflushmb" type="int">
|
|
#
|
|
# <brief>Threshold (megabytes of new data) where we flush from memory to
|
|
# disk index.</brief> <descr>Setting this allows some control over memory
|
|
# usage by the indexer process. A value of 0 means no explicit flushing,
|
|
# which lets Xapian perform its own thing, meaning flushing every
|
|
# $XAPIAN_FLUSH_THRESHOLD documents created, modified or deleted: as memory
|
|
# usage depends on average document size, not only document count, the
|
|
# Xapian approach is not very useful, and you should let Recoll manage
|
|
# the flushes. The program compiled value is 0. The configured default
|
|
# value (from this file) is 10 MB, and will be too low in many cases (it is
|
|
# chosen to conserve memory). If you are looking
|
|
# for maximum speed, you may want to experiment with values between 20 and
|
|
# 200. In my experience, values beyond this are always counterproductive. If
|
|
# you find otherwise, please drop me a note.</descr></var>
|
|
idxflushmb = 10
|
|
|
|
# <var name="filtermaxseconds" type="int">
|
|
#
|
|
# <brief>Maximum external filter execution time in
|
|
# seconds.</brief><descr>Default 1200 (20mn). Set to 0 for no limit. This
|
|
# is mainly to avoid infinite loops in postscript files
|
|
# (loop.ps)</descr></var>
|
|
filtermaxseconds = 1200
|
|
|
|
# <var name="filtermaxmbytes" type="int">
|
|
#
|
|
# <brief>Maximum virtual memory space for filter processes
|
|
# (setrlimit(RLIMIT_AS)), in megabytes.</brief> <descr>Note that this
|
|
# includes any mapped libs (there is no reliable Linux way to limit the
|
|
# data space only), so we need to be a bit generous here. Anything over
|
|
# 2000 will be ignored on 32-bit machines.</descr></var>
|
|
filtermaxmbytes = 2000
|
|
|
|
# <var name="thrQSizes" type="string">
|
|
#
|
|
# <brief>Stage input queues configuration.</brief> <descr>There are three
|
|
# internal queues in the indexing pipeline stages (file data extraction,
|
|
# terms generation, index update). This parameter defines the queue depths
|
|
# for each stage (three integer values). If a value of -1 is given for a
|
|
# given stage, no queue is used, and the thread will go on performing the
|
|
# next stage. In practice, deep queues have not been shown to increase
|
|
# performance. Default: a value of 0 for the first queue tells Recoll to
|
|
# perform autoconfiguration based on the detected number of CPUs (no need
|
|
# for the two other values in this case). Use thrQSizes = -1 -1 -1 to
|
|
# disable multithreading entirely.</descr></var>
|
|
thrQSizes = 0
|
|
|
|
# <var name="thrTCounts" type="string">
|
|
#
|
|
# <brief>Number of threads used for each indexing stage.</brief> <descr>The
|
|
# three stages are: file data extraction, terms generation, index
|
|
# update. The use of the counts is also controlled by some special values
|
|
# in thrQSizes: if the first queue depth is 0, all counts are ignored
|
|
# (autoconfigured); if a value of -1 is used for a queue depth, the
|
|
# corresponding thread count is ignored. It makes no sense to use a value
|
|
# other than 1 for the last stage because updating the Xapian index is
|
|
# necessarily single-threaded (and protected by a mutex).</descr></var>
|
|
#thrTCounts = 4 2 1
|
|
|
|
|
|
# <grouptitle id="MISC">Miscellaneous parameters</grouptitle>
|
|
|
|
# <var name="loglevel" type="int">
|
|
#
|
|
# <brief>Log file verbosity 1-6.</brief> <descr>A value of 2 will print
|
|
# only errors and warnings. 3 will print information like document updates,
|
|
# 4 is quite verbose and 6 very verbose.</descr></var>
|
|
loglevel = 3
|
|
|
|
# <var name="logfilename" type="fn">
|
|
#
|
|
# <brief>Log file destination. Use 'stderr' (default) to write to the
|
|
# console.</brief><descr></descr></var>
|
|
logfilename = stderr
|
|
|
|
# <var name="idxloglevel" type="int">
|
|
#
|
|
# <brief>Override loglevel for the indexer.</brief><descr></descr></var>
|
|
#idxloglevel = 3
|
|
|
|
# <var name="idxlogfilename" type="fn">
|
|
#
|
|
# <brief>Override logfilename for the indexer.</brief><descr></descr></var>
|
|
#idxlogfilename = stderr
|
|
|
|
# <var name="daemloglevel" type="int">
|
|
#
|
|
# <brief>Override loglevel for the indexer in real time
|
|
# mode.</brief><descr>The default is to use the idx... values if set, else
|
|
# the log... values.</descr></var>
|
|
#daemloglevel = 3
|
|
|
|
# <var name="daemlogfilename" type="fn">
|
|
#
|
|
# <brief>Override logfilename for the indexer in real time
|
|
# mode.</brief><descr>The default is to use the idx... values if set, else
|
|
# the log... values.</descr></var>
|
|
#daemlogfilename = /dev/null
|
|
|
|
# <var name="orgidxconfdir" type="dfn">
|
|
#
|
|
# <brief>Original location of the configuration directory.</brief>
|
|
# <descr>This is used exclusively for movable datasets. Locating the
|
|
# configuration directory inside the directory tree makes it possible to
|
|
# provide automatic query time path translations once the data set has
|
|
# moved (for example, because it has been mounted on another
|
|
# location).</descr></var>
|
|
#orgidxconfdir =
|
|
|
|
# <var name="curidxconfdir" type="dfn">
|
|
#
|
|
# <brief>Current location of the configuration directory.</brief>
|
|
# <descr>Complement orgidxconfdir for movable datasets. This should be used
|
|
# if the configuration directory has been copied from the dataset to
|
|
# another location, either because the dataset is readonly and an r/w copy
|
|
# is desired, or for performance reasons. This records the original moved
|
|
# location before copy, to allow path translation computations. For
|
|
# example if a dataset originally indexed as '/home/me/mydata/config' has
|
|
# been mounted to '/media/me/mydata', and the GUI is running from a copied
|
|
# configuration, orgidxconfdir would be '/home/me/mydata/config', and
|
|
# curidxconfdir (as set in the copied configuration) would be
|
|
# '/media/me/mydata/config'.</descr></var>
|
|
#curidxconfdir =
|
|
|
|
# <var name="idxrundir" type="dfn">
|
|
#
|
|
# <brief>Indexing process current directory.</brief> <descr>The input
|
|
# handlers sometimes leave temporary files in the current directory, so it
|
|
# makes sense to have recollindex chdir to some temporary directory. If the
|
|
# value is empty, the current directory is not changed. If the
|
|
# value is (literal) tmp, we use the temporary directory as set by the
|
|
# environment (RECOLL_TMPDIR else TMPDIR else /tmp). If the value is an
|
|
# absolute path to a directory, we go there.</descr></var>
|
|
idxrundir = tmp
|
|
|
|
# <var name="checkneedretryindexscript" type="fn">
|
|
#
|
|
# <brief>Script used to heuristically check if we need to retry indexing
|
|
# files which previously failed.</brief> <descr>The default script checks
|
|
# the modified dates on /usr/bin and /usr/local/bin. A relative path will
|
|
# be looked up in the filters dirs, then in the path. Use an absolute path
|
|
# to do otherwise.</descr></var>
|
|
checkneedretryindexscript = rclcheckneedretry.sh
|
|
|
|
# <var name="recollhelperpath" type="string">
|
|
#
|
|
# <brief>Additional places to search for helper executables.</brief>
|
|
# <descr>This is only used on Windows for now.</descr></var>
|
|
#recollhelperpath = c:/someprog/bin;c:/someotherprog/bin
|
|
|
|
# <var name="idxabsmlen" type="int">
|
|
#
|
|
# <brief>Length of abstracts we store while indexing.</brief>
|
|
# <descr>Recoll stores an abstract for each indexed file.
|
|
# The text can come from an actual 'abstract' section in the
|
|
# document or will just be the beginning of the document. It is stored in
|
|
# the index so that it can be displayed inside the result lists without
|
|
# decoding the original file. The idxabsmlen parameter
|
|
# defines the size of the stored abstract. The default value is 250
|
|
# bytes. The search interface gives you the choice to display this stored
|
|
# text or a synthetic abstract built by extracting text around the search
|
|
# terms. If you always prefer the synthetic abstract, you can reduce this
|
|
# value and save a little space.</descr></var>
|
|
#idxabsmlen = 250
|
|
|
|
# <var name="idxmetastoredlen" type="int">
|
|
#
|
|
# <brief>Truncation length of stored metadata fields.</brief><descr>This
|
|
# does not affect indexing (the whole field is processed anyway), just the
|
|
# amount of data stored in the index for the purpose of displaying fields
|
|
# inside result lists or previews. The default value is 150 bytes which
|
|
# may be too low if you have custom fields.</descr></var>
|
|
#idxmetastoredlen = 150
|
|
|
|
# <var name="idxtexttruncatelen" type="int">
|
|
#
|
|
# <brief>Truncation length for all document texts.</brief><descr>Only index
|
|
# the beginning of documents. This is not recommended except if you are
|
|
# sure that the interesting keywords are at the top and have severe disk
|
|
# space issues.</descr></var>
|
|
#idxtexttruncatelen = 0
|
|
|
|
# <var name="aspellLanguage" type="string">
|
|
#
|
|
# <brief>Language definitions to use when creating the aspell
|
|
# dictionary.</brief><descr>The value must match a set of aspell language
|
|
# definition files. You can type "aspell dicts" to see a list. The default
|
|
# if this is not set is to use the NLS environment to guess the
|
|
# value.</descr></var>
|
|
#aspellLanguage = en
|
|
|
|
# <var name="aspellAddCreateParam" type="string">
|
|
#
|
|
# <brief>Additional option and parameter to aspell dictionary creation
|
|
# command.</brief><descr>Some aspell packages may need an additional option
|
|
# (e.g. on Debian Jessie: --local-data-dir=/usr/lib/aspell). See Debian bug
|
|
# 772415.</descr></var>
|
|
#aspellAddCreateParam = --local-data-dir=/usr/lib/aspell
|
|
|
|
# <var name="aspellKeepStderr" type="bool">
|
|
#
|
|
# <brief>Set this to have a look at aspell dictionary creation
|
|
# errors.</brief><descr>There are always many, so this is mostly for
|
|
# debugging.</descr></var>
|
|
#aspellKeepStderr = 1
|
|
|
|
# <var name="noaspell" type="bool">
|
|
#
|
|
# <brief>Disable aspell use.</brief><descr>The aspell dictionary generation
|
|
# takes time, and some combinations of aspell version, language, and local
|
|
# terms, result in aspell crashing, so it sometimes makes sense to just
|
|
# disable the thing.</descr></var>
|
|
#noaspell = 1
|
|
|
|
# <var name="monauxinterval" type="int">
|
|
#
|
|
# <brief>Auxiliary database update interval.</brief><descr>The real time
|
|
# indexer only updates the auxiliary databases (stemdb, aspell)
|
|
# periodically, because it would be too costly to do it for every document
|
|
# change. The default period is one hour.</descr></var>
|
|
#monauxinterval = 3600
|
|
|
|
# <var name="monixinterval" type="int">
|
|
#
|
|
# <brief>Minimum interval (seconds) between processings of the indexing
|
|
# queue.</brief><descr>The real time indexer does not process each event
|
|
# when it comes in, but lets the queue accumulate, to diminish overhead and
|
|
# to aggregate multiple events affecting the same file. Default 30
|
|
# seconds.</descr></var>
|
|
#monixinterval = 30
|
|
|
|
# <var name="mondelaypatterns" type="string">
|
|
#
|
|
# <brief>Timing parameters for the real time indexing.</brief>
|
|
# <descr>Definitions for files which get a longer delay before reindexing
|
|
# is allowed. This is for fast-changing files, that should only be
|
|
# reindexed once in a while. A list of wildcardPattern:seconds pairs. The
|
|
# patterns are matched with fnmatch(pattern, path, 0). You can quote entries
|
|
# containing white space with double quotes (quote the whole entry, not the
|
|
# pattern). The default is empty.
|
|
# Example: mondelaypatterns = *.log:20 "*with spaces.*:30"</descr></var>
|
|
#mondelaypatterns = *.log:20 "*with spaces.*:30"
|
|
|
|
# <var name="monioniceclass" type="int">
|
|
#
|
|
# <brief>ionice class for the real time indexing process.</brief>
|
|
# <descr>On platforms where this is supported. The default value is
|
|
# 3.</descr></var>
|
|
#monioniceclass = 3
|
|
|
|
# <var name="monioniceclassdata" type="string">
|
|
#
|
|
# <brief>ionice class parameter for the real time indexing process.</brief>
|
|
# <descr>On platforms where this is supported. The default is
|
|
# empty.</descr></var>
|
|
#monioniceclassdata =
|
|
|
|
|
|
|
|
# <grouptitle id="QUERY">Query-time parameters (no impact on the
|
|
# index)</grouptitle>
|
|
|
|
# <var name="autodiacsens" type="bool">
|
|
#
|
|
# <brief>Auto-trigger diacritics sensitivity (raw index only).</brief>
|
|
# <descr>If the index is not stripped, decide if we automatically trigger
|
|
# diacritics sensitivity if the search term has accented characters (not in
|
|
# unac_except_trans). Else you need to use the query language and the "D"
|
|
# modifier to specify diacritics sensitivity. Default is no.</descr></var>
|
|
autodiacsens = 0
|
|
|
|
# <var name="autocasesens" type="bool">
|
|
#
|
|
# <brief>Auto-trigger case sensitivity (raw index only).</brief><descr>If
|
|
# the index is not stripped (see indexStripChars), decide if we
|
|
# automatically trigger character case sensitivity if the search term has
|
|
# upper-case characters in any but the first position. Else you need to use
|
|
# the query language and the "C" modifier to specify character-case
|
|
# sensitivity. Default is yes.</descr></var>
|
|
autocasesens = 1
|
|
|
|
# <var name="maxTermExpand" type="int"><brief>Maximum query expansion count
|
|
# for a single term (e.g.: when using wildcards).</brief><descr>This only
|
|
# affects queries, not indexing. We used to not limit this at all (except
|
|
# for filenames where the limit was too low at 1000), but it is
|
|
# unreasonable with a big index. Default 10000.</descr></var>
|
|
maxTermExpand = 10000
|
|
|
|
# <var name="maxXapianClauses" type="int"><brief>Maximum number of clauses
|
|
# we add to a single Xapian query.</brief><descr>This only affects queries,
|
|
# not indexing. In some cases, the result of term expansion can be
|
|
# multiplicative, and we want to avoid eating all the memory. Default
|
|
# 50000.</descr></var>
|
|
maxXapianClauses = 50000
|
|
|
|
# <var name="snippetMaxPosWalk" type="int">
|
|
#
|
|
# <brief>Maximum number of positions we walk while populating a snippet for
|
|
# the result list.</brief><descr>The default of 1,000,000 may be
|
|
# insufficient for very big documents; the consequence would be snippets
|
|
# with possibly meaning-altering missing words.</descr></var>
|
|
snippetMaxPosWalk = 1000000
|
|
|
|
|
|
# <grouptitle id="PDF">Parameters for the PDF input script</grouptitle>
|
|
|
|
# <var name="pdfocr" type="bool">
|
|
#
|
|
# <brief>Attempt OCR of PDF files with no text content if both tesseract and
|
|
# pdftoppm are installed.</brief><descr>The default is off because OCR is so
|
|
# very slow.</descr></var>
|
|
#pdfocr = 0
|
|
|
|
# <var name="pdfattach" type="bool">
|
|
#
|
|
# <brief>Enable PDF attachment extraction by executing pdftk (if
|
|
# available).</brief><descr>This is
|
|
# normally disabled, because it does slow down PDF indexing a bit even if
|
|
# not one attachment is ever found.</descr></var>
|
|
#pdfattach = 0
|
|
|
|
# <var name="pdfextrameta" type="string">
|
|
#
|
|
# <brief>Extract text from selected XMP metadata tags.</brief><descr>This
|
|
# is a space-separated list of qualified XMP tag names. Each element can also
|
|
# include a translation to a Recoll field name, separated by a '|'
|
|
# character. If the second element is absent, the tag name is used as the
|
|
# Recoll field name. You will also need to add specifications to the
|
|
# 'fields' file to direct processing of the extracted data.</descr></var>
|
|
#pdfextrameta = bibtex:location|location bibtex:booktitle bibtex:pages
|
|
|
|
# <var name="pdfextrametafix" type="fn">
|
|
#
|
|
# <brief>Define name of XMP field editing script.</brief><descr>This
|
|
# defines the name of a script to be loaded for editing XMP field
|
|
# values. The script should define a 'MetaFixer' class with a metafix()
|
|
# method which will be called with the qualified tag name and value of each
|
|
# selected field, for editing or erasing. A new instance is created for
|
|
# each document, so that the object can keep state for, e.g. eliminating
|
|
# duplicate values.</descr></var>
|
|
#pdfextrametafix = /path/to/fixerscript.py
|
|
|
|
|
|
# <grouptitle id="SPECLOCATIONS">Parameters set for specific
|
|
# locations</grouptitle>
|
|
|
|
# You could specify different parameters for a subdirectory like this:
|
|
#[~/hungariandocs/plain]
|
|
#defaultcharset = iso-8859-2
|
|
|
|
[/usr/share/man]
|
|
followLinks = 1
|
|
|
|
# <var name="mhmboxquirks" type="string">
|
|
#
|
|
# <brief>Enable thunderbird/mozilla-seamonkey mbox format quirks.</brief>
|
|
# <descr>Set this for the directory where the email mbox files are
|
|
# stored.</descr></var>
|
|
[~/.thunderbird]
|
|
mhmboxquirks = tbird
|
|
[~/.mozilla]
|
|
mhmboxquirks = tbird
|
|
|
|
# pidgin / purple directories for irc chats have names beginning with #
|
|
[~/.purple]
|
|
skippedNames =
|