# @(#$Id: recoll.conf.in,v 1.22 2008-10-15 08:30:18 dockes Exp $ (C) 2004 J.F.Dockes
#
# Recoll default configuration file. This typically lives in
# @prefix@/share/recoll/examples and provides default values. You can
# override selected parameters by adding assignments to
# ~/.recoll/recoll.conf (or $RECOLL_CONFDIR/recoll.conf)
#
# Almost all values in this file can be set from the GUI configuration menus,
# which may be an easier approach than direct editing.
#
# Layout: one "name = value" assignment per line; a line starting with '#'
# is a comment; a trailing backslash continues a value on the next line.

# Space-separated list of directories to index. Next line indexes $HOME
topdirs = ~

# Wildcard expressions for names of files and directories that we should
# ignore. If you need to index mozilla/thunderbird mail folders, don't put
# ".*" in there (as was the case with an older sample config).
# These are simple names, not paths (must contain no / ).
# Note that the first entry below, #*, is a pattern (names beginning with
# '#'), not the start of a comment.
skippedNames = #* bin CVS Cache cache* .cache caughtspam tmp \
    .thumbnails .svn \
    *~ .beagle .git .hg .bzr loop.ps .xsession-errors \
    .recoll* xapiandb recollrc recoll.conf

# Wildcard expressions for paths we shouldn't go into. The database and
# configuration directories will be added in there, else the default value
# is empty
# skippedPaths =

# Same for real time indexing. The idea here is that there is stuff that
# you might want to initially index but not monitor. If daemSkippedPaths is
# not set, the daemon uses skippedPaths.
#daemSkippedPaths =

# Recoll uses FNM_PATHNAME by default when matching skipped paths, which
# means that /dir1/dir2/dir3 is not matched by */dir3. Can't change the
# default now, but you can set the following variable to 0 to disable the
# use of FNM_PATHNAME (see fnmatch(3) man page)
# skippedPathsFnmPathname = 1

# Option to follow symbolic links. We normally don't, to avoid duplicated
# indexing (in any case, no effort is made to identify or avoid multiple
# indexing of linked files)
# followLinks = 0

# Debug messages. 2 is errors/warnings only.
# 3 is information like doc updates, 4 is quite verbose and 6 is very
# verbose.
loglevel = 3
logfilename = stderr

# Specific versions of log file name and level for the indexing daemon. The
# default is to use the above values.
# daemloglevel = 3
# daemlogfilename = /dev/null

# Decide if we store character case and diacritics in the index. If we do,
# searches sensitive to case and diacritics can be performed, but the index
# will be bigger, and some marginal weirdness may sometimes occur. We
# default to a stripped index for now.
indexStripChars = 1

# IF the index is not stripped. Decide if we automatically trigger
# diacritics sensitivity if the search term has accented characters (not in
# unac_except_trans). Else you need to use the query language and the "D"
# modifier to specify diacritics sensitivity. Default is no.
autodiacsens = 0

# IF the index is not stripped. Decide if we automatically trigger
# character case sensitivity if the search term has upper-case characters
# in any but the first position. Else you need to use the query language
# and the "C" modifier to specify character-case sensitivity. Default is
# yes.
autocasesens = 1

# Languages for which to build stemming databases at the end of
# indexing. Stemmer names can be found on http://www.xapian.org
# The flag to perform stem expansion at query time is now set from the GUI
indexstemminglanguages = english

# Default character set. Values found inside files, ie content tag in html
# documents, will override this. It can be specified per directory (see
# below). Used when converting to utf-8 (internal storage format), so it
# may be quite important for pure text files.
# The default used to be set to iso8859-1, but we now take it from the nls
# environment (LC_ALL/LC_CTYPE/LANG). The ultimate hardwired default is
# still 8859-1. If for some reason you want a general default which doesn't
# match your LANG and is not 8859-1, set it here.
# defaultcharset = iso-8859-1

# A list of characters, encoded in UTF-8, which should be handled specially
# when converting text to unaccented lowercase. For example, in Swedish,
# the letter a with diaeresis has full alphabet citizenship and should not
# be turned into an a.
# Each element in the space-separated list has the special character as
# first element and the translation following. The handling of both the
# lowercase and upper-case versions of a character should be specified, as
# membership in the list will turn off both standard accent and case
# processing. Examples:
# Swedish:
# unac_except_trans = åå Åå ää Ää öö Öö
# German:
# unac_except_trans = Ää Öö Üü ää öö üü ßss
# In French, you probably want to decompose oe and ae
# unac_except_trans = œoe Œoe æae Æae
# Actually, this seems a reasonable default for all until someone
# protests. These decompositions are not performed by unac, but I
# can't imagine someone typing the composed forms in a search.
#
# The last two entries begin with the single-character ligatures U+FB01
# (fi ligature) and U+FB02 (fl ligature). They must not be flattened to
# plain ASCII: the first character of each entry is the one being
# translated, so an ASCII "fifi" would remap the letter f itself.
# The upper-case AE ligature maps to lowercase "ae", like the French
# example above, because listed characters bypass standard case processing.
# NOTE(review): an existing index was presumably built with the previous
# value; confirm whether a reindex is needed after changing this list.
unac_except_trans = ßss œoe Œoe æae Æae ﬁfi ﬂfl

# Maximum expansion count for a single term (ie: when using wildcards).
# We used to not limit this at all (except for filenames where the limit
# was too low at 1000), but it is unreasonable with a big index.
# Default 10 000
maxTermExpand = 10000

# Maximum number of clauses we add to a single Xapian query. In some cases,
# the result of term expansion can be multiplicative, and we want to avoid
# eating all the memory. Default 100 000
maxXapianClauses = 100000

# Where to store the database (directory). This may be an absolute path,
# else it is taken as relative to the configuration directory (-c argument
# or $RECOLL_CONFDIR).
# If nothing is specified, the default is then ~/.recoll/xapiandb/
dbdir = xapiandb

# Maximum file system occupation before we stop indexing. The default value
# is 0, meaning no checking. The value is a percentage, corresponding to
# what the "Capacity" df output column shows.
maxfsoccuppc = 0

# Threshold (megabytes of new data) where we flush from memory to disk
# index. Setting this (ie to 10) can help control memory usage.
#
# A value of 0 means no explicit flushing, which lets Xapian perform its
# own thing, meaning flushing every XAPIAN_FLUSH_THRESHOLD documents
# created, modified or deleted. XAPIAN_FLUSH_THRESHOLD is an environment
# variable. As memory usage depends on average document size, not only
# document count, this is not very useful.
idxflushmb = 10

# Place to search for executable filters. If RECOLL_FILTERSDIR is set in
# the environment, we use it instead
filtersdir = @prefix@/share/recoll/filters

# Place to search for icons. The only reason to change this would be if you
# want to change the icons displayed in the result list
iconsdir = @prefix@/share/recoll/images

# Should we use the system's 'file -i' command as a final step in file type
# identification ? This may be useful, but will usually cause the
# indexation of many bogus 'text' files
usesystemfilecommand = 1

# Should we index the file names of files with mime types we don't
# know? (we can otherwise just ignore them)
indexallfilenames = 1

# A restrictive list of indexed mime types. Normally not set. If it is set,
# only the types from the list will have their contents indexed (the names
# will be indexed anyway if indexallfilenames is set as by default). Mime
# type names should be taken from the mimemap file.
#
# indexedmimetypes = text/html application/pdf

# Size limit for archive members. This is passed to the filters in the
# environment as RECOLL_FILTER_MAXMEMBERKB
# membermaxkbs = 50000

# Size limit for compressed files. We need to decompress these in a
# temporary directory for identification, which can be wasteful in some
# cases. Limit the waste. Negative means no limit. 0 results in no
# processing of any compressed file
compressedfilemaxkbs = -1

# Size limit for text files.
# This is for skipping monster logs
textfilemaxmbs = 20

# Page size for text files. If this is set, text/plain files will be
# divided into documents of approximately this size. May be useful to
# access pieces of big text files which would be problematic to load as one
# piece into the preview window. Might be useful for big logs
textfilepagekbs = 1000

# Maximum external filter execution time, in seconds. Default 20 minutes.
# This is mainly to avoid infinite loops in postscript files (loop.ps)
filtermaxseconds = 1200

# Length of abstracts we store while indexing. Longer will make for a
# bigger db
# idxabsmlen = 250

# Language definitions to use when creating the aspell dictionary.
# The value must match a set of aspell language definition files.
# You can type "aspell config" to see where these are installed.
# The default if this is not set is to use the NLS environment to guess the
# value
# aspellLanguage = en

# Disabling aspell use. The aspell dictionary generation takes some time,
# and some combinations of aspell version, language, and local terms,
# result in aspell dumping core each time. You can disable the aspell
# dictionary generation by setting the following variable:
# noaspell = 1

# Timing parameters for the real time mode:
#
# Seconds between auxiliary databases updates (stemdb, aspell):
# monauxinterval = 3600
#
# Resting time (seconds) during which we let the queue accumulate, in hope
# that events to the same file will merge, before we start indexing:
# monixinterval = 30
#
# Definitions for files which get a longer delay before reindexing is
# allowed. This is for fast-changing files, that should only be reindexed
# once in a while. A list of wildcardPattern:seconds pairs. The patterns
# are matched with fnmatch(pattern, path, 0). You can quote entries
# containing white space with double quotes.
# The default is empty, here follows an
# example:
# mondelaypatterns = *.log:20 "*with spaces.*:30"

# ionice class for monitor (on platforms where this is supported)
# monioniceclass = 3
# ionice class param for monitor (on platforms where this is supported)
# monioniceclassdata =

# If this is set, process the directory where Beagle Web browser plugins
# copy visited pages for indexing. Of course, Beagle MUST NOT be running,
# else things will behave strangely.
processbeaglequeue = 0

# The path to the Beagle indexing queue. This is hard-coded in the Beagle
# plugin as ~/.beagle/ToIndex so there should be no need to change it.
#beaglequeuedir = ~/.beagle/ToIndex

# This is only used by the Beagle web browser plugin indexing code, and
# defines where the cache for visited pages will live. Default:
# $RECOLL_CONFDIR/webcache
webcachedir = webcache

# This is only used by the Beagle web browser plugin indexing code, and
# defines the maximum size for the web page cache. Default: 40 MB.
webcachemaxmbs = 40

# The directory where mbox message offsets cache files are held. This is
# normally $RECOLL_CONFDIR/mboxcache, but it may be useful to share a
# directory between different configurations.
#mboxcachedir = mboxcache

# The minimum mbox file size over which we cache the offsets. There is
# really no sense in caching offsets for small files. The default is 5 MB.
#mboxcacheminmbs = 5

# Maximum number of positions we walk while populating a snippet for the
# result list. The default of 1 000 000 may be insufficient for big
# documents, the consequence would be snippets with possibly
# meaning-altering missing words.
snippetMaxPosWalk = 1000000

# You could specify different parameters for a subdirectory like this:
#[~/hungariandocs/plain]
#defaultcharset = iso-8859-2

# You can set fields on all files of a specific fs area.
# (rclaptg can be
# used for application selection inside mimeview)
#[/some/app/directory]
#localfields = rclaptg = someapp; otherfield = somevalue

# Use app tag to enable using gnu info to open info files (as the subnodes
# are indexed as html, we'd use firefox on a temp file else). Set this on
# some known info storage places
[/usr/share/info]
localfields = rclaptg=gnuinfo

[/usr/local/share/info]
localfields = rclaptg=gnuinfo

[/usr/local/info]
localfields = rclaptg=gnuinfo

# Enable thunderbird mbox format quirks where appropriate
[~/.thunderbird]
mhmboxquirks = tbird

# pidgin / purple directories for irc chats have names beginning with #
# The global skippedNames list contains the #* pattern, so it is emptied
# for this directory.
[~/.purple]
skippedNames =