From b0f02f02681facd5e4f05a51f268d9b916140494 Mon Sep 17 00:00:00 2001 From: dockes Date: Wed, 4 Jan 2006 11:09:53 +0000 Subject: [PATCH] *** empty log message *** --- src/doc/user/Makefile | 32 ++ src/doc/user/docbook.css | 160 ++++++++++ src/doc/user/usermanual.sgml | 596 +++++++++++++++++++++++++++++++++++ 3 files changed, 788 insertions(+) create mode 100644 src/doc/user/Makefile create mode 100644 src/doc/user/docbook.css create mode 100644 src/doc/user/usermanual.sgml diff --git a/src/doc/user/Makefile b/src/doc/user/Makefile new file mode 100644 index 00000000..7c75094d --- /dev/null +++ b/src/doc/user/Makefile @@ -0,0 +1,32 @@ +# +# @(#$Id: Makefile,v 1.1 2006-01-04 11:09:53 dockes Exp $ +# +# + +MAINTAINER=jean-francois.dockes@wanadoo.fr + +DOC?= usermanual +LANGCODE?= en_US.ISO_8859-1 +FORMATS?= html txt html-split +JADEFLAGS+= -V %generate-article-toc% + +# The purpose for this is to replace the FreeBSD file which contains the +# blurb about file being hosted on FreeBSD.org. To work, it also +# needs a modification to /usr/share/doc/mk/doc.docbook.mk. Around line +# 121, replace LANGUAGECATALOG= with LANGUAGECATALOG?= +LANGUAGECATALOG=/usr/doc/share/sgml/catalog + +#INSTALL_COMPRESSED?= gz +INSTALL_ONLY_COMPRESSED?= + +# +# SRCS lists the individual SGML files that make up the document. Changes +# to any of these files will force a rebuild +# + +# SGML content +SRCS= usermanual.sgml + +# ${.CURDIR}/../../.. +DOC_PREFIX?= /usr/doc +.include "${DOC_PREFIX}/share/mk/doc.project.mk" diff --git a/src/doc/user/docbook.css b/src/doc/user/docbook.css new file mode 100644 index 00000000..2224d7e3 --- /dev/null +++ b/src/doc/user/docbook.css @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2001, 2003 The FreeBSD Documentation Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: doc/share/misc/docbook.css,v 1.7 2004/03/22 19:17:52 ceri Exp $ + */ + +BODY ADDRESS { + line-height: 1.3; + margin: .6em 0; +} + +BODY BLOCKQUOTE { + margin-top: .75em; + line-height: 1.5; + margin-bottom: .75em; +} + +HTML BODY { + margin: 1em 8% 1em 10%; + line-height: 1.2; +} + +.LEGALNOTICE { + font-size: small; + font-variant: small-caps; +} + +BODY DIV { + margin: 0; +} + +DL { + margin: .8em 0; + line-height: 1.2; +} + +BODY FORM { + margin: .6em 0; +} + +H1, H2, H3, H4, H5, H6, +DIV.EXAMPLE P B, +.QUESTION, +DIV.TABLE P B, +DIV.PROCEDURE P B { + color: #990000; +} + +BODY H1 { + margin: .8em 0 0 -4%; + line-height: 1.3; +} + +BODY H2 { + margin: .8em 0 0 -4%; + line-height: 1.3; +} + +BODY H3 { + margin: .8em 0 0 -3%; + line-height: 1.3; +} + +BODY H4 { + margin: .8em 0 0 -3%; + line-height: 1.3; +} + +BODY H5 { + margin: .8em 0 0 -2%; + line-height: 1.3; +} + +BODY H6 { + margin: .8em 0 0 -1%; + line-height: 1.3; +} + +BODY HR { + margin: .6em +} + +BODY IMG.NAVHEADER { + margin: 0 0 0 -4%; +} + +OL { + margin: 0 0 0 5%; + line-height: 1.2; +} + +BODY PRE { + margin: .75em 0; + line-height: 1.0; + color: #461b7e; +} + +BODY TD { + line-height: 1.2 +} + +BODY TH { + line-height: 1.2; +} + +UL, BODY DIR, BODY MENU { + margin: 0 0 0 5%; + line-height: 1.2; +} + +HTML { + margin: 0; + padding: 0; +} + + +.FILENAME { + color: #007a00; +} + +BODY H1, BODY H2, BODY H3, BODY H4, BODY H5, BODY H6 { + margin-left: 0 +} + +.GUIMENU, .GUIMENUITEM, .GUISUBMENU, +.GUILABEL, .INTERFACE, .GUIBUTTON, +.SHORTCUT, .SHORTCUT .KEYCAP { + background-color: #F0F0F0; +} + +.ACCEL { + background-color: #F0F0F0; + text-decoration: underline; +} + +.PROGRAMLISTING, .SCREEN { + margin-left: 3ex; +} diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml new file mode 100644 index 00000000..603d4f88 --- /dev/null +++ b/src/doc/user/usermanual.sgml @@ -0,0 +1,596 @@ +Recoll"> +Xapian"> + +]> + + + + + Recoll user manual + + + + Jean-Francois + Dockes 
+ +
jean-francois.dockes@wanadoo.fr
+
+
+ + + 2005 + Jean-Francois + Dockes + + + $Id: usermanual.sgml,v 1.1 2006-01-04 11:09:53 dockes Exp $ + + + The &RCL; user manual introduces full text search notions + and describes the installation and use of the &RCL; application. + + + +
+ + + Introduction + + + Giving it a try + + If you do not like reading manuals and would like to give + &RCL; a try, just perform installation and start the + recoll user interface, which will index your + home directory and let you search it right after. + + Do not do this if your home has a huge + number of documents and you do not want to wait or are very + short on disk space. In this case, you may want to edit the configuration file first to + restrict the indexed area. + + Also be aware that you will need to install the + appropriate supporting applications for document types that need + them (for example antiword for + ms-word files), and that the default character set used to read + raw text files for indexing is iso8859-1, which may not be + appropriate for you. + + + Full text search + + Full text search applications allow you to find your data + by content rather than by external attributes (like a file + name). More specifically, they will let you specify words + (terms) that should or should not appear in the text you are + looking for, and return a list of matching documents, ordered + so that the most relevant documents will + appear first. + + You do not need to remember in what file or email message you + stored a given piece of information. You just ask for related + terms, and the tool will return a list of documents where + those terms are prominent. + + This mode of operation has been made very familiar by www + search engines. + + The notion of relevance is a difficult one, as only you, the + user, actually know which documents are relevant to your search, + and the application can only try a guess. The quality of this + guess is probably the most important element for a search + application. + + In many cases, one is looking for all the forms of a word, + not for a specific form or spelling. 
These different forms may include + plurals, different tenses for a verb, or terms derived from + the same root or stem (example: floor, + floors, floored, floorings...). &RCL; will by default expand + queries to all such related terms (words that reduce to the + same stem). This expansion can be disabled at search + time. + + Stemming, by itself, does not provide for misspellings or + phonetic searches. &RCL; does not support these currently. + + + + + + Recoll overview + + &RCL; is a full text search application which uses the + &XAP; information retrieval + library as its storage and retrieval engine. &XAP; is a very + mature package using a sophisticated + probabilistic ranking model. &RCL; provides the interface + to get data into (indexation) and out (searching) of the system. + + In practice, &XAP; works by remembering where terms appear + in your document files. The acquisition process is called + indexation. + + The resulting database can be big (roughly the size of the + original document set), but it is not a document archive. &RCL; + can only display documents that still exist at the place from which + they were indexed. + + &RCL; stores all internal data in Unicode + UTF-8 format, and it can index files with + different character sets, encodings, and languages into the same + database. It has input filters for many document types. + + Stemming depends on the document language. &RCL; stores + the unstemmed versions of terms and uses auxiliary databases for + term expansion. It can switch stemming languages without reindexing. + Storing documents in different languages in the same + database is possible, and useful in practice, but does introduce + possibilities of confusion. &RCL; makes no attempt at automatic + language recognition. + + &RCL; has many parameters which define exactly what to + index, and how to classify and decode the source + documents. These are kept in a configuration file. 
A + sample configuration is installed into the + .recoll subdirectory of your home + directory when you first execute a &RCL; command. The initial + configuration will index your home directory with default + parameters and should be sufficient for giving &RCL; a try, + but you may want to adjust it later. + + Indexation is started + automatically the first time you execute the + recoll search graphical user interface, or by + executing the recollindex command. + + Searches are + performed inside the recoll + program, which has many options to help you find what you are + looking for. + + + + + + + Indexation + + + Introduction + + Indexation is the process by which the set of documents is + analyzed and the data entered into the database. &RCL; indexation + is normally incremental: documents will only be processed if + they have been modified. On the first execution, of course, all + documents will need processing. A full index build can be forced + later on by specifying an option to the indexation command. + + &RCL; indexation takes place at discrete times. There is + currently no interface to real time file modification + monitors. The typical usage is to have a nightly indexation run + programmed into your cron file. + + &RCL; knows about quite a few different document + types. The parameters for document types recognition and + processing are set in + configuration files. + Most file types, like HTML or word processing files, only hold + one document. Some file types, like mail folder files can hold + many individually indexed documents. + + + Without further configuration, &RCL; will index all + appropriate files from your home directory, with a reasonable + set of defaults, if you live in western Europe or the USA. If + your normal character set is not iso8859-1, you almost certainly + need to adjust the configuration. 
+ + + + + + The indexation configuration + + The main configuration file is named + $HOME/.recoll/recoll.conf by default or + $RECOLL_CONFDIR/recoll.conf if + RECOLL_CONFDIR is set. + + The most accurate documentation for editing the file is + given by comments inside the default file that will be created + when you first start recoll. If you want to + adjust the configuration before indexation, just click + Cancel when the program asks if it should + start initial indexation. + + You can also have a look at the configuration overview inside + the installation chapter of this document. + + + + + Starting indexation + + Indexation is performed either by the + recollindex program, or by the + indexation thread inside the recoll + program (use the File menu). + + If the recoll program finds no database + when it starts, it will automatically start indexation (except + if cancelled). + + It is best to avoid interrupting the indexation process, as + this may sometimes leave the database in a bad state. This is + not a serious problem, as you then just need to clear + everything and restart the indexation. The database files are + normally stored in the $HOME/.recoll/xapiandb + directory, + which you can just delete if needed. Alternatively, you can + start recollindex -z, which will + reset the database before indexation. + + + + + Using <command>cron</command> to automate + indexation + + The most common way to set up indexation is to have a cron + task execute it every night. For example the following + crontab entry would do it every day at + 3:30AM (supposing recollindex is in your PATH): + + 30 3 * * * recollindex > /tmp/recolltrace 2>&1 + + The usual command to edit your + crontab is + crontab -e (which will usually start the + vi editor to edit the file). You may have + more sophisticated tools available on your system. 
+ + + + + + + Searching + + + Simple search + + Start the recoll program, then + enter search term(s) in the text field at the top left of the + window. Clicking the Search button or + hitting the Enter key will start a search. By + default, this will look for documents with any of the terms + (the ones with more terms will get better scores). You can + check the All terms checkbox to ensure + that only documents with all the terms will be returned. Use + the Tools / Advanced + search dialog for more complex searches. + + After starting a search, a list of results will instantly + be displayed in the main list window. Clicking on an entry will + open an internal preview window for the + document. Double-clicking will attempt to start an external + viewer (have a look at the + ~/.recoll/mimeconf file to see how these + are configured). + + By default, the document list is presented in order of + relevance (how well the system estimates that the document + matches the query). You can specify a different ordering by + using the Tools + / Sort parameters dialog. + + + + + Complex/advanced search + + The advanced search dialog has fields that will allow a more + refined search, looking for documents with all given words, a + given exact phrase, or none of the given words (all fields may + be combined by an implicit AND clause). + + It will let you search for documents of specific mime + types (ie: only text/plain, or + text/html or + application/pdf etc...) + + It will let you restrict the search results to a subtree of + the indexed area. + + In other respects, it works like the simple search. + + + + Document history + + Documents that you actually view (with the internal preview + or an external tool) are entered into the document history, + which is remembered. You can display the history list by using + the Tools/Doc History menu + entry. 
+ + + + + Search tips, shortcuts + + Disabling stem expansion + Entering a capitalized word in any search field will prevent + stem expansion (no search for + gardening if you enter + Garden instead of + garden). This is the only case where + character case will make a difference for a &RCL; + search. + + Phrases + A phrase can be looked for by enclosing it in double + quotes. Example: "user manual" will look + only for occurrences of user immediately + followed by manual. You can use the + This exact phrase field of the advanced + search dialog to the same effect. + + + Quitting + Entering ^Q almost anywhere will + close the application. + + Closing previews + Entering ^W in a preview tab will + close it (and, for the last tab, close the preview + window). + + + + + + + + + Installation + + + Building from source + + + Prerequisites + + At the very least, you will need to download and install the + xapian core + package (&RCL; currently uses version 0.9.2), and the qt + runtime and development packages (&RCL; currently uses + version 3.3.3). + + You will most probably be able to find a binary package for + qt for your system. You may have to + compile Xapian, + but this is not difficult (if you are using + FreeBSD, there is a port). + + You may also need + libiconv. &RCL; + currently uses version 1.9 (this should not be critical). On + Linux systems, the iconv interface + is part of libc and you should not need to do anything + special. + + External file types&RCL; uses + external applications + to index some file types. You need to install them for the + file types that you wish to have indexed: + + + + + MS Word: + antiword. + + + PDF: pdftotext is part of the Xpdf package. + + + Postscript: + pstotext. + + + + RTF: unrtf + + + + + + + Building + + &RCL; has been built on + Linux (redhat7.3, mandriva 2005), FreeBSD and Solaris 8. If + you build on another system, I would very + much welcome patches. 
+ + Normal procedure: + + cd recoll-xxx + configure + make + (practises usual hardship-repelling invocations) + + + + There is little autoconfiguration. The + configure script will mainly link one of + the system-specific files in the mk + directory to mk/sysconf. If your system + is not known yet, it will tell you as much, and you may want + to manually copy and modify one of the existing files (the new + file name should be the output of uname -s). + + + + Installation + + Either type make install or execute + recollinstall targetdir, in the root + of the source tree. This will copy the commands to + $targetdir/bin and the sample + configuration files, scripts and other shared data to + $targetdir/share/recoll. + + + + + Installing a prebuilt copy + + + Installing through a package system + + If you are lucky enough to be using a port system or a + prebuilt package (RPM or other), just follow the usual + procedure, and have a look at the configuration + section. + + + + Installing a prebuilt &RCL; + + The unpackaged binary versions are just compressed tar + files of a build + tree, where only the useful parts were kept (executables and + sample configuration). + + The executable binary files are built with a static link to + libxapian and libiconv, to make installation easier (no + dependencies). However, this also means that you cannot change + the versions which are used. + + After extracting the tar file, you can proceed with + installation as + if you had built the package from source. + + + + + Configuration overview + + The personal configuration files and the database are kept in + the .recoll directory in your + home. If this directory does not exist when + recoll or + recollindex are started, the + directory will be created and the sample configuration files will + be copied. recoll will give you a + chance to edit the configuration file before starting + indexation. recollindex will + proceed immediately. + + &RCL; uses text + configuration files. 
You will have to edit them by hand for + now (there is still some hope for a GUI configuration tool + in the future). The most accurate documentation for the + configuration parameters is given by comments inside the sample + files, and we will just give a general overview here. + + Most of the parameters specific to the + recoll GUI are set through the + Preferences menu and stored in the + standard QT place + ($HOME/.qt/recollrc). You probably do not + want to edit this by hand. + + + Main configuration file + + ~/.recoll/recoll.conf is the main + configuration file. It defines + what to index (top directories and things to ignore), and the + default character set to use (for document types which do not + specify it internally). The default character set can be + specified separately for any directory subtree. + + The default configuration will index your home + directory. If this is not appropriate, use + recoll to copy the sample + configuration, click Cancel, and edit + the configuration file before restarting the command. This + will start the initial indexation, which may take some time. + + There are also miscellaneous other parameters inside + recoll.conf. Explore and enjoy :) + + + + + The mimemap file + + ~/.recoll/mimemap specifies the + file name extension to mime type mappings. For + file names without an extension, or with an unknown one, the + system's file -i command will be executed + to determine the mime type (this can be switched off inside + the main configuration file). + + mimemap also has a list of + extensions which should be ignored totally (to avoid losing + time by executing file + for things that certainly should not be indexed). + + The mappings can be specified on a per-subtree basis, + which may be useful in some cases. Example: + gaim logs have a + .txt extension but + should be handled specially, which is possible because they + are usually all located in one place. 
+ + + + + The mimeconf file + + ~/.recoll/mimeconf specifies how the + different mime types are handled for indexation, and for + display. + + Changing the indexation parameters is probably not a + good idea except if you are a &RCL; developer. + + You may want to adjust the external viewers defined in this file + (e.g. html is either + previewed internally or displayed using + firefox, but you may prefer + mozilla...). Look for the + [view] section. + + You can also change the icons which are displayed by + recoll in the result lists (the values are + the basenames of the png images inside the + iconsdir directory, specified in + recoll.conf). + + + + + + +
+