moved website

2017-08-16 15:54:31 +02:00 · 2017-08-16 15:54:31 +02:00 · 51ac5e8440
commit 51ac5e8440
parent 524491b162
157 changed files with 27 additions and 14944 deletions
--- a/packaging/debian/buildppa.sh
+++ b/packaging/debian/buildppa.sh
@ -45,8 +45,8 @@ debdir=debian
 # Note: no new releases for lucid: no webkit. Or use old debianrclqt4 dir.
 # No new releases for trusty either because of risk of kio compat (kio
 # won't build)
-series="xenial yakkety zesty"
-series=
+series="xenial yakkety zesty artful"
+series=artful

 if test "X$series" != X ; then
    check_recoll_orig
@ -77,8 +77,8 @@ done

 ### KIO. Does not build on trusty from recoll 1.23 because of the need
 ### for c++11
-series="xenial yakkety zesty"
-#series=
+series="xenial yakkety zesty artful"
+series=

 debdir=debiankio
 topdir=kio-recoll-${RCLVERS}
@ -146,8 +146,8 @@ for series in $series ; do
 done

 ### Unity Scope
-series="trusty xenial yakkety"
-series=
+series="trusty xenial yakkety zesty artful"
+#series=

 debdir=debianunityscope
 if test ! -d ${debdir}/ ; then
--- a/src/doc/user/usermanual.html
+++ b/src/doc/user/usermanual.html
@ -20,8 +20,8 @@ alink="#0000FF">
    <div class="titlepage">
      <div>
        <div>
-          <h1 class="title"><a name="idp56557776" id=
-          "idp56557776"></a>Recoll user manual</h1>
+          <h1 class="title"><a name="idm44986984150384" id=
+          "idm44986984150384"></a>Recoll user manual</h1>
        </div>

        <div>
@ -109,13 +109,14 @@ alink="#0000FF">
                multiple indexes</a></span></dt>

                <dt><span class="sect2">2.1.3. <a href=
-                "#idp62130176">Document types</a></span></dt>
+                "#idm44986952097312">Document types</a></span></dt>

                <dt><span class="sect2">2.1.4. <a href=
-                "#idp62154272">Indexing failures</a></span></dt>
+                "#idm44986952072736">Indexing
+                failures</a></span></dt>

                <dt><span class="sect2">2.1.5. <a href=
-                "#idp62161280">Recovery</a></span></dt>
+                "#idm44986952065728">Recovery</a></span></dt>
              </dl>
            </dd>

@ -1017,8 +1018,9 @@ alink="#0000FF">
          <div class="titlepage">
            <div>
              <div>
-                <h3 class="title"><a name="idp62130176" id=
-                "idp62130176"></a>2.1.3.&nbsp;Document types</h3>
+                <h3 class="title"><a name="idm44986952097312" id=
+                "idm44986952097312"></a>2.1.3.&nbsp;Document
+                types</h3>
              </div>
            </div>
          </div>
@ -1131,8 +1133,8 @@ indexedmimetypes = application/pdf
          <div class="titlepage">
            <div>
              <div>
-                <h3 class="title"><a name="idp62154272" id=
-                "idp62154272"></a>2.1.4.&nbsp;Indexing
+                <h3 class="title"><a name="idm44986952072736" id=
+                "idm44986952072736"></a>2.1.4.&nbsp;Indexing
                failures</h3>
              </div>
            </div>
@ -1172,8 +1174,8 @@ indexedmimetypes = application/pdf
          <div class="titlepage">
            <div>
              <div>
-                <h3 class="title"><a name="idp62161280" id=
-                "idp62161280"></a>2.1.5.&nbsp;Recovery</h3>
+                <h3 class="title"><a name="idm44986952065728" id=
+                "idm44986952065728"></a>2.1.5.&nbsp;Recovery</h3>
              </div>
            </div>
          </div>
@ -1778,7 +1780,7 @@ thrQSizes = -1 -1 -1

        <p>A current pointer to the extension can be found, along
        with up-to-date instructions, on the <a class="ulink" href=
-        "http://bitbucket.org/medoc/recoll/wiki/IndexWebHistory"
+        "https://www.lesbonscomptes.com/recoll/faqsandhowtos/IndexWebHistory"
        target="_top">Recoll wiki</a>.</p>

        <p>A copy of the indexed WEB pages is retained by Recoll in
@ -3057,7 +3059,7 @@ MimeType=*/*
          thumbnails.</p>

          <p>There are also <a class="ulink" href=
-          "http://bitbucket.org/medoc/recoll/wiki/ResultsThumbnails.wiki"
+          "https://www.lesbonscomptes.com/recoll/faqsandhowtos/ResultsThumbnails.wiki"
          target="_top">some pointers about thumbnail
          generation</a> on the <span class=
          "application">Recoll</span> wiki.</p>
@ -5898,7 +5900,7 @@ dir:recoll dir:src -dir:utils -dir:common
            <li class="listitem">
              <p>If you use a recent version of Ubuntu Linux, you
              may find the <a class="ulink" href=
-              "http://bitbucket.org/medoc/recoll/wiki/UnityLens"
+              "https://www.lesbonscomptes.com/recoll/faqsandhowtos/UnityLens"
              target="_top">Ubuntu Unity Lens</a> module
              useful.</p>
            </li>
@ -5932,7 +5934,7 @@ dir:recoll dir:src -dir:utils -dir:common
          "application">libwnck</span> window manager interface
          library, which will allow you to do just this. The
          detailed instructions are on <a class="ulink" href=
-          "http://bitbucket.org/medoc/recoll/wiki/HotRecoll"
+          "https://www.lesbonscomptes.com/recoll/faqsandhowtos/HotRecoll"
          target="_top">this wiki page</a>.</p>
        </div>

@ -6642,9 +6644,9 @@ or
        comments inside the file.</p>

        <p>You can also have a look at the <a class="ulink" href=
-        "http://bitbucket.org/medoc/recoll/wiki/HandleCustomField"
-        target="_top">example on the Wiki</a>, detailing how one
-        could add a <span class="emphasis"><em>page
+        "https://www.lesbonscomptes.com/recoll/faqsandhowtos/HandleCustomField"
+        target="_top">example in the FAQs area</a>, detailing how
+        one could add a <span class="emphasis"><em>page
        count</em></span> field to pdf documents for displaying
        inside result lists.</p>
      </div>
@ -8978,7 +8980,7 @@ thesame = "some string with spaces"
                function similar to skippedNames, but works
                 independently. Can be redefined for subdirectories.
                Supported by recoll 1.20 and newer. See
-                https://bitbucket.org/medoc/recoll/wiki/Filtering%20out%20Zip%20archive%20members</p>
+                https://www.lesbonscomptes.com/recoll/faqsandhowtos/Filtering%20out%20Zip%20archive%20members</p>
              </dd>

              <dt><a name=
--- a/website/BUGS.html
+++ b/website/BUGS.html
--- a/website/CHANGES.html
+++ b/website/CHANGES.html
@ -1,945 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-  <head>
-    <title>Recoll changes</title>
-
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description" content=
-	  "recoll is a simple full-text search system for unix and linux
-	   based on the powerful and mature xapian engine">
-    <meta name="Keywords" content=
-	  "full text search, desktop search, unix, linux">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
-    <meta name="robots" content="All,Index,Follow">
-
-    <link type="text/css" rel="stylesheet" href="styles/style.css">
-  </head>
-
-  <body>
-    
-    <div class="rightlinks">
-      <ul>
-        <li><a href="index.html">Home</a></li>
-        <li><a href="download.html">Downloads</a></li>
-        <li><a href="doc.html">Documentation</a></li>
-      </ul>
-    </div>
-    
-    <div class="content">
-
-      <h1>Recoll journal of user-visible changes </h1>
-
-      <p>Newer releases are described in their release notes document:</p>
-      <p>
-         <a href="release-1.20.html">1.20</a> 
-         <a href="release-1.19.html">1.19</a> 
-         <a href="release-1.18.html">1.18</a> 
-         <a href="release-1.17.html">1.17</a> 
-         <a href="release-1.16.html">1.16</a> 
-         <a href="release-1.15.html">1.15</a> 
-         <a href="release-1.14.4.html">1.14.4</a>
-       </p>
-
-      <h2><a name="1.14.3">1.14.3</a></h2>
-      <ul>
-        <li>Get rid of permanent filter subprocess at the end of a GUI
-          indexing pass.</li>
-        <li>Add new filter for indexing GNU info files.</li>
-        <li>Index the file name from a zip or chm internal path.</li>
-        <li>Add hotrecoll.py script to help with one-key recoll
-          activation/hiding. Move focus to search entry when unminimized.</li> 
-        <li>Handle bad mbox format from Thunderbird.</li>
-        <li>Catch exception which was causing stderr messages while
-          indexing encrypted zip files.</li>
-        <li>Change result list "Edit" links to "Open" for consistency
-          with menus.</li>
-        <li>Change the type of character set conversion occurring
-          when using  "Copy file path" from the result list. 
-          Should work in more cases than the previous approach (but
-          will still fail sometimes).</li>
-        <li>Update lyx filter.</li>
-        <li>Fix problems with white space in file name in several
-          input filters.</li>
-        <li>Support mutagen versions older than 1.17.</li>
-      </ul>
-
-      <h2><a name="1.14.2">1.14.2</a></h2>
-      <p>Note: most of the changes are in release 1.14.0. Release 1.14.1 fixed 2
-	bugs. Release 1.14.2 fixes the help browser which was broken
-	by 1.14.1. Sigh ...</p>
-      <ul>
-	<li><a href="usermanual/usermanual.html#RCL.SEARCH.LANG">
-	    date selection in queries</a>.</li>
-	<li>Pure negative queries (ie: <i>-someterm date:P10D/</i>).</li>
-	<li>Autosuffs: option to automatically turn words into <tt>ext:</tt>
-	  clauses (ie: <i>xls</i> -> <i>ext:xls</i>) (GUI preferences
-	  panel).</li> 
-	<li>Allow extracting arbitrary mail headers and use them as
-	  index/search fields (configured in the <tt>fields</tt>
-	  file).</li>
-	<li><tt>nonumbers</tt> configuration parameter: disable
-	  indexing of all numbers, useful for some data files with lots
-	  of numerical data.</li>
-	<li>Shortcuts for the results page: <tt>PageUp/Down</tt> can
-	  be used even when the focus is in the search
-	  entry. <tt>Shift+Home</tt>: back to first page of results.
-	  <tt>Ctrl+Shift+s</tt>: return focus to the search
-	  entry. </li>
-	<li>Add full screen mode for small devices.</li>
-	<li>Added -i option to recollq to specify extra indexes.</li>
-	<li>Removed use of id3lib for extracting mp3 tags. A Python filter
- 	  based on mutagen now handles all audio formats
- 	  (mp3/flac/ogg). <i>If you are currently indexing audio
- 	  files, you need to install mutagen, Recoll will not use
- 	    id3lib or the Flac/Ogg tools any more</i>. </li>
-	<li>Filter for <b>fictionbook</b> (.fb2) documents.</li>
-	<li>Cleaned up the Python samples and made recollq.py a usable
-	  clone of recollq.</li>
-	<li>Errors when opening additional indexes for a query are now
-	  fatal. They could easily go unnoticed before.</li>
-	<li>Proper LARGEFILE support.</li>
-	<li>Use <b>xsltproc</b> instead of misc dirty tricks to
-	  extract text from most current XML-based documents (except
-	  those in which the XML is too broken).</li>
-	<li>Implement <tt>configure --enable-pic</tt> and use it for
-	  the KIO slave and Python and PHP modules.</li>
-      </ul>
-
-      <p>Bugs also fixed in the 1.13 branch:</p>
-      <ul>
-	<li>The <tt>filename</tt> (transcoded file name) field
-	    could not be stored, so it could not be displayed in the
-	    result list. Can now be displayed as %(filename).</li> 
-	<li>Html files would always be indexed even when filtered
-	    out by <tt>indexedmimetypes</tt></li>
-	<li>Preview: toggling between main text and metadata
-	    display would confuse the text format.</li>
-	<li>Restore <tt>indexallfilenames=0</tt> functionality.</li>
-      </ul>
-
-	<h2><a name="1.13.04">1.13.04</a></h2>
-	<ul>
-	  <li>Provide a set of configuration defaults so that compilation has a
-            chance to succeed on unknown systems.</li>
-	  <li>Install icon to the pixmaps directory.</li>
-	  <li>Fixes stemming, which was broken for all previous 1.13
-	    releases.</li>
-	  <li><a href="BUGS.html#b_1_13_02">Bugs fixed between 1.13.02
-	      and 1.13.04.</a></li>
-	</ul>
-
-	<h2><a name="1.13.02">1.13.02</a></h2>
-	<ul><li>This version has a single fix to work around a problem in the
-            Qt 4.6.1 uic utility. If you are not using Qt 4.6.1 and are
-            currently running Recoll 1.13.01, you do not need to
-            upgrade.</li></ul>
-
-	<h2><a name="1.13.01">1.13.01</a></h2>
-
-	<ul>
-	  <li>Recoll has a new class of persistent external filters
-            with the capability to process several documents, or
-            multi-document files, in the same instance. Benefits: much
-            faster image tag indexing, and new file formats. Except for
-            the Perl image tag filter (because of ExifTool), the new
-            filters are written in Python.</li>
-
-	  <li>New file formats: chm (microsoft help), zip archives, .ics
-	    calendar files. Individual pages in chm files are indexed and
-	    can be previewed. Zip is quite convenient for maildir
-	    archives (for example).</li>
-
-	  <li>Recoll can now use the output of the Beagle Firefox plugin
-	    to index visited web pages and bookmarks. This is only usable
-	    if Beagle itself is not running, else Recoll and Beagle will be
-	    fighting for the same queue.</li>
-
-	  <li>Big text files (like application logs) can now be paged for
-            indexing, avoiding excess memory usage during indexing and
-            improving the usability at query time. They can also be
-            altogether skipped by setting a maximum size configuration
-            parameter. These parameters have default values (1 MB and 20
-            MB) which change Recoll behaviour compared to previous
-            versions. You can set <i>textfilepagekbs</i>
-            and <i>textfilemaxmbs</i> to -1 in the configuration to
-            restore the old behaviour.</li>
-
-	  <li>A cache was implemented for mbox message header offsets. This
-	    speeds up message previews for big mbox files.</li>
-
-	  <li>Miscellaneous usability improvements: 
-	    <ul>
-	      <li>Allow using page-up/down and shift-home to scroll the
-		result list while the focus is in the search entry. </li>
-	      <li>Make 'Use desktop preferences' the default for new
-		Recoll installations, and make this choice more
-		prominent in the external viewer dialog.</li> 
-	      <li>^P starts the print dialog on a preview window.</li>
-	      <li>If a search has no result, alternate spellings are
-		suggested. This feature is still a bit raw and will be
-		improved.</li> 
-	      <li>If the text of a document is empty, preview will switch to
-		displaying the document fields.</li>
-	      <li>New entry in the result list contextual menu for opening
-		the parent document of a result list hit with its native
-		application. Useful for example for pages inside chm files.</li>
-	    </ul>
-	  </li>
-
-	  <li>Indentation is now preserved when displaying text documents
-	    inside the preview window. This is particularly welcome for
-	    program source files.</li>
-
-	  <li>Allow substituting arbitrary fields in the result
-            paragraph, using a %(fieldname) syntax</li>
-
-	  <li>The real-time indexing monitor will now accumulate
-	    modifications during 30&nbsp;S before indexing.</li>
-
-	  <li>The indexer can now split camelCase words, allowing search on
-	    component terms. This is not enabled by default as it can
-	    confuse phrase searches (ie: "MySQL&nbsp;manual" is matched by
-	    phrase queries for "my&nbsp;sql&nbsp;manual" and "MySQL&nbsp;manual"
-	    but not "mysql&nbsp;manual"). Use "configure&nbsp;--enable-camelcase"
-	    to activate it.
-	  </li>
-
-	  <li>The ipath is now printed by default after the url in the
-	    default result list format.</li>
-	  
-	  <li><i>recoll_noindex</i> and <i>skippedNames</i> can now be
-	    changed at any point in the tree (only for topdirs previously).</li>
-
-	  <li>Allow using location/application sensitivity in external viewer
-            choice. This uses several new functions: 
-            <ul>
-              <li>Allow the substitution of arbitrary document fields inside
-		external viewer command line arguments.</li>
-              <li>Allow field values to be set on all documents
-		in a file system subtree. For example, you can 
-		set an application tag (ie: rclaptg = gnus) on all mailbox
-		files under a specific directory.</li>
-              <li>New syntax in mimeview for including the rclaptg field in
-		viewer choice
-		(<i>mimetype</i>|<i>tagvalue</i>&nbsp;=&nbsp;...).</li> 
-            </ul>
-	  </li>
-
-	  <li>Allow specifying a specific default character set for mail
-	    messages. This is mainly useful for readpst dumps. All
-	    reasonable non-ascii messages specify their character set.</li>
-
-	  <li>Added a --without-gui configure option. Removes all X11 and
-	    Qt dependencies and only compiles the command-line interface.</li>
-
-	  <li>Improved the kio_recoll build. There is no need to run
-	    configure manually in the main directory any more. Ubuntu
-	    packages for kio_recoll are now built on the
-	    <a href="http://launchpad.net/~recoll-backports/+archive/ppa">
-	      recoll-backports PPA on launchpad.net</a>.</li>
-
-	</ul>
-
-
-	<h2><a name="1.12.4">1.12.4</a></h2>
-	<p>Bugs fixed:</p>
-	<ul>
-	  <li>Qt4 version only: the search inside the preview window
-	    could become unbearably slow for big documents (quadratically
-	    so), and could not be interrupted (Qt bug). The Qt3 version of
-	    the code was included in the preview tool to restore good
-	    performance. This bug is the main reason for this release.</li>
-	</ul>
-	<p>Build system improvements:</p>
-	<ul>
-	  <li>Perform minimal base package configuration inside the kio
-	    cmake code to permit building it from scratch (without a build
-	    of the main code). Mainly useful for builds on the Ubuntu
-	    PPA.</li>
-	  <li>Implement a --without-gui option to build a pure
-	    command-line version with no Qt or X11 dependencies.</li>
-	  <li>Ensure that the user's PATH settings determine where we
-	    look first for qmake in all cases.</li>
-	</ul>
-
-	<h2><a name="1.12.3">1.12.3</a></h2>
-	<p>This is a bug fix release.</p>
-	<ul>
-	  <li>Fix the sort tool which had been broken since 1.11  with
-            some (or all?) qt3 versions.</li>
-	  <li>Catch two Xapian exceptions which could crash the GUI when a query
-            was run while the index was being updated.</li>
-	  <li>Ensure that the result list right-click pop up menu will appear even
-            when the click is inside a table.</li>
-	  <li>Fix the way we retrieve the Xapian library version to avoid
-            GUI compilation problems.</li>
-	  <li>Inside the real-time indexer: only use the main thread to test that
-            the X11 server is still alive. Multithreaded calls to x11IsAlive()
-            would sometimes crash the process because of an X11 error.</li>
-	  <li>Define filter timeout so that a looping filter (ie: rclps trying to
-            index loop.ps) will not completely stop the indexing. Default value:
-            20mn. Add loop.ps to skippedNames.</li>
-	  <li>Improve filter subprocesses management. Some could previously be
-            left around after recollindex was killed. Improve cancellation
-            request acknowledgment by recollindex (two ^C were sometimes
-            necessary to make it terminate).</li>
-	  <li>Signals SIGUSR1 and SIGUSR2 are now blocked in addition to
-	    INTR/TERM/QUIT.</li>
-	  <li>Extended attributes indexing now works for all file types.</li>
-	  <li>Ensure that queries started from the command line are handled as
-            normal ones (they previously could not be sorted).</li>
-	  <li>Improve man page indexing: do not index section header terms.</li>
-	</ul>
-
-
-	<h2><a name="1.12.1">1.12.1</a></h2>
-	<p>This is a very minor release, mainly to fix compilation
-          issues and a few very minor bugs. No need to upgrade if
-          you don't experience these.</p>
-	<ul>
-          <li>Fixed compilation errors for new gcc and gnu libc.</li>
-          <li>Use groff html output in rclman to get rid of control
-            characters in output (improve manual pages indexing). Fix
-            8bit character issues in file names in rcllyx.</li>
-          <li>Fixed command line arguments processing problem with 
-            "recoll -q"</li>
-	</ul>
-
-	<h2><a name="1.12.0">1.12.0</a></h2>
-	<ul>
-          <li>Recoll now implements a KIO slave to allow searching
-            directly from KDE applications. This does not affect the
-            main application and is not enabled by default (go to the
-            kde/kio/recoll source directory for build
-            instructions). </li>
-          <li>Recoll now computes md5 checksums for all indexed
-            documents and optionally collapses duplicate entries inside
-            the result list. This needs a full reindex to become
-            effective for older documents already in the index. The
-            option to activate collapsing is in the <i>Query
-              Configuration</i>.</li>
-          <li>Typing F1 anywhere in the GUI should bring up the
-            appropriate section of the manual in the application
-            configured for viewing HTML documents.</li>
-          <li>The result list right click menu now has an entry to
-            save the document to a file. This is only enabled for
-            documents contained inside another file (ie, messages inside
-            an mbox folder, or attachments), and is especially useful for
-            extracting an attachment with no associated external
-            editor.</li>
-          <li>The preview window now has a right-click menu, with an
-            entry to toggle between viewing the main text or all the
-            metadata for the document. This is most useful in the case
-            where the search match actually occurred in a field not
-            visible in the main text (ie: author or HTML title).</li>
-          <li>Words glued by an underscore character like
-            <i>compound_word</i> are now split during indexing, and
-            will be found when queried either as themselves or in a
-            search for the components.</li>
-          <li>There is now a size limit over which no attempt will be made to
-            uncompress/identify/index compressed files. Not active by
-            default, to be set in the <i>Indexing Configuration</i>.</li>
-          <li>Added support for fetching field values from extended file
-            attributes. This is not enabled by default, use 
-            <i>configure&nbsp;--enable-xattr</i>. You'll also need to
-            set up a map from the attributes names to the Recoll field
-            names (see comment at the end of the <i>fields</i>
-            configuration file).</li> 
-	</ul>
-
-	<h2><a name="1.11.4">1.11.4</a></h2>
-	<ul>
-
-          <li>Bugs fixed:
-            check the <a href="BUGS.html#b_1_11_1">list</a>.</li>  
-
-          <li>The right-click menu "Copy" commands inside the result list
-            now copy to the clipboard in addition to the main selection,
-            enabling subsequent ^v commands.</li>
-
-	</ul>
-
-	<h2><a name="1.11.0">1.11.0</a></h2>
-
-	<p><i>Recoll release 1.11 has relatively extensive changes that have
-            necessitated a modification of the index format. Hence installing this
-            release implies a full re-indexing, which is enforced by the
-            software.</i></p> 
-
-	<ul>
-          <li>Filtering on category (message/text/media etc.) as a function of
-            the main window for quick access.</li>
-
-          <li>Use html for preview when available (ex: html files or "colorized"
-            python) instead of converting to text. This can be turned off in the
-            preferences. </li>
-
-          <li>New Python query and index interfaces. The Python query
-            interface will be used for building a Xesam adapter for
-            Recoll when the specification is stabilized, and could be
-            useful for other things, such as indexing contents from an
-            RDBMS (see 
-            <a href="usermanual/usermanual.html#RCL.PROGRAM.PYTHONAPI">
-              the manual</a> for details). Restructured and cleaned up
-            internal Recoll interfaces.</li>
-
-          <li>Improved filter framework. Can now process either html or text output
-            from the filters, and more easily execute "raw" commands instead of
-            Recoll scripts. Avoided wasteful repeated execution of filters for
-            which the helper application is missing.</li>
-
-          <li>Query language now closer to Xesam specification, (but
-            still far from a 
-            complete implementation). See the Recoll manual and
-            <a href="http://www.xesam.org/main/XesamUserSearchLanguage">
-              http://www.xesam.org/main/XesamUserSearchLanguage</a> </li>
-          
-          <li>Much improved configuration for fields. Fields like
-            "author" can now be specified as storable (displayable in
-            results) and/or indexed (searchable). Added alias facility
-            for translating from user-level names to internal.</li>
-
-          <li>Added "recipient" as an indexed/searchable field for emails.</li>
-
-          <li>rcltext filter for processing text such as C code for which no specific
-            processing is needed when indexing but a specific viewer is
-            desired.</li>
-	</ul>
-
-	<h2><a name="1.10.6">1.10.6</a></h2>
-	<ul>
-          <li>Fix a simple and mildly nasty bug that would cause the
-            indexer to stop 
-            indexing an mbox on encountering a specific but not exceptional error
-            condition (like a few dozen errors while indexing attachments for which
-            no filter was installed).</li>
-	</ul>
-
-
-	<h2><a name="1.10.5">1.10.5</a></h2>
-	<ul>
-          <li>Ensure that file names indexed as terms don't overflow the maximum term
-            size.</li>
-
-          <li> Handle non-standard date format in mbox separator lines sometimes
-            generated by thunderbird.
-
-          <li> Use attachment file names to help identify a better mime type for
-            parts only described as application/octet-stream
-
-          <li> For Phrase/Near searches, highlight all term groups in preview, not just
-            the first
-
-          <li> Added Open XML filters
-
-	</ul>
-	<h2><a name="1.10.2">1.10.2</a></h2>
-	<ul>
-
-          <li>Fixed openSuse 11 compile issues.
-
-          <li>Fixed bug in interpreting email mime structure, which resulted in base-64
-            decoding errors.
-
-          <li>Fixed "Prev" button in preview window. Would actually go forward when
-            walking the search terms.
-
-          <li> Allow setting the highlight color for search terms in result list and
-            preview (yes: feature change, should have waited for major release...)
-
-          <li> Added svg filter
-
-	</ul>
-	<h2><a name="1.10.1">1.10.1</a></h2>
-	<ul>
-
-          <li> Ensure that in case the data of a file can't be indexed because of some
-            error, at least the file name is indexed.
-
-          <li> Improve query language to support OR queries of terms with field
-            specifications (ie: title:someterm OR author:someauthor).
-
-          <li> Fix filename search to split patterns on white space, so that 
-            a "*.jpg *.jpeg" search does what's expected. Means you now need to use
-            double-quotes if there is actual embedded white space.
-
-          <li> Jump directly to the external editor choice dialog instead of opening
-            preferences when an external viewer is not found.
-
-          <li> Allow stopping indexing through menu action (only works with qt4 for now).
-
-          <li> Create an "indexedmimetypes" configuration variable to allow explicitly
-            restricting the file types which do get indexed.
-
-
-	</ul>
-	<h2><a name="1.10.0">1.10.0</a></h2>
-	<ul>
-
-          <li> Added a GUI dialog to configure the indexing parameters.
-
-          <li> Added better support for indexing CJK text (Chinese, Japanese, Korean). 
-            Please note that:
-            - You will need a full reindex to take good advantage of this. (You
-            *don't* need to reindex if you don't need to search CJK, even if there
-            is some in your index).
-            - When entering CJK search terms, words (single or multiple characters)
-            should be separated with white space.
-            - The specific CJK processing can be turned off by setting the nocjk
-            variable to true in the configuration file (this may make sense if you
-            have a mixed cjk/other document base and don't want to index the cjk
-            part, as it will save some disk space and a minuscule amount of cpu).
-
-          <li> Changed the way Recoll handles searches including composite words (like
-            an email address). The new approach looks saner, but could have
-            side-effects, please report any problems in this area.
-
-          <li> The query language got a new "dir:" specifier to filter results on location.
-
-          <li> New rclimg perl filter for better indexing of picture tags, thanks to
-            Cedric Scott. This depends on Exiftool.
-          <li> New rcltex filter.
-
-          <li> Changed and improved how the preview window local search finds the
-            query terms, this does not involve weird characters any more. The
-            display is cleaner and cut and paste works better.
-
-          <li> Fixed the fact that a newline-separated word list in simple search would
-            wrongly trigger a phrase search.
-
-          <li> Fixed the way we input text to the preview textedit (the old way would
-            sometimes confuse the window into displaying tags instead of acting on
-            them).
-
-          <li> Fixed transcoding to utf-8 for text/plain email attachments
-
-          <li> Improved mbox From_ line detection
-
-          <li> Added indexedmimetypes variables to allow restricting the list of indexed
-            mime types.
-
-          <li> KDE kicker applet: start a recoll search from the panel and get a
-            Recoll window. This is a clone from the find_applet, originally meant to
-            start a Tracker search. Not so useful presently because it will start a
-            new Recoll instance for every search. Not part of the main source (the
-            configure script is a whopping 1MB...), linked from the download page.
-          <li> Added recoll command line options to define a query and execute it
-            immediately when the program starts. This is used in practice from the
-            applet and could be used from other programs. There is also a new
-            option to not start the GUI and print the results to stdout.
-
-	</ul>
-	<h2><a name="1.9.0">1.9.0</a></h2>
-	<ul>
-          <li> Incompatible change: the icon image reference is now part of the result
-            list paragraph format string:
-            - If you had a standard config, you need do nothing.
-            - If you had a custom format string, you need to add
-            &lt;img src="%I" align="left"&gt; at its beginning to get the same result as
-            before.
-            - If you had unchecked the "show icons" option, you need to remove the
-            above string from the paragraph format to make the icons go away.
-            Changes to the format string are performed in the 
-            "Preferences->Query Configuration->User Interface" dialog tab.
-
-          <li> New filters: wordperfect, abiword and kword, rcljpeg, rclflac, rclogg
-            (contributed filters). The jpeg and audio filters should be extended to
-            make use of the new field indexing/search capability (hint :) )
-
-          <li> When searching for an empty string inside the preview window, position
-            the window to the next occurrence of a primary search term.
-
-          <li> Added ext: and mime: selectors to the query language.
-
-          <li> Added an adjustable flush threshold during indexing: should help control
-            memory usage. See the idxflushmb configuration variable.
-
-          <li> Added a check for file system free space. Indexing will stop if the
-            threshold is reached. See the maxfsoccuppc configuration parameter.
-
-          <li> Added 'followLinks' configuration option to have the indexer follow
-            symbolic links while walking the tree (the default is false).
-
-          <li> Allow symbolic links as 'topdirs' members. These are always followed.
-
-          <li> Add preference option to remember sort tool state between program
-            invocations (it is reset to inactive by default)
-
-          <li> Added File menu entry to erase document history.
-
-          <li> Bound the space and backspace keys to PgUp/PgDown in preview.
-
-          <li> (Hopefully) Improved abstract (keyword in context) generation
-
-          <li> Added support for arbitrary fields. Filters can now produce any number of
-            fields which will be selectively searchable through the query
-            language. This could be useful, for example, for the mp3 and jpeg filters
-            (but it is not currently used).
-
-          <li> Improved qt4 build: no more need for --enable-qt4. Note: the qt4 build
-            still needs the qt3 support library.
-
-          <li> Changed the icon to an ugly one. The previous one was nicer but looked
-            too much like Xapian's.
-
-          <li> Added some kind of support for a stopword list.
-
-          <li> Have email attachments inherit date and author from their parent message
-            (instead of mail folder).
-
-          <li> Fix bus error on rclmon exit
-
-          <li> Better handling of aspell errors inside rclmon
-
-          <li> Fixed a number of qt4 glitches: selection and keyboard shortcuts.
-
-          <li> New query configuration parameter to set the maximum text size beyond
-            which text won't be highlighted before preview (takes too much time). This
-            was a fixed value in 1.8.
-
-	</ul>
-	<h2><a name="1.8.2">1.8.2 2007-05-19</a></h2>
-	<ul>
-          <li> Fixed method name for compatibility with xapian 1.0.0
-          <li> Add .beagle to default list of skipped names (avoids indexing beagle
-            document cache...)
-          <li> Fix configure.ac to use $libdir instead of /usr/lib
-          <li> Fix recollinstall to properly copy translations and pictures for qt4
-
-	</ul>
-	<h2><a name="1.8.1">1.8.1 2007-02-20</a></h2>
-	<ul>
-          <li> Add a small query language with some field-based searches (author, title,
-            etc.)
-          <li> Add wildcard handling everywhere. *, ?, [] can be used in any
-            search. Warning: using a wild card at the left of a term can make
-            for a very slow search.
-          <li> Allow skipping specific paths during indexing (in addition to file name
-            patterns)
-          <li> Improved external index choice dialog, accessible from the top-level menu.
-          <li> Many small bugs fixed: stemming language choice ignored in term explorer,
-            qt4 preview window reentrancy crashes, issues with saving the default
-            advanced search file, type filter, display more clearly missing helper
-            errors, etc. 
-          <li> Option to use the desktop defaults (with xdg-open) to choose the native
-            viewer for files (instead of recoll's mimeview). 
-
-	</ul>
-	<h2><a name="1.7.6">1.7.6 2007-01-30</a></h2>
-	<ul>
-          <li> Fixes an issue with the openoffice filter on debian systems. 
-          <li> Adds Scribus and Lyx filters.
-
-	</ul>
-	<h2><a name="1.7.5">1.7.5 2007-01-15</a></h2>
-	<ul>
-          <li> Fixes two email indexing bugs in 1.7.3, which would bail out from an
-            mbox folder on the first attachment filtering error, and would decline
-            to handle multipart/signed bodies. You may need to run a full indexing
-            pass (recollindex -z), to force reindexing of old folders.
-
-	</ul>
-	<h2><a name="1.7.3">1.7.3 2007-01-09</a></h2>
-	<ul>
-          <li> Email attachments are now indexed.
-          <li> Right-click menu option to access the parent document of an embedded
-            result (ie from mail attachment to parent message), or the parent folder
-            of a given file (which is opened with the application configured for
-            directories) 
-          <li> The sort tool has been improved: no need to restart the query after sort
-            criteria change. 
-          <li> Support for real-time indexing with inotify is now enabled by default
-            when appropriate.
-          <li> Recoll now warns when the configured native viewer can not be found and
-            starts an interface for choosing another one.
-          <li> Categories (text, presentation, spreadsheets, etc.) can be used instead
-            of raw mime types when filtering on file types in advanced search.
-          <li> The port to qt4 is functional and can be enabled with configure --enable-qt4
-          <li> 'autophrase' option improved and may now actually be useful.
-          <li> Improved highlighting (again...)
-          <li> Display term frequencies in term explorer.
-          <li> Recollindex -e to remove data from index for listed files.
-          <li> Directory names now indexed. Directories can be 'edited' with the
-            configured application (rox by default)
-
-	</ul>
-	<h2><a name="1.6.3">1.6.3</a></h2>
-	<ul>
-          <li> Fixed problem with bad detection of mbox message boundaries.
-            Upgrading can change the message numbering in some cases, and you should
-            perform a full index update (recollindex -z) after installing
-            the new version.
-          <li> Fixed problem with execution of external viewer for files with
-            single-quotes in the name.
-	</ul>
-	<h2><a name="1.6.2">1.6.2</a></h2>
-	<ul>
-          <li> Minor solaris compilation glitches only.
-	</ul>
-	<h2><a name="1.6.1">1.6.1</a></h2>
-	<ul>
-          <li> Term explorer: a multimode wildcard-regexp-spell/phonetic tool to search
-            the index for terms. This uses aspell for the orthographic/phonetic part.
-          <li> A more dynamic advanced search window. You now have a choice of the top
-            level conjunction (OR/AND) and of any number of clauses, including NEAR
-            and PHRASE clauses with an adjustable proximity parameter.
-          <li> User-settable format for the result-list entries, which use an HTML
-            string with %xx printf-like replacements (accessible from the user
-            preferences).
-          <li> Real time monitoring/indexing support. This is not configured by
-            default, and must be specified at build time (configure --help).
-          <li> Improved phrase/group highlighting in abstracts and preview
-          <li> Better sample selection for synthetic abstracts.
-          <li> Improved performance of the text splitter, good for indexing and previewing.
-          <li> Shift+click link to open new preview window instead of tab in existing
-            window.
-          <li> The key sequence for term completion in the simple search entry was
-            changed from CTRL+TAB to "Escape Space" to avoid interaction with window
-            managers.
-          <li> Improved recall for phrases with composite words like email addresses.
-
-
-          <li> Updating from 1.2 to 1.3 or 1.4 or 1.5: 
-            --------------------------------------
-            From version 1.3 up, there is a new feature to search specifically for file
-            names (with wildcard processing). If you want to take full advantage of
-            this, you should perform a full reindex after installing the new version
-            (ie: use recollindex -z, or delete ~/.recoll/xapiandb).
-            Also, we now use the central copies of configuration files for default
-            values, and the user ones only for overrides. Your old configuration files
-            will still work, but, you may want to remove them if they are unmodified,
-            or keep only the modified parameters.
-
-	</ul>
-	<h2><a name="1.5.9 ">1.5.9 </a></h2>
-	<ul>
-          <li> Fix bad timezone conversion in email dates. Display timezone in result
-            list dates.
-
-	</ul>
-	<h2><a name="1.5.8">1.5.8</a></h2>
-	<ul>
-          <li> Fix stored and displayed dates which used to come from the file's ctime,
-            now use mtime (which was already used for deciding re-indexing).
-          <li> Fix problem with some weird MIME messages (with null boundaries) which
-            crashed the indexer.
-
-	</ul>
-	<h2><a name="1.5.6">1.5.6</a></h2>
-	<ul>
-          <li> Small fixes dealing with the build process or compiler issues. 
-            1.5.6 has updated ukrainian and russian messages.
-            Otherwise no functional changes, and no need to upgrade from 1.5.1
-
-	</ul>
-	<h2><a name="1.5.1">1.5.1</a></h2>
-	<ul>
-          <li> Fix serious bug with non ascii strings in simple search history
-          <li> Improve synthetic abstracts: remove size limitations, handle overlapping
-            extracts, avoid printing several terms from the same position.
-
-	</ul>
-	<h2><a name="1.5.0">1.5.0 2006-09-20</a></h2>
-	<ul>
-
-          <li> Added support for powerpoint and excel files, with the catdoc package.
-          <li> Allow viewing consecutive documents from the result list inside a single
-            preview window using the shift-arrow-up and shift-arrow-down keys.
-          <li> Colorize search terms in abstracts in the result list.
-          <li> A number of elements are now remembered between program invocations:
-            sort criteria, list of ignored file types (always starts inactive), 
-            subtree restriction, better handling of the recent searches listbox, the
-            buildAbstract and replaceAbstract settings are not forgotten any more.
-          <li> New option to automatically add a phrase to simple searches.
-          <li> Possibility to adjust the length and context width for synthetic abstracts.
-          <li> Handle weird html better.
-          <li> When indexing mail messages, walk the full mime tree instead of staying
-            at the top level, index all text parts and attachment file names.
-          <li> Add -c &lt;confdir&gt; option to recoll and recollindex to specify the
-              configuration directory on the command line
-              <li> Better synchronization between the active preview and the highlighted
-		paragraph inside the list
-              <li> Improved recall for some special cases of stemming.
-              <li> Much better handling of email dates, allowing better email sorting by
-		date (previously the message date was quite often the date when the file
-		was indexed).
-              <li> Store the external database lists in the configuration directory, not the
-		qt preferences.
-              <li> Ensure dialogs are sized according to font size
-
-
-	</ul>
-	<h2><a name="1.4.3">1.4.3 2006-05-07</a></h2>
-	<ul>
-          <li> Multiple search databases. 
-          <li> Optionally auto-search when a word is entered in the simple search
-            field.
-          <li> Show possible term completions in simple search by typing CTRL+TAB
-          <li> Add 'more like this' option to result list right-click menu, to look for
-            documents related to the current result.
-          <li> Double-click in preview or result list adds the selected word to the
-            simple search text field.
-          <li> The simple search text entry field is now a combobox and remembers
-            previous searches.
-          <li> Additional OR field in complex search.
-          <li> Improved indexing cancellability (interrupting recollindex or closing 
-            recoll with an indexing thread active), and status reporting.
-          <li> Fixed filters to handle file paths with embedded spaces.
-          <li> Misc small bug and memory leaks fixes.
-          <li> More compact result list.
-          <li> Set mode 0700 on .recoll directory by default
-
-	</ul>
-	<h2><a name="1.3.3">1.3.3 2006-04-04</a></h2>
-	<ul>
-          <li> Implement specific search on file names with wildcard
-            support. Indexing can optionally process all file names or only those
-            with mime types supported for normal indexing. UPDATING: you need a
-            full re-indexing to take advantage of this.
-          <li> Use links and a right-click popup menu to replace confusing use of
-            mouse clicks and double-clicks inside the result list.
-          <li> The 'example' configuration files are now used as default, and are not
-            copied any more to the user directory during installation. Overrides can
-            be set in the personal files for any value that the user wishes to
-            modify, with unchanged formats and file names (so that the files from
-            previous versions remain valid, but you may wish to trim them of values
-            that duplicate the central ones).
-          <li> Use NLS information (LC_CTYPE, LANG) to determine default charset when
-            possible.
-          <li> Mp3 file indexing, either filenames only or also id3 tags if id3info is
-            available. c/c++  ext edit. Use gnuclient instead of xemacs for text files.
-          <li> Russian and Ukrainian translations and many improvement ideas thanks to
-            Michael Shigorin.
-
-	</ul>
-	<h2><a name="1.2.3">1.2.3 2006-03-03</a></h2>
-	<ul>
-          <li> Added support for dvi (with dvips), and djvu (with DjVuLibre).
-          <li> Ensure that configure and make use the same qt version.
-          <li> Fix sorted sequence title display.
-          <li> Discriminate fatal errors and missing docs while loading a doc list.
-          <li> Improved and cleaned up way to position a preview on the first search term.
-
-	</ul>
-	<h2><a name="1.2.2">1.2.2 2006-02-02</a></h2>
-	<ul>
-          <li> Fix minor compilation glitches (FreeBSD 4, QT 3.1, xapian-config problem)
-
-	</ul>
-	<h2><a name="1.2.0">1.2.0 2006-02-01</a></h2>
-	<ul>
-          <li> Improved preview loading: don't highlight very big documents (over 1Mb),
-            allow cancellation while loading.
-          <li> Abstracts generated in the result list by looking at search term
-            contexts. This can slow down result list display for big documents, and
-            can be turned off in the preferences menu.
-          <li> Wrap query detail line displayed when clicking on result list header.
-          <li> Text splitting cleanup with less spurious terms should result in
-            slightly smaller databases.
-          <li> Slightly improved presentation in preview, esp. line breaks.
-          <li> Color icons...
-          <li> Let the user select the html browser used for help display.
-          <li> autoconf/Makefile change: allow building UI from inside the qtgui
-            directory. 
-          <li> autoconf/Makefile: improved search and diagnostics for qt/qmake.
-          <li> Internal code cleanup for maintainability: text splitting, user
-            interface.
-          <li> Added prototype kio_slave to show result inside Konqueror, doesn't seem
-            particularly useful.</li>
-	</ul>
-
-	<h2><a name="1.1.0">1.1.0 2006-01-12</a></h2>
-	<ul>
-          <li> A much better user manual, which can be browsed from the help menu.
-          <li> man pages for recoll, recollindex, recoll.conf
-          <li> User/query interface configuration dialog.
-          <li> Click on result list header will display the exact boolean search which
-            was used.
-          <li> recollindex can be used to create stem expansion databases independently
-            of a full indexing pass.
-          <li> Misc user interface improvements, like an 'all terms' checkbox for
-            simple search.
-          <li> Fixed case-insensitivity issues. Probably needs more testing.
-
-	</ul>
-
-	<h2><a name="1.0.16">1.0.16 2006-01-05</a></h2>
-	<ul>
-          <li> Minor installation tweaks for rpm compatibility
-	</ul>
-
-	<h2><a name="1.0.15 ">1.0.15 </a></h2>
-	<ul>
-          <li> Fix problems with prefix != /usr/local
-          <li> Remove '.*' from the default list of ignored file/dir names: this
-            prevented mozilla/thunderbird mail indexing. 
-          <li> Fix some 64 bits issues
-	</ul>
-
-	<h2><a name="1.0.14">1.0.14</a></h2>
-	<ul>
-          <li> Small changes for FreeBSD 4 compilation.
-	</ul>
-
-	<h2><a name="1.0.13">1.0.13</a></h2>
-	<ul>
-          <li> Install of recollinstall program not done or needed any more.
-	</ul>
-
-	<h2><a name="1.0.12">1.0.12</a></h2>
-	<ul>
-          <li> Fixed nasty html parsing bug introduced in 1.0.9 Html parsing failed
-            whenever the document charset name differed from the default only in 
-            character case or punctuation.
-	</ul>
-
-	<h2><a name="1.0.11">1.0.11</a></h2>
-	<ul>
-          <li> Create personal configuration on first start.
-          <li> Use qt toolbars.
-          <li> Also index terms in file paths.
-          <li> Tool for sorting on dates or mime types.
-          <li> Fixed pdf filter which was broken by more recent xpdf
-          <li> Filters now installed/executed from /usr/local
-
-	</ul>
-
-	<h2><a name="1.0.10">1.0.10</a></h2>
-	<ul>
-          <li> Added tool to manage the history of consulted documents.
-          <li> Try harder to convert email messages with wrongly declared charsets.
-          <li> Add option to reset the database before indexing (easier than rm -rf).
-          <li> Small gui improvements.
-          <li> Install partial french translation as a tease for future translators...
-
-	</ul>
-
-	<h2><a name="1.0.9">1.0.9</a></h2>
-	<ul>
-          <li> Fixed 2 really annoying bugs in 1.0.8: wouldn't preview 2nd document
-            from same file + spurious db close when filter could not be executed.
-
-	</ul>
-
-	<h2><a name="1.0.8">1.0.8</a></h2>
-	<ul>
-          <li> Add support for rtf and gaim logs
-          <li> Optionally show icons to indicate mime types in result list
-          <li> Better (but imperfect) feedback during the preview
-            loading for big files 
-          <li> Remember main window geometry when closing
-          <li> Fix stem expansion in advanced search
-          <li> Some autoconf
-          <li> Option to use the system's 'file' command as a final step of
-            identification for suffix-less or unknown files.
-          <li> Typo had removed support for .Z compression
-          <li>Use more appropriate conjunction operators when
-            computing the advanced search query (OP_AND_MAYBE,
-            OP_FILTER instead of OP_AND) 
-	</ul>
-
-    </div>
-  </body>
-</html>
--- a/website/copydocs
+++ b/website/copydocs
@ -1,17 +0,0 @@
-#!/bin/sh
-set -x 
-docdir=/home/dockes/projets/fulltext/recoll/src/doc/user/
-
-#(cd $docdir;make) || exit 1
-
-test -d usermanual || mkdir usermanual || exit 1
-cd usermanual
-
-thisdir=`pwd`
-(cd $docdir; find . -name templates -prune -o -print | cpio -vudp $thisdir)
-
-mv usermanual.pdf recoll_user_manual.pdf
-# The freebsd tool chain generates a link to book.html in the index. Too
-# lazy to check if this can be changed
-cp -p usermanual.html book.html
-#cp usermanual.html index.html
--- a/website/credits.html
+++ b/website/credits.html
@ -1,80 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-
-<html>
-  <head>
-    <title>RECOLL: credits</title>
-
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description" content=
-    "recoll is a simple full-text search system for unix and linux
-    based on the powerful and mature xapian engine">
-    <meta name="Keywords" content=
-    "full text search, desktop search, unix, linux">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
-    <meta name="robots" content="All,Index,Follow">
-
-    <link type="text/css" rel="stylesheet" href="styles/style.css">
-  </head>
-
-  <body>
-    
-    <div class="rightlinks">
-      <ul>
-<li><a href="index.html">Home</a></li>
-<li><a href="pics/index.html">Screenshots</a></li>
-<li><a href="download.html">Downloads</a></li>
-<li><a href="doc.html">User manual</a></li>
-<li><a href="index.html#support">Support</a></li>
-
-      </ul>
-    </div>
-    
-    <div class="content">
-
-      <h3><a name="credits">Credits</a></h3>
-
-      <p>First of all, many thanks to the users who provided criticism
-	and ideas to make <span class="application">Recoll</span> go
-	forward ! Please 
-	<a href="mailto:jfd@recoll.org">
-	  contact me</a> if you have something to suggest.</p>
-
-      <p><span class="application">Recoll</span> borrows
-	from the following projects. I tried to include the relevant
-	copyright attributions with the code. Any omission is
-	unintentional and will be fixed as soon as notified. </p>
-
-      <ul>
-	<li><a href="http://www.xapian.org">Xapian</a>: The database module
-	  (core) is used unmodified, and quite a lot of code has been
-	  borrowed from Omega, the web-based search application (ie:
-	  the html parser, plus miscellaneous bits and ideas). </li>
-	<li><a href="http://estraier.sourceforge.net/">Estraier</a>:
-	  Some of the input handlers still have bits of Estraier code
-	  in them.</li>
-	<li><a href="http://www.senga.org/">Unac</a>: for accent
-	  removal. This package is unmaintained and the (quite modified)
-	  code is carried with the <span class="application">Recoll</span>
-	  source.</li>
-	<li><a href="http://www.gnu.org/software/libiconv/">Iconv</a>, for
-	  character set conversion.</li>
-	<li><a href="http://www.bincimap.org/">Binc IMAP</a> for MIME
-	  parsing code. The original package is unmaintained and the
-	  relevant code is carried with the <span
-	  class="application">Recoll</span> source.</li>  
-	<li>The icons mainly come from the <a
-	    href="http://www.everaldo.com/">Crystal SVG</a> KDE set.</li>
-      </ul>
-
-      <ul>
-	<li>I fear that bugs found elsewhere are mostly mine:
-	  <a href="mailto:jfd@recoll.org">jfd@recoll.org</a></li>
-      </ul>
-
-
-    </div>
-  </body>
-</html>
-
--- a/website/custom.html
+++ b/website/custom.html
@ -1,630 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-
-<html>
-  <head>
-    <title>RECOLL: result list customisation tips</title>
-
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description" content=
-    "recoll is a simple full-text search system for unix and linux
-    based on the powerful and mature xapian engine">
-    <meta name="Keywords" content=
-    "full text search, desktop search, unix, linux">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content="text/html; charset=utf-8">
-    <meta name="robots" content="All,Index,Follow">
-
-    <link type="text/css" rel="stylesheet" href="styles/style.css">
-
-
-<style type="text/css">
-/* Photo-Caption PZ3 CSS v080630
-* copyright: http://randsco.com/copyright
-* www.randsco.com
-*/
-
-.PZ3-l { float:left; margin-right:10px; }
-.PZ3-r { float:right; margin-left:10px; direction:rtl; }
-  html>/**/body .PZ3-r { position:relative; }
-
-.PZ3zoom { border:1px solid #369; }
-.PZ3zoom a,.PZ3zoom a:visited { display:block;
-  padding:0; overflow:hidden; text-decoration:none;
-  height:100%; width:100%; }
-  html>/**/body .PZ3-r a { right:0; }
-
-.PZ3zoom a:hover { position:absolute;
-  z-index:999; padding:0; background:none;
-  cursor:default; height:auto; width:auto;
-  overflow:visible; border:1px solid #369;
-  margin:-1px 0 0 -1px; }
-  html>body .PZ3zoom a:hover { margin:-1px -1px 0 -1px; }
-
-.PZ3zoom a img { border:0; height:100%; width:100%; }
-.PZ3zoom a:hover img { height:auto; width:auto;
-  border:0; }
-
-a:hover .PZ3cap,
-a:hover .PZ31cap { display:block;
-  direction:ltr; font:10pt verdana,sans-serif;
-  margin-top:-3px; background:#369; color:#fff;
-  text-align:left; }
-a:hover .PZ3cap { padding:3px 5px; }
-.PZ3inr { display:block; padding:2px 5px; }
-
-.noCap a:hover .PZ3cap,
-.noCap a:hover .PZ31cap { display:none; }
-.noBdr,.noBdr a:hover { border:0; }
-.Lnk a:hover { cursor:pointer; }
-
-/* End Photo-Caption Zoom CSS */ 
-</style>
-
-
-  </head>
-
-  <body>
-    
-    <div class="rightlinks">
-      <ul>
-        <li><a href="index.html">Home</a></li>
-        <li><a href="pics/index.html">Screenshots</a></li>
-        <li><a href="download.html">Downloads</a></li>
-        <li><a href="doc.html">User manual</a></li>
-        <li><a href="index.html#support">Support</a></li>
-      </ul>
-    </div>
-
-    
-    <div class="content">
-
-      <h1>Recoll result list customising examples</h1>
-
-      <p>The Recoll result list is actually made of html text
-        displayed inside a Qt Widget. In all Recoll versions, you
-        can specify the format for the list entries: what data is
-        displayed for each hit document and how. This used to include
-        "almost full" support for HTML capabilities, with a few
-        restrictions due to the Qt QTextBrowser object. The details
-        are described in the
-        <a href="http://www.recoll.org/usermanual/usermanual.html#RCL.SEARCH.GUI.CUSTOM.RESLIST">
-	  Recoll manual</a>.</p>
-
-      <p>As of Recoll 1.17, the result list is a WebKit object by
-        default (WebKit is the basis for several major browsers),
-        which yields full CSS and even Javascript support.</p>
-
-      <h2>New in Recoll 1.17: the WebKit result list</h2>
-
-
-      <p>For newer Recoll versions, you can specify the
-        individual result format, as for previous versions. You can
-        also define code to be included in the HTML
-        header (ie: CSS or Javascript), using 
-        <tt>Preferences->Query&nbsp;Configuration->Result&nbsp;List->Edit&nbsp;result&nbsp;page&nbsp;html&nbsp;header&nbsp;insert</tt></p>
-
-      <p>This, plus the full Javascript and CSS support in WebKit,
-        open a world of possibilities for result list formatting and
-        even behaviour.</p>
-
-      <p>The examples which follow are probably not generally
-        very useful but they show the kinds of things you can do, if
-        you can use Javascript/CSS which is not my case.</p>
-      
-      <h3>Using the icons as links</h3>
-      <p>You can now make the list icons links that activate the
-        preview or open action (or the document url which you can then
-        drag/drop to other windows). Using images as links did
-        not work with QTextBrowser.</p>
-
-      <h3>Alternating result backgrounds</h3>
-      <p>Using the following Javascript inside the header will yield
-        alternating backgrounds for the results:</p>
-
-<pre>
-&lt;script type="text/javascript">
-function altRows() {
-    var rows = document.getElementsByClassName("rclresult");
-    for (i = 0; i &lt; rows.length; i++) {
-        if (i % 2 == 0) {
-            rows[i].style.backgroundColor = "#d4e3e5";
-        }
-    }
-}
-
-window.onload = function() {
-    altRows();
-}
-&lt;/script>
-</pre>
-
-
-      <h3>Zooming the paragraph font size</h3>
-      <p>If you are using a format with small fonts, it may be useful
-        to be able to zoom the text when the mouse hovers over it. A
-        very basic way to do this -<em>with the standard paragraph
-          format, which is a table</em>- would be to include the following
-        code in the header:</p>
-<pre>
-&lt;style type="text/css">
-    table:hover {font-size: 130%;}
-&lt;/style>
-</pre>
-
-        <p>Of course, the selector should be adapted to your own
-          result format. You should know that every result will be
-          enclosed by Recoll inside a <tt>&lt;div
-            class="rclresult" rcldocnum="nn"&gt;</tt> element.</p>
-
-      <h3>Zooming the thumbnails</h3>
-
-      <p>Recoll 1.17 and newer will display document
-        thumbnails instead of the type icon if the thumbnail exists in
-        the standard Freedesktop location. The icons/thumbnails are
-        64x64 pixels in size, which is a bit small. The standard
-        thumbnail files are actually 128x128, which is much more
-        detailed. Using them statically would consume too much list
-        space though. Using CSS, you can get them to expand when the
-        mouse is over them. Recipe:</p>
-
-      <blockquote>
-      <p>Retrieve the CSS code
-        from <a href="http://randsco.com/_miscPgs/cssZoomPZ3.html">randsco
-        pure CSS photo-caption zoom</a>, and include it inside the
-        result list html header by using the "Edit result page html
-        header insert" from the GUI preferences. Don't forget to
-        enclose the CSS code between <code>&lt;style type="text/css"&gt;
-        &lt;/style&gt;</code> tags.</p> 
-      
-      <p>Use something like the following result paragraph format
-      (only the code around the img tag is relevant, the rest can be
-        what you want):</p>
-
-<pre>
-<!--
-<table><tr><td>
- <div class="PZ3zoom PZ3-l noBdr noCap noLnk" style="width:64px;height:64px;">
- <a href="%U"> <img src='%I' width='64'></a>
-</div>
-</td><td>
-%R %S %L &nbsp;&nbsp;<b>%T</b><br>%M&nbsp;%D&nbsp;&nbsp;&nbsp;<i>%U</i>&nbsp;%i<br>%A %K
-</td></tr></table>
-->
-&lt;table&gt;&lt;tr&gt;&lt;td&gt;
-
- &lt;div class="PZ3zoom PZ3-l noBdr noCap noLnk" style="width:64px;height:64px;"&gt;
- &lt;a href="%U"&gt; &lt;img src='%I' width='64'&gt;&lt;/a&gt;
-&lt;/div&gt;
-
-&lt;/td&gt;&lt;td&gt;
-%R %S %L &amp;nbsp;&amp;nbsp;&lt;b&gt;%T&lt;/b&gt;&lt;br&gt;%M&amp;nbsp;%D&amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;i&gt;%U&lt;/i&gt;&amp;nbsp;%i&lt;br&gt;%A %K
-&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
-</pre>
-
-      </blockquote>
-   <div class="PZ3zoom PZ3-r noCap noLnk" style="width:100px;height:40px;">
-     <a href="resparpics/pz3.png" onclick="return false">
-       <img src="resparpics/pz3.png" alt="hover zoom" />
-     </a>
-   </div>
-
-      <p>Et voilà! The icons will grow to their full size when the mouse is
-        over them.</p>
-
-      <h2>Alternate icons theme</h2>
-      <p>There is an alternate set of icons
-      at <a href="http://kde-look.org/content/show.php?content=145669">
-          kde-look.org</a>. If you are running KDE desktop, it should
-        be more consistent with the rest of your applications.</p>
-      <p>You do not need to replace the standard Recoll set of icons
-        to use it, just extract it somewhere, and use
-        the <tt>iconsdir</tt> variable in <i>~/.recoll/recoll.conf</i> to
-        point Recoll to it. e.g.:
-        <blockquote><pre>
-            <tt>iconsdir = /path/to/my/icons</tt>
-        </pre></blockquote>
-      </p>
-
-      <h2>Result list paragraph format samples (for all versions)</h2>
-
-      <p>Here follow some sample formats. Most of them were contributed by
-        kind users, and I'll be happy to show their names if they so
-        wish (abstaining by default).</p>
-
-      <h3>Recoll 1.15 default</h3>
-<pre>
-
-<!--
-<table>
-  <tr>
-   <td><img src='%I'></td>
-   <td>%R %S %L&nbsp;&nbsp;<b>%T</b><br>
-       %M&nbsp;%D&nbsp;&nbsp;&nbsp;<i>%U</i><br>
-       %A %K
-    </td>
-  </tr>
-</table>
-->
-&lt;table&gt;
-  &lt;tr&gt;
-   &lt;td&gt;&lt;img src='%I'&gt;&lt;/td&gt;
-   &lt;td&gt;%R %S %L&amp;nbsp;&amp;nbsp;&lt;b&gt;%T&lt;/b&gt;&lt;br&gt;
-     %M&amp;nbsp;%D&amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;i&gt;%U&lt;/i&gt;&lt;br&gt;
-     %A %K
-    &lt;/td&gt;
-  &lt;/tr&gt;
-&lt;/table&gt;
-</pre>
-
-<br clear="all">
-<img src="resparpics/default.png"/>
-
-<h3>Alternating bands, bigger previews, and custom paragraph
-typesetting</h3>
-
-      <p>Paul, the author, gives the following description for his
-      result list formatting:
-        <blockquote>
-          It uses the "Alternating Results Background" from that page,
-          plus my own layout which incorporates a larger view of image
-          files. The 'large image' is scaled down from the actual
-          image, rather than a scaled up version of the thumbnail.
-        </blockquote>
-      </p> 
-
-      <p>The header fragment has the javascript for
-        alternating backgrounds, and the CSS code:</p>
-
-<pre>
-&lt;!-- Custom Header --&gt;
-&lt;script type=&quot;text/javascript&quot;&gt;
-  function altRows() {
-      var rows = document.getElementsByClassName(&quot;rclresult&quot;);
-      for (i = 0; i &lt; rows.length; i++) {
-          if (i % 2 == 0) {
-              rows[i].style.backgroundColor = &quot;#f0f0f0&quot;;
-          }
-      }
-  }
-  window.onload = function() {
-      altRows();
-  }
-&lt;/script&gt;
-
-&lt;style type=&quot;text/css&quot;&gt;
-.thumbnail { 
-   display:block;
-   position:relative;
-   padding: 4px;
-   width: auto; /* set width of thumbnail image in 'paragraph' code - not here */
-   border:none;
-   z-index:0;
-   }
-.thumbnail:hover {
-   border:none;
-   background-color: transparent;
-   z-index: 50;
-   }
-.thumbnail span {
-   position: absolute;
-   left: -9999px;
-   visibility: hidden;
-   }
-.thumbnail span img {
-   max-width:256px;  /* set 'large image' max width/height - advise keeping these  */
-   max-height:256px; /* the same to avoid inadvertently changing the aspect ratio  */
-   width:auto;   /* leave set to auto */
-   height:auto;  /* leave set to auto */
-   background-color: gray;
-   padding: 1px;
-   border: 1px solid black;
-   }
-.thumbnail:hover span {
-   visibility: visible;
-   top: 4px;   /* top/left positions 'large image' relative to top left */
-   left: 88px; /* of parent thumbnail (plus padding)                    */
-   }
-&lt;/style&gt;
-&lt;!-- End of Custom Header --&gt;
-</pre>
-
-      <p>And the paragraph format:</p>
-
-<pre>
-&lt;!-- Custom Paragraph --&gt;
-&lt;table&gt;
-  &lt;tr&gt;
-    &lt;td&gt;
-      &lt;a class=&quot;thumbnail&quot; href=&quot;#&quot;&gt;
-        &lt;img src=&quot;%I&quot; width=&quot;64px&quot; height=&quot;auto&quot;&gt;   &lt;!-- set width of thumbnail --&gt;
-        &lt;span&gt;
-          &lt;img src=&quot;%U&quot;&gt;
-        &lt;/span&gt;
-      &lt;/a&gt;
-    &lt;td&gt;
-      &lt;table&gt;
-        &lt;tr&gt;
-          &lt;td&gt;
-            &lt;div&gt;
-              &lt;b&gt;%T&lt;/b&gt;&lt;/br&gt;
-              %L&lt;/br&gt;
-              &lt;p&gt;&lt;font color=&quot;grey&quot;&gt;%A &lt;/font&gt;&lt;font color=&quot;#CD6688&quot;&gt;&lt;i&gt;%K&lt;/i&gt;&lt;/font&gt;&lt;/p&gt;
-              &lt;font color=&quot;green&quot;&gt;&lt;font size=1&gt;
-              %U&lt;/br&gt;
-              %R — %S—%D — %M
-              &lt;/font&gt;&lt;/font&gt;&lt;/br&gt;
-            &lt;/div&gt;
-          &lt;/td&gt;
-        &lt;/tr&gt;
-      &lt;/table&gt;
-    &lt;/td&gt;
-  &lt;/tr&gt;
-&lt;/table&gt;
-&lt;!-- End Custom Paragraph --&gt;
-</pre>
-
-      <p>Result:</p>
-<br clear="all">
-<img src="resparpics/pip.png"/>
-      
-<h3>A simpler format, suggested in Bitbucket issue #69</h3>
-
-<pre>
-<!--
-<img src="%I" align="left">%R %L&nbsp;&nbsp;<b>%T</b><br>
-&nbsp;&nbsp;<i><font color="#808080">%U</font></i>&nbsp;%i<br>
-%A %K
-->
-&lt;img src=&quot;%I&quot; align=&quot;left&quot;&gt;%R %L&amp;nbsp;&amp;nbsp;&lt;b&gt;%T&lt;/b&gt;&lt;br&gt;
-&amp;nbsp;&amp;nbsp;&lt;i&gt;&lt;font color=&quot;#808080&quot;&gt;%U&lt;/font&gt;&lt;/i&gt;&amp;nbsp;%i&lt;br&gt;
-%A %K
-</pre>
-<br clear="all">
-<img src="resparpics/issue73.png"/>
-
-
-<h3>Simple+table</h3>
-
-<p>Same format, but using a table to avoid text flowing into the icon
-area.</p>
-
-<pre>
-<!--
-<table>
-  <tr>
-    <td><img src="%I" align="left"></td>
-    <td>%R %L&nbsp;&nbsp;<b>%T</b><br>
-            &nbsp;&nbsp;<i><font color="#808080">%U</font></i>&nbsp;%i<br>
-            %A %K
-    </td>
-  </tr>
-</table>
-->
-&lt;table&gt;
-  &lt;tr&gt;
-    &lt;td&gt;&lt;img src=&quot;%I&quot; align=&quot;left&quot;&gt;&lt;/td&gt;
-    &lt;td&gt;%R %L&amp;nbsp;&amp;nbsp;&lt;b&gt;%T&lt;/b&gt;&lt;br&gt;
-            &amp;nbsp;&amp;nbsp;&lt;i&gt;&lt;font color=&quot;#808080&quot;&gt;%U&lt;/font&gt;&lt;/i&gt;&amp;nbsp;%i&lt;br&gt;
-            %A %K
-    &lt;/td&gt;
-  &lt;/tr&gt;
-&lt;/table&gt;
-
-</pre>
-
-<br clear="all">
-<img src="resparpics/issue73+table.png"/>
-
-
-
-<h3>Using a small font to make the size/date details less obtrusive</h3>
-
-<pre>
-<!--
-<table>
-  <tr>
-    <td><img src="%I" align="left"></td>
-    <td><table bgcolor="#bababa">
-        <tr><td><div>
-              <font face="Tahoma, sans-serif"><u><b><a href="P%N">%T</a></b></u><br>
-                <font color=#008000>%L</font><br>
-                <font color=#510101>%A %K</font><br>
-                <font color=#0100FF>%U</font>
-                <p align="right"><font size=1><font color=#000000>%S
-                      &nbsp;&nbsp;&nbsp;-&nbsp;&nbsp;&nbsp; %D
-                      &nbsp;&nbsp;&nbsp;-&nbsp;&nbsp;&nbsp; %M</font></p>
-        </div></td></tr>
-    </table></td>
-  </tr>
-</table>
-->
-&lt;table&gt;
-  &lt;tr&gt;
-    &lt;td&gt;&lt;img src=&quot;%I&quot; align=&quot;left&quot;&gt;&lt;/td&gt;
-    &lt;td&gt;&lt;table bgcolor=&quot;#bababa&quot;&gt;
-        &lt;tr&gt;&lt;td&gt;&lt;div&gt;
-              &lt;font face=&quot;Tahoma, sans-serif&quot;&gt;&lt;u&gt;&lt;b&gt;&lt;a href=&quot;P%N&quot;&gt;%T&lt;/a&gt;&lt;/b&gt;&lt;/u&gt;&lt;br&gt;
-                &lt;font color=#008000&gt;%L&lt;/font&gt;&lt;br&gt;
-                &lt;font color=#510101&gt;%A %K&lt;/font&gt;&lt;br&gt;
-                &lt;font color=#0100FF&gt;%U&lt;/font&gt;
-                &lt;p align=&quot;right&quot;&gt;&lt;font size=1&gt;&lt;font color=#000000&gt;%S
-                      &amp;nbsp;&amp;nbsp;&amp;nbsp;-&amp;nbsp;&amp;nbsp;&amp;nbsp; %D
-                      &amp;nbsp;&amp;nbsp;&amp;nbsp;-&amp;nbsp;&amp;nbsp;&amp;nbsp; %M&lt;/font&gt;&lt;/p&gt;
-        &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
-    &lt;/table&gt;&lt;/td&gt;
-  &lt;/tr&gt;
-&lt;/table&gt;
-</pre>
-
-<br clear="all">
-<img src="resparpics/detailSmallGreyTable.png"/>
-
-
-<h3>A very structured table</h3>
-
-<pre>
-<!--
-<table border="1" bgcolor="lightyellow">
-  <tr>
-    <td rowspan="4" width="40px" align="center" valign="center">
-      <img src="%I" width="32" height="32">
-      <p><b>%R</b></p>
-      <p><a href="P%N">Aperçu</a></p>
-    </td>
-    <th colspan="3" bgcolor="lightgrey">%T</th>
-  </tr>
-  <tr>
-    <td align="center">%M</td>
-    <td align="center">%D</td>
-    <td align="center">%S</td>
-  </tr>
-  <tr>
-    <td colspan="3"><a href="E%N">%U</a></td>
-  </tr>
-  <tr>
-    <td colspan="3">%A</td>
-  </tr>
-</table>
-->
-&lt;table border=&quot;1&quot; bgcolor=&quot;lightyellow&quot;&gt;
-  &lt;tr&gt;
-    &lt;td rowspan=&quot;4&quot; width=&quot;40px&quot; align=&quot;center&quot; valign=&quot;center&quot;&gt;
-      &lt;img src=&quot;%I&quot; width=&quot;32&quot; height=&quot;32&quot;&gt;
-      &lt;p&gt;&lt;b&gt;%R&lt;/b&gt;&lt;/p&gt;
-      &lt;p&gt;&lt;a href=&quot;P%N&quot;&gt;Aperçu&lt;/a&gt;&lt;/p&gt;
-    &lt;/td&gt;
-    &lt;th colspan=&quot;3&quot; bgcolor=&quot;lightgrey&quot;&gt;%T&lt;/th&gt;
-  &lt;/tr&gt;
-  &lt;tr&gt;
-    &lt;td align=&quot;center&quot;&gt;%M&lt;/td&gt;
-    &lt;td align=&quot;center&quot;&gt;%D&lt;/td&gt;
-    &lt;td align=&quot;center&quot;&gt;%S&lt;/td&gt;
-  &lt;/tr&gt;
-  &lt;tr&gt;
-    &lt;td colspan=&quot;3&quot;&gt;&lt;a href=&quot;E%N&quot;&gt;%U&lt;/a&gt;&lt;/td&gt;
-  &lt;/tr&gt;
-  &lt;tr&gt;
-    &lt;td colspan=&quot;3&quot;&gt;%A&lt;/td&gt;
-  &lt;/tr&gt;
-&lt;/table&gt;
-</pre>
-<br clear="all">
-<img src="resparpics/structuredTable.png"/>
-
-
-<h3>Web-like from the user manual</h3>
-
-<pre>
-<!--
-<u><b><a href="P%N">%T</a></b></u><br>
-%U<br>
-%A <font color=#008000>%S</font> - <a href="E%N">Edit</a>
-->
-&lt;u&gt;&lt;b&gt;&lt;a href=&quot;P%N&quot;&gt;%T&lt;/a&gt;&lt;/b&gt;&lt;/u&gt;&lt;br&gt;
-%U&lt;br&gt;
-%A &lt;font color=#008000&gt;%S&lt;/font&gt; - &lt;a href=&quot;E%N&quot;&gt;Edit&lt;/a&gt;
-</pre>
-<br clear="all">
-<img src="resparpics/weblike.png"/>
-
-
-<h3>Clean-Looking from the user manual</h3>
-
-<pre>
-<!--
-<table>
-  <tr><td><img src="%I" align="left"></td>
-    <td>%L <font color="#900000">%R</font> <b>%T</b><br>
-      %S <font color="#808080"><i>%U</i></font>
-      <table bgcolor="#e0e0e0">
-        <tr><td><div>%A</div> %K </td></tr>
-    </table></td>
-</table>
-->
-&lt;table&gt;
-  &lt;tr&gt;&lt;td&gt;&lt;img src=&quot;%I&quot; align=&quot;left&quot;&gt;&lt;/td&gt;
-    &lt;td&gt;%L &lt;font color=&quot;#900000&quot;&gt;%R&lt;/font&gt; &lt;b&gt;%T&lt;/b&gt;&lt;br&gt;
-      %S &lt;font color=&quot;#808080&quot;&gt;&lt;i&gt;%U&lt;/i&gt;&lt;/font&gt;
-      &lt;table bgcolor=&quot;#e0e0e0&quot;&gt;
-        &lt;tr&gt;&lt;td&gt;&lt;div&gt;%A&lt;/div&gt; %K &lt;/td&gt;&lt;/tr&gt;
-    &lt;/table&gt;&lt;/td&gt;
-&lt;/table&gt;
-
-</pre>
-<br clear="all">
-<img src="resparpics/clean.png"/>
-
-
-
-<h3>Another clean and nice one, using both a bit of header code and a
-  custom paragraph format</h3>
-
-<p>This one also uses the custom icons set from
-<a href="http://kde-look.org/content/show.php?content=145669">
-  this kde-look page</a>.</p>
-
-<p>The header code:</p>
-
-<pre>
-&lt;style type="text/css"&gt;
-body {
-    color: rgb(0, 0, 0);
-    background-color: rgb(224, 224, 224);
-}
-&lt;/style&gt;
-</pre>
-
-<p>The paragraph code:</p>
-
-<pre>
-&lt;table style="background-color: white; width: 950px;"
-border-style="none" border-color:="" border="0"&gt;
-&lt;tbody&gt;
-&lt;tr&gt;
-&lt;td rowspan="4"
-style="width: 68px; text-align: center; background-color: rgb(238, 238, 238);"&gt;
-&lt;img src="%I" height="32" width="32"&gt;
-&lt;p style="font-family: sans-serif;"&gt;&lt;b&gt;%R&lt;/b&gt;&lt;/p&gt;
-&lt;p style="font-family: sans-serif; color: rgb(0, 153, 0);"&gt;&lt;br&gt;
-&lt;/p&gt;
-&lt;/td&gt;
-&lt;td style="vertical-align: top;"&gt;&lt;br&gt;
-&lt;/td&gt;
-&lt;th
-style="font-family: sans-serif; background-color: white; text-align: left;"
-colspan="3" bgcolor="lightgrey"&gt;%T&lt;/th&gt;
-&lt;/tr&gt;
-&lt;tr&gt;
-&lt;td style="vertical-align: top; width: 11px;"&gt;&lt;br&gt;
-&lt;/td&gt;
-&lt;td
-style="text-align: center; font-family: sans-serif; background-color: rgb(249, 249, 249);"&gt;%M&lt;/td&gt;
-&lt;td
-style="text-align: center; font-family: sans-serif; background-color: rgb(249, 249, 249);"&gt;%D&lt;/td&gt;
-&lt;td
-style="font-family: sans-serif; text-align: right; background-color: rgb(249, 249, 249);"&gt;%S&lt;/td&gt;
-&lt;/tr&gt;
-&lt;tr style="font-family: sans-serif; color: rgb(0, 153, 0);"&gt;
-&lt;td style="vertical-align: top;"&gt;&lt;br&gt;
-&lt;/td&gt;
-&lt;td colspan="3"&gt;&lt;a href="E%N"&gt;%U&lt;/a&gt;&lt;/td&gt;
-&lt;/tr&gt;
-&lt;tr style="font-family: sans-serif;" 8=""&gt;
-&lt;td style="vertical-align: top;"&gt;&lt;br&gt;
-&lt;/td&gt;
-&lt;td colspan="3"&gt;%A&lt;/td&gt;
-&lt;/tr&gt;
-&lt;/tbody&gt;
-&lt;/table&gt;
-&lt;br&gt;
-&lt;br&gt;
-</pre>
-
-<br clear="all">
-<img src="resparpics/christopher.png"/>
-
-    </div>
-  </body>
-</html>
-
--- a/website/devel.html
+++ b/website/devel.html
@ -1,109 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-
-<html>
-  <head>
-    <title>RECOLL: a personal text search system for
-    Unix/Linux</title>
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description" content=
-    "recoll is a simple full-text search system for unix and linux based on the powerful and mature xapian engine">
-    <meta name="Keywords" content=
-      "full text search,fulltext,desktop search,unix,linux,solaris,open source,free">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content=
-    "text/html; charset=iso-8859-1">
-    <meta name="robots" content="All,Index,Follow">
-    <link type="text/css" rel="stylesheet" href="styles/style.css">
-  </head>
-
-  <body>
-    <div class="rightlinks">
-      <ul>
-	<li><a href="index.html">Home</a></li>
-	<li><a href="pics/index.html">Screenshots</a></li>
-	<li><a href="download.html">Downloads</a></li>
-	<li><a href="doc.html">Documentation</a></li>
-      </ul>
-    </div>
-
-    <div class="content">
-
-      <h1>Contributing to Recoll development and availability</h1>
-      
-      <p>If you are not a software developer, or have no time
-      available for testing the application or thinking about how it
-      could be improved, there is always the possibility of
-      contributing a donation, which will be much appreciated !<br/>
-      <a href="/donations/index.html">
-        <img src="/donations/btn_donate_LG.gif" /></a>
-      </p>
-        
-      <p>If you wish to become involved in the development of <span
-	  class="application">Recoll</span>, please send me an <a
-	  href="mailto:jfd@recoll.org">email</a>.</p> 
-
-      <h1><a name="translation">Translation</a></h1>
-
-      <p>More translations are always welcome! If you are a non-English speaker
-	(and understand English, which can probably be assumed, since you
-	are reading this), you can take a little time to translate
-	the GUI messages file.</p>
-      <p>The newest versions of the message files can be found
-        in <a href="translations">this directory</a>. There
-	is an empty one (the xx one), the others are partially
-	translated, just needing an update for the new messages.</p>
-      <p>Updating the files can easily be done with
-	the <span class="application">Qt Linguist</span>. Contact me
-	for more directions if needed.</p>
-
-      <h1><a name="development">Development</a></h1>
-
-      <p>The Recoll source repository is on
-	<a href="https://opensourceprojects.eu/p/recoll1/code/">opensourceprojects.eu</a>.
-        Use git to clone it and hack away.</p>
-      
-      <p>Apart from possible tickets in the
-        <a href="https://opensourceprojects.eu/p/recoll1/tickets/">tracking
-        system</a>, these are the general areas where help or
-	ideas are particularly welcome:</p>
-      <ul>
-	<li>A better GUI design (both the ergonomics and the
-	appearance). Adding missing shortcuts or fixing the menu
-	accelerators for example is easy and useful.</li>
-
-        <li>More support for the more advanced <span class=
-	    "application">Xapian</span> concepts like relevance
-	  feedback.</li>
-
-        <li>More filters for less common or less obviously
-	  useful file types.</li>
-
-        <li>Integration with the <span class="application">KDE</span> 
-	  desktop.</li>
-
-	<li>Integration with some mail user agent. We need a way to
-	  jump from a message preview to the message in thread context
-	  inside the MUA.</li>
-
-	<li>Etc. :)</li>
-      </ul>
-
-      <h1><a name="problemreport">Problem reporting</a></h1>
-
-      <p>Once in a while it will happen that a Recoll program will
-	crash (either the "recoll" graphical interface or the
-	"recollindex" command line indexing command).</p>
-
-      <p>Reporting crashes is very useful. It can help others, and it
-	can get your own problem to be solved.</p>
-
-      <p>You will find help and information about producing a useful
-      problem report on this 
-        <a href="faqsandhowtos/ProblemSolvingData.html">
-          Howto page</a>.</p>
-
-    </div>
-  </body>
-</html>
-
--- a/website/doc.html
+++ b/website/doc.html
@ -1,71 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-
-<html>
-  <head>
-    <title>Recoll documentation</title>
-
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description" content=
-    "recoll is a simple full-text search system for unix and linux
-    based on the powerful and mature xapian engine">
-    <meta name="Keywords" content=
-    "full text search, desktop search, unix, linux">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content="text/html; charset=utf-8">
-    <meta name="robots" content="All,Index,Follow">
-
-    <link type="text/css" rel="stylesheet" href="styles/style.css">
-  </head>
-
-  <body>
-    
-    <div class="rightlinks">
-      <ul>
-	<li><a href="index.html">Home</a></li>
-	<li><a href="download.html">Downloads</a></li>
-	<li>Documentation</li>
-      </ul>
-    </div>
-    
-    <div class="content">
-
-      <h1>Recoll user manual</h1>
-      
-      <ul>
-      <li><a href="usermanual/webhelp/docs/index.html">English, HTML, many
-          pages, nicer format (needs javascript).</a></li>
-      <li><a href="usermanual/usermanual.html">English, HTML, one page</a></li>
-      <li><a href="http://stupidbeauty.com/Blog/2012/03/recoll%E7%94%A8%E6%88%B6%E6%89%8B%E5%86%8A%E7%BF%BB%E8%AD%AF%EF%BC%8Crecoll-user-manual-2/">
-          中文，HTML</a></li>
-      <li><a href="usermanual/recoll_user_manual.pdf">English, PDF</a></li>
-      <li><a href="http://mcz.altervista.org/Pagine/usermanual-italian.html">
-	  Italian (rather old)</a></li>
-      </ul>
-
-      <p><br></p>
-
-      <h1>Faqs and Howtos</h1>
-
-      <p>You will find a number of useful tips for common
-        issues and extensions on the 
-	  <a href="faqsandhowtos/index.html">
-	    Faqs and Howtos section</a>.</p>
-
-      <h1>Other documentation</h1>
-
-      <ul>
-        <li><a href="recoll_XMP/index.html">Indexing PDF
-            XMP-metadata</a>: a nice example of customizing a Recoll
-            configuration and the PDF filter to use additional
-            metadata, by Jeffrey Dick.</li>
-      <li><a href="perfs.html">Index size and indexing performance
-	  data.</a></li>
-      <li><a href="custom.html">Result list format samples.</a></li>
-      <li><a href="idxthreads/threadingRecoll.html">Lessons learned
-          while modifying Recoll indexing to be multithreaded</a>.</li>
-      </ul>
-
-    </div>
-  </body>
-</html>
--- a/website/download.html
+++ b/website/download.html
@ -1,462 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-  <head>
-    <title>Recoll download</title>
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description"
-          content="recoll is a simple full-text search system for unix and linux     based on the powerful and mature xapian engine">
-    <meta name="Keywords" content="full text search, desktop search, unix, linux">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content="text/html; charset=utf-8">
-    <meta name="robots" content="All,Index,Follow">
-    <link type="text/css" rel="stylesheet" href="styles/style.css">
-
-    <script type="text/javascript">
-      function showdiv(viewid)
-      {
-          var ids = ["general", "bugs", "source", "packages",
-              "windows", "ports", "filters", "translations"];
-          for (var i = 0; i < ids.length; i++) {
-              document.getElementById(ids[i]).style.display = "none";
-          }
-          document.getElementById(viewid).style.display = "block";
-      }
-                          </script>
-
-  </head>
-
-  <body>
-
-    <div class="rightlinks">
-      <ul>
-        <li><a href="index.html.en">Home</a></li>
-        <li><b>Downloads</b></li>
-        <li><a href="doc.html">Documentation</a></li>
-        <li><a href="usermanual/usermanual.html#RCL.INSTALL">Installation</a></li>
-        <li><a href="support.html">Support</a></li>
-      </ul>
-    </div>
-
-    <div class="content">
-      <h1>Recoll downloads</h1>
-
-      <div class="intrapage">
-
-        <table width="100%">
-          <tbody>
-            <tr>
-              <td><a href="#general" onmouseover="showdiv('general')">
-	          General</a></td>
-              <td><a href="#source" onmouseover="showdiv('source')">
-	          Source</a></td>
-              <td><a href="#packages" onmouseover="showdiv('packages')">
-	          Linux Packages (.rpm and .deb)</a></td>
-              <td><a href="#windows" onmouseover="showdiv('windows')">
-	          Windows</a></td>
-              <td><a href="BUGS.html" onmouseover="showdiv('bugs')">
-	          Known bugs</a></td>
-              <td><a href="#ports" onmouseover="showdiv('ports')">
-	          Mac ports</a></td>
-              <td><a href="filters/filters.html" onmouseover="showdiv('filters')">
-	          Updated Filters</a></td>
-              <td><a href="#translations" onmouseover="showdiv('translations')">
-	          Translations</a></td>
-            </tr>
-          </tbody>
-        </table>
-      </div>
-
-      <div id="general">
-        <h2><a name="general">General information</a></h2>
-
-        <p>The current version is 1.23.2. <a href="release-1.23.html">Release
-            notes</a>.</p>
-
-        <p>Recoll <a href="usermanual/usermanual.html#RCL.INSTALL">Installation
-            / building manual</a>.</p>
-
-        <p>The indexing filters used for some document types may need external
-          packages not installed on your system by default, and not installed
-          automatically with Recoll: <a href="features.html#doctypes">take a
-            look at the list</a> and decide what you need to install.</p>
-
-        <p>The Recoll term explorer tool in phonetic mode (marginally useful and
-          optional) uses the <b>aspell</b> package, version 0.60
-          (utf-8 support) or newer.</p>
-
-        <p>If you find problems with this page, the package or its
-          installation, <em>please</em> <a href="mailto:jfd@recoll.org">report
-          them</a>.</p> 
-
-        <h4>What do the release numbers mean?</h4>
-
-        <p>The Recoll releases are numbered X.Y.Z. The X would only
-          change for really major modifications like a big change in
-          the index format, and possibly won't ever reach 2.</p>
-
-        <p>Y is for functional modifications. These may bring bugs, so
-          if you don't need the new features, you may want to wait a
-          little, and especially skip the first release (X.Y.0), at
-          least for a few weeks.</p>
-
-        <p>Z changes for <a href="BUGS.html">bug fixes</a> only, and
-          moving from X.Y.Z to X.Y.Z+u should in general involve
-          little risk of regression. But, <em>any</em> change can
-          bring problems, if you are not affected by the corrected
-          bugs (check the <a href="release-1.21.html">release
-          file</a>), there is probably no necessity to upgrade
-          anyway.</p>
-      </div>
-
-      <div id="bugs">
-        <h2><a name="bugs">Known bugs</a></h2>
-        <p>There is a <a href="BUGS.html">history of known bugs</a>, sorted
-          by fix release. Also see 
-          the <a href="https://opensourceprojects.eu/p/recoll1/tickets/new/">
-            Recoll issue tracker</a>.
-        </p>
-      </div>
-
-      <div id="source">
-        <h2><a name="source">Source</a></h2>
-
-        <h3>Current release distribution: 1.23.2:</h3>
-        <!-- Attention: source packages must remain here, not in a
-        subdirectory, because of all the places they're referred from
-        (package watches) -->
-
-        <p><a href="recoll-1.23.2.tar.gz">recoll-1.23.2.tar.gz</a>.</p>
-        <p><a href="release-1.23.html">Release notes</a>.</p>
-
-        <h3>Previous release: 1.22.4:</h3>
-        <p><a href="recoll-1.22.4.tar.gz">recoll-1.22.4.tar.gz</a>.</p>
-        <p><a href="release-1.22.html">Release notes</a>.</p>
-
-
-        <!--
-        <h3>Snapshot</h3>
-        <p>I sometimes release a source tarfile when I consider that the
-          current development version is stable enough. The current
-          snapshot contains commits up to 2240 (see 
-          <a href="https://opensourceprojects.eu/p/recoll1/code/commit_browser">
-            the changelog</a>, and a synthetic abstract in the
-          current <a href="release-1.16.html">1.16 release notes</a>).
-        <p><a href="betarecoll-2240.tar.gz">betarecoll-2240.tar.gz</a>.</p>
-        -->
-
-        <h3>Ubuntu Unity Lens and Scope</h3>
-
-        <p>You will probably get these from the <a href="#ubuntu">PPA</a>, but
-          here are the source files. These are not included in the main tar file
-          any more. For any Recoll version after 1.19 (choose based on the
-          Ubuntu version, not the Recoll one):
-
-          <blockquote>
-            <a href="recoll-lens-1.19.10.3543.tar.gz">
-              recoll-lens-1.19.10.3543.tar.gz</a> (Ubuntu up to 13.04
-              Raring)<br> 
-
-            <a href="unity-scope-recoll-1.20.2.4.tar.gz">
-              unity-scope-recoll-1.20.2.4.tar.gz</a> (Ubuntu 13.10 and
-              later).<br> 
-
-          </blockquote>
-
-          For Recoll 1.18: 
-          <a href="recoll-lens-1.18.1.2997.tar.gz">
-            recoll-lens-1.18.1.2997.tar.gz</a><br>
-          For Recoll 1.17: 
-          <a href="recoll-lens-1.17.2.2697.tar.gz">
-            recoll-lens-1.17.2.2697.tar.gz</a>
-
-        </p>
-
-        <h3>Prerequisites for building from source:</h3>
-        <ul>
-          <li>C++ compiler. Be aware that its absence sometimes
-          manifests itself by quite cryptic messages.</li>
-
-          <li><p>Xapian core development libraries. Most Linux
-              distributions carry them in their package repository. Or
-              you will find source and binary packages on
-              the <a href="http://www.xapian.org/download.php">Xapian
-              download page</a>.
-              <br>
-            <p><em>Note on building Xapian for older CPUs:</em> The build
-              configurations for Xapian releases 1.0.21 and 1.2.1 or
-              newer enable the use of SSE2 floating point
-              instructions. These instructions are not available in
-              CPUs older than Intel Pentium 4 or AMD Athlon 64. When
-              building for such a CPU, you need to add the
-              --disable-sse flag to the Xapian library configure
-              command. If this is not done, the problem signals itself
-              by "Illegal instruction" crashes (SIGILL) in recollindex
-              and recoll. </p>
-          </li>
-          <li>Qt development files: Qt 4.4, 5.3 or newer (5.2 not ok).</li>
-          <li>Qt WebKit development files: these are quite often
-            distributed apart from the main Qt libraries. It is
-            possible to configure Recoll not to use Qt WebKit (see
-            configure --help).</li>
-          <li>zlib development files.</li>
-          <li>X11 development files.</li>
-          <li>Python development package: you can avoid needing this
-            by configuring with --disable-python-module.</li>
-        </ul>
-
-        <h3>Source repository:</h3>
-
-        <p>The <span class="application">Recoll</span> source
-          repository is hosted
-          on <a href="https://opensourceprojects.eu/p/recoll1/code/">
-            opensourceprojects.eu</a>. The trunk is usually a bit on the
-          bleeding edge, but there is always a maintenance branch for
-          the current production version.</p>
-
-        <h3>Instructions for building</h3>
-
-        <p>Normally, it's just:</p>
-        <div class="code">./configure; make; make install</div>
-        <p>If a bit more detail is needed, 
-          <a href="http://www.recoll.org/usermanual/usermanual.html#RCL.INSTALL.BUILDING">
-            there is some in the manual</a>.</p>
-
-      </div>
-
-      <div id="packages">
-        <h2><a name="packages">Packages</a></h2>
-
-        <p>Packages or ports for Recoll are available in the standard
-          repositories for many distributions.</p>
-
-        <p>However they are often a bit older or built with older
-          Xapian releases. Here follow some pointers to find newer
-          packages for some distributions. In most cases, you will
-          just need to use an alternate repository.</p>
-
-        <h3><a name="debian">Debian</a></h3>
-
-        <p>The Debian Recoll packages are not always up to date in
-          stable distributions. Debian Wheezy and Jessie have Recoll
-          1.17.3. which is ancient (it was an accident for
-          Jessie). Stretch has 1.22.4 which is largely ok.</p>
-        
-        <p>I am maintaining a repository for newer versions of the packages.
-          The repository currently has recoll 1.23.x for Jessie and
-          Stretch, Intel 32 and 64 bits, and armhf, and slightly older
-          1.22 packages for Wheezy. There is a separate
-          repository for Raspbian Jessie, which is <em>not</em>
-          compatible with vanilla Debian.</p>
-
-        <p>To add the Debian or Raspbian repository to your sources:</p>
-
-        <ul>
-
-          <li>See <a href="../pages/signatures.html">here</a> for the
-            keys used to sign the repository. You will need to import
-            them to suppress <b>apt-get</b> messages about unverified
-            signatures (the method is described on the
-            <a href="../pages/signatures.html">same page</a>).</li>
-          
-          <li>Create and edit <span class="filename">
-              /etc/apt/sources.list.d/recoll.list</span>
-            and add the following lines:<br>
-            for Debian wheezy (debian 7.x, recoll 1.22.3):<br>
-            <div class="code">
-deb http://www.lesbonscomptes.com/recoll/debian/ wheezy main
-deb-src http://www.lesbonscomptes.com/recoll/debian/ wheezy main
-            </div>
-            for Debian jessie (debian 8.x):<br>
-            <div class="code">
-deb http://www.lesbonscomptes.com/recoll/debian/ jessie main
-deb-src http://www.lesbonscomptes.com/recoll/debian/ jessie main
-            </div>
-            for Debian stretch (debian 9.x):<br>
-            <div class="code">
-deb http://www.lesbonscomptes.com/recoll/debian/ stretch main
-deb-src http://www.lesbonscomptes.com/recoll/debian/ stretch main
-            </div>
-            for Raspbian jessie (raspbian 8.x):<br>
-            <div class="code">
-deb http://www.lesbonscomptes.com/recoll/raspbian/ jessie main
-deb-src http://www.lesbonscomptes.com/recoll/raspbian/ jessie main
-            </div>
-          <li>Then:
-            <div class="code">
-sudo apt-get update
-sudo apt-get install recoll python-recoll python3-recoll
-            </div>
-          </li>
-        </ul>
-
-        <p>If you prefer to manually install the packages, they are here:
-          <a href="debian/pool/main/r/recoll/">
-            debian/pool/main/r/recoll/</a><br/>
-        </p>
-
-        <h3><a name="ubuntu">Ubuntu</a></h3>
-
-        <p>There are Personal Package Archives on launchpad.net for
-          <a href="https://launchpad.net/~recoll-backports/+archive/recoll-1.15-on">
-            Recoll, kio-recoll and recoll-lens</a>. These were built
-          from the latest versions, for the current set of supported Ubuntu
-          versions. Procedure:</p>
-        <div class="code">
-sudo add-apt-repository ppa:recoll-backports/recoll-1.15-on
-sudo apt-get update
-sudo apt-get install recoll
-        </div>
-
-        <p>The packages in the PPA now have a separate package for the Python
-          extension, like the standard ones, so there should be no more
-          conflict issues while switching from the PPA to the normal
-          repositories and back.</p>
-
-        <h3><a name="mint">Linux Mint</a></h3>
-
-        <p>The Ubuntu PPA works perfectly for Mint 13 (and probably other releases
-          too). Just follow the instructions for Ubuntu.</p>
-
-        <h3>RPMS</h3>
-
-        <p>You'll need to install the Xapian, Qt, Qt-Webkit and zlib development
-          packages if you want to use the source rpms.</p>
-
-        <h3>Fedora</h3>
-
-        <p>Recoll is present in the standard Fedora package repositories starting from
-          F-12. Recoll packages in Fedora are usually fairly up to
-          date. Please get in touch if you have a need for a Recoll package
-          for Fedora.</p>
-
-        <h3>CentOS 7.1</h3>
-
-        <p><a href="https://fedoraproject.org/wiki/EPEL">EPEL</a> now
-          has a package for Recoll. It is currently in
-          the <a href="https://fedoraproject.org/wiki/EPEL/testing">testing
-          section</a>, but it should hopefully move on 
-          shortly. If you install the test package (which runs just
-          fine as far as I can see), please add feedback to
-          the
-          <a href="https://bodhi.fedoraproject.org/updates/FEDORA-EPEL-2017-ede90eda56">
-            package page</a>.
-
-          <p>If EPEL does not work for you, there are still a few
-            <a href="downloads/centos71">pre-EPEL packages
-            here</a>. They will be deleted shortly, except if someone
-            provides me with a good reason to keep them.  There are
-            only x86_64 binaries, use the source rpm for other
-            archs. As base CentOS does not seem to have the Qt WebKit
-            module, the Recoll build uses QTextBrowser instead of a
-            WebKit QWebView, so no Javascript or advanced CSS in the
-            result list or snippets window for you (the EPEL package
-            uses WebKit, so this is another way it is better).</p>
-
-
-        <h3>OpenSUSE</h3>
-
-        <p>Recoll is in the KDE:Extra repository. You just need to add the
-          repository to your software
-          sources (Yast2-&gt;software-&gt;Software repositories).<br>
-          <a href="http://download.opensuse.org/repositories/KDE:/Extra/">
-            Repository list (supported Suse versions)</a>. 
-          After adding the appropriate repository to your software sources,
-          you will be able to install recoll and kio_recoll from the software
-          management interface. The Xapian dependency will also be satisfied
-          from the build service repository. Some of the older repositories do
-          not build antiword, just tell the software manager to "break" recoll
-          by installing anyway, and get antiword somewhere else.</p>
-
-
-      </div> <!-- Packages -->
-
-      <div id="windows">
-        <h2><a name="windows">Microsoft Windows Setup Files</a></h2>
-
-        <p>The port of Recoll to Windows is still a bit experimental and
-          lacking things like real-time indexing or spelling
-          suggestions. However it works well enough to be useful. More info
-          and links to the setup
-          files <a href="pages/recoll-windows.html">here</a>.</p>
-      </div> <!-- windows -->
-
-      <div id="ports">
-        <h2><a name="ports">Ports</a></h2>
-
-        <h3>Mac port</h3>
-
-        <p>It seems that Recoll will sometimes find data that Spotlight misses
-          (especially inside pdfs apparently, which is probably more to the credit of
-          poppler than recoll itself).</p>
-
-        <p>Recoll is in MacPorts and really easy to install:</p>
-        <ol>
-          <li><a href="https://trac.macports.org/wiki/InstallingMacPorts">Install
-              MacPorts</a>.</li>
-          <li>Type "sudo port install recoll"</li>
-        </ol>
-
-        <p>Recoll is then available from the command line and as an icon in the usual
-          MacPorts applications place.</p>
-      </div>
-
-      <div id="filters">
-        <h2><a name="filters">Updated filters</a></h2>
-        <p><a href="filters/filters.html">new or updated filters</a>
-          sometimes become available after a release. As a rule, all
-          filters are compatible with all Recoll versions. Any
-          compatibility problem will be explicitly mentioned.</p>
-      </div>
-
-      <div id="translations">
-        <h2><a name="translations">Translations</a></h2>
-
-        <p>Most of the translations for 1.22/23 are incomplete. The source
-          translation files are included in the source release. If
-          your language has some english messages left and you want to
-          take a shot at fixing the problem, you can send the results
-          to <a href="mailto:jfd@recoll.org">me</a> and earn my
-          gratefulness (and your less multilingual
-          compatriot's)...</p>
-
-        <p>You can use the <em>.ts</em> file to alter the translations
-          if you wish (use Qt's <em>linguist</em> tool to edit the
-          source file, then <em>lrelease</em> to produce
-          the <em>.qm</em> file.). The <em>.qm</em> file should be copied
-          to <span class="filename">/usr/[local/]share/recoll/translations</span>
-        </p>
-
-        <p><a href="translations/recoll_xx.ts">recoll_xx.ts</a> is a blank
-          Recoll 1.22 message file, handy to work on a new translation. You can
-          also <a href="translations/">list the directory</a> to see all the
-          translation files (same as those in the maintenance source branch on
-          opensourceprojects.eu).</p>
-
-        <h3>Updated 1.22 translations that became available after the
-        release:</h3> 
-
-        <p>Greek translation by  Dimitrios Glentadakis:
-          <a href="translations/recoll_el.ts">recoll_el.ts</a>
-          <a href="translations/recoll_el.qm">recoll_el.qm</a><br/>
-        </p>
-        <p>Dutch translation by Leslie Scheelings:
-          <a href="translations/recoll_nl.ts">recoll_nl.ts</a>
-          <a href="translations/recoll_nl.qm">recoll_nl.qm</a><br/>
-        </p>
-
-        <p>Danish translation by Morten Langlo:
-          <a href="translations/recoll_da.ts">recoll_da.ts</a>
-          <a href="translations/recoll_da.qm">recoll_da.qm</a><br/>
-        </p>
-
-        <p>Note that, if you are running an older release, you may find updated
-          messages by looking inside the appropriate maintenance
-          branch in
-          <a href="https://opensourceprojects.eu/p/recoll1/code/">
-            the source repository</a>.</p>
-
-      </div> <!-- translations -->
-    </div> <!-- content -->
-
-  </body>
-</html>
--- a/website/faqsandhowtos/ElinksWeb.txt
+++ b/website/faqsandhowtos/ElinksWeb.txt
@ -1,35 +0,0 @@
-== Extending the Recoll Firefox visited web page indexing mechanism to other browsers
-
-The *Recoll* _Web Queue_ function allows using WEB browser plug-ins
-originally designed for indexing visited WEB pages with *Beagle* (rip). The
-browser plug-ins work very simply by creating copies of the visited pages
-in a designated directory. Two files are created for each page, one for the
-contents, the other for the metadata. 
-
-When activated, *Recoll* will visit the queue directory and index each HTML
-page and its associated metadata. There is more detail about the mechanism
-on the [[IndexWebHistory|page about the Recoll Web queue]], but mostly, you
-just need to go to the _Indexing Preferences_ in the *recoll* GUI, open the
-_Web history_ panel and check the top button. 
-
-Franck, a *Recoll* and *Elinks* user from New Zealand, designed a method
-and wrote a script to index the *Elinks* WEB history in this fashion.  
-
-The script works by using *wget* to fetch the visited page into the queue
-directory. This means that it would be reusable to index arbitrary WEB
-pages in contexts other than *Elinks* visits. 
-
-Recipe for *Elinks* and Recoll 1.18 and later:
-
-* Retrieve the 
-  link:https://www.recoll.org/files/elinks_recoll.sh[elinks_recoll.sh] shell
-  script and make it executable (`chmod a+x elinks_recoll.sh`).
-* In the Elinks Keyboard shortcut manager (k)/Main, add a shortcut to pass
-  the current URL to an external command, e.g. _Ctrl-P_.
-* In the Options manager (o) /Document/Uri Passing, add an action named for
-  example _ToIndex_
-* Modify the ToIndex action to execute `/path/to/the/script/elinks_recoll.sh %c`
-* Save, you are done
-
-For Recoll 1.17, the method is analogous, but the script is named
-link:https://www.recoll.org/files/elinks_recoll.sh[elinks_beagle.sh].
--- a/website/faqsandhowtos/FilterArch.txt
+++ b/website/faqsandhowtos/FilterArch.txt
@ -1,82 +0,0 @@
-== Recoll input handlers
-
-In the end, Recoll indexes plain UTF-8 text, remembering where it came
-from.
-
-But of course, this is not what the source data looks like.
-The text content of the original documents is encoded in many fashions
-(ie pdf, ms-word, html, etc.), and it can also be stored in quite
-involved ways (inside archives, email attachments ...).
-
-For getting to the data and converting it to plain text, Recoll uses a set
-of modules which it calls input handlers (or filters), which either operate
-on the storage structure (ie: a zip handler), or the storage format (ie a
-pdf to text translator), or both. In addition, there is a tentative notion
-of a higher level storage backend which we will ignore for now (for
-reference there are currently two of those: the file system and the web
-history cache).
-
-The basic task of filters is to take a document as input and produce a
-series of subdocuments as output. The subdocument's format is defined
-either dynamically (as part of the output data), or statically, in the
-filter definition. 
-
-=== Simple filters
-
-These are executed by the **mh_exec** recoll module. They are the vast
-majority.
-
-These filters are very simple. They are designed to perform a simple task
-with minimal interface, they mostly don't know anything about each other,
-and they don't know much about their context. This makes writing a filter
-quite easy as there is not much to learn about their environment.
-
-Only one output document is produced and the format is fixed. 
-
-In practice the filter, which is most generally a shell-script (but could
-be any executable program), takes a file name on the command line and
-outputs an html or plain text document on standard output, then exits.
-
-For example, the pdf filter takes one pdf file name as input on the command
-line and produces one html document on stdout. The fact that the output is
-html is statically defined in a configuration file. 
-
-For filters which produce plain text, the output character set information
-is in general defined in the configuration file. Else it will be obtained
-from the locale (hoping that it makes sense).
-
-Filters that output html can produce metadata information in the html
-header (ie author etc.). Filters that output plain text can only output
-main text data, no metadata fields. 
-
-Besides the file name, there is one other piece of input information, which
-is in the form of an environment variable, and can be safely ignored:
-+RECOLL_FILTER_FORPREVIEW+. This indicates if the filter is being used
-for previewing or for indexing data. Some filters will elect to suppress
-repetitive parts of the output text when indexing to avoid distorting the
-term statistics. For example, the man filter suppresses the section
-headers (NAME, SYNOPSIS...) when indexing.
-
-=== Multiple input filters
-
-These filters are more complex, but still quite easy to write, especially
-if you can use Python, because they can then use a common module which
-manages the communication with the indexer.
-
-Newer Recoll versions have converted many previously 'simple' filters to
-this kind as part of the port to Windows.
-
-These filters are executed by the *mh_execm* Recoll module.
-
-They are persistent (one instance will persist through a whole indexing
-pass), and will index successive multiple input files (the point being to
-avoid startup performance penalty), and possibly multiple documents per
-input file if this makes sense for their input format (ie: zip archive, chm
-help file). 
-
-They use a simple communication protocol over a pipe with the main recoll
-or recollindex process, with file names and a few other parameters being
-sent as input, and decoded data and attributes being sent in return.
-
-The shared Python module is 'filters/rclexecm.py'. You can look at 'rclzip'
-or 'rclaudio' for reasonably straightforward examples.
--- a/website/faqsandhowtos/FilterRetrofit.txt
+++ b/website/faqsandhowtos/FilterRetrofit.txt
@ -1,62 +0,0 @@
-== Installing a filter for a new document type
-
-It will sometimes happen that a newer Recoll release has support for a
-document type which would be useful to you, but which your older release
-does not support.
-
-It is in general easy to import support from the newer to the older
-release: the Recoll input handler interface is very stable, so things should just
-work.
-
-Input Handler updates are generally described on the Recoll web site
-link:https://www.recoll.org/filters/filters.html[new filters pages]. They
-may include notes about which versions need the new input handler, or specifics
-about installing it.
-
-An up to date copy of input handlers and configuration files is also kept
-link:https://www.recoll.org/filters/[at the same location].
-
-We will take an example to make things more concrete: Tomboy and Gnote
-files are directly supported by Recoll 1.19, but not in older Recoll
-releases. The *rclxml* handler is needed to process them.
-
-The following procedure will allow you to retrofit support:
-
- Retrieve the *rclxml* input handler from:
-  link:https://www.lesbonscomptes.com/recoll/filters/rclxml[]
-
- Copy it to '/usr/share/recoll/filters' and make it executable: 
-  `chmod +x rclxml`
-  The input handler needs *xsltproc*, but this is probably already on your
-  system (else get it with the package manager).
-
- Edit '~/.recoll/mimemap', add the following line:
- `.note = application/x-gnote`
- Edit '~/.recoll/mimeconf', add the following lines:
-+
----
-[index]
-application/x-gnote = exec rclxml
----
- Edit '~/.recoll/mimeview', add the following lines:
-+
----
-[view]
-application/x-gnote = tomboy %f
----
-
- The easiest way to make sure the files are indexed with the new input
-  handlers may then be to just run a full indexing pass (`recollindex -z`). 
-
-Notes:
- 
- The MIME type which is used is not crucial, you could prefer to use,
-  e.g., +application/x-tomboy+ instead, it just has to be consistent. To
-  avoid future trouble, it's better to use the type used by newer Recoll
-  releases though.
- The 'mimeview' entry is necessary even if you are using the desktop
-  preferences to open files. The value will not be used, but it has to be
-  there.
-
-
-
--- a/website/faqsandhowtos/FilteringOutZipArchiveMembers.txt
+++ b/website/faqsandhowtos/FilteringOutZipArchiveMembers.txt
@ -1,34 +0,0 @@
-== Filtering out Zip archive members ==
-
-The *rclzip* Zip archive extraction input handler does not use the general
-configuration variables which define what file system objects should be
-skipped, but it has an equivalent internal function. 
-
-The name-skipping code depends on a recent member of the Recoll Python
-package. This will become standard for release 1.20, but for earlier
-releases, you need to do two things to use this function: 
-
- Fetch 'python/recoll/recoll/rclconfig.py' and 'filters/rclzip' from the
-  source repository. 
- Copy both to '/usr/share/recoll/filters' and make 'rclzip' executable.
-
-You can then set a variable named +zipSkippedNames+ inside
-'recoll.conf'. +zipSkippedNames+ should be a space-separated list of
-patterns which will be passed to the Python fnmatch() function. The +/+
-characters are not special (matched as any character). 
-
-You can't use embedded spaces in patterns (no double-quote quoting for now)
-
-This can be redefined for file system directories using the usual section
-indicators (Zip archives in different file-system directories can have
-different skip lists). 
-
-Example:
-
----
-zipSkippedNames = *.txt
-[/path/to/the/dir]
-zipSkippedNames = somedir/*/*.html
----
-
-
--- a/website/faqsandhowtos/GUIKeyboard.txt
+++ b/website/faqsandhowtos/GUIKeyboard.txt
@ -1,60 +0,0 @@
-== Recoll GUI keyboard navigation
-
-Using Recoll without the mouse is not completely straightforward, but it is
-mostly feasible. Here follows a description of the usable shortcuts. 
-
-=== Anywhere
-
-`Ctrl+q` should exit Recoll from anywhere.
-
-=== Main window and result list ===
-
-When Recoll starts up, the focus is in the simple search entry. The main
-window tab order is as follows: 
-
-* Clear
-* Search
-* Search type combo
-* Search entry  (Initial focus)
-* Result list (scrolling etc)
-* Result list 1st link
-* Result list next links...
-* Back to Clear
-
-Each result list entry has 3 links: the icon link is not active, but its
-value is the URL, so that it can be dragged and dropped to another
-application. The 2 other links are _Preview_ and _Open_ and can be
-activated by typing _Enter_. 
-
-Typing _Ctrl+Shift+s_ anywhere in the main window should return the focus to the search entry. So will _Ctrl+l_ in future versions (for compatibility with WEB browser usage).
-
-For pure keyboard usage, you can improve this by:
-
- Disabling the icon link: use _Preferences->GUI configuration->Result
-  List->Edit result paragraph_ and remove the `<a href='%U'>` and `</a>`
-  around the `<img...>` tag. 
- Making the active link more visible by adding the following code to the
-  result page HTML header insert (same preferences tab). Feel free to
-  adjust the color :=) : 
-
----
-<style type="text/css">
-a:focus {background-color: red;}
-</style>
----
-
-=== Result table
-
-The same _Ctrl+Shift+s_ will return the focus to the search entry when
-working with the result table. 
-
-_Ctrl+r_ will move the focus from the entry to the spreadsheet. When in
-there the arrow keys will navigate the lines.  
-
-When a line is selected:
-
-* _Ctrl+o_ will _Open_ the document.
-* _Ctrl+Shift+o_ will _Open_ the document and exit Recoll.
-* _Ctrl+d_ (detail) will start a _Preview_
-
-_Esc_ will deselect the current line so that mouse hovering will work again.
--- a/website/faqsandhowtos/HandleCustomField.txt
+++ b/website/faqsandhowtos/HandleCustomField.txt
@ -1,69 +0,0 @@
-== Generating a custom field and using it to sort results
-
-We are going to show how to generate a custom field from a Recoll filter,
-and use it for sorting results. The example chosen comes from an actual
-user request: sorting results on pdf page counts. 
-
-The details here are obsolete, as the +pdf+ input handler is now a quite
-different python program, but the general idea is still relevant.
-
-The page count from a pdf file can be displayed by the pdfinfo command
-(xpdf or poppler tools). 
-
-We first modify a copy of the rclpdf filter
-('/usr/[local/]share/recoll/filters/rclpdf'), to compute the pdf page count,
-and output the value as an html meta field. This is a not very interesting
-bit of shell/awk magic. Another approach would be to just rewrite the
-rclpdf filter in your favorite scripting language (ie: perl, python...), as
-all it does is execute pdftotext and pdfinfo and output html, nothing
-complicated. Here follows the rclpdf modification as a pseudo patch: 
-
----
-# compute the page count and format it so that it's alphabetically sortable
-+set `pdfinfo "$infile" | egrep ^Pages:`
-+pages=`printf "%04d" $2`
-[skip...]
-# Pass the page count value to awk
-awk 'BEGIN'\
-+awk -v Pages="$pages" 'BEGIN'\
-[skip...]
-# Inside the awk program startup section: compute the "meta" field line
-+  pagemeta = "<meta name=\"pdfpages\" content=\"" Pages "\">\n"
-[skip...]
-# Then print it as part of the header:
-+    $0 =  part1 charsetmeta pagemeta part2
-[skip...]
----
-
-You can execute your own version of rclpdf by modifying '~/.recoll/mimeconf':
-
----
-[index]
-application/pdf = exec /path/to/my/own/rclpdf
----
-
-At this point, recollindex would receive and extract a +pdfpages+ field,
-but it would not know what to do with it. We are going to tell it to store
-the value inside the document data record so that it can be displayed in
-the results, and sorted on. For this we modify the '~/.recoll/fields' file: 
-
----
-[stored]
-pdfpages=
----
-
-That's it ! After reindexing, you can now display +pdfpages+ inside the
-result list (add a +%(pdfpages)+ value to the paragraph format), and display
-+pdfpages+ inside the result table (right-click the table header), and sort
-the results on page count (click the column header). 
-
-Note that +pdfpages+ has not been defined as searchable (this would not make
-much sense). For this, you'd have to define a prefix and add it to the
-[prefixes] fields file section: 
-
----
-[prefixes]
-pdfpages = XYPDFP
----
-
-Have a look at the comments inside the 'fields' file for more information.
--- a/website/faqsandhowtos/Home.txt
+++ b/website/faqsandhowtos/Home.txt
@ -1,13 +0,0 @@
-== Welcome to the Recoll Faqs and Recipes
-
-link:FaqsAndHowTos.html[FAQs and Howtos] are stored here, but 
-the main source for Recoll user documentation is 
-link:https://www.recoll.org/doc.html[the _Recoll user manual_] on the
-link:https://www.recoll.org/[Recoll Web site] where you will also find a
-lot of other Recoll information, source code tarballs and contact
-information.
-
-If you want to make your problem report as useful as possible, you may want
-to take a look at link:ProblemSolvingData.html[this page]. 
-
-link:WikiIndex.html[Full file index]
--- a/website/faqsandhowtos/HotRecoll.txt
+++ b/website/faqsandhowtos/HotRecoll.txt
@ -1,79 +0,0 @@
-== Recoll hotkey: starting / hiding recoll with a keyboard shortcut
-
-Type a key (ie: F12) and have recoll appear or disappear. On the first
-occurrence, recoll is started if it's not already running. Further
-occurrences toggle recoll between visible and minimized states. Never
-thought this would be useful until someone asked for it. Can't do without
-it anymore :) 
-
-This works well with both Gnome and KDE, but is implemented using a gnome
-library (*libwnck*) and its python interface, which you may have to install
-on your system if you are a pure KDE user. The library most probably exists
-in the package repositories for your distribution, so this should not be
-too complicated. 
-
-This should also work with other window managers, because it is based on a
-standard window manager interface extension (EWMH) that most modern window
-managers implement. 
-
-=== Installing the script (all desktops):
-
- You will need the libwnck library and its python interface. These are
-  usually part of a gnome installation, otherwise check and possibly
-  install them. For OpenSuse, the library should already be there but you
-  need to install gnome-python-desktop. 
- Download the
- link:https://www.recoll.org/files/hotrecoll.py[http://www.recoll.org/files/hotrecoll.py
- script]. If you have a recent recoll installation (1.14.3 and 
-  further), it's already in the recoll filters directory
-  ('/usr/[local/]share/recoll/filters') 
- Copy the script to some permanent place (ie: '~/bin') and make it
-  executable (you can leave it in the filters dirs if it's there). In a
-  shell window: `chmod +x hotrecoll.py`.
- You can check that the script works (or not) by executing it on the
-  command line. It does not need an argument. Recoll should appear or
-  disappear every time you execute the script. A few warning messages may
-  be considered normal. If the script says that it does not find the wnck
-  library or some other module, you'll have to install them. 
-
-=== Installing the keyboard shortcut (Gnome):
-
- _System->Preferences->Keyboard shortcuts_, or execute
-  *gnome-keybinding-properties* 
- Click add, Name, ie: StartRecoll, Action: /path/to/hotrecoll.py
- This will add the shortcut to the "Custom shortcuts" section. You can
-  then click in the "Shortcut" column for "StartRecoll", and type any key
-  combination (ie: push F12) to assign a key shortcut. 
-
-=== Installing the keyboard shortcut (KDE):
-
-Under KDE installing a global custom keyboard shortcut like we need is most
-helpfully not under "Keyboard Shortcuts" but under "Input Actions". 
-
- _Kmenu -> Configure Desktop -> Input Actions -> Edit -> New -> Global
-  Shortcut -> Command/Url_ 
- A new Action appears, named _New Action_. You can rename it something
-  like +hotrecoll+ for clarity. 
- Click the _Trigger_ tab, click the input area and press your preferred
-  key combination (ie: F12) 
- Click the _Action_ tab, and enter +hotrecoll.py+ (if it's in your PATH),
-  or else the full path to the command (e.g.:
-  '/usr/share/recoll/filters/hotrecoll.py').
- Click _Apply_.
-
-=== Installing the keyboard shortcut (XFCE):
-
-Open the settings manager, and add the shortcut in the 
-_Application Shortcuts_ panel inside the _Keyboard_ tool.
-
-
-=== Other environments
-
-Many window managers have a way to set up a keyboard shortcut for running
-an arbitrary command. You'll need to look at the documentation for yours,
-or search the web for a solution.  
-
-An alternative independent of the environment would be to use the XBindKeys
-utility. See this link:http://www.linux.com/archive/feed/59494[linux.com
-article] for helpful instructions. 
-
--- a/website/faqsandhowtos/IndexMailHeader.txt
+++ b/website/faqsandhowtos/IndexMailHeader.txt
@ -1,33 +0,0 @@
-== Indexing arbitrary mail headers
-
-By default the Recoll mail handler only processes a subset of email headers
-(+From+, +To+, +Cc+, +Date+, +Subject+). It is possible to index additional
-headers by specifying them inside the 'fields' configuration file, inside
-the configuration directory (typically '~/.recoll/').
-
-Lengthy explanations are not really needed here, and I'll just show an
-example (duplicated from the configuration section of the manual):
-
----
-[prefixes]
-# Index mailmytag contents (with the given prefix)
-mailmytag = XMTAG
-
-[stored]
-# Store mailmytag inside the document data record (so that it can be
-# displayed - as %(mailmytag) - in result lists).
-mailmytag = 
-
-[mail]
-# Extract the X-My-Tag mail header, and use it internally with the
-# mailmytag field name
-x-my-tag = mailmytag
-
----
-
-Limitations:
-
- The mail filter will only process the first instance for a header
-  occurring several times.
- No decoding will take place (ie for non-ascii headers which would have
-  some kind of encoding). 
--- a/website/faqsandhowtos/IndexMozillaCalendari.txt
+++ b/website/faqsandhowtos/IndexMozillaCalendari.txt
@ -1,32 +0,0 @@
-== Indexing Mozilla calendar data
-
-Mozilla calendar programs (*Sunbird*, *Lightning*) do not store their
-data in +ics+ files natively. They use an *SQLite* database (the
-'storage.sdb' file inside the profile). This means that calendar data
-cannot be indexed directly.  
-
-To get Recoll to index calendar data, you need to export it to an +ics+
-file. This can be done manually, from the application menus, or, by
-installing the
-link:https://addons.mozilla.org/en-US/sunbird/addon/3740[Automatic Export
-extension]. 
-
-The extension can be configured to export the data when exiting the
-program, or at regular time intervals.  You can even set up a command to be
-executed after the export. If you are not using real time indexing, this
-can usefully be *recollindex*.
-
-In _Tools->Add Ons->Automatic Export preferences_, in the _Start an
-application after export_ subpanel, set _Path of application_ to
-'/usr/[local/]bin/recollindex' and _Parameters of application_ to
-something like _-i;/home/me/path/to/nameofexportedcal.ics_ 
-
-This will ensure that the calendar is indexed every time it is exported
-(this is not necessary though, you can let the next batch indexing pass
-take care of it). 
-
-It may happen that the exported data has some syntax errors which will
-prevent indexing with the *rclics* filter which was distributed up to
-Recoll 1.13.04 (included). You may get an updated filter from the
-link:https://www.recoll.org/download.html[Recoll download page].
-
--- a/website/faqsandhowtos/IndexOnAc.txt
+++ b/website/faqsandhowtos/IndexOnAc.txt
@ -1,24 +0,0 @@
-== Laptops: starting or stopping indexing according to AC power status
-
-For people using real time indexing on a laptop, kind user "The Doctor"
-contributed a script to automatically start and stop indexing according to
-power status. The script can be found here:
-link:https://opensourceprojects.eu/p/recoll1/code/ci/144da4a5caa2b39d23d9d7cf262f03b6d80a4739/tree/src/desktop/recoll_index_on_ac.sh[recoll_index_on_ac.sh]
-
-To use it, you need to copy it somewhere (e.g.: '/usr/bin', but any place
-will do), make it executable (`chmod a+x recoll_index_on_ac.sh`), and edit
-'~/.config/autostart/recollindex.desktop'
-
-Change the following line:
-
-    Exec=recollindex -w 60 -m
-
-to something like the following (depending where you copied the script):
-
-    Exec=/usr/bin/recoll_index_on_ac.sh
-
-You may also want to change
-'/usr/share/recoll/examples/recollindex.desktop', otherwise your change
-will be reverted the next time you toggle real time indexing through the
-GUI. And, yes, sorry about it, _this_ change will be lost on the next
-Recoll update, so save a copy.
--- a/website/faqsandhowtos/IndexOutlook.txt
+++ b/website/faqsandhowtos/IndexOutlook.txt
@ -1,11 +0,0 @@
-== Indexing Outlook archives ==
-
-Recoll has no direct support for indexing Microsoft Outlook data, because,
-if you are a Windows user, you probably are not a good customer for Linux
-desktop indexing...
-
-However, if you have a need to index Outlook data at some point, I can
-recommend the excellent link:http://www.five-ten-sg.com/libpst/[libpst]
-library and its link:http://www.five-ten-sg.com/libpst/rn01re01.html[readpst]
-utility. Using this you can very easily convert the Outlook data into MH or
-mbox format, and then index the result with Recoll.
--- a/website/faqsandhowtos/IndexWebHistory.txt
+++ b/website/faqsandhowtos/IndexWebHistory.txt
@ -1,29 +0,0 @@
-== Indexing Web history with the Firefox extension ==
-
-Note: this document is valid for Recoll versions from 1.18.
-
-The link:http://sourceforge.net/projects/recollfirefox/[Recoll Firefox
-extension] 
-works together with Recoll to index the Web pages that you visit. The
-extension is based on an older one which was initially written for the
-Beagle indexer.
-
-The extension works by copying the data for the visited pages to a queue
-directory ('~/.recollweb/ToIndex' by default), from which they are
-indexed and removed by Recoll, and then stored in a local cache.
-
-The extension is now hosted on the Mozilla add-ons site, so you can install
-it very simply in Firefox: link:https://addons.mozilla.org/fr/firefox/addon/recoll-indexer-1/[Recoll Firefox add-on page].
-
-This feature can be enabled in the Recoll GUI index configuration panel
-(Web history section), or by editing the configuration file (set
-+processwebqueue+ to 1).
-
-Please remember that Recoll only stores a limited amount of cached web data
-(adjustable from the GUI Index Configuration section), and that old pages
-will be purged from the index. Pages that you want to archive permanently
-need to be saved elsewhere, as they will otherwise eventually disappear
-from the Recoll results.
-
-Recoll will index +.maff+ files, which may be a better choice for archival
-usage. 
--- a/website/faqsandhowtos/Makefile
+++ b/website/faqsandhowtos/Makefile
@ -1,9 +0,0 @@
-.SUFFIXES: .txt .html
-
-.txt.html:
-	asciidoc $<
-
-all: $(addsuffix .html,$(basename $(wildcard *.txt)))
-
-clean:
-	rm *.html
--- a/website/faqsandhowtos/MultipleIndexes.txt
+++ b/website/faqsandhowtos/MultipleIndexes.txt
@ -1,97 +0,0 @@
-== Creating and using multiple indexes
-
-=== Why would you want to do this ?
-
- Easy adjustment of search areas: you can filter results by using the
-  directory filter in the advanced search panel, but, if you have
-  separate well defined places where you store different kinds of data,
-  it is easier to maintain separate indexes and use the External indexes
-  dialog to switch them on or off, and it will also yield much better
-  search performance. 
- Shared indexes: it may be useful to maintain one or several indexes
-  for shared data, and separate personal indexes for each user. Indexes
-  can be shared over the network.
- Creating separate indexes for removable volumes.
-
-=== How to do it
-
-As an example we'll suppose that you have Recoll installed and indexing
-your home directory, and that you would like to have a separate index for
-'/usr/share/doc'. 
-
-You need to create a separate configuration for the new index, then add it
-to the external indexes list in the user interface, and activate it as
-needed. 
-
-. Create a directory for the new index, and create an empty configuration
-  file
-+
----
-cd
-mkdir .recoll-sharedoc
-touch .recoll-sharedoc/recoll.conf
----
-. Either edit the new configuration by hand or start recoll to use the GUI
-   configuration editor.
-+
----
-cd .recoll-sharedoc
-echo "topdirs = /usr/share/doc" > recoll.conf
-# OR
-recoll -c ~/.recoll-sharedoc
----
-+
-If using the GUI, click _Cancel_ when asked, to start the configuration
-editor.
-
-. Perform initial indexing. If you chose the GUI route, indexing will
-  start as soon as you leave the configuration editor. Else, on the
-  command line: 
-+
----
-recollindex -c ~/.recoll-sharedoc
----
-. Optionally set up *cron* to perform nightly indexing, use +crontab -e+
-  and insert a line like the following:
-+
----
-45 20 * * * recollindex -c ~/.recoll-sharedoc
----
-+
-This would start the indexing at 20:45. `crontab -e` will use the *vi*
-editor by default, you can change this by using the EDITOR
-environment variable. Example: `EDITOR=kate crontab -e`
-Your favorite desktop may also have a dedicated tool to add crontab entries.
-
-. Start recoll and choose the _Preferences->External_ index dialog menu
-  entry, then click the Browse button (near the bottom), and select the
-  new index Xapian database directory '~/.recoll-sharedoc/xapiandb'
-  Then click _Add index_.
-
-. You can then activate or deactivate the new index by clicking the box
-  in front of the directory name in the list. 
-
-When adding an index shared by multiple users, it may be helpful to use the
-RECOLL_EXTRA_DBS environment variable instead of editing individual
-configurations, see the manual for more details.
-
-=== Paths adjustments
-
-When sharing indexes over a network, in most cases, the indexed data will
-be accessible through different paths on the different hosts. This will
-prevent the Preview and Open functions from working because the paths they
-get from the index do not match the ones which are usable from the local
-host.
-
-For example my home directory is accessed as '/home/me' on my home
-machine, and as '/net/myhost/home/me' on other hosts. By default, trying
-to access a result from a remote host would use the first path, when the
-second is the one that would work.
-
-As of release 1.19 **Recoll** has a facility to perform index-dependent
-path translations. This facility is accessible from the _external index
-dialog_ in the GUI preferences. Paths translations can be set for the main
-index if no index is selected (rarely useful), or for the selected
-additional index. 
-link:../usermanual/webhelp/docs/RCL.SEARCH.PTRANS.html[See
-the manual] for more detail.
--- a/website/faqsandhowtos/MuttAndRecoll.txt
+++ b/website/faqsandhowtos/MuttAndRecoll.txt
@ -1,77 +0,0 @@
-== Interfacing Recoll and Mutt
-
-It is possible to either use Mutt as a Recoll search result viewer, or
-start Recoll from the Mutt search.
-
-=== Starting Mutt to view Recoll search results
-
-This method and the associated 
-link:http://www.recoll.org/files/recoll2mutt[recoll2mutt script] were kindly
-contributed by Morten Langlo.
-
-This allows finding mail messages in recoll and then calling *mutt*
-or *mutt-kz* to read or process the mail. 
-
-Installation:
-
- Copy the link:http://www.recoll.org/files/recoll2mutt[recoll2mutt script]
-  somewhere in your PATH, and make it executable.
- In the **recoll** GUI menus: 
-_Preferences->GUI configuration->User interface->Choose editor applications_
-change the entry for "message/rfc822" to: +recoll2mutt %f+
-
-The script has options for setting a number of parameters, you may not need
-to set any of them, the defaults are:
-
- -c mutt
- -F .muttrc
- -m Mail
- -x "-fn 10*20 -geometry 115x40"
-
-Example:
-
----
-recoll2mutt -c mutt-kz -F .mutt_kzrc -m Mail -x "-fn 10*20 -geometry 115x40"  %f
----
-
-The option +-x+ is passed to *xterm*, which is used to call *mutt* or
-*mutt-kz*.
-
-The script works for both _mbox_ and _maildir_ mail boxes, and it
-expects the configuration file for mutt and the mail directory to reside in
-your $HOME and the spool file to be '/var/spool/mail/$USER' if it is
-not in your mail directory. But it is easy to change the values in the
-script if you need to.
-
-*mutt* is opened with the right mailbox and the limit set to _Date_ and
-_Sender_.  In theory you could set the limit to _Message-Id_, but very often
-*mutt* reports that there are invalid patterns in _Message-Id_, so the
-script plays it safe, even though all emails in the opened mail box with
-the same date from the sender are shown.
-
-
-=== Starting Recoll from the Mutt search
-
-This will work only when using maildir storage (messages in individual
-files). It will not work with mailbox files. The latter would probably be
-possible by extracting the individual result messages using the Python
-interface, but I did not try.
-
-The classic way to interface Mutt and a search application is to create a
-shortcut to an external command which creates a temporary Maildir
-containing the search results.
-
-There is such a script for Recoll, you will find it link:https://bitbucket.org/medoc/recoll/raw/41d41799dbac4c69a34db985b3ab9f1597c9c742/src/python/samples/mutt-recoll.py[here].
-
-Copy the script somewhere in your PATH, and make it executable, then add
-the following line to your '.muttrc':
-
-
----
-
-macro index S "<enter-command>unset wait_key<enter><shell-escape>mutt-recoll.py -G<enter><change-folder-readonly>~/.cache/mutt_results<enter>" \
-          "search mail (using recoll)"
-
----
-
-Obviously, you can replace the 'S' letter with whatever will suit you (e.g:/)
--- a/website/faqsandhowtos/NonAsciiFileNames.txt
+++ b/website/faqsandhowtos/NonAsciiFileNames.txt
@ -1,85 +0,0 @@
-== Unix and non-ASCII file names, a summary of issues
-
-Unix/Linux file and directory names are binary byte C strings. Only the
-null byte and the slash character (/) are forbidden inside a name,
-nowhere does the kernel interpret the strings as meaningful or
-printable.  
-
-In the old times, all utilities that would display to the user were
-ASCII-based, and people would use pure printable ASCII file names (even
-using space characters inside names was a cause for trouble). Non
-alphanumeric characters were exclusively used for playing tricks on
-colleagues. And all was well. 
-
-Then the devil came under the guise of accented 8 bit characters. The
-system has no problem with them, file names are still binary C strings, but
-the utilities have to display them or take them as input, and, because
-there is no encoding specification stored with the file names, they can
-only do this according to the character encoding taken from the user's
-current locale.
-
-For example fr_FR.UTF-8, and fr_FR.ISO8859-1 could be used simultaneously
-on the same system (by different users), but they are completely
-incompatible: ISO-8859-1 strings are illegal when viewed in an UTF-8 locale
-(will display as interrogation points or some other conventional error
-marker). UTF-8 strings will display as gibberish in an ISO-8859-1 locale.
-
-This means that the file names created by an UTF-8 user are displayed as
-garbage to the ISO-8859 one...
-
-If you ever change your locale, your old files are still there and named
-the same (in the binary sense), but the names display badly and you have
-great trouble inputting them. If you add distributed (NFS) file system
-issues, things become totally unmanageable. Also think about archives sent
-from another system with a different encoding.
-
-For what concerns Recoll:
-
- The file names inside recoll.conf are not transcoded, they are taken as
-  binary strings (mostly, only +\n+ and +space+ are a bit special), and
-  passed as is to the system. So if you edit 'recoll.conf' with a text
-  editor, inside the same locale that is or has been used for file names,
-  you'll be fine.
- There was a bug in the GUI configuration tool, up to 1.12, it should
-  transcode between the internal Qt format and locale-dependent strings,
-  but it doesn't or does it badly.  
- There is also an exception for the +unac_except_trans+ variable, this
-  *has* to be UTF-8, so if the rest of the file uses another encoding,
-  you'll need to edit two separate files and concatenate them.
-
-As of version 1.13, Recoll uses local8Bit()/fromLocal8Bit() to convert
-recoll.conf file names from/to QStrings (it uses UTF-8 for all string
-values which are not file names).
-
-The Qt file dialog is broken (at least was, I have not checked this on
-recent versions). It should consider file paths as almost-binary data, not
-QStrings, but doesn't. In consequence, things are even more broken than
-necessary as seen from there:
-
-With LANG="C", non-ASCII paths can't be used at all:
-
- Strings read from recoll.conf are stripped of 8bit characters before display.
- Directory entries with 8bit characters are not displayed at all in the
-  selection dialog.
-
-With LANG="fr_FR.UTF-8", only UTF-8 paths can be used:
-
- Strings read from recoll.conf are damaged when converted to QString
-  (except those that were actually UTF-8) 
- Only the UTF-8 directory entries are displayed in the selection dialog.
-
-
-With LANG="fr_FR.iso8859-1", everything works ok.
-
- Strings read from recoll.conf are displayed with weird characters if
-  they use another encoding such as UTF-8, but are correctly maintained
-  and can be read back from the dialogs and rewritten without damage. 
- Directory entries with 8 bit characters are displayed weirdly (normal),
-  but can be manipulated without trouble (this includes utf-8 names of
-  course). 
-
-In conclusion, only the iso-8859 locales can be used for handling mixed
-encoding situations. This is a possible workaround for people who need it. 
-
-More data about path encoding issues:
-http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html 
--- a/website/faqsandhowtos/OpenHelperScript.txt
+++ b/website/faqsandhowtos/OpenHelperScript.txt
@ -1,71 +0,0 @@
-== Starting native applications
-
-It is sometimes difficult to start a native application on a result
-document, especially when the result comes from a container file (ie: email
-folder file, chm file).  
-
-The problem is that native applications usually expect at most a file name
-on the command line, and sometimes not even that (emailers). 
-
-The _Open parent documents_ link in the result list right click menu is
-sometimes useful in this situation (e.g.: +chm+ files). 
-
-In some other cases it may help that Recoll does make a lot of data
-available to the application. This data may have to be pre-processed in a
-script before calling the actual application. 
-
-Details about configuring how the native application or script is called
-are given with the 
-link:http://www.recoll.org/usermanual/usermanual.html#RCL.INSTALL.CONFIG.MIMEVIEW[description of the mimeview configuration file]
-
-Information about
-link:http://www.recoll.org/usermanual/usermanual.html#RCL.INSTALL.CONFIG.FIELDS[configuring
-customised fields] may also be useful in combination. 
-
-=== Example
-
-This is a simple example, because it does not need to use special
-fields. It just shows how to solve a simple issue by using an intermediary
-script. The problem is due to the fact that thunderbird's +-file+ option
-won't open a file if the extension is not '.eml'. Jorge, the kind Recoll
-user who supplied the example stores his email in Maildir++ format, the
-file names have no extension, so an intermediary script is necessary to get
-thunderbird to open them: 
-
-Note that this only works with messages stored in Maildir or MH format (one
-message per file). As far as I know, there is no way to get Thunderbird to
-open an arbitrary mbox file. 
-
-The 'recoll-thunderbird-open-file' script:
-
----
-#!/bin/sh
-cp $1 /tmp/$$.eml
-thunderbird  -file /tmp/$$.eml
----
-
-Create the file in an editor, save it somewhere, and make it executable
-(`chmod +x recoll-thunderbird-open-file`).
-
-The mail line in the '~/.recoll/mimeview' file:
-
----
-[view]
-message/rfc822  = recoll-thunderbird-open-file  %f
----
-
-If the place where you saved the script is not in your PATH, you will need
-to use the full path instead of just the script name, as in  
-
----
-[view]
-message/rfc822 = /home/me/somewhere/recoll-thunderbird-open-file  %f
----
-
-You should then be able to open the messages in Thunderbird, which is
-useful, for example, to handle the attachments. 
-
-With recent Recoll versions, if using the normal option of letting the
-Desktop choose the _Open_ application to use (_Use Desktop default_),
-you should also add +message/rfc822+ to the exceptions, and the whole
-thing is probably more easily done from the Recoll GUI. 
--- a/website/faqsandhowtos/PreventIndexingDir.txt
+++ b/website/faqsandhowtos/PreventIndexingDir.txt
@ -1,30 +0,0 @@
-== Preventing indexing in a directory
-
-=== Why would you want to do this ?
-
-By default, recollindex (or the indexing thread inside the recoll QT user
-interface) will process your home directory and most of its subdirectories,
-with the exception of some well known places (thumbnails, beagle and web
-browser caches, etc.) 
-
-You may want to prevent indexing in some directories where you don't expect
-interesting search results. This will avoid polluting the search result
-lists, speed up indexing times and make the index smaller. 
-
-=== How to do it
-
-There are two ways to block indexing at certain points: either by listing
-specific paths, or by directory name pattern matches. 
-
- Blocking specific paths: this is controlled by the 'skippedPaths'
-  variable in the main configuration file. You can adjust the value either
-  by editing the file or by using the indexing configuration dialog:
-  _Preferences->Indexing configuration->Global parameters->Skipped paths_
- Using pattern matches: these are listed in the skippedNames variable in
-  the main configuration file. You can adjust the value either by editing
-  the file or by using the GUI: _Preferences->Indexing configuration->Local
-  parameters->Skipped names_
-
-The
-link:../usermanual/webhelp/docs/RCL.INSTALL.CONFIG.RECOLLCONF.WHATDOCS.html[configuration
-section] of the manual has a bit more detail about the two variables.
--- a/website/faqsandhowtos/ProblemSolvingData.txt
+++ b/website/faqsandhowtos/ProblemSolvingData.txt
@ -1,157 +0,0 @@
-== Gathering useful data for asking help about or reporting a Recoll issue
-
-Once in a while it will happen that a Recoll program will either signal an
-error, or even crash (either the *recoll* graphical interface or the
-*recollindex* command line indexing command). 
-
-Reporting errors and crashes is very useful. It can help others, and it can
-get your own problem solved. 
-
-Any problem report should include the exact Recoll and system versions.
-
-If at all possible, reading the following and performing part of the
-suggested steps will be useful. This is not a condition for obtaining help
-though ! If you have any problem and have a difficulty with the following,
-just contact the mailing list or the developers (see contacts on
-link:https://www.recoll.org/support.html[the Recoll site support page]).
-
-If the problem concerns indexing, and was initially found using the
-*recoll* GUI, you should try to reproduce it using the
-*recollindex* command-line indexer, which is much simpler and easier to
-debug. 
-
-There are then two sources of useful information to diagnose the issue: the
-debug log file and, possibly, in case of a crash, a stack trace. 
-
-Crash and other problem reports are of very high value to me, and I am
-willing to help you with any of the steps described below if it is not
-familiar to you. I do realize that not everybody is a programmer or a
-system administrator. 
-
-=== Obtaining information from the log file
-
-All Recoll commands write a varying amount of information to a common log file.
-
-_All commands use the same log, and the file is reset every time a command
-is started: so it is important to make a copy right after the problem
-occurs (for example, do not start *recoll* after a *recollindex*
-crash, this would reset the log). A workaround for this issue is to let the
-messages go to the default +stderr+, and redirect this._
-
-By default, the messages are output to +stderr+, and you probably don't even
-see them if Recoll is started from the desktop. In this case, you need to
-set the parameters so that output goes to a file, and the appropriate
-verbosity level is set. When using the command-line, you may actually
-prefer to redirect stderr to avoid the log-truncating issue described
-above. 
-
-You can set the log parameters from the GUI _Indexing parameters_
-section or by editing the '~/.recoll/recoll.conf' file: set the
-+loglevel+ and +logfilename+ parameters. E.g.: 
-
----
-loglevel = 6
-logfilename = /tmp/recolltrace
----
-
-The log file can become very big if you need a big indexing run to
-reproduce the problem. Choose a file system with enough space available
-(possibly a few gigabytes). 
-
-Then run the sequence that leads to the problem, and make a copy of the log
-file just after. If the log is too big, it will usually be sufficient to
-use the last 500 lines or so (tail -500). 
-
-==== Single file indexing issues
-
-When the problem concerns, or can be reproduced with, a single file it is
-very cumbersome to have to run a full indexing pass to reproduce it. There
-are two ways around this: 
-
- Set up an ad hoc configuration with only the file of interest, or its
-  parent directory: 
----
-cd
-mkdir recoll-test
-cd recoll-test
-echo /path/to/my/file/or/its/parent/dir > recoll.conf
-echo 'loglevel = 6' >> recoll.conf
-echo 'logfilename = /tmp/recolltrace' >> recoll.conf
-recollindex -z -c .
----
- Use the -e and -i options to recollindex to erase/reindex a single
-  file. Set up the log, then: 
----
-recollindex -e /path/to/my/file
-recollindex -i /path/to/my/file
----
-
-When using the second approach, you must take care that the path used is
-consistent with the paths listed/used in the configuration (ie: if '/home' is
-a link to '/usr/home', and '/usr/home/me' is used in the configuration
-+topdirs+, `recollindex -i /home/me/myfile` will not work, you need
-to use `recollindex -i /usr/home/me/myfile`).
-
-
-=== Obtaining a stack trace
-
-If the program actually crashes, and in order to maximize usefulness, a
-crash report should also include a so-called stack trace, something that
-indicates what the program was doing when it crashed. Getting a useful
-stack trace is not very difficult, but it may need a little work on your
-part (which will then enable me to do my part of the work).
-
-If your distribution includes a separate package for Recoll debugging
-symbols, it probably also has a page on its web site explaining how to use
-them to get a stack trace. You should follow these instructions. If there
-is no debugging package, you should follow the instructions below. A little
-familiarity with the command line will be necessary. 
-
-==== Compiling and installing a debugging version
-
- Obtain the recoll source for the version you are using (www.recoll.org),
-  and extract the source tree. 
- Follow the
-  link:http://www.lesbonscomptes.com/recoll/usermanual/rcl.install.building.html[instructions
-  for building Recoll from source] with the following modifications:
- Before running configure, edit the mk/localdefs.in file and remove the
-  -O2 option(s). 
- When running configure, specify the standard installation location for
-  your system as a prefix (to avoid ending up with two installed versions,
-  which would almost certainly end in confusion). On Linux this would
-  typically be: `configure --prefix=/usr`
- When installing, arrange for the installed executables not to be stripped
-  of debugging symbols by specifying a value for the STRIP environment
-  variable (ie: *echo* or *ls*): `sudo make install STRIP=ls`
-
-==== Getting a core dump
-    
-You will need to run the operation that caused the crash inside a writable
-directory, and tell the system that you accept core dumps. The commands
-need to be run in a shell inside a terminal window. E.g.: 
-
----
-cd
-ulimit -c unlimited
-recoll  #(or recollindex or whatever you want to run).
----
-
-Hopefully, you will succeed in getting the command to crash, and you will
-get a core file. A possible approach then would be to make both the
-executable and the core files available to me by uploading them to a file
-sharing site (the core file may be quite big). You should be aware though
-that the core file may contain some of the data that was being indexed,
-which may be a privacy issue. Another approach is to generate the stack
-trace yourself. 
-
-=== Using gdb to get a stack trace
-
- Install gdb if it is not already on the system.
- Run gdb on the command that crashed and the core file (depending on the
-  system, the core file may be named "core" or something else, like
-  recollindex.core, or core.pid), ie: `gdb /usr/bin/recollindex core`
- Inside gdb, you need to use different commands to get a stack trace for
-  recoll and recollindex. For recollindex you can use the bt command. For
-  recoll use `thread apply all bt full`
- Copy/paste the output to your report email :), and quit gdb ("q").
-
--- a/website/faqsandhowtos/QpdfviewHelperScript.txt
+++ b/website/faqsandhowtos/QpdfviewHelperScript.txt
@ -1,61 +0,0 @@
-== Starting native applications ==
-
-Another example of using an intermediary script for an application with a
-command line syntax which can't be directly defined in mimeview. 
-
-We use a script to preprocess and adapt the options before calling the
-actual command. 
-
-Details about configuring how the native application or script is called
-are given with the
-link:http://www.recoll.org/usermanual/usermanual.html#RCL.INSTALL.CONFIG.MIMEVIEW[description
-of the mimeview configuration file].
-
-*qpdfview* (link:http://launchpad.net/qpdfview[web site]) is a very
-lightweight tabbed PDF viewer with great search performance and result
-highlighting.
-
-It does support parsing the search term and page number from the command
-line with the following syntax:
-
----
-qpdfview --unique "%f"#%p --search "%s"
----
-
-However, qpdfview will not launch if either %p or %s are empty in the
-command above. To accommodate for that, Recoll user Florian has written a
-small wrapper shell script:
-
----
-#!/bin/bash
-
-qpdfviewpath=qpdfview
-
-if [ -z $2 ]
-then
-    page=""
-
-else
-    page="#"$2""
-fi
-
-if [ -z $3 ]
-then
-    search=""
-
-else
-    search="--search "$3""
-fi
-
-$qpdfviewpath --unique "$1"$page $search >&0 2>&0 &
----
-
-
-The corresponding handler line for Recoll would be (depending on how you
-name the script and where you store it):
-
----
-      qpdfviewwrapper %f %p %s
----
-
-
--- a/website/faqsandhowtos/QueryFromC.txt
+++ b/website/faqsandhowtos/QueryFromC.txt
@ -1,18 +0,0 @@
-== Querying Recoll from a C program
-
-The easiest way to query Recoll from a C or C++ program is to execute an
-external search command (`recollq` or `recoll -t`).
-
-I have written a simple C module which deals with the related housekeeping
-and presents an easy to use API to the rest of the code. You will find it
-here:
-
-    https://bitbucket.org/medoc/recoll-capi
-
-It is a bit experimental and will only work with recoll 1.20 for now
-(because it uses a new option for recollq). However it would be trivial to
-modify for working with 1.19, get in touch with me if you need this.
-
-The other approach is to link with the Recoll library. This has no official
-API, but in practice, the internal one is fairly stable, and if you want to
-choose this approach, you should start from the code in recollq.cpp
--- a/website/faqsandhowtos/ReplaceCategories.txt
+++ b/website/faqsandhowtos/ReplaceCategories.txt
@ -1,58 +0,0 @@
-== Replacing the Category filter controls
-
-The document category filter controls normally appear at the top of the
-*recoll* GUI, either as checkboxes just above the result list, or as a
-dropbox in the tool area.
-
-By default, they are labeled _Media_, _Message_, _Spreadsheet_, _Text_,
-etc. and each map to a document category.
-
-The mapping used to be fixed. You could change the number and composition
-of categories by redefining them inside the 'mimeconf' configuration
-file (you still can), but the filters always used document categories.
-
-Categories can also be selected from the query language by using an
-+rclcat:+ selector. E.g.: _rclcat:message_.
-
-As of Recoll release 1.17, the filters are not hard-wired any more. They
-map to query language fragments. This means that you can freely redefine
-what they do. 
-
-The associations are configured inside the 'mimeconf' file, in the
-+[guifilters]+ section. Most GUI parameters are stored in the *Qt*
-configuration file, so this is not entirely consistent, and you will have
-to bear with my laziness here.
-
-A simple example will hopefully make things clearer. If you add the
-following to your '~/.recoll/mimeconf' file:
-
----
-[guifilters]
-
-Big Books = dir:"~/My Books" size>10K
-My Docs = dir:"~/My Documents"
-Small Books = dir:"~/My Books" size<10K
-System Docs = dir:/usr/share/doc
-
----
-
-You will have four filter checkboxes, labelled _Big Books_, _My Docs_, etc.
-
-The text after the equal sign must be a valid query language fragment, and
-will be translated to a *Recoll* query and combined with the rest of the
-query with an AND conjunction.
-
-Any name text before a colon character will be erased in the display, but
-used for sorting. You can use this to display the checkboxes in any order
-you like. For example, the following would do exactly the same as above,
-but ordering the checkboxes in the reverse order.
-
----
-[guifilters]
-
-d:Big Books = dir:"~/My Books" size>10K
-c:My Docs = dir:"~/My Documents"
-b:Small Books = dir:"~/My Books" size<10K
-a:System Docs = dir:/usr/share/doc
-
----
--- a/website/faqsandhowtos/ResultsThumbnails.txt
+++ b/website/faqsandhowtos/ResultsThumbnails.txt
@ -1,23 +0,0 @@
-== Result list thumbnails and how to create them
-
-Recoll will display thumbnails for the results if the images exist in the 
-standard location ('$HOME/.thumbnails' or '$HOME/.cache/thumbnails' depending
-on the xdg version). 
-
-But it will not create thumbnails, mainly because it is very hard to do
-portably.
-
-Thumbnails are most commonly created when you visit a directory with your
-file manager, but visiting the whole file tree just to create thumbnails is
-a bit tedious.
-
-One simple trick to create thumbnails from the recoll GUI is to visit the
-parent directory for a result by using the _Open parent document/folder_
-entry in the right-click menu.
-
-You can also find tools for the systematic creation of thumbnails for a
-directory tree. Three such tools are discussed on this 
-link:http://askubuntu.com/questions/199110/how-can-i-instruct-nautilus-to-pre-generate-pdf-thumbnails[askubuntu.com discussion]
-
-Also please note that no thumbnails can currently be generated or displayed
-for embedded documents (attachments, archive members, etc.).
--- a/website/faqsandhowtos/SavingConfig.txt
+++ b/website/faqsandhowtos/SavingConfig.txt
@ -1,61 +0,0 @@
-== User configuration backup
-
-=== Why you would want to do this
-
-If you are going to reinstall your system, and have some custom
-configuration, you may save some time by making a backup of your
-configuration and restoring it on the new system, rather than going through
-the menus to recreate it.
-
-=== How to do it
-
-==== Index/search configuration
-
-The main recoll configuration data is normally kept inside '~/.recoll' or
-whatever *$RECOLL_CONFDIR* is set to.
-
-This directory contains both configuration files and generated index
-data. In a standard configuration, the following files and directories
-contain generated data: 
-
- 'xapiandb' contains the Xapian index, which normally consumes most of the
-  total space. 
- 'aspdict.en.rws' contains the aspell dictionary used for spelling
-  corrections. 
- 'mboxcache' contains cached offset data for email messages inside mbox
-  folders. 
- 'webcache' contains saved web pages. This is more than a cache as
-  destroying it will purge the corresponding data during the next
-  indexing. 
-
-The other files are either very small or contain configuration data.
-
-If you want to only save configuration, using minimum space, you can
-destroy the above files and directories (with the possible exception of
-'webcache'). Then taking a copy of the '.recoll' directory and adding the
-GUI configuration data described in the next section will get you a full
-configuration data backup.
-
-==== GUI configuration
-
-The parameters set from the _Query configuration_ Qt menus are stored in
-Qt standard places:
-
- '~/.qt/recollrc' for Qt 3.x
- '~/.config/Recoll.org/recoll.conf' for Qt 4 and later
-
-
-==== Other data
-
-If you wish to save index data in addition to the customisation files,
-which only makes sense if the document access paths do not change after
-reinstallation, you can just take a backup of the full '.recoll'
-directory, taking care that the storage locations for some data elements
-can be changed (not be inside '.recoll'): 
-
- The index data is normally kept inside '~/.recoll/xapiandb', but the
-  location of this directory can be modified by the +dbdir+
-  configuration parameter if it is set (check 'recoll.conf'). 
- If you use the Firefox Recoll plugin, the WEB history cache is normally
-  kept inside '~/.recoll/webcache', but the location can be modified by
-  the +webcachedir+ configuration parameter. 
--- a/website/faqsandhowtos/UnityLens.txt
+++ b/website/faqsandhowtos/UnityLens.txt
@ -1,109 +0,0 @@
-== Building and Installing the Ubuntu Unity Recoll Lens
-
-Important preliminary notes:
-
- This only makes sense for Ubuntu versions using the Unity environment:
-  Natty (11.04), Oneiric (11.10), Precise (12.04), and later. 
- _Remember that you still need to use the recoll GUI (or the recollindex
-  //command) to get the indexing going !_
- The Lens is artificially limited to showing at most 20 results. Use the
-  recoll GUI for more complete capabilities (or edit rclsearch.py, change
-  the "if actual_results >= 20:" line). 
-
-
-=== The Lens with Recoll 1.17 and later
-
-If you are willing to install or upgrade to Recoll version 1.17, all
-necessary packages are on the Recoll PPA, you just need to add the
-repository to your system sources and add or upgrade the packages: *_/This
-is the recommended approach!_*
-
----
-sudo add-apt-repository ppa:recoll-backports/recoll-1.15-on
-sudo apt-get update
-sudo apt-get install recoll-lens recoll
----
-
-This document may still be useful if you want to modify the lens source
-code.
-
-=== The Lens with older Recoll versions
-
-If, for some reason, you wish to test the Lens with an older Recoll
-version, read the following. 
-
-Please note that such an installation is somewhat crippled: you will not be
-able to display results for embedded documents (emails inside an mbox,
-attachments etc.). This requires a recoll command line option which is only
-available in 1.17 
-
-The Lens is based on the Recoll Python module which is not built by default
-for versions prior to 1.17, so you will first need to pull the Recoll
-source code (for your version), then untar and proceed with the
-configure/build instructions below. 
-
-The following uses --prefix=/usr. I have no real reason to believe 
-that this would not work with /usr/local (lenses are also searched there by
-default). If you confirm that things work with another prefix, please drop
-me a line.
-
-When doing this over a previous Recoll compilation, run a "make clean" to
-get rid of the non-PIC objects. 
-
-Note that the following instructions change nothing to your existing Recoll
-installation, they only install the Python module and the Unity Lens,
-recoll, recollindex etc. are unaffected. 
-
-'/TOP/OF/RECOLL/SRC' designates the top of the recoll source tree.
-
-=== Configure and build the recoll library and python module, install the module
-
-The following needs the development packages for Xapian, Python and zlib.
-
----
-cd /TOP/OF/RECOLL/SRC 
-# May fail if no previous build was performed
-make clean
-
-# the gui/x11 disabling is just here to avoid having to install the
-# development libraries for Qt.
-configure --prefix=/usr --enable-pic --without-x --disable-qtgui
-make
-
-cd python/recoll
-python setup.py build
-sudo python setup.py install
----
-
-=== Build and install the Unity Lens
-
----
-cd /TOP/OF/RECOLL/SRC
-cd desktop/unity-lens-recoll
-configure --prefix=/usr --sysconfdir=/etc 
-sudo make install
-
----
-
-Voilà, it should work...
-
-Try to start the Dash, you should see the Recoll checkerboard (or
-whatever...) in the Lens list. 
-
-The Recoll Lens expects a Recoll query language string, so you can use
-field searches, directory, size, and date filtering (see the
-link:http://www.lesbonscomptes.com/recoll/usermanual/rcl.search.lang.html[Recoll
-manual] for a description of the query language).  
-
-If you want to disable the Lens, I think that you just have to delete
-'/usr/share/unity/lenses/recoll'
-
-Other installed files:
-
----
-/usr/libexec/unity-recoll-daemon
-/usr/share/dbus-1/services/unity-lens-recoll.service
-/usr/share/doc/unity-lens-recoll
-/usr/share/unity-lens-recoll
----
-
--- a/website/faqsandhowtos/UsingOpenWith.txt
+++ b/website/faqsandhowtos/UsingOpenWith.txt
@ -1,68 +0,0 @@
-== Using the _Open With_ context menu in recoll 1.20 and newer
-
-Recoll versions 1.20 and newer have an _Open With_ entry in the result list
-context menu (the thing which pops up on a right click).
-
-This allows choosing the application used to edit the document, instead of
-using the default one.
-
-The list of applications is built from the desktop files found inside
-'/usr/share/applications'. For each application on the system, these
-files list the mime types that the application can process.
-
-If the application which you would want listed does not appear, the most
-probable cause is that it has no desktop file, which could happen due to a
-number of reasons.
-
-This can be fixed very easily: just add a +.desktop+ file to
-'/usr/share/applications', starting from an existing one as a template.
-
-As an example, based on an original idea from Recoll user +florianbw+,
-the following describes setting up a script for editing a PDF document
-title found in the recoll result list.
-
-The script uses the *zenity* shell script dialog box tool to let you
-enter the new title, and then executes *exiftool* to actually change
-the document.
-
----
-#!/bin/sh
-
-PDF=$1
-TITLE=`exiftool -Title -s3 "$PDF"`
-
-RES=`zenity --entry \
-  --title="Change PDF Title" \
-  --text="Enter the Title:" \
-  --entry-text "$TITLE"`
-
-if [ "$RES" != "" ]; then 
-    echo -n "Changing title to $RES ... " && \
-        exiftool -Title="$RES" "$PDF" && \
-        recollindex -i "$PDF" && echo "Done!"
-else 
-     echo "No title entered"
-fi
----
-
-Name it, for example, 'pdf-edit-title.sh', and make it executable 
-(`chmod a+x pdf-edit-title.sh`).
-
-Then create a file named 'pdf-edit-title.desktop' inside
-'/usr/share/applications'. The file name does not need to be the same as the
-script's, this is just to make things clearer:
-
----
-[Desktop Entry]
-Name=PDF Title Editor
-Comment=Small script based on exiftool used to edit a pdf document title
-Exec=/home/dockes/bin/pdf-edit-title.sh %F
-Type=Application
-MimeType=application/pdf;
----
-
-You're done ! Restart Recoll, perform a search and right-click on a PDF
-result: you should see an entry named _PDF Title Editor_ in the _Open
-With_ list. Click on it, and you will be able to edit the title.
-
-
--- a/website/faqsandhowtos/WhyIsMyFileNotIndexed.txt
+++ b/website/faqsandhowtos/WhyIsMyFileNotIndexed.txt
@ -1,100 +0,0 @@
-== Using the log file to investigate indexing issues
-
-All *Recoll* processes print trace messages. By default these go to the
-standard error output, and you may not ever see them (in the case, for
-example, of the *recoll* GUI started from the desktop interface). 
-
-There are a number of potential issues with indexing that may need
-investigation, such as: 
-
- A file can't be found by searching even if it appears that it should have
-  been indexed (this could happen because the file is not selected at all or
-  because a filter program crashes). 
- The indexing process gets stuck and never finishes.
- The indexing process ends up with an error.
- The indexing process seems to be using too much system capacity.
-
-The right way to approach these problems is to use the *recollindex*
-command line tool (instead of the *recoll* GUI), and to set up the
-trace log to provide information about what indexing is actually doing. 
-
-Trace log parameters can be set either from the GUI _Preferences->Indexing
-Configuration->Global Parameters_ panel, or by editing the configuration
-file '~/.recoll/recoll.conf'. You should set the following parameters: 
-
----
-loglevel = 6
-logfilename = stderr
-thrQSizes = -1 -1 -1
----
-
-We use _stderr_ instead of an actual file in order to capture direct filter
-messages (such as a *python* stack trace) along with normal
-*recollindex* messages. 
-
-The last line sets recollindex for single-threaded operation, which will
-make the log much more readable. 
-
-You should then check that no *recoll* or *recollindex* process is
-currently running, and kill any you find. 
-
-Then, if this is an issue about an identified file, try indexing it only:
-
----
-recollindex -i myunfindablefile.xxx > /tmp/myindexlog 2>&1
----
-
-If this is a general issue with indexing (process not finishing properly),
-just start it: 
-
----
-recollindex > /tmp/myindexlog 2>&1
----
-
-Usually, having a look at the trace will allow you to see what is wrong (e.g.:
-a configuration issue or missing filter), and solve the problem.  
-
-In case of indexer misbehaviour (e.g. using too much memory), you should run
-_tail -f_ on the log to see what is going on. 
-
-If this is not enough, please
-link:https://opensourceprojects.eu/p/recoll1/tickets/new/[open a tracker
-issue] and attach or link to the log data, or just email me (jfd at
-recoll.org).  
-
-*recollindex* and *recollindex -i* usually have the same criteria to
-include a file or not (but see the _Path gotcha_ note below). It may
-happen that they behave differently, so it may sometimes be useful to run a
-full *recollindex* even for a specific file, but this will produce a
-big log file. 
-
-When you are done, it is  better to reset the verbosity to a reasonable
-level (e.g.: +2+ : just errors, +3+ : information, listing indexed files). 
-
-=== Note: the path gotcha
-
-*recollindex -i* will only index files under the directories defined by the
-+topdirs+ configuration variable (your home directory by
-default). Unfortunately, the test is done on the file path text, ignoring
-possible symbolic links. If you give a simple file name as a parameter to
-*recollindex -i* and there are symbolic links inside the +topdirs+
-entries, the comparison may fail. For example, if your home directory is
-'/home/me/' and '/home/' is a link to '/usr/home/', *recollindex -i
-somefilename* will actually try to index '/usr/home/me/somefilename', and
-fail (because '/usr/home/me/' is not a subdirectory of '/home/me/'). This
-will manifest itself in the log by a message like the following.  
-
----
-:4:../index/fsindexer.cpp:149:FsIndexer::indexFiles: skipping [/usr/home/me/somefile] (ntd)
----
-
-If this happens, give a full path consistent with what is found in the
-configuration file (e.g.: _recollindex -i /home/me/somefile_). 
-
-=== File system occupation
-
-One of the possible reasons for failed indexing is a +maxfsoccup+
-parameter set too low. This is the value of file system occupation, not
-free space, where indexing will stop. It is set from the GUI indexing
-configuration or by editing 'recoll.conf'. A value of 0 implies no
-checking, but a very low, non-zero, value will just prevent indexing. 
--- a/website/faqsandhowtos/WikiIndex.txt
+++ b/website/faqsandhowtos/WikiIndex.txt
@ -1,65 +0,0 @@
-== Recoll Wiki file index
-link:ElinksWeb.html[Extending the Recoll Firefox visited web page indexing mechanism to other browsers]
-
-link:FaqsAndHowTos.html[Faqs and Howtos]
-
-link:FilterArch.html[Recoll input filters ]
-
-link:FilterRetrofit.html[Installing a filter for a new document type]
-
-link:FilteringOutZipArchiveMembers.html[Filtering out Zip archive members]
-
-link:GUIKeyboard.html[# Recoll GUI keyboard navigation]
-
-link:HandleCustomField.html[Generating a custom field and using it to sort results]
-
-link:Home.html[Welcome to the Recoll Wiki]
-
-link:HotRecoll.html[Recoll hotkey: starting / hiding recoll with a keyboard shortcut]
-
-link:IndexMailHeader.html[Indexing arbitrary mail headers ]
-
-link:IndexMozillaCalendari.html[Indexing Mozilla calendar data ]
-
-link:IndexOnAc.html[Laptops: automatically starting or stopping indexing according to AC power status]
-
-link:IndexOutlook.html[Indexing Outlook archives]
-
-link:IndexWebHistory.html[Indexing Web history with the Firefox extension ]
-
-link:MultipleIndexes.html[Creating and using multiple indexes]
-
-link:MuttAndRecoll.html[Interfacing Recoll and Mutt]
-
-link:NonAsciiFileNames.html[Unix and non-ASCII file names, a summary of issues]
-
-link:OpenHelperScript.html[Starting native applications ]
-
-link:PreventIndexingDir.html[Preventing indexing in a directory]
-
-link:ProblemSolvingData.html[Gathering useful data for asking help about or reporting a Recoll issue]
-
-link:QpdfviewHelperScript.html[Starting native applications ]
-
-link:QueryFromC.html[Querying Recoll from a C program]
-
-link:ReplaceCategories.html[Replacing the Category filter controls]
-
-link:ResultsThumbnails.html[Result list thumbnails and how to create them]
-
-link:SavingConfig.html[User configuration backup]
-
-link:UnityLens.html[Building and Installing the Ubuntu Unity Recoll Lens]
-
-link:UsingOpenWith.html[Using the Open With context menu in recoll 1.20 and newer]
-
-link:WhyIsMyFileNotIndexed.html[Using the log file to investigate indexing issues]
-
-link:XDGBase.html[XDG: Tidying Recoll data storage]
-
-link:ZDevCaseAndDiacritics1.html[Character case and diacritic marks (1), issues with stemming]
-
-link:ZDevCaseAndDiacritics2.html[Character case and diacritic marks (2), user interface]
-
-link:ZDevCaseAndDiacritics3.html[Character case and diacritic marks (3), implementation]
-
--- a/website/faqsandhowtos/XDGBase.txt
+++ b/website/faqsandhowtos/XDGBase.txt
@ -1,42 +0,0 @@
-== XDG: Tidying Recoll data storage ==
-
-The default storage structure of Recoll configuration and index data is
-quite at odds with the recommendations of the 
-link:http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html[XDG
-Base Directory Specification], the reason being that it predates said spec.
-
-By default, Recoll stores all its data in a single directory: '$HOME/.recoll'
-
-This is not going to change, because it would be quite disturbing for
-current users.
-
-However, the location of this directory can be modified using the
-+$RECOLL_CONFDIR+ environment variable.
-
-Furthermore all significant Recoll data categories can be moved away from
-the configuration directory (maybe to '$HOME/.cache'), by setting
-configuration variables:
-
-* _dbdir_ defines the location for storing the Xapian
-  index. This could be set to, e.g., '$HOME/.cache/recoll/xapiandb'. It is
-  quite recommended that 
-  this directory be dedicated to Xapian (don't store other things in
-  there).
-* _mboxcachedir_ defines the location for caching access speedup information
-  about mail folders in mbox format. e.g. '$HOME/.cache/recoll/mboxcache'
-* New in 1.22: you can use _aspellDictDir_ to define the storage
-  location for the aspell spelling approximation
-  dictionary. E.g. '$HOME/.cache/recoll'
-* _webcachedir_ may be used to define where the visited web pages
-  archive is stored. E.g. '$HOME/.cache/recoll/webcache'. This is only used
-  if you activate the Firefox plugin and web history indexing. You may
-  want to think a bit more about where to store it, because, contrary to
-  the above, this is not discardable data: your Recoll Web history goes
-  away if you delete it.
-
-If you use multiple Recoll configurations, each will have to be customized.
-
-Once these are put away, there are still a few modifiable files in the
-configuration directory, for example the 'recoll.pid' and 'history'
-files, but these are small files. Moving 'recoll.pid' away would be a
-serious headache because it is used by scripts. 
--- a/website/faqsandhowtos/ZDevCaseAndDiacritics1.txt
+++ b/website/faqsandhowtos/ZDevCaseAndDiacritics1.txt
@ -1,143 +0,0 @@
-== Character case and diacritic marks (1), issues with stemming
-
-=== Case and diacritics in Recoll
-
-Recoll versions up to 1.17 almost fully ignore character case and diacritic
-marks. 
-
-All terms are converted to lower case and unaccented before they are
-written to the index. There are only two exceptions:
-
- * File paths (as used in _dir:_ clauses) are not converted. This might
-   be a bug or a feature, but the main reason is that we don't know how they
-   are encoded.
- * It is possible to specify that some characters will keep their diacritic
-   marks, because the entity formed by the character and the diacritic mark
-   is considered to be a different letter, not a modified one. This is
-   highly dependent on the language. For example, in Swedish, +å+ should
-   be preserved, not turned into +a+.
-
-As a necessary consequence, the same transformations are applied to search
-terms, and it is impossible to search for a specific capitalization of a
-word (+US+ is looked for as +us+), or a specific accented form
-(+café+ will be looked for as +cafe+).
-
-However, there are some cases where you would like to be more specific:
-
- * Searching for +US+ or +us+ should probably return different results.
- * Diacritics are seldom significant in English, but we can find a
-   few examples anyway: +sake+ and +saké+, +mate+ and +maté+. Of
-   course, there are many more cases in languages which use more diacritics.
-
-On the other hand, accents are often mistyped or forgotten (résumé, résume,
-resume?), and capitalization is most often insignificant, so that it is
-very important to retain the capability to ignore accent and character
-case differences, and that the discrimination can be easily switched on or
-off for each search (or even for specific terms).
-
-This text and other pages which will follow will discuss issues in adding
-character case and diacritics sensitivity to Recoll, under the assumption
-that the main index will contain the raw source terms instead of
-case-folded and unaccented ones.
-
-The following will use the _unaccent_ neologism to mean _remove
-diacritic marks_ (and not only accents). 
-
-English examples are used when possible, but given the limited use of
-diacritics in English, some French will probably creep in.
-
-=== Diacritics and stemming
-
-Stemming is the process by which we extend a search to terms related by
-grammatical inflexion, for example singular/plural, verb tenses, etc. For
-example a search for +floor+ is normally expanded by Recoll to +floors,
-floored, flooring, ...+
-
-In practice Recoll has a separate data structure that has stemmed terms
-(stems) as keys pointing to a list of expansion terms 
-{{{floor -> (floor,floors,floorings,...)}}}
-
-Stemming should be applied to terms before they are stripped of
-diacritics. Accents may have a grammatical significance, and the accent may
-change how the term is stemmed. For example, in French the +âmes+ suffix
-generally marks a past conjugation but +ames+ does not. The standard
-Xapian French stemmer will turn +évitâmes+ (avoided) into an +évit+ stem,
-but +évitames+ will be turned into +évitam+ (stripping
-plural and feminine suffixes).
-
-When the search is set to ignore diacritics, this poses a specific problem:
-if the user enters the search term without accents (which is correct
-because the system is supposed to ignore them), there is no guarantee that
-the term will be correctly expanded by stemming.
-
-The diacritic mismatch breaks the family relationship between the stem
-siblings, and this is independent of the type of index: it will happen with
-an index where diacritics are stripped just as with a raw one.
-
-The simpler case where diacritics in the original term only affect
-diacritics in the stem also necessitates specific processing, but it is
-easier to work around.
-
-Two examples illustrating these issues follow.
-
-==== The simple case: diacritics in the term only affect diacritics in the stem
-
-Let's imagine that the document set contains the term +éviter+
-(infinitive of +to avoid+), but not +évite+ (present). The only term in
-the actual index is then +éviter+.
-
-The user enters an unaccented +evite+, counting on the
-diacritics-insensitive search mode to deal with the accents. As +évite+
-is not present in the index, we have no way to guess that +evite+ is
-really +évite+.
-
-The stemmer will turn +evite+ into +evit+. There is no way that this
-can be related to +éviter+, and this legitimate result can't be found.
-
-There is a way around this: we can compute a separate
-stem expansion dictionary for unaccented terms. This dictionary, to be used
-with diacritic-insensitive searches only, contains the relationship
-between +evit+ and +eviter+ (as +éviter+ is in the index). We can
-then relate +eviter+ and +éviter+ because they differ only by accents,
-and the search will find the document with +éviter+.
-
-==== The bad case: diacritics in the term change the stem beyond diacritics
-
-Some grammatically significant accents will cause unexpectedly missing
-search results when using a supposedly diacritics-insensitive search mode.
-
-Let's imagine that the document set contains the term +éviter+ 
-(infinitive of +to avoid+), but not +évitâmes+ (past). So the stemming
-expansion table has an entry for +évit+ -> +éviter+.
-
-If the user enters an unaccented +evitames+, she would expect to find the
-documents containing +éviter+ in the results, because the latter term is
-a stemming sibling of +évitâmes+ and the search is supposedly not
-influenced by diacritics, so that +evitames+ and +évitâmes+ should be
-equivalent. 
-
-However, our search is now in trouble, because +évitâmes+ is not in any
-document, so that there is no data in the index which would inform us about
-how to transform the input term into something that differs only by accents
-but would yield a correct input for the stemmer.
-
-If we try to feed the raw user input to the stemmer, it will propose 
-an +evitam+ stem, which will not work, because the stem that actually 
-exists is +évit+, and +evitam+ can not be related to +éviter+.
-
-The only palliative approach I can think of would be a spelling correction
-of the input, performed independently of the actual index contents, which
-would notice that +évitames+ is not a French word and propose a change or an
-expansion to +évitâmes+, which would correctly stem to +évit+ and allow
-us to find +éviter+.
-
-This issue is not specific to Recoll or indeed to the fact that the index
-retains accents or not. As far as I can see, it is an intrinsic bad
-interaction between diacritics insensitivity and stemming.
-
-It is also interesting to note that this case becomes less probable when
-the data set becomes bigger, because more term inflexions will then be
-present in the index.
-
-We'll next think about an link:ZDevCaseAndDiacritics2.html[appropriate
-interface].
--- a/website/faqsandhowtos/ZDevCaseAndDiacritics2.txt
+++ b/website/faqsandhowtos/ZDevCaseAndDiacritics2.txt
@ -1,122 +0,0 @@
-== Character case and diacritic marks (2), user interface
-
-In a link:ZDevCaseAndDiacritics1.html[previous document], we discussed some
-of the problems which arise when mixing case/diacritics sensitivity and
-stemming.
-
-As of version 1.18, Recoll can create two types of indexes:
-* _Dumb_ indexes contain terms which are lowercased and stripped of
-  diacritics. Searches using such an index are naturally case- and
-  diacritics- insensitive: search terms are stripped before processing.
-* _Raw_ indexes contain terms which are just like they were found in the
-  source document. Searching such an index is naturally sensitive to case
-  and diacritics, and can be made insensitive by further processing.
-
-The following explains how users can control these Recoll features.
-
-=== Controlling the type of index we create: stripped or raw
-
-The kind of index that recoll creates is determined by:
-
- * A build-time *configure* switch: _--enable-stripchars_. If this is
-   set, the code for case and diacritics sensitivity is not compiled in and
-   recoll will work like the previous versions: unaccented and casefolded
-   index, no runtime options for case or diacritics sensitivity
-
- * An indexing configuration switch (in recoll.conf): if Recoll was built
-   with _--disable-stripchars_, this will provide a dynamic way to return
-   to the "traditional" index. The case and diacritics code will be present
-   but inactive. Normally, a recoll installation with this switch set
-   should behave exactly like one built with _--enable-stripchars_. When
-   using multiple indexes, this switch MUST be consistent between
-   indexes. There is no support whatsoever for mixing raw and dumb indexes.
-   The option is named _indexStripChars_, and it is not settable from the
-   GUI to avoid errors. This is something that would typically be set once
-   and for all for a given installation. We need to decide what the default
-   value will be for 1.18
-
- * A number of query time switches. Using these it is also possible to
-   perform a search insensitive to case and diacritics on a raw index. Note
-   however, that, given the complexity of the issues involved, I give no
-   guarantee at this time that this will yield exactly the same results as
-   searching a dumb index. Details about query time behaviour follow.
-
-
-=== Controlling stem, case and diacritics expansion: user query interface 
-
-Recoll versions up to 1.17 were insensitive to case and diacritics. We only
-needed to give the user a way to control stem expansion. This was done in
-three ways:
-
- * Globally, by setting a menu option.
- * Globally, by setting the stemming language value to empty.
- * On a term by term basis by Capitalizing the term, or, in query language
-   mode only, by using an 'l' clause modifier (_"term"l_).
-
-After switching to an unstripped index, capable of case and diacritic
-sensitivity, we need ways to control what processing is performed among:
-
- * Case expansion.
- * Diacritics expansion.
- * Stem expansion.
-
-The default mode will be compatible with the previous version, because
-this is most generally what we want to do: ignore case and diacritics,
-expand stems.
-
-There are two easy approaches for controlling the parameters:
- * Global options set in the GUI menus or as *recollq* command line
-   switches. 
- * Per-clause options set by modifiers in the query language.
-
-We would like, however to let the user entry automatically override the
-defaults in a sensible way. For example:
-
- * If a term is entered with diacritics, diacritic sensitivity is turned on
-   (for this term only).
- * If a term is entered with upper-case characters, case sensitivity is
-   turned on. In this case, we turn off stem expansion, because it makes
-   really no sense with case sensitivity.
-
-With this method we are stuck with 3 problems (only if the global mode is
-set to insensitive, and we're not using the query language):
-
- * Turning off stemming without turning on case sensitivity.
- * Searching for an all lower-case term in case-sensitive mode.
- * Searching for a term without diacritics in diacritic-sensitive mode.
-
-The two latter issues are relatively marginal and can be worked around easily
-by switching to query language mode or using negative clauses in the
-advanced search. 
-
-However, we need to be able to turn stemming off while remaining
-insensitive to case, and we need to stay reasonably compatible with the
-previous versions. This means that a term which has a capital first letter
-but is otherwise lowercase will turn stemming off, but not case sensitivity
-on. 
-
-So we're left with how to search for such a term in a case-sensitive way,
-and for this, you'll have to use global options or the query language.
-
-The modified method is:
-
- * If a term is entered with diacritics, diacritic sensitivity is turned on
-   (for this term only).
- * If the first letter in a term is upper-case and the rest is lower-case,
-   we turn stem expansion off, but we do not become case-sensitive
- * If any letter in a term except the first is upper-case, case sensitivity
-   is turned on. Stem expansion is also turned-off (even if the first
-   letter is lower-case), because it makes really no sense with case
-   sensitivity.
- * To search for an all lower-case or capitalized term in a case-sensitive
-   way, use the query language: "Capitalized"C, "lowercase"C
- * Use the query language and the "D" modifier to turn on diacritics
-   sensitivity.
-
-It can be noted that some combinations of choices do not make sense and
-they are not allowed by Recoll: for example, diacritics or case sensitivity
-do not make sense with stem expansion (which cannot preserve diacritics in
-any meaningful general way).
-
-The link:ZDevCaseAndDiacritics3.html[next page] describes the actual
-implementation in Recoll 1.18.
--- a/website/faqsandhowtos/ZDevCaseAndDiacritics3.txt
+++ b/website/faqsandhowtos/ZDevCaseAndDiacritics3.txt
@ -1,67 +0,0 @@
-== Character case and diacritic marks (3), implementation
-
-In previous pages, we discussed link:ZDevCaseAndDiacritics1.html[diacritics
-and stemming], and an link:ZDevCaseAndDiacritics2.html[appropriate
-interface] for switchable search sensitivity to diacritics and character
-case.
-
-So you are in this mood again and you don't want to type accents (maybe you're
-stuck with a QWERTY American English keyboard), or conversely you
-want to resume looking for your résumé, and you've told Recoll as much,
-using the appropriate interface. What happens then ?
-
-The second case is easy if the index is raw, and mostly impossible if it is
-stripped. So we'll concentrate on the first case: how to achieve case and
-diacritics insensitivity on a raw index ?
-
-Recoll uses three expansion tables:
-
-* The first table has stripped and lowercased terms as keys and raw terms as
-  data: +mate -> (mate, maté, MATE,...)+.
-
-* The second table has lowercased stems as keys and original lowercase terms
-  as data (when using multiple languages, there are several such tables):
-  +évit -> (éviter, évite, évitâmes, ...)+.
-
-* The third table has stripped and lowercased stems as keys and stripped
-  lowercased terms as data:
-  +evit -> (eviter, evite, evitons)+ and +evitam -> (evitames, ...)+
-
-The first table can be used for full case and diacritics expansion or for
-only one of those, by post-filtering the results of full expansion (e.g. if
-we only want diacritics expansion, we filter by stripping diacritics from
-each result term and check that it's identical to the input). For example
-if we have +mate -> (mate, maté, MATE, MATÉ)+ in the table and want to
-only perform case expansion for an input of +maté+, we apply case folding
-to the initial output and keep only +maté+, as +mate+ differs from the
-input.
-
-We only perform stemming expansion when case and diacritics sensitivity is
-off. It is performed using the second and third tables, both on the
-lowercased and lowercased/stripped output of the first step, and each term
-in the output stemming is expanded again for case (using the first table).
-
-A full example of the expansion occurring during an insensitive search 
-for +resume+ using French stemming on a mixed English/French index
-follows. An important thing to remember is that the result of each
-expansion is a function of the terms actually present in the index, not
-some arbitrary computation (and so, of course, many of the possible but
-absent variations are missing).
-
-# The case and diacritics expansion of +resume+ yields +RESUME Resume
-  Résumé resumé résume résumé resume+ 
-
-# The Stem expansion input list (lower-cased) is:
- +resume resumé résume résumé+, and the output is:
- +resum resume resumenes resumer resumes resumé resumée résum résumait
- résumant résume résumer résumerai résumerait résumes résumez résumé résumée
- résumées résumés+ 
-
-# Each of the above terms is then fed to case and diacritics expansion (first
- table), for the final output:
- +resume résumé Résumé résumer résume Resume résumés RESUME resumes
- resumer résumant resúmenes resumé résumait résumes résumée resumee
- résumerait Résumez résumerai RÉSUMÉES Resumée Resumes résumées+.
-
-A Xapian OR query is finally constructed from the expanded term list.
-
--- a/website/faqsandhowtos/faqsindex.txt
+++ b/website/faqsandhowtos/faqsindex.txt
@ -1,67 +0,0 @@
-== Recoll Faqs and Howtos file index
-link:ElinksWeb.html[Extending the Recoll Firefox visited web page indexing mechanism to other browsers]
-
-link:FilterArch.html[Recoll input handlers]
-
-link:FilterRetrofit.html[Installing a filter for a new document type]
-
-link:FilteringOutZipArchiveMembers.html[Filtering out Zip archive members]
-
-link:GUIKeyboard.html[Recoll GUI keyboard navigation]
-
-link:HandleCustomField.html[Generating a custom field and using it to sort results]
-
-link:Home.html[Welcome to the Recoll Faqs and Recipees]
-
-link:HotRecoll.html[Recoll hotkey: starting / hiding recoll with a keyboard shortcut]
-
-link:IndexMailHeader.html[Indexing arbitrary mail headers]
-
-link:IndexMozillaCalendari.html[Indexing Mozilla calendar data]
-
-link:IndexOnAc.html[Laptops: starting or stopping indexing according to AC power status]
-
-link:IndexOutlook.html[Indexing Outlook archives]
-
-link:IndexWebHistory.html[Indexing Web history with the Firefox extension ]
-
-link:MultipleIndexes.html[Creating and using multiple indexes]
-
-link:MuttAndRecoll.html[Interfacing Recoll and Mutt]
-
-link:NonAsciiFileNames.html[Unix and non-ASCII file names, a summary of issues]
-
-link:OpenHelperScript.html[Starting native applications]
-
-link:PreventIndexingDir.html[Preventing indexing in a directory]
-
-link:ProblemSolvingData.html[Gathering useful data for asking help about or reporting a Recoll issue]
-
-link:QpdfviewHelperScript.html[Starting native applications ]
-
-link:QueryFromC.html[Querying Recoll from a C program]
-
-link:ReplaceCategories.html[Replacing the Category filter controls]
-
-link:ResultsThumbnails.html[Result list thumbnails and how to create them]
-
-link:SavingConfig.html[User configuration backup]
-
-link:UnityLens.html[Building and Installing the Ubuntu Unity Recoll Lens]
-
-link:UsingOpenWith.html[Using the _Open With_ context menu in recoll 1.20 and newer]
-
-link:WhyIsMyFileNotIndexed.html[Using the log file to investigate indexing issues]
-
-link:WikiIndex.html[Recoll Wiki file index]
-
-link:XDGBase.html[XDG: Tidying Recoll data storage]
-
-link:ZDevCaseAndDiacritics1.html[Character case and diacritic marks (1), issues with stemming]
-
-link:ZDevCaseAndDiacritics2.html[Character case and diacritic marks (2), user interface]
-
-link:ZDevCaseAndDiacritics3.html[Character case and diacritic marks (3), implementation]
-
-link:index.html[Faqs and Howtos]
-
--- a/website/faqsandhowtos/index.txt
+++ b/website/faqsandhowtos/index.txt
@ -1,41 +0,0 @@
-== Faqs and Howtos
-
-link:..[Back to recoll.org top page]
-
-link:faqsindex.html[Full file index]
-
-=== Indexing
-* link:WhyIsMyFileNotIndexed.html[Why is this file not indexed ? Investigating indexing issues]
-* link:PreventIndexingDir.html[Preventing the indexing of a directory]
-* link:IndexOnAc.html[Starting/stopping the indexer depending on power/battery status]
-* link:IndexMozillaCalendari.html[Indexing Mozilla Sunbird / Lightning calendar data]
-* link:MultipleIndexes.html[Creating and using multiple indexes]
-* link:IndexWebHistory.html[Indexing Web history with the Firefox browser extension]
-* link:ElinksWeb.html[Extending the Web queue mechanism to other browsers and general WEB indexing]
-* link:IndexMailHeader.html[Indexing arbitrary mail headers]
-* link:IndexOutlook.html[Indexing Outlook archives]
-* link:HandleCustomField.html[Generating a custom field and using it to sort results]
-* link:http://www.recoll.org/recoll_XMP/index.html[An example of filter/field customisation, using XMP metadata with PDFs]
-* link:FilteringOutZipArchiveMembers.html[Filtering out Zip archive members]
-
-=== Searching
-* link:GUIKeyboard.html[Recoll GUI keyboard navigation]
-* link:HotRecoll.html[On the desktop: using a keyboard shortcut for starting/hiding recoll]
-* link:OpenHelperScript.html[Handling issues for starting native apps, esp. email clients - getting Thunderbird to open message files]
-* link:QpdfviewHelperScript.html[Another example open helper script - using qpdfview to open pdf and postscript files, with support for page and search options]
-* link:UsingOpenWith.html[Using the new Open With menu in recoll 1.20 with a custom
-  app]
-* link:ReplaceCategories.html[Replacing the document category filters]
-* link:ResultsThumbnails.html[Result list thumbnails and how to create them]
-* link:MuttAndRecoll.html[Interfacing Recoll and Mutt]
-* link:QueryFromC.html[Querying from a C program]
-
-=== Administration and miscellaneous
-* link:http://www.recoll.org/pages/recoll-webui-install-wsgi.html[Installation of the Recoll WebUI with Apache]
-* link:FilterRetrofit.html[Installing a filter for a new document type]
-* link:UnityLens.html[Building and Installing the Ubuntu Unity Recoll Lens]
-* link:SavingConfig.html[Recoll configuration backup]
-* link:XDGBase.html[Tidying Recoll data storage]
-* link:ProblemSolvingData.html[Collecting diagnostic information]
-* link:NonAsciiFileNames.html[Unix and non-ascii file names]
-* link:FilterArch.html[Recoll filters]
--- a/website/faqsandhowtos/makeindex.sh
+++ b/website/faqsandhowtos/makeindex.sh
@ -1,20 +0,0 @@
-#!/bin/sh
-WIDX=faqsindex.txt
-
-echo "== Recoll Faqs and Howtos file index" > $WIDX
-for f in *.txt; do
- if test "$f" = $WIDX ; then continue; fi
- h="`basename $f .txt`.html"
- title=`head -1 "$f" | sed -e 's/=//g' -e 's/^ *//' -e 's/ *$//' -e 's/
//g'`
- echo 'link:'$h'['$title']' >> $WIDX
- echo >> $WIDX
-done
-
-exit 0
-# Check and display what files are in the index but not in the contents table:
-
-grep \| FaqsAndHowTos.txt | awk -F\| '{print $1}'  | sed -e 's/\* \[\[//' -e 's/.wiki//' |sort > ctfiles.tmp
-grep '\[\[' WikiIndex.txt | awk -F\| '{print $1}'  | sed -e 's/\[\[//' -e 's/.wiki//' -e 's/.md//' | sort > ixfiles.tmp
-echo 'diff ContentFiles  IndexFiles:'
-diff ctfiles.tmp ixfiles.tmp
-rm ctfiles.tmp ixfiles.tmp
--- a/website/favicon.ico
+++ b/website/favicon.ico
--- a/website/features.html
+++ b/website/features.html
@ -1,490 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-
-<html>
-  <head>
-    <title>RECOLL: a personal text search system for
-    Unix/Linux</title>
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description" content=
-    "recoll is a simple full-text search system for unix and linux based on the powerful and mature xapian engine">
-    <meta name="Keywords" content=
-    "full text search,fulltext,desktop search,unix,linux,solaris,open source,free">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content=
-    "text/html; charset=iso-8859-1">
-    <meta name="robots" content="All,Index,Follow">
-    <link type="text/css" rel="stylesheet" href="styles/style.css">
-  </head>
-
-  <body>
-    <div class="rightlinks">
-      <ul>
-        <li><a href="index.html">Home</a></li>
-
-        <li><a href="pics/index.html">Screenshots</a></li>
-
-        <li><a href="download.html">Downloads</a></li>
-
-        <li><a href="doc.html">Documentation</a></li>
-
-        <li><a href="support.html">Support</a></li>
-
-        <li><a href="devel.html">Development</a></li>
-      </ul>
-    </div>
-
-    <div class="content">
-      <h1>Recoll features</h1>
-
-      <div class="intrapage">
-	<table width="100%">
-	  <tbody>
-	    <tr>
-	      <td><a href="#systems">Supported systems</a></td>
-              <td><a href="#doctypes">Document types</a></td>
-	      <td><a href="#other">Other features</a></td>
-	      <td><a href="#integration">Desktop and web integration</a></td>
-	      <td><a href="#stemming">Stemming</a></td>
-	    </tr>
-	  </tbody>
-	</table>
-       </div>
-
-      <h2><a name="general">General features</a></h2>
-      <ul>
-        <li>Easy installation, few dependencies. No database daemon,
-	  web server, desktop environment or exotic language necessary.</li>
-	<li>Will run on most Unix-based <a href="features.html#systems">
-            systems</a>, and on MS-Windows too.</li> 
-        <li>Qt 4 GUI, plus command line, Unity Lens, KIO and krunner
-          interfaces.</li>
-
-        <li>Searches most common 
-	  <a href="features.html#doctypes">document types</a>, emails and
-	    their attachments. Transparently handles decompression
-	    (gzip, bzip2).</li> 
-	
-        <li>Powerful query facilities, with boolean searches,
-	  phrases, proximity, wildcards, filter on file types and directory
-	  tree.</li>
-
-        <li>Multi-language and multi-character set with Unicode based
-	  internals.</li> 
-
-	<li>Extensive documentation, with a
-	  complete <a href="usermanual/usermanual.html">user
-	    manual</a> and manual pages for each command.</li>
-
-      </ul>
-
-      <h2><a name="systems">Supported systems</a></h2>
-
-      <p><span class="application">Recoll</span> has been compiled and
-      tested on Linux, MS-Windows 7-10, MacOS X and Solaris (initial
-      versions Redhat 7, Fedora Core 5, Suse 10, Gentoo, Debian 3.1,
-      Solaris 8). It should compile and run on all subsequent releases
-      of these systems and probably a few others too.</p>
-
-      <p>Qt versions from 4.7 and later</p>
-
-      <h2><a name="doctypes">Document types</a></h2>
-
-      <p><span class="application">Recoll</span> can index many document
-        types (along with their compressed versions). Some types are
-        handled internally (no external application needed). Other types
-        need a separate application to be installed to extract the
-        text. Types that only need very common utilities
-        (awk/sed/groff/Python etc.)  are listed in the native section.</p>
-
-      <p>The MS-Windows installer includes the supporting application,
-        the only additional package you will need is the Python language
-        installation.</p>
-
-      <p>Many formats are processed
-        by <span class="application">Python</span> scripts. The Python
-        dependency will not always be mentioned. In general, Recoll
-        expects Python 2.x to be available (many, but not all, scripts
-        are compatible with Python 3). Formats which are processed
-        using <span class="application">Python</span> and its standard
-        library are listed in the <i>native</i> section.</p>
-        
-      <h4>File types indexed natively</h4>
-
-      <ul>
-        <li><span class="application">text</span>.</li>
-        <li><span class="application">html</span>.</li>
-        <li><span class="application">maildir</span>,
-          <span class="application">mh</span>, and 
-          <span class="application">mailbox</span> (
-          <span class="application">Mozilla</span>, 
-          <span class="application">Thunderbird</span> and 
-          <span class="application">Evolution</span> mail ok).
-          <em><b>Evolution note</b>: be sure to remove <tt>.cache</tt> from
-            the <tt>skippedNames</tt> list in the GUI <tt>Indexing
-              preferences/Local Parameters/</tt> pane if you want to
-              index local copies of Imap mail.</em>
-        </li>
-
-        <li><span class="application">gaim</span> and 
-          <span class="application">purple</span> log files.</li>
-
-        <li><span class="application">Scribus</span> files.</li>
-
-        <li><span class="application">Man pages</span> (needs
-          <span class="application">groff</span>).</li>
-
-        <li><span class="application">Dia</span> diagrams.</li>
-        <li><span class="application">Excel</span>
-          and <span class="application">Powerpoint</span>
-          for <span class="application">Recoll</span> versions 1.19.12
-          and later.</li>
-
-        <li><span class="application">Tar</span> archives. Tar file
-        indexing is disabled by default (because tar archives don't
-        typically contain the kind of documents that people search
-        for), you will need to enable it explicitly, like with the
-        following in your
-          <span class="filename">$HOME/.recoll/mimeconf</span> file:
-          <pre>
-[index]
-application/x-tar = execm rcltar
-</pre>
-        </li> 
-
-        <li><span class="application">Zip</span> archives.</li>
-        <li><span class="application">Konqueror webarchive</span>
-          format with Python (uses the <tt>tarfile</tt> standard
-          library module).</li>
-
-        <li><span class="application">Mimehtml web archive
-            format</span> (support based on the mail 
-          filter, which introduces some mild weirdness, but still
-          usable).</li>
-      </ul>
-
-
-
-      <h4>File types indexed with external helpers</h4>
-
-      <p>Many document types need the <span class="command">iconv</span>
-      command in addition to the applications specifically listed.</p>
-
-      <h5>The XML ones</h5>
-
-      <p>The following types need <span class="command">
-          xsltproc</span> from the <b>libxslt</b> package for recoll
-        versions before 1.22, and in addition, python-libxslt1 and
-        python-libxml2 for 1.22 and newer.
-        Quite a few also need <span class="command">unzip</span>:</p>
-
-      <ul>
-        <li><span class="application">Abiword</span> files.</li>
-
-        <li><span class="application">Fb2</span> ebooks.</li>
-
-        <li><span class="application">Kword</span> files.</li>
-
-        <li><span class="application">Microsoft Office Open XML</span>
-        files.</li>
-
-        <li><span class="application">OpenOffice</span> files.</li>
-
-        <li><span class="application">SVG</span> files.</li>
-        <li><span class="application">Gnumeric</span> files.</li>
-        <li><span class="application">Okular</span> annotations files.</li>
-        
-      </ul>
-
-      <h5>Other formats</h5>
-
-      <p>The following need miscellaneous helper programs to decode
-        the internal formats.</p>
-
-      <ul>
-        <li><span class="application">pdf</span> with the <span class=
-        "command">pdftotext</span> command, which comes with 
-          <a href="http://poppler.freedesktop.org/">poppler</a>,
-          (the package name is quite often <tt>poppler-utils</tt>). <br/>
-          Note: the older <span class="command">pdftotext</span> command
-            which comes with <span class="application">xpdf</span> is
-            not compatible with <span class="application">
-              Recoll</span><br/>
-
-          <em>New in 1.21</em>: if the <span class="application">
-            tesseract</span> OCR application, and the 
-          <span class="command">pdftoppm</span> command are available
-          on the system, the <span class="command">rclpdf</span>
-          filter has the capability to run OCR. See the comments at
-          the top of <span class="command">rclpdf</span> (usually
-          found
-          in <span class="filename">/usr/share/recoll/filters</span>)
-          for how to enable this and configuration details.<br/>
-          <em>Opening PDFs at the right page</em>: the default
-          configuration uses <span class="command">evince</span>,
-          which has options for direct page access and pre-setting the
-          search strings (hits will be highlighted). There is an
-          example line in the default mimeview for doing the same
-          thing with <span class="command">qpdfview</span>
-          (<span class="literal">qpdfview --search %s %f#%p</span>).
-          Okular does not have a search string option (but it does
-          have a page number one). 
-        </li> 
-
-        <li><span class="application">msword</span> with <a href=
-        "http://www.winfield.demon.nl/">antiword</a>.  It is also useful to
-        have <a href="http://wvware.sourceforge.net/">wvWare</a> installed
-        as it may be used as a fallback for some files which antiword
-        does not handle.</li>
-
-        <li><span class="application">Wordperfect</span> with the 
-         <span class="command">wpd2html</span> command from <a href=
-        "http://libwpd.sourceforge.net">libwpd</a>. On some distributions,
-        the command may come with a package named <span
-        class="literal">libwpd-tools</span> or such, not the base <span
-        class="literal">libwpd</span> package.</li>
-
-        <li><span class="application">Lyx</span> files (needs 
-          <span class="application">Lyx</span> to be installed).</li>
-
-        <li><span class="application">Powerpoint</span> and <span
-        class="application">Excel</span> with the <a href=
-        "http://vitus.wagner.pp.ru/software/catdoc/">catdoc</a>
-        utilities up to recoll 1.19.12. Recoll 1.19.12 and later use
-        internal Python filters for Excel and Powerpoint, and catdoc
-        is not needed at all (catdoc did not work on many semi-recent
-        Excel and Powerpoint files).</li>
-
-        <li><span class="application">CHM (Microsoft help)</span> files
-          with <span class="command">Python, 
-            <a href="http://gnochm.sourceforge.net/pychm.html">pychm</a>
-          and <a href="http://www.jedrea.com/chmlib/">chmlib</a></span>.</li>
-
-        <li><span class="application">GNU info</span> files
-        with <span class="command">Python</span> and the 
-        <span class="command">info</span> command.</li>
-
-        <li><span class="application">EPUB</span> files
-          with <span class="command">Python</span> and this
-          <a href="http://pypi.python.org/pypi/epub/">Python epub</a>
-            decoding module, which is packaged on Fedora, but not Debian.</li>
-        
-        <li><span class="application">Rar</span> archives (needs <span
-        class="command">Python</span>), the
-        <a href="http://pypi.python.org/pypi/rarfile/">rarfile</a> Python
-        module and the <a
-        href="http://www.rarlab.com/rar_add.htm">unrar</a>
-        utility. The Python module is packaged by Fedora, not by Debian.</li>
-
-        <li><span class="application">7zip</span> archives (needs 
-          <span class="command">Python</span> and  
-          the <a href="https://pypi.python.org/pypi/pylzma">pylzma
-            module</a>). This is a recent addition, and you need to
-            download the filter from
-          the <a href="filters/filters.html">filters pages</a> for
-          all Recoll versions prior to 1.21.</li>
-
-        <li><span class="application">iCalendar</span>(.ics) files
-        (needs <span class="command">Python, <a href=
-        "http://pypi.python.org/pypi/icalendar/2.1">icalendar</a></span>).</li>
-
-        <li><span class="application">Mozilla calendar data</span> See
-        <a href="faqsandhowtos/IndexMozillaCalendari.html">
-        the Howto</a> about this.</li>
-
-        <li><span class="application">postscript</span> with <a href=
-        "http://www.gnu.org/software/ghostscript/ghostscript.html">
-            ghostscript</a> and <a href=
-        "http://www.cs.wisc.edu/~ghost/doc/pstotext.htm">pstotext</a>.
-        Pstotext 1.9 has a serious issue with special characters in
-        file names, and you should either use the version packaged for
-        your system which is probably patched, or apply the Debian
-        patch which is stored <a href=
-        "files/pstotext-1.9_4-debian.patch">here</a> for
-        convenience. See http://packages.debian.org/squeeze/pstotext
-        and http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=356988
-        for references/explanations.
-          <blockquote>
-            To make things a bit easier, I also
-            store <a href="files/pstotext-1.9-patched.tar.gz">an
-            already patched version</a>. I added an
-            install target to the Makefile... This installs to
-            /usr/local, use <i>make install PREFIX=/usr</i> to
-            change. So all you need is:
-            <pre>
-              tar xvzf pstotext-1.9-patched.tar.gz
-              cd pstotext-1.9-patched
-              make
-              make install
-            </pre>
-          </blockquote>
-        </li>
-
-
-        <li><span class="application">RTF</span> files with 
-          <a href="http://www.gnu.org/software/unrtf/unrtf.html">
-            unrtf</a>. Please note that up to version 0.21.3, 
-          <span class="command">unrtf</span> mostly does not work with
-          non western-european character sets. Many serious problems
-          (crashes with serious security implications and infinite
-          loops) were fixed in unrtf 0.21.8, so you really want to use
-          this or a newer release. Building Unrtf from source is quick
-          and easy.</li>
-
-        <li><span class="application">TeX</span> with <span class=
-        "command">untex</span>. If there is no untex package for
-        your distribution, <a href="untex/untex-1.3.jf.tar.gz">a
-        source package is stored on this site</a> (as untex has no
-        obvious home). Will also work with <a href=
-        "http://www.cs.purdue.edu/homes/trinkle/detex/">detex</a>
-        if this is installed.</li>
-
-        <li><span class="application">dvi</span> with <a href=
-        "http://www.radicaleye.com/dvips.html">dvips</a>.</li>
-
-        <li><span class="application">djvu</span> with <a href=
-        "http://djvu.sourceforge.net">DjVuLibre</a>.</li>
-
-        <li><span class="application">Audio file tags</span>.
-          Recoll releases 1.14 and later use a Python filter based
-          on <a href="http://code.google.com/p/mutagen/">mutagen</a>
-          for all audio types.</li>
-
-        <li><span class="application">Image file tags</span> with <a href=
-        "http://www.sno.phy.queensu.ca/~phil/exiftool/">exiftool</a>.
-        This is a perl program, so you also need perl on the
-        system. This works with about any possible image file and
-        tag format (jpg, png, tiff, gif etc.).</li>
-
-        <li><span class="application">Midi karaoke files</span> with
-          Python, the
-          <a href="http://pypi.python.org/pypi/midi/0.2.1">
-            midi module</a>, and some help
-          from <a href="http://chardet.feedparser.org/">chardet</a>. There
-          is probably a <tt>python-chardet</tt> package for your distribution,
-          but you will quite probably need to build the midi
-          package. This is easy but see the <a href="helpernotes.html#midi">
-            notes here</a>.
-        </li> 
-
-        <li><span class="application">MediaWiki dump files</span>:
-          Thomas Levine has written a handler for these, you will find
-          it here:
-          <a href="https://bitbucket.org/tlevine/recoll/src/0127be78bffdd8a294067966a3ba7b2663d7b0cf/src/filters/rclmwdump?at=default&fileviewer=file-view-default">rclmwdump</a>.</li>
-        
-      </ul>
-
-      <h2><a name="other">Other features</a></h2>
-
-      <ul>
-        <li>Can use a Firefox extension to index visited Web pages
-          history. See <a href="faqsandhowtos/IndexWebHistory.html">the
-            Howto</a> for more detail.</li>
-
-        <li>Processes all email attachments, and more generally any
-         realistic level of container nesting (the "msword attachment to
-         a message inside a mailbox in a zip" thingy...).</li>
-
-        <li>Multiple selectable databases.</li>
-
-        <li>Powerful query facilities, with boolean searches,
-        phrases, filter on file types and directory tree.</li>
-
-        <li>Xesam-compatible query language.</li>
-
-        <li>Wildcard searches (with a specific and faster function
-        for file names).</li>
-
-        <li>Support for multiple charsets. Internal processing and
-        storage uses Unicode UTF-8.</li>
-
-        <li><a href="#stemming">Stemming</a> performed at query
-        time (can switch stemming language after indexing).</li>
-
-        <li>Easy installation. No database daemon, web server or
-        exotic language necessary.</li>
-
-        <li>An indexer which runs either as a batch, cron'able
-          program, or as a real-time indexing daemon, depending on
-          preference.</li>
-      </ul>
-
-      <h2><a name="integration">Desktop and web integration</a></h2>
-
-      <p>The <span class="application">Recoll</span> GUI has many
-	features that help to specify an efficient search and to manage
-	the results. However, it may sometimes be preferable to use a
-	simpler tool with a better integration with your desktop
-	interfaces. Several solutions exist:</p>
-      <ul>
-	<li>The <span class="application">Recoll</span> KIO module
-	  allows starting queries and viewing results from the
-	  Konqueror browser or KDE applications <em>Open</em> dialogs.</li>
-	<li>The <a href="http://kde-apps.org">recollrunner</a> krunner
-	  module allows integrating Recoll search results into a
-	  krunner query.</li>
-        <li>The Ubuntu Unity Recoll Lens (or Scope for newer Unity
-          versions) lets you access Recoll search
-          from the Unity Dash. More
-          slightly obsolete information <a href="faqsandhowtos/UnityLens.html">
-            here</a>. </li>
-        <li>The <a href="http://github.com/medoc92/recoll-webui">Recoll
-            Web UI</a> lets you query a Recoll index from a web browser</li>
-      </ul>
-      <p>Recoll also has 
-	<a href="usermanual/usermanual.html#RCL.PROGRAM.PYTHONAPI">
-	  <span class="application">Python</span></a> and
-	<span class="application">PHP</span> modules which can allow
-	easy integration with web or other applications.</p>
-
-      <h2><a name="stemming"></a>Stemming</h2>
-
-      <p>Stemming is a process which transforms inflected words
-      into their most basic form. For example, <i>flooring</i>,
-      <i>floors</i>, <i>floored</i> would probably all be
-      transformed to <i>floor</i> by a stemmer for the English
-      language.</p>
-
-      <p>In many search engines, the stemming process occurs during
-      indexing. The index will only contain the stemmed form of
-      words, with exceptions for terms which are detected as being
-      probably proper nouns (ie: capitalized). At query time, the
-      terms entered by the user are stemmed, then matched against
-      the index.</p>
-
-      <p>This process results in a smaller index, but it has the
-      serious drawback of irrevocably losing information during
-      indexing.</p>
-
-      <p>Recoll works in a different way. No stemming is performed
-      at indexing time, so that all information gets into the index.
-      The resulting index is bigger, but most people probably don't
-      care much about this nowadays, because they have a 100Gb disk
-      95% full of binary data <em>which does not get
-      indexed</em>.</p>
-
-      <p>At the end of an indexing pass, Recoll builds one or
-      several stemming dictionaries, where all word stems are
-      listed in correspondence to the list of their
-      derivatives.</p>
-
-      <p>At query time, by default, user-entered terms are stemmed,
-      then matched against the stem database, and the query is
-      expanded to include all derivatives. This will yield search
-      results analogous to those obtained by a classical engine.
-      The benefit of this approach is that stem expansion can be
-      controlled instantly at query time in several ways:</p>
-
-      <ul>
-        <li>It can be selectively turned-off for any query term by
-        capitalizing it (<i>Floor</i>).</li>
-
-        <li>The stemming language (ie: english, french...) can be
-        selected (this supposes that several stemming databases
-        have been built, which can be configured as part of the
-        indexing, or done later, in a reasonably fast way).</li>
-      </ul>
-    </div>
-  </body>
-</html>
-
--- a/website/filters/filters.html
+++ b/website/filters/filters.html
@ -1,242 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-
-<html>
-  <head>
-    <title>Recoll updated filters</title>
-
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description" content=
-    "recoll is a simple full-text search system for unix and linux
-    based on the powerful and mature xapian engine">
-    <meta name="Keywords" content=
-    "full text search, desktop search, unix, linux">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
-    <meta name="robots" content="All,Index,Follow">
-
-    <link type="text/css" rel="stylesheet" href="../styles/style.css">
-  </head>
-
-  <body>
-    
-    <div class="rightlinks">
-      <ul>
-	<li><a href="../index.html">Home</a></li>
-	<li><a href="../download.html">Downloads</a></li>
-	<li><a href="../usermanual/index.html">User manual</a></li>
-	<li><a href="../usermanual/RCL.INSTALL.html">Installation</a></li>
-	<li><a href="../index.html#support">Support</a></li>
-      </ul>
-    </div>
-    
-    <div class="content">
-
-      <h1>Updated filters for Recoll</h1>
-      
-      <p>The following describes new and updated filters, which will be
-        part of the next release, but can be installed on an older
-        release if you need them.</p>
-
-      <p>For updated filters, you just need to copy the script to the
-        filters directory which may be typically either <span
-      class="filename">/usr/share/recoll/filters</span>, or <span
-      class="filename">/usr/local/share/recoll/filters</span>. Please check
-      that the script is executable after copying it, and make it so if
-      needed (chmod a+x <i>scriptname</i>)</p>
-
-      <p>For new filters, you'll need to copy the script file as
-	above, possibly install the supporting application, and usually
-	edit the 
-	<span class="filename">mimemap</span>, 
-	<span class="filename">mimeview</span> and
-	<span class="filename">mimeconf</span> files, either in the
-	shared directory
-	(<span class="filename">
-	  /usr[/local]/share/recoll/examples</span>), or 
-	in your personal configuration directory 
-	(<span class="filename">$HOME/.recoll</span> or 
-	<span class="filename">$RECOLL_CONFDIR</span>).</p>
-      
-      <p>Alternatively, you can replace your system files with
-	these updated and complete versions:
-	<a href="mimemap">mimemap</a>
-	<a href="mimeconf">mimeconf</a> 
-	<a href="mimeview">mimeview</a>.</p>
-
-      <p>There is a slightly more detailed description of the filter
-      installation procedure on the 
-       <a href="http://www.recoll.org/faqsandhowtos/FilterRetrofit.html">
-      Recoll Wiki</a>.</p>
-
-      <p>The following entries are in reverse chronological order. Each
-        lists the latest Recoll release on which the update makes sense
-        (newer releases have an up to date version of the filter).</p>
-      
-      <p>However, if you are running a Recoll version older than 1.17,
-        you should really upgrade.</p>
-
-      <h2>PDF documents</h2>
-      <p>Fixed <a href="rclpdf">rclpdf</a> filter, compatible with
-        newer poppler pdftotext versions, which now properly escape
-        text inside the html &lt;head&gt; section (but not the body,
-        curiously).</p>
-
-      <h2>Scribus documents</h2>
-      <p>An improved <a href="rclscribus">rclscribus</a> filter,
-        thanks to Morten Langlo.</p>
-
-      <h2>7zip archives</h2>
-      <p>A new <a href="rcl7z">rcl7z</a> filter by François Botha
-        for 7zip archives. Needs the 
-        <a href="https://pypi.python.org/pypi/pylzma">pylzma Python
-        module</a>. </p> 
-
-      <h2>Attachments to PDF documents (1.20 and older)</h2>
-
-      <p>A new <a href="rclmpdf">rclmpdf</a> filter for processing
-        PDF files with attachments. This replaces the old <b>rclpdf</b>
-        filter. You need to add it to ~/.recoll/mimeconf until it is
-        made standard (this is still a bit experimental, and a big
-        change from the previous filter):
-        <pre><tt>
-        [index]
-        application/pdf = execm rclmpdf
-        </tt></pre>
-        Note the <tt>execm</tt> instead of <tt>exec</tt>. </p>  
-
-      <h2><a name="soff1">Open/Libre-Office documents (1.19 and older)</a></h2>
-
-      <p><a href="rclsoff">rclsoff</a>: the previous version did not
-        produce white space between input tab-separated words, leading
-        to search failures.</p>
-
-
-      <h2>Purple logs (1.20 and older)</h2>
-
-      <p>New <a href="rclpurple">rclpurple</a> filter for Pidgin and
-        other chat applications log files. Handles newer log
-        formats. </p>  
-
-      <h2>PowerPoint documents (1.19 and older)</h2>
-
-      <p>The <b>rclppt</b> filter was based on <b>catppt</b>, but this
-        seems to fail quite often on newer PPT
-        documents. The new version is based on code from
-        the <b>libreoffice</b> <b>mso-dump</b> project. It is both
-        reasonably fast and quite thorough.
-      </p> 
-
-      <p>Installation:<ul>
-          <li>As <tt>recollindex</tt> was executing <b>catppt</b>
-            directly in the default configuration, you will also need to add
-            the following to
-            the <tt>mimeconf</tt> file (e.g.: ~/.recoll/mimeconf):
-<pre>
-  [index]
-  application/vnd.ms-powerpoint = exec rclppt
-</pre>
-</li>
-          <li>Copy the 3 following files to the Recoll filters directory (e.g:
-            <i>/usr/share/recoll/filters</i>) and make sure
-            that <tt>ppt-dump.py</tt> and <tt>rclppt</tt> are executable.
-            <ul>
-              <li><a href="rclppt">rclppt</a></li>
-              <li><a href="ppt-dump.py">ppt-dump.py</a></li>
-              <li><a href="msodump.zip">msodump.zip</a></li>
-            </ul>
-          </li>
-        </ul>
-      </p>
-
-      <h2>EPUB documents (1.17 and older)</h2>
-
-      <p>New <a href="rclepub">rclepub</a> filter for EPUB documents.
-        This needs
-        the <a href="http://pypi.python.org/pypi/epub/0.5.0">
-          python epub decoding module</a>. </p> 
-
-      <h2>CHM files (1.17.1 and older)</h2>
-      <p><a href="rclchm">rclchm</a>. The previous version of the
-        filter mishandled files which had encoded internal URLs (not
-        very frequent, but happens).</p>
-
-      <h2>Updated Open Document filter (1.17 and older)</h2>
-
-      <p>The <a href="rclsoff">new filter</a> will correctly handle
-      exported Google Docs documents and also Open/LibreOffice ones in
-      some cases. The previous filters concatenated all the text
-      inside the exported Google docs without any spacing...</p>
-
-      <h2>TAR archives (1.17 and older)</h2>
-
-      <p>New <a href="rcltar">rcltar</a> filter for tar archives. The
-        indexing of tar archives is disabled by default in the sample
-        configuration (stored here). This is an <tt>execm</tt>
-        filter&nbsp;!.  You'll need to add an <br>
-        <tt>application/x-tar = execm rcltar</tt><br>
-        line in the [index] section of your
-        $HOME/.recoll/mimeconf to enable it, not an <tt>exec</tt> one.</p>
-
-      <h2>XML files (1.17 and older)</h2>
-
-      <p>By default, the current recoll version does not index xml
-        content (except for known formats like dia, svg etc.). This
-        new <a href="rclxml">rclxml</a> filter will extract the data
-        from any xml file. Only text data is extracted, no attribute
-        values. The other option is to treat xml file as plain text
-        one (see comment in mimeconf), and index everything, including
-        a lot of garbage.</p>
-
-      <h2>DIA files (1.16 and older)</h2>
-      <p><a href="rcldia">rcldia</a> is a new filter
-        for <a href="http://projects.gnome.org/dia/">Dia</a> files,
-        contributed by Stefan Friedel.</p>
-
-
-      <h2>Okular annotations (1.16 and older)</h2>
-      <p><a href="rclokulnote">rclokulnote</a>. Okular lets you create
-        annotations for PDF documents and stores them in xml format
-        somewhere under ~/.kde. This filter does not do a nice job to
-        format the data, but will at least let you find it...</p>
-
-      <h2>Gnumeric (1.16 and older)</h2>
-      <p><a href="rclgnm">rclgnm</a>. Needs xsltproc and
-      gunzip. As <tt>.gnumeric</tt> was in the list of
-      explicitly ignored suffixes, you can't just add the mime
-      and indexer script lines to your local mimemap and mimeconf, you
-      also need to define recoll_noindex in the local mimemap (to
-        override the system one which
-        contains <tt>.gnumeric</tt>). The simplest approach may be to
-        just replace the system files with those above.</p>
-
-      <h2>Rar archive support (1.15 and older)</h2>
-      <p><a href="rclrar">rclrar</a>. This is up to date in Recoll
-        1.16.2 but may be added to Recoll 1.15. It needs the Python
-               rarfile module. </p> 
-
-      <h2>Mimehtml support (1.15)</h2>
-      <p>This is based on the internal mail filter, you just need to
-        download and install the configuration files (mimemap and
-        mimeconf). Will only work with 1.15 and later.</p>
-
-      <h2>Konqueror webarchive (.war) filter (1.15)</h2>
-      <p><a href="rclwar">rclwar</a></p>
-
-      <h2>Updated zip archive filter (1.15)</h2>
-      <p>The filter is corrected to handle utf-8 paths in zip archives: 
-        <a href="rclzip">rclzip</a>. Up to date in Recoll 1.16, but
-        may be useful with Recoll 1.15</p>
-
-      <h2>Updated audio tag filter (1.14)</h2>
-      <p>The mutagen-based rclaudio filter delivered with recoll 1.14.2
-	used a very recent mutagen interface which will only work with
-	mutagen versions after 1.17 (probably. at least works with 1.19,
-	doesn't with 1.15).
-	You can download the <a href="rclaudio">corrected script
-	here</a>. Not useful with Recoll 1.5 or 1.6.
-      </p>
-
-    </div>
-  </body>
-</html>
--- a/website/fr/features.html
+++ b/website/fr/features.html
@ -1,211 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-
-<html>
-  <head>
-    <title>RECOLL: un outil personnel de recherche textuelle pour 
-    Unix et Linux</title>
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description" content=
-    "recoll est un logiciel personnel de recherche textuelle pour unix et linux basé sur Xapian, un moteur d'indexation puissant et mature.">
-    <meta name="Keywords" content=
-      "recherche textuelle,desktop,unix,linux,solaris,open source,free">
-    <meta http-equiv="Content-language" content="fr">
-    <meta http-equiv="content-type" content=
-    "text/html; charset=iso-8859-1">
-    <meta name="robots" content="All,Index,Follow">
-    <link type="text/css" rel="stylesheet" href="../styles/style.css">
-  </head>
-
-  <body>
-
-    <div class="rightlinks">
-      <ul>
-	<li><a href="../index.html">Base</a></li>
-	<li><a href="../pics/index.html">Copies d'écrans</a></li>
-	<li><a href="../download.html">Téléchargements</a></li>
-	<li><a href="../doc.html">Documentation</a></li>
-	<li><a href="../index.html#support">Support</a></li>
-	<li><a href="../devel.html">Développement</a></li>
-      </ul>
-    </div>
-
-    <div class="content">
-
-      <h1 class="intro">Caractéristiques de Recoll</h1>
-
-      <dl>
-	<dt><a name="systems">Systèmes</a></dt>
-	<dd><span class="application">Recoll</span> a été compilé et
-	testé sur FreeBSD, Linux, Darwin, Solaris (versions
-	  FreeBSD 5/6, Fedora Core 5/6, Suse 10.1, Gentoo,
-	  Debian 3.1, Ubuntu Edgy, Solaris 8/9, mais d'autres versions
-	  récentes conviennent sans doute également).</dd>
-
-	<dd>Versions de QT: 3.2, 3.3 et 4.2</dd>
-
-        <dt><a name="doctypes">Types de documents</a></dt>
-	<dd>Recoll peut traiter les types de documents suivants, ainsi
-	que des fichiers compressés du même type: 
-
-          <dl>
-            <dt>En interne</dt>
-
-            <dd>
-              <ul>
-                <li><var class="literal">text</var>.</li>
-
-                <li><var class="literal">html</var>.</li>
-
-                <li><span class="application">OpenOffice</span>
-                (avec l'aide de la commande <b>unzip</b>).</li>
-
-                <li><span class="application">Abiword</span>.</li>
-
-                <li><span class="application">Kword</span>.</li>
-
-                <li><var class="literal">maildir</var>,
-                    <var class="literal">mh</var> et <var
-		    class="literal">mailbox</var> (<span class=
-		    "application">Mozilla</span>, <span class=
-		    "application">Thunderbird</span>, <span class=
-		    "application">Evolution</span> et sans doute
-		    d'autres).</li> 
-
-                <li>Fichiers de conversation <span class="application">
-		    gaim</span>.</li>
-                <li><span class="application">Lyx</span> (qui doit
-                  &ecirc;tre présent).</li>
-
-                <li><span class="application">Scribus</span>.</li>
-
-              </ul>
-            </dd>
-
-            <dt>Avec des paquets externes</dt>
-
-            <dd>
-              <ul>
-                <li><var class="literal">pdf</var> avec <a href=
-                "http://www.foolabs.com/xpdf/">xpdf</a>.</li>
-
-                <li><var class="application">Wordperfect</var> avec <a href=
-                "http://libwpd.sourceforge.net">libwpd</a>.</li>
-
-                <li><var class="literal">postscript</var> avec 
-           <a href="http://www.gnu.org/software/ghostscript/ghostscript.html">
-                ghostscript</a> et 
-           <a href="http://www.cs.wisc.edu/~ghost/doc/pstotext.htm">
-		    pstotext</a>.</li>
-
-                <li><span class="application">msword</span> avec <a href=
-                "http://www.winfield.demon.nl/">antiword</a>.</li>
-
-                <li><span class="application">Powerpoint</span> et 
-		  <span class="application">Excel</span> avec les utilitaires
-		  <a href="http://www.45.free.net/~vitus/software/catdoc/">
-		    catdoc</a>.</li>
-
-                <li><var class="literal">rtf</var> avec <a href=
-                "http://www.gnu.org/software/unrtf/unrtf.html">unrtf</a>.</li>
-
-		<li><var class="literal">dvi</var> avec 
-		  <a href="http://www.radicaleye.com/dvips.html">dvips</a>.
-		</li>
-
-		<li><var class="literal">djvu</var> avec 
-		  <a href="http://djvulibre.djvuzone.org/doc/index.html">
-		    DjVuLibre</a>. </li>
-
-		<li>Tags <var class="literal">mp3</var> avec 
-		  <a href="http://id3lib.sourceforge.net/">
-		    id3info (id3lib)</a>. </li>
-
-              </ul>
-            </dd>
-          </dl>
-	</dd>
-
-	<dt>Autres caractéristiques</dt>
-	<dd>
-	  <ul>
-	    <li>Index multiples interrogeables ensemble ou séparément.</li>
-
-	    <li>Fonctions de recherche puissantes, avec expressions
-	    booléennes, phrases et proximité, caractères jokers,
-	    filtrage sur les types de fichiers ou l'emplacement.</li>
-
-	    <li>Fonction spécifique de recherche de noms de fichiers.</li>
-
-	    <li>Support de jeux de caractères multiples. Les traitements
-	      internes et l'index utilisent l'encodage Unicode UTF-8.</li>
-
-	    <li>L'extraction des racines de mots <a href="#Stemming">
-		Stemming</a> est effectuée au moment de la recherche
-		(permet de changer de langue après l'indexation).</li>
-
-	    <li>Installation facile. Pas de processus permanent, de
-	      serveur web ou environnement exotique.</li>
-
-	    <li>Un indexeur qui peut fonctionner soit comme un
-	      processus léger dans l'interface de consultation, comme un
-	      programme batch externe intégrable par 
-	      <span class="application">cron</span>, ou comme un processus
-	      permanent pour l'indexation au fil de l'eau.</li>
-
-	  </ul>
-	</dd>
-      </dl>
-
-      <h2><a name="Stemming"></a>Lemmatisation</h2>
-
-      <p><em>Note: je serais preneur d'une traduction française
-	agréable pour "stemming".</em></p>
-      <p>La lemmatisation transforme un mot dérivé vers sa racine.
-       Par exemple, <i>aimer</i>, <i>aimerai</i>, <i>aimait</i>,
-	<i>aimez</i> etc. seraient transformés en <i>aim</i> en
-	français. Une recherche de l'un quelconque des dérivés peut
-	automatiquement être étendue vers tous les autres</p>
-
-      <p>Certains moteurs de recherche appliquent la transformation
-      pendant l'indexation. L'index ne stocke que les racines des
-      mots, avec des exceptions pour les termes qui sont reconnus
-      comme des noms propres (capitalisation). Au moment de la
-      recherche, les termes de la requête sont également transformés
-      avant comparaison à l'index.</p>
-      
-      <p>Cette approche permet un index plus petit, mais elle perd
-	irrévocablement de l'information pendant l'indexation.</p>
-
-      <p>Recoll fonctionne différemment. Les termes sont indexés sans
-	transformation. L'index résultant est plus gros, ce qui n'a
-	probablement pas beaucoup d'importance à une époque de disques
-	de 100 Go principalement remplis d'information multimédia
-	<em>non indexée</em>.</p>
-
-      <p>À la fin de l'indexation, Recoll construit un ou plusieurs
-      dictionnaires de transformation (pour différents langages), où
-      toutes les racines sont listées avec leurs transformations
-      possibles.</p>
-
-
-      <p>Au moment de la recherche, par défaut, les termes de
-      l'utilisateur sont transformés, et étendus aux dérivés par
-      utilisation du dictionnaire.
-	Les résultats obtenus sont analogues à ceux de
-	l'autre méthode. L'avantage est que l'expansion peut être
-	contrôlée au moment de la recherche:
-	<ul>
-	<li>On peut la supprimer pour n'importe quel terme de la
-	  requête, (en le faisant débuter par une capitale:
-	  <em>Aime</em> par exemple pour chercher la ville d'Aime la
-	  Plagne). </li>
-	<li>Le langage de transformation peut également être changé,
-	en supposant que plusieurs dictionnaires de transformation
-	aient été construits lors de l'indexation.</li>
-      </ul>
-	
-    </div>
-  </body>
-</html>
-
--- a/website/helpernotes.html
+++ b/website/helpernotes.html
@ -1,74 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-
-<html>
-  <head>
-    <title>RECOLL: a personal text search system for
-    Unix/Linux</title>
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description" content=
-    "recoll is a simple full-text search system for unix and linux based on the powerful and mature xapian engine">
-    <meta name="Keywords" content=
-    "full text search,fulltext,desktop search,unix,linux,solaris,open source,free">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content=
-    "text/html; charset=iso-8859-1">
-    <meta name="robots" content="All,Index,Follow">
-    <link type="text/css" rel="stylesheet" href="styles/style.css">
-  </head>
-
-  <body>
-    <div class="rightlinks">
-      <ul>
-        <li><a href="index.html">Home</a></li>
-
-        <li><a href="features.html#doctypes">Back to document types</a></li>
-
-        <li><a href="pics/index.html">Screenshots</a></li>
-
-        <li><a href="download.html">Downloads</a></li>
-
-        <li><a href="doc.html">User manual</a></li>
-
-        <li><a href="index.html#support">Support</a></li>
-
-        <li><a href="devel.html">Development</a></li>
-      </ul>
-    </div>
-
-    <div class="content">
-      <h1>Notes about building/using specific external helper
-        applications</h1> 
-
-      <h2><a name="midi">The Python midi module</a></h2>
-      <p>The normal procedure for building a Python module
-        applies:</p>
-      <pre><tt>
-          tar xvzf midi-0.2.1.tar.gz
-          cd midi-0.2.1
-          python setup.py build
-          sudo python setup.py install
-      </tt></pre>
-
-      <p>However, the midi module includes an alsa driver interface
-        which needs Swig to build and probably does not build at all
-        on recent Linux versions (the last version for the package
-        dates from 2006). Recoll does not need midi sequencer hardware
-        :), so if you don't need it for other purposes, you can disable
-        the Alsa interface by editing setup.py and changing the 
-        platform name at line 37 (the Alsa thing is only tried on
-        Linux):</p>
-
-
-      <pre><tt>
-          37c37
-          &lt; if platform.startswith('linux'):
-          ---
-          > if platform.startswith('NONE'):
-      </tt></pre>
-
-      <p>The package should then build and install just fine.</p>
-
-    </div>
-  </body>
-</html>
--- a/website/id3lib.html
+++ b/website/id3lib.html
@ -1,57 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-
-<html>
-  <head>
-    <title>RECOLL: building id3lib with gcc 4.4</title>
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description" content=
-    "recoll is a simple full-text search system for unix and linux based on the powerful and mature xapian engine">
-    <meta name="Keywords" content=
-      "full text search,fulltext,desktop search,unix,linux,solaris,open source,free">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content=
-    "text/html; charset=iso-8859-1">
-    <meta name="robots" content="All,Index,Follow">
-    <link type="text/css" rel="stylesheet" href="styles/style.css">
-  </head>
-
-  <body>
-
-    <div class="rightlinks">
-      <ul>
-	<li><a href="index.html">Home</a></li>
-	<li><a href="features.html">Features</a></li>
-	<li><a href="pics/index.html">Screenshots</a></li>
-	<li><a href="download.html">Downloads</a></li>
-	<li><a href="doc.html">User manual</a></li>
-	<li><a href="index.html#support">Support</a></li>
-	<li><a href="devel.html">Development</a></li>
-      </ul>
-    </div>
-
-    <div class="content">
-
-      <h2>Compiling id3lib with recent gcc versions (2010-06-29)</h2>
-       <p>Recoll uses a program installed by the id3lib package for
-       indexing mp3 files. Id3lib has not been updated for some time and
-       will not compile with gcc versions after 4.4 because of gcc
-       incompatibilities.</p>
-       <p><a href="files/id3lib-3.8.3-gcc44.patch">Here is a minuscule
-       patch</a> to help compiling id3lib. To use it:</p>
-       <ul>
-	  <li>Download the patch (right-click the link and use 'Save As').</li>
-	  <li>Extract the id3 lib source 
-               (<tt>tar xvzf id3lib-3.8.3.tar.gz</tt>).</li>
-          <li>Change your current directory to the top of the id3lib source
-          tree and apply the patch:<br>
-            <tt>cd id3lib-3.8.3<br>
-            patch -p1 &lt; /path/to/the/saved/patch</tt></li>
-          <li>Run autoconf (you may have to install it, but your package
-          manager can certainly do it for you).</li>
-          <li>Run <tt>make</tt> and <tt>make install</tt>.</li>
-      </ul>
-    </div>
-  </body>
-</html>
-
--- a/website/idxthreads/Makefile
+++ b/website/idxthreads/Makefile
@ -1,7 +0,0 @@
-.SUFFIXES: .txt .html
-
-.txt.html:
-	asciidoc $<
-
-all: threadingRecoll.html forkingRecoll.html xapDocCopyCrash.html
-
--- a/website/idxthreads/assembly.dia
+++ b/website/idxthreads/assembly.dia
--- a/website/idxthreads/assembly.png
+++ b/website/idxthreads/assembly.png
--- a/website/idxthreads/forkingRecoll.txt
+++ b/website/idxthreads/forkingRecoll.txt
@ -1,224 +0,0 @@
-= Recoll command execution performance
-:Author: Jean-François Dockès
-:Email: jfd@recoll.org
-:Date: 2015-05-22
-
-== Abstract
-
-== Introduction
-
-The Recoll indexer, *recollindex*, is a big process which executes many
-others, mostly for extracting text from documents. Some of the executed
-processes are quite short-lived, and the time used by the process execution
-machinery can actually dominate the time used to translate data. This
-document explores possible approaches to improving performance without
-adding excessive complexity or damaging reliability.
-
-Studying fork/exec performance is not exactly a new venture, and there are
-many texts which address the subject. While researching, though, I found
-out that not so many were accurate and that a lot of questions were left as
-an exercise to the reader.
-
-== Issues with fork
-
-The traditional way for a Unix process to start another is the
-+fork()+/+exec()+ system call pair. 
-
-+fork()+ duplicates the process address space and resources (open files
-etc.), then duplicates the thread of execution, ending up with 2 mostly
-identical processes.  
-
-+exec()+ then replaces part of the newly executing process with an address
-space initialized from an executable file, inheriting some of the resources
-under various conditions.
-
-This was all fine with the small processes of the first Unix systems, but
-as time progressed, processes became bigger and the copy-before-discard
-operation was found to waste significant resources. It was optimized using
-two methods (at very different points in time):
-
- - The first approach was to supplement +fork()+ with the +vfork()+ call, which
-   is similar but does not duplicate the address space: the new process
-   thread executes in the old address space. The old thread is blocked
-   until the new one calls +exec()+ and frees up access to the memory
-   space. Any modification performed by the child thread persists when
-   the old one resumes.
-
- - The more modern approach, which coexists with +vfork()+, was to replace
-   the full duplication of the memory space with duplication of the page
-   descriptors only. The pages in the new process are marked copy-on-write
-   so that the new process has write access to its memory without
-   disturbing its parent. This approach was supposed to make +vfork()+
-   obsolete, but the operation can still be a significant resource consumer
-   for big processes mapping a lot of memory, so that +vfork()+ is still
-   around. Programs can have big memory spaces not only because they have
-   huge data segments (rare), but just because they are linked to many
-   shared libraries (more common).
-
-NOTE: Orders of magnitude: a *recollindex* process will easily grow into a
-few hundred megabytes of virtual space. It executes the small and
-efficient *antiword* command to extract text from *ms-word* files. While
-indexing multiple such files, *recollindex* can spend '60% of its CPU time'
-doing `fork()`/`exec()` housekeeping instead of useful work (this is on Linux,
-where `fork()` uses copy-on-write).
-
-Apart from the performance cost, another issue with +fork()+ is that a big
-process can fail executing a small command because of the temporary need to
-allocate twice its address space. This is a much discussed subject which we
-will leave aside because it generally does not concern *recollindex*, which
-in typical conditions uses a small portion of the machine virtual memory,
-so that a temporary doubling is not an issue.
-
-The Recoll indexer is multithreaded, which may introduce other issues. Here
-is what happens to threads during the +fork()+/+exec()+ interval:
-
- - +fork()+:
-   * The parent process threads all go on their merry way.
-   * The child process is created with only one thread active, duplicated
-     from the one which called +fork()+
- - +vfork()+
-   * The parent process thread calling +vfork()+ is suspended, the others
-     are unaffected.
-   * The child is created with only one thread, as for +fork()+. 
-     This thread shares the memory space with the parent ones, without
-     having any means to synchronize with them (pthread locks are not
-     supposed to work across processes): caution needed !
-
-NOTE: for a multithreaded program using the classical pipe method to
-communicate with children, the sequence between the `pipe()` call and the
-parent `close()` of the unused side is a candidate for a critical section:
-if several threads can interleave in there, child processes may inherit
-descriptors which 'belong' to other `fork()`/`exec()` operations, which may
-in turn be a problem or not depending on how descriptor cleanup is
-performed in the child (if no cleanup is performed, pipes may remain open
-at both ends which will prevent seeing EOFs etc.). Thanks to StackExchange
-user Celada for explaining this to me.
-
-For multithreaded programs, both +fork()+ and +vfork()+ introduce possibilities
-of deadlock, because the resources held by a non-forking thread in the
-parent process can't be released in the child because the thread is not
-duplicated. This used to happen from time to time in *recollindex* because
-of an error logging call performed if the +exec()+ failed after the +fork()+
-(e.g. command not found).
-
-With +vfork()+ it is also possible to trigger a deadlock in the parent by
-(inadvertently) modifying data in the child. This could happen just
-link:http://www.oracle.com/technetwork/server-storage/solaris10/subprocess-136439.html[because
-of dynamic linker operation] (which, seriously, should be considered a
-system bug).
-
-
-In general, the state of program data in the child process is a semi-random
-snapshot of what it was in the parent, and the official word about what you
-can do is that you can only call
-link:http://man7.org/linux/man-pages/man7/signal.7.html[async-safe library
-functions] between +fork()+ and +exec()+. These are functions which are
-safe to call from a signal handler because they are either reentrant or
-can't be interrupted by a signal. A notable missing entry in the list is
-`malloc()`.
-
-These are normally not issues for programs which only fork to execute
-another program (but the devil is in the details as demonstrated by the
-logging call issue...).
-
-One of the approaches often proposed for working around this mine-field is
-to use an auxiliary small process to execute any command needed by the main
-one. The small process can just use +fork()+/+exec()+ with no performance
-issues. This has the drawback of complicating communication a lot if
-data needs to be transferred one way or another.
-
-////
-Passing descriptors around
-http://stackoverflow.com/questions/909064/portable-way-to-pass-file-descriptor-between-different-processes
-http://www.normalesup.org/~george/comp/libancillary/
-http://stackoverflow.com/questions/28003921/sending-file-descriptor-by-linux-socket/
-
-The process would then be:
- - Tell slave to fork/exec cmd (issue with cmd + args format)
- - Get fds
- - Tell slave to wait, recover status.
-////
-
-== The posix_spawn() Linux non-event
-
-Given the performance issues of `fork()` and tricky behaviour of `vfork()`,
-a "simpler" method for starting a child process was introduced by Posix:
-`posix_spawn()`.
-
-The `posix_spawn()` function is a black box, externally equivalent to a
-`fork()`/`exec()` sequence, and has parameters to specify the usual
-house-keeping performed at this time (file descriptors and signals
-management etc.). Hiding the internals gives the system a chance to
-optimize the performance and avoid `vfork()` pitfalls like the `ld.so`
-lockup described in the Oracle article.
-
-The Linux posix_spawn() is implemented by a `fork()`/`exec()` pair by default. 
-
-`vfork()` is used either if specified by an input flag or no
-signal/scheduler/process_group changes are requested. There must be a
-reason why signal handling changes would preclude `vfork()` usage, but I
-could not find it (signal handling data is stored in the kernel task_struct).
-
-The Linux glibc `posix_spawn()` currently does nothing that user code could
-not do. Still, using it would probably be a good future-proofing idea, but
-for a significant problem: there is no way to specify closing all open
-descriptors bigger than a specified value (closefrom() equivalent). This is
-available on Solaris and quite necessary in fact, because we have no way to
-be sure that all open descriptors have the CLOEXEC flag set.
-
-So, no `posix_spawn()` for us (support was implemented inside
-*recollindex*, but the code is normally not used).
-
-== The chosen solution
-
-The previous version of +recollindex+ used to use +vfork()+ if it was running
-a single thread, and +fork()+ if it ran multiple ones.
-
-After another careful look at the code, I could see few issues with
-using +vfork()+ in the multithreaded indexer, so this was committed. 
-
-The only change necessary was to get rid of an implementation of the
-lacking Linux +closefrom()+ call (used to close all open descriptors above a
-given value). The previous Recoll implementation listed the +/proc/self/fd+
-directory to look for open descriptors but this was unsafe because of
-possible memory allocations in +opendir()+ etc.
-
-== Test results
-
-.Indexing 12500 small .doc files 
-[options="header"]
-|===============================
-|call  |real      |user       |sys
-|fork  |0m46.025s |0m26.574s |0m39.494s
-|vfork |0m18.223s |0m17.753s |0m1.736s
-|spawn/fork| 0m45.726s|0m27.082s| 0m40.575s
-|spawn/vfork|0m18.915s|0m18.681s|0m3.828s
-|recoll 1.18|1m47.589s|0m21.537s|0m29.458s
-|================================
-
-No surprise here, given the implementation of +posix_spawn()+, it gets the
-same times as the +fork()+/+vfork()+ options.
-
-The tests were performed on an Intel Core i5 750 (4 cores, 4 threads).
-
-It would be painful to play it safe and discard the 60% reduction in
-execution time offered by using +vfork()+, so this was adopted for Recoll
-1.21. To this day, no problems were discovered, but, still crossing
-fingers...
-
-The last line in the table is just for the fun: *recollindex* 1.18
-(single-threaded) needed almost 6 times as long to process the same
-files...
-
-////
-Objections to vfork: 
-  sigaction locks
-https://bugzilla.redhat.com/show_bug.cgi?id=193631
-Is Linux vfork thread-safe ? Quoting interesting comments from Solaris
-implementation: No answer to the issues cited though.
-https://sourceware.org/bugzilla/show_bug.cgi?id=378
-Aussi:
-http://blog.famzah.net/2009/11/20/fork-gets-slower-as-parent-process-use-more-memory/
-http://blog.famzah.net/2009/11/20/a-much-faster-popen-and-system-implementation-for-linux/
-Avec un workaround basé sur clone (donc linux-only). Tried it but crashes.
-////
--- a/website/idxthreads/multipara.dia
+++ b/website/idxthreads/multipara.dia
--- a/website/idxthreads/multipara.png
+++ b/website/idxthreads/multipara.png
--- a/website/idxthreads/nothreads.dia
+++ b/website/idxthreads/nothreads.dia
--- a/website/idxthreads/nothreads.png
+++ b/website/idxthreads/nothreads.png
--- a/website/idxthreads/threadingRecoll.html
+++ b/website/idxthreads/threadingRecoll.html
--- a/website/idxthreads/threadingRecoll.txt
+++ b/website/idxthreads/threadingRecoll.txt
@ -1,406 +0,0 @@
-= Converting Recoll indexing to multithreading
-:Author: Jean-François Dockès
-:Email: jfd@recoll.org
-:Date: 2012-12-03
-
-== Abstract
-
-This relates lessons learned while modifying *Recoll* indexing to be
-multithreaded. I am by no means a threaded applications expert, so that a
-few of the observations I made while doing this may be of use to other
-novices. 
-
-== Introduction
-
-http://www.recoll.org[*Recoll*] is a document indexing application, it
-allows you to find documents by specifying search terms. 
-
-The documents need to be _indexed_ for searches to be fast. In a nutshell,
-we convert the different document formats to text, then split the text into
-terms and remember where those occur. This is a time-consuming operation.
-
-Up to version 1.18 *Recoll* indexing is single-threaded: routines which
-call each other sequentially.
-
-In most personal indexer contexts, it is also CPU-bound. There is a lot of
-conversion work necessary for turning those PDF (or other) files into
-appropriately cleaned up pure text, then split it into terms and update the
-index. Given the relatively modest amount of data, and the speed of
-storage, I/O issues are secondary.
-
-Looking at the _CPU idle_ *top* output stuck at 75% on my quad-core CPU,
-while waiting for the indexing to finish, was frustrating, and I was
-tempted to find a way to keep those other cores at temperature and shorten
-the waiting.
-
-For some usages, the best way to accomplish this may be to just partition
-the index and independently start indexing on different configurations,
-using multiple processes to better utilize the available processing power.
-
-This is not a universal solution though, as it is complicated to set up,
-not optimal in general for indexing performance, and not always optimal
-either at query time. 
-
-The most natural way to improve indexing times is to increase CPU
-utilization by using multiple threads inside an indexing process.
-
-Something similar had been done with earlier versions of the *Recoll* GUI,
-which had an internal indexing thread. This had been a frequent source of
-trouble though, and linking the GUI and indexing process lifetimes was a
-bad idea, so, in recent versions, the indexing is always performed by an
-external process. Still, this experience had brought to light most of the
-problem areas, and prepared the code for further work.
-
-It should be noted that, as `recollindex` is both _nice_'d and _ionice_'d
-as a lowest priority process, it will only use free computing power on the
-machine, and will step down as soon as anything else wants to work.
-
-****
-
-The only case where you may notice that the indexing is at work
-is when the machine is short on memory and things (such as
-your Web browser) get swapped-out while you are not actively using
-them. You then notice a long delay when you want to start, because they
-need to be swapped back in. There is little which can be done about
-this. Setting _idxflushmb_ to a low value may help in some cases (depending
-on the document sizes). May I also suggest in this case that, if your
-machine can take more memory, it may be a good idea to procure some, as
-memory is nowadays quite cheap, and memory-starved machines are not fun.
-
-****
-
-In general, augmenting the machine utilisation by `recollindex` just does
-not change its responsiveness. My PC has an Intel Core i5 750 (4
-cores, no hyperthreading), which is far from being a high performance CPU
-(nowadays...), and I often forget that I am running indexing tests, it is
-just not noticeable. The machine does have a lot of memory though (12GB).
-
-
-== The Recoll indexing processing flow
-
-image::nothreads.png["Basic flow", float="right"]
-
-There are 4 main steps in the `recollindex` processing pipeline:
-
- . Find the file
- . Convert it to text
- . Process the text (split, strip etc.) and create a *Xapian* document
- . Update the index
-
-The first step, walking the file system (or some other data source), is
-usually much faster than the others, and we just leave it alone to be
-performed by the main thread. It outputs file names (and the associated
-*POSIX* _stat_ data).
-
-The last step, *Xapian* index updating, can only be single-threaded.
-
-The first idea is to change the indexing pipeline so that each step is
-performed by an independent worker thread, passing its output to the next
-thread, in assembly-line fashion.
-
-In order to achieve this, we need to decouple the different phases. They
-are normally linked by procedure calls, which we replace with a job
-control object: the 'WorkQueue'.
-
-=== The WorkQueue
-
-
-The _WorkQueue_ object is implemented by a reasonably simple class, which
-manages an input queue on which clients append jobs, and a set of worker
-threads, which retrieve and perform the jobs, and whose lifetimes are
-managed by the _WorkQueue_ object. The implementation is straightforward
-with *POSIX* threads synchronization functions and C++ *STL* data
-structures.
-
-In practice it proved quite simple to modify existing code to create a job
-object and put it on the queue, instead of calling the downstream routine
-with the job parameters, _while keeping the capacity to call the downstream
-routine directly_. The kind of coupling is determined either by compilation
-flags (for global disabling/enabling of multithreading), or according to
-configuration data, which allows experimenting with different threads
-arrangements just by changing parameters in a file, without recompiling.
-
-Each _WorkQueue_ accepts two parameters: the length of the input queue
-(before a client will block when trying to add a job), and the number of
-worker threads. Both parameters can be set in the *Recoll* configuration
-file for each of the three queues used in the indexing pipeline. Setting
-the queue length to -1 will disable the corresponding queue (using a direct
-call instead).
-
-unfloat::[]
-
-
-== The Assembly Line
-
-image::assembly.png["Assembly line", float="right"]
-
-So the first idea is to create 3 explicit threads to manage the file
-conversion, the term generation, and the *Xapian* index update. The first
-thread prepares a file, passes it on to the term generation thread, and
-immediately goes back to work on the next file, etc. 
-
-The presumed advantage of this method is that the different stages, which
-perform disjointed processing, should share little, so that we can hope to
-minimize the changes necessitated by the threads interactions.
-
-However some changes to the code were needed to make this work (and a few
-bugs were missed, which only became apparent at later stages, confirming
-that the _low interaction_ idea was not completely false).
-
-=== Converting to multithreading: what to look for
-
-I am probably stating the obvious here, but when preparing a program for
-multi-threading, problems can only arise where non-constant data is
-accessed by different threads.
-
-Once you have solved the core problems posed by the obvious data that needs
-to be shared, you will be left to deal with less obvious, hidden,
-interactions inside the program.
-
-Classically this would concern global or static data, but in a C++ program,
-class members will be a concern if a single object can be accessed by
-several threads.
-
-Hunting for static data inside a program of non trivial size is not always
-obvious. Two approaches can be used: hunting for the _static_ keyword in
-source code, or looking at global and static data symbols in *nm* output.
-
-Once found, there are mostly three types of static/global data:
-
- * Things that need to be eliminated: for example, routines can be made
-   reentrant by letting the caller supply a storage buffer instead of using
-   an internal static one (which was a bad idea in the first place
-   anyway).
- * Things that need to be protected: sometimes, the best approach is just
-   to protect the access with a mutex lock. It is trivial to encapsulate
-   the locks in C++ objects to use the "Resource Acquisition is
-   Initialization" idiom, easily making sure that locks are freed when
-   exiting the critical section. Recoll used to include a basic home-made
-   implementation, but now lets C++11 work for it.
- * Things which can stay: this is mostly initialization data such as value
-   tables which are computed once, and then stay logically constant during
-   program execution. In order to be sure of a correct single-threaded
-   initialization, it is best to explicitly initialize the modules or
-   functions that use this kind of data in the main thread when the program
-   starts.
-
-=== Assembly line approach: the results
-
-Unfortunately, the assembly line approach yields very modest improvements
-when used inside *Recoll* indexing. The reason is that this method needs
-stages of equivalent complexity to be efficient. If one of the stages
-dominates the others, its thread will be the only one active at any time,
-and little will be gained.
-
-What is especially problematic is that the balance between tasks need not
-only exist on average, but also for the majority of individual jobs.
-
-For *Recoll* indexing, even if the data preparation and index update steps
-are often of the same order of magnitude _on average_, their balance
-depends a lot on the kind of data being processed, so that things are
-usually unbalanced at any given time: the index update thread is mostly
-idle while processing PDF files, and the data preparation has little to do
-when working on HTML or plain text.
-
-In practice, very modest indexing time improvements from 5% to 15% were
-achieved with this method.
-
-[[recoll.idxthreads.multistage]]
-== The next step: multi-stage parallelism
-
-image::multipara.png["Multi-stage parallelism", float="right"]
-
-Given the limitations of the assembly line approach, the next step in the
-transformation of *Recoll* indexing was to enable full parallelism wherever
-possible.
-
-Of the four processing steps (see figures), two are not candidates for
-parallelization: 
-
- * File system walking is so fast compared to the other steps that using
-   several threads would make no sense (it would also quite probably become
-   IO bound if we tried anyway).
- * The *Xapian* library index updating code is not designed for
-   multi-threading and must stay protected from multiple accesses.
-
-The two other steps are good candidates.
-
-Most of the work to make *Recoll* code reentrant had been performed for the
-previous transformation. Going full-parallel only implied protecting the
-data structures that needed to be shared by the threads performing a given
-processing step.
-
-Just for the anecdotal value, a list of the elements that needed mutexes:
-
- Filter subprocesses cache: some file conversion subprocesses may be
-  expensive (starting a Python process is no piece of cake), so they are
-  cached for reuse after they are done translating a file. The shared cache
-  needs protection.
- Status updates: an object used to update the current file name and indexing
-  status to a shared file. 
- Missing store: the list of missing helper programs
- The readonly *Xapian* database object: a Xapian::Database object which is
-  used for checking the validity of current index data against a file's
-  last modification date.
- Document existence map: a bit array used to store an existence bit about
-  every document, and purge the disappeared at the end of the indexing
-  pass. This is accessed both from the file conversion and database update
-  code, so it also needed protection in the previous assembly line
-  approach. 
- Mbox offsets cache. Used to store the offsets of individual messages
-  inside *mbox* files.
- *iconv* control blocks: these are cached for reuse in several places, and
-  need protection. Actually, it might be better in multithreading context
-  to just suppress the reuse and locking. Rough tests seem to indicate that
-  the impact on overall performance is small, but this might change with
-  higher parallelism (or not...).
-
-The *Recoll* configuration also used to be managed by a single shared
-object, which is mutable as values may depend on what area of the
-file-system we are exploring, so that the object is stateful and updated as
-we change directories. The choice made here was to duplicate the object
-where needed (each indexing thread gets its own). This gave rise to the
-sneakiest bug in the whole transformation (see further down).
-
-Having a dynamic way to define the threads configuration makes it easy to
-experiment. For example, the following data defines the configuration that
-was finally found to be best overall on my hardware:
-
- thrQSizes = 2 2 2
- thrTCounts =  4 2 1
-
-This is using 3 queues of depth 2, 4 threads working on file conversion, 2
-on text splitting and other document processing, and 1 on Xapian updating
-(no choice here).
-
-unfloat::[]
-
-== Bench results
-
-So the big question after all the work: was it worth it ? I could only get
-a real answer when the program stopped crashing, so this took some time and
-a little faith, but the answer is positive, as far as I'm
-concerned. Performance has improved significantly and this was a fun
-project.
-
-
-.Results on a variety of file system areas:
-[options="header", width="70%"]
-|=======================
-|Area |Seconds before |Seconds after| Percent Improvement| Speed Factor
-|home |12742     | 6942 | 46%| 1.8
-|mail |2700     | 1563 | 42% | 1.7
-|projets | 5022 | 1970 | 61% | 2.5
-|pdf  | 2164 | 770 | 64% | 2.8
-|otherhtml | 5593 | 4014| 28% | 1.4
-|=======================
-
-.Characteristics of the data
-[options="header", width="70%"]
-|=======================
-|Area | Files MB | Files | DB MB | Documents
-|home | 64106 | 44897 | 1197 | 104797
-|mail | 813 | 232 | 663 | 47267
-|projets | 2056 | 34504 | 549 | 40281
-|pdf  | 1123 | 1139 | 111 | 1139 
-|otherhtml | 3442 | 223007 | 2080 | 221890
-|=======================
-
-_home_ is my home directory. The high megabyte value is due to a number of
-very big and not indexed *VirtualBox* images. Otherwise, it's a wide
-mix of source files, email,  miscellaneous documents and ebooks.
-
-_mail_ is my mail directory, full of *mbox* files.
-
-_projets_ mostly holds source files, and a number of documents.
-
-_pdf_ holds random *pdf* files harvested on the internets. The performance
-is quite spectacular, because most of the processing time goes to
-converting them to text, and this is done in parallel. Probably could be
-made a bit faster with more cores, until we hit the *Xapian* update speed
-limit.
-
-_otherhtml_ holds a myriad of small html files, mostly from
-*wikipedia*. The improvement is not great here because a lot of time is
-spent in the single-threaded *Xapian* index update.
-
-The tests were made with queue depths of 2 on all queues, and 4 threads
-working on the file conversion step, 2 on the term generation.
-
-== A variation: linear parallelism
-
-Once past the assembly-line idea, another possible transformation would be
-to get rid of the two downstream queues, and just create a job for each
-file and let it go to the end (using a mutex to protect accesses to the
-writable *Xapian* database). 
-
-With the current *Recoll* code, this can be defined by the following
-parameters (one can also use a deeper front queue, this changes little):
-
- thrQSizes = 2 -1 -1
- thrTCounts =  4 0 0
-
-In practice, the performance is close to the one for the multistage
-version.
-
-If we were to hard-code this approach, this would be a simpler
-modification, necessitating fewer changes to the code, but it has a slight
-drawback: when working on a single big multi-document file, no
-parallelism at all can be obtained. In this situation, the multi-stage
-approach brings us back to the assembly-line behaviour, so the improvements
-are not great, but they do exist.
-
-
-
-== Miscellany
-
-=== The big gotcha: my stack dump staring days
-
-Overall, debugging the modified program was reasonably
-straightforward. Data access synchronization issues mostly provoke dynamic
-data corruption, which can be beastly to debug. I was lucky enough that
-most crashes occurred in the code that was actually related to the
-corrupted data, not in some randomly located and unrelated dynamic memory
-user, so that the issues were reasonably easy to find.
-
-One issue though kept me working for a few days. The indexing process kept
-crashing randomly at an interval of a few thousand documents, segfaulting
-on a bad pointer. An access to the configuration data structure seemed to
-be involved, but, as each thread was supposed to have its own copy, I was
-out of ideas.
-
-After reviewing all the uses for the configuration data (there are quite a
-few), the problem was finally revealed to lie with the filter process
-cache. Each filter structure stored in the cache stores a pointer to a
-configuration structure. This belonged to the thread which initially
-created the filter. But the filter would often be reused by a different
-thread, with the consequence that the configuration object was now accessed
-and modified by two unsynchronized threads... Resetting the config pointer
-at the time of filter reuse was a very simple (almost) single-line fix to
-this elusive problem. 
-
-Looking at multi-threaded stack dumps is mostly fun for people with several
-heads, which is unfortunately not my case, so I was quite elated when this
-was over.
-
-=== Fork performance issues
-
-On a quite unrelated note, something that I discovered while evaluating the
-program performance is that forking a big process like `recollindex` can be
-quite expensive. Even if the memory space of the forked process is not
-copied (it's Copy On Write, and we write very little before the following
-exec), just duplicating the memory maps can be slow when the process uses a
-few hundred megabytes.
-
-I modified the single-threaded version of `recollindex` to use *vfork*
-instead of *fork*, but this can't be used with multiple threads (no
-modification of the process memory space is allowed in the child between
-*vfork* and *exec*, so we'd have to have a way to suspend all the threads
-first).
-
-I did not implement a solution to this issue, and I don't think
-that a simple one exists. The workaround is to use modest *Xapian* flush
-values to prevent the process from becoming too big.
-
-A longer-term solution would be to implement a small slave process to do
-the executing of ephemeral external commands.
--- a/website/idxthreads/xapDocCopyCrash.txt
+++ b/website/idxthreads/xapDocCopyCrash.txt
@ -1,138 +0,0 @@
-= The case of the bad Xapian::Document copy
-
-== How things were supposed to work
-
-Coming from the link:threadingRecoll.html[threading *Recoll*] page, 
-you may remember that the third stage of the
-processing pipeline breaks up text into terms, producing a *Xapian*
-document (+Xapian::Document+) which is finally processed by the last stage,
-the index updater. 
-
-What happens in practice is that the main routine in this stage has a local
-+Xapian::Document+ object, automatically allocated on the stack, which it
-updates appropriately and then copies into a task object which is placed on
-the input queue for the last stage.
-
-The text-splitting routine then returns, and its local +Xapian::Document+
-object is (implicitly) deleted while the stack unwinds.
-
-The idea is that the *copy* of the document which is on the queue should be
-unaffected, it is independent of the original and will further be processed
-by the index update thread, without interaction with the text-splitting one.
-
-At no point do multiple threads access the +Xapian::Document+ data, so
-there should be no problem.
-
-== The problem 
-
-Most *Xapian* objects are reference-counted, which means that the object
-itself is a small block of house-keeping variables. The actual data is
-allocated on the heap through eventual calls to new/malloc, and is shared
-by multiple copies of the object.  This is the case for +Xapian::Document+.
-
-This is abundantly documented, and users are encouraged to use copies
-instead of passing pointers around (copies are cheap because only a small
-block of auxiliary data is actually duplicated). This in general makes
-memory management easier.
-
-This is well-known, and it would not appear to be a problem in the above
-case as the +Xapian::Document+ actual data is never accessed by multiple
-threads.
-
-The problem is that the reference counter which keeps track of the object
-usage and triggers actual deletion when it goes to zero is accessed by two
-threads:
-
- - It is decremented while the first local object is destroyed during the
-   stack unwind in the first thread
- - It is also updated by the last stage thread, incremented if copies are
-   made, then decremented until it finally goes down to 0 when we are done
-   with the object, at which point the document data is unallocated.
-
-As the counter is not protected in any way against concurrent access, the
-actual sequence of events is undefined and at least two kinds of problems
-may occur: double deletion of the data, or accesses to already freed heap
-data (potentially trashing other threads' allocations, or reading modified
-data).
-
-A relatively simple fix for this would be to use atomic test-and-set
-operations for the counter (which is what the GNU +std::string+ does). But
-the choice made by *Xapian* to let the application deal with all
-synchronization issues is legitimate and documented, nothing to complain
-about here. I just goofed.
-
-Because the counter test and update operations are very fast, and occur
-among a lot of processing from the final stage thread, the chances of
-concurrent access are low, which is why the problem manifests itself very
-rarely. Depending on thread scheduling and all manners of semi-random
-conditions, it is basically impossible to reproduce reliably.
-
-== The fix
-
-The implemented fix was trivial: the upstream thread allocates the initial
-+Xapian::Document+ on the heap, copies the pointer to the queue object, and
-forgets about it. The index-updating thread peruses the object then
-+delete+'s it. Real easy.
-
-An alternative solution would have been to try and use locking to protect
-the counter updates. The only place where such locking operations could
-reasonably occur is inside the +Xapian::Document+ refcounted pointer
-object, which we can't modify. Otherwise, we would have to protect the
-_whole scopes of existence_ of the Xapian::Document object in any routine
-which creates/copies or (implicitly) deletes it, which would cause many
-problems and/or contention issues.
-
-== Why did I miss this ?
-
-The mechanism of the crashes is simple enough, quasi-obvious. 
-How on earth could I miss this problem while writing the code ? 
-
-For the sake of anecdote, my first brush with atomicity for updates of
-reference counters was while debugging a System V release 4 kernel VFS file
-system module, at the time when SVR4 got a preemptive kernel with SVR4-MP,
-circa 1990... I ended up replacing a +counter+++ with +atomic_add()+ after
-a set of _interesting_ debugging sessions interspersed with kernel crashes
-and +fsck+ waits. This should have left some memories. So what went wrong ?
-Here follows a list of possible reasons:
-
- Reasoning by analogy: std::string are safe to use in this way. The other
-  objects used in the indexing pipe are also safe. I just used
-  +Xapian::Document+ in the same way without thinking further.
- Probably not how I would do it: faced with designing +Xapian::Document+,
-  (not clever enough to do this anyway), I'd probably conclude that not
-  wanting to deal with full-on concurrency is one thing, not protecting the
-  reference counters is another, and going too far.
- The problem was not so easily visible because the object deletion is
-  implicitly performed during the stack unwind: this provides no clue, no
-  specific operation to think about.
- Pure laziness.
-
-
-As a conclusion, a humble request to library designers: when an
-interface works counter to the reasonable expectations of at least some of
-the users (for example because it looks like, but works differently from, a
-standard library interface), it is worth it to be very specific in the
-documentation and header file comments about the gotchas. Saving people
-from their own deficiencies is a worthy goal.
-
-Here, a simple statement that the reference count was not mt-safe
-(admittedly redundant with the general statement that the *Xapian* library
-does not deal with threads), would have got me thinking and avoided the
-error.
-
-++++
-      <h2 id="comments">Comments</h2>
-
-      <div id="disqus_thread"></div>
-      <script type="text/javascript">
-        var disqus_shortname = 'lesbonscomptes'; 
-        (function() {
-            var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
-            dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
-            (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
-        })();
-      </script>
-      <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
-      <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
-
-++++
--- a/website/index.html.en
+++ b/website/index.html.en
@ -1,401 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-  <head>
-    <title>Recoll text search finds your documents</title>
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Description" content="Recoll is a desktop text search application for Unix, Linux, Microsoft Windows and Mac OS X, based on the Xapian search engine library.">
-    <meta name="Keywords" content="text search, pdf search, document search, full-text search, desktop search, open source,free">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content="text/html; charset=utf-8">
-    <meta name="robots" content="All,Index,Follow">
-    <link type="text/css" rel="stylesheet" href="styles/style.css">
-    <link rel="shortcut icon" href="favicon.ico" />
-
-  </head>
-
-  <body>
-
-    <div class="rightlinks">
-      <ul>
-	<li><a href="http://www.recoll.org">Home</a></li>
-	<li><a href="pics/index.html">Screenshots</a></li>
-	<li><a href="download.html">Downloads</a></li>
-	<li><a href="doc.html">Documentation</a></li>
-        <li><a href="support.html">Support</a></li>
-	<li><a href="devel.html">Helping out</a></li>
-	<li><a href="index.html.fr">En Français</a></li>
-        <li><a class="weak" href="../pages/lbc-hosting.html">lesbonscomptes</a></li>
-      </ul>
-      <p class="indexthumb">
-      <a href="pics/index.html"><img width="100" alt=
-	  "Thumbnail of recoll main screen" src=
-	  "pics/recoll0-thumb.png"></a>
-	</p>
-
-    </div>
-
-    <div class="content">
-
-      <h1><img align="center" src="pics/recoll64.png"/>
-        <a href="http://www.recoll.org/">Recoll</a> is
-	a desktop full-text search tool.</h1>
-
-      <p><span class="application">Recoll</span> finds keywords
-        inside documents as well as file names.</p>
-      <ul>
-        <li>Versions are available for <a href="download.html">Linux</a>
-          and <a href="pages/recoll-windows.html">MS Windows</a>.</li>
-        <li>A
-          <a href="https://github.com/koniu/recoll-webui">WEB
-            front-end</a> with preview and download features can
-            replace or supplement the GUI for remote
-          use.</li>
-        <li>It can search
-          most <span class="important"><a href="features.html#doctypes">document
-          formats</a></span>. <a href="features.html#doctypes">You may
-            need external applications for text extraction</a>.</li>
-        <li>It can reach any storage place: files, 
-          archive members, email attachments, transparently
-          handling decompression.</li>
-        <li>One click will open the document inside a native editor or
-          display an even quicker text preview.</li>
-        <li>The software is free, open source,
-	  and licensed under the GPL.</li>
-        <li><a href="features.html">Detailed features</a> and
-          application requirements for supported document types.</li>  
-      </ul>
-
-      <p>The current <span class="application">Recoll</span> version is
-	<a href="download.html">1.23.2</a> 
-	(<a href="release-1.23.html">Release notes</a>,
-        <a href="BUGS.html">known bugs</a>,
-        <a href="release-history.html">Release history</a>).</p>
-
-
-      <p><span class="application">Recoll</span> is based on the very
-        capable <a href="http://www.xapian.org">Xapian</a> search
-        engine library, for which it provides a powerful text
-	extraction layer and a complete, yet easy to use, Qt graphical
-	interface.</p>
-
-      <p class="remark">Recoll will index an <b>MS-Word</b> document
-        stored as an <b>attachment</b> to an <b>e-mail message</b> inside
-	a <b>Thunderbird folder</b> archived in a <b>Zip file</b> (and
-	more...). It will also help you search for it with a friendly and
-	powerful interface, and let you open a copy of a PDF at the right
-	page with two clicks. There is little that will remain
-	hidden on your disk.</p>
-
-      <p>Recoll has extensive <a href="doc.html">
-          documentation</a>. If you run into a problem, or want to
-          propose improvements, you are welcome to use
-          the <a href="support.html">
-          <span class="important">mailing list or problem
-          tracker</span></a>.</p>
-
-      <p><b><i>Recoll user ?</i></b> Maybe there are still a few useful
-      	  search tricks that you don't know about. A quick look at
-	the <a href="usermanual/RCL.SEARCH.html#RCL.SEARCH.GUI.TIPS">search
-	tips</a> might prove useful ! Also
-	the <a href="faqsandhowtos/index.html">
-	  Faqs and Howtos section</a>, and some contributed
-        <a href="custom.html">result list formats</a>.</p>
-
-      <h2>Thanks</h2>
-      <p>Recoll borrows a lot of code
-	from other packages, and welcomes code and ideas from
-	contributors, see some of the 
-	<a class="important" href="credits.html">Credits</a>.</p>
-
-      <h2>News</h2>
-      <div class="news">
-      
-        <dl>
-          <dt>2017-07-31</dt><dd>Finalizing the move to the new site,
-            I am closing the old BitBucket project. The existing
-            BitBucket issues <a href="bitbucket-issues-recoll/index.html">
-              have been archived</a>.</dd>
-          
-          <dt>2017-07-02</dt><dd>The source code repository and issue
-            tracker are moving to a
-            <a href="https://opensourceprojects.eu/p/recoll1/">
-              new place</a>.<br clear="all"></dd>
-
-          <dt>2017-05-23</dt><dd>Release 1.23.2 has gotten much
-            better at <a href="recoll_XMP">processing PDF XMP
-              data</a>.</dd>
-          
-          <dt>2017-05-15</dt><dd>Release 1.23.2. This fixes a couple
-            of quite serious bugs. See
-            the <a href="release-1.23.html">Release notes</a></dd>
-
-          <dt>2017-03-09</dt><dd>Release 1.23.1. See
-            the <a href="release-1.23.html">Release notes</a></dd>
-
-          <dt>2016-11-25</dt><dd>Release 1.22.4 is available and fixes
-            an annoying qt5 glitch (advanced search 'start search'
-            button doing nothing). <a href="release-1.22.html">Release
-              notes</a></dd>
-          
-          <dt>2016-06-21</dt><dd>Release 1.22.3 is available. This is
-            going to replace 1.21 as the main release. See
-            the <a href="release-1.22.html">release
-            notes</a>. Some input handler dependencies have changed.</dd>
-
-          <dt>2016-05-11</dt><dd>Release 1.21.7 fixes an annoying but
-            benign GUI crash-on-exit bug reported on Fedora 23 (qt5).</dd>
-          
-          <dt>2016-04-21</dt><dd>I experimented with installing
-            the <a href="https://github.com/koniu/recoll-webui">Recoll
-              Web UI</a> with Apache, and found out
-            that <a href="pages/recoll-webui-install-wsgi.html">this
-            is really easy</a>, actually both easier to set up and
-            more useful than running it standalone. Recently added:
-            instructions for running with Nginx instead of Apache.</dd>
-          
-          <dt>2016-04-18</dt><dd>Found a <a href="BUGS.html#GUIADV">GUI
-            crash bug</a> with a reasonably easy workaround.</dd>
-
-        <dt>2016-04-14</dt><dd>Release 1.22.0 is now available from
-          the download area. The binary packages should wait until
-          enough brave souls have tested it. See
-          the <a href="release-1.22.html">release notes</a>.</dd>
-
-        <dt>2016-04-07</dt><dd>Release 1.21.6  adds KDE5 compatibility
-          for the KIO slave.</dd>
-        
-        <dt>2016-01-29</dt><dd>Release 1.21.5 is out. It fixes a
-          relatively nasty bug affecting all previous 1.21 versions:
-          the query language parser processed incorrectly multiple
-          mime type or category specifications, with missing results
-          as a consequence </dd>
-
-        <dt>2016-01-12</dt><dd>It seems that we currently have a
-          relatively frequent problem resulting in damaged indexes. If
-          you are experiencing heavy reindexing (incremental indexing
-          takes longer than it should), or missing search results,
-          please take a look at the top of
-          the <a href="BUGS.html">known bugs page</a></dd>
-
-        <dt>2015-11-09</dt>
-        <dd><a href="pics/windows-recoll.html">
-            <img align="left" width="100" alt="Recoll on MS-Windows" 
-                 src="pics/windows-recoll-thumb.png"></a>
-          <span class="important">Recoll for
-            MS-Windows</span>. Still a few things missing (like
-          real-time monitoring), but it does work, and it has a proper
-          installer, so you can easily get rid of it if you don't like
-          it. <a href="pages/recoll-windows.html">Have a look</a>.
-          This is an almost-native port, based on Qt and the Windows
-          API, no need for Cygwin. Thanks to Christian Motz for
-          helping with the filter interface (and the rest).  I would
-          love some feedback!<br clear="all">
-        </dd>
-
-        <dt>2015-10-17</dt>
-        <dd>A bug in the verification of configuration file path variables
-          generates spurious warnings from recollindex when the
-          skippedPaths variable contains elements with wildcards. This
-          has no consequence except for the spurious error
-          message.</dd>
-
-        <dt>2015-10-01</dt>
-        <dd>Release 1.21.2 is out, and replaces 1.20 as production
-          release. </dd>
-
-        <dt>2015-06-30</dt>
-        <dd>A new rclpdf filter, with improved compatibility with
-        recent poppler pdftotext
-          versions. See <a href="filters/filters.html">rclpdf
-          filter</a>.</dd>
-
-        <dt>2015-06-16</dt>
-        <dd>Recoll 1.21.0 is out. This has a new query parser and
-          should be considered an unstable release, please do not
-          package it (1.20.6 is the one you want for stability). It
-          also <a href="idxthreads/forkingRecoll.html">changes the way
-            filters are executed</a> for better performance. See the
-          <a href="release-1.21.html">release notes</a> for more
-          detail about the few other changes.</dd>
-
-        <dt>2015-04-25</dt>
-        <dd>Recoll 1.20.6 is out, with mostly small fixes to
-          compressed file handling, which may make a big difference in
-          some cases. See the <a href="release-1.20.html">release
-          notes</a>. Of course it also incorporates the Qt 5
-          compatibility from 1.20.5 (Qt
-          5.3.2 ok, 5.2 does not work).</dd>
-
-        <dt>2015-03-30</dt> 
-        <dd>Recoll 1.20.4 released. This fixes real time indexing of
-          the web history (when using the Firefox plugin).</dd>
-		
-        <dt>2014-12-27</dt> 
-        <dd><a href="https://www.gnu.org/software/unrtf/">
-          Unrtf 21.8</a> has been released. This fixes many issues
-          in unrtf, some with possible security implications. You
-          really want to use this version.</dd>
-
-        <dt>2014-12-18</dt> <dd>Recoll 1.20.1 is out and replaces 1.19
-          as the main version. I have been using 1.20 for months
-          (along with a number of fearless builders-from-source), and
-          it's as stable as 1.19, with nice
-          small <a href="release-1.20.html">new features</a>. Packages
-          will follow shortly. It is recommended (but not strictly
-          required, see the notes) to run an index reset when
-          upgrading.</dd>
-
-        <dt>2014-12-10</dt> <dd>The aspell command used for
-          orthographic suggestions is broken on Debian Jessie (because
-          of an aspell packaging issue), and this will not be fixed
-          for the Debian release. See the <a href="BUGS.html#aspelljessie">
-            simple workaround here</a>.</dd>
-
-        <dt>2014-11-09</dt> <dd>If you are still running anything
-          older than 1.19.14p2, <span class="important">YOU SHOULD
-          UPGRADE</span>. In
-          particular, <a href="release-1.19.html#rodb">this index
-            corruption issue</a> leading to repeated reindexing of
-            documents, and possibly query problems too, can be pretty
-          annoying.<br/> 
-          GOTO <a href="download.html">download</a> and
-          install 1.19.14p2 or 1.20. <em>Reset your index after
-            upgrading (rm -rf ~/.recoll/xapiandb)</em>.</dd>
-
-        <dt>2014-07-28</dt> <dd>A nice new application to complement
-          Recoll: <a href="https://github.com/pidlug/recollfs">recollfs</a>
-          implements a Fuse filesystem where Recoll queries are
-          represented as directories, the contents of which are links
-          to the result documents.</dd>
-
-        <dt>2014-07-16</dt> <dd>Recoll version 1.19.14p2 fixes more
-          resource management issues in the Python module (only the
-          Python package needs upgrading for this), and the processing
-          of Bengali characters (no more diacritics stripping).</dd>
-
-        <dt>2014-06-24</dt> <dd><a href="filters/filters.html#soff1">An
-            updated filter</a> for Open/LibreOffice documents. The
-          previous version merged words which were tab-separated in
-          the input.</dd>
-
-        <dt>2014-06-17</dt> <dd>The source tarball for version 1.20.0
-          has been released. This version has
-          a <a href="release-1.20.html">number of improvements</a> over
-          1.19, but also some incompatibilities. The first minor
-          releases for 1.20 may contain some functional changes in
-          addition to bug fixes, so they may be slightly less stable
-          than 1.19, and 1.19 packages remain the "safe Recoll" for
-          now. Still, if you build from source, there are a few nice 
-          things in 1.20...</dd>
-
-        <dt>2014-06-07</dt> <dd>Version 1.19.14 is out and fixes a
-          handful of minor-to-annoying indexing glitches (see the 
-          <a href="release-1.19.html">Release notes</a>).</dd>
-
-        <dt>2014-05-06</dt> <dd>Version 1.19.13 is out and hopefully
-          fixes the remaining (rare) crashes of multithreaded
-          indexing.</dd>
-
-        <dt>2014-04-03</dt> <dd>I have separated the code for the 
-          <a href="https://opensourceprojects.eu/p/unityscoperecol/">Recoll
-            Unity Scope</a> from the main body of code, in hope that it may
-          interest someone to work on it. It's Python and simple,
-          mostly depending on the Unity API. The Ubuntu Unity API is
-          apparently going to change *again* for the next version, and
-          I think I've seen enough of it.</dd>
-
-        <dt>2014-04-02</dt> <dd>1.19.12 is out. It's mostly identical
-          to 1.19.11 apart from a new parameter to change the max size
-          of stored attributes. No need to update in general.</dd>
-
-        <dt>2014-02-27</dt> <dd>I hear from time to time about
-          recollindex crashes. These appear to be quite rare, but they
-          do happen, and I think that they are linked to a yet unfound
-          bug in multithread indexing. If you experience such crashes or
-          stalls, you can disable multithreading by adding the following
-          to your recoll.conf:
-          <pre><tt>thrQSizes = -1 -1 -1</tt></pre>
-        </dd>
-
-        <dt>2014-02-27</dt><dd>While working on a 
-          <a href="http://www.recoll.org/faqsandhowtos/MuttAndRecoll.html">
-            Recoll-Mutt interface</a> I discovered incidentally that
-            the <a href="https://github.com/koniu/recoll-webui">Recoll
-            Webui Web interface</a> works quite well with the 
-          <a href="http://links.twibright.com/">links</a> web browser
-          inside a terminal window. This appears to be an interesting
-          solution for people looking for a search interface usable in
-          a non-GUI environment.</dd>
-
-        <dt>2013-11-19</dt> <dd>A <a href="filters/filters.html">new
-            filter</a> for PowerPoint files. The previous one was
-          based on the ancient <b>catppt</b> from the <b>catdoc</b>
-          utilities and usually extracted nothing from more recent
-          PowerPoint files (this is about .ppt: .pptx is handled by a native
-          Recoll filter).</dd>
-        
-        <dt>2013-05-18</dt><dd>Sometimes things 
-          <a href="http://www.lesbonscomptes.com/pages/happysearch.html">
-            just work</a>...</dd>
-
-        <dt>2013-04-30</dt><dd>Thanks to some of its users, Recoll now
-          has filters to 
-          <a href="http://sourceforge.net/projects/rcollnotesfiltr/">
-            index and retrieve Lotus Notes messages</a> 
-          (some 
-          <a href="http://richardappleby.wordpress.com/2013/04/11/you-dont-have-to-know-the-answer-to-everything-just-how-to-find-it/">
-            implementation notes from an early user</a>), and there is
-            also now a 
-            <a href="https://github.com/koniu/recoll-webui/">
-              Web browser interface</a> for querying your Recoll
-              indexes.</dd>  
-
-        <dt>2012-10-25</dt> <dd>A problem with a simple workaround has caused
-          several reported <span class="important">recollindex
-          crashes</span> recently (for 1.17). If you store and index
-          Mozilla/Thunderbird email out of the standard location
-          (~/.thunderbird), you should add the following at the end of
-          your configuration file (e.g.:
-          ~/.recoll/recoll.conf): <pre><tt>
-              [/path/to/my/mozilla/mail]
-              mhmboxquirks = tbird
-          </tt></pre> Adjust the path to your local value of course...
-          Without this hint, recollindex has trouble finding the
-          message delimiters inside the folder files, and will
-          possibly use all the computer's memory and crash. Apart from
-          crashes, which only occur for very big folders, this also
-          causes incorrect mail indexing.
-        </dd>
-
-        <dt>2012-09-11</dt> <dd>A new user-contributed script for those who use
-          real-time indexing on laptops: stop or start indexing
-          according to AC power status. See the details on
-          the <a href="http://www.recoll.org/faqsandhowtos/IndexOnAc.html">
-            Wiki</a>. </dd>
-
-        <dt>2012-04-07</dt><dd>We now have a Chinese user manual:
-        Recoll现在有中文手册咯： 
-        <a href="http://stupidbeauty.com/Blog/2012/03/recoll%E7%94%A8%E6%88%B6%E6%89%8B%E5%86%8A%E7%BF%BB%E8%AD%AF%EF%BC%8Crecoll-user-manual-2/">
-          Recoll中文手册，HTML</a></dd>
-
-
-      </dl>
-      </div>
-	
-      <h2>On the side</h2>
-
-      <div class="news">
-      <blockquote>
-        <p>We rent <a href="http://www.metairie-enbor.com/index.html.en">
-	    a big country house</a> in the Aude area, in the south of
-	  France (<a href="http://www.metairie-enbor.com/acces.html.en">see
-	    map on the site</a>). If you are
-	  looking for a wonderful country place with a pool to
-	  spend holidays with a big bunch of family and/or
-          friends in a nice historical but very quiet area, this may be it.</p>
-      </blockquote>
-      </div>
-
-    </div>
-  </body>
-</html>
--- a/website/index.html.fr
+++ b/website/index.html.fr
@ -1,193 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-
-<html>
-  <head>
-    <title>RECOLL: un outil personnel de recherche textuelle pour 
-      Unix et Linux</title>
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Description" content=
-    "recoll est un logiciel personnel de recherche textuelle pour unix et linux basé sur Xapian, un moteur d'indexation puissant et mature.">
-    <meta name="Keywords" content=
-      "recherche textuelle,desktop,unix,linux,solaris,open
-      source, free, bois de chauffage">
-    <meta http-equiv="Content-language" content="fr">
-    <meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
-    <meta name="robots" content="All,Index,Follow">
-    <link type="text/css" rel="stylesheet" href="styles/style.css">
-    <link rel="shortcut icon" href="favicon.ico" />
-  </head>
-
-  <body>
-
-    <div class="rightlinks">
-      <ul>
-	<li><a href="http://www.recoll.org">Base</a></li>
-	<li><a href="pics/index.html">Copies d'écrans</a></li>
-	<li><a href="download.html">Téléchargements</a></li>
-	<li><a href="doc.html">Documentation</a></li>
-	<li><a href="devel.html">Développement</a></li>
-      </ul>
-      <p class="indexthumb">
-      <a href="pics/index.html"><img width="100" alt=
-	  "Imagette de l'écran principal" src=
-	  "pics/recoll0-thumb.png"></a>
-	</p>
-
-    </div>
-
-    <div class="content">
-
-      <h1><img align="center" src="pics/recoll64.png"/>
-        <a href="http://www.recoll.org/">Recoll</a> est 
-	un outil personnel de recherche textuelle pour Unix et Linux</h1>
-
-      <p>Il est basé sur le puissant moteur d'indexation <a href=
-	  "http://www.xapian.org">Xapian</a>, pour lequel il offre une
-	  interface graphique QT facile d'utilisation, riche, et facile à
-	  mettre en oeuvre.</p>
-
-      <p><span class="application">Recoll</span> est un logiciel libre
-	gratuit, dont le code source est disponible sous licence GPL.
-	La dernière version est 
-        <a class="important" href="download.html">1.23.1</a>
-	(<a href="release-1.23.html">notes sur la version, en
-	anglais</a>)</p> 
-
-      <p>L'interface utilisateur de 
-	<span class="application">Recoll</span> est traduite en
-	Français, mais pas encore la documentation, malheureusement,
-	et la plupart des liens de cette page pointent sur des textes
-	en Anglais.</p>
-
-
-      <h2>Caractéristiques: </h2>
-
-      <ul>
-        <li>Installation facile, peu de dépendances. Pas besoin de
-        démon permanent, de serveur http, d'un environnement de bureau
-        particulier ou d'un langage exotique.</li>
-
-	<li>Tourne sur la plupart des 
-	  <a href="fr/features.html#systems">systèmes</a> fondés sur 
-	  Unix.</li> 
-
-        <li>Interface conçue avec <a href="http://www.trolltech.com">
-	    Qt 4 ou 5 selon les plateformes.</a></li> 
-
-        <li>Traite la plupart des <a href="fr/features.html#doctypes">
-	    types de documents</a> courants, les messages et leurs fichiers
-	    attachés. Peut aussi traiter leurs versions comprimées
-	    (gzip ou bzip2) de tous ces documents.
-          <a href="features.html#doctypes">Applications externes pour
-            l'extraction du texte</a>.</li>
-
-        <li>Fonctions de recherche puissantes, avec expressions Booléennes,
-          phrases et proximité, wildcards, filtrage sur les types de fichiers
-          ou l'emplacement.</li>
-
-        <li>Multi-langage et multi-jeu de caractères, utilisant
-	  Unicode en interne.</li> 
-
-	<li><a class="weak" href="fr/features.html">
-	    (plus de détails)</a></li>
-
-      </ul>
-      
-      <p><b><i>Déjà utilisateur ?</i></b> Il est possible qu'il 
-	y ait encore quelques astuces qui vous aient échappées. Un coup
-	d'oeil rapide sur la page des <a
-        href="usermanual/RCL.SEARCH.html#RCL.SEARCH.GUI.TIPS"> petites
-        recettes de recherche</a> (en anglais) pourrait s'avérer
-        fructueux ! Également, en anglais,
-        la <a href="faqsandhowtos/index.html">section des questions
-          fréquentes et trucs divers</a>.</p>
-
-      <h2>Nouvelles: </h2>
-
-      <dl>
-          <dt>2017-05-15</dt><dd>Version 1.23.2. Corrige quelques bugs
-          sérieux. Voir les <a href="release-1.23.html">Release notes (en
-            anglais).</a></dd>
-          <dt>2017-03-09</dt><dd>Version 1.23.1. 
-            Voir les <a href="release-1.23.html">Release notes (en
-            anglais).</a></dd>
-          
-        <dt>2016-11-23</dt><dd>Version 1.22.4.</dd>
-        <dt>2016-06-15</dt><dd>La version 1.22.3 est disponible et va
-          progressivement remplacer 1.21 comme version
-          principale. <a href="release-1.22.html">Notes de version</a>
-          (en anglais).</dd>
-          
-        <dt>2016-05-11</dt><dd>Release 1.21.7: corrige un crash bénin
-          mais agaçant au moment de quitter l'interface utilisateur
-          (Fedora 23 / qt5).</dd>
-
-        <dt>2015-11-09</dt>
-        <dd>Recoll indexe Windows ! Il y a encore quelques éléments
-          manquants, comme l'indexation temps-réel, et la traduction
-          en Français, mais ça marche suffisamment bien pour être
-          essayé. Il y a un installeur standard, donc si vous n'aimez
-          pas, c'est facile à désinstaller...
-          Pas de traduction Française pour le moment. Il y
-          a <a href="pages/recoll-windows.html"> quelques
-            explications en Anglais sur l'installation </a>.
-          Si vous l'essayez, dites moi ce que vous en pensez !
-        </dd>
-
-        <dt>2012-10-25</dt><dd> Un problème avec une solution simple
-          peut provoquer 
-          des <span class="important">plantages de
-          recollindex</span>. 
-          Si vous indexez des messages mail Mozilla/Thunderbird
-          ailleurs qu'à l'endroit standard (~/.thunderbird), vous
-          devriez ajouter les lignes qui suivent à la fin de votre
-          fichier de configuration (~/.recoll/recoll.conf):
-          <pre><tt>
-              [/path/to/my/mozilla/mail]
-              mhmboxquirks = tbird
-          </tt></pre> Changez le chemin d'accès pour le vôtre bien
-          sûr.  Sans cette indication, recollindex a des difficultés à
-          déterminer les limites de message dans les fichiers mailbox,
-          et peut arriver à utiliser toute la mémoire de la machine,
-          et à se planter. Dans les cas moins graves (avec des
-          fichiers de taille "raisonnable"), cela provoque aussi une
-          indexation incorrecte des messages.
-        </dd>
-
-        <dt>2010-11-20</dt><dd>Un petit script pour activer/cacher recoll sur un
-          bureau gnome d'un seul coup de clavier: 
-          <a href="http://www.recoll.org/faqsandhowtos/HotRecoll.html">
-            recette d'installation</a>.</dd>
-
-      </dl>
-
-      <h2><a name="support">Support</a></h2>
-
-      <p>Si vous avez un problème quelconque avec le logiciel ou son
-      installation, ou une idée de fonctions à ajouter, merci de me
-	<a href=
-	  "mailto:jfd@recoll.org">contacter</a>.</p>
-
-      <p>Voir aussi la <a href="devel.html">page sur le
-      développement</a>.</p>
-      <p><a href="BUGS.html">Liste des probl&egrave;mes connus</a> (en
-      anglais). </p>
-
-      <h2>Remerciements</h2>
-      <p><span class="application">Recoll</span> emprunte beaucoup de code
-	d'autres logiciels libres, et accueille volontiers les
-	contributions en code ou en suggestions, voir la page des 
-	<a class="important" href="credits.html">Attributions</a>.</p>
-
-      <h2>Autres</h2>
-      <p>Je loue une 
-	<a href="http://www.metairie-enbor.com/index.html.fr">
-	  grande maison sympa dans l'Aude</a> :), et nous produisons aussi
-	  du <a href="http://www.metairie-enbor.com/bois-de-chauffage.html">
-          bois de chauffage</a>. (Il faut bien que cette page me serve
-          tout de même à <em>quelque chose</em> à moi aussi de temps
-          en temps !).</p>
-
-    </div>
-  </body>
-</html>
--- a/website/pages/Makefile
+++ b/website/pages/Makefile
@ -1,10 +0,0 @@
-.SUFFIXES: .txt .html
-
-.txt.html:
-	asciidoc $<
-
-all: recoll-windows.html recoll-windows-faq.html \
-     recoll-webui-install-wsgi.html
-
-clean:
-	rm -f *.html
--- a/website/pages/recoll-webui-install-wsgi.txt
+++ b/website/pages/recoll-webui-install-wsgi.txt
@ -1,280 +0,0 @@
-= Recoll WebUI Apache and nginx installation from scratch 
-
-NOTE: thanks to Michael L. Wilson for the `nginx` part.
-
-The https://github.com/koniu/recoll-webui[Recoll WebUI] offers an
-alternative, WEB-based, interface for querying a Recoll index.
-
-It can be quite useful to extend the use of a shared index to multiple
-workstations, without the need for a local Recoll installation and shared
-data storage.
-
-The Recoll WebUI is based on the
-http://bottlepy.org/docs/dev/index.html[Bottle Python framework], which has
-a built-in WEB server, and the simplest deployment approach is to run it
-standalone. However the built-in server is restricted to handling one
-request at a time, which is problematic in multi-user situations,
-especially because some requests, like extracting a result list into a CSV
-file, can take a significant amount of time.
-
-The Bottle framework can work with several multi-threading Python HTTP
-server libraries, but, given the limitations of the Recoll Python module
-and the Python interpreter itself, this will not yield optimal performance,
-and, especially can't efficiently leverage the now ubiquitous
-multiprocessors.
-
-In multi-user situations, you can get better performance and ease of use
-from the Recoll WebUI by running it under Apache or Nginx rather than as a
-standalone process. With this approach, a few requests per second can
-easily be handled even in the presence of long-running ones.
-
-Neither Recoll nor the WebUI are optimized for high multi-user load, and it
-would be very unwise to use them as the search interface to a busy WEB
-site.
-
-The instructions about using the WebUI under Apache as given in the
-repository README are a bit terse, and are missing a few details,
-especially ones which impact performance.
-
-Here follows the synopsis of three WebUI installations on initially
-Apache-less Ubuntu (14.04) and DragonFly BSD systems, and for
-Nginx/BSD. The first should extend easily to other Debian-based systems,
-the second at least to FreeBSD. rpm-based systems are left as an exercise
-to the reader, at least for now...
-
-
-CAUTION: THE CONFIGURATIONS DESCRIBED HAVE NO ACCESS CONTROL. ANYONE WITH
-ACCESS TO THE NETWORK WHERE THE SERVER IS LOCATED CAN RETRIEVE ANY
-DOCUMENT.
-
-link:#nginx[Jump to the nginx section].
-
-[[apache]]
-== Apache
-=== On a Debian/Ubuntu system
-
-==== Install recoll 
-
-    sudo apt-get install recoll python-recoll
-
-Configure the indexing and check that the normal search works (I spent
-quite a lot of time trying to understand why the WebUI did not work, when
-in fact it was the normal recoll configuration which was broken and the
-regular search did not work either).
-
-Take care to be logged in as the user you want to run the web search as
-while you do this.
-
-
-==== Install the WebUI
-
-Clone the github repository, or extract the master tar installation, and
-move it to '/var/www/recoll-webui-master/'. Take care that it is read/execute
-accessible by your user.
-
-==== Install Apache and mod-wsgi
-
-
-    sudo apt-get install apache2 libapache2-mod-wsgi
-
-I then got the following message:
-
-    AH00558: apache2: Could not reliably determine the server's fully qualified domain name, using 127.0.1.1. Set the 'ServerName' directive globally to suppress this message
-
-To clear it, I added a ServerName directive to the Apache config, maybe you
-won't need it. Edit '/etc/apache2/sites-available/000-default.conf' and add
-the following at the top (globally). Things work without this fix anyway,
-this is just to suppress the error message. You probably need to adjust the
-address or use a real host name:
-
-    ServerName 192.168.4.6
-
-
-Edit '/etc/apache2/mods-enabled/wsgi.conf', add the following at the end of
-the "IfModule" section.
-
-Change the user ('dockes' in the example) taking care that he is the one who
-owns the index ('.recoll' is in his home directory).
-
-    WSGIDaemonProcess recoll user=dockes group=dockes \
-        threads=1 processes=5 display-name=%{GROUP} \
-        python-path=/var/www/recoll-webui-master
-    WSGIScriptAlias /recoll /var/www/recoll-webui-master/webui-wsgi.py
-    <Directory /var/www/recoll-webui-master>
-            WSGIProcessGroup recoll
-            Order allow,deny
-            allow from all
-    </Directory>
-
-NOTE: the Recoll WebUI application is mostly single-threaded, so it is of
-little use (and may actually be counter-productive in some cases) to
-specify multiple threads on the WSGIDaemonProcess line. Specify multiple
-processes instead to put multiple CPUs to work on simultaneous requests.
-
-
-Then run the following to restart Apache:
-
-    sudo apachectl restart
-
-The Recoll WebUI should now be accessible on 'http://my.server.com/recoll/'
-
-NOTE: Take care that you need a '/' at the end of the URL used to access
-the search (use: 'http://my.server.com/recoll/', not
-'http://my.server.com/recoll'), else files other than the script itself are
-not found (the page looks weird and the search does not work).
-
-CAUTION: THERE IS NO ACCESS CONTROL. ANYONE WITH ACCESS TO THE NETWORK
-WHERE THE SERVER IS LOCATED CAN RETRIEVE ANY DOCUMENT.
-
-=== Apache Variant for BSD/ports
-
-==== Packages
-
-As root:
-
-    pkg install recoll
-
-
-Do what you need to do to configure the indexing and check that the normal
-search works.
-
-Take care to be logged in as the user you want to run the web search as
-while you do this.
-
-    pkg install apache24
-
-Add apache24_enable="YES" in /etc/rc.conf
-
-    pkg install ap24-mod_wsgi4
-    pkg install git
-
-==== Clone the webui repository
-
-    cd /usr/local/www/apache24/
-    git clone https://github.com/koniu/recoll-webui.git recoll-webui-master
-
-Important: most input handler helper applications (e.g. 'pdftotext') are
-installed in '/usr/local/bin' which is not in the PATH as seen by Apache
-(at least on DragonFly). The simplest way to fix this is to modify the
-launcher module for the webui app so that it fixes the PATH.
-
-Edit 'recoll-webui-master/webui-wsgi.py' and add the following line after
-the 'import os' line:
-
-    os.environ['PATH'] = os.environ['PATH'] + ':' + '/usr/local/bin'
-
-
-
-==== Configure Apache
-
-Edit /usr/local/etc/apache24/modules.d/270_mod_wsgi.conf
-
-Uncomment the LoadModule line, and add the directives to alias /recoll/ to
-the webui script.
-
-Change the user (dockes in the example) taking care that he is the one who
-owns the index (.recoll is in his home directory).
-
-Contents of the file:
-
-    ## $FreeBSD$
-    ## vim: set filetype=apache:
-    ##
-    ## module file for mod_wsgi
-    ##
-    ## PROVIDE: mod_wsgi
-    ## REQUIRE:
-    
-    LoadModule wsgi_module        libexec/apache24/mod_wsgi.so
-    
-    WSGIDaemonProcess recoll user=dockes group=dockes \
-        threads=1 processes=5 display-name=%{GROUP} \
-        python-path=/usr/local/www/apache24/recoll-webui-master/
-    WSGIScriptAlias /recoll /usr/local/www/apache24/recoll-webui-master/webui-wsgi.py
-    
-    <Directory /usr/local/www/apache24/recoll-webui-master>
-            WSGIProcessGroup recoll
-            Require all granted
-    </Directory>
-
-==== Restart Apache
-
-As root:
-
-    apachectl restart
-
-
-[[nginx]]
-== Nginx
-=== Nginx for BSD/ports
-
-As root:
-
-     pkg install recoll
-
-Do what you need to do to configure the indexing and check that the normal
-search works. Take care to be logged in as the user you want to run the web
-search as while you do this.
-
-Install required packages:
-
-     pkg install nginx uwsgi git
-
-=== Nginx: clone the webui repository
-
-     rm /usr/local/www/nginx
-     mkdir /usr/local/www/nginx
-     cd /usr/local/www/nginx
-     git clone https://github.com/koniu/recoll-webui.git recoll-webui-master
-
-Important: most input handler helper applications (e.g. 'pdftotext') are
-installed in '/usr/local/bin' which is not in the PATH as seen by Nginx
-(at least on DragonFly). The simplest way to fix this is to modify the
-launcher module for the webui app so that it fixes the PATH.
-
-Edit 'recoll-webui-master/webui-wsgi.py' and add the following line after
-the 'import os' line:
-
-     os.environ['PATH'] = os.environ['PATH'] + ':' + '/usr/local/bin'
-
-Also change the following to find the correct path:
-
-     #os.chdir(os.path.dirname(__file__))
-     os.chdir('/usr/local/www/nginx/recoll-webui-master')
-
-
-=== Nginx: configure uWSGI
-
-Assuming the user running the search is "dockes" (change it to your user),
-
-     sysrc uwsgi_uid=$(id -u dockes)
-     sysrc uwsgi_gid=$(id -g dockes)
-     sysrc uwsgi_flags="-M -L --wsgi-file /usr/local/www/nginx/recoll-webui-master/webui-wsgi.py"
-
-(ALTERNATIVELY)
-
-Add the following to rc.conf
-
-     uwsgi_uid="dockes"
-     uwsgi_gid="dockes"
-     uwsgi_flags="-M -L --wsgi-file /usr/local/www/nginx/recoll-webui-master/webui-wsgi.py"
-
-
-=== Configure nginx
-
-Edit /usr/local/etc/nginx/nginx.conf and set up a proxy to uwsgi service:
-
-     location / {
-         include uwsgi_params;
-         uwsgi_pass unix:///tmp/uwsgi.sock;
-     }
-
-=== Enable and start both services
-
-As root:
-
-     sysrc uwsgi_enable=YES	#Or uwsgi_enable="YES" (in rc.conf)
-     sysrc nginx_enable=YES	#Or nginx_enable="YES" (in rc.conf)
-
-     service uwsgi start
-     service nginx start
--- a/website/pages/recoll-windows-faq.txt
+++ b/website/pages/recoll-windows-faq.txt
@ -1,88 +0,0 @@
-= Recoll on Windows tips and tricks
-Jean-Francois Dockes <jf@dockes.org>
-:toc:
-
-== Checking that Python is in the PATH
-
-Recoll input handlers are the programs which extract the documents text
-content for indexing. Most of these programs are Python scripts. If Recoll
-can find documents by file name but not by content, the first thing to
-check is that you do have the Python interpreter in your PATH.
-
-NOTE: Only Python 2 is supported at the moment (2.7 and later were
-tested). This limitation is not caused by the Recoll scripts themselves but
-by some of the auxiliary libraries (e.g.: the one used for LibreOffice text
-extraction). If you also have Python 3 installed, you will have to arrange
-for Recoll to only 'see' the Python 2 version.
-
-For simple cases, to check that the Python interpreter is in the PATH, the
-easiest approach is to start a command window and type 'python' in it. You
-should see messages from the Python interpreter, which you can then
-exit by typing 'quit()'. If the command interpreter complains about Python
-not being found, you probably need to adjust the PATH.
-
-NOTE: To start a command window, type 'command' in the start menu input
-area and select 'Command Prompt'.
-
-If the Python interpreter is not found, check that Python 2 is indeed
-installed. Adding the Python binary to the PATH is an option during
-installation (so one approach to fix the issue is to just run the
-installation again).
-
-You can also edit the environment variable directly:
-
- - Start the Control Panel
- - Select 'System and Security'
- - Select 'System'
- - Select 'Advanced system settings' in the left panel,
- - Select 'Environment Variables' at the bottom of the dialog
- - Edit 'Path' inside 'System variables' and add:
-   `C:\Python27\;C:\Python27\Scripts;` to it.
-
-== Using an alternate configuration directory
-
-This tip is useful if you want to manage several configurations, or if you
-really have some reason to not let the configuration directory stay in its
-default location ($HOMEDIR/AppData/Local/Recoll). If your concerns are only
-about storage space, and you do not actually want to manage multiple
-configuration directories, you can more simply change the index storage
-location from the GUI 'Index Configuration' panel.
-
-The easiest approach is to create a shortcut on the desktop and have it
-start the GUI with a '-c' option. For example, set the shortcut's 'Target'
-to something like:
-
----
-"C:\Program Files (x86)\Recoll\recoll.exe" -c c:/path/to/my/configdir
----
-
-_Do use forward slashes for the configuration directory path_. This will
-hopefully be fixed some day.
-
-You will need to create the configuration directory, Recoll will not do it
-by itself. You can just leave it empty, Recoll will then propose to start
-the configuration editor.
-
-You can find a more complete and general explanation about using shortcuts,
-for example http://www.rjlsoftware.com/support/faq/sa.cfm?q=6&n=61[on this
-page].
-
-
-== File name character case sensitivity
-
-_This should be fixed as of the November 2016 version. Please report
-the problem if you still see case sensitivity issues_
-
-Recoll was born on Unix, on which file names are case-sensitive. At the
-moment this is also the case for path-related queries on Windows, including
-the drive letters.
-
-When filtering results on location (e.g. with a 'dir:' clause), you need to
-enter all path elements as they appear in the URLs in result lists (and use
-forward slashes).
-
-It is also advisable to enter configuration filenames with their actual
-case (e.g. _topdirs_).
-
-I am looking into fixing this, but this is made a bit complicated by non-ASCII
-character set issues.
--- a/website/pages/recoll-windows.txt
+++ b/website/pages/recoll-windows.txt
@ -1,191 +0,0 @@
-= Recoll on Windows
-Jean-Francois Dockes <jf at dockes.org>
-:date:
-
-:recollversion: 1.23.0-2017-01-07-78b8ad
-:windir: downwin-12e3f
-
-image:recoll-windows10-thumb.png[link="recoll-windows10.png"]
-
-Recoll for Windows was built on Windows 7, and tried on Windows 7 and
-10. It does not work on Windows XP.
-
-Recoll is free and licensed under the GPL. You will be asked to accept the
-license during the installation. For a regular user, and in a nutshell, the
-license means that you are free to do what you want with the program (use,
-copy, share, etc.). If you are a developer and intend to modify and
-distribute the program, you probably know the GPL, else you should read it.
-
-NOTE: As much as I have fun writing software, producing the Windows version is
-just tedious. If you use Recoll on Windows, please consider contributing to
-its availability: image:/donations/btn_donate_LG.gif[link="/donations/index.html"]
-
-Actually I'm tired of nobody ever using the donate button among thousands
-of downloads, so recoll for windows is gone for now.
-
-== Note for updating
-
-Recoll versions 1.23.0-9c5e32-20161216 and 1.23.0-2bfd80-20161115 had been
-switched to using Xapian 1.4 which has a new and different index
-format. Due to issues in Xapian 1.4, I have switched back to using Xapian
-1.2 as of Recoll 1.23.0-2017-01-07-78b8ad.
-
-This simply means that, if your index was created by one of the above
-versions, it will have to be recreated from scratch after installing the
-current Recoll version. I advise explicitly deleting
-$HOME/AppData/Local/Recoll/xapiandb, as this will avoid leaving around 1.4
-files which would take space for nothing otherwise.
-
-== Installation
-
- Download and install Python 2.7.10 or 2.7.11 (e.g.
-  https://www.python.org/ftp/python/2.7.11/python-2.7.11.msi[Python
-  2.7.11]). Recoll currently does not work with Python3. *_On the
-  `Customize installation` screen, select "Add python.exe to Path"_*
-
- Optional: download and install the 7-zip program from
-  http://www.7-zip.org/. This is only useful if you need to index files
-  compressed with Unix methods (not needed for zip files).
-
- Download the 
-  http://www.recoll.org/downloads/{windir}/recoll-setup-{recollversion}.exe[Recoll
-  setup file]. - Not possible right now -
-
- Execute the setup file. This is a vanilla installer generated by Inno
-  Setup, and it will ask the usual questions.
-
-//NOTE: The installer needs administrator rights in order to install to
-//`C:\Program Files`. If you want to install on a machine where you have no
-//administrator rights, you can use the
-//http://www.recoll.org/downloads/{windir}/recoll-{recollversion}.7z[installation
-//directory archive] instead and extract it anywhere, this works just the
-//same (you will need the free http://www.7-zip.org/[7z] to extract it). If
-//you are in this case, you can ignore the setup-related steps of the
-//procedure of course.
-
-== Configuration
-
- Start recoll. It will ask if you want to customize the configuration.
-  The default is to index the content of your user directory. Then start
-  indexing. This can take some time.
- The default result list font is particularly ugly. Change it from 
-  `Preferences->GUI Configuration->Result List->Result List Font`
-
- Have a look at the
-  https://www.lesbonscomptes.com/recoll/usermanual/webhelp/docs/index.html[Recoll
-manual] !
- I have also started a small link:recoll-windows-faq.html[Recoll on
-  MS-Windows FAQ]. 
-
-== Support
-
-Please use the 
-https://opensourceprojects.eu/p/recoll1/tickets/new/[Recoll issues tracker]
-for reporting problems, or contact me by email: jfd at recoll.org.
-
-
-== Known problems:
-
- Having a drive root (e.g.: c:/) in the topdirs (things to index) list
-  does not work (it indexes nothing). You need to list the sub-directories
-  to index. This will be fixed in a future release.
-  
- Setting the log level to 4 or higher can cause the GUI to deadlock while
-  displaying results. This will be fixed in a future release.
-  
- Indexing is very slow, especially when using external commands (e.g. for
-  PDF files). I don't know if this is a case of my doing something stupid,
-  or if the general architecture is really badly suited to Windows. If
-  someone with good Windows programming knowledge reads this, I'd be very
-  interested in a discussion. Windows indexing can be ten times slower than
-  the Linux version. The index formats are compatible, so, if you have
-  shared Linux/Windows data, it's best to process it on Linux.
-
- Filtering by directory location ('dir:' clauses) used to be
-  case-sensitive, including drive letters. This is hopefully fixed by the
-  November 2016 version.
-
- Also, when filtering the search with a `dir:` clause, an absolute path
-  should be specified as `/c/mydir` instead of `c:/mydir`
-
- There is no real-time or scheduled indexing as on Linux. For now, you
-  create and update the index by using the `File` menu (or executing
-  `recollindex.exe` from a command window).
-
-== Change Log
-
-Changes in 20161115
-
- File path names case sensitivity and other small path issues should be fixed.
- Based on Xapian 1.4. New stemming languages are available (e.g. Arabic).
- Fixed date display encoding issues.
-
-Changes in 20160414
-
- The setup script has changed back to needing administrator rights,
-  because this is what is convenient for most people. Use the installation
-  directory archive to install in a non-standard location without admin
-  rights.
- Fixed a bug which had the whole indexing stop if a script would time out
-  on a specific file (it will very rarely happen that a pathologically bad
-  file can throw an input handler in a loop).
-
-
-Changes in 20160317
-
- Small change to the setup script so that administrative rights are not
-  required. 
-
-Changes/fixes in 20160129
-
- Changed the method used for checking that index data is up to date with
-  documents. This will impose a re-indexing of all data, but it was
-  necessary because the previous method was incorrect.
- Fixed crash which occurred after changing some configuration parameters.
- Warn when editing a temporary copy of a document (e.g. a temp file
-  extracted from a zip archive).
-  
-Changes in 20151202
-
- Fixed mbox parsing. This was getting the message separators completely
-  wrong, and taking a lot of time to do it. This should be especially
-  welcome by Thunderbird users.
-
- Fixed email attachment processing. A fault in the code which saved
-  attachment data to disk for further processing resulted in a practical
-  fuzzing experiment on the input processors. Especially, frequent crashes
-  in the image tag extractor caused very annoying Windows popups about
-  a Python error.
-
-Fixed in 20151115 and later
-
- A relatively rare crash which seemed to occur mostly on some email
-  messages
- Forgotten MIME settings for .cs, .js and .css
-
-Fixed in 20151112 and later
-
- Forgotten dll prevents the unrtf program from working, so no rtf indexing.
-
-Fixed in 20151109 (hopefully?)
-
- The GUI sometimes crashes when you click `Preview` or `Open`. This does
-  not occur often, and usually for one of the first tries after starting
-  the program. Don't despair. This seems to be fixed in the latest version
-  (20151109), but I am not 100% certain that it is gone.
-
-++++
-<h2 id="comments">Comments</h2>
-
-      <div id="disqus_thread"></div>
-      <script type="text/javascript">
-        var disqus_shortname = 'lesbonscomptes'; 
-        (function() {
-            var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
-            dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
-            (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
-        })();
-      </script>
-      <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
-      <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
-++++
--- a/website/perfs.html
+++ b/website/perfs.html
@ -1,416 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-
-<html>
-  <head>
-    <title>RECOLL indexing performance and index sizes</title>
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description" content=
-    "recoll is a simple full-text search system for unix and linux based on the powerful and mature xapian engine">
-    <meta name="Keywords" content=
-      "full text search,fulltext,desktop search,unix,linux,solaris,open source,free">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content=
-    "text/html; charset=iso-8859-1">
-    <meta name="robots" content="All,Index,Follow">
-    <link type="text/css" rel="stylesheet" href="styles/style.css">
-  </head>
-
-  <body>
-
-    <div class="rightlinks">
-      <ul>
-	<li><a href="index.html">Home</a></li>
-	<li><a href="pics/index.html">Screenshots</a></li>
-	<li><a href="download.html">Downloads</a></li>
-	<li><a href="doc.html">Documentation</a></li>
-      </ul>
-    </div>
-
-    <div class="content">
-
-      <h1>Recoll: Indexing performance and index sizes</h1>
-
-      <p>The time needed to index a given set of documents, and the
-	resulting index size depend on many factors.</p>
-
-      <p>The index size depends almost only on the size of the
-        uncompressed input text, and you can expect it to be roughly
-        of the same order of magnitude. Depending on the type of file,
-        the proportion of text to file size varies very widely, going
-        from close to 1 for pure text files to a very small factor
-        for, e.g., metadata tags in mp3 files.</p>
-
-      <p>Estimating indexing time is a much more complicated issue,
-        depending on the type and size of input and on system
-        performance. There is no general way to determine what part of
-        the hardware should be optimized. Depending on the type of
-        input, performance may be bound by I/O read or write
-        performance, CPU single-processing speed, or combined
-        multi-processing speed.</p>
-
-      <p>It should be noted that Recoll performance will not be an
-        issue for most people. The indexer can process 1000 typical
-        PDF files per minute, or 500 Wikipedia HTML pages per second
-        on medium-range hardware, meaning that the initial indexing of
-        a typical dataset will need a few dozen minutes at
-        most. Further incremental index updates will be much faster
-        because most files will not need to be processed again.</p>
-
-      <p>However, there are Recoll installations with
-        terabyte-sized datasets, on which indexing can take days. For
-        such operations (or even much smaller ones), it is very
-        important to know what kind of performance can be expected,
-        and what aspects of the hardware should be optimized.</p>
-
-      <p>In order to provide some reference points, I have run a
-        number of benchmarks on medium-sized datasets, using typical
-        mid-range desktop hardware, and varying the indexing
-        configuration parameters to show how they affect the results.</p>
-
-      <p>The following may help you check that you are getting typical
-        performance for your indexing, and give some indications about
-        what to adjust to improve it.</p>
-        
-      <p>From time to time, I receive a report about a system becoming
-        unusable during indexing. As far as I know, with the default
-        Recoll configuration, and barring an exceptional issue (bug),
-        this is always due to a system problem (typically bad hardware
-        such as a disk doing retries). The tests below were mostly run
-        while I was using the desktop, which never became
-        unusable. However, some tests rendered it less responsive and
-        this is noted with the results.</p>
-
-      <p>The following text refers to the indexing parameters without
-        further explanation. Here follow links to more explanation about the
-        <a href="http://www.lesbonscomptes.com/recoll/idxthreads/threadingRecoll.html#recoll.idxthreads.multistage">processing
-        model</a> and
-        <a href="https://www.lesbonscomptes.com/recoll/usermanual/webhelp/docs/RCL.INSTALL.CONFIG.RECOLLCONF.PERFS.html">configuration
-          parameters</a>.</p>
-      
-
-      <p>All tests were run without generating the stemming database or
-        aspell dictionary. These phases are relatively short and there
-        is nothing which can be optimized about them.</p>
-      
-      <h2>Hardware</h2>
-
-      <p>The tests were run on what could be considered a mid-range
-        desktop PC:
-        <ul>
-          <li>Intel Core I7-4770T CPU: 2.5 Ghz, 4 physical cores, and
-            hyper-threading for a total of 8 hardware threads</li>
-          <li>8 GBytes of RAM</li>
-          <li>Asus H87I-Plus motherboard, Samsung 850 EVO SSD storage</li>
-        </ul>
-      </p>
-
-      <p>This is usually a fanless PC, but I did run a fan on the
-        external case fins during some of the tests (esp. PDF
-        indexing), because the CPU was running a bit too hot.</p>
-
-
-      <h2>Indexing PDF files</h2>
-      
-
-      <p>The tests were run on 18000 random PDFs harvested on
-        Google, with a total size of around 30 GB, using Recoll 1.22.3
-        and Xapian 1.2.22. The resulting index size was 1.2 GB.</p>
-
-      <h3>PDF: storage</h3>
-
-      <p>Typical PDF files have a low text to file size ratio, and a
-        lot of data needs to be read for indexing. With the test
-        configuration, the indexer needs to read around 45 MBytes / S
-        from multiple files. This means that input storage makes a
-        difference and that you need an SSD or a fast array for
-        optimal performance.</p>
-
-      <table border=1>
-	<thead>
-	  <tr>
-	    <th>Storage</th>
-	    <th>idxflushmb</th>
-	    <th>thrTCounts</th>
-	    <th>Real Time</th>
-	  </tr>
-	<tbody>
-	  <tr>
-	    <td>NFS drive (gigabit)</td>
-	    <td>200</td>
-	    <td>6/4/1</td>
-	    <td>24m40</td>
-	  </tr>
-	  <tr>
-	    <td>local SSD</td>
-	    <td>200</td>
-	    <td>6/4/1</td>
-	    <td>11m40</td>
-	  </tr>
-	</tbody>
-      </table>
-        
-
-      <h3>PDF: threading</h3>
-
-      <p>Because PDF files are bulky and complicated to process, the
-        dominant step for indexing them is input processing. PDF text
-        extraction is performed by multiple instances of
-        the <i>pdftotext</i> program, and parallelisation works very
-        well.</p>
-
-      <p>The following table shows the indexing times with a variety
-        of threading parameters.</p>
-
-      <table border=1>
-	<thead>
-	  <tr>
-	    <th>idxflushmb</th>
-	    <th>thrQSizes</th>
-	    <th>thrTCounts</th>
-	    <th>Time R/U/S</th>
-	  </tr>
-          <tbody>
-	  <tr>
-	    <td>200</td>
-	    <td>2/2/2</td>
-	    <td>2/1/1</td>
-	    <td>19m21</td>
-	  </tr>
-	  <tr>
-	    <td>200</td>
-	    <td>2/2/2</td>
-	    <td>10/10/1</td>
-	    <td>10m38</td>
-	  </tr>
-	  <tr>
-	    <td>200</td>
-	    <td>2/2/2</td>
-	    <td>100/10/1</td>
-	    <td>11m</td>
-	  </tr>
-          </tbody>
-      </table>
-
-      <p>10/10/1 was the best value for thrTCounts for this test. The
-        total CPU time was around 78 mn.</p>
-
-      <p>The last line shows the effect of a ridiculously high thread
-        count value for the input step, which is not much. Using
-        slightly lower values than the optimum has not much impact
-        either. The only thing which really degrades performance is
-        configuring fewer threads than available from the hardware.</p>
-
-      <p>With the optimal parameters above, the peak recollindex
-        resident memory size is around 930 MB, to which we should add
-        ten instances of pdftotext (10MB typical), and of the
-        rclpdf.py Python input handler (around 15 MB each). This means
-        that the total resident memory used by indexing is around 1200
-        MB, quite a modest value in 2016.</p>
-
-
-      <h3>PDF: Xapian flushes</h3>
-
-      <p>idxflushmb has practically no influence on the indexing time
-        (tested from 40 to 1000), which is not too surprising because
-        the Xapian index size is very small relatively to the input
-        size, so that the cost of Xapian flushes to disk is not very
-        significant. The value of 200 used for the threading tests
-        could be lowered in practise, which would decrease memory
-        usage and not change the indexing time significantly.</p>
-
-      <h3>PDF: conclusion</h3>
-
-      <p>For indexing PDF files, you need many cores and a fast
-        input storage system. Neither single-thread performance nor
-        amount of memory will be critical aspects.</p>
-
-      <p>Running the PDF indexing tests had no influence on the system
-        "feel", I could work on it just as if it were quiescent.</p>
-
-
-      <h2>Indexing HTML files</h2>
-
-      <p>The tests were run on an (old) French Wikipedia dump: 2.9
-        million HTML files stored in 42000 directories, for an
-        approximate total size of 41 GB (average file size
-        14 KB).
-
-        <p>The files are stored on a local SSD. Just reading them with
-          find+cpio takes close to 8 mn.</p>
-
-        <p>The resulting index has a size of around 30 GB.</p>
-
-        <p>I was too lazy to extract a 3 million entry tar file on a
-          spinning disk, so all tests were performed with the data
-          stored on a local SSD.</p>
-
-        <p>For this test, the indexing time is dominated by the Xapian
-          index updates. As these are single threaded, only the flush
-          interval has a real influence.</p>
-
-      <table border=1>
-	<thead>
-	  <tr>
-	    <th>idxflushmb</th>
-	    <th>thrQSizes</th>
-	    <th>thrTCounts</th>
-	    <th>Time R/U/S</th>
-	  </tr>
-          <tbody>
-	  <tr>
-	    <td>200</td>
-	    <td>2/2/2</td>
-	    <td>2/1/1</td>
-	    <td>88m</td>
-	  </tr>
-	  <tr>
-	    <td>200</td>
-	    <td>2/2/2</td>
-	    <td>6/4/1</td>
-	    <td>91m</td>
-	  </tr>
-	  <tr>
-	    <td>200</td>
-	    <td>2/2/2</td>
-	    <td>1/1/1</td>
-	    <td>96m</td>
-	  </tr>
-	  <tr>
-	    <td>100</td>
-	    <td>2/2/2</td>
-	    <td>1/2/1</td>
-	    <td>120m</td>
-	  </tr>
-	  <tr>
-	    <td>100</td>
-	    <td>2/2/2</td>
-	    <td>6/4/1</td>
-	    <td>121m</td>
-	  </tr>
-	  <tr>
-	    <td>40</td>
-	    <td>2/2/2</td>
-	    <td>1/2/1</td>
-	    <td>173m</td>
-	  </tr>
-          </tbody>
-      </table>
-
-
-      <p>The indexing process becomes quite big (resident size around
-        4GB), and the combination of high I/O load and high memory
-        usage makes the system less responsive at times (but not
-        unusable). As this happens principally when switching
-        applications, my guess would be that some program pages
-        (e.g. from the window manager and X) get flushed out and take
-        time to be read back in, during which the display appears
-        frozen.</p>
-
-      <p>For this kind of data, single-threaded CPU performance and
-        storage write speed can make a difference. Multithreading does
-        not help.</p>
-
-      <h2>Adjusting hardware to improve indexing performance</h2>
-
-      <p>I think that the following multi-step approach has a good
-        chance to improve performance:
-        <ul>
-          <li>Check that multithreading is enabled (it is, by default
-            with recent Recoll versions).</li>
-          <li>Increase the flush threshold until the machine begins to
-            have memory issues. Maybe add memory.</li>
-          <li>Store the index on an SSD. If possible, also store the
-            data on an SSD. Actually, when using many threads, it is
-            probably almost more important to have the data on an
-            SSD.</li>
-          <li>If you have many files which will need temporary copies
-            (email attachments, archive members, compressed files): use
-            a memory temporary directory. Add memory.</li>
-          <li>More CPUs...</li>
-        </ul>
-      </p>
-
-      <p>At some point, the index updating and writing may become the
-        bottleneck (this depends on the data mix; it happens very quickly
-        with HTML or text files). As far as I can tell, the only possible
-        approach is then to partition the index. You can query the
-        multiple Xapian indices either by using the Recoll external
-        index capability, or by actually merging them with
-        xapian-compact.</p>
-
-
-
-      <h5>Old benchmarks</h5>
-
-      <p>To provide a point of comparison for the evolution of
-        hardware and software...</p>
-      
-      <p>The following very old data was obtained (around 2007?) on a
-        machine with a 1800 MHz AMD Duron CPU, 768 MB of RAM, and a
-        7200 RPM 160 GB IDE disk, running Suse 10.1.</p>
-
-      <p><b>recollindex</b> (version 1.8.2 with xapian 1.0.0) is
-	executed with the default flush threshold value. 
-	The process memory usage is the one reported by <b>ps</b>.</p>
-
-      <table border=1>
-	<thead>
-	  <tr>
-	    <th>Data</th>
-	    <th>Data size</th>
-	    <th>Indexing time</th>
-	    <th>Index size</th>
-	    <th>Peak process memory usage</th>
-	  </tr>
-	<tbody>
-	  <tr>
-	    <td>Random pdfs harvested on Google</td>
-	    <td>1.7 GB, 3564 files</td>
-	    <td>27 mn</td>
-	    <td>230 MB</td>
-	    <td>225 MB</td>
-	  </tr>
-	  <tr>
-	    <td>Ietf mailing list archive</td>
-	    <td>211 MB, 44,000 messages</td>
-	    <td>8 mn</td>
-	    <td>350 MB</td>
-	    <td>90 MB</td>
-	  </tr>
-	  <tr>
-	    <td>Partial Wikipedia dump</td>
-	    <td>15 GB, one million files</td>
-	    <td>6H30</td>
-	    <td>10 GB</td>
-	    <td>324 MB</td>
-	  </tr>
-	  <tr>
-	    <!-- DB: ndocs 3564 lastdocid 3564 avglength 6460.71 -->
-	    <td>Random pdfs harvested on Google<br>
-	    Recoll 1.9, <em>idxflushmb</em> set to 10</td>
-	    <td>1.7 GB, 3564 files</td>
-	    <td>25 mn</td>
-	    <td>262 MB</td>
-	    <td>65 MB</td>
-	  </tr>
-	</tbody>
-      </table>
-
-      <p>Notice how the index size for the mail archive is bigger than
-	the data size. Myriads of small pure text documents will do
-	this. The expansion factor would of course be even worse with
-	compressed folders (the test was on uncompressed
-	data).</p>
-
-      <p>The last test was performed with Recoll 1.9.0 which has an
-	adjustable flush threshold (<em>idxflushmb</em> parameter), here
-	set to 10 MB. Notice the much lower peak memory usage, with no
-	performance degradation. The resulting index is bigger though;
-	the exact reason is not known to me, but it is possibly because of
-	additional fragmentation.</p>
-
-    </div>
-  </body>
-</html>
-
--- a/website/pics/00REMAKEALB.sh
+++ b/website/pics/00REMAKEALB.sh
@ -1,2 +0,0 @@
-#!/bin/sh
-onlylist=1 photalb . .
--- a/website/pics/index.html
+++ b/website/pics/index.html
@ -1,44 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-
-<html>
-  <head>
-    <title>Recoll screenshots</title>
-
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description" content=
-    "recoll is a simple full-text search system for unix and linux
-    based on the powerful and mature xapian engine">
-    <meta name="Keywords" content=
-    "full text search, desktop search, unix, linux">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content="text/html; charset=utf-8">
-    <meta name="robots" content="All,Index,Follow">
-
-    <link type="text/css" rel="stylesheet" href="../styles/style.css">
-  </head>
-
-  <body>
-    
-    <div class="content">
-
-      <h1>Recoll Screenshots</h1>
-	<li><a href="../index.html">Back to Recoll home</a></li>
-      
-<table>
-<tr>
-        <td align="center"><a href="recoll0.html"><img src="recoll0-thumb.png"></a></td>
-        <td align="center"><a href="result-table.html"><img src="result-table-thumb.png"></a></td>
-        <td align="center"><a href="recoll1.html"><img src="recoll1-thumb.png"></a></td>
-        <td align="center"><a href="recoll2.html"><img src="recoll2-thumb.png"></a></td>
-</tr>
-<tr>
-        <td align="center"><a href="recoll3.html"><img src="recoll3-thumb.png"></a></td>
-        <td align="center"><a href="recoll4.html"><img src="recoll4-thumb.png"></a></td>
-        <td align="center"><a href="recoll5.html"><img src="recoll5-thumb.png"></a></td>
-        <td align="center"><a href="recoll_chinese.html"><img src="recoll_chinese-thumb.png"></a></td>
-</tr>
-<tr>
-        <td align="center"><a href="recoll-HTML_search_results.html"><img src="recoll-HTML_search_results-thumb.png"></a></td>
-</tr></table>
-</body></html>
--- a/website/pics/index.html.head
+++ b/website/pics/index.html.head
@ -1,27 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-
-<html>
-  <head>
-    <title>Recoll screenshots</title>
-
-    <meta name="generator" content="HTML Tidy, see www.w3.org">
-    <meta name="Author" content="Jean-Francois Dockes">
-    <meta name="Description" content=
-    "recoll is a simple full-text search system for unix and linux
-    based on the powerful and mature xapian engine">
-    <meta name="Keywords" content=
-    "full text search, desktop search, unix, linux">
-    <meta http-equiv="Content-language" content="en">
-    <meta http-equiv="content-type" content="text/html; charset=utf-8">
-    <meta name="robots" content="All,Index,Follow">
-
-    <link type="text/css" rel="stylesheet" href="../styles/style.css">
-  </head>
-
-  <body>
-    
-    <div class="content">
-
-      <h1>Recoll Screenshots</h1>
-	<li><a href="../index.html">Back to Recoll home</a></li>
-      
--- a/website/pics/mario-thumb.png
+++ b/website/pics/mario-thumb.png
--- a/website/pics/mario.html
+++ b/website/pics/mario.html
@ -1,13 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-  <head>
-    <title>Photo</title>
-  </head>
-  <body>
-    <p><a href="recoll-HTML_search_results.html">Prev</a> <a href="../index.html">Up</a> 
-          <a href="smile.html">Next</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
-	  <a href="mario.png">Image</a></p>
-    <p></p>
-    <p><img height="90%" src="mario.png"></p>
-  </body>
-</html>
--- a/website/pics/mario.png
+++ b/website/pics/mario.png
--- a/website/pics/piclist.txt
+++ b/website/pics/piclist.txt
@ -1,9 +0,0 @@
-recoll0.png 
-result-table.png
-recoll1.png
-recoll2.png
-recoll3.png
-recoll4.png
-recoll5.png
-recoll_chinese.png
-recoll-HTML_search_results.png 
--- a/website/pics/recoll-HTML_search_results-thumb.png
+++ b/website/pics/recoll-HTML_search_results-thumb.png
--- a/website/pics/recoll-HTML_search_results.html
+++ b/website/pics/recoll-HTML_search_results.html
@ -1,40 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-  <head>
-    <title>Photo</title>
-  </head>
-  <body>
-    <p><a href="recoll_chinese.html">Prev</a> <a href=".">Up</a> 
-          <a href="recoll0.html">Next</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
-	  <a href="recoll-HTML_search_results.png">Image</a></p>
-    <p>A customized result list, thanks to Michael Croes. The html code follows, 
-it should be pasted into the
-<i>Preferences->Query&nbsp;Configuration->Result&nbsp;paragraph&nbsp;format&nbsp;string</i> entry.
-
-<pre>
-&lt;table border="1" bgcolor="lightyellow">
-    &lt;tr>
-        &lt;td rowspan="4" width="40px" align="center"
-                valign="center">
-            &lt;img src="%I" width="32" height="32">
-            &lt;p>&lt;b>%R&lt;/b>&lt;/p>
-            &lt;p>&lt;a href="P%N">Aperçu&lt;/a>&lt;/p>
-        &lt;/td>
-        &lt;th colspan="3" bgcolor="lightgrey">%T&lt;/th>
-    &lt;/tr>
-    &lt;tr>
-        &lt;td align="center">%M&lt;/td>
-        &lt;td align="center">%D&lt;/td>
-        &lt;td align="center">%S&lt;/td>
-    &lt;/tr>
-    &lt;tr>
-        &lt;td colspan="3">&lt;a href="E%N">%U&lt;/a>&lt;/td>
-    &lt;/tr>
-    &lt;tr>
-        &lt;td colspan="3">%A&lt;/td>
-    &lt;/tr>
-&lt;/table>
-</pre></p>
-    <p><img height="90%" src="recoll-HTML_search_results.png"></p>
-  </body>
-</html>
--- a/website/pics/recoll-HTML_search_results.png
+++ b/website/pics/recoll-HTML_search_results.png
--- a/website/pics/recoll-HTML_search_results.txt
+++ b/website/pics/recoll-HTML_search_results.txt
@ -1,28 +0,0 @@
-A customized result list, thanks to Michael Croes. The html code follows, 
-it should be pasted into the
-<i>Preferences->Query&nbsp;Configuration->Result&nbsp;paragraph&nbsp;format&nbsp;string</i> entry.
-
-<pre>
-&lt;table border="1" bgcolor="lightyellow">
-    &lt;tr>
-        &lt;td rowspan="4" width="40px" align="center"
-                valign="center">
-            &lt;img src="%I" width="32" height="32">
-            &lt;p>&lt;b>%R&lt;/b>&lt;/p>
-            &lt;p>&lt;a href="P%N">Aperçu&lt;/a>&lt;/p>
-        &lt;/td>
-        &lt;th colspan="3" bgcolor="lightgrey">%T&lt;/th>
-    &lt;/tr>
-    &lt;tr>
-        &lt;td align="center">%M&lt;/td>
-        &lt;td align="center">%D&lt;/td>
-        &lt;td align="center">%S&lt;/td>
-    &lt;/tr>
-    &lt;tr>
-        &lt;td colspan="3">&lt;a href="E%N">%U&lt;/a>&lt;/td>
-    &lt;/tr>
-    &lt;tr>
-        &lt;td colspan="3">%A&lt;/td>
-    &lt;/tr>
-&lt;/table>
-</pre>
--- a/website/pics/recoll0-thumb.png
+++ b/website/pics/recoll0-thumb.png
--- a/website/pics/recoll0.html
+++ b/website/pics/recoll0.html
@ -1,13 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-  <head>
-    <title>Photo</title>
-  </head>
-  <body>
-    <p><a href=".">Prev</a> <a href=".">Up</a> 
-          <a href="result-table.html">Next</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
-	  <a href="recoll0.png">Image</a></p>
-    <p>Search results.</p>
-    <p><img height="90%" src="recoll0.png"></p>
-  </body>
-</html>
--- a/website/pics/recoll0.png
+++ b/website/pics/recoll0.png
--- a/website/pics/recoll0.txt
+++ b/website/pics/recoll0.txt
@ -1,2 +0,0 @@
-Search results.
-
--- a/website/pics/recoll1-thumb.png
+++ b/website/pics/recoll1-thumb.png
--- a/website/pics/recoll1.html
+++ b/website/pics/recoll1.html
@ -1,13 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-  <head>
-    <title>Photo</title>
-  </head>
-  <body>
-    <p><a href="result-table.html">Prev</a> <a href=".">Up</a> 
-          <a href="recoll2.html">Next</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
-	  <a href="recoll1.png">Image</a></p>
-    <p>A result list with a preview window open.</p>
-    <p><img height="90%" src="recoll1.png"></p>
-  </body>
-</html>
--- a/website/pics/recoll1.png
+++ b/website/pics/recoll1.png
--- a/website/pics/recoll1.txt
+++ b/website/pics/recoll1.txt
@ -1,4 +0,0 @@
-A result list with a preview window open.
-
-
-
--- a/website/pics/recoll2-thumb.png
+++ b/website/pics/recoll2-thumb.png
--- a/website/pics/recoll2.html
+++ b/website/pics/recoll2.html
@ -1,13 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-  <head>
-    <title>Photo</title>
-  </head>
-  <body>
-    <p><a href="recoll1.html">Prev</a> <a href=".">Up</a> 
-          <a href="recoll3.html">Next</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
-	  <a href="recoll2.png">Image</a></p>
-    <p>The two tabs in the advanced search dialog.</p>
-    <p><img height="90%" src="recoll2.png"></p>
-  </body>
-</html>
--- a/website/pics/recoll2.png
+++ b/website/pics/recoll2.png
--- a/website/pics/recoll2.txt
+++ b/website/pics/recoll2.txt
@ -1 +0,0 @@
-The two tabs in the advanced search dialog.
--- a/website/pics/recoll3-thumb.png
+++ b/website/pics/recoll3-thumb.png
--- a/website/pics/recoll3.html
+++ b/website/pics/recoll3.html
@ -1,14 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-  <head>
-    <title>Photo</title>
-  </head>
-  <body>
-    <p><a href="recoll2.html">Prev</a> <a href=".">Up</a> 
-          <a href="recoll4.html">Next</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
-	  <a href="recoll3.png">Image</a></p>
-    <p>A result list from which the native application (firefox)
-was started by clicking the Edit link.</p>
-    <p><img height="90%" src="recoll3.png"></p>
-  </body>
-</html>
--- a/website/pics/recoll3.png
+++ b/website/pics/recoll3.png
--- a/website/pics/recoll3.txt
+++ b/website/pics/recoll3.txt
@ -1,2 +0,0 @@
-A result list from which the native application (firefox)
-was started by clicking the Edit link.
--- a/website/pics/recoll4-thumb.png
+++ b/website/pics/recoll4-thumb.png
--- a/website/pics/recoll4.html
+++ b/website/pics/recoll4.html
@ -1,14 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-  <head>
-    <title>Photo</title>
-  </head>
-  <body>
-    <p><a href="recoll3.html">Prev</a> <a href=".">Up</a> 
-          <a href="recoll5.html">Next</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
-	  <a href="recoll4.png">Image</a></p>
-    <p>The document history window looks a little like a result list,
-I'm afraid...</p>
-    <p><img height="90%" src="recoll4.png"></p>
-  </body>
-</html>
--- a/website/pics/recoll4.png
+++ b/website/pics/recoll4.png
--- a/Show More
+++ b/Show More