392 changed files with 38002 additions and 86573 deletions
--- a/.gitignore
+++ b/.gitignore
@ -24,9 +24,8 @@ build-*-Debug
 build-*-Release
 libtool
 ptrans
 **/Makefile.in
 src/Makefile
-src/rclgrep/Makefile
+src/Makefile.in
 src/TAGS
 src/aclocal.m4
 src/autom4te.cache
@ -78,6 +77,7 @@ src/recollq
 src/sampleconf/rclmon.sh
 src/sampleconf/recoll.conf
 src/testmains/Makefile
 src/testmains/Makefile.in
 src/xadump
 stamp-h1
 tests/casediac/aspdict.en.rws
@ -103,7 +103,6 @@ tests/indexedmimetypes/missing
 tests/indexedmimetypes/recoll.conf
 tests/indexedmimetypes/xapiandb
 tests/xattr/mimeview
 unac/autom4te.cache
 website/faqsandhowtos/*.html
 website/idxthreads/forkingRecoll.html
 website/idxthreads/xapDocCopyCrash.html
--- a/packaging/FreeBSD/recoll/pkg-plist
+++ b/packaging/FreeBSD/recoll/pkg-plist
@ -14,8 +14,8 @@ share/pixmaps/recoll.png
 %%DATADIR%%/filters/hotrecoll.py	
 %%DATADIR%%/filters/rclabw
 %%DATADIR%%/filters/rclaptosidman
-%%DATADIR%%/filters/rclaudio.py
+%%DATADIR%%/filters/rclaudio
-%%DATADIR%%/filters/rclchm.py
+%%DATADIR%%/filters/rclchm
 %%DATADIR%%/filters/rcldjvu
 %%DATADIR%%/filters/rcldoc
 %%DATADIR%%/filters/rcldvi
@ -23,11 +23,11 @@ share/pixmaps/recoll.png
 %%DATADIR%%/filters/rclfb2
 %%DATADIR%%/filters/rclflac
 %%DATADIR%%/filters/rclgaim
-%%DATADIR%%/filters/rclics.py
+%%DATADIR%%/filters/rclics
 %%DATADIR%%/filters/rclid3
 %%DATADIR%%/filters/rclimg
-%%DATADIR%%/filters/rclinfo.py
+%%DATADIR%%/filters/rclinfo
-%%DATADIR%%/filters/rclkar.py
+%%DATADIR%%/filters/rclkar
 %%DATADIR%%/filters/rclkwd
 %%DATADIR%%/filters/rcllatinclass.py
 %%DATADIR%%/filters/rcllatinstops.zip
@ -41,7 +41,7 @@ share/pixmaps/recoll.png
 %%DATADIR%%/filters/rclps
 %%DATADIR%%/filters/rclpurple
 %%DATADIR%%/filters/rclpython
-%%DATADIR%%/filters/rclrar.py
+%%DATADIR%%/filters/rclrar
 %%DATADIR%%/filters/rclrtf
 %%DATADIR%%/filters/rclscribus
 %%DATADIR%%/filters/rclshowinfo
@ -51,11 +51,11 @@ share/pixmaps/recoll.png
 %%DATADIR%%/filters/rcltex
 %%DATADIR%%/filters/rcltext
 %%DATADIR%%/filters/rcluncomp
-%%DATADIR%%/filters/rclwar.py
+%%DATADIR%%/filters/rclwar
 %%DATADIR%%/filters/rclwpd
 %%DATADIR%%/filters/rclxls
-%%DATADIR%%/filters/rclzip.py
+%%DATADIR%%/filters/rclzip
-%%DATADIR%%/filters/rcl7z.py
+%%DATADIR%%/filters/rcl7z
 %%DATADIR%%/filters/xdg-open
 %%DATADIR%%/images/aptosid-book.png
 %%DATADIR%%/images/aptosid-manual.png
--- a/packaging/debian/buildppa.sh
+++ b/packaging/debian/buildppa.sh
@ -5,30 +5,30 @@
 # sudo apt-get install pkg-kde-tools  cdbs
 # Active series:
 # 16.04LTS xenial 2021-04
 # 18.04LTS bionic 2023-04
 # 20.04LTS focal  2025-04
-# 22.04LTS jammy  2027-04
+# 20.10    groovy 2021-07
-SERIES="bionic focal jammy kinetic"
+# 21.04    hirsute 2022-01
 PPA_KEYID=7808CE96D38B9201
-RCLVERS=1.33.1
+RCLVERS=1.31.0
 SCOPEVERS=1.20.2.4
-GSSPVERS=1.1.1
+GSSPVERS=1.1.0
 PPAVERS=1
 # 
-#Y=/y
+RCLSRC=/y/home/dockes/projets/fulltext/recoll/src
-Y=
+SCOPESRC=/y/home/dockes/projets/fulltext/unity-scope-recoll
-RCLSRC=${Y}/home/dockes/projets/fulltext/recoll/src
+GSSPSRC=/y/home/dockes/projets/fulltext/gssp-recoll
-SCOPESRC=${Y}/home/dockes/projets/fulltext/unity-scope-recoll
+RCLDOWNLOAD=/y/home/dockes/projets/lesbonscomptes/recoll
 GSSPSRC=${Y}/home/dockes/projets/fulltext/gssp-recoll
 RCLDOWNLOAD=${Y}/home/dockes/projets/lesbonscomptes/recoll
 PPANAME=recoll15-ppa
 PPANAME=recollexp1-ppa
 #PPANAME=recoll-webengine-ppa
 case $RCLVERS in
    [23]*) PPANAME=recollexp-ppa;;
    *)     PPANAME=recoll15-ppa;;
 esac
 #PPANAME=recollexp-ppa
 echo "PPA: $PPANAME. Type CR if Ok, else ^C"
 read rep
@ -49,8 +49,8 @@ check_recoll_orig()
 ####### QT
 debdir=debian
-series=$SERIES
+series="bionic focal groovy hirsute"
-#series=bionic
+series=
 if test "X$series" != X ; then
    check_recoll_orig
@ -77,7 +77,7 @@ for series in $series ; do
      -e s/PPAVERS/${PPAVERS}/g \
      < ${debdir}/changelog > recoll-${RCLVERS}/debian/changelog
-  (cd recoll-${RCLVERS};debuild -d -k$PPA_KEYID -S -sa)  || break
+  (cd recoll-${RCLVERS};debuild -k$PPA_KEYID -S -sa)  || break
  dput $PPANAME recoll_${RCLVERS}-1~ppa${PPAVERS}~${series}1_source.changes
 done
@ -85,8 +85,8 @@ done
 ### KIO.
-series=$SERIES
+series="bionic focal groovy hirsute"
-series=
+#series=
 debdir=debiankio
 topdir=kio-recoll-${RCLVERS}
@ -125,7 +125,7 @@ for svers in $series ; do
 done
 ### GSSP
-series=$SERIES
+series="bionic focal groovy hirsute"
 series=
 debdir=debiangssp
--- a/packaging/debian/debian/changelog
+++ b/packaging/debian/debian/changelog
@ -1,169 +1,3 @@
 recoll (1.33.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Small updates to the build files to accommodate the new rclgrep utility.
  * New textunknownasplain configuration variable to index all files with no known association
    but identified as text/xxx by the "file" or "xdg-mime" command.
  * Make sure that a single double-quoted word is not  stem-expanded (act as if it was
    capitalized). Expanding a quoted term is unexpected.
  * Apply stemming to terms containing a single dash. These were not expanded before.
  * Linux real time: fix monitoring under topdirs members which are symbolic links.
  * Fix the GUI simple search which was broken in 1.33.0 when switching filters on/off
  * Exclude Tamil characters from unac processing (experimental for now).
  * Windows GUI directory side filters: the computed paths were wrong on Windows.
 -- Jean-Francois Dockes <jf@dockes.org>  Sun, 25 Sep 2022 19:19:00 +0200
 recoll (1.33.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Query processing: simplify queries a bit more before sending to Xapian, allows using OP_FILTER
    for path filtering. -> Medium version bump.
  * GUI: allow specifying a fixed geometry for the results list viewport by setting
    RECOLL_RESULTS_GEOMETRY=widthxheight . For people with fixed-width result formats CSS.
  * recollq: add option to extract a result document into a file.
  * Replace application/x-flac with audio/flac for FLAC audio files.
  * Fix web queue processing for non-default configuration directories.
  * Fix encoding issue in pdf attachment extraction.
  * GUI: result list: fix issue with webengine builds not displaying Icons. Paging still not working
    right with webengine (QTBUG-105842). Main builds revert/remain to webkit.
  * Misc. small adjustments.
 -- Jean-Francois Dockes <jf@dockes.org>  Mon, 30 Aug 2022 10:59:00 +0200
 recoll (1.32.8-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Add environment variable RECOLL_RESULTS_GEOMETRY for forcing a fixed geometry to the results
    list viewport.
  * Fix result list Qt Webengine icon display issue.
  * Improve result list paging behaviour. Only fully works with Qt Webkit.
  * recollq: add option to extract result document to a file.
 -- Jean-Francois Dockes <jf@dockes.org>  Sun, 21 Aug 2022 07:59:00 +0200
 recoll (1.32.7-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * kio_recoll: updated to work with newer kf5 versions (it seems that 5.96 broke it at least on
    arch linux).
  * rclaudio: fix extracting comment fields from flac files.
  * Python code preview:  get rid of spurious encoding value output.
  * Fix glitch in Qt GUI when switching between list and table display.
 -- Jean-Francois Dockes <jf@dockes.org>  Sun, 07 Aug 2022 17:42:00 +0200
 recoll (1.32.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * GUI: switch to using Qt-Webengine instead of Qt-Webkit because of CSS support issues in Webkit.
  * GUI: result list paragraph format. Preserve unquoted % characters if there is no matching
    translation. User manual: document the need to quote % as %% anyway.
  * GUI: result list devel/debug. Add parameter to dump the HTML sent to the engine.
 -- Jean-Francois Dockes <jf@dockes.org>  Tue, 05 Jul 2022 09:56:00 +0200
 recoll (1.32.4-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Try to improve font size consistency by using px sizes everywhere.
  * Fix Increase/Decrease font size menu options.
  * Allow displaying line numbers in snippets.
 -- Jean-Francois Dockes <jf@dockes.org>  Wed, 29 Jun 2022 09:36:00 +0200
 recoll (1.32.3-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Fix issues when opening a file with %F (parent of subdocument): avoid creating a temporary file
    when this can be avoided.
 -- Jean-Francois Dockes <jf@dockes.org>  Tue, 21 Jun 2022 20:51:00 +0200
 recoll (1.32.2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Restore result list popup menu function when using webengine. This was broken in 1.32.1.
  * Show progress dialog when result list abstracts generation takes too long
 -- Jean-Francois Dockes <jf@dockes.org>  Tue, 14 Jun 2022 07:51:00 +0200
 recoll (1.32.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * GUI side panel filters: make sure the filter is applied even if set before
    the query.
  * GUI side panel directory filter: compute the tree from the index, not the
    file system, to allow filtering data from external indexes. Update the tree
    when an indexing completes.
  * Implement whole UI scaling factor (fonts only, no icons).
  * Orgmode: add orgmodesubdocs configuration variable to decide if we index
    whole files or create subdocuments for nodes. Also index text before the
    first heading.
  * GUI: fix path translation for importing an index from Windows.
 -- Jean-Francois Dockes <jf@dockes.org>  Fri, 20 May 2022 10:55:00 +0200
 recoll (1.32.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * GUI: added a collapsible side pane for quick filtering on dates or
    directories.
  * Showing duplicates now uses a spreadsheet like the result table for
    easy access to the duplicate files.
  * Fixed the temporary copy open dialog (again!).
  * The default mimeview and mimeconf configuration files were
    separated into generic and system-specific parts to avoid update
    errors (no consequences for users).
  * Renamed all Python input handlers with a .py extension. This is
    relied upon on Windows rather than listing an explicit Python
    interpreter.
  * Added %l specification to viewer definitions for opening at a
    specific line.
 -- Jean-Francois Dockes <jf@dockes.org>  Fri, 11 Mar 2022 18:17:00 +0100
 recoll (1.31.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Almost no change: translation files update.
 -- Jean-Francois Dockes <jf@dockes.org>  Sat, 20 Dec 2021 09:25:00 +0100
 recoll (1.31.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Fix annoying bug in tesseract OCR temporary files cleanup.
 -- Jean-Francois Dockes <jf@dockes.org>  Sat, 04 Dec 2021 10:05:00 +0100
 recoll (1.31.4-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Linux/Mac: Bug in threads management could result in index corruption or crash
    after signal interrupt.
 -- Jean-Francois Dockes <jf@dockes.org>  Thu, 25 Nov 2021 16:30:00 +0100
 recoll (1.31.3-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Highlighting for group (phrase/near): eliminate some spurious matches.
  * Fix page number string detection which could sometimes prevent correct
    highlighting in snippets.
  * Avoid query completer consuming excessive resources on unstripped
    indexes.
  * Fix some cases where different instances of the indexer could use
    different pid/lock files.
  * Fix processing on some unicode dash and apos character variations.
  * PST: fix indexing in marginal cases. Extract message dates.
 -- Jean-Francois Dockes <jf@dockes.org>  Sat, 13 Nov 2021 16:30:00 +0100
 recoll (1.31.2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Add support for .ipynb iPython/Jupyter notebook format.
  * Implement Alt+/ shortcut to search the menu entries and possibly execute the result.
  * Fix configuration GUI button margins on Mac OS.
  * Add *.pyc __pycache__ .pytest_cache .tox  and .direnv to the default skipped names list.
  * Add /opt/homebrew/bin to the helper search path when built under Mac Homebrew.
  * Linux: let recollindex adjust its OOM killer "badness" on startup.
  * simple search: add Ctrl+H as keyboard shortcut for "show history".
  * Renamed the fragment buttons configuration file from fragbuts.xml to fragment-buttons.xml.
  * Zip archives: set the modification date attribute for members.
  * ost/pst filter: fix not fetching the message dates.
  * Anchored searches: remove unwarranted slack increase. The anchor term should behave like a
    normal one for slack computations.
  * Fix djvu issues on Windows.
 -- Jean-Francois Dockes <jf@dockes.org>  Mon, 11 Oct 2021 10:51:00 +0200
 recoll (1.31.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * GUI: modified shortcuts were not read from the preferences !
--- a/packaging/debian/debian/control
+++ b/packaging/debian/debian/control
@ -3,19 +3,18 @@ Section: x11
 Priority: optional
 Maintainer: Jean-Francois Dockes <jfd@recoll.org>
 Build-Depends: bison,
-               debhelper (>= 10),
+               debhelper (>= 9),
               dh-python,
               dpkg-dev (>= 1.16.1~),
               libaspell-dev,
               libchm-dev,
 #               qtwebengine5-dev,
               libqt5webkit5-dev,
               libx11-dev,
               libxapian-dev (>= 1.2.0),
               libxslt1-dev,
               libz-dev,
               pkg-config,
-               python2-dev (>= 2.6.6-3~),
+               python-all-dev (>= 2.6.6-3~),
               python-setuptools,
               python3-all-dev,
               python3-setuptools,
--- a/packaging/debian/debian/control-bionic
+++ b/packaging/debian/debian/control-bionic
@ -1,115 +0,0 @@
 Source: recoll
 Section: x11
 Priority: optional
 Maintainer: Jean-Francois Dockes <jfd@recoll.org>
 Build-Depends: bison,
               debhelper (>= 9),
               dh-python,
               dpkg-dev (>= 1.16.1~),
               libaspell-dev,
               libchm-dev,
               libqt5webkit5-dev,
 #               qtwebengine5-dev,
               libx11-dev,
               libxapian-dev (>= 1.2.0),
               libxslt1-dev,
               libz-dev,
               pkg-config,
               python-all-dev (>= 2.6.6-3~),
               python-setuptools,
               python3-all-dev,
               python3-setuptools,
               qtbase5-dev
 X-Python3-Version: >= 3.4
 Vcs-Git: https://salsa.debian.org/debian/recoll.git
 Vcs-Browser: https://salsa.debian.org/debian/recoll
 Homepage: https://www.lesbonscomptes.com/recoll
 Standards-Version: 4.2.1
 Package: recoll
 Architecture: all
 Depends: recollcmd, recollgui, ${misc:Depends}
 Description: Personal full text search package
 This package is a personal full text search package based on a very strong
 backend (Xapian), for which it provides an easy to use and feature-rich
 interface.
 .
 Features:
  * Qt-based GUI.
  * Supports the following document types (and their compressed versions)
   - Natively: text, html, OpenOffice files,  excel, ppt, maildir and
     mailbox (Mozilla and IceDove mail) with attachments, pidgin log files
   - With external helpers:  pdf (pdftotext), postscript (ghostscript), msword
     (antiword), rtf (unrtf). And others...
  * Powerful query facilities, with boolean searches, phrases, filter on file
    types and directory tree.
  * Support for multiple charsets, Internal processing and storage uses Unicode
    UTF-8.
  * Stemming performed at query time (can switch stemming language after
    indexing).
  * Easy installation. No database daemon, web server or exotic language
    necessary.
  * The indexer can run either continuously or in batch.
 Package: recollcmd
 Architecture: any
 Breaks: recoll (<< 1.23.7-2)
 Replaces: recoll (<< 1.23.7-2)
 Depends: python3, ${misc:Depends}, ${shlibs:Depends}
 Recommends: antiword,
            aspell,
            groff,
            libimage-exiftool-perl,
            poppler-utils,
            python3-lxml,
            python3-recoll,
            python3-six,
            python3-mutagen,
            python3-rarfile,
            unrtf,
            unzip,
            xdg-utils
 Suggests: ghostscript,
          libinotifytools0,
          untex,
          wv
 Description: Command line programs for recoll
 This package supports indexing and command line querying.
 Package: recollgui
 Architecture: any
 Breaks: recoll (<< 1.23.7-2)
 Replaces: recoll (<< 1.23.7-2)
 Depends: recollcmd (= ${binary:Version}), ${misc:Depends}, ${shlibs:Depends}
 Description: GUI program and elements for recoll
 Main recoll GUI for configuring, controlling and querying recoll indexes.
 Package: python-recoll
 Architecture: any
 Section: python
 Depends: python2,
         recollcmd (= ${binary:Version}),
         ${misc:Depends},
         ${python:Depends},
         ${shlibs:Depends}
 Description: Python extension for recoll
 Personal full text search package which is based on a very strong backend
 (Xapian), for which it provides an easy to use and feature-rich interface.
 .
 This package provides a Python extension module for recoll which can be used to
 extend recoll such as an Ubuntu Unity Lens.
 Package: python3-recoll
 Architecture: any
 Section: python
 Depends: python3,
         recollcmd (= ${binary:Version}),
         ${misc:Depends},
         ${python3:Depends},
         ${shlibs:Depends}
 Description: Python extension for recoll (Python3)
 Personal full text search package which is based on a very strong backend
 (Xapian), for which it provides an easy to use and feature-rich interface.
 .
 This package provides a Python3 extension module for recoll which can be used to
 extend recoll such as an Ubuntu Unity Lens.
--- a/packaging/debian/debian/patches/mbox-use-streamptr-for-jessie.diff
+++ b/packaging/debian/debian/patches/mbox-use-streamptr-for-jessie.diff
@ -1,5 +1,5 @@
-diff --git a/src/internfile/mh_mbox.cpp b/src/internfile/mh_mbox.cpp
+diff --git a/internfile/mh_mbox.cpp b/srcinternfile/mh_mbox.cpp
-index c77d42c8..ccd6a613 100644
+index 2a0918cf..92ad7e23 100644
 --- a/internfile/mh_mbox.cpp
 +++ b/internfile/mh_mbox.cpp
@@ -27,6 +27,7 @@
@ -19,25 +19,19 @@ index c77d42c8..ccd6a613 100644
     int        msgnum{0}; // Current message number in folder. Starts at 1
     int64_t    lineno{0}; // debug 
     int64_t    fsize{0};
-@@ -322,13 +323,6 @@ void MimeHandlerMbox::clear_impl()
+@@ -321,7 +322,6 @@ void MimeHandlerMbox::clear_impl()
 {
     m->fn.erase();
     m->ipath.erase();
- 
+-    m->instream = ifstream();
 -    // We used to use m->instream = ifstream() which fails with some compilers, as the copy
 -    // constructor is marked deleted in standard c++ (works with many compilers though).
 -    if (m->instream.is_open()) {
 -        m->instream.close();
 -    }
 -    m->instream.clear();
 -
     m->msgnum = 0;
     m->lineno = 0;
     m->fsize = 0;
-@@ -346,8 +340,9 @@ bool MimeHandlerMbox::set_document_file_impl(const string&, const string &fn)
+@@ -339,8 +339,9 @@ bool MimeHandlerMbox::set_document_file_impl(const string&, const string &fn)
     LOGDEB("MimeHandlerMbox::set_document_file(" << fn << ")\n");
     clear_impl();
     m->fn = fn;
-    m->instream.open(fn.c_str(), std::ifstream::binary);
+-    m->instream = ifstream(fn.c_str(), std::ifstream::binary);
 -    if (!m->instream.good()) {
 +    m->instream = std::unique_ptr<ifstream>(
 +        new ifstream(fn.c_str(), std::ifstream::binary));
@ -45,7 +39,7 @@ index c77d42c8..ccd6a613 100644
         LOGSYSERR("MimeHandlerMail::set_document_file", "ifstream", fn);
         return false;
     }
-@@ -396,13 +391,13 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
+@@ -389,13 +390,13 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
                                    fsize)) < 0) {
         goto out;
     }
@ -63,7 +57,7 @@ index c77d42c8..ccd6a613 100644
         LOGSYSERR("tryUseCache", "getline", "");
         goto out;
     }
-@@ -411,7 +406,7 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
+@@ -404,7 +405,7 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
     if ((fromregex(line) ||
          ((quirks & MBOXQUIRK_TBIRD) && minifromregex(line)))  ) {
         LOGDEB0("MimeHandlerMbox: Cache: From_ Ok\n");
@ -72,7 +66,7 @@ index c77d42c8..ccd6a613 100644
         msgnum = mtarg -1;
         cachefound = true;
     } else {
-@@ -421,7 +416,7 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
+@@ -414,7 +415,7 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
 out:
     if (!cachefound) {
         // No cached result: scan.
@ -81,7 +75,7 @@ index c77d42c8..ccd6a613 100644
         msgnum = 0;
     }
     return cachefound;
-@@ -429,7 +424,7 @@ out:
+@@ -422,7 +423,7 @@ out:
 bool MimeHandlerMbox::next_document()
 {
@ -90,7 +84,7 @@ index c77d42c8..ccd6a613 100644
         LOGERR("MimeHandlerMbox::next_document: not open\n");
         return false;
     }
-@@ -465,10 +460,10 @@ bool MimeHandlerMbox::next_document()
+@@ -458,10 +459,10 @@ bool MimeHandlerMbox::next_document()
     msgtxt.erase();
     string line;
     for (;;) {
--- a/packaging/debian/debian/rules
+++ b/packaging/debian/debian/rules
@ -21,11 +21,10 @@ endif
 # main packaging script based on dh7 syntax
 %:
-	dh $@ --with python2 --with python3
+	dh $@ --parallel --with python2 --with python3
 override_dh_auto_configure:
-	dh_auto_configure -- --enable-recollq --enable-xadump --enable-webkit
+	dh_auto_configure -- --enable-recollq --enable-xadump
 #	dh_auto_configure -- --enable-recollq --enable-xadump --enable-webengine
 build3vers := $(shell py3versions -sv)
--- a/packaging/debian/debiankio/changelog
+++ b/packaging/debian/debiankio/changelog
@ -1,76 +1,3 @@
 kio-recoll (1.33.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Follow
 -- Jean-Francois Dockes <jf@dockes.org>  Mon, 30 Aug 2022 10:59:00 +0200
 kio-recoll (1.32.7-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Updated kio_recoll to work with newer kf5 versions (it seems that 5.96 broke it at least on
    arch).
 -- Jean-Francois Dockes <jf@dockes.org>  Sun, 07 Aug 2022 17:42:00 +0200
 kio-recoll (1.32.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Follow
 -- Jean-Francois Dockes <jf@dockes.org>  Tue, 05 Jul 2022 09:56:00 +0200
 kio-recoll (1.32.4-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Follow
 -- Jean-Francois Dockes <jf@dockes.org>  Wed, 29 Jun 2022 09:36:00 +0200
 kio-recoll (1.32.2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Follow
 -- Jean-Francois Dockes <jf@dockes.org>  Tue, 14 Jun 2022 07:51:00 +0200
 kio-recoll (1.32.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Follow
 -- Jean-Francois Dockes <jf@dockes.org>  Sun, 15 May 2022 08:07:00 +0200
 kio-recoll (1.32.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Follow
 -- Jean-Francois Dockes <jf@dockes.org>  Fri, 11 Mar 2022 18:17:00 +0100
 kio-recoll (1.31.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Follow
 -- Jean-Francois Dockes <jf@dockes.org>  Sat, 20 Dec 2021 09:25:00 +0100
 kio-recoll (1.31.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Follow recoll version
 -- Jean-Francois Dockes <jf@dockes.org>  Sat, 04 Dec 2021 10:05:00 +0100
 kio-recoll (1.31.4-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Follow recoll version
 -- Jean-Francois Dockes <jf@dockes.org>  Thu, 25 Nov 2021 16:30:00 +0100
 kio-recoll (1.31.3-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Follow recoll version
 -- Jean-Francois Dockes <jf@dockes.org>  Sat, 13 Nov 2021 16:30:00 +0200
 kio-recoll (1.31.2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Follow recoll version
 -- Jean-Francois Dockes <jf@dockes.org>  Mon, 11 Oct 2021 10:55:00 +0200
 kio-recoll (1.31.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * Follow recoll version
--- a/packaging/debian/debiankio/patches/dirif-fix-kio-version-include.patch
+++ b/packaging/debian/debiankio/patches/dirif-fix-kio-version-include.patch
@ -1,13 +0,0 @@
 diff --git a/src/kde/kioslave/kio_recoll/dirif.cpp b/src/kde/kioslave/kio_recoll/dirif.cpp
 index 4438a1e7..48284ece 100644
 --- a/kde/kioslave/kio_recoll/dirif.cpp
 +++ b/kde/kioslave/kio_recoll/dirif.cpp
@@ -35,7 +35,7 @@
 #include <QDebug>
 #include <QUrl>
 #include <QStandardPaths>
 -#include <KF5/kio_version.h>
 +#include <kio_version.h>
 #include "kio_recoll.h"
 #include "pathut.h"
--- a/packaging/debian/rclgrep/debian/README.Debian
+++ b/packaging/debian/rclgrep/debian/README.Debian
@ -1,8 +0,0 @@
 README for Debian
 -----------------
 The rclgrep package is a partial installation of the recollcmd package, with no
 Xapian dependency. It conflicts with recollcmd, which also provides the rclgrep
 command.
 -- Jean-Francois Dockes <jf@dockes.org>  Tue, 20 Sep 2022 08:32:00 +0200
--- a/packaging/debian/rclgrep/debian/changelog
+++ b/packaging/debian/rclgrep/debian/changelog
@ -1,7 +0,0 @@
 rclgrep (1.33.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
  * 1st version of rclgrep: a non-indexed search program using recoll
    data extraction modules to effect grep-like function.
 -- Jean-Francois Dockes <jf@dockes.org>  Sun, 11 Sep 2022 10:59:00 +0200
--- a/packaging/debian/rclgrep/debian/compat
+++ b/packaging/debian/rclgrep/debian/compat
@ -1 +0,0 @@
 11
--- a/packaging/debian/rclgrep/debian/control
+++ b/packaging/debian/rclgrep/debian/control
@ -1,44 +0,0 @@
 Source: rclgrep
 Section: x11
 Priority: optional
 Maintainer: Jean-Francois Dockes <jfd@recoll.org>
 Build-Depends: debhelper (>= 10),
               dh-python,
               dpkg-dev (>= 1.16.1~),
               libchm-dev,
               libmagic-dev,
               libxslt1-dev,
               libz-dev,
               pkg-config,
               python3-all-dev,
               python3-setuptools
 X-Python3-Version: >= 3.6
 Homepage: https://www.lesbonscomptes.com/recoll
 Standards-Version: 4.2.1
 Package: rclgrep
 Architecture: any
 Depends: python3, ${misc:Depends}, ${shlibs:Depends}
 Conflicts: recollcmd
 Recommends: antiword,
            groff,
            libimage-exiftool-perl,
            poppler-utils,
            python3-lxml,
            python3-six,
            python3-mutagen,
            python3-rarfile,
            unrtf,
            unzip,
            xdg-utils
 Suggests: ghostscript,
          untex,
          wv
 Description: grep-like program based on recoll data extraction modules.
 The program supports most grep options and aims at supplying a very similar
 output format. It will search all formats supported by Recoll, including
 compound documents and nested archives (mbox, zip, ....) with full
 regexp support (unlike recoll). It does not create
 an index and the package has no dependency on Xapian. In consequence, 
 searching is vastly slower than when using recoll.
--- a/packaging/debian/rclgrep/debian/copyright
+++ b/packaging/debian/rclgrep/debian/copyright
@ -1,141 +0,0 @@
 Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
 Upstream-Name: recoll
 Upstream-Contact: Jean-Francois Dockes <jfd@recoll.org>
 Source: https://www.lesbonscomptes.com/recoll/
 Files: *
 Copyright: 2005-2014, Jean-Francois Dockes <jfd@recoll.org>
 License: GPL-2+
 Files: bincimapmime/*
 Copyright: 2002-2005, Andreas Aardal Hanssen <andreas-binc@bincimap.org>
 License: GPL-2+
 Files: filters/rcl* internfile/htmlparse.cpp
 Copyright: 2000-2004, Mikio Hirabayashi
 License: GPL-2+
 Files: filters/rclpython
 Copyright: Jürgen Hermann, Mike Brown, Christopher Arndt
 <http://chrisarndt.de/en/software/python/colorize.html>
 License: GPL-2+
 Files: internfile/htmlparse.cpp internfile/mh_html.cpp
 Copyright: 1999-2001, BrightStation PLC,
           2001, Ananova Ltd,
           2002-2004, Olly Betts.
 License: GPL-2+
 Files: unac/*
 Copyright: 2000-2002, Loic Dachary <loic@senga.org>
 License: GPL-2+
 Files: common/*
 Copyright: 2004-2005, J.F.Dockes
 License: GPL-2+
 Files: debian/*
 Copyright: 2007-2014, Kartik Mistry <kartik@debian.org>
 License: GPL-2+
 License: GPL-2+
 This package is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
 Foundation; either version 2 of the License, or (at your option) any later
 version.
 .
 This package is distributed in the hope that it will be useful, but WITHOUT
 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 .
 You should have received a copy of the GNU General Public License along with
 this package; if not, write to the Free Software Foundation, Inc., 51 Franklin
 St, Fifth Floor, Boston, MA 02110-1301 USA
 .
 On Debian systems, the complete text of the GNU General Public License can be
 found in `/usr/share/common-licenses/GPL-2' and
 `/usr/share/common-licenses/GPL-3'.
 Files: aspell/*
 Copyright: 2001-2002, by Kevin Atkinson
 License: LGPL-2+
 License: LGPL-2+
 This package is free software; you can redistribute it and/or modify it under
 the terms of the GNU Lesser General Public License as published by the Free
 Software Foundation; either version 2 of the License, or (at your option) any
 later version.
 .
 This package is distributed in the hope that it will be useful, but WITHOUT
 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 details.
 .
 You should have received a copy of the GNU Lesser General Public License along
 with this package; if not, write to the Free Software Foundation, Inc., 51
 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 .
 On Debian systems, the complete text of the GNU Lesser General Public License
 can be found in `/usr/share/common-licenses/LGPL-2' and
 `/usr/share/common-licenses/LGPL-2.1' and `/usr/share/common-licenses/LGPL-3'.
 Files: common/uproplist.h
 Copyright: 1991-2006, Unicode, Inc.
 License: Unicode
 License: Unicode
 All rights reserved. Distributed under the Terms of Use in
 https://www.unicode.org/copyright.html
 .
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of the Unicode data files and any associated documentation (the "Data Files")
 or Unicode software and any associated documentation (the "Software") to deal
 in the Data Files or Software without restriction, including without
 limitation the rights to use, copy, modify, merge, publish, distribute, and/or
 sell copies of the Data Files or Software, and to permit persons to whom the
 Data Files or Software are furnished to do so, provided that (a) the above
 copyright notice(s) and this permission notice appear with all copies of the
 Data Files or Software, (b) both the above copyright notice(s) and this
 permission notice appear in associated documentation, and (c) there is clear
 notice in each modified Data File or in the Software as well as in the
 documentation associated with the Data File(s) or Software that the data or
 software has been modified.
 .
 THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
 KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD
 PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN
 THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
 DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE
 DATA FILES OR SOFTWARE.
 .
 Except as contained in this notice, the name of a copyright holder shall not
 be used in advertising or otherwise to promote the sale, use or other dealings
 in these Data Files or Software without prior written authorization of the
 copyright holder.
 Files: utils/md5.*
 Copyright: 1991-1992, RSA Data Security, Inc. All rights reserved.
 License: RSA
 License: RSA
 MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
 .
 License to copy and use this software is granted provided that it is
 identified as the "RSA Data Security, Inc. MD5 Message-Digest Algorithm" in
 all material mentioning or referencing this software or this function.
 .
 License is also granted to make and use derivative works provided that such
 works are identified as "derived from the RSA Data Security, Inc. MD5
 Message-Digest Algorithm" in all material mentioning or referencing the
 derived work.
 .
 RSA Data Security, Inc. makes no representations concerning either the
 merchantability of this software or the suitability of this software for any
 particular purpose. It is provided "as is" without express or implied warranty
 of any kind.
 .
 These notices must be retained in any copies of any part of this documentation
 and/or software.
--- a/packaging/debian/rclgrep/debian/rclgrep.install
+++ b/packaging/debian/rclgrep/debian/rclgrep.install
@ -1,2 +0,0 @@
 usr/lib/python*/*-packages/recollchm/*
 usr/lib/python*/*-packages/recollchm-*/*
--- a/packaging/debian/rclgrep/debian/rules
+++ b/packaging/debian/rclgrep/debian/rules
@ -1,44 +0,0 @@
 #!/usr/bin/make -f
 # Uncomment this to turn on verbose mode.
 #export DH_VERBOSE=1
 export DEB_BUILD_MAINT_OPTIONS = hardening=+all
 DPKG_EXPORT_BUILDFLAGS = 1
 include /usr/share/dpkg/buildflags.mk
 DEB_HOST_GNU_TYPE   ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
 DEB_BUILD_GNU_TYPE  ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
 build3vers := $(shell py3versions -sv)
 #build qt5 UI
 export QT_SELECT := qt5
 ifneq (,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
 	NJOBS := -j $(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
 endif
 # main packaging script based on dh7 syntax
 %:
 	dh $@ --with python3
 override_dh_auto_configure:
 	dh_auto_configure -- --enable-rclgrep --disable-python-module --disable-indexer \
 				--disable-qtgui --disable-recollq --disable-testmains \
 				--disable-xadump
 build3vers := $(shell py3versions -sv)
 override_dh_auto_install:
 	dh_auto_install
 	set -e && for i in $(build3vers); do \
 	(cd python/pychm; python$$i ./setup.py install \
 	                                --install-layout=deb \
 					--prefix=/usr \
 	                                --root=$(CURDIR)/debian/tmp/ ) ; \
 	done
 	find $(CURDIR) -type f -name '*.la' -exec rm -f '{}' \;
 	find $(CURDIR) -type f -name '*.pyc' -exec rm -f '{}' \;
 	rm -rf $(CURDIR)/debian/rclgrep/usr/lib/python*/*/*/__pycache__
 	rm -rf $(CURDIR)/debian/tmp/usr/lib/python*/*/*/__pycache__
--- a/packaging/debian/rclgrep/debian/source/format
+++ b/packaging/debian/rclgrep/debian/source/format
@ -1 +0,0 @@
 3.0 (quilt)
--- a/packaging/debian/rclgrep/debian/upstream/signing-key.asc
+++ b/packaging/debian/rclgrep/debian/upstream/signing-key.asc
@ -1,41 +0,0 @@
 -----BEGIN PGP PUBLIC KEY BLOCK-----
 mQINBFbJ6UABEADLsFg8qXTrNrYUnNS5UXlAWUH7/ZHNRgr/EIkhKAbdlzVAywTM
 fX6wo9crKzlqT3IcEOFe0RVJoh0FSNEQQlUhyJAFNlbcocsDYNqk7pDjxnUBUMM2
 U3ikLEPzRxWDhVepAVQPeloD1i8b4MJrSHnLb49PMmXg+6MHA+dzOS59onE5QDcz
 kw1RF0N0gl7693rOMP/ATefA2KPQyKCIweKB/3NbOcv4/T1XDyag0G7xYkT4stEl
 TN2P8c6HSyhWDxp2slZ04kdf17TuoeOqMO9gKE+eEC17lllLuhSrbBdfYTYt05pN
 Y1eRup+6oamoMc3ITD2U2GtY+65AHw5MxjGigpZ3kj5DwF/f2IgtDBSoXjm8aaRb
 iYMvt3kXnb3Ai/oVvSlkIQMlDDpdAQmzB0FO0MCzzykq5mQVbl3Uw3i2q5vg1IIL
 fGOB1USa0JOVRSq8C66ncijYO6Jafx3uYCGVdIypoLs332kGsyQaIatoJRbPkKT/
 Wu/DGE8kHOaCo5795HbRk0O/Up5wQP3N/OXGmrQPtbafRz9bkjXOKGtq660VJ67K
 ttgY9L1fD7jb+zDoUaY33K8Trfqaxm5aGkI6Pj3VvQSF2CAaJuEnh/c0r9UdGn0e
 e1L0yP1kUj80Qv99QFEoH2UtBrfLsXAiRvcr/PfyGTp/+Q7wkCHsHC84TwARAQAB
 tCRKZWFuLUZyYW5jb2lzIERvY2tlcyA8amZAZG9ja2VzLm9yZz6JAkAEEwEKACoC
 GwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4ACGQEFAlitGxQFCQWlmM4ACgkQeAjO
 ltOLkgEE4xAAqGOSt6U+CGdI333Yx7KaCA+XgJPsiaqfG2AIuv4Y0/LW8467uy4u
 DdbgJ3GQ6kWUZD0b/nrp74Ly5ZM9DCIZzOX9FQ3R9FBhbBS1fVfqFD2yZQv4lze9
 Bjj7EMRieRGUtVIb7BiUrmJOyIbiGktEOuqqTj7RehN/2sflv5jH2NW33+i3t/x7
 YWTAPHxieiOYO0Z0JtXe+ZXJ92LNaR+5DOsGItTSeJKzbh1oUtAcbt5DDDQKMJwb
 tIRg+9Mjj3IUqCsiFkKOfq34TXDu2paKWkdGuOJ8u2DqvgUYkqFfY4JOpWrax+Mt
 hsS6VSDIxL7H9UqaJpXWMMhUN2gFM+wy/y1OeNo5bKM4KiLbaugEvOb1RCQm2R6h
 HNcDO52KSFJMZSCzO/jjN2qJjDcLu2DAsQzWI+bzZgP+tpr3yWvW2OCCY+PdT4ZA
 5lwnd85P5x1wYhb/eoXi9QyWburu3vaNGdDWUljUkBB29l09hoDbAYPwWujLDGyT
 0j6+NWh27dLe8bnwe5YEBPHcwvuLnSBVVXY+UM/0toEWwpRdTvnxZUKKxtN+yiCA
 k82qRNXaUGaWpaL0xYPfanZSXi1dSNNEertS/BrF5PpmNdQsK1+sibNF1KKhR5ge
 2QSrjoNzL4kBgJq4ojJBcBd75p9HzheXCHdG1EHQBTeetDqiwEPbO/W0KUplYW4t
 RnJhbmNvaXMgRG9ja2VzIDxqZmRvY2tlc0BnbWFpbC5jb20+iQI9BBMBCgAnAhsD
 BQsJCAcDBRUKCQgLBRYCAwEAAh4BAheABQJYrRsbBQkFpZjOAAoJEHgIzpbTi5IB
 GKYP/09nsWnDCqv+3OKzmbHlMKCLvGU8IGU1q909sUelKmyjSFXmh3BsgR4DrfXu
 hGWtmu/mPYzCWzbK8TWYsU1O6em7YRY8lt/q/8gciSahl+xFT/G5GJHf7KFgtsSn
 QcbA18dzXKpxmTOTMEmWLh4zZlaUbaH2gmpXBQvH4smu/FV6rq5YYYDG9A3PDujr
 QmOyguD7wNvb6ahrgpTbMawsj6zLIT1pkC6t1Orz/gmYsuk47EJFfvaO3+YIUQ0D
 pFN9EkDjhcIa3vSsd+EBMbmweFB6y9gs7LmolqwiddUlYv5wGOLgiE1EJEI4bcvr
 vm8RWHziWytmpTPjzFpETaBVMC8xTt6tiNWNeTUkjbBX0Fek9GEvzAJIpe18LnM8
 raFREUriHuUwIGrrFrhj7rBAX51DiiJUguDi+842SjlzVE2SCwyjXVlglDItBPKO
 Y284KpI+wLhJCggtwtzZOQcAc5l8j3JpXjhm1tjSKggEONdBu2l7mWZRAJCBziMK
 mnUPL8q44l2hc/sDu4cCpsHW+pssGDQqtR+t/fPMGFuXd+WnfYskhyQVms44yAYJ
 Y/cx43tgYLHDx2TraTQZqh1qgmrXesS2DiT+5pCjQh0ChwTEBjGCz41WcQkD9nTL
 k3E6amPE6WAPS07bX9zkLHYYIOu8wd8nRoPKlVjhMpBvz8LE
 =2J/9
 -----END PGP PUBLIC KEY BLOCK-----
--- a/packaging/debian/rclgrep/debian/watch
+++ b/packaging/debian/rclgrep/debian/watch
@ -1,4 +0,0 @@
 version=4
 opts=pgpmode=auto \
 https://www.lesbonscomptes.com/recoll/download.html \
 (?:|.*/)recoll(?:[_\-]v?|)(\d[^\s/]*)\.(?:tar\.xz|txz|tar\.bz2|tbz2|tar\.gz|tgz)
--- a/packaging/homebrew/recoll.rb
+++ b/packaging/homebrew/recoll.rb
@ -60,40 +60,40 @@ index f41a9f39..dc3085a4 100755
 #
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
-diff --git filters/rcl7z.py filters/rcl7z.py
+diff --git filters/rcl7z filters/rcl7z
 index c68c8bcb..ac50c4ec 100755
--- filters/rcl7z.py
+--- filters/rcl7z
-+++ filters/rcl7z.py
+++ filters/rcl7z
@@ -1,4 +1,4 @@
 -#!/usr/bin/env python2
 +#!/usr/bin/env python2.7
 # 7-Zip file filter for Recoll
-diff --git filters/rclaudio.py filters/rclaudio.py
+diff --git filters/rclaudio filters/rclaudio
 index 94ca0be7..08d6375a 100755
--- filters/rclaudio.py
+--- filters/rclaudio
-+++ filters/rclaudio.py
+++ filters/rclaudio
@@ -1,4 +1,4 @@
 -#!/usr/bin/env python2
 +#!/usr/bin/env python2.7
 # Audio tag filter for Recoll, using mutagen
-diff --git filters/rclchm.py filters/rclchm.py
+diff --git filters/rclchm filters/rclchm
 index f9811c37..3bc9b16d 100755
--- filters/rclchm.py
+--- filters/rclchm
-+++ filters/rclchm.py
+++ filters/rclchm
@@ -1,4 +1,4 @@
 -#!/usr/bin/env python2
 +#!/usr/bin/env python2.7
 """Extract Html files from a Microsoft Compiled Html Help file (.chm)
 Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
-diff --git filters/rcldia.py filters/rcldia.py
+diff --git filters/rcldia filters/rcldia
 index 282148eb..a480294b 100755
--- filters/rcldia.py
+--- filters/rcldia
-+++ filters/rcldia.py
+++ filters/rcldia
@@ -1,4 +1,4 @@
 -#!/usr/bin/env python2
 +#!/usr/bin/env python2.7
@ -120,30 +120,30 @@ index e8fa1831..b92b185d 100755
 from __future__ import print_function
 import rclexecm
-diff --git filters/rclepub.py filters/rclepub.py
+diff --git filters/rclepub filters/rclepub
 index 8042d7f9..51786af1 100755
--- filters/rclepub.py
+--- filters/rclepub
-+++ filters/rclepub.py
+++ filters/rclepub
@@ -1,4 +1,4 @@
 -#!/usr/bin/env python2
 +#!/usr/bin/env python2.7
 """Extract Html content from an EPUB file (.epub)"""
 from __future__ import print_function
-diff --git filters/rclepub.py1 filters/rclepub.py1
+diff --git filters/rclepub1 filters/rclepub1
 index bd44f635..a7ea6c06 100755
--- filters/rclepub.py1
+--- filters/rclepub1
-+++ filters/rclepub.py1
+++ filters/rclepub1
@@ -1,4 +1,4 @@
 -#!/usr/bin/env python2
 +#!/usr/bin/env python2.7
 """Extract Html content from an EPUB file (.chm), concatenating all sections"""
 from __future__ import print_function
-diff --git filters/rclics.py filters/rclics.py
+diff --git filters/rclics filters/rclics
 index 0ef04f2d..de177024 100755
--- filters/rclics.py
+--- filters/rclics
-+++ filters/rclics.py
+++ filters/rclics
@@ -1,4 +1,4 @@
 -#!/usr/bin/env python2
 +#!/usr/bin/env python2.7
@ -160,20 +160,20 @@ index 7eb1da91..4eb6c9b0 100755
 # Python-based Image Tag extractor for Recoll. This is less thorough
 # than the Perl-based rclimg script, but useful if you don't want to
-diff --git filters/rclinfo.py filters/rclinfo.py
+diff --git filters/rclinfo filters/rclinfo
 index f353d19e..36cf34e0 100755
--- filters/rclinfo.py
+--- filters/rclinfo
-+++ filters/rclinfo.py
+++ filters/rclinfo
@@ -1,4 +1,4 @@
 -#!/usr/bin/env python2
 +#!/usr/bin/env python2.7
 # Read a file in GNU info format and output its nodes as subdocs,
 # interfacing with recoll execm
-diff --git filters/rclkar.py filters/rclkar.py
+diff --git filters/rclkar filters/rclkar
 index d6570dd5..34b8d2a2 100755
--- filters/rclkar.py
+--- filters/rclkar
-+++ filters/rclkar.py
+++ filters/rclkar
@@ -1,4 +1,4 @@
 -#!/usr/bin/env python2
 +#!/usr/bin/env python2.7
@ -230,10 +230,10 @@ index 615455b3..1e411890 100755
 # -*- coding: iso-8859-1 -*-
 """
     MoinMoin - Python source parser and colorizer
-diff --git filters/rclrar.py filters/rclrar.py
+diff --git filters/rclrar filters/rclrar
 index 8f723fa5..5f6adfb0 100755
--- filters/rclrar.py
+--- filters/rclrar
-+++ filters/rclrar.py
+++ filters/rclrar
@@ -1,4 +1,4 @@
 -#!/usr/bin/env python2
 +#!/usr/bin/env python2.7
@ -280,10 +280,10 @@ index 8c1b8aea..cee17324 100755
 # Copyright (C) 2014 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
-diff --git filters/rcltar.py filters/rcltar.py
+diff --git filters/rcltar filters/rcltar
 index d8bf100d..ab4b306e 100755
--- filters/rcltar.py
+--- filters/rcltar
-+++ filters/rcltar.py
+++ filters/rcltar
@@ -1,4 +1,4 @@
 -#!/usr/bin/env python2
 +#!/usr/bin/env python2.7
@ -320,10 +320,10 @@ index 32a11c1a..eab3b257 100644
 from __future__ import print_function
 import rclexecm
-diff --git filters/rclwar.py filters/rclwar.py
+diff --git filters/rclwar filters/rclwar
 index b654f3b3..301e28e9 100755
--- filters/rclwar.py
+--- filters/rclwar
-+++ filters/rclwar.py
+++ filters/rclwar
@@ -1,4 +1,4 @@
 -#!/usr/bin/env python2
 +#!/usr/bin/env python2.7
@ -360,10 +360,10 @@ index 158e1222..602769af 100755
 # Copyright (C) 2016 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
-diff --git filters/rclzip.py filters/rclzip.py
+diff --git filters/rclzip filters/rclzip
 index 35739625..0c597fbd 100755
--- filters/rclzip.py
+--- filters/rclzip
-+++ filters/rclzip.py
+++ filters/rclzip
@@ -1,4 +1,4 @@
 -#!/usr/bin/env python2
 +#!/usr/bin/env python2.7
--- a/packaging/mac/make-recoll-dmg.sh
+++ b/packaging/mac/make-recoll-dmg.sh
@ -12,31 +12,27 @@ usage()
 # Adjustable things
 top=~/Recoll
-# The possibly bogus version we have in paths (may be harcoded in the .pro)
+qtversion=5.14.2
-# qcbuildloc=Desktop_Qt_5_15_2_clang_64bit
+# Will probably need adjustment on M1
-qcbuildloc=Qt_6_2_4_for_macOS
+path_clang=clang_64
 deploy=~/Qt/${qtversion}/${path_clang}/bin/macdeployqt
 # qtversion=5.15.2
 qtversion=6.2.4
-#deploy=~/Qt/${qtversion}/macos/clang_64bit/macdeployqt
+qt_ver_sion=`echo $qtversion | sed -e 's/\./_/g'`
 deploy=~/Qt/${qtversion}/macos/bin/macdeployqt
 toprecoll=$top/recoll/src
-appdir=$toprecoll/build-recoll-win-${qcbuildloc}-Release/recoll.app
+appdir=$toprecoll/build-recoll-win-Desktop_Qt_${qt_ver_sion}_${path_clang}bit-Release/recoll.app
-rclindexdir=$toprecoll/windows/build-recollindex-${qcbuildloc}-Release
+rclindexdir=$toprecoll/windows/build-recollindex-Desktop_Qt_${qt_ver_sion}_${path_clang}bit-Release
 rclqdir=$toprecoll/windows/build-recollq-${qcbuildloc}-Release
 bindir=$appdir/Contents/MacOS
 datadir=$appdir/Contents/Resources
 dmg=$appdir/../recoll.dmg
-version=`cat $toprecoll/RECOLL-VERSION.txt`
+version=`cat $toprecoll/VERSION`
 test -d $appdir || fatal Must first have built recoll in $appdir
 cp $rclindexdir/recollindex $bindir || exit 1
 cp $rclqdir/recollq $bindir || exit 1
 cp $top/antiword/antiword $bindir || exit 1
 mkdir -p $datadir/antiword || exit 1
@ -49,7 +45,7 @@ $deploy $appdir -dmg || exit 1
 hash=`(cd $top/recoll;git log -n 1  | head -1  | awk '{print $2}' |cut -b 1-8)`
-dte=`date +%Y%m%d`
+
-mv $dmg ~/Documents/recoll-$version-$dte-$hash.dmg || exit 1
+mv $dmg ~/Documents/recoll-$version-$hash.dmg || exit 1
 ls -l ~/Documents/recoll-$version-*.dmg
--- a/packaging/rpm/recoll.spec
+++ b/packaging/rpm/recoll.spec
@ -3,7 +3,7 @@
 Summary:        Desktop full text search tool with Qt GUI
 Name:           recoll
-Version:        1.32.7
+Version:        1.29.2
 Release:        2%{?dist}
 Group:          Applications/Databases
 License:        GPLv2+
@ -13,26 +13,15 @@ Source10:       qmake-qt5.sh
 BuildRequires:  aspell-devel
 BuildRequires:  bison
 BuildRequires:  desktop-file-utils
-
+# kio
-#BuildRequires:  kdelibs4-devel
+BuildRequires:  kdelibs4-devel
 # Fedora
 BuildRequires:  qt5-qtbase-devel
 BuildRequires:  qt5-qtwebkit-devel
-#BuildRequires: qt5-qtwebengine-devel
+BuildRequires:  extra-cmake-modules
 BuildRequires:  kf5-kio-devel
 BuildRequires:  python2-devel
 BuildRequires:  python3-devel
 BuildRequires:  xapian-core-devel
 BuildRequires:  kf5-kio-devel
 # Opensuse
 #BuildRequires:  libQt5Gui-devel
 #BuildRequires:  libqt5-qtwebengine-devel
 #BuildRequires:  python310-devel
 #BuildRequires:  libxapian-devel
 #BuildRequires:  kio-devel
 BuildRequires:  extra-cmake-modules
 BuildRequires:  python2-devel
 BuildRequires:  zlib-devel
 BuildRequires:  chmlib-devel
 BuildRequires:  libxslt-devel
@ -66,7 +55,7 @@ LDFLAGS="%{?__global_ldflags}"; export LDFLAGS
 install -m755 -D %{SOURCE10} qmake-qt5.sh
 export QMAKE=qmake-qt5
-%configure --enable-webengine
+%configure
 make %{?_smp_mflags}
 %install
@ -81,7 +70,7 @@ rm -f %{buildroot}/usr/share/recoll/filters/xdg-open
 # kio_recoll -kde5
 (
-#mkdir kde/kioslave/kio_recoll/build && pushd kde/kioslave/kio_recoll/build
+mkdir kde/kioslave/kio_recoll/build && pushd kde/kioslave/kio_recoll/build
 %cmake ..
 make %{?_smp_mflags} VERBOSE=1
 make install DESTDIR=%{buildroot}
@ -148,12 +137,12 @@ exit 0
 %{_datadir}/icons/hicolor/48x48/apps/%{name}.png
 %{_datadir}/pixmaps/%{name}.png
 %{_libdir}/recoll
-%{python2_sitearch}/recoll
+%{python_sitearch}/recoll
-%{python2_sitearch}/Recoll*.egg-info
+%{python_sitearch}/Recoll*.egg-info
 %{python3_sitearch}/recoll
 %{python3_sitearch}/Recoll*.egg-info
-%{python2_sitearch}/recollchm
+%{python_sitearch}/recollchm
-%{python2_sitearch}/recollchm*.egg-info
+%{python_sitearch}/recollchm*.egg-info
 %{python3_sitearch}/recollchm
 %{python3_sitearch}/recollchm*.egg-info
 %{_mandir}/man1/%{name}.1*
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -4,16 +4,12 @@
 if COND_TESTMAINS
  MAYBE_TESTMAINS = testmains
 endif
-if COND_RCLGREP
+SUBDIRS = . $(MAYBE_TESTMAINS)
  MAYBE_RCLGREP = rclgrep
 endif
 SUBDIRS = . $(MAYBE_TESTMAINS) $(MAYBE_RCLGREP)
 DIST_SUBDIRS = .
 CXXFLAGS ?= @CXXFLAGS@
-XAPIAN_LIBS=@XAPIAN_LIBS@
+LIBXAPIAN=@LIBXAPIAN@
-XAPIAN_CFLAGS=@XAPIAN_CFLAGS@
+XAPIANCXXFLAGS=@XAPIANCXXFLAGS@
 XSLT_CFLAGS=@XSLT_CFLAGS@
 XSLT_LIBS=@XSLT_LIBS@
 LIBICONV=@LIBICONV@
@ -42,7 +38,7 @@ COMMONCPPFLAGS = -I. \
 AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
    $(COMMONCPPFLAGS) \
    $(INCICONV) \
-    $(XAPIAN_CFLAGS) \
+    $(XAPIANCXXFLAGS) \
    $(XSLT_CFLAGS) \
    $(X_CFLAGS) \
    -DRECOLL_DATADIR=\"${pkgdatadir}\" \
@ -59,10 +55,7 @@ else
 endif
 librcldir = $(libdir)/recoll
-librcl_LTLIBRARIES =
+librcl_LTLIBRARIES = librecoll.la
 if MAKE_RECOLL_LIB
 librcl_LTLIBRARIES += librecoll.la
 endif
 librecoll_la_SOURCES = \
 aspell/rclaspell.cpp \
@ -294,7 +287,7 @@ AM_YFLAGS = -d
 # need it
 librecoll_la_LDFLAGS = -release $(VERSION) -no-undefined @NO_UNDEF_LINK_FLAG@
-librecoll_la_LIBADD = $(XSLT_LIBS) $(XAPIAN_LIBS) $(LIBICONV) $(X_LIBX11) $(LIBTHREADS)
+librecoll_la_LIBADD = $(XSLT_LIBS) $(LIBXAPIAN) $(LIBICONV) $(X_LIBX11) $(LIBTHREADS)
 # There is probably a better way to do this. The KIO needs to be linked
 # with librecoll, but librecoll is installed into a non-standard place
@ -313,10 +306,7 @@ PicStatic: $(librecoll_la_OBJECTS)
 	$(LIBTOOL) --tag=LD --mode=link gcc -g -O -o librecoll.la \
 		$(librecoll_la_OBJECTS)
-bin_PROGRAMS =
+bin_PROGRAMS = recollindex
 if MAKEINDEXER
    bin_PROGRAMS += recollindex
 endif
 if MAKECMDLINE
    bin_PROGRAMS += recollq
 endif
@ -344,8 +334,10 @@ recollq_SOURCES = query/recollqmain.cpp
 recollq_LDADD = librecoll.la
 xadump_SOURCES = query/xadump.cpp
-xadump_LDADD = librecoll.la $(XAPIAN_LIBS) $(LIBICONV)
+xadump_LDADD = librecoll.la $(LIBXAPIAN) $(LIBICONV)
 # Note: I'd prefer the generated query parser files not to be distributed
 # at all, but failed to achieve this
 EXTRA_DIST = \
 bincimapmime/00README.recoll bincimapmime/AUTHORS bincimapmime/COPYING \
 \
@ -365,8 +357,8 @@ doc/user/custom.xsl doc/user/usermanual.xml \
 filters/injectcommon.sh filters/recfiltcommon filters/rcltxtlines.py \
 \
 index/rclmon.sh \
-index/recollindex@.service \
+index/recollindex-system.service \
-index/recollindex.service \
+index/recollindex-user.service \
 \
 kde/kioslave/kio_recoll/00README.txt \
 kde/kioslave/kio_recoll/CMakeLists.txt \
@ -377,10 +369,9 @@ kde/kioslave/kio_recoll/dirif.cpp \
 kde/kioslave/kio_recoll/htmlif.cpp \
 kde/kioslave/kio_recoll/kio_recoll.cpp \
 kde/kioslave/kio_recoll/kio_recoll.h \
 kde/kioslave/kio_recoll/recoll.json \
 kde/kioslave/kio_recoll/recoll.protocol \
 kde/kioslave/kio_recoll/recollf.protocol \
 kde/kioslave/kio_recoll/recollnolist.protocol \
 kde/kioslave/kio_recoll/recoll.protocol \
 \
 kde/kioslave/kio_recoll-kde4/00README.txt \
 kde/kioslave/kio_recoll-kde4/CMakeLists.txt \
@ -397,9 +388,6 @@ kde/kioslave/kio_recoll-kde4/recoll.protocol \
 \
 query/location.hh  query/position.hh  query/stack.hh \
 \
 qtgui/actsearch.ui \
 qtgui/actsearch_w.cpp \
 qtgui/actsearch_w.h \
 qtgui/advsearch.ui \
 qtgui/advsearch_w.cpp \
 qtgui/advsearch_w.h \
@ -419,8 +407,6 @@ qtgui/fragbuts.h \
 qtgui/guiutils.cpp \
 qtgui/guiutils.h \
 qtgui/i18n/*.qm qtgui/i18n/*.ts \
 qtgui/idxmodel.cpp \
 qtgui/idxmodel.h \
 qtgui/idxsched.h \
 qtgui/idxsched.ui \
 qtgui/images/asearch.png \
@ -489,7 +475,6 @@ qtgui/rclm_idx.cpp \
 qtgui/rclm_menus.cpp \
 qtgui/rclm_preview.cpp \
 qtgui/rclm_saveload.cpp \
 qtgui/rclm_sidefilters.cpp \
 qtgui/rclm_view.cpp \
 qtgui/rclm_wins.cpp \
 qtgui/rclmain.ui \
@ -582,20 +567,16 @@ python/samples/recollgui/rclmain.ui \
 python/samples/recollq.py \
 python/samples/recollqsd.py \
 \
-rclgrep/Makefile.am \
+sampleconf/fields sampleconf/fragbuts.xml sampleconf/mimeconf \
-rclgrep/rclgrep.cpp \
+sampleconf/mimemap sampleconf/mimeview sampleconf/mimeview.mac \
 \
 sampleconf/fields sampleconf/fragment-buttons.xml sampleconf/mimeconf \
 sampleconf/mimemap sampleconf/mimeview sampleconf/macos/mimeview \
 sampleconf/recoll.conf sampleconf/recoll.qss \
 sampleconf/recoll-common.css sampleconf/recoll-common.qss \
 sampleconf/recoll-dark.qss sampleconf/recoll-dark.css \
 \
 testmains/Makefile.am \
 \
 unac/AUTHORS unac/COPYING unac/README unac/README.recoll unac/unac.c \
 \
-RECOLL-VERSION.txt
+VERSION
 # EXTRA_DIST: The Php Code does not build anymore. No need to ship it until
 # someone fixes it:
@ -637,13 +618,13 @@ install-exec-local:: rclpychm-install
 clean-local:: rclpychm-clean
 rclpychm:
 	(cd python/pychm; set -x; \
-        for v in 3;do \
+        for v in 2 3;do \
        test -n "`which python$${v}`" && python$${v} setup.py build;\
        done \
        )
 rclpychm-install:
 	(cd python/pychm; set -x; \
-        for v in 3;do test -n "`which python$${v}`" && \
+        for v in 2 3;do test -n "`which python$${v}`" && \
        python$${v} setup.py install \
         --prefix=${prefix} --root=$${DESTDIR:-/} $(OPTSFORPYTHON); \
        done \
@ -673,19 +654,17 @@ defconfdir = $(pkgdatadir)/examples
 defconf_DATA = \
 desktop/recollindex.desktop \
 index/rclmon.sh \
-index/recollindex.service \
+index/recollindex-system.service \
-index/recollindex@.service \
+index/recollindex-user.service \
 sampleconf/fragbuts.xml \
 sampleconf/fields \
 sampleconf/fragment-buttons.xml \
 sampleconf/mimeconf \
 sampleconf/mimemap \
 sampleconf/mimeview \
 sampleconf/recoll-common.css \
 sampleconf/recoll-common.qss \
 sampleconf/recoll-dark.css \
 sampleconf/recoll-dark.qss \
 sampleconf/recoll.conf \
-sampleconf/recoll.qss
+sampleconf/mimeconf \
 sampleconf/recoll.qss \
 sampleconf/recoll-dark.qss \
 sampleconf/recoll-dark.css \
 sampleconf/mimemap \
 sampleconf/mimeview 
 filterdir = $(pkgdatadir)/filters
 dist_filter_DATA = \
@ -704,31 +683,30 @@ filters/openxml-xls-body.xsl \
 filters/openxml-word-body.xsl \
 filters/openxml-meta.xsl \
 filters/ppt-dump.py \
-filters/rcl7z.py \
+filters/rcl7z \
 filters/rclaptosidman \
-filters/rclaudio.py \
+filters/rclaudio \
 filters/rclbasehandler.py \
 filters/rclbibtex.sh \
 filters/rclcheckneedretry.sh \
-filters/rclchm.py \
+filters/rclchm \
-filters/rcldia.py \
+filters/rcldia \
 filters/rcldjvu.py \
 filters/rcldoc.py \
 filters/rcldvi \
-filters/rclepub.py \
+filters/rclepub \
-filters/rclepub1.py \
+filters/rclepub1 \
 filters/rclexec1.py \
 filters/rclexecm.py \
 filters/rclfb2.py \
 filters/rclgaim \
 filters/rclgenxslt.py \
 filters/rclhwp.py \
-filters/rclics.py \
+filters/rclics \
 filters/rclimg \
 filters/rclimg.py \
-filters/rclinfo.py \
+filters/rclinfo \
-filters/rclipynb.py \
+filters/rclkar \
 filters/rclkar.py \
 filters/rclkwd \
 filters/rcllatinclass.py \
 filters/rcllatinstops.zip \
@ -747,21 +725,21 @@ filters/rclps \
 filters/rclpst.py \
 filters/rclpurple \
 filters/rclpython.py \
-filters/rclrar.py \
+filters/rclrar \
 filters/rclrtf.py \
 filters/rclscribus \
 filters/rclshowinfo \
-filters/rcltar.py \
+filters/rcltar \
 filters/rcltex \
 filters/rcltext.py \
 filters/rcluncomp \
 filters/rcluncomp.py \
-filters/rclwar.py \
+filters/rclwar \
 filters/rclxls.py \
 filters/rclxml.py \
 filters/rclxmp.py \
 filters/rclxslt.py \
-filters/rclzip.py \
+filters/rclzip \
 filters/recoll-we-move-files.py \
 filters/recollepub.zip \
 filters/svg.xsl \
@ -771,13 +749,6 @@ filters/xml.xsl \
 python/recoll/recoll/conftree.py \
 python/recoll/recoll/rclconfig.py 
 if INSTALL_SYSTEMD_UNITS
 systemd_system_unitdir = @SYSTEMD_SYSTEM_UNIT_DIR@
 systemd_user_unitdir = @SYSTEMD_USER_UNIT_DIR@
 systemd_system_unit_DATA = index/recollindex@.service
 systemd_user_unit_DATA = index/recollindex.service
 endif
 install-data-hook: 
 	(cd $(DESTDIR)/$(filterdir); \
 	chmod a+x rcl* ppt-dump.py xls-dump.py xlsxmltocsv.py hotrecoll.py; \
@ -796,14 +767,14 @@ doc/user/usermanual.html: doc/user/usermanual.xml
 endif
 dist_man1_MANS = doc/man/recoll.1 doc/man/recollq.1 \
-               doc/man/recollindex.1 doc/man/xadump.1 doc/man/rclgrep.1
+               doc/man/recollindex.1 doc/man/xadump.1
 dist_man5_MANS = doc/man/recoll.conf.5
 dist-hook:
 	(cd $(top_srcdir); find . \
            \( -name '*.pyc' -o -name '#*' -o -name '*~' \) -delete)
 	if test -z "$(NOTAG)";then \
-	  test -z "`git status -s|grep -v recoll-$(RECOLL-VERSION.txt)`"||exit 1; \
+	  test -z "`git status -s|grep -v recoll-$(VERSION)`"||exit 1; \
 	  vers=`echo $(VERSION) | sed -e 's/~/_/g'`;\
 	  git tag -a RECOLL-$$vers -m "Release $$vers tagged"; \
 	fi
--- a/src/README
+++ b/src/README
@ -2763,8 +2763,8 @@ Chapter 4. Programming interface
   If you can program and want to write an execm handler, it should not be
   too difficult to make sense of one of the existing modules. For example,
-   look at rclzip.py which uses Zip file paths as identifiers (ipath), and
+   look at rclzip which uses Zip file paths as identifiers (ipath), and
-   rclics.py, which uses an integer index. Also have a look at the comments
+   rclics, which uses an integer index. Also have a look at the comments
   inside the internfile/mh_execm.h file and possibly at the corresponding
   module.
@ -2819,7 +2819,7 @@ Chapter 4. Programming interface
 text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html
- application/x-chm = execm rclchm.py
+ application/x-chm = execm rclchm
   The fragment specifies that:
--- a/src/RECOLL-VERSION.txt
+++ b/src/RECOLL-VERSION.txt
@ -1 +0,0 @@
 1.33.1
--- a/src/VERSION
+++ b/src/VERSION
@ -0,0 +1 @@
 1.31.0
--- a/src/aspell/rclaspell.h
+++ b/src/aspell/rclaspell.h
@ -41,8 +41,6 @@ class Aspell {
 public:
    Aspell(const RclConfig *cnf);
    ~Aspell();
    Aspell(const Aspell &) = delete;
    Aspell& operator=(const Aspell &) = delete;
    /** Check health */
    bool ok() const;
--- a/src/autogen.sh
+++ b/src/autogen.sh
@ -4,17 +4,14 @@ set -x
 aclocal
-# detect libtoolize on linux or glibtoolize in some systems
+if test X"$HOMEBREW_ENV" != X; then
-if (libtoolize --version) < /dev/null > /dev/null 2>&1; then
+    glt=`which glibtoolize`
-  LIBTOOLIZE=libtoolize
+fi
-elif (glibtoolize --version) < /dev/null > /dev/null 2>&1; then
+if test X"$glt" != X; then
-  LIBTOOLIZE=glibtoolize
+    $glt --copy
-else
+else
-  echo "libtoolize or glibtoolize was not found! Please install libtool." 1>&2
+    libtoolize --copy
  exit 1
 fi
 $LIBTOOLIZE --copy
 automake --add-missing --force-missing --copy
 autoconf
--- a/src/common/autoconfig-mac.h
+++ b/src/common/autoconfig-mac.h
@ -11,7 +11,7 @@
 /* #undef AC_APPLE_UNIVERSAL_BUILD */
 /* Path to the aspell program */
-#undef ASPELL_PROG
+#define ASPELL_PROG "/opt/local/bin/aspell"
 /* No X11 session monitoring support */
 #define DISABLE_X11MON 1
@ -125,7 +125,7 @@
 #define PACKAGE_NAME "Recoll"
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "Recoll 1.33.0"
+#define PACKAGE_STRING "Recoll 1.30.2"
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "recoll"
@ -134,7 +134,7 @@
 #define PACKAGE_URL ""
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "1.33.0"
+#define PACKAGE_VERSION "1.30.2"
 /* putenv parameter is const */
 /* #undef PUTENV_ARG_CONST */
--- a/src/common/autoconfig-win.h
+++ b/src/common/autoconfig-win.h
@ -118,7 +118,7 @@
 #define PACKAGE_NAME "Recoll"
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "Recoll 1.33.0"
+#define PACKAGE_STRING "Recoll 1.30.2"
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "recoll"
@ -127,13 +127,13 @@
 #define PACKAGE_URL ""
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "1.33.0"
+#define PACKAGE_VERSION "1.30.2"
 /* putenv parameter is const */
 /* #undef PUTENV_ARG_CONST */
 /* Real time monitoring option */
-#define RCL_MONITOR 1
+#undef RCL_MONITOR
 /* Split camelCase words */
 /* #undef RCL_SPLIT_CAMELCASE */
--- a/src/common/conf_post.h
+++ b/src/common/conf_post.h
@ -67,18 +67,4 @@ typedef int ssize_t;
 #  define PRETEND_USE(expr) ((void)(expr))
 #endif /* PRETEND_USE */
 // It's complicated to really detect gnu gcc because other compilers define __GNUC__
 // See stackoverflow questions/38499462/how-to-tell-clang-to-stop-pretending-to-be-other-compilers
 #if defined(__GNUC__) && !defined(__llvm__) && !defined(__INTEL_COMPILER)
 #define REAL_GCC   __GNUC__ // probably
 #endif
 #ifdef REAL_GCC
 // Older gcc versions pretended to supply std::regex, but the resulting programs mostly crashed.
 #include <features.h>
 #if ! __GNUC_PREREQ(6,0)
 #define NO_STD_REGEX 1
 #endif
 #endif
 #endif /* INCLUDED */
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2022 J.F.Dockes 
+/* Copyright (C) 2004 J.F.Dockes 
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
@ -33,11 +33,14 @@
 #endif
 #include <algorithm>
 #include <list>
 #include <iostream>
 #include <sstream>
 #include <fstream>
 #include <cstdlib>
 #include <cstring>
 #include <unordered_map>
 #include <iterator>
 #include "cstr.h"
 #include "pathut.h"
@ -55,16 +58,6 @@
 using namespace std;
 // Naming the directory for platform-specific default config files, overriding the top-level ones
 // E.g. /usr/share/recoll/examples/windows
 #ifdef _WIN32
 static const string confsysdir{"windows"};
 #elif defined(_APPLE__)
 static const string confsysdir{"macos"};
 #else
 static const string confsysdir;
 #endif
 // Static, logically const, RclConfig members or module static
 // variables are initialized once from the first object build during
 // process initialization.
@ -97,8 +90,9 @@ void RclConfig::setPlusMinus(const string& sbase, const set<string>& upd,
    stringToStrings(sbase, base);
    vector<string> diff;
-    auto it = set_difference(base.begin(), base.end(), upd.begin(), upd.end(),
+    auto it =
-                             std::inserter(diff, diff.begin()));
+        set_difference(base.begin(), base.end(), upd.begin(), upd.end(),
                       std::inserter(diff, diff.begin()));
    sminus = stringsToString(diff);
    diff.clear();
@ -145,7 +139,7 @@ bool ParamStale::needrecompute()
            string newvalue;
            conffile->get(paramnames[i], newvalue, parent->m_keydir);
            LOGDEB1("ParamStale::needrecompute: " << paramnames[i] << " -> " <<
-                    newvalue << " keydir " << parent->m_keydir << "\n");
+                    newvalue << " keydir " << parent->m_keydir << endl);
            if (newvalue.compare(savedvalues[i])) {
                savedvalues[i] = newvalue;
                needrecomp = true;
@ -182,7 +176,8 @@ void ParamStale::init(ConfNull *cnf)
 bool RclConfig::isDefaultConfig() const
 {
-    string defaultconf = path_cat(path_homedata(), path_defaultrecollconfsubdir());
+    string defaultconf = path_cat(path_homedata(),
                                  path_defaultrecollconfsubdir());
    path_catslash(defaultconf);
    string specifiedconf = path_canon(m_confdir);
    path_catslash(specifiedconf);
@ -192,7 +187,8 @@ bool RclConfig::isDefaultConfig() const
 RclConfig::RclConfig(const RclConfig &r) 
    : m_oldstpsuffstate(this, "recoll_noindex"),
-      m_stpsuffstate(this, {"noContentSuffixes", "noContentSuffixes+", "noContentSuffixes-"}),
+      m_stpsuffstate(this, {"noContentSuffixes", "noContentSuffixes+",
                  "noContentSuffixes-"}),
      m_skpnstate(this, {"skippedNames", "skippedNames+", "skippedNames-"}),
      m_onlnstate(this, "onlyNames"),
      m_rmtstate(this, "indexedmimetypes"),
@ -204,7 +200,8 @@ RclConfig::RclConfig(const RclConfig &r)
 RclConfig::RclConfig(const string *argcnf)
    : m_oldstpsuffstate(this, "recoll_noindex"),
-      m_stpsuffstate(this, {"noContentSuffixes", "noContentSuffixes+", "noContentSuffixes-"}),
+      m_stpsuffstate(this, {"noContentSuffixes", "noContentSuffixes+",
                  "noContentSuffixes-"}),
      m_skpnstate(this, {"skippedNames", "skippedNames+", "skippedNames-"}),
      m_onlnstate(this, "onlyNames"),
      m_rmtstate(this, "indexedmimetypes"),
@ -233,7 +230,8 @@ RclConfig::RclConfig(const string *argcnf)
    if (argcnf && !argcnf->empty()) {
        m_confdir = path_absolute(*argcnf);
        if (m_confdir.empty()) {
-            m_reason = string("Cant turn [") + *argcnf + "] into absolute path";
+            m_reason = 
                string("Cant turn [") + *argcnf + "] into absolute path";
            return;
        }
    } else {
@ -251,8 +249,9 @@ RclConfig::RclConfig(const string *argcnf)
    // this is the default conf
    if (!autoconfdir && !isDefaultConfig()) {
        if (!path_exists(m_confdir)) {
-            m_reason = std::string("Explicitly specified configuration [") + m_confdir +
+            m_reason = "Explicitly specified configuration "
-                "] directory must exist (won't be automatically created). Use mkdir first";
+                "directory must exist"
                " (won't be automatically created). Use mkdir first";
            return;
        }
    }
@ -292,7 +291,8 @@ RclConfig::RclConfig(const string *argcnf)
            o_localecharset = string(cstr_cp1252);
        }
 #endif
-        LOGDEB1("RclConfig::getDefCharset: localecharset ["  << o_localecharset << "]\n");
+        LOGDEB1("RclConfig::getDefCharset: localecharset ["  <<
                o_localecharset << "]\n");
    }
    const char *cp;
@ -310,15 +310,8 @@ RclConfig::RclConfig(const string *argcnf)
        m_cdirs.push_back(cp);
    } 
-    // Base/installation config, and its platform-specific overrides
+    // Base/installation config
-    std::string defaultsdir = path_cat(m_datadir, "examples");
+    m_cdirs.push_back(path_cat(m_datadir, "examples"));
    if (!confsysdir.empty()) {
        std::string sdir = path_cat(defaultsdir, confsysdir);
        if (path_isdir(sdir)) {
            m_cdirs.push_back(sdir);
        }
    }
    m_cdirs.push_back(defaultsdir);
    string cnferrloc;
    for (const auto& dir : m_cdirs) {
@ -346,14 +339,17 @@ RclConfig::RclConfig(const string *argcnf)
    // there are several. This only uses the distributed file, not any
    // local customization (too complicated).
    if (mime_suffixes.empty()) {
-        ConfSimple mm(path_cat(path_cat(m_datadir, "examples"), "mimemap").c_str());
+        ConfSimple mm(
            path_cat(path_cat(m_datadir, "examples"), "mimemap").c_str());
        vector<ConfLine> order = mm.getlines();
        for (const auto& entry: order) {
            if (entry.m_kind == ConfLine::CFL_VAR) {
-                LOGDEB1("CONFIG: " << entry.m_data << " -> " << entry.m_value << "\n");
+                LOGDEB1("CONFIG: " << entry.m_data << " -> " << entry.m_value <<
                        endl);
                // Remember: insert() only does anything for new keys,
                // so we only have the first value in the map
-                mime_suffixes.insert(pair<string,string>(entry.m_value, entry.m_data));
+                mime_suffixes.insert(
                    pair<string,string>(entry.m_value, entry.m_data));
            }
        }
    }
@ -388,9 +384,9 @@ RclConfig::RclConfig(const string *argcnf)
 bool RclConfig::updateMainConfig()
 {
-    ConfStack<ConfTree> *newconf = new ConfStack<ConfTree>("recoll.conf", m_cdirs, true);
+    ConfStack<ConfTree> *newconf = 
        new ConfStack<ConfTree>("recoll.conf", m_cdirs, true);
    if (newconf == 0 || !newconf->ok()) {
        std::cerr << "updateMainConfig: new Confstack not ok\n";
        if (m_conf)
            return false;
        m_ok = false;
@ -520,7 +516,8 @@ bool RclConfig::getConfParam(const string &name, vector<int> *vip,
        char *ep;
        vip->push_back(strtol(vs[i].c_str(), &ep, 0));
        if (ep == vs[i].c_str()) {
-            LOGDEB("RclConfig::getConfParam: bad int value in [" << name << "]\n");
+            LOGDEB("RclConfig::getConfParam: bad int value in [" << name <<
                   "]\n");
            return false;
        }
    }
@ -589,10 +586,12 @@ void RclConfig::initThrConf()
 out:
    ostringstream sconf;
    for (unsigned int i = 0; i < 3; i++) {
-        sconf << "(" << m_thrConf[i].first << ", " << m_thrConf[i].second << ") ";
+        sconf << "(" << m_thrConf[i].first << ", " << m_thrConf[i].second <<
            ") ";
    }
-    LOGDEB("RclConfig::initThrConf: chosen config (ql,nt): " << sconf.str() << "\n");
+    LOGDEB("RclConfig::initThrConf: chosen config (ql,nt): " << sconf.str() <<
           "\n");
 }
 pair<int,int> RclConfig::getThrConf(ThrStage who) const
@ -685,7 +684,7 @@ public:
 class SuffCmp {
 public:
    int operator()(const SfString& s1, const SfString& s2) const {
-        //cout << "Comparing " << s1.m_str << " and " << s2.m_str << "\n";
+        //cout << "Comparing " << s1.m_str << " and " << s2.m_str << endl;
        string::const_reverse_iterator 
            r1 = s1.m_str.rbegin(), re1 = s1.m_str.rend(),
            r2 = s2.m_str.rbegin(), re2 = s2.m_str.rend();
@ -735,7 +734,8 @@ vector<string>& RclConfig::getStopSuffixes()
                m_maxsufflen = int(entry.length());
        }
    }
-    LOGDEB1("RclConfig::getStopSuffixes: ->" << stringsToString(m_stopsuffvec) << "\n");
+    LOGDEB1("RclConfig::getStopSuffixes: ->" <<
            stringsToString(m_stopsuffvec) << endl);
    return m_stopsuffvec;
 }
@ -845,22 +845,16 @@ string RclConfig::getMimeHandlerDef(const string &mtype, bool filtertypes, const
        if (!m_excludeMTypes.empty() && m_excludeMTypes.count(stringtolower(mtype))) {
            IdxDiags::theDiags().record(IdxDiags::ExcludedMime, fn, mtype);
            LOGDEB1("RclConfig::getMimeHandlerDef: " << mtype << " in excluded mime list (fn " <<
-                    fn << ")\n");
+                   fn << ")\n");
            return hs;
        }
    }
    if (!mimeconf->get(mtype, hs, "index")) {
        if (mtype.find("text/") == 0) {
            bool alltext{false};
            getConfParam("textunknownasplain", &alltext);
            if (alltext && mimeconf->get("text/plain", hs, "index")) {
                return hs;
            }
        }
        if (mtype != "inode/directory") {
            IdxDiags::theDiags().record(IdxDiags::NoHandler, fn, mtype);
-            LOGDEB1("getMimeHandlerDef: no handler for '" << mtype << "' (fn " << fn << ")\n");
+            LOGDEB1("getMimeHandlerDef: no handler for '" << mtype << "' (fn " <<
                    fn << ")\n");
        }
    }
    return hs;
@ -879,11 +873,12 @@ const vector<MDReaper>& RclConfig::getMDReapers()
        ConfSimple attrs;
        valueSplitAttributes(sreapers, value, attrs);
        vector<string> nmlst = attrs.getNames(cstr_null);
-        for (const auto& nm : nmlst) {
+        for (vector<string>::const_iterator it = nmlst.begin();
             it != nmlst.end(); it++) {
            MDReaper reaper;
-            reaper.fieldname = fieldCanon(nm);
+            reaper.fieldname = fieldCanon(*it);
            string s;
-            attrs.get(nm, s);
+            attrs.get(*it, s);
            stringToStrings(s, reaper.cmdv);
            m_mdreapers.push_back(reaper);
        }
@ -909,17 +904,11 @@ bool RclConfig::getGuiFilter(const string& catfiltername, string& frag) const
    return true;
 }
-bool RclConfig::valueSplitAttributes(const string& whole, string& value, ConfSimple& attrs)
+bool RclConfig::valueSplitAttributes(const string& whole, string& value, 
                                     ConfSimple& attrs)
 {
-    bool inquote{false};
+    /* There is currently no way to escape a semi-colon */
-    string::size_type semicol0;    
+    string::size_type semicol0 = whole.find_first_of(";");
    for (semicol0 = 0; semicol0 < whole.size(); semicol0++) {
        if (whole[semicol0] == '"') {
            inquote = !inquote;
        } else if (whole[semicol0] == ';' && !inquote) {
            break;
        }
    }
    value = whole.substr(0, semicol0);
    trimstring(value);
    string attrstr;
@ -1025,14 +1014,15 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
                valuetype = FieldTraits::INT;
            } else {
                LOGERR("readFieldsConfig: bad type for value for " <<
-                       fieldname << " : " << tval << "\n");
+                       fieldname << " : " << tval << endl);
                return 0;
            }
        }
        int valuelen = (int)attrs.getInt("len", 0);
        // Find or insert traits entry
        const auto pit =
-            m_fldtotraits.insert(pair<string, FieldTraits>(canonic, FieldTraits())).first;
+            m_fldtotraits.insert(
                pair<string, FieldTraits>(canonic, FieldTraits())).first;
        pit->second.valueslot = valueslot;
        pit->second.valuetype = valuetype;
        pit->second.valuelen = valuelen;
@ -1109,7 +1099,8 @@ bool RclConfig::getFieldTraits(const string& _fld, const FieldTraits **ftpp,
                pit->second.pfx << "]\n");
        return true;
    } else {
-        LOGDEB1("RclConfig::getFieldTraits: no prefix for field [" << fld << "]\n");
+        LOGDEB1("RclConfig::getFieldTraits: no prefix for field [" << fld <<
                "]\n");
        *ftpp = 0;
        return false;
    }
@ -1131,7 +1122,8 @@ string RclConfig::fieldCanon(const string& f) const
    string fld = stringtolower(f);
    const auto it = m_aliastocanon.find(fld);
    if (it != m_aliastocanon.end()) {
-        LOGDEB1("RclConfig::fieldCanon: [" << f << "] -> [" << it->second << "]\n");
+        LOGDEB1("RclConfig::fieldCanon: [" << f << "] -> [" << it->second <<
                "]\n");
        return it->second;
    }
    LOGDEB1("RclConfig::fieldCanon: [" << f << "] -> [" << fld << "]\n");
@ -1142,7 +1134,8 @@ string RclConfig::fieldQCanon(const string& f) const
 {
    const auto it = m_aliastoqcanon.find(stringtolower(f));
    if (it != m_aliastoqcanon.end()) {
-        LOGDEB1("RclConfig::fieldQCanon: [" << f << "] -> ["  << it->second << "]\n");
+        LOGDEB1("RclConfig::fieldQCanon: [" << f << "] -> ["  << it->second <<
                "]\n");
        return it->second;
    }
    return fieldCanon(f);
@ -1172,14 +1165,15 @@ set<string> RclConfig::getMimeViewerAllEx() const
    string base, plus, minus;
    mimeview->get("xallexcepts", base, "");
-    LOGDEB1("RclConfig::getMimeViewerAllEx(): base: " << base << "\n");
+    LOGDEB1("RclConfig::getMimeViewerAllEx(): base: " << base << endl);
    mimeview->get("xallexcepts+", plus, "");
-    LOGDEB1("RclConfig::getMimeViewerAllEx(): plus: " << plus << "\n");
+    LOGDEB1("RclConfig::getMimeViewerAllEx(): plus: " << plus << endl);
    mimeview->get("xallexcepts-", minus, "");
-    LOGDEB1("RclConfig::getMimeViewerAllEx(): minus: " << minus << "\n");
+    LOGDEB1("RclConfig::getMimeViewerAllEx(): minus: " << minus << endl);
    computeBasePlusMinus(res, base, plus, minus);
-    LOGDEB1("RclConfig::getMimeViewerAllEx(): res: " << stringsToString(res) << "\n");
+    LOGDEB1("RclConfig::getMimeViewerAllEx(): res: " << stringsToString(res)
            << endl);
    return res;
 }
@ -1206,9 +1200,11 @@ bool RclConfig::setMimeViewerAllEx(const set<string>& allex)
    return true;
 }
-string RclConfig::getMimeViewerDef(const string &mtype, const string& apptag, bool useall) const
+string RclConfig::getMimeViewerDef(const string &mtype, const string& apptag,
                                   bool useall) const
 {
-    LOGDEB2("RclConfig::getMimeViewerDef: mtype [" << mtype << "] apptag [" << apptag << "]\n");
+    LOGDEB2("RclConfig::getMimeViewerDef: mtype [" << mtype << "] apptag ["
            << apptag << "]\n");
    string hs;
    if (mimeview == 0)
        return hs;
@ -1235,18 +1231,9 @@ string RclConfig::getMimeViewerDef(const string &mtype, const string& apptag, bo
        // Fallthrough to normal case.
    }
-    if (apptag.empty() || !mimeview->get(mtype + string("|") + apptag, hs, "view"))
+    if (apptag.empty() || !mimeview->get(mtype + string("|") + apptag,
                                         hs, "view"))
        mimeview->get(mtype, hs, "view");
    // Last try for text/xxx if alltext is set
    if (hs.empty() && mtype.find("text/") == 0 && mtype != "text/plain") {
        bool alltext{false};
        getConfParam("textunknownasplain", &alltext);
        if (alltext) {
            return getMimeViewerDef("text/plain", apptag, useall);
        }
    }
    return hs;
 }
@ -1255,8 +1242,9 @@ bool RclConfig::getMimeViewerDefs(vector<pair<string, string> >& defs) const
    if (mimeview == 0)
        return false;
    vector<string>tps = mimeview->getNames("view");
-    for (const auto& tp : tps) {
+    for (vector<string>::const_iterator it = tps.begin(); 
-        defs.push_back(pair<string, string>(tp, getMimeViewerDef(tp, "", 0)));
+         it != tps.end();it++) {
        defs.push_back(pair<string, string>(*it, getMimeViewerDef(*it, "", 0)));
    }
    return true;
 }
@ -1410,39 +1398,17 @@ string RclConfig::getIdxStatusFile() const
 // Thanks to user Madhu for this fix.
 string RclConfig::getPidfile() const
 {
-    static string fn;
+    const char *p = getenv("XDG_RUNTIME_DIR");
-    if (fn.empty()) {
+    if (p) {
-#ifndef _WIN32
+        string base = path_canon(p);
-        const char *p = getenv("XDG_RUNTIME_DIR");
+        string digest, hex;
-        string rundir;
+        string cfdir = path_canon(getConfDir());
-        if (nullptr == p) {
+        path_catslash(cfdir);
-            // Problem is, we may have been launched outside the desktop, maybe by cron. Basing
+        MD5String(cfdir, digest);
-            // everything on XDG_RUNTIME_DIR was a mistake, sometimes resulting in different pidfiles
+        MD5HexPrint(digest, hex);
-            // being used by recollindex instances. So explicitely test for /run/user/$uid, still
+        return path_cat(base, "/recoll-" + hex + "-index.pid");
            // leaving open the remote possibility that XDG_RUNTIME_DIR would be set to something
            // else...
            rundir = path_cat("/run/user", lltodecstr(getuid()));
            if (path_isdir(rundir)) {
                p = rundir.c_str();
            }
        }
        if (p) {
            string base = path_canon(p);
            string digest, hex;
            string cfdir = path_canon(getConfDir());
            path_catslash(cfdir);
            MD5String(cfdir, digest);
            MD5HexPrint(digest, hex);
            fn =  path_cat(base, "recoll-" + hex + "-index.pid");
            goto out;
        }
 #endif // ! _WIN32
        fn = path_cat(getCacheDir(), "index.pid");
    out:
        LOGINF("RclConfig: pid/lock file: " << fn << "\n");
    } 
-    return fn;
+    return path_cat(getCacheDir(), "index.pid");
 }
@ -1475,7 +1441,7 @@ static string path_diffstems(const string& p1, const string& p2,
            break;
        }
    }
-    //cerr << "Common length = " << cl << "\n";
+    //cerr << "Common length = " << cl << endl;
    if (cl == 0) {
        reason = "Input paths are empty or have no common part";
        return reason;
@ -1507,12 +1473,13 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
            cur_confdir = m_confdir;
        }
        LOGDEB1("RclConfig::urlrewrite: orgidxconfdir: " << orig_confdir <<
-                " cur_confdir " << cur_confdir << "\n");
+                " cur_confdir " << cur_confdir << endl);
-        string reason = path_diffstems(orig_confdir, cur_confdir, confstemorg, confstemrep);
+        string reason = path_diffstems(orig_confdir, cur_confdir,
                                       confstemorg, confstemrep);
        if (!reason.empty()) {
            LOGERR("urlrewrite: path_diffstems failed: " << reason <<
                   " : orig_confdir [" << orig_confdir <<
-                   "] cur_confdir [" << cur_confdir << "\n");
+                   "] cur_confdir [" << cur_confdir << endl);
            confstemorg = confstemrep = "";
        }
    }
@ -1520,7 +1487,8 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
    // Do path translations exist for this index ?
    bool needptrans = true;
    if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) {
-        LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " << m_ptrans << ")\n");
+        LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " <<
                m_ptrans << ")\n");
        needptrans = false;
    }
@ -1671,7 +1639,6 @@ vector<string> RclConfig::getDaemSkippedPaths() const
 // and filtersdir from the config file to the PATH, then use execmd::which()
 string RclConfig::findFilter(const string &icmd) const
 {
    LOGDEB2("findFilter: " << icmd << "\n");
    // If the path is absolute, this is it
    if (path_isabsolute(icmd))
        return icmd;
@ -1719,18 +1686,12 @@ bool RclConfig::processFilterCmd(std::vector<std::string>& cmd) const
    LOGDEB0("processFilterCmd: in: " << stringsToString(cmd) << "\n");
    auto it = cmd.begin();
-#ifdef _WIN32
+    // Special-case python and perl on windows: we need to also locate the
-    // Special-case interpreters on windows: we used to have an additional 1st argument "python" in
+    // first argument which is the script name "python somescript.py". 
-    // mimeconf, but we now rely on the .py extension for better sharing of mimeconf.
+    // On Unix, thanks to #!, we usually just run "somescript.py", but need
-    std::string ext = path_suffix(*it);
+    // the same change if we ever want to use the same cmd line as windows
-    if ("py" == ext) {
+    bool hasinterp = !stringlowercmp("python", *it) ||
-        it = cmd.insert(it, findFilter("python"));
+        !stringlowercmp("perl", *it);
        it++;
    } else if ("pl" == ext) {
        it = cmd.insert(it, findFilter("perl"));
        it++;
    }
 #endif
    // Note that, if the cmd vector size is 1, post-incrementing the
    // iterator in the following statement, which works on x86, leads
@ -1739,15 +1700,25 @@ bool RclConfig::processFilterCmd(std::vector<std::string>& cmd) const
    // whatever... We do it later then.
    *it = findFilter(*it);
    if (hasinterp) {
        if (cmd.size() < 2) {
            LOGERR("processFilterCmd: python/perl cmd: no script?. [" <<
                   stringsToString(cmd) << "]\n");
            return false;
        } else {
            ++it;
            *it = findFilter(*it);
        }
    }
    LOGDEB0("processFilterCmd: out: " << stringsToString(cmd) << "\n");
    return true;
 }
-// This now does nothing more than processFilterCmd (after we changed to relying on the py extension)
+bool RclConfig::pythonCmd(const std::string& scriptname,
-bool RclConfig::pythonCmd(const std::string& scriptname, std::vector<std::string>& cmd) const
+                          std::vector<std::string>& cmd) const
 {
 #ifdef _WIN32
-    cmd = {scriptname};
+    cmd = {"python", scriptname};
 #else
    cmd = {scriptname};
 #endif
--- a/src/common/rclconfig.h
+++ b/src/common/rclconfig.h
@ -103,14 +103,6 @@ public:
        freeAll();
    }
    RclConfig& operator=(const RclConfig &r) {
        if (this != &r) {
            freeAll();
            initFrom(r);
        }
        return *this;
    }
    // Return a writable clone of the main config. This belongs to the
    // caller (must delete it when done)
    ConfNull *cloneMainConfig();
@ -259,18 +251,12 @@ public:
    string getMimeHandlerDef(const string &mimetype, bool filtertypes=false,
                             const std::string& fn = std::string());
-    /** For lines like: [name = some value; attr1 = value1; attr2 = val2]
+    /** For lines like: "name = some value; attr1 = value1; attr2 = val2"
     * Separate the value and store the attributes in a ConfSimple 
-     *
+     * @param whole the raw value. No way to escape a semi-colon in there.
     * In the value part, semi-colons inside double quotes are ignored, and double quotes are
     * conserved. In the common case where the string is then processed by stringToStrings() to
     * build a command line, this allows having semi-colons inside arguments. However, no backslash
     * escaping is possible, so that, for example "bla\"1;2\"" would not work (the value part
     * would stop at the semi-colon).
     *
     * @param whole the raw value.
     */
-    static bool valueSplitAttributes(const string& whole, string& value, ConfSimple& attrs) ;
+    static bool valueSplitAttributes(const string& whole, string& value, 
                                     ConfSimple& attrs) ;
    /** Compute difference between 'base' and 'changed', as elements to be
     * added and substracted from base. Input and output strings are in
@ -376,6 +362,14 @@ public:
        return o_origcwd;
    }
    RclConfig& operator=(const RclConfig &r) {
        if (this != &r) {
            freeAll();
            initFrom(r);
        }
        return *this;
    }
    friend class ParamStale;
 private:
--- a/src/common/rclinit.cpp
+++ b/src/common/rclinit.cpp
@ -312,7 +312,7 @@ RclConfig *recollinit(int flags,
 #if defined(MACPORTS)
    PATH = string("/opt/local/bin/") + ":" + PATH;
 #elif defined(HOMEBREW)
-    PATH = string("/opt/homebrew/bin:/usr/local/bin/") + ":" + PATH;
+    PATH = string("/usr/local/bin/") + ":" + PATH;
 #else
    // Native qt build. Add our own directory to the path so that
    // recoll finds recollindex pkgdatadir:
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@ -81,8 +81,6 @@ unsigned int  TextSplit::o_CJKNgramLen{2};
 bool          TextSplit::o_noNumbers{false};
 bool          TextSplit::o_deHyphenate{false};
 int           TextSplit::o_maxWordLength{40};
 int           TextSplit::o_maxWordsInSpan{6};
 static const int o_CJKMaxNgramLen{5};
 bool o_exthangultagger{false};
@ -92,7 +90,6 @@ static char underscoreatend = '_';
 void TextSplit::staticConfInit(RclConfig *config)
 {
    config->getConfParam("maxtermlength", &o_maxWordLength);
    config->getConfParam("maxwordsinspan", &o_maxWordsInSpan);
    bool bvalue{false};
    if (config->getConfParam("nocjk", &bvalue) && bvalue == true) {
@ -209,26 +206,32 @@ public:
 };
 static const CharClassInit charClassInitInstance;
-static inline bool isvisiblewhite(int c)
+static inline int whatcc(unsigned int c, char *asciirep = nullptr)
 {
    return visiblewhite.find(c) != visiblewhite.end();
 }
 static inline int whatcc(unsigned int c)
 {
    if (c <= 127) {
        return charclasses[c]; 
    } else {
-        if (c == 0x2010 || c == 0x2019 || c == 0x275c || c == 0x02bc) {
+        if (c == 0x2010) {
            // Special treatment for hyphen: handle as ascii minus. See
            // doc/notes/minus-hyphen-dash.txt
            if (asciirep)
                *asciirep = '-';
            return c;
        } else if (c == 0x2019 || c == 0x275c || c == 0x02bc) {
            // Things sometimes replacing a single quote. Use single
            // quote so that span processing works ok
            if (asciirep)
                *asciirep = '\'';
            return c;
        } else if (sskip.find(c) != sskip.end()) {
            return SKIP;
        } else if (spunc.find(c) != spunc.end()) {
            return SPACE;
        } else {
-            auto it = lower_bound(vpuncblocks.begin(), vpuncblocks.end(), c);
+            vector<unsigned int>::iterator it = 
-            if (it == vpuncblocks.end())
+                lower_bound(vpuncblocks.begin(), vpuncblocks.end(), c);
-                return LETTER;
+                if (it == vpuncblocks.end())
                        return LETTER;
            if (c == *it)
                return SPACE;
            if ((it - vpuncblocks.begin()) % 2 == 1) {
@ -242,16 +245,16 @@ static inline int whatcc(unsigned int c)
 // testing whatcc...
 #if 0
-unsigned int testvalues[] = {'a', '0', 0x80, 0xbf, 0xc0, 0x05c3, 0x1000, 
+  unsigned int testvalues[] = {'a', '0', 0x80, 0xbf, 0xc0, 0x05c3, 0x1000, 
-                             0x2000, 0x2001, 0x206e, 0x206f, 0x20d0, 0x2399, 
+                               0x2000, 0x2001, 0x206e, 0x206f, 0x20d0, 0x2399, 
-                             0x2400, 0x2401, 0x243f, 0x2440, 0xff65};
+                               0x2400, 0x2401, 0x243f, 0x2440, 0xff65};
-int ntest = sizeof(testvalues) / sizeof(int);
+  int ntest = sizeof(testvalues) / sizeof(int);
-for (int i = 0; i < ntest; i++) {
+  for (int i = 0; i < ntest; i++) {
-    int ret = whatcc(testvalues[i]);
+      int ret = whatcc(testvalues[i]);
-    printf("Tested value 0x%x, returned value %d %s\n",
+      printf("Tested value 0x%x, returned value %d %s\n",
-           testvalues[i], ret, ret == LETTER ? "LETTER" : 
+             testvalues[i], ret, ret == LETTER ? "LETTER" : 
-           ret == SPACE ? "SPACE" : "OTHER");
+             ret == SPACE ? "SPACE" : "OTHER");
-}
+  }
 #endif
 // CJK Unicode character detection. CJK text is indexed using an n-gram
@ -284,16 +287,16 @@ for (int i = 0; i < ntest; i++) {
 // FF00..FFEF; Halfwidth and Fullwidth Forms
 // 20000..2A6DF; CJK Unified Ideographs Extension B
 // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
-#define UNICODE_IS_CJK(p)                       \
+#define UNICODE_IS_CJK(p)                                               \
-    (((p) >= 0x1100 && (p) <= 0x11FF) ||        \
+    (((p) >= 0x1100 && (p) <= 0x11FF) ||                                \
-     ((p) >= 0x2E80 && (p) <= 0x2EFF) ||        \
+     ((p) >= 0x2E80 && (p) <= 0x2EFF) ||                                \
-     ((p) >= 0x3000 && (p) <= 0x9FFF) ||        \
+     ((p) >= 0x3000 && (p) <= 0x9FFF) ||                                \
-     ((p) >= 0xA700 && (p) <= 0xA71F) ||        \
+     ((p) >= 0xA700 && (p) <= 0xA71F) ||                                \
-     ((p) >= 0xAC00 && (p) <= 0xD7AF) ||        \
+     ((p) >= 0xAC00 && (p) <= 0xD7AF) ||                                \
-     ((p) >= 0xF900 && (p) <= 0xFAFF) ||        \
+     ((p) >= 0xF900 && (p) <= 0xFAFF) ||                                \
-     ((p) >= 0xFE30 && (p) <= 0xFE4F) ||        \
+     ((p) >= 0xFE30 && (p) <= 0xFE4F) ||                                \
-     ((p) >= 0xFF00 && (p) <= 0xFFEF) ||        \
+     ((p) >= 0xFF00 && (p) <= 0xFFEF) ||                                \
-     ((p) >= 0x20000 && (p) <= 0x2A6DF) ||      \
+     ((p) >= 0x20000 && (p) <= 0x2A6DF) ||                              \
     ((p) >= 0x2F800 && (p) <= 0x2FA1F))
 // We should probably map 'fullwidth ascii variants' and 'halfwidth
@ -301,9 +304,9 @@ for (int i = 0; i < ntest; i++) {
 // filter, KuromojiNormalizeFilter.java
 // 309F is Hiragana.
 #ifdef KATAKANA_AS_WORDS
-#define UNICODE_IS_KATAKANA(p)                  \
+#define UNICODE_IS_KATAKANA(p)                                          \
-    ((p) != 0x309F &&                           \
+    ((p) != 0x309F &&                                                   \
-     (((p) >= 0x3099 && (p) <= 0x30FF) ||       \
+     (((p) >= 0x3099 && (p) <= 0x30FF) ||                               \
      ((p) >= 0x31F0 && (p) <= 0x31FF)))
 #else
 #define UNICODE_IS_KATAKANA(p) false
@ -312,14 +315,14 @@ for (int i = 0; i < ntest; i++) {
 #ifdef HANGUL_AS_WORDS
 // If no external tagger is configured, we process HANGUL as generic
 // cjk (n-grams)
-#define UNICODE_IS_HANGUL(p) (                  \
+#define UNICODE_IS_HANGUL(p) (                 \
-        o_exthangultagger &&                    \
+        o_exthangultagger &&                   \
-        (((p) >= 0x1100 && (p) <= 0x11FF) ||    \
+        (((p) >= 0x1100 && (p) <= 0x11FF) ||   \
-         ((p) >= 0x3130 && (p) <= 0x318F) ||    \
+         ((p) >= 0x3130 && (p) <= 0x318F) ||   \
-         ((p) >= 0x3200 && (p) <= 0x321e) ||    \
+         ((p) >= 0x3200 && (p) <= 0x321e) ||   \
-         ((p) >= 0x3248 && (p) <= 0x327F) ||    \
+         ((p) >= 0x3248 && (p) <= 0x327F) ||   \
-         ((p) >= 0x3281 && (p) <= 0x32BF) ||    \
+         ((p) >= 0x3281 && (p) <= 0x32BF) ||   \
-         ((p) >= 0xAC00 && (p) <= 0xD7AF))      \
+         ((p) >= 0xAC00 && (p) <= 0xD7AF))     \
        )
 #else
 #define UNICODE_IS_HANGUL(p) false
@ -348,16 +351,19 @@ bool TextSplit::isNGRAMMED(int c)
 }
-// This is used to detect katakana/other transitions, which must trigger a word split (there is not
+// This is used to detect katakana/other transitions, which must
-// always a separator, and katakana is otherwise treated like other, in the same routine, unless cjk
+// trigger a word split (there is not always a separator, and katakana
 // is otherwise treated like other, in the same routine, unless cjk
 // which has its span reader causing a word break)
 enum CharSpanClass {CSC_HANGUL, CSC_CJK, CSC_KATAKANA, CSC_OTHER};
-std::vector<CharFlags> csc_names {CHARFLAGENTRY(CSC_HANGUL), CHARFLAGENTRY(CSC_CJK),
+std::vector<CharFlags> csc_names {CHARFLAGENTRY(CSC_HANGUL),
-                                  CHARFLAGENTRY(CSC_KATAKANA), CHARFLAGENTRY(CSC_OTHER)};
+        CHARFLAGENTRY(CSC_CJK), CHARFLAGENTRY(CSC_KATAKANA),
        CHARFLAGENTRY(CSC_OTHER)};
-// Final term checkpoint: do some checking (the kind which is simpler to do here than in the main
+// Final term checkpoint: do some checking (the kind which is simpler
-// loop), then send term to our client.
+// to do here than in the main loop), then send term to our client.
-inline bool TextSplit::emitterm(bool isspan, string &w, int pos, size_t btstart, size_t btend)
+inline bool TextSplit::emitterm(bool isspan, string &w, int pos, 
                                size_t btstart, size_t btend)
 {
    LOGDEB2("TextSplit::emitterm: [" << w << "] pos " << pos << "\n");
@ -372,38 +378,39 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos, size_t btstart,
    PRETEND_USE(isspan);
 #endif
-    if (l == 0 || l > o_maxWordLength) {
+    if (l > 0 && l <= o_maxWordLength) {
-        return true;
+        // 1 byte word: we index single ascii letters and digits, but
-    }
+        // nothing else. We might want to turn this into a test for a
-    if (l == 1) {
+        // single utf8 character instead ?
-        // 1 byte word: we index single ascii letters and digits, but nothing else. We might want to
+        if (l == 1) {
-        // turn this into a test for a single utf8 character instead ?
+            unsigned int c = ((unsigned int)w[0]) & 0xff;
-        unsigned int c = ((unsigned int)w[0]) & 0xff;
+            if (charclasses[c] != A_ULETTER && charclasses[c] != A_LLETTER && 
-        if (charclasses[c] != A_ULETTER && charclasses[c] != A_LLETTER && 
+                charclasses[c] != DIGIT &&
-            charclasses[c] != DIGIT &&
+                (!(m_flags & TXTS_KEEPWILD) || charclasses[c] != WILD)
-            (!(m_flags & TXTS_KEEPWILD) || charclasses[c] != WILD)
+                ) {
-            ) {
+                //cerr << "ERASING single letter term " << c << endl;
-            //cerr << "ERASING single letter term " << c << endl;
+                return true;
-            return true;
+            }
        }
        if (pos != m_prevpos || l != m_prevlen) {
            bool ret = takeword(w, pos, int(btstart), int(btend));
            m_prevpos = pos;
            m_prevlen = int(w.length());
            return ret;
        }
        LOGDEB2("TextSplit::emitterm:dup: [" << w << "] pos " << pos << "\n");
    }
    if (pos != m_prevpos || l != m_prevlen) {
        bool ret = takeword(w, pos, int(btstart), int(btend));
        m_prevpos = pos;
        m_prevlen = int(w.length());
        return ret;
    }
    LOGDEB2("TextSplit::emitterm:dup: [" << w << "] pos " << pos << "\n");
    return true;
 }
-// Check for an acronym/abbreviation ie I.B.M. This only works with ascii (we do not detect
+// Check for an acronym/abbreviation ie I.B.M. This only works with
-// non-ascii utf-8 acronyms)
+// ascii (no non-ascii utf-8 acronym are possible)
 bool TextSplit::span_is_acronym(string *acronym)
 {
    bool acron = false;
-    if (m_wordLen != m_span.length() && m_span.length() > 2 && m_span.length() <= 20) {
+    if (m_wordLen != m_span.length() && 
        m_span.length() > 2 && m_span.length() <= 20) {
        acron = true;
        // Check odd chars are '.'
        for (unsigned int i = 1 ; i < m_span.length(); i += 2) {
@ -432,23 +439,27 @@ bool TextSplit::span_is_acronym(string *acronym)
 }
-// Generate terms from span. Have to take into account the flags: ONLYSPANS, NOSPANS, noNumbers
+// Generate terms from span. Have to take into account the
 // flags: ONLYSPANS, NOSPANS, noNumbers
 bool TextSplit::words_from_span(size_t bp)
 {
 #if 0
-    cerr << "Span: [" << m_span << "] " << " bp " << bp <<
+    cerr << "Span: [" << m_span << "] " << " w_i_s size: " << 
-        " w_i_s size: " << m_words_in_span.size() <<  " : ";
+        m_words_in_span.size() <<  " : ";
    for (unsigned int i = 0; i < m_words_in_span.size(); i++) {
-        cerr << " [" << m_words_in_span[i].first << " " << m_words_in_span[i].second << "] ";
+        cerr << " [" << m_words_in_span[i].first << " " <<
            m_words_in_span[i].second << "] ";
    }
    cerr << endl;
 #endif
    int spanwords = int(m_words_in_span.size());
-    // It seems that something like: tv_combo-sample_util.Po@am_quote can get the splitter to call
+    // It seems that something like: tv_combo-sample_util.Po@am_quote
-    // doemit with a span of '@' and words_in_span==0, which then causes a crash when accessing
+    // can get the splitter to call doemit with a span of '@' and
-    // words_in_span[0] if the stl assertions are active (e.g. Fedora RPM build). Not too sure what
+    // words_in_span==0, which then causes a crash when accessing
-    // the right fix would be, but for now, just defend against it
+    // words_in_span[0] if the stl assertions are active (e.g. Fedora
    // RPM build). Not too sure what the right fix would be, but for
    // now, just defend against it
    if (spanwords == 0) {
        return true;
    }
@ -456,17 +467,21 @@ bool TextSplit::words_from_span(size_t bp)
    // Byte position of the span start
    size_t spboffs = bp - m_span.size();
-    if (o_deHyphenate && spanwords == 2 && m_span[m_words_in_span[0].second] == '-') {
+    if (o_deHyphenate && spanwords == 2 && 
        m_span[m_words_in_span[0].second] == '-') {
        unsigned int s0 = m_words_in_span[0].first;
        unsigned int l0 = m_words_in_span[0].second - m_words_in_span[0].first;
        unsigned int s1 = m_words_in_span[1].first;
        unsigned int l1 = m_words_in_span[1].second - m_words_in_span[1].first;
        string word = m_span.substr(s0, l0) + m_span.substr(s1, l1);
        if (l0 && l1) 
-            emitterm(false, word, m_spanpos, spboffs, spboffs + m_words_in_span[1].second);
+            emitterm(false, word,
                     m_spanpos, spboffs, spboffs + m_words_in_span[1].second);
    }
-    for (int i = 0; i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords); i++) {
+    for (int i = 0; 
         i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords); 
         i++) {
        int deb = m_words_in_span[i].first;
        bool noposinc = m_words_in_span[i].second == deb;
@ -475,7 +490,8 @@ bool TextSplit::words_from_span(size_t bp)
             j++) {
            int fin = m_words_in_span[j].second;
-            //cerr << "i " << i << " j " << j << " deb " << deb << " fin " << fin << endl;
+            //cerr << "i " << i << " j " << j << " deb " << deb << 
            //" fin " << fin << endl;
            if (fin - deb > int(m_span.size()))
                break;
            string word(m_span.substr(deb, fin-deb));
@ -503,7 +519,7 @@ bool TextSplit::words_from_span(size_t bp)
 * 
 * @return true if ok, false for error. Splitting should stop in this case.
 * @param spanerase Set if the current span is at its end. Process it.
- * @param bp        The current BYTE position in the stream (it's beyond the current span data).
+ * @param bp        The current BYTE position in the stream
 */
 inline bool TextSplit::doemit(bool spanerase, size_t _bp)
 {
@ -516,7 +532,7 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
    if (m_wordLen) {
        // We have a current word. Remember it
-        if (int(m_words_in_span.size()) >= o_maxWordsInSpan) {
+        if (m_words_in_span.size() >= 6) {
            // Limit max span word count
            spanerase = true;
        }
@ -534,13 +550,38 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
        return true;
    }
-    // Span is done (too long or span-terminating character). Produce terms and reset it.
+
    // Span is done (too long or span-terminating character). Produce
    // terms and reset it.
    string acronym;
    if (span_is_acronym(&acronym)) {
        if (!emitterm(false, acronym, m_spanpos, bp - m_span.length(), bp))
            return false;
    }
    // Maybe trim at end. These are chars that we might keep
    // inside a span, but not at the end.
    string::size_type trimsz{0};
    while (trimsz < m_span.length()) {
        auto c = m_span[m_span.length() - 1 - trimsz];
        if (c == '.' || c == '-' || c == ',' || c == '@' || c == '\'' ||
            c == underscoreatend) {
            trimsz++;
            if (m_words_in_span.size() &&
                m_words_in_span.back().second > int(m_span.size())) {
                m_words_in_span.back().second = int(m_span.size());
            }
            if (--bp < 0) {
                bp = 0;
            }
        } else {
            break;
        }
    }
    if (trimsz > 0) {
        m_span.resize(m_span.length() - trimsz);
    }
    if (!words_from_span(bp)) {
        return false;
    }
@ -599,7 +640,6 @@ bool TextSplit::text_to_words(const string &in)
    clearsplitstate();
    bool pagepending = false;
    bool nlpending = false;
    bool softhyphenpending = false;
    // Running count of non-alphanum chars. Reset when we see one;
@ -673,7 +713,8 @@ bool TextSplit::text_to_words(const string &in)
        prev_csc = csc;
 #endif
-        int cc = whatcc(c);
+        char asciirep = 0;
        int cc = whatcc(c, &asciirep);
        switch (cc) {
        case SKIP:
@ -709,10 +750,6 @@ bool TextSplit::text_to_words(const string &in)
                pagepending = false;
                newpage(m_wordpos);
            }
            if (nlpending) {
                nlpending = false;
                newline(m_wordpos);
            }
            break;
        case WILD:
@ -736,7 +773,7 @@ bool TextSplit::text_to_words(const string &in)
                } 
            } else if (m_inNumber) {
                if ((m_span[m_span.length() - 1] == 'e' ||
-                     m_span[m_span.length() - 1] == 'E')) {
+                                      m_span[m_span.length() - 1] == 'E')) {
                    if (isdigit(whatcc(it[it.getCpos()+1]), m_flags)) {
                        m_wordLen += it.appendchartostring(m_span);
                        STATS_INC_WORDCHARS;
@ -744,24 +781,17 @@ bool TextSplit::text_to_words(const string &in)
                    }
                }
            } else {
                int nextc = it[it.getCpos()+1];
                if (cc == '+') {
-                    if (nextc == '+' || nextc == -1 || isvisiblewhite(nextc)) {
+                    int nextc = it[it.getCpos()+1];
                    if (nextc == '+' || nextc == -1 || visiblewhite.find(nextc) 
                        != visiblewhite.end()) {
                        // someword++[+...] !
                        m_wordLen += it.appendchartostring(m_span);
                        STATS_INC_WORDCHARS;
                        break;
                    }
                } else {
-                    // Note about dangling hyphens: we always strip '-' found before whitespace,
+                    // Treat '-' inside span as glue char
                    // even before a newline, then generate two terms, before and after the line
                    // break. We have no way to know if '-' is there because a word was broken by
                    // justification or if it was part of an actual compound word (would need a
                    // dictionary to check). As soft-hyphen *should* be used if the '-' is not part
                    // of the text.
                    if (nextc == -1 || isvisiblewhite(nextc)) {
                        goto SPACE;
                    }
                    if (!doemit(false, it.getBpos()))
                        return false;
                    m_inNumber = false;
@ -797,7 +827,8 @@ bool TextSplit::text_to_words(const string &in)
                        m_inNumber = true;
                        m_wordLen += it.appendchartostring(m_span);
                    } else {
-                        m_words_in_span.push_back(pair<int,int>(m_wordStart, m_wordStart));
+                        m_words_in_span.
                            push_back(pair<int,int>(m_wordStart, m_wordStart));
                        m_wordStart += it.appendchartostring(m_span);
                    }
                    STATS_INC_WORDCHARS;
@ -814,28 +845,38 @@ bool TextSplit::text_to_words(const string &in)
        }
        break;
-        case 0x2010: // hyphen
+        case 0x2010:
-        case 0x2019: // variations on single quote
+        case 0x2019:
        case 0x275c:
        case 0x02bc:
            // Unicode chars which we replace with ascii for
            // processing (2010 -> -,others -> '). It happens that
            // they all work as glue chars and use the same code, but
            // there might be cases needing different processing.
            // Hyphen is replaced with ascii minus
            if (m_wordLen) {
                // Inside span: glue char
                if (!doemit(false, it.getBpos()))
                    return false;
                m_inNumber = false;
                m_span += asciirep;
                m_wordStart++;
                break;
            }
            goto SPACE;
        case '@':
        case '_': // If underscoreasletter is set, we'll never get this
        case '\'':
        {
            // If in word, potential span: o'brien, jf@dockes.org,
            // else just ignore
            int nextc = it[it.getCpos()+1];
            if (nextc == -1 || isvisiblewhite(nextc)) {
                goto SPACE;
            }
            if (m_wordLen) {
                if (!doemit(false, it.getBpos()))
                    return false;
                m_inNumber = false;
                m_wordStart += it.appendchartostring(m_span);
            }
-        }
+            break;
        break;
        case '#':  {
            int w = whatcc(it[it.getCpos()+1]);
@ -858,10 +899,19 @@ bool TextSplit::text_to_words(const string &in)
            break;
        case '\n':
            nlpending = true;
            /* FALLTHROUGH */
        case '\r':
-            if (softhyphenpending) {
+            if (m_span.length() && *m_span.rbegin() == '-') {
                // if '-' is the last char before end of line, we
                // strip it.  We have no way to know if this is added
                // because of the line split or if it was part of an
                // actual compound word (would need a dictionary to
                // check).  As soft-hyphen *should* be used if the '-'
                // is not part of the text, it is better to properly
                // process a real compound word, and produce wrong
                // output from wrong text. The word-emitting routine
                // will strip the trailing '-'.
                goto SPACE;
            } else if (softhyphenpending) {
                // Don't reset soft-hyphen
                continue;
            } else {
@ -1057,7 +1107,7 @@ bool TextSplit::cjk_to_words(Utf8Iter& it, unsigned int *cp)
 // Specialization for countWords 
 class TextSplitCW : public TextSplit {
-public:
+ public:
    int wcnt;
    TextSplitCW(Flags flags) : TextSplit(flags), wcnt(0) {}
    bool takeword(const string &, int, int, int) {
@ -1082,7 +1132,7 @@ bool TextSplit::hasVisibleWhite(const string &in)
            LOGERR("hasVisibleWhite: error while scanning UTF-8 string\n");
            return false;
        }
-        if (isvisiblewhite(c))
+        if (visiblewhite.find(c) != visiblewhite.end())
            return true;
    }
    return false;
@ -1107,7 +1157,7 @@ template <class T> bool u8stringToStrings(const string &s, T &tokens)
        }
        switch (c) {
-        case '"': 
+            case '"': 
            switch(state) {
            case SPACE: state = INQUOTE; continue;
            case TOKEN: goto push_char;
@ -1116,7 +1166,7 @@ template <class T> bool u8stringToStrings(const string &s, T &tokens)
                state = SPACE; continue;
            }
            break;
-        case '\\': 
+            case '\\': 
            switch(state) {
            case SPACE: 
            case TOKEN: state=TOKEN; goto push_char;
@ -1125,25 +1175,25 @@ template <class T> bool u8stringToStrings(const string &s, T &tokens)
            }
            break;
-        case ' ': 
+            case ' ': 
-        case '\t': 
+            case '\t': 
-        case '\n': 
+            case '\n': 
-        case '\r': 
+            case '\r': 
            switch(state) {
-            case SPACE: continue;
+              case SPACE: continue;
-            case TOKEN: tokens.push_back(current); current.clear();
+              case TOKEN: tokens.push_back(current); current.clear();
                state = SPACE; continue; 
            case INQUOTE: 
            case ESCAPE: goto push_char;
            }
            break;
-        default:
+            default:
            switch(state) {
-            case ESCAPE: state = INQUOTE; break;
+              case ESCAPE: state = INQUOTE; break;
-            case SPACE:  state = TOKEN;  break;
+              case SPACE:  state = TOKEN;  break;
-            case TOKEN: 
+              case TOKEN: 
-            case INQUOTE: break;
+              case INQUOTE: break;
            }
        push_char:
            it.appendchartostring(current);
@ -1164,3 +1214,4 @@ bool TextSplit::stringToStrings(const string &s, vector<string> &tokens)
 {
    return u8stringToStrings<vector<string> >(s, tokens);
 }
--- a/src/common/textsplit.h
+++ b/src/common/textsplit.h
@ -50,8 +50,6 @@ public:
    TextSplit(Flags flags = Flags(TXTS_NONE))
        : m_flags(flags) {}
    virtual ~TextSplit() {}
    TextSplit(const TextSplit&) = delete;
    TextSplit& operator=(const TextSplit&) = delete;
    /** Call at program initialization to read non default values from the 
        configuration */
@ -73,9 +71,6 @@ public:
     * just don't know about pages. */
    virtual void newpage(int /*pos*/) {}
    /** Called when we encounter newline \n 0x0a. Override to use the event. */
    virtual void newline(int /*pos*/) {}
    // Static utility functions:
    /** Count words in string, as the splitter would generate them */
@ -162,16 +157,12 @@ private:
    static bool o_deHyphenate; // false
    static unsigned int o_CJKNgramLen; // 2
    static int o_maxWordLength; // 40
    static int o_maxWordsInSpan; // 6
    Flags         m_flags;
    // Current span. Might be jf.dockes@wanadoo.f
    std::string        m_span; 
    // Words in span: byte positions of start and end of words in m_span. For example:
    // 0   4    9
    // bill@some.com -> (0,4) (5,9) (10,13)
    std::vector <std::pair<int, int> > m_words_in_span;
    // Current word: no punctuation at all in there. Byte offset
--- a/src/common/textsplitko.cpp
+++ b/src/common/textsplitko.cpp
@ -133,7 +133,7 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
    unordered_map<string, string> args;
    args.insert(pair<string,string>{"data", string()});
-    string& inputdata(args.begin()->second);
+    string& inputdata{args.begin()->second};
    // We send the tagger name every time but it's only used the first
    // one: can't change it after init. We could avoid sending it
--- a/src/common/unacpp.cpp
+++ b/src/common/unacpp.cpp
@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2021 J.F.Dockes
+/* Copyright (C) 2004-2019 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
@ -29,7 +29,7 @@
 using namespace std;
 bool unacmaybefold(const string &in, string &out,
-                   const char *encoding, UnacOp what)
+           const char *encoding, UnacOp what)
 {
    char *cout = 0;
    size_t out_len;
@ -37,13 +37,16 @@ bool unacmaybefold(const string &in, string &out,
    switch (what) {
    case UNACOP_UNAC:
-        status = unac_string(encoding, in.c_str(), in.length(), &cout, &out_len);
+        status = unac_string(encoding, in.c_str(), in.length(), 
                             &cout, &out_len);
        break;
    case UNACOP_UNACFOLD:
-        status = unacfold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
+        status = unacfold_string(encoding, in.c_str(), in.length(), 
                                 &cout, &out_len);
        break;
    case UNACOP_FOLD:
-        status = fold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
+        status = fold_string(encoding, in.c_str(), in.length(), 
                             &cout, &out_len);
        break;
    }
--- a/src/common/webstore.h
+++ b/src/common/webstore.h
@ -34,8 +34,6 @@ class WebStore {
 public:
    WebStore(RclConfig *config);
    ~WebStore();
    WebStore(const WebStore&) = delete;
    WebStore& operator=(const WebStore&) = delete;
    bool getFromCache(const std::string& udi, Rcl::Doc &doc, std::string& data,
                      std::string *hittype = 0);
--- a/src/configure.ac
+++ b/src/configure.ac
@ -1,7 +1,7 @@
-AC_INIT([Recoll],[m4_esyscmd_s(cat RECOLL-VERSION.txt)])
+AC_INIT([Recoll], m4_esyscmd_s(cat VERSION))
 AC_CONFIG_HEADERS([common/autoconfig.h])
 AH_BOTTOM([#include "conf_post.h"])
-AC_PREREQ([2.69])
+AC_PREREQ(2.53)
 AC_CONFIG_SRCDIR(index/recollindex.cpp)
 AM_INIT_AUTOMAKE([1.10 no-define subdir-objects foreign])
@ -21,7 +21,7 @@ if test C$CXX = C ; then
   AC_MSG_ERROR([C++ compiler needed. Please install one (ie: gnu g++)])
 fi
 AC_LANG_PUSH([C++])
-AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[]])],[rcl_link_ok=yes],[rcl_link_ok=no])
+AC_TRY_LINK([],[], rcl_link_ok=yes, rcl_link_ok=no)
 if test "$rcl_link_ok" = "no" ; then
   AC_MSG_ERROR([No working C++ compiler was found])
 fi
@ -42,7 +42,7 @@ esac
 AC_PROG_YACC
-LT_INIT
+AC_PROG_LIBTOOL
 AC_C_BIGENDIAN
 AC_SYS_LARGEFILE
@ -53,7 +53,8 @@ AC_CHECK_HEADERS([sys/param.h, spawn.h])
 if test "x$ac_cv_func_posix_spawn" = xyes; then :
   AC_ARG_ENABLE(posix_spawn,
-    AS_HELP_STRING([--enable-posix_spawn],[Enable the use of posix_spawn().]),
+    AC_HELP_STRING([--enable-posix_spawn],
   [Enable the use of posix_spawn().]),
        posixSpawnEnabled=$enableval, posixSpawnEnabled=no)
 fi
 if test X$posixSpawnEnabled = Xyes ; then
@ -68,35 +69,11 @@ AC_CHECK_HEADERS([sys/mount.h sys/statfs.h sys/statvfs.h sys/vfs.h malloc.h mall
 AC_CHECK_FUNCS([posix_spawn setrlimit kqueue vsnprintf malloc_trim posix_fadvise])
 AC_CHECK_FUNCS(mkdtemp)
 AC_CHECK_LIB([pthread], [pthread_create], [], [])
 AC_SEARCH_LIBS([dlopen], [dl], [], [])
 if test X$ac_cv_search_function != Xno ; then
   AC_DEFINE(HAVE_DLOPEN, 1, [dlopen function is available])
 fi   
 AC_CHECK_LIB([z], [zlibVersion], [], [])
 ############# Putenv
 AC_MSG_CHECKING(for type of string parameter to putenv)
 AC_LANG_PUSH([C++])
 AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
    #include <stdlib.h>
  ]], [[
    putenv((const char *)0);
  ]])],[rcl_putenv_string_const="1"],[rcl_putenv_string_const="0"])
 if test X$rcl_putenv_string_const = X1 ; then
  AC_DEFINE(PUTENV_ARG_CONST, 1, [putenv parameter is const])
 fi
 AC_LANG_POP([C++])
 PKG_CHECK_MODULES([XSLT], [libxslt], [], AC_MSG_ERROR([libxslt]))
 # Use specific 'file' command ? (Useful on solaris to specify
 # /usr/local/bin/file instead of the system's which doesn't understand '-i'
 AC_ARG_WITH(file-command, 
-    AS_HELP_STRING([--with-file-command],[Specify version of 'file' command (ie: --with-file-command=/usr/local/bin/file)]),
+    AC_HELP_STRING([--with-file-command],
   [Specify version of 'file' command (ie: --with-file-command=/usr/local/bin/file)]),
        withFileCommand=$withval, withFileCommand=file)
 case $withFileCommand in
  file)
@ -119,7 +96,8 @@ AC_DEFINE(USE_SYSTEM_FILE_COMMAND, 1,
 # we do compile the aspell module using an internal copy of aspell.h
 # Only --with-aspell=no will completely disable aspell support
 AC_ARG_WITH(aspell, 
-    AS_HELP_STRING([--without-aspell],[Disable use of aspell spelling package to provide term expansion to other spellings]),
+    AC_HELP_STRING([--without-aspell],
   [Disable use of aspell spelling package to provide term expansion to other spellings]),
        withAspell=$withval, withAspell=yes)
 case $withAspell in
     no);;
@ -148,7 +126,8 @@ fi
 # Real time monitoring with inotify
 AC_ARG_WITH(inotify, 
-    AS_HELP_STRING([--with-inotify],[Use inotify for almost real time indexing of modified files (the default
+    AC_HELP_STRING([--with-inotify],
   [Use inotify for almost real time indexing of modified files (the default
    is yes on Linux).]),
        withInotify=$withval, withInotify=$inot_default)
@ -162,7 +141,8 @@ fi
 # Real time monitoring with FAM
 AC_ARG_WITH(fam, 
-    AS_HELP_STRING([--with-fam],[Use File Alteration Monitor for almost real time indexing of modified files. Give the fam/gamin library as argument (ie: /usr/lib/libfam.so) if configure does not find the right one.]),
+    AC_HELP_STRING([--with-fam],
   [Use File Alteration Monitor for almost real time indexing of modified files. Give the fam/gamin library as argument (ie: /usr/lib/libfam.so) if configure does not find the right one.]),
        withFam=$withval, withFam=yes)
 if test X$withFam != Xno -a X$withInotify != Xno ; then
@ -226,15 +206,21 @@ if test X$idxthreadsEnabled = Xyes ; then
  AC_DEFINE(IDX_THREADS, 1, [Use multiple threads for indexing])
 fi
 AC_ARG_ENABLE(testmains,
    AC_HELP_STRING([--enable-testmains],
   [Enable building small test drivers. These are not unit tests.]),
        buildtestmains=$enableval, buildtestmains=no)
 AM_CONDITIONAL([COND_TESTMAINS], [test "$buildtestmains" = yes])
 # Enable CamelCase word splitting. This is optional because it causes 
 # problems with phrases: with camelcase enabled, "MySQL manual"
 # will be matched by "MySQL manual" and "my sql manual" but not 
 # "mysql manual" (which would need increased slack as manual is now at pos
 # 2 instead of 1
 AC_ARG_ENABLE(camelcase,
-    AS_HELP_STRING([--enable-camelcase],
+    AC_HELP_STRING([--enable-camelcase],
-    [Enable splitting camelCase words. This is not enabled by default as
+   [Enable splitting camelCase words. This is not enabled by default as
-   it makes phrase matches more difficult: you need to use matching
+   this makes phrase matches more difficult: you need to use matching
   case in the phrase query to get a match. Ie querying for 
   "MySQL manual" and "my sql manual" are the same, but not the same as
   "mysql manual" (in phrases only and you could raise the phrase slack to
@ -244,46 +230,109 @@ if test X$camelcaseEnabled = Xyes ; then
  AC_DEFINE(RCL_SPLIT_CAMELCASE, 1, [Split camelCase words])
 fi
 AC_ARG_ENABLE(testmains,
    AS_HELP_STRING([--enable-testmains],[Enable building small test drivers. These are not unit tests.]),
        buildtestmains=$enableval, buildtestmains=no)
 AM_CONDITIONAL([COND_TESTMAINS], [test "$buildtestmains" = yes])
 AC_ARG_ENABLE(rclgrep,
    AS_HELP_STRING([--enable-rclgrep],[Enable building the index-less search tool.]),
        buildrclgrep=$enableval, buildrclgrep=no)
 AM_CONDITIONAL([COND_RCLGREP], [test "$buildrclgrep" = yes])
 # Disable building the python module.
 AC_ARG_ENABLE(python-module,
-    AS_HELP_STRING([--disable-python-module],[Do not build the Python module.]),
+    AC_HELP_STRING([--disable-python-module],
    [Do not build the Python module.]),
        pythonEnabled=$enableval, pythonEnabled=yes)
 AM_CONDITIONAL(MAKEPYTHON, [test X$pythonEnabled = Xyes])
 # Disable building the libchm python wrapper
-AC_ARG_ENABLE(python-chm,
+AC_ARG_ENABLE(python-chm, AC_HELP_STRING([--disable-python-chm],
-    AS_HELP_STRING([--disable-python-chm], [Do not build the libchm Python wrapper.]),
+    [Do not build the libchm Python wrapper.]),
    pythonChmEnabled=$enableval, pythonChmEnabled=yes)
 if test X$pythonChmEnabled = Xyes; then
   AC_CHECK_LIB([chm], [chm_resolve_object], [],
   [AC_MSG_ERROR([--enable-python-chm is set but libchm is not found])])
 fi
 AM_CONDITIONAL(MAKEPYTHONCHM, [test X$pythonChmEnabled = Xyes])
-AC_ARG_ENABLE(indexer, 
+AC_CHECK_FUNCS(mkdtemp)
-    AS_HELP_STRING([--disable-indexer],[Disable building the recollindex indexer.]),
+AC_CHECK_LIB([pthread], [pthread_create], [], [])
-        enableINDEXER=$enableval, enableINDEXER="yes")
+AC_SEARCH_LIBS([dlopen], [dl], [], [])
-AM_CONDITIONAL(MAKEINDEXER, [test X$enableINDEXER = Xyes])
+if test X$ac_cv_search_function != Xno ; then
   AC_DEFINE(HAVE_DLOPEN, 1, [dlopen function is available])
 fi   
 AC_CHECK_LIB([z], [zlibVersion], [], [])
 ############# Putenv
 AC_MSG_CHECKING(for type of string parameter to putenv)
 AC_LANG_PUSH([C++])
 AC_TRY_COMPILE([
    #include <stdlib.h>
  ],[
    putenv((const char *)0);
  ], rcl_putenv_string_const="1", rcl_putenv_string_const="0")
 if test X$rcl_putenv_string_const = X1 ; then
  AC_DEFINE(PUTENV_ARG_CONST, 1, [putenv parameter is const])
 fi
 AC_LANG_POP([C++])
 #### Look for Xapian. Done in a strange way to work around autoconf
 # cache
 XAPIAN_CONFIG=${XAPIAN_CONFIG:-no}
 if test "$XAPIAN_CONFIG" = "no"; then 
    AC_PATH_PROG(XAPIAN_CONFIG0, [xapian-config], no)
    XAPIAN_CONFIG=$XAPIAN_CONFIG0
 fi
 if test "$XAPIAN_CONFIG" = "no"; then 
   AC_PATH_PROG(XAPIAN_CONFIG1, [xapian-config-1.3], no)
   XAPIAN_CONFIG=$XAPIAN_CONFIG1
 fi
 if test "$XAPIAN_CONFIG" = "no"; then 
   AC_PATH_PROG(XAPIAN_CONFIG2, [xapian-config-1.1], no)
   XAPIAN_CONFIG=$XAPIAN_CONFIG2
 fi
 if test "$XAPIAN_CONFIG" = "no" ; then
   AC_MSG_ERROR([Cannot find xapian-config command in $PATH. Is
 xapian-core installed ?])
   exit 1
 fi
 LIBXAPIAN=`$XAPIAN_CONFIG --libs`
 # The --static thing fails with older Xapians. Happily enough they don't
 # need it either (because there are no needed libraries (no uuid and we
 # deal explicitly with libz)
 LIBXAPIANSTATICEXTRA=`$XAPIAN_CONFIG --static --libs 2> /dev/null`
 # Workaround for problem in xapian-config in some versions: wrongly lists
 # libstdc++.la in the lib list
 for i in $LIBXAPIAN ; do
    case $i in
    *stdc++*|-lm|-lgcc_s|-lc);;
    *) tmpxaplib="$tmpxaplib $i";;
    esac
 done
 LIBXAPIAN=$tmpxaplib
 LIBXAPIANDIR=`$XAPIAN_CONFIG --libs | awk '{print $1}'`
 case A"$LIBXAPIANDIR" in
  A-L*) LIBXAPIANDIR=`echo $LIBXAPIANDIR | sed -e 's/-L//'`;;
  *) LIBXAPIANDIR="";;
 esac
 XAPIANCXXFLAGS=`$XAPIAN_CONFIG --cxxflags`
 #echo XAPIAN_CONFIG: $XAPIAN_CONFIG 
 #echo LIBXAPIAN: $LIBXAPIAN
 #echo LIBXAPIANDIR: $LIBXAPIANDIR
 #echo LIBXAPIANSTATICEXTRA: $LIBXAPIANSTATICEXTRA
 #echo XAPIANCXXFLAGS: $XAPIANCXXFLAGS
 PKG_CHECK_MODULES([XSLT], [libxslt], [], AC_MSG_ERROR([libxslt]))
 AC_ARG_ENABLE(xadump, 
-    AS_HELP_STRING([--enable-xadump],[Enable building the xadump low level Xapian access program.]),
+    AC_HELP_STRING([--enable-xadump],
   [Enable building the xadump low level Xapian access program.]),
        enableXADUMP=$enableval, enableXADUMP="no")
 AM_CONDITIONAL(MAKEXADUMP, [test X$enableXADUMP = Xyes])
 AC_ARG_ENABLE(userdoc,
-    AS_HELP_STRING([--disable-userdoc],[Disable building the user manual. (Avoids the need for docbook xml/xsl files and TeX tools.]),
+    AC_HELP_STRING([--disable-userdoc],
       [Disable building the user manual. (Avoids the need for docbook xml/xsl files and TeX tools.]),
        enableUserdoc=$enableval, enableUserdoc="yes")
 AM_CONDITIONAL(MAKEUSERDOC, [test X$enableUserdoc = Xyes])
@ -311,12 +360,14 @@ AM_CONDITIONAL(MAKEUSERDOC, [test X$enableUserdoc = Xyes])
 # will have failed, and we tell the user to check his environment.
 #
 AC_ARG_ENABLE(qtgui, 
-    AS_HELP_STRING([--disable-qtgui],[Disable the QT-based graphical user interface.]),
+    AC_HELP_STRING([--disable-qtgui],
   [Disable the QT-based graphical user interface.]),
        enableQT=$enableval, enableQT="yes")
 AM_CONDITIONAL(MAKEQT, [test X$enableQT = Xyes])
 AC_ARG_ENABLE(recollq, 
-    AS_HELP_STRING([--enable-recollq],[Enable building the recollq command line query tool (recoll -t without
+    AC_HELP_STRING([--enable-recollq],
   [Enable building the recollq command line query tool (recoll -t without
   need for Qt). This is done by default if --disable-qtgui is set but this
   option enables forcing it.]),
        enableRECOLLQ=$enableval, enableRECOLLQ="no")
@ -347,11 +398,28 @@ if test X$enableQT = Xyes ; then
  qt development files and tools and/or set the QTDIR environment variable?])
  fi
  QMAKE=$QMAKEPATH
-  QTGUI=qtgui
+  
  # Check Qt version
  qmakevers="`${QMAKE} --version 2>&1`"
  #echo "qmake version: $qmakevers"
  v4=`expr "$qmakevers" : '.*Qt[ ][ ]*version[ ][ ]*4.*'`
  v5=`expr "$qmakevers" : '.*Qt[ ][ ]*version[ ][ ]*5.*'`
  if test X$v4 = X0 -a X$v5 = X0; then 
     AC_MSG_ERROR([Bad qt/qmake version string (not 4 or 5?): $qmakevers])
  else
    if test X$v4 != X0 ; then
       AC_MSG_ERROR([Qt version (from qmake found with QMAKE/QTDIR/PATH) is 4 but Recoll now needs version 5])
    else
       AC_MSG_NOTICE([using qt version 5 user interface])
    fi
    QTGUI=qtgui
  fi
 ##### Using Qt webkit for reslist display? Else Qt textbrowser
  AC_ARG_ENABLE(webkit,
-    AS_HELP_STRING([--disable-webkit],[Disable use of qt-webkit (only meaningful if qtgui is enabled).]),
+    AC_HELP_STRING([--disable-webkit],
      [Disable use of qt-webkit (only meaningful if qtgui is enabled).]),
        enableWebkit=$enableval, enableWebkit="yes")
  if test "$enableWebkit" = "yes" ; then
@ -363,7 +431,8 @@ if test X$enableQT = Xyes ; then
  fi
  AC_ARG_ENABLE(webengine,
-    AS_HELP_STRING([--enable-webengine],[Enable use of qt-webengine (only meaningful if qtgui is enabled), in
+    AC_HELP_STRING([--enable-webengine],
      [Enable use of qt-webengine (only meaningful if qtgui is enabled), in
      place or qt-webkit.]),
        enableWebengine=$enableval, enableWebengine="no")
@ -379,7 +448,8 @@ if test X$enableQT = Xyes ; then
 ##### Using QZeitGeist lib ? Default no for now
  AC_ARG_WITH(qzeitgeist,
-    AS_HELP_STRING([--with-qzeitgeist],[Enable the use of the qzeitgeist library to send zeitgeist events.]),
+    AC_HELP_STRING([--with-qzeitgeist],
      [Enable the use of the qzeitgeist library to send zeitgeist events.]),
        withQZeitgeist=$withval, withQZeitgeist="no")
  case "$withQZeitgeist" in 
@ -396,73 +466,22 @@ if test X$enableQT = Xyes ; then
   QMAKE_DISABLE_ZEITGEIST=""
  fi
 # Retain debugging symbols in GUI recoll ? This makes it enormous (~50MB)
  AC_ARG_ENABLE(guidebug,
    AS_HELP_STRING([--enable-guidebug],[Generate and retain debug symbols in GUI program (makes the file very big).]),
        enableGuiDebug=$enableval, enableGuiDebug="no")
  if test "$enableGuiDebug" = "yes" ; then
   QMAKE_ENABLE_GUIDEBUG=""
  else
   QMAKE_ENABLE_GUIDEBUG="#"
  fi
  AC_CONFIG_FILES($QTGUI/recoll.pro)
  ##################### End QT stuff
 fi
 dnl Borrow a macro definition from pkg.config,
 dnl for older installs that lack it.
 m4_ifndef([PKG_CHECK_VAR], [
 dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE,
 dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
 dnl -------------------------------------------
 dnl Retrieves the value of the pkg-config variable for the given module.
 AC_DEFUN([PKG_CHECK_VAR],
 [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
 AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl
 _PKG_CONFIG([$1], [variable="][$3]["], [$2])
 AS_VAR_COPY([$1], [pkg_cv_][$1])
 AS_VAR_IF([$1], [""], [$5], [$4])dnl
 ])dnl PKG_CHECK_VAR
 ])
 ### Systemd
 AC_ARG_WITH([systemd],
    AS_HELP_STRING([--without-systemd],[Disable installation of the systemd unit files.]))
 AC_ARG_WITH([system-unit-dir],
    AS_HELP_STRING([--with-system-unit-dir=DIR],[Install location for systemd system unit files]),
    [SYSTEMD_SYSTEM_UNIT_DIR="$withval"],
    [PKG_CHECK_VAR([SYSTEMD_SYSTEM_UNIT_DIR], [systemd], [systemdsystemunitdir])])
 AC_ARG_WITH([user-unit-dir],
    AS_HELP_STRING([--with-user-unit-dir=DIR],[Install location for systemd user unit files]),
    [SYSTEMD_USER_UNIT_DIR="$withval"],
    [PKG_CHECK_VAR([SYSTEMD_USER_UNIT_DIR], [systemd], [systemduserunitdir])])
 if test X$enableINDEXER = Xno -o "x$SYSTEMD_SYSTEM_UNIT_DIR" = "x" -o \
        "x$SYSTEMD_USER_UNIT_DIR" = "x"; then
    with_systemd="no"
 fi
 AM_CONDITIONAL([INSTALL_SYSTEMD_UNITS], [test "X$with_systemd" != "Xno"])
 ### X11: this is needed for the session monitoring code (in recollindex -m)
 AC_ARG_ENABLE(x11mon, 
-    AS_HELP_STRING([--disable-x11mon],[Disable recollindex support for X11 session monitoring.]),
+    AC_HELP_STRING([--disable-x11mon],
   [Disable recollindex support for X11 session monitoring.]),
        enableX11mon=$enableval, enableX11mon="yes")
-if test X$enableINDEXER = Xno ; then 
+if test X$withInotify = Xno -a X$withFam = Xno ; then
  enableX11mon=no
 else
  if test X$withInotify = Xno -a X$withFam = Xno ; then
    enableX11mon=no
  fi
 fi
-if test "$enableX11mon" = yes ; then
+if test "$enableX11mon" = "yes" ; then
  AC_PATH_XTRA
  X_LIBX11=-lX11
 else
@ -472,17 +491,6 @@ fi
 #echo X_CFLAGS "'$X_CFLAGS'" X_PRE_LIBS "'$X_PRE_LIBS'" X_LIBS \
 #      "'$X_LIBS'" X_LIBX11 "'$X_LIBX11'" X_EXTRA_LIBS "'$X_EXTRA_LIBS'"
 # Check if anything needs Xapian. We also need to build the shared lib if this is the case.
 xapian_needed=yes
 if test X$buildtestmains = Xno -a X$pythonEnabled = Xno -a X$enableINDEXER = Xno \
   -a X$enableXADUMP = Xno -a X$enableQT = Xno -a X$enableRECOLLQ = Xno ; then
   xapian_needed=no
 fi
 if test X$xapian_needed = Xyes; then
   PKG_CHECK_MODULES([XAPIAN], xapian-core, [], AC_MSG_ERROR([libxapian]))
 fi
 AM_CONDITIONAL([MAKE_RECOLL_LIB], [test X$xapian_needed = Xyes])
 # For communicating the value of RECOLL_DATADIR to non-make-based
 # subpackages like python-recoll, we have to expand prefix in here, because
 # things like "datadir = ${prefix}/share" (which is what we'd get by
@ -506,17 +514,17 @@ AC_SUBST(X_LIBX11)
 AC_SUBST(X_EXTRA_LIBS)
 AC_SUBST(INCICONV)
 AC_SUBST(LIBICONV)
-AC_SUBST(XAPIAN_LIBS)
+AC_SUBST(LIBXAPIAN)
-AC_SUBST(XAPIAN_CFLAGS)
+AC_SUBST(LIBXAPIANDIR)
 AC_SUBST(LIBXAPIANSTATICEXTRA)
 AC_SUBST(LIBFAM)
 AC_SUBST(QMAKE)
 AC_SUBST(QTGUI)
 AC_SUBST(XAPIANCXXFLAGS)
 AC_SUBST(QMAKE_ENABLE_WEBKIT)
 AC_SUBST(QMAKE_DISABLE_WEBKIT)
 AC_SUBST(QMAKE_ENABLE_WEBENGINE)
 AC_SUBST(QMAKE_DISABLE_WEBENGINE)
 AC_SUBST(QMAKE_ENABLE_GUIDEBUG)
 AC_SUBST(QMAKE_DISABLE_GUIDEBUG)
 AC_SUBST(QMAKE_ENABLE_ZEITGEIST)
 AC_SUBST(QMAKE_DISABLE_ZEITGEIST)
 AC_SUBST(LIBQZEITGEIST)
@ -524,8 +532,6 @@ AC_SUBST(RCLVERSION)
 AC_SUBST(RCLLIBVERSION)
 AC_SUBST(XSLT_CFLAGS)
 AC_SUBST(XSLT_LIBS)
 AC_SUBST([SYSTEMD_SYSTEM_UNIT_DIR])
 AC_SUBST([SYSTEMD_USER_UNIT_DIR])
 AC_CONFIG_FILES([Makefile python/recoll/setup.py
 python/pychm/setup.py])
@ -533,8 +539,5 @@ AC_CONFIG_FILES([Makefile python/recoll/setup.py
 if test X$buildtestmains = Xyes ; then
   AC_CONFIG_FILES([testmains/Makefile])
 fi
 if test X$buildrclgrep = Xyes ; then
   AC_CONFIG_FILES([rclgrep/Makefile])
 fi
 AC_OUTPUT
--- a/src/doc/man/rclgrep.1
+++ b/src/doc/man/rclgrep.1
@ -1,12 +0,0 @@
 .TH RCLGREP 1 "20 September 2022"
 .SH NAME
 rclgrep \- grep-like program based on the recoll data extraction functions
 .SH SYNOPSIS
 .B rclgrep
 [
 .B \--config
 <configdir>
 ]
 .SH DESCRIPTION
 Some bla bla
--- a/src/doc/man/recoll.conf.5
+++ b/src/doc/man/recoll.conf.5
@ -148,7 +148,7 @@ not set, the daemon uses skippedPaths.
 .TP
 .BI "zipUseSkippedNames = "bool
 Use skippedNames inside Zip archives. Fetched
-directly by the rclzip.py handler. Skip the patterns defined by skippedNames
+directly by the rclzip handler. Skip the patterns defined by skippedNames
 inside Zip archives. Can be redefined for subdirectories.
 See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
@ -195,7 +195,7 @@ lets you turn off md5 computation for selected types. It is global (no
 redefinition for subtrees). At the moment, it only has an effect for
 external handlers (exec and execm). The file types can be specified by
 listing either MIME types (e.g. audio/mpeg) or handler names
-(e.g. rclaudio.py).
+(e.g. rclaudio).
 .TP
 .BI "compressedfilemaxkbs = "int
 Size limit for compressed
@ -613,7 +613,8 @@ location before copy, to allow path translation computations.  For
 example if a dataset originally indexed as '/home/me/mydata/config' has
 been mounted to '/media/me/mydata', and the GUI is running from a copied
 configuration, orgidxconfdir would be '/home/me/mydata/config', and
-curidxconfdir (as set in the copied configuration) would be '/media/me/mydata/config'.
+curidxconfdir (as set in the copied configuration) would be
 '/media/me/mydata/config'.
 .TP
 .BI "idxrundir = "dfn
 Indexing process current directory. The input
--- a/src/doc/man/recollq.1
+++ b/src/doc/man/recollq.1
@ -59,10 +59,6 @@ recollq \- command line / standard output Recoll query command.
 .B \-F
 <quoted space separated field name list>
 ]
 [
 .B \--extract-to
 <file path>
 ]
 <query string>
 .B recollq \-P
@ -124,10 +120,9 @@ sorts the results according to the specified field. Use
 for descending order.
 .PP
 .B \-n
-<[first-]cnt>
+<cnt>
 can be used to set the maximum number of results that should be
-printed. The default is 2000. Use a value of 0 for no limit. If the argument is of the form
+printed. The default is 2000. Use a value of 0 for no limit.
 first-cnt, it also defines the first result to output (from 0).
 .PP
 .B \-s
 <language>
@ -149,11 +144,6 @@ base64 and separated by one space character. Empty fields are indicated by
 consecutive space characters. There is one additional space character at
 the end of each line.
 .PP
 .B \--extract-to
 <file path>
 Will extract the first result document of the query to the argument path, which must not exist. Use
 -n first-cnt to select the document.
 .PP
 .B recollq \-P
 (Period) will print the minimum and maximum modification years for
 documents in the index.
--- a/src/doc/user/Makefile
+++ b/src/doc/user/Makefile
@ -13,7 +13,6 @@
 #XSLDIR="/opt/local/share/xsl/docbook-xsl/"
 #Linux
 XSLDIR="/usr/share/xml/docbook/stylesheet/docbook-xsl/"
 UTILBUILDS=/home/dockes/tmp/builds/medocutils/
 # Options common to the single-file and chunked versions
@ -49,10 +48,11 @@ index.html: usermanual.xml recoll.conf.xml
 usermanual.pdf: usermanual.xml recoll.conf.xml
 	dblatex --xslt-opts="--xinclude" -tpdf $<
-recoll.conf.xml: ../../sampleconf/recoll.conf
+UTILBUILDS=/home/dockes/tmp/builds/medocutils/
-	test -x $(UTILBUILDS)/confxml && $(UTILBUILDS)/confxml --docbook \
+recoll-conf-xml:
 	$(UTILBUILDS)/confxml --docbook \
        --idprefix=RCL.INSTALL.CONFIG.RECOLLCONF  \
-        ../../sampleconf/recoll.conf  > recoll.conf.xml || touch recoll.conf.xml
+        ../../sampleconf/recoll.conf  > recoll.conf.xml
 # Generating a restructured text version, for uploading to readthedocs.
 # Does not really work, the internal links are botched. pandoc
@ -65,7 +65,7 @@ recoll.conf.xml: ../../sampleconf/recoll.conf
 # script. 
 # Also could not get readthedocs to generate the left pane TOC? could
 # probably be fixed...
-#usermanual-rst: recoll.conf.xml
+#usermanual-rst: recoll-conf-xml
 #	tail -n +2 recoll.conf.xml > rcl-conf-tail.xml
 #	sed -e '/xi:include/r rcl-conf-tail.xml' \
 #		< usermanual.xml > full-man.xml
--- a/src/doc/user/recoll.conf.xml
+++ b/src/doc/user/recoll.conf.xml
@ -8,34 +8,28 @@
 <listitem><para>Space-separated list of files or
 directories to recursively index. Default to ~ (indexes
 $HOME). You can use symbolic links in the list, they will be followed,
-independently of the value of the followLinks variable.
+independently of the value of the followLinks variable.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONITORDIRS">
 <term><varname>monitordirs</varname></term>
 <listitem><para>Space-separated list of files or directories to monitor for
 updates. When running the real-time indexer, this allows monitoring only a
 subset of the whole indexed area. The elements must be included in the
-tree defined by the 'topdirs' members.
+tree defined by the 'topdirs' members.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDNAMES">
 <term><varname>skippedNames</varname></term>
-<listitem><para>Files and directories which should be ignored.  White space separated list of wildcard patterns (simple ones, not paths, must contain no
+<listitem><para>Files and directories which should be ignored. 
-'/' characters), which will be tested against file and directory names.
+White space separated list of wildcard patterns (simple ones, not paths,
-</para><para>
+must contain no / ), which will be tested against file and directory
-Have a look at the default configuration for the initial value, some entries may not suit your
+names.  The list in the default configuration does not exclude hidden
-situation. The easiest way to see it is through the GUI Index configuration "local parameters"
+directories (names beginning with a dot), which means that it may index
-panel.
+quite a few things that you do not want. On the other hand, email user
-</para><para>
+agents like Thunderbird usually store messages in hidden directories, and
-The list in the default configuration does not exclude hidden directories (names beginning with a
+you probably want this indexed. One possible solution is to have ".*" in
-dot), which means that it may index quite a few things that you do not want. On the other hand,
+"skippedNames", and add things like "~/.thunderbird" "~/.evolution" to
-email user agents like Thunderbird usually store messages in hidden directories, and you probably
+"topdirs".  Not even the file names are indexed for patterns in this
-want this indexed. One possible solution is to have ".*" in "skippedNames", and add things like
+list, see the "noContentSuffixes" variable for an alternative approach
-"~/.thunderbird" "~/.evolution" to "topdirs".
+which indexes the file names. Can be redefined for any
-</para><para>
+subtree.</para></listitem></varlistentry>
 Not even the file names are indexed for patterns in this list, see the "noContentSuffixes"
 variable for an alternative approach which indexes the file names. Can be redefined for any
 subtree.
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDNAMES-">
 <term><varname>skippedNames-</varname></term>
 <listitem><para>List of name endings to remove from the default skippedNames
@ -48,8 +42,7 @@ list. </para></listitem></varlistentry>
 <term><varname>onlyNames</varname></term>
 <listitem><para>Regular file name filter patterns. If this is set, only the file names not in skippedNames and
 matching one of the patterns will be considered for indexing. Can be
-redefined per subtree. Does not apply to directories.
+redefined per subtree. Does not apply to directories.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOCONTENTSUFFIXES">
 <term><varname>noContentSuffixes</varname></term>
 <listitem><para>List of name endings (not necessarily dot-separated suffixes) for
@ -60,8 +53,7 @@ which will go away in a future release (the move from mimemap to
 recoll.conf allows editing the list through the GUI). This is different
 from skippedNames because these are name ending matches only (not
 wildcard patterns), and the file name itself gets indexed normally. This
-can be redefined for subdirectories.
+can be redefined for subdirectories.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOCONTENTSUFFIXES-">
 <term><varname>noContentSuffixes-</varname></term>
 <listitem><para>List of name endings to remove from the default noContentSuffixes
@ -72,26 +64,19 @@ list. </para></listitem></varlistentry>
 list. </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDPATHS">
 <term><varname>skippedPaths</varname></term>
-<listitem><para>Absolute paths we should not go into. Space-separated list of wildcard expressions for absolute filesystem paths (for files or
+<listitem><para>Absolute paths we should not go into. Space-separated list of wildcard expressions for absolute
-directories). The variable must be defined at the top level of the configuration file, not in a
+filesystem paths. Must be defined at the top level of the configuration
-subsection.
+file, not in a subsection. Can contain files and directories. The database and
-</para><para>
+configuration directories will automatically be added. The expressions
-Any value in the list must be textually consistent with the values in topdirs, no attempts are
+are matched using 'fnmatch(3)' with the FNM_PATHNAME flag set by
-made to resolve symbolic links. In practise, if, as is frequently the case, /home is a link to
+default. This means that '/' characters must be matched explicitly. You
-/usr/home, your default topdirs will have a single entry '~' which will be translated to
+can set 'skippedPathsFnmPathname' to 0 to disable the use of FNM_PATHNAME
-'/home/yourlogin'. In this case, any skippedPaths entry should start with '/home/yourlogin' *not*
+(meaning that '/*/dir3' will match '/dir1/dir2/dir3'). The default value
-with '/usr/home/yourlogin'.
+contains the usual mount point for removable media to remind you that it
-</para><para>
+is a bad idea to have Recoll work on these (esp. with the monitor: media
-The index and configuration directories will automatically be added to the list.
+gets indexed on mount, all data gets erased on unmount). Explicitly
-</para><para>
+adding '/media/xxx' to the 'topdirs' variable will override
-The expressions are matched using 'fnmatch(3)' with the FNM_PATHNAME flag set by default. This
+this.</para></listitem></varlistentry>
 means that '/' characters must be matched explicitly. You can set 'skippedPathsFnmPathname' to 0
 to disable the use of FNM_PATHNAME (meaning that '/*/dir3' will match '/dir1/dir2/dir3').
 </para><para>
 The default value contains the usual mount point for removable media to remind you that it is in
 most cases a bad idea to have Recoll work on these. Explicitly adding '/media/xxx' to the 'topdirs'
 variable will override this.
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDPATHSFNMPATHNAME">
 <term><varname>skippedPathsFnmPathname</varname></term>
 <listitem><para>Set to 0 to
@ -100,19 +85,17 @@ paths. </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOWALKFN">
 <term><varname>nowalkfn</varname></term>
 <listitem><para>File name which will cause its parent directory to be skipped. Any directory containing a file with this name will be skipped as
-if it was part of the skippedPaths list. Ex: .recoll-noindex
+if it was part of the skippedPaths list. Ex: .recoll-noindex</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMSKIPPEDPATHS">
 <term><varname>daemSkippedPaths</varname></term>
 <listitem><para>skippedPaths equivalent specific to
 real time indexing. This enables having parts of the tree
 which are initially indexed but not monitored. If daemSkippedPaths is
-not set, the daemon uses skippedPaths.
+not set, the daemon uses skippedPaths.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ZIPUSESKIPPEDNAMES">
 <term><varname>zipUseSkippedNames</varname></term>
 <listitem><para>Use skippedNames inside Zip archives. Fetched
-directly by the rclzip.py handler. Skip the patterns defined by skippedNames
+directly by the rclzip handler. Skip the patterns defined by skippedNames
 inside Zip archives. Can be redefined for subdirectories.
 See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
 </para></listitem></varlistentry>
@ -134,8 +117,7 @@ multiple indexing of linked files. No effort is made to avoid duplication
 when this option is set to true. This option can be set individually for
 each of the 'topdirs' members by using sections. It can not be changed
 below the 'topdirs' level. Links in the 'topdirs' list itself are always
-followed.
+followed.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXEDMIMETYPES">
 <term><varname>indexedmimetypes</varname></term>
 <listitem><para>Restrictive list of
@ -144,16 +126,14 @@ supported types are indexed). If it is set, only the types from the list
 will have their contents indexed. The names will be indexed anyway if
 indexallfilenames is set (default). MIME type names should be taken from
 the mimemap file (the values may be different from xdg-mime or file -i
-output in some cases). Can be redefined for subtrees.
+output in some cases). Can be redefined for subtrees.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.EXCLUDEDMIMETYPES">
 <term><varname>excludedmimetypes</varname></term>
 <listitem><para>List of excluded MIME
 types. Lets you exclude some types from indexing. MIME type
 names should be taken from the mimemap file (the values may be different
 from xdg-mime or file -i output in some cases). Can be redefined for
-subtrees.
+subtrees.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOMD5TYPES">
 <term><varname>nomd5types</varname></term>
 <listitem><para>Don't compute md5 for these types. md5 checksums are used only for deduplicating results, and can be
@ -162,43 +142,32 @@ lets you turn off md5 computation for selected types. It is global (no
 redefinition for subtrees). At the moment, it only has an effect for
 external handlers (exec and execm). The file types can be specified by
 listing either MIME types (e.g. audio/mpeg) or handler names
-(e.g. rclaudio.py).
+(e.g. rclaudio).</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.COMPRESSEDFILEMAXKBS">
 <term><varname>compressedfilemaxkbs</varname></term>
 <listitem><para>Size limit for compressed
 files. We need to decompress these in a
 temporary directory for identification, which can be wasteful in some
 cases. Limit the waste. Negative means no limit. 0 results in no
-processing of any compressed file. Default 100 MB.
+processing of any compressed file. Default 50 MB.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEMAXMBS">
 <term><varname>textfilemaxmbs</varname></term>
-<listitem><para>Size limit for text files. Mostly for skipping monster logs. Default 20 MB. Use a value of -1 to
+<listitem><para>Size limit for text
-disable.
+files. Mostly for skipping monster
-</para></listitem></varlistentry>
+logs. Default 20 MB.</para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTUNKNOWNASPLAIN">
 <term><varname>textunknownasplain</varname></term>
 <listitem><para>Process unknown text/xxx files as text/plain. Allows indexing misc. text files identified as text/whatever by 'file' or 'xdg-mime'
 without having to explicitly set config entries for them. This works fine for indexing (but will
 cause processing of a lot of garbage though), but the documents indexed this way will be opened by
 the desktop viewer, even if text/plain has a specific editor.
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXALLFILENAMES">
 <term><varname>indexallfilenames</varname></term>
 <listitem><para>Index the file names of
 unprocessed files. Index the names of files the contents of
 which we don't index because of an excluded or unsupported MIME
-type.
+type.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.USESYSTEMFILECOMMAND">
 <term><varname>usesystemfilecommand</varname></term>
 <listitem><para>Use a system command
 for file MIME type guessing as a final step in file type
 identification. This is generally useful, but will usually
 cause the indexing of many bogus 'text' files. See 'systemfilecommand'
-for the command used.
+for the command used.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SYSTEMFILECOMMAND">
 <term><varname>systemfilecommand</varname></term>
 <listitem><para>Command used to guess
@ -206,14 +175,12 @@ MIME types if the internal methods fails This should be a
 "file -i" workalike.  The file path will be added as a last parameter to
 the command line. "xdg-mime" works better than the traditional "file"
 command, and is now the configured default (with a hard-coded fallback to
-"file")
+"file")</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PROCESSWEBQUEUE">
 <term><varname>processwebqueue</varname></term>
 <listitem><para>Decide if we process the
 Web queue. The queue is a directory where the Recoll Web
-browser plugins create the copies of visited pages.
+browser plugins create the copies of visited pages.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEPAGEKBS">
 <term><varname>textfilepagekbs</varname></term>
 <listitem><para>Page size for text
@ -222,14 +189,12 @@ into documents of approximately this size. Will reduce memory usage at
 index time and help with loading data in the preview window at query
 time. Particularly useful with very big files, such as application or
 system logs. Also see textfilemaxmbs and
-compressedfilemaxkbs.
+compressedfilemaxkbs.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MEMBERMAXKBS">
 <term><varname>membermaxkbs</varname></term>
 <listitem><para>Size limit for archive
 members. This is passed to the filters in the environment
-as RECOLL_FILTER_MAXMEMBERKB.
+as RECOLL_FILTER_MAXMEMBERKB.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 </variablelist></sect3>
 <sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.TERMS">
 <title>Parameters affecting how we generate terms and organize the index </title><variablelist>
@ -241,34 +206,28 @@ searches sensitive to case and diacritics can be performed, but the index
 will be bigger, and some marginal weirdness may sometimes occur. The
 default is a stripped index. When using multiple indexes for a search,
 this parameter must be defined identically for all. Changing the value
-implies an index reset.
+implies an index reset.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXSTOREDOCTEXT">
 <term><varname>indexStoreDocText</varname></term>
 <listitem><para>Decide if we store the
 documents' text content in the index. Storing the text
 allows extracting snippets from it at query time, instead of building
 them from index position data.
 </para><para>
 Newer Xapian index formats have rendered our use of positions list
 unacceptably slow in some cases. The last Xapian index format with good
 performance for the old method is Chert, which is default for 1.2, still
 supported but not default in 1.4 and will be dropped in 1.6.
 </para><para>
 The stored document text is translated from its original format to UTF-8
 plain text, but not stripped of upper-case, diacritics, or punctuation
 signs. Storing it increases the index size by 10-20% typically, but also
 allows for nicer snippets, so it may be worth enabling it even if not
 strictly needed for performance if you can afford the space.
 </para><para>
 The variable only has an effect when creating an index, meaning that the
 xapiandb directory must not exist yet. Its exact effect depends on the
 Xapian version.
 </para><para>
 For Xapian 1.4, if the variable is set to 0, the Chert format will be
 used, and the text will not be stored. If the variable is 1, Glass will
 be used, and the text stored.
 </para><para>
 For Xapian 1.2, and for versions 1.5 and newer, the index format is
 always the default, but the variable controls if the text is stored or
 not, and the abstract generation method. With Xapian 1.5 and later, and
@ -285,31 +244,26 @@ still be). Numbers are often quite interesting to search for, and this
 should probably not be set except for special situations, ie, scientific
 documents with huge amounts of numbers in them, where setting nonumbers
 will reduce the index size. This can only be set for a whole index, not
-for a subtree.
+for a subtree.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DEHYPHENATE">
 <term><varname>dehyphenate</varname></term>
 <listitem><para>Determines if we index 'coworker'
 also when the input is 'co-worker'. This is new
 in version 1.22, and on by default. Setting the variable to off allows
-restoring the previous behaviour.
+restoring the previous behaviour.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.BACKSLASHASLETTER">
 <term><varname>backslashasletter</varname></term>
 <listitem><para>Process backslash as normal letter. This may make sense for people wanting to index TeX commands as
-such but is not of much general use.
+such but is not of much general use.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.UNDERSCOREASLETTER">
 <term><varname>underscoreasletter</varname></term>
 <listitem><para>Process underscore as normal letter. This makes sense in so many cases that one wonders if it should
-not be the default.
+not be the default.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXTERMLENGTH">
 <term><varname>maxtermlength</varname></term>
 <listitem><para>Maximum term length. Words longer than this will be discarded.
 The default is 40 and used to be hard-coded, but it can now be
-adjusted. You need an index reset if you change the value.
+adjusted. You need an index reset if you change the value.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOCJK">
 <term><varname>nocjk</varname></term>
 <listitem><para>Decides if specific East Asian
@ -317,23 +271,20 @@ adjusted. You need an index reset if you change the value.
 off. This will save a small amount of CPU if you have no CJK
 documents. If your document base does include such text but you are not
 interested in searching it, setting nocjk may be a
-significant time and space saver.
+significant time and space saver.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CJKNGRAMLEN">
 <term><varname>cjkngramlen</varname></term>
 <listitem><para>This lets you adjust the size of
 n-grams used for indexing CJK text. The default value of 2 is
 probably appropriate in most cases. A value of 3 would allow more precision
 and efficiency on longer words, but the index will be approximately twice
-as large.
+as large.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXSTEMMINGLANGUAGES">
 <term><varname>indexstemminglanguages</varname></term>
 <listitem><para>Languages for which to create stemming expansion
 data. Stemmer names can be found by executing 'recollindex
 -l', or this can also be set from a list in the GUI. The values are full
-language names, e.g. english, french...
+language names, e.g. english, french...</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DEFAULTCHARSET">
 <term><varname>defaultcharset</varname></term>
 <listitem><para>Default character
@ -344,39 +295,37 @@ set, the default character set is the one defined by the NLS environment
 ($LC_ALL, $LC_CTYPE, $LANG), or ultimately iso-8859-1 (cp-1252 in fact).
 If for some reason you want a general default which does not match your
 LANG and is not 8859-1, use this variable. This can be redefined for any
-sub-directory.
+sub-directory.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.UNAC_EXCEPT_TRANS">
 <term><varname>unac_except_trans</varname></term>
-<listitem><para>A list of characters, encoded in UTF-8, which should be handled specially when converting
+<listitem><para>A list of characters,
-text to unaccented lowercase. For example, in Swedish, the letter a with diaeresis has full alphabet citizenship and
+encoded in UTF-8, which should be handled specially
-should not be turned into an a.  Each element in the space-separated list has the special
+when converting text to unaccented lowercase. For
-character as first element and the translation following. The handling of both the lowercase and
+example, in Swedish, the letter a with diaeresis has full alphabet
-upper-case versions of a character should be specified, as appartenance to the list will turn-off
+citizenship and should not be turned into an a.
-both standard accent and case processing. The value is global and affects both indexing and
+Each element in the space-separated list has the special character as
-querying.  We also convert a few confusing Unicode characters (quotes, hyphen) to their ASCII
+first element and the translation following. The handling of both the
-equivalent to avoid "invisible" search failures.
+lowercase and upper-case versions of a character should be specified, as
-</para><para>
+appartenance to the list will turn-off both standard accent and case
 processing. The value is global and affects both indexing and querying.
 Examples:
 Swedish:
-unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl åå Åå ’' ❜' ʼ' ‐-
+unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl åå Åå
 . German:
-unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl ’' ❜' ʼ' ‐-
+unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl
-. French: you probably want to decompose oe and ae and nobody would type
+In French, you probably want to decompose oe and ae and nobody would type
 a German ß
-unac_except_trans = ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl ’' ❜' ʼ' ‐-
+unac_except_trans = ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl
 . The default for all until someone protests follows. These decompositions
 are not performed by unac, but it is unlikely that someone would type the
 composed forms in a search.
-unac_except_trans = ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl ’' ❜' ʼ' ‐-
+unac_except_trans = ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAILDEFCHARSET">
 <term><varname>maildefcharset</varname></term>
 <listitem><para>Overrides the default
 character set for email messages which don't specify
 one. This is mainly useful for readpst (libpst) dumps,
-which are utf-8 but do not say so.
+which are utf-8 but do not say so.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.LOCALFIELDS">
 <term><varname>localfields</varname></term>
 <listitem><para>Set fields on all files
@ -384,8 +333,7 @@ which are utf-8 but do not say so.
 name = value ; attr1 = val1 ; [...]
 value is empty so this needs an initial semi-colon. This is useful, e.g.,
 for setting the rclaptg field for application selection inside
-mimeview.
+mimeview.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TESTMODIFUSEMTIME">
 <term><varname>testmodifusemtime</varname></term>
 <listitem><para>Use mtime instead of
@ -407,12 +355,12 @@ undetected). Perform a full index reset after changing this.
 <term><varname>noxattrfields</varname></term>
 <listitem><para>Disable extended attributes
 conversion to metadata fields. This probably needs to be
-set if testmodifusemtime is set.
+set if testmodifusemtime is set.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.METADATACMDS">
 <term><varname>metadatacmds</varname></term>
 <listitem><para>Define commands to
-gather external metadata, e.g. tmsu tags. There can be several entries, separated by semi-colons, each defining
+gather external metadata, e.g. tmsu tags. 
 There can be several entries, separated by semi-colons, each defining
 which field name the data goes into and the command to use. Don't forget the
 initial semi-colon. All the field names must be different. You can use
 aliases in the "field" file if necessary.
@ -437,15 +385,13 @@ cachedir is ~/.cache/recoll, the default dbdir would be
 mboxcachedir, aspellDicDir, which can still be individually specified to
 override cachedir.  Note that if you have multiple configurations, each
 must have a different cachedir, there is no automatic computation of a
-subpath under cachedir.
+subpath under cachedir.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXFSOCCUPPC">
 <term><varname>maxfsoccuppc</varname></term>
 <listitem><para>Maximum file system occupation
 over which we stop indexing. The value is a percentage,
 corresponding to what the "Capacity" df output column shows. The default
-value is 0, meaning no checking.
+value is 0, meaning no checking.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DBDIR">
 <term><varname>dbdir</varname></term>
 <listitem><para>Xapian database directory
@ -453,43 +399,36 @@ location. This will be created on first indexing. If the
 value is not an absolute path, it will be interpreted as relative to
 cachedir if set, or the configuration directory (-c argument or
 $RECOLL_CONFDIR).  If nothing is specified, the default is then
-~/.recoll/xapiandb/
+~/.recoll/xapiandb/</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXSTATUSFILE">
 <term><varname>idxstatusfile</varname></term>
 <listitem><para>Name of the scratch file where the indexer process updates its
 status. Default: idxstatus.txt inside the configuration
-directory.
+directory.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MBOXCACHEDIR">
 <term><varname>mboxcachedir</varname></term>
 <listitem><para>Directory location for storing mbox message offsets cache
 files. This is normally 'mboxcache' under cachedir if set,
 or else under the configuration directory, but it may be useful to share
-a directory between different configurations.
+a directory between different configurations.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MBOXCACHEMINMBS">
 <term><varname>mboxcacheminmbs</varname></term>
 <listitem><para>Minimum mbox file size over which we cache the offsets. There is really no sense in caching offsets for small files. The
-default is 5 MB.
+default is 5 MB.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MBOXMAXMSGMBS">
 <term><varname>mboxmaxmsgmbs</varname></term>
 <listitem><para>Maximum mbox member message size in megabytes. Size over which we assume that the mbox format is bad or we
-misinterpreted it, at which point we just stop processing the file.
+misinterpreted it, at which point we just stop processing the file.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBCACHEDIR">
 <term><varname>webcachedir</varname></term>
 <listitem><para>Directory where we store the archived web pages. This is only used by the web history indexing code.
 Default: cachedir/webcache if cachedir is set, else
-$RECOLL_CONFDIR/webcache
+$RECOLL_CONFDIR/webcache</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBCACHEMAXMBS">
 <term><varname>webcachemaxmbs</varname></term>
 <listitem><para>Maximum size in MB of the Web archive. This is only used by the web history indexing code.
 Default: 40 MB.
-Reducing the size will not physically truncate the file.
+Reducing the size will not physically truncate the file.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBQUEUEDIR">
 <term><varname>webqueuedir</varname></term>
 <listitem><para>The path to the Web indexing queue. This used to be
@ -497,42 +436,29 @@ hard-coded in the old plugin as ~/.recollweb/ToIndex so there would be no
 need or possibility to change it, but the WebExtensions plugin now downloads
 the files to the user Downloads directory, and a script moves them to
 webqueuedir. The script reads this value from the config so it has become
-possible to change it.
+possible to change it.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBDOWNLOADSDIR">
 <term><varname>webdownloadsdir</varname></term>
 <listitem><para>The path to browser downloads directory. This is
 where the new browser add-on extension has to create the files. They are
-then moved by a script to webqueuedir.
+then moved by a script to webqueuedir.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBCACHEKEEPINTERVAL">
 <term><varname>webcachekeepinterval</varname></term>
 <listitem><para>Page recycle interval. By default, only one instance of an URL is kept in the cache. This
 can be changed by setting this to a value determining at what frequency
 we keep multiple instances ('day', 'week', 'month',
 'year'). Note that increasing the interval will not erase existing
 entries.
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLDICDIR">
 <term><varname>aspellDicDir</varname></term>
 <listitem><para>Aspell dictionary storage directory location. The
 aspell dictionary (aspdict.(lang).rws) is normally stored in the
 directory specified by cachedir if set, or under the configuration
-directory.
+directory.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FILTERSDIR">
 <term><varname>filtersdir</varname></term>
 <listitem><para>Directory location for executable input handlers. If
 RECOLL_FILTERSDIR is set in the environment, we use it instead. Defaults
 to $prefix/share/recoll/filters. Can be redefined for
-subdirectories.
+subdirectories.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ICONSDIR">
 <term><varname>iconsdir</varname></term>
 <listitem><para>Directory location for icons. The only reason to
 change this would be if you want to change the icons displayed in the
-result list. Defaults to $prefix/share/recoll/images
+result list. Defaults to $prefix/share/recoll/images</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 </variablelist></sect3>
 <sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.PERFS">
 <title>Parameters affecting indexing performance and resource usage </title><variablelist>
@ -550,24 +476,20 @@ value (from this file) is now 50 MB, and should be ok in many cases.
 You can set it as low as 10 to conserve memory, but if you are looking
 for maximum speed, you may want to experiment with values between 20 and
 200. In my experience, values beyond this are always counterproductive. If
-you find otherwise, please drop me a note.
+you find otherwise, please drop me a note.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FILTERMAXSECONDS">
 <term><varname>filtermaxseconds</varname></term>
 <listitem><para>Maximum external filter execution time in
 seconds. Default 1200 (20mn). Set to 0 for no limit. This
 is mainly to avoid infinite loops in postscript files
-(loop.ps)
+(loop.ps)</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FILTERMAXMBYTES">
 <term><varname>filtermaxmbytes</varname></term>
 <listitem><para>Maximum virtual memory space for filter processes
-(setrlimit(RLIMIT_AS)), in megabytes. Note that this includes any mapped libs (there is no reliable
+(setrlimit(RLIMIT_AS)), in megabytes. Note that this
-Linux way to limit the data space only), so we need to be a bit generous
+includes any mapped libs (there is no reliable Linux way to limit the
-here. Anything over 2000 will be ignored on 32 bits machines. The
+data space only), so we need to be a bit generous here. Anything over
-previous default value of 2000 would prevent java pdftk to work when
+2000 will be ignored on 32 bits machines.</para></listitem></varlistentry>
 executed from Python rclpdf.py.
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.THRQSIZES">
 <term><varname>thrQSizes</varname></term>
 <listitem><para>Stage input queues configuration. There are three
@ -579,8 +501,7 @@ next stage. In practise, deep queues have not been shown to increase
 performance. Default: a value of 0 for the first queue tells Recoll to
 perform autoconfiguration based on the detected number of CPUs (no need
 for the two other values in this case).  Use thrQSizes = -1 -1 -1 to
-disable multithreading entirely.
+disable multithreading entirely.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.THRTCOUNTS">
 <term><varname>thrTCounts</varname></term>
 <listitem><para>Number of threads used for each indexing stage. The
@ -590,8 +511,7 @@ in thrQSizes: if the first queue depth is 0, all counts are ignored
 (autoconfigured); if a value of -1 is used for a queue depth, the
 corresponding thread count is ignored. It makes no sense to use a value
 other than 1 for the last stage because updating the Xapian index is
-necessarily single-threaded (and protected by a mutex).
+necessarily single-threaded (and protected by a mutex).</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 </variablelist></sect3>
 <sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.MISC">
 <title>Miscellaneous parameters </title><variablelist>
@ -599,8 +519,7 @@ necessarily single-threaded (and protected by a mutex).
 <term><varname>loglevel</varname></term>
 <listitem><para>Log file verbosity 1-6. A value of 2 will print
 only errors and warnings. 3 will print information like document updates,
-4 is quite verbose and 6 very verbose.
+4 is quite verbose and 6 very verbose.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.LOGFILENAME">
 <term><varname>logfilename</varname></term>
 <listitem><para>Log file destination. Use 'stderr' (default) to write to the
@ -611,25 +530,16 @@ console. </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXLOGFILENAME">
 <term><varname>idxlogfilename</varname></term>
 <listitem><para>Override logfilename for the indexer. </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.HELPERLOGFILENAME">
 <term><varname>helperlogfilename</varname></term>
 <listitem><para>Destination file for external helpers standard error output. The external program error output is left alone by default,
 e.g. going to the terminal when the recoll[index] program is executed
 from the command line. Use /dev/null or a file inside a non-existent
 directory to completely suppress the output.
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMLOGLEVEL">
 <term><varname>daemloglevel</varname></term>
 <listitem><para>Override loglevel for the indexer in real time
 mode. The default is to use the idx... values if set, else
-the log... values.
+the log... values.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMLOGFILENAME">
 <term><varname>daemlogfilename</varname></term>
 <listitem><para>Override logfilename for the indexer in real time
 mode. The default is to use the idx... values if set, else
-the log... values.
+the log... values.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PYLOGLEVEL">
 <term><varname>pyloglevel</varname></term>
 <listitem><para>Override loglevel for the python module. </para></listitem></varlistentry>
@ -642,8 +552,7 @@ the log... values.
 configuration directory inside the directory tree makes it possible to
 provide automatic query time path translations once the data set has
 moved (for example, because it has been mounted on another
-location).
+location).</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR">
 <term><varname>curidxconfdir</varname></term>
 <listitem><para>Current location of the configuration directory. Complement orgidxconfdir for movable datasets. This should be used
@ -655,8 +564,7 @@ example if a dataset originally indexed as '/home/me/mydata/config' has
 been mounted to '/media/me/mydata', and the GUI is running from a copied
 configuration, orgidxconfdir would be '/home/me/mydata/config', and
 curidxconfdir (as set in the copied configuration) would be
-'/media/me/mydata/config'.
+'/media/me/mydata/config'.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXRUNDIR">
 <term><varname>idxrundir</varname></term>
 <listitem><para>Indexing process current directory. The input
@ -665,22 +573,17 @@ makes sense to have recollindex chdir to some temporary directory. If the
 value is empty, the current directory is not changed. If the
 value is (literal) tmp, we use the temporary directory as set by the
 environment (RECOLL_TMPDIR else TMPDIR else /tmp). If the value is an
-absolute path to a directory, we go there.
+absolute path to a directory, we go there.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CHECKNEEDRETRYINDEXSCRIPT">
 <term><varname>checkneedretryindexscript</varname></term>
 <listitem><para>Script used to heuristically check if we need to retry indexing
 files which previously failed.  The default script checks
 the modified dates on /usr/bin and /usr/local/bin. A relative path will
 be looked up in the filters dirs, then in the path. Use an absolute path
-to do otherwise.
+to do otherwise.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.RECOLLHELPERPATH">
 <term><varname>recollhelperpath</varname></term>
-<listitem><para>Additional places to search for helper executables. This is used, e.g., on Windows by the Python code, and on Mac OS by the bundled recoll.app
+<listitem><para>Additional places to search for helper executables. This is only used on Windows for now.</para></listitem></varlistentry>
 (because I could find no reliable way to tell launchd to set the PATH). The example below is for
 Windows. Use ':' as entry separator for Mac and Ux-like systems, ';' is for Windows only.
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXABSMLEN">
 <term><varname>idxabsmlen</varname></term>
 <listitem><para>Length of abstracts we store while indexing. Recoll stores an abstract for each indexed file.
@ -692,72 +595,57 @@ defines the size of the stored abstract. The default value is 250
 bytes. The search interface gives you the choice to display this stored
 text or a synthetic abstract built by extracting text around the search
 terms. If you always prefer the synthetic abstract, you can reduce this
-value and save a little space.
+value and save a little space.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXMETASTOREDLEN">
 <term><varname>idxmetastoredlen</varname></term>
 <listitem><para>Truncation length of stored metadata fields. This
 does not affect indexing (the whole field is processed anyway), just the
 amount of data stored in the index for the purpose of displaying fields
 inside result lists or previews. The default value is 150 bytes which
-may be too low if you have custom fields.
+may be too low if you have custom fields.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXTEXTTRUNCATELEN">
 <term><varname>idxtexttruncatelen</varname></term>
 <listitem><para>Truncation length for all document texts. Only index
 the beginning of documents. This is not recommended except if you are
 sure that the interesting keywords are at the top and have severe disk
-space issues.
+space issues.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXSYNONYMS">
 <term><varname>idxsynonyms</varname></term>
 <listitem><para>Name of the index-time synonyms file. This is used for indexing multiword synonyms as single terms,
 which in turn is only useful if you want to perform proximity searches
 with such terms.
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLLANGUAGE">
 <term><varname>aspellLanguage</varname></term>
 <listitem><para>Language definitions to use when creating the aspell
 dictionary. The value must match a set of aspell language
 definition files. You can type "aspell dicts" to see a list. The default
 if this is not set is to use the NLS environment to guess the value. The
-values are the 2-letter language codes (e.g. 'en', 'fr'...)
+values are the 2-letter language codes (e.g. 'en', 'fr'...)</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLADDCREATEPARAM">
 <term><varname>aspellAddCreateParam</varname></term>
 <listitem><para>Additional option and parameter to aspell dictionary creation
 command. Some aspell packages may need an additional option
 (e.g. on Debian Jessie: --local-data-dir=/usr/lib/aspell). See Debian bug
-772415.
+772415.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLKEEPSTDERR">
 <term><varname>aspellKeepStderr</varname></term>
 <listitem><para>Set this to have a look at aspell dictionary creation
 errors. There are always many, so this is mostly for
-debugging.
+debugging.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOASPELL">
 <term><varname>noaspell</varname></term>
 <listitem><para>Disable aspell use. The aspell dictionary generation
 takes time, and some combinations of aspell version, language, and local
 terms, result in aspell crashing, so it sometimes makes sense to just
-disable the thing.
+disable the thing.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONAUXINTERVAL">
 <term><varname>monauxinterval</varname></term>
 <listitem><para>Auxiliary database update interval. The real time
 indexer only updates the auxiliary databases (stemdb, aspell)
 periodically, because it would be too costly to do it for every document
-change. The default period is one hour.
+change. The default period is one hour.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONIXINTERVAL">
 <term><varname>monixinterval</varname></term>
 <listitem><para>Minimum interval (seconds) between processings of the indexing
 queue. The real time indexer does not process each event
 when it comes in, but lets the queue accumulate, to diminish overhead and
 to aggregate multiple events affecting the same file. Default 30
-S.
+S.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONDELAYPATTERNS">
 <term><varname>mondelaypatterns</varname></term>
 <listitem><para>Timing parameters for the real time indexing. Definitions for files which get a longer delay before reindexing
@ -766,25 +654,21 @@ reindexed once in a while. A list of wildcardPattern:seconds pairs. The
 patterns are matched with fnmatch(pattern, path, 0). You can quote entries
 containing white space with double quotes (quote the whole entry, not the
 pattern). The default is empty.
-Example: mondelaypatterns = *.log:20 "*with spaces.*:30"
+Example: mondelaypatterns = *.log:20 "*with spaces.*:30"</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXNICEPRIO">
 <term><varname>idxniceprio</varname></term>
 <listitem><para>"nice" process priority for the indexing processes. Default: 19
-(lowest) Appeared with 1.26.5. Prior versions were fixed at 19.
+(lowest) Appeared with 1.26.5. Prior versions were fixed at 19.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONIONICECLASS">
 <term><varname>monioniceclass</varname></term>
 <listitem><para>ionice class for the indexing process. Despite the misleading name, and on platforms where this is
 supported, this affects all indexing processes,
 not only the real time/monitoring ones. The default value is 3 (use
-lowest "Idle" priority).
+lowest "Idle" priority).</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONIONICECLASSDATA">
 <term><varname>monioniceclassdata</varname></term>
 <listitem><para>ionice class level parameter if the class supports it. The default is empty, as the default "Idle" class has no
-levels.
+levels.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 </variablelist></sect3>
 <sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.QUERY">
 <title>Query-time parameters (no impact on the index) </title><variablelist>
@ -793,8 +677,7 @@ levels.
 <listitem><para>auto-trigger diacritics sensitivity (raw index only). If the index is not stripped, decide if we automatically trigger
 diacritics sensitivity if the search term has accented characters (not in
 unac_except_trans). Else you need to use the query language and the "D"
-modifier to specify diacritics sensitivity. Default is no.
+modifier to specify diacritics sensitivity. Default is no.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.AUTOCASESENS">
 <term><varname>autocasesens</varname></term>
 <listitem><para>auto-trigger case sensitivity (raw index only). If
@ -802,46 +685,40 @@ the index is not stripped (see indexStripChars), decide if we
 automatically trigger character case sensitivity if the search term has
 upper-case characters in any but the first position. Else you need to use
 the query language and the "C" modifier to specify character-case
-sensitivity. Default is yes.
+sensitivity. Default is yes.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXTERMEXPAND">
 <term><varname>maxTermExpand</varname></term>
 <listitem><para>Maximum query expansion count
 for a single term (e.g.: when using wildcards). This only
 affects queries, not indexing. We used to not limit this at all (except
 for filenames where the limit was too low at 1000), but it is
-unreasonable with a big index. Default 10000.
+unreasonable with a big index. Default 10000.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXXAPIANCLAUSES">
 <term><varname>maxXapianClauses</varname></term>
 <listitem><para>Maximum number of clauses
 we add to a single Xapian query. This only affects queries,
 not indexing. In some cases, the result of term expansion can be
 multiplicative, and we want to avoid eating all the memory. Default
-50000.
+50000.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SNIPPETMAXPOSWALK">
 <term><varname>snippetMaxPosWalk</varname></term>
 <listitem><para>Maximum number of positions we walk while populating a snippet for
 the result list. The default of 1,000,000 may be
 insufficient for very big documents, the consequence would be snippets
-with possibly meaning-altering missing words.
+with possibly meaning-altering missing words.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 </variablelist></sect3>
 <sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.PDF">
 <title>Parameters for the PDF input script </title><variablelist>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFOCR">
 <term><varname>pdfocr</varname></term>
 <listitem><para>Attempt OCR of PDF files with no text content. This can be defined in subdirectories. The default is off because
-OCR is so very slow.
+OCR is so very slow.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFATTACH">
 <term><varname>pdfattach</varname></term>
 <listitem><para>Enable PDF attachment extraction by executing pdftk (if
 available). This is
 normally disabled, because it does slow down PDF indexing a bit even if
-not one attachment is ever found.
+not one attachment is ever found.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFEXTRAMETA">
 <term><varname>pdfextrameta</varname></term>
 <listitem><para>Extract text from selected XMP metadata tags. This
@ -849,8 +726,7 @@ is a space-separated list of qualified XMP tag names. Each element can also
 include a translation to a Recoll field name, separated by a '|'
 character. If the second element is absent, the tag name is used as the
 Recoll field names. You will also need to add specifications to the
-"fields" file to direct processing of the extracted data.
+"fields" file to direct processing of the extracted data.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFEXTRAMETAFIX">
 <term><varname>pdfextrametafix</varname></term>
 <listitem><para>Define name of XMP field editing script. This
@ -859,8 +735,7 @@ values. The script should define a 'MetaFixer' class with a metafix()
 method which will be called with the qualified tag name and value of each
 selected field, for editing or erasing. A new instance is created for
 each document, so that the object can keep state for, e.g. eliminating
-duplicate values.
+duplicate values.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 </variablelist></sect3>
 <sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.OCR">
 <title>Parameters for OCR processing </title><variablelist>
@ -872,20 +747,17 @@ the input file. Modules for tesseract (tesseract) and ABBYY FineReader
 (abbyy) are present in the standard distribution. For compatibility with
 the previous version, if this is not defined at all, the default value is
 "tesseract". Use an explicit empty value if needed. A value of "abbyy
-tesseract" will try everything.
+tesseract" will try everything.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.OCRCACHEDIR">
 <term><varname>ocrcachedir</varname></term>
 <listitem><para>Location for caching OCR data. The default if this is empty or undefined is to store the cached
-OCR data under $RECOLL_CONFDIR/ocrcache.
+OCR data under $RECOLL_CONFDIR/ocrcache.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TESSERACTLANG">
 <term><varname>tesseractlang</varname></term>
 <listitem><para>Language to assume for tesseract OCR. Important for improving the OCR accuracy. This can also be set
 through the contents of a file in
 the currently processed directory. See the rclocrtesseract.py
-script. Example values: eng, fra... See the tesseract documentation.
+script. Example values: eng, fra... See the tesseract documentation.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TESSERACTCMD">
 <term><varname>tesseractcmd</varname></term>
 <listitem><para>Path for the tesseract command. Do not quote. This is mostly useful on Windows, or for specifying a non-default
@ -904,19 +776,11 @@ script. Typical values: English, French... See the ABBYY documentation.
 <listitem><para>Path for the abbyy command. The ABBYY directory is usually not in the path, so you should set this.
 </para></listitem></varlistentry>
 </variablelist></sect3>
 <sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.MISCHANDLERPARAMS">
 <title>Parameters for specific handlers </title><variablelist>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ORGMODESUBDOCS">
 <term><varname>orgmodesubdocs</varname></term>
 <listitem><para>Index org-mode level 1 sections as separate sub-documents. This is the default. If set to false, org-mode files will be indexed as plain text.
 </para></listitem></varlistentry>
 </variablelist></sect3>
 <sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.SPECLOCATIONS">
 <title>Parameters set for specific locations </title><variablelist>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MHMBOXQUIRKS">
 <term><varname>mhmboxquirks</varname></term>
 <listitem><para>Enable thunderbird/mozilla-seamonkey mbox format quirks. Set this for the directory where the email mbox files are
-stored.
+stored.</para></listitem></varlistentry>
 </para></listitem></varlistentry>
 </variablelist></sect3>
 </sect2>
--- a/src/doc/user/usermanual.html
+++ b/src/doc/user/usermanual.html
--- a/src/doc/user/usermanual.xml
+++ b/src/doc/user/usermanual.xml
--- a/src/filters/cmdtalk.py
+++ b/src/filters/cmdtalk.py
@ -20,6 +20,8 @@
 # All data is binary. This is important for Python3
 # All parameter names are converted to and processed as str/unicode
 from __future__ import print_function
 import sys
 import os
 import tempfile
@ -27,13 +29,25 @@ import shutil
 import getopt
 import traceback
-def makebytes(data):
+PY3 = sys.version > '3'
-    if data is None:
+
-        return b""
+if PY3:
-    if isinstance(data, bytes):
+    def makebytes(data):
-        return data
+        if data is None:
-    else:
+            return b""
-        return data.encode("UTF-8")
+        if isinstance(data, bytes):
            return data
        else:
            return data.encode("UTF-8")
 else:
    def makebytes(data):
        if data is None:
            return ""
        if isinstance(data, unicode):
            return data.encode("UTF-8")
        else:
            return data
 ############################################
 # CmdTalk implements the communication protocol with the master
@ -102,7 +116,10 @@ class CmdTalk(object):
    # followed by data. The param name is returned as str/unicode, the data
    # as bytes
    def readparam(self):
-        inf = self.infile.buffer
+        if PY3:
            inf = self.infile.buffer
        else:
            inf = self.infile
        s = inf.readline()
        if s == b'':
            if self.exitfunc:
@ -126,7 +143,7 @@ class CmdTalk(object):
                      (paramsize, len(paramdata)), 1, 1)
        else:
            paramdata = b''
-        if not self.nodecodeinput:
+        if PY3 and not self.nodecodeinput:
            try:
                paramdata = paramdata.decode('utf-8')
            except Exception as ex:
@ -137,11 +154,18 @@ class CmdTalk(object):
        #          (paramname, paramsize, paramdata))
        return (paramname, paramdata)
-    def senditem(self, nm, data):
+    if PY3:
-        data = makebytes(data)
+        def senditem(self, nm, data):
-        l = len(data)
+            data = makebytes(data)
-        self.outfile.buffer.write(makebytes("%s: %d\n" % (nm, l)))
+            l = len(data)
-        self.breakwrite(self.outfile.buffer, data)
+            self.outfile.buffer.write(makebytes("%s: %d\n" % (nm, l)))
            self.breakwrite(self.outfile.buffer, data)
    else:
        def senditem(self, nm, data):
            data = makebytes(data)
            l = len(data)
            self.outfile.write(makebytes("%s: %d\n" % (nm, l)))
            self.breakwrite(self.outfile, data)
    # Send answer: document, ipath, possible eof.
    def answer(self, outfields):
@ -218,7 +242,7 @@ def main(proto, processor):
        params[args[2*i]] = args[2*i+1]
    res = processor.process(params)
-    ioout = sys.stdout.buffer
+    ioout = sys.stdout.buffer if PY3 else sys.stdout
    for nm,value in res.items():
        #self.log("Senditem: [%s] -> [%s]" % (nm, value))
--- a/src/filters/kosplitter.py
+++ b/src/filters/kosplitter.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/python3
 #################################
 # Copyright (C) 2020 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
--- a/src/filters/rcl7z.py
+++ b/src/filters/rcl7z.py
@ -3,7 +3,7 @@
 # 7-Zip file filter for Recoll
 # Thanks to Recoll user Martin Ziegler
-# This is a modified version of rclzip.py, with some help from rcltar.py
+# This is a modified version of rclzip, with some help from rcltar
 #
 # Normally using py7zr https://github.com/miurahr/py7zr
 #
--- a/src/filters/rclaudio.py
+++ b/src/filters/rclaudio.py
@ -238,7 +238,7 @@ class AudioTagExtractor(RclBaseHandler):
                if tagname.startswith('APIC:'):
                    #self.em.rclog("mp3 img: %s" % mutf[tagname].mime)
                    return 'jpg' if mutf[tagname].mime == 'image/jpeg' else 'png'
-        elif 'audio/flac' in mime:
+        elif 'audio/x-flac' in mime:
            if mutf.pictures:
                return 'jpg' if mutf.pictures[0].mime == 'image/jpeg' else 'png'
        elif 'audio/mp4' in mime:
@ -351,11 +351,6 @@ class AudioTagExtractor(RclBaseHandler):
        # Metadata tags. The names vary depending on the file type. We
        # just have a big translation dictionary for all
        for tag,val in mutf.items():
            #print(f"TAG {tag} VAL {val}", file=sys.stderr)
            # Mutagen sends out COMM==eng= with tag COMM::eng. We don't know what to do with the
            # language (or possible other attributes), so get rid of it for now:
            if tag.find("COMM::") == 0:
                tag = "COMM"
            if tag.find('TXXX:') == 0:
                tag = tag[5:].upper()
            elif tag.find('TXX:') == 0:
--- a/src/filters/rclbasehandler.py
+++ b/src/filters/rclbasehandler.py
@ -18,6 +18,8 @@
 # Base for extractor classes. With some common generic implementations
 # for the boilerplate functions.
 from __future__ import print_function
 import os
 import sys
 import rclexecm
--- a/src/filters/rclcheckneedretry.sh
+++ b/src/filters/rclcheckneedretry.sh
@ -17,15 +17,9 @@
 # with retry set).
 #
 # If $HOME does not exist, there is nothing we can do (happens, for example when run as upmpdcli)
 if test ! -d "$HOME" ; then
    exit 0
 fi
 # Bin dirs to be tested:
 bindirs="/usr/bin /usr/local/bin $HOME/bin /opt/*/bin"
 rfiledir=$HOME/.config/Recoll.org
 rfile=$rfiledir/needidxretrydate
 nrfile=$rfiledir/tneedidxretrydate
--- a/src/filters/rclchm.py
+++ b/src/filters/rclchm.py
--- a/src/filters/rcldia.py
+++ b/src/filters/rcldia.py
@ -1,11 +1,12 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from __future__ import print_function
 # dia (http://live.gnome.org/Dia) file filter for recoll
 # stefan.friedel@iwr.uni-heidelberg.de 2012
 #
 # add the following to ~/.recoll/mimeconf into the [index] section:
-# application/x-dia-diagram = execm rcldia.py;mimetype=text/plain;charset=utf-8
+# application/x-dia-diagram = execm rcldia;mimetype=text/plain;charset=utf-8
 # and into the [icons] section:
 # application/x-dia-diagram = drawing
 # and finally under [categories]:
--- a/src/filters/rcldjvu.py
+++ b/src/filters/rcldjvu.py
@ -17,6 +17,8 @@
 # Recoll DJVU extractor
 from __future__ import print_function
 import os
 import sys
 import re
--- a/src/filters/rcldoc.py
+++ b/src/filters/rcldoc.py
@ -1,4 +1,5 @@
 #!/usr/bin/env python3
 from __future__ import print_function
 import rclexecm
 import rclexec1
--- a/src/filters/rclepub.py
+++ b/src/filters/rclepub.py
@ -1,5 +1,6 @@
-#!/usr/bin/env python3
+#!/usr/bin/python3
 """Extract Html content from an EPUB file (.epub)"""
 from __future__ import print_function
 rclepub_html_mtype = "text/html"
--- a/src/filters/rclepub1.py
+++ b/src/filters/rclepub1.py
@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 """Extract Html content from an EPUB file (.epub), concatenating all sections"""
 from __future__ import print_function
 import sys
 import os
--- a/src/filters/rclexec1.py
+++ b/src/filters/rclexec1.py
@ -26,6 +26,8 @@
 # this would be too slow. So this helps implementing a permanent script
 # to repeatedly execute single commands.
 from __future__ import print_function
 import subprocess
 import rclexecm
 from rclbasehandler import RclBaseHandler
--- a/src/filters/rclexecm.py
+++ b/src/filters/rclexecm.py
@ -20,6 +20,8 @@
 # All data is binary. This is important for Python3
 # All parameter names are converted to and processed as str/unicode
 from __future__ import print_function
 import sys
 import os
 import tempfile
@ -28,6 +30,7 @@ import getopt
 import rclconfig
 import cmdtalk
 PY3 = (sys.version > '3')
 _g_mswindows = (sys.platform == "win32")
 _g_execdir = os.path.dirname(sys.argv[0])
@ -59,11 +62,12 @@ def makebytes(data):
 # Possibly decode binary file name for use as subprocess argument,
 # depending on platform.
 def subprocfile(fn):
-    # On Windows Python 3 the list2cmdline() method in subprocess assumes that all args are str, and
+    # On Windows PY3 the list2cmdline() method in subprocess assumes that
-    # we receive file names as UTF-8. So we need to convert.
+    # all args are str, and we receive file names as UTF-8. So we need
-    # On Unix all list elements get converted to bytes in the C _posixsubprocess module, nothing to
+    # to convert.
-    # do.
+    # On Unix all list elements get converted to bytes in the C
-    if _g_mswindows and type(fn) != type(''):
+    # _posixsubprocess module, nothing to do.
    if PY3 and _g_mswindows and type(fn) != type(''):
        return fn.decode('UTF-8')
    else:
        return fn
@ -261,29 +265,18 @@ def execPythonScript(icmd):
 # Temp dir helper
 class SafeTmpDir:
-    def __init__(self, tag, em=None):
+    def __init__(self, em):
        self.tag = tag
        self.em = em
-        self.toptmp = None
+        self.toptmp = ""
-        self.tmpdir = None
+        self.tmpdir = ""
    def __del__(self):
-        if self.toptmp:
+        try:
-            try:
+            if self.toptmp:
-                if self.tmpdir:
+                shutil.rmtree(self.tmpdir, True)
                    shutil.rmtree(self.tmpdir, True)
                os.rmdir(self.toptmp)
-            except Exception as err:
+        except Exception as err:
-                if self.em:
+            self.em.rclog("delete dir failed for " + self.toptmp)
                    self.em.rclog("delete dir failed for " + self.toptmp)
    def vacuumdir(self):
        if self.tmpdir:
            for fn in os.listdir(self.tmpdir):
                path = os.path.join(self.tmpdir, fn)
                if os.path.isfile(path):
                    os.unlink(path)
        return True
    def getpath(self):
        if not self.tmpdir:
@ -293,7 +286,7 @@ class SafeTmpDir:
            else:
                self.toptmp = tempfile.mkdtemp(prefix='rcltmp')
-            self.tmpdir = os.path.join(self.toptmp, self.tag)
+            self.tmpdir = os.path.join(self.toptmp, 'rclsofftmp')
            os.makedirs(self.tmpdir)
        return self.tmpdir
@ -312,7 +305,8 @@ def main(proto, extract):
    # Not running the main loop: either acting as single filter (when called
    # from other filter for example), or debugging
    def usage():
-        print("Usage: rclexecm.py [-d] [-f] [-h] [-i ipath] [-s] <filename>", file=sys.stderr)
+        print("Usage: rclexecm.py [-d] [-s] [-i ipath] <filename>",
              file=sys.stderr)
        print("       rclexecm.py -w <prog>", file=sys.stderr)
        sys.exit(1)
@ -367,7 +361,7 @@ def main(proto, extract):
    params = {'filename' : makebytes(path)}
-    # Some filters (e.g. rclaudio.py) need/get a MIME type from the indexer.
+    # Some filters (e.g. rclaudio) need/get a MIME type from the indexer.
    # We make a half-assed attempt to emulate:
    mimetype = _g_config.mimeType(path)
    if not mimetype and not _g_mswindows:
@ -379,7 +373,10 @@ def main(proto, extract):
        print("Open error", file=sys.stderr)
        sys.exit(1)
-    ioout = sys.stdout.buffer
+    if PY3:
        ioout = sys.stdout.buffer
    else:
        ioout = sys.stdout
    if ipath != b"" or actAsSingle:
        params['ipath'] = ipath
        ok, data, ipath, eof = extract.getipath(params)
--- a/src/filters/rclfb2.py
+++ b/src/filters/rclfb2.py
@ -16,6 +16,8 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 ######################################
 from __future__ import print_function
 import sys
 import rclexecm
 import rclxslt
--- a/src/filters/rclgenxslt.py
+++ b/src/filters/rclgenxslt.py
@ -18,6 +18,8 @@
 # Base class for simple (one stylesheet) xslt-based handlers
 from __future__ import print_function
 import sys
 import rclxslt
 import gzip
--- a/src/filters/rclhwp.py
+++ b/src/filters/rclhwp.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/python3
 # Copyright (C) 2020 J.F.Dockes
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
--- a/src/filters/rclics.py
+++ b/src/filters/rclics.py
@ -1,4 +1,5 @@
 #!/usr/bin/env python3
 from __future__ import print_function
 # Read an ICS file, break it into "documents" which are events, todos,
 # or journal entries, and interface with recoll execm
--- a/src/filters/rclimg.py
+++ b/src/filters/rclimg.py
@ -6,6 +6,7 @@
 #
 # Uses pyexiv2. Also tried Pillow, found it useless for tags.
 #
 from __future__ import print_function
 import sys
 import os
--- a/src/filters/rclinfo.py
+++ b/src/filters/rclinfo.py
@ -3,6 +3,8 @@
 # Read a file in GNU info format and output its nodes as subdocs,
 # interfacing with recoll execm
 from __future__ import print_function
 import rclexecm
 import sys
 import os
@ -139,7 +141,7 @@ class InfoSimpleSplitter:
                        if name == b'File':
                            infofile = value
                except Exception as err:
-                    print("rclinfo.py: bad line in %s: [%s] %s\n" % \
+                    print("rclinfo: bad line in %s: [%s] %s\n" % \
                          (infofile, line, err), file = sys.stderr)
                    nodename = prevnodename
                    node += line
--- a/src/filters/rclipynb.py
+++ b/src/filters/rclipynb.py
@ -1,59 +0,0 @@
 #!/usr/bin/env python3
 # Copyright (C) 2021 J.F.Dockes
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the
 # Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 # Recoll handler for iPython / Jupyter notebook files.
 import os
 import sys
 import json
 import rclexecm
 from rclbasehandler import RclBaseHandler
 class IPYNBextractor(RclBaseHandler):
    def __init__(self, em):
        super(IPYNBextractor, self).__init__(em)
    def html_text(self, fn):
        text = open(fn, 'rb').read()
        data = json.loads(text)
        mdtext = ""
        if "worksheets" in data:
            cells = data["worksheets"][0]["cells"]
        else:
            cells = data["cells"]
        for cell in cells:
            if cell["cell_type"] == "markdown":
                mdtext += "\n"
                for line in cell["source"]:
                    mdtext += "# " + line + "\n"
            elif cell["cell_type"] == "code":
                mdtext += "\n\n"
                key = "source" if "source" in cell else "input"
                for line in cell[key]:
                    mdtext += line
                mdtext += "\n"
        #print("%s"%mdtext, file=sys.stderr)
        self.outputmimetype = 'text/plain'
        return mdtext
 # Main program: create protocol handler and extractor and run them
 proto = rclexecm.RclExecM()
 extract = IPYNBextractor(proto)
 rclexecm.main(proto, extract)
--- a/src/filters/rclkar.py
+++ b/src/filters/rclkar.py
@ -1,6 +1,8 @@
 #!/usr/bin/env python3
 # Read a .kar midi karaoke file and translate to recoll indexable format
 # This does not work with Python3 yet because python:midi doesn't 
 from __future__ import print_function
 import rclexecm
 import sys
@ -44,7 +46,11 @@ htmltemplate = '''
 nlbytes = b'\n'
 bsbytes = b'\\'
-nullchar = 0
+PY3 = sys.version > '3'
 if PY3:
    nullchar = 0
 else:
    nullchar = chr(0)
 class KarTextExtractor(RclBaseHandler):
    # Afaik, the only charset encodings with null bytes are variations on
--- a/src/filters/rcllatinclass.py
+++ b/src/filters/rcllatinclass.py
@ -13,7 +13,12 @@ epsilon with dasia (in unicode but not iso). Can this be replaced by either epsi
 with acute accent ?
 """
 from __future__ import print_function
 import sys
 PY3 = sys.version > '3'
 if not PY3:
    import string
 import glob
 import os
 import os.path
@ -33,7 +38,10 @@ class European8859TextClassifier:
        # Table to translate from punctuation to spaces
        self.punct = b'''0123456789<>/*?[].@+-,#_$%&={};.,:!"''' + b"'\n\r"
        spaces = len(self.punct) * b' '
-        self.spacetable = bytes.maketrans(self.punct, spaces)
+        if PY3:
            self.spacetable = bytes.maketrans(self.punct, spaces)
        else:
            self.spacetable = string.maketrans(self.punct, spaces)
    def readlanguages(self, langzip):
        """Extract the stop words lists from the zip file.
--- a/src/filters/rclmidi.py
+++ b/src/filters/rclmidi.py
@ -23,15 +23,24 @@
 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 # 
 from __future__ import print_function
 import sys
 from struct import unpack, pack
 import six
-def next_byte_as_int(data):
+PY3 = sys.version > '3'
-    return next(data)
+
-def next_byte_as_char(data):
+if PY3:
-    return bytes([next(data)])
+    def next_byte_as_int(data):
        return next(data)
    def next_byte_as_char(data):
        return bytes([next(data)])
 else:
    def next_byte_as_int(data):
        return ord(data.next())
    def next_byte_as_char(data):
        return next(data)
 ##
 ## Constants
@ -261,8 +270,12 @@ class NoteEvent(Event):
                                self.velocity)
    def decode_data(self):
-        self.pitch = self.data[0]
+        if PY3:
-        self.velocity = self.data[1]
+            self.pitch = self.data[0]
            self.velocity = self.data[1]
        else:
            self.pitch = ord(self.data[0])
            self.velocity = ord(self.data[1])
 class NoteOnEvent(NoteEvent):
@ -296,8 +309,12 @@ class ControlChangeEvent(Event):
                                hex(ord(self.data[1])))
    def decode_data(self):
-        self.control = self.data[0]
+        if PY3:
-        self.value = self.data[1]
+            self.control = self.data[0]
            self.value = self.data[1]
        else:
            self.control = ord(self.data[0])
            self.value = ord(self.data[1])
 class ProgramChangeEvent(Event):
@ -311,7 +328,10 @@ class ProgramChangeEvent(Event):
                                hex(ord(self.data[0])))
    def decode_data(self):
-        self.value = self.data[0]
+        if PY3:
            self.value = self.data[0]
        else:
            self.value = ord(self.data[0])
 class ChannelAfterTouchEvent(Event):
@ -336,8 +356,12 @@ class PitchWheelEvent(Event):
                                hex(ord(self.data[1])))
    def decode_data(self):
-        first = self.data[0]
+        if PY3:
-        second = self.data[1]
+            first = self.data[0]
            second = self.data[1]
        else:
            first = ord(self.data[0]) 
            second = ord(self.data[1])
        self.value = ((second << 7) | first) - 0x2000
@ -437,7 +461,10 @@ class PortEvent(MetaEvent):
    def decode_data(self):
        assert(len(self.data) == 1)
-        self.port = self.data[0]
+        if PY3:
            self.port = self.data[0]
        else:
            self.port = ord(self.data[0])
 class TrackLoopEvent(MetaEvent):
    name = 'Track Loop'
@ -471,7 +498,13 @@ class SetTempoEvent(MetaEvent):
    def decode_data(self):
        assert(len(self.data) == 3)
-        self.mpqn = (self.data[0] << 16) + (self.data[1] << 8) + self.data[2]
+        if PY3:
            self.mpqn = (self.data[0] << 16) + (self.data[1] << 8) \
                        + self.data[2]
        else:
            self.mpqn = (ord(self.data[0]) << 16) + (ord(self.data[1]) << 8) \
                        + ord(self.data[2])
        self.tempo = float(6e7) / self.mpqn
@ -490,13 +523,22 @@ class TimeSignatureEvent(MetaEvent):
                            (super(TimeSignatureEvent, self).__str__(),
                                self.numerator, self.denominator,
                                self.metronome, self.thirtyseconds)
-    def decode_data(self):
+    if PY3:
-        assert(len(self.data) == 4)
+        def decode_data(self):
-        self.numerator = self.data[0]
+            assert(len(self.data) == 4)
-        # Weird: the denominator is two to the power of the data variable
+            self.numerator = self.data[0]
-        self.denominator = 2 ** self.data[1]
+            # Weird: the denominator is two to the power of the data variable
-        self.metronome = self.data[2]
+            self.denominator = 2 ** self.data[1]
-        self.thirtyseconds = self.data[3]
+            self.metronome = self.data[2]
            self.thirtyseconds = self.data[3]
    else:
        def decode_data(self):
            assert(len(self.data) == 4)
            self.numerator = ord(self.data[0])
            # Weird: the denominator is two to the power of the data variable
            self.denominator = 2 ** ord(self.data[1])
            self.metronome = ord(self.data[2])
            self.thirtyseconds = ord(self.data[3])
 class KeySignatureEvent(MetaEvent):
--- a/src/filters/rclocr.py
+++ b/src/filters/rclocr.py
@ -26,8 +26,6 @@
 import os
 import sys
 import atexit
 import signal
 import importlib.util
 import rclconfig
@ -35,27 +33,7 @@ import rclocrcache
 import rclexecm
 def _deb(s):
-    rclexecm.logmsg("rclocr: %s" % s)
+    rclexecm.logmsg(s)
 ocrcleanupmodule = None
@atexit.register
 def finalcleanup():
    if ocrcleanupmodule:
        ocrcleanupmodule.cleanocr()
 def signal_handler(sig, frame):
    sys.exit(1)
 # Not all signals necessarily exist on all systems, use catch
 try: signal.signal(signal.SIGHUP, signal_handler)
 except: pass
 try: signal.signal(signal.SIGINT, signal_handler)
 except: pass
 try: signal.signal(signal.SIGQUIT, signal_handler)
 except: pass
 try: signal.signal(signal.SIGTERM, signal_handler)
 except: pass
 def Usage():
    _deb("Usage: rclocr.py <imagefilename>")
@ -94,7 +72,7 @@ if incache:
    try:
        breakwrite(sys.stdout.buffer, data)
    except Exception as e:
-        _deb("error writing: %s" % e)
+        _deb("RCLOCR error writing: %s" % e)
        sys.exit(1)
    sys.exit(0)
@ -134,7 +112,6 @@ if not ok:
 # The OCR module will retrieve its specific parameters from the
 # configuration
 ocrcleanupmodule = ocr
 status, data = ocr.runocr(config, path)
 if not status:
--- a/src/filters/rclocrabbyy.py
+++ b/src/filters/rclocrabbyy.py
@ -42,9 +42,6 @@ abbyocrdir = ""
 def _deb(s):
    rclexecm.logmsg(s)
 def cleanocr():
    pass
 # Return true if abbyy appears to be available
 def ocrpossible(config, path):
    global abbyyocrcmd
--- a/src/filters/rclocrcache.py
+++ b/src/filters/rclocrcache.py
@ -22,63 +22,37 @@
 # OCR is extremely slow, caching the results is necessary.
 #
 # The cache stores 2 kinds of objects:
-# - Path files are named from the hash of the image file path and contain the
+# - Path files are named from the hash of the image file path and
-#   image data hash, the modification time and size of the image file at the
+#   contain the image data hash, the modification time and size of the
-#   time the OCR'd data was stored in the cache, and the image path itself (the
+#   image file at the time the OCR'd data was stored in the cache, and
-#   last is for purging only).
+#   the image path itself (the last is for purging only).
-# - Data files are named with the hash of the image data and contain the
+# - Data files are named with the hash of the image data and contain
-#   zlib-compressed OCR'd data.
+#   the zlib-compressed OCR'd data.
 # - The cache Path and Data files are stored under top subdirectories: objects/
 #   and paths/.
 #
 # When retrieving data from the cache:
-#  - We first use the image file size and modification time: if an entry exists
+#  - We first use the image file size and modification time: if an
-#    for the imagepath/mtime/size triplet, and is up to date, the corresponding
+#    entry exists for the imagepath/mtime/size triplet, and is up to
-#    data is obtained from the data file and returned.
+#    date, the corresponding data is obtained from the data file and
-#  - Else we then use the image data: if an entry exists for the computed hashed
+#    returned.
-#    value of the data, it is returned. This allows moving files around without
+#  - Else we then use the image data: if an entry exists for the
-#    needing to run OCR again, but of course, it is more expensive than the
+#    computed hashed value of the data, it is returned. This allows
-#    first step
+#    moving files around without needing to run OCR again, but of
 #    course, it is more expensive than the first step
 #
-# In both cases, the paths are hashed with sha1, and the first two characters of
+#  If we need to use the second step, as a side effect, a path file is
-# the hash are used as a top level directory, the rest as a file name. E.g. for:
+#  created or updated so that the data will be found with the first
-#   pd,pf = self._hashpath(path), the result would be stored under pd/pf
+#  step next time around.
 #
-# If we need to use the second step, as a side effect, a path file is created or
+# Purging the cache of obsolete data.
 # updated so that the data will be found with the first step next time around.
 #
-# When processing embedded documents like email attachments, recoll uses
+#  - The cache path and data files are stored under 2 different
-# temporary copies in TMPDIR (which defaults to /tmp) or RECOLL_TMPDIR. Of
+#    directories (objects, paths) to make purging easier.
-# course the paths for the temporary files changes when re-processing a given
+#  - Purging the paths tree just involves walking it, reading the
-# document. We do not store the Path file for data stored in TMPDIR or
+#    files, and checking the existence of the recorded paths.
-# RECOLL_TMPDIR, because doing so would cause an indefinite accumulation of
+#  - There is no easy way to purge the data tree. The only possibility
-# unusable Path files. This means that access to the OCR data for these
+#    is to input a list of possible source files (e.g. result of a
-# documents always causes the computation of the data hash, and is slower. With
+#    find in the image files area), and compute all the hashes. Data
-# recent Recoll versions which cache the text content in the index, this only
+#    files which do not match one of the hashes are deleted.
 # occurs when reindexing (with older versions, this could also occur for
 # Preview).
 #
 # Purging the cache of obsolete data:
 #
 # This can be done by running this file as a top level script with a --purge
 # option (possibly completed by a --purgedata option but see below)
 #  - Purging the paths tree just involves walking it, reading the files, and
 #    checking the existence of the recorded paths. Path files for non-existent
 #    files are deleted.
 #  - Purging the data tree: we make a list of all Data files referenced by at
 #    least one Path file, then walk the data tree, deleting unreferenced
 #    files. This means that Data files from temporary document copies (see
 #    above) will be deleted, which is quite unsatisfying. This would be
 #    difficult to change:
 #    - There is no way to detect the affected files because the Data files store
 #      no origin information
 #    - Even if we wanted to store an indication that the data file comes from a
 #      temporary document, we'd have no way to access the original document
 #      because the full ipath is not available. Changing this would be close to
 #      impossible because internfile...
 # In consequence the --purgedata option must be explicitly added for a data
 # purge to be performed. Only set it if re-OCRing all embedded documents is reasonable.
 import sys
 import os
@ -87,18 +61,10 @@ import urllib.parse
 import zlib
 import glob
-from rclexecm import logmsg as _deb
+import rclexecm
-def _catslash(p):
+def _deb(s):
-    if p and p[-1] != "/":
+    rclexecm.logmsg(s)
        p += "/"
    return p
 _tmpdir = os.environ["TMPDIR"] if "TMPDIR" in os.environ else "/tmp"
 _tmpdir = _catslash(_tmpdir)
 _recoll_tmpdir = os.environ["RECOLL_TMPDIR"] if "RECOLL_TMPDIR" in os.environ else None
 _recoll_tmpdir = _catslash(_recoll_tmpdir)
 class OCRCache(object):
@ -124,7 +90,7 @@ class OCRCache(object):
    # Compute sha1 of path data contents, as two parts of 2 and 38 chars
    def _hashdata(self, path):
-        # _deb("Hashing DATA")
+        #_deb("Hashing DATA")
        m = hashlib.sha1()
        with open(path, "rb") as f:
            while True:
@ -135,35 +101,35 @@ class OCRCache(object):
                h = m.hexdigest()
        return h[0:2], h[2:]
    def _readpathfile(self, ppf):
        '''Read path file and return values. We do not decode the image path
        as this is only used for purging'''
        with open(ppf, 'r') as f:
            line = f.read()
-        dd, df, tm, sz, pth = line.split()
+        dd,df,tm,sz,pth = line.split()
        tm = int(tm)
        sz = int(sz)
-        return dd, df, tm, sz, pth
+        return dd,df,tm,sz,pth
    # Try to read the stored attributes for a given path: data hash,
    # modification time and size. If this fails, the path itself is
    # not cached (but the data still might be, maybe the file was moved)
    def _cachedpathattrs(self, path):
-        pd, pf = self._hashpath(path)
+        pd,pf = self._hashpath(path)
        pathfilepath = os.path.join(self.pathdir, pd, pf)
        if not os.path.exists(pathfilepath):
            return False, None, None, None, None
        try:
            dd, df, tm, sz, pth = self._readpathfile(pathfilepath)
            return True, dd, df, tm, sz
-        except Exception as ex:
+        except:
            _deb(f"Error while trying to access pathfile {pathfilepath}: {ex}")
            return False, None, None, None, None
    # Compute the path hash, and get the mtime and size for given
    # path, for updating the cache path file
    def _newpathattrs(self, path):
-        pd, pf = self._hashpath(path)
+        pd,pf = self._hashpath(path)
        tm = int(os.path.getmtime(path))
        sz = int(os.path.getsize(path))
        return pd, pf, tm, sz
@ -176,25 +142,31 @@ class OCRCache(object):
        if not ret:
            return False, None, None
        pd, pf, ntm, nsz = self._newpathattrs(path)
-        # _deb(" tm %d  sz %d" % (ntm, nsz))
+        #_deb(" tm %d  sz %d" % (ntm, nsz))
-        # _deb("otm %d osz %d" % (otm, osz))
+        #_deb("otm %d osz %d" % (otm, osz))
        if otm != ntm or osz != nsz:
            return False, None, None
        return True, od, of
    # Check if cache appears up to date for path (no data check),
    # return True/False
    def pathincache(self, path):
        ret, dd, df = self._pathincache(path)
        return ret
    # Compute the data file name for path. Expensive: we compute the data hash.
    # Return both the data file path and path elements (for storage in path file)
    def _datafilename(self, path):
        d, f = self._hashdata(path)
        return os.path.join(self.objdir, d, f), d, f
    # Check if the data for path is in cache: expensive, needs to
    # compute the hash for the path's data contents. Returns True/False
    def dataincache(self, path):
        return os.path.exists(self._datafilename(path)[0])
    # Create path file with given elements.
    def _updatepathfile(self, pd, pf, dd, df, tm, sz, path):
        global _tmpdir, _recoll_tmpdir
        if (_tmpdir and path.startswith(_tmpdir)) or \
           (_recoll_tmpdir and path.startswith(_recoll_tmpdir)):
            _deb(f"ocrcache: not storing path data for temporary file {path}")
            return
        dir = os.path.join(self.pathdir, pd)
        if not os.path.exists(dir):
            os.makedirs(dir)
@ -206,7 +178,7 @@ class OCRCache(object):
    # Store data for path. Only rewrite an existing data file if told
    # to do so: this is only useful if we are forcing an OCR re-run.
    def store(self, path, datatostore, force=False):
-        dd, df = self._hashdata(path)
+        dd,df = self._hashdata(path)
        pd, pf, tm, sz = self._newpathattrs(path)
        self._updatepathfile(pd, pf, dd, df, tm, sz, path)
        dir = os.path.join(self.objdir, dd)
@ -214,7 +186,7 @@ class OCRCache(object):
            os.makedirs(dir)
        dfile = os.path.join(dir, df)
        if force or not os.path.exists(dfile):
-            # _deb("Storing data")
+            #_deb("Storing data")
            cpressed = zlib.compress(datatostore)
            with open(dfile, "wb") as f:
                f.write(cpressed)
@ -231,12 +203,11 @@ class OCRCache(object):
            dfn, dd, df = self._datafilename(path)
        if not os.path.exists(dfn):
            _deb(f"ocrcache: no existing OCR data file for {path}")
            return False, b""
        if not pincache:
-            # File may have moved. Create/Update path file for next time
+            # File has moved. Create/update path file for next time
-            _deb(f"ocrcache::get: data ok but path file for {path} does not exist: creating it")
+            _deb("ocrcache::get file %s was moved, updating path data" % path)
            pd, pf, tm, sz = self._newpathattrs(path)
            self._updatepathfile(pd, pf, dd, df, tm, sz, path)
@ -252,7 +223,7 @@ class OCRCache(object):
        ntm = int(os.path.getmtime(origpath))
        nsz = int(os.path.getsize(origpath))
        if ntm != otm or nsz != osz:
-            # _deb("Purgepaths otm %d ntm %d osz %d nsz %d"%(otm, ntm, osz, nsz))
+            #_deb("Purgepaths otm %d ntm %d osz %d nsz %d"%(otm, ntm, osz, nsz))
            return True
        return False
@ -280,15 +251,15 @@ class OCRCache(object):
    def _pgdt_pathcb(self, f):
        '''Get a pathfile name, read it, and record datafile identifier
        (concatenate data file subdir and file name)'''
-        # _deb("_pgdt_pathcb: %s" % f)
+        #_deb("_pgdt_pathcb: %s" % f)
        dd, df, tm, sz, orgpath = self._readpathfile(f)
        self._pgdt_alldatafns.add(dd+df)
    def _pgdt_datacb(self, datafn):
        '''Get a datafile name and check that it is referenced by a previously
        seen pathfile'''
-        p1, fn = os.path.split(datafn)
+        p1,fn = os.path.split(datafn)
-        p2, dn = os.path.split(p1)
+        p2,dn = os.path.split(p1)
        tst = dn+fn
        if tst in self._pgdt_alldatafns:
            _deb("purgedata: ok         : %s" % datafn)
@ -311,59 +282,48 @@ class OCRCache(object):
        self._walk(self.objdir, self._pgdt_datacb)
 if __name__ == '__main__':
    import rclconfig
-    import getopt
+    def _Usage():
-
+        _deb("Usage: rclocrcache.py --purge")
    def Usage(f=sys.stderr):
        print("Usage: rclocrcache.py --purge [--purgedata]", file=f)
        print("Usage: rclocrcache.py --store <imgdatapath> <ocrdatapath>", file=f)
        print("Usage: rclocrcache.py --get <imgdatapath>", file=f)
        sys.exit(1)
    if len(sys.argv) != 2:
        _Usage()
    if sys.argv[1] != "--purge":
        _Usage()
    conf = rclconfig.RclConfig()
    cache = OCRCache(conf)
-    opts, args = getopt.getopt(sys.argv[1:], "h", ["help", "purge", "purgedata", "store", "get"])
+    cache.purgepaths()
-    purgedata = False
+    cache.purgedata()
-    purge = False
+    sys.exit(0)
    for opt, arg in opts:
        if opt in ['-h', '--help']:
            Usage(sys.stdout)
        elif opt in ['--purgedata']:
            purgedata = True
        elif opt in ['--purge']:
            if len(args) != 0:
                Usage()
            purge = True
        elif opt in ['--store']:
            if len(args) != 2:
                Usage()
            imgdatapath = args[0]
            ocrdatapath = args[1]
            ocrdata = open(ocrdatapath, "rb").read()
            cache.store(imgdatapath, ocrdata, force=False)
            sys.exit(0)
        elif opt in ['--get']:
            if len(args) != 1:
                Usage()
            imgdatapath = args[0]
            incache, data = cache.get(imgdatapath)
            if incache:
                print(f"OCR data from cache {data}")
                sys.exit(0)
            else:
                print("OCR Data was not found in cache", file=sys.stderr)
                sys.exit(1)
        else:
            print(f"Unknown option {opt}", file=sys.stderr)
            Usage()
    # End options. Need purging ?
    if purge:
        cache.purgepaths()
        if purgedata:
            cache.purgedata()
    Usage()
 #    def trycache(p):
 #        _deb("== CACHE tests for %s"%p)
 #        ret = cache.pathincache(p)
 #        s = "" if ret else " not"
 #        _deb("path for %s%s in cache" % (p, s))
 #        if not ret:
 #            return False
 #        ret = cache.dataincache(p)
 #        s = "" if ret else " not"
 #        _deb("data for %s%s in cache" % (p, s))
 #        return ret
 #    def trystore(p):
 #        _deb("== STORE test for %s" % p)
 #        cache.store(p, b"my OCR'd text is one line\n", force=False)
 #    def tryget(p):
 #        _deb("== GET test for %s" % p)
 #        incache, data = cache.get(p)
 #        if incache:
 #            _deb("Data from cache [%s]" % data)
 #        else:
 #            _deb("Data was not found in cache")
 #        return incache, data
 #    if False:
 #        path = sys.argv[1]
 #        incache, data = tryget(path)
 #        if not incache:
 #            trystore(path)
 #
--- a/src/filters/rclocrtesseract.py
+++ b/src/filters/rclocrtesseract.py
@ -21,6 +21,7 @@
 import os
 import sys
 import atexit
 import tempfile
 import subprocess
 import glob
@ -37,28 +38,39 @@ _okexts = ('.tif', '.tiff', '.jpg', '.png', '.jpeg')
 tesseractcmd = None
 pdftoppmcmd = None
-pdftocairocmd = None
+
 def _deb(s):
-    rclexecm.logmsg("rclocrtesseract: %s" % s)
+    rclexecm.logmsg(s)
 def vacuumdir(dir):
    if dir:
        for fn in os.listdir(dir):
            path = os.path.join(dir, fn)
            if os.path.isfile(path):
                os.unlink(path)
    return True
 tmpdir = None
 def _maybemaketmpdir():
    global tmpdir
    if tmpdir:
-        if not tmpdir.vacuumdir():
+        if not vacuumdir(tmpdir):
-            _deb("openfile: vacuumdir %s failed" % tmpdir.getpath())
+            _deb("openfile: vacuumdir %s failed" % tmpdir)
            return False
    else:
-        tmpdir = rclexecm.SafeTmpDir("rclocrtesseract")
+        tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
-def cleanocr():
+def finalcleanup():
    global tmpdir
    if tmpdir:
-        del tmpdir
+        vacuumdir(tmpdir)
-        tmpdir = None
+        os.rmdir(tmpdir)
 atexit.register(finalcleanup)
 # Return true if tesseract and the appropriate conversion program for
@ -95,16 +107,12 @@ def ocrpossible(config, path):
        # legacy code used pdftoppm for some reason, and it appears
        # that the newest builds from conda-forge do not include
        # pdftocairo. So stay with pdftoppm.
-        global pdftoppmcmd, pdftocairocmd
+        global pdftoppmcmd
-        if not pdftoppmcmd and not pdftocairocmd:
+        if not pdftoppmcmd:
-            pdftocairocmd = rclexecm.which("pdftocairo")
+            pdftoppmcmd = rclexecm.which("pdftoppm")
-            if not pdftocairocmd:
+            if not pdftoppmcmd:
-                pdftocairocmd = rclexecm.which("poppler/pdftocairo")
+                pdftoppmcmd = rclexecm.which("poppler/pdftoppm")
-            if not pdftocairocmd:
+        if pdftoppmcmd:
                pdftoppmcmd = rclexecm.which("pdftoppm")
                if not pdftoppmcmd:
                    pdftoppmcmd = rclexecm.which("poppler/pdftoppm")
        if pdftoppmcmd or pdftocairocmd:
            return True
    return False
@ -161,17 +169,14 @@ def _pdftesseract(config, path):
    tesseractlang = _guesstesseractlang(config, path)
-    #tesserrorfile = os.path.join(tmpdir.getpath(), "tesserrorfile")
+    #tesserrorfile = os.path.join(tmpdir, "tesserrorfile")
-    tmpfile = os.path.join(tmpdir.getpath(), "ocrXXXXXX")
+    tmpfile = os.path.join(tmpdir, "ocrXXXXXX")
    # Split pdf pages
    try:
-        tmpdir.vacuumdir()
+        vacuumdir(tmpdir)
-        if pdftocairocmd:
+        cmd = [pdftoppmcmd, "-r", "300", path, tmpfile]
-            cmd = [pdftocairocmd, "-tiff", "-tiffcompression", "lzw", "-r", "300", path, tmpfile]
+        #_deb("Executing %s" % cmd)
        else:
            cmd = [pdftoppmcmd, "-r", "300", path, tmpfile]
            #_deb("Executing %s" % cmd)
        subprocess.check_call(cmd)
    except Exception as e:
        _deb("%s failed: %s" % (pdftoppmcmd,e))
@ -181,8 +186,8 @@ def _pdftesseract(config, path):
    # system is full. There is no really good way to check for
    # this. We consider any empty file to signal an error
-    pages = glob.glob(tmpfile + "*")
+    ppmfiles = glob.glob(tmpfile + "*")
-    for f in pages:
+    for f in ppmfiles:
        size = os.path.getsize(f)
        if os.path.getsize(f) == 0:
            _deb("pdftoppm created empty files. "
@ -198,7 +203,7 @@ def _pdftesseract(config, path):
        except:
            pass
-    for f in sorted(pages):
+    for f in sorted(ppmfiles):
        out = b''
        try:
            out = subprocess.check_output(
--- a/src/filters/rclorgmode.py
+++ b/src/filters/rclorgmode.py
@ -1,37 +1,18 @@
 #!/usr/bin/env python3
-# Copyright (C) 2020-2022 J.F.Dockes
+from __future__ import print_function
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the
 # Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-'''Read an org-mode file, optionally break it into subdocs" along level 1 headings'''
+# Read an org-mode file, break it into "documents" along the separator lines
-
+# and interface with recoll execm
 import sys
 import re
 import rclexecm
-import rclconfig
+import sys
-import conftree
+import re
 class OrgModeExtractor:
    def __init__(self, em):
        self.file = ""
        self.contents = []
        self.em = em
        self.selftext = ""
        self.docs = []
        config = rclconfig.RclConfig()
        self.createsubdocs = conftree.valToBool(config.getConfParam("orgmodesubdocs"))
    def extractone(self, index):
        if index >= len(self.docs):
@ -42,7 +23,7 @@ class OrgModeExtractor:
        iseof = rclexecm.RclExecM.noteof
        if self.currentindex >= len(self.docs) -1:
            iseof = rclexecm.RclExecM.eofnext
-        self.em.setmimetype("text/x-orgmode-sub")
+        self.em.setmimetype("text/plain")
        try:
            self.em.setfield("title", docdata.splitlines()[0])
        except:
@ -52,6 +33,7 @@ class OrgModeExtractor:
    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        self.file = params["filename"]
        try:
            data = open(self.file, "rb").read()
        except Exception as e:
@ -59,15 +41,9 @@ class OrgModeExtractor:
            return False
        self.currentindex = -1
        if not self.createsubdocs:
            self.selftext = data
            return True
        res = rb'''^\* '''
        self.docs = re.compile(res, flags=re.MULTILINE).split(data)
        # Note that there can be text before the first heading. This goes into the self doc,
        # because it's not a proper entry.
        self.selftext = self.docs[0]
        self.docs = self.docs[1:]
        #self.em.rclog("openfile: Entry count: %d" % len(self.docs))
        return True
@ -83,8 +59,6 @@ class OrgModeExtractor:
        return self.extractone(index)
    def getnext(self, params):
        if not self.createsubdocs:
            return (True, self.selftext, "", rclexecm.RclExecM.eofnext)
        if self.currentindex == -1:
            # Return "self" doc
@ -94,7 +68,7 @@ class OrgModeExtractor:
                eof = rclexecm.RclExecM.eofnext
            else:
                eof = rclexecm.RclExecM.noteof
-            return (True, self.selftext, "", eof)
+            return (True, "", "", eof)
        if self.currentindex >= len(self.docs):
            self.em.rclog("getnext: EOF hit")
--- a/src/filters/rclpdf.py
+++ b/src/filters/rclpdf.py
@ -33,7 +33,6 @@ import glob
 import traceback
 import atexit
 import signal
 import time
 import rclexecm
 import rclconfig
@ -67,17 +66,11 @@ _htmlprefix =b'''<html><head>
 _htmlsuffix = b'''</pre></body></html>'''
 def finalcleanup():
    global tmpdir
    if tmpdir:
-        del tmpdir
+        vacuumdir(tmpdir)
-        tmpdir = None
+        os.rmdir(tmpdir)
 ocrproc = None
 def signal_handler(signal, frame):
    global ocrproc
    if ocrproc:
        ocrproc.wait()
        ocrproc = None
    sys.exit(1)
 atexit.register(finalcleanup)
@ -92,6 +85,14 @@ except: pass
 try: signal.signal(signal.SIGTERM, signal_handler)
 except: pass
 def vacuumdir(dir):
    if dir:
        for fn in os.listdir(dir):
            path = os.path.join(dir, fn)
            if os.path.isfile(path):
                os.unlink(path)
    return True
 class PDFExtractor:
    def __init__(self, em):
        self.currentindex = 0
@ -212,7 +213,7 @@ class PDFExtractor:
            # no big deal
            return True
        try:
-            tmpdir.vacuumdir()
+            vacuumdir(tmpdir)
            # Note: the java version of pdftk sometimes/often fails
            # here with writing to stdout:
            #    Error occurred during initialization of VM
@ -222,9 +223,9 @@ class PDFExtractor:
            # output, until we fix the error or preferably find a way
            # to do it with poppler...
            subprocess.check_call(
-                [self.pdftk, self.filename, "unpack_files", "output", tmpdir.getpath()],
+                [self.pdftk, self.filename, "unpack_files", "output",
-                stdout=sys.stderr)
+                 tmpdir], stdout=sys.stderr)
-            self.attachlist = sorted(os.listdir(tmpdir.getpath()))
+            self.attachlist = sorted(os.listdir(tmpdir))
            return True
        except Exception as e:
            self.em.rclog("extractAttach: failed: %s" % e)
@ -398,12 +399,11 @@ class PDFExtractor:
    def maybemaketmpdir(self):
        global tmpdir
        if tmpdir:
-            if not tmpdir.vacuumdir():
+            if not vacuumdir(tmpdir):
-                self.em.rclog("openfile: vacuumdir %s failed" % tmpdir.getpath())
+                self.em.rclog("openfile: vacuumdir %s failed" % tmpdir)
                return False
        else:
-            tmpdir = rclexecm.SafeTmpDir("rclpdf", self.em)
+            tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
            #self.em.rclog("Using temporary directory %s" % tmpdir.getpath())
            if self.pdftk and re.match("/snap/", self.pdftk):
                # We know this is Unix (Ubuntu actually). Check that tmpdir
                # belongs to the user as snap commands can't use /tmp to share
@ -415,7 +415,9 @@ class PDFExtractor:
                    if st.st_uid == os.getuid():
                        ok = True
                if not ok:
-                    self.em.rclog("pdftk is a snap command and needs TMPDIR to be owned by you")
+                    self.em.rclog(
                        "pdftk is a snap command and needs TMPDIR to be "
                        "a directory you own")
    def _process_annotations(self, html):
        doc = Poppler.Document.new_from_file(
@ -489,11 +491,9 @@ class PDFExtractor:
            s = self.config.getConfParam("pdfocr")
            if rclexecm.configparamtrue(s):
                try:
-                    cmd = [sys.executable, os.path.join(_execdir, "rclocr.py"), self.filename]
+                    cmd = [sys.executable, os.path.join(_execdir, "rclocr.py"),
-                    global ocrproc
+                           self.filename]
-                    ocrproc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+                    data = subprocess.check_output(cmd)
                    data, stderr = ocrproc.communicate()
                    ocrproc = None
                    html = _htmlprefix + rclexecm.htmlescape(data) + _htmlsuffix
                except Exception as e:
                    self.em.rclog("%s failed: %s" % (cmd, e))
@ -520,9 +520,7 @@ class PDFExtractor:
        if not self.attextractdone:
            if not self.extractAttach():
                return (False, "", "", rclexecm.RclExecM.eofnow)
-        if type(ipath) != type(""):
+        path = os.path.join(tmpdir, ipath)
            ipath = ipath.decode('utf-8')
        path = os.path.join(tmpdir.getpath(), ipath)
        if os.path.isfile(path):
            f = open(path, "rb")
            docdata = f.read();
--- a/src/filters/rclppt.py
+++ b/src/filters/rclppt.py
@ -2,6 +2,8 @@
 # Recoll PPT text extractor
 from __future__ import print_function
 import rclexecm
 import rclexec1
 import re
--- a/src/filters/rclpst.py
+++ b/src/filters/rclpst.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/python3
 #################################
 # Copyright (C) 2019 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
@ -28,14 +28,12 @@ import os
 import pathlib
 import email.parser
 import email.policy
 import email.message
 import mailbox
 import subprocess
 import rclexecm
 import rclconfig
 import conftree
 import base64
 import traceback
 _mswindows = (sys.platform == "win32" or sys.platform == "msys")
 if _mswindows:
@ -97,26 +95,14 @@ class EmailBuilder(object):
        newmsg = email.message.EmailMessage(policy=email.policy.default)
        headerstr = self.headers.decode("UTF-8", errors='replace')
        # print("%s" % headerstr)
-        try:
+        headers = self.parser.parsestr(headerstr, headersonly=True)
            headers = self.parser.parsestr(headerstr, headersonly=True)
        except:
            # This sometimes fails, for example with 'day is out of range for month'. Try to go on
            # without headers
            headers = email.message.EmailMessage()
        #self.log("EmailBuilder: content-type %s" % headers['content-type'])
-        for nm in ('from', 'subject', 'date'):
+        for nm in ('from', 'subject'):
            if nm in headers:
-                try:
+                newmsg.add_header(nm, headers[nm])
                    newmsg.add_header(nm, headers[nm])
                except:
                    pass
        for h in ('to', 'cc'):
-            try:
+            tolist = headers.get_all(h)
                tolist = headers.get_all(h)
            except:
                tolist = []
            if not tolist:
                continue
            alldests = ""
@ -127,10 +113,7 @@ class EmailBuilder(object):
                    alldests += sd + ", "
            if alldests:
                alldests = alldests.rstrip(", ")
-                try:
+                newmsg.add_header(h, alldests)
                    newmsg.add_header(h, alldests)
                except:
                    pass
 # Decoding the body: the .pst contains the text value decoded from qp
 # or base64 (at least that's what libpff sends). Unfortunately, it
@ -152,13 +135,8 @@ class EmailBuilder(object):
                charset = headers.get_content_charset()
                body = ''
                if charset:
-                    if charset == 'unicode':
+                    body = self.body.decode(charset, errors='replace')
-                        charset = 'utf-16'
+                    #self.log("DECODE FROM HEADER CHARSET %s SUCCEEDED"% charset)
                    try:
                        body = self.body.decode(charset, errors='replace')
                        #self.log("DECODE FROM HEADER CHARSET %s SUCCEEDED"% charset)
                    except:
                        pass
                else:
                    try:
                        body = self.body.decode('utf-8')
@ -399,7 +377,6 @@ class PstExtractor(object):
                return(False, "", "", rclexecm.RclExecM.eofnow)
        except Exception as ex:
            self.em.rclog("getnext: exception: %s" % ex)
            traceback.print_exc()
            return(False, "", "", rclexecm.RclExecM.eofnow)
        return (True, doc, ipath, rclexecm.RclExecM.noteof)
--- a/src/filters/rclpython.py
+++ b/src/filters/rclpython.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/python3
 # Rclpython is based on "colorize.py" from:
 # http://chrisarndt.de/en/software/python/colorize.html
@ -51,12 +51,6 @@ _css_classes = {
    _TEXT:              'text',
 }
 # python3.8 token.py sends an ENCODING token which we ignore
 try:
    token_encoding_type = token.ENCODING
 except:
    token_encoding_type = 62
 _HTML_HEADER = """\
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
  "http://www.w3.org/TR/html4/loose.dtd">
@ -152,21 +146,17 @@ class Parser:
    def __call__(self, toktype, toktext, startpos, endpos, line):
        """ Token handler.
        """
        srow, scol = startpos
        erow, ecol = endpos
        if 0:
            print("type %s %s text %s start %s %s end %s %s<br>\n" % \
                  (toktype, token.tok_name[toktype], toktext, \
-                   srow, scol,erow,ecol), file=sys.stderr)
+                   srow, scol,erow,ecol))
-
+        srow, scol = startpos
        erow, ecol = endpos
        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)
        if toktype == token_encoding_type:
            return
        # handle newlines
        if toktype in [token.NEWLINE, tokenize.NL]:
            self.out.write(b'\n')
--- a/src/filters/rclrar.py
+++ b/src/filters/rclrar.py
@ -18,6 +18,8 @@
 #   Free Software Foundation, Inc.,
 #   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 from __future__ import print_function
 import sys
 import rclexecm
 import os
@ -58,7 +60,7 @@ except Exception as ex:
 # (https://www.rarlab.com/rar_add.htm). The unrar-free version fails
 # with the message "Failed the read enough data"
 # 
-# This is identical to rclzip.py except I did a search/replace from zip
+# This is identical to rclzip except I did a search/replace from zip
 # to rar, and changed this comment.
 class RarExtractor:
    def __init__(self, em):
--- a/src/filters/rclrtf.py
+++ b/src/filters/rclrtf.py
@ -1,4 +1,5 @@
 #!/usr/bin/env python3
 from __future__ import print_function
 import rclexecm
 import rclexec1
--- a/src/filters/rcltar.py
+++ b/src/filters/rcltar.py
@ -2,10 +2,12 @@
 # Tar-file filter for Recoll
 # Thanks to Recoll user Martin Ziegler
-# This is a modified version of /usr/share/recoll/filters/rclzip.py
+# This is a modified version of /usr/share/recoll/filters/rclzip
 # It works not only for tar-files, but automatically for gzipped and
 # bzipped tar-files at well.
 from __future__ import print_function
 import rclexecm
 try:
--- a/src/filters/rcltext.py
+++ b/src/filters/rcltext.py
@ -18,6 +18,8 @@
 # Wrapping a text file. Recoll does it internally in most cases, but
 # this is for use by another filter.
 from __future__ import print_function
 import rclexecm
 import sys
 from rclbasehandler import RclBaseHandler
--- a/src/filters/rcltxtlines.py
+++ b/src/filters/rcltxtlines.py
@ -2,6 +2,7 @@
 """Index text lines as document (execm handler sample). This exists
 to demonstrate the execm interface and is not meant to be useful or
 efficient"""
 from __future__ import print_function
 import sys
 import os
--- a/src/filters/rcluncomp.py
+++ b/src/filters/rcluncomp.py
@ -1,4 +1,5 @@
 # No shebang: this is only used on Windows. We use a shell script on Linux
 from __future__ import print_function
 import rclexecm
 import sys
--- a/src/filters/rclwar.py
+++ b/src/filters/rclwar.py
@ -2,6 +2,8 @@
 # WAR web archive filter for recoll. War file are gzipped tar files
 from __future__ import print_function
 import rclexecm
 import tarfile
--- a/src/filters/rclxmp.py
+++ b/src/filters/rclxmp.py
@ -16,6 +16,7 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 # Code to extract XMP tags using libexempi and python-xmp
 from __future__ import print_function
 can_xmp = True
 try:
--- a/src/filters/rclzip.py
+++ b/src/filters/rclzip.py
@ -18,11 +18,11 @@
 # Zip file extractor for Recoll
 from __future__ import print_function
 import os
 import posixpath
 import fnmatch
 import datetime
 import rclexecm
 from zipfile import ZipFile
@ -49,7 +49,7 @@ if not hasrclconfig:
 # and stores it in the catalog as an unicode object. Else it uses the
 # binary string, which it decodes as CP437 (zip standard).
 #
-# When reading the file, the input file name is used by rclzip.py
+# When reading the file, the input file name is used by rclzip
 # directly as an index into the catalog.
 #
 # When we send the file name data to the indexer, we have to serialize
@ -119,8 +119,6 @@ class ZipExtractor:
                # element).
                filename = posixpath.basename(ipath)
                self.em.setfield("filename", filename)
                dt = datetime.datetime(*info.date_time)
                self.em.setfield("modificationdate", str(int(dt.timestamp())))
            except:
                pass
            ok = True
@ -153,11 +151,14 @@ class ZipExtractor:
            if skipped is not None:
                self.skiplist += conftree.stringToStrings(skipped)
        try:
-            # Note: py3 ZipFile wants an str file name, which
+            if rclexecm.PY3:
-            # is wrong: file names are binary. But it accepts an
+                # Note: py3 ZipFile wants an str file name, which
-            # open file, and open() has no such restriction
+                # is wrong: file names are binary. But it accepts an
-            self.f = open(filename, 'rb')
+                # open file, and open() has no such restriction
-            self.zip = ZipFile(self.f)
+                self.f = open(filename, 'rb')
                self.zip = ZipFile(self.f)
            else:
                self.zip = ZipFile(filename)
            return True
        except Exception as err:
            self.em.rclog("openfile: failed: [%s]" % err)
--- a/src/filters/recoll-we-move-files.py
+++ b/src/filters/recoll-we-move-files.py
@ -1,5 +1,5 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
-# Copyright (C) 2017-2022 J.F.Dockes
+# Copyright (C) 2017 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
 #  the Free Software Foundation; either version 2 of the License, or
@ -31,7 +31,6 @@ but it can also be run by hand.
 import sys
 import os
 import re
 import getopt
 try:
    from hashlib import md5 as md5
 except:
@ -95,44 +94,28 @@ def list_all_files(dir):
    return mfiles,cfiles
 #######################
 def msg(s):
    print(f"{s}", file=sys.stderr)
 def usage():
-    msg("Usage: recoll-we-move-files.py [-c <recollconfigdir>]")
+    print("Usage: recoll-we-move-files.py [<downloaddir>]", file=sys.stderr)
    msg(" The script needs the recoll configuration directory. This can be set either through")
    msg(" the RECOLL_CONFDIR environment variable or the '-c' command line option (which takes")
    msg(" precedence). If none is set, the default configuration directory will be used.")
    sys.exit(1)
 config = rclconfig.RclConfig()
-opts, args = getopt.getopt(sys.argv[1:], "c:")
+# Source dir is parameter, else from config else default Downloads directory
 if not len(args) == 0:
    usage()
 configdir = None
 for opt,val in opts:
    #logdeb(f"opt {opt} val {val}")
    if opt == "-c":
        configdir = val
        if not os.path.isdir(val):
            msg(f"{val} is not a directory")
            usage()
    else:
        usage()
 config = rclconfig.RclConfig(argcnf=configdir)
 # Get the directory where the browser extension creates the page files. Our user can set it as a
 # subdirectory of the default Downloads directory, for tidyness
 downloadsdir = config.getConfParam("webdownloadsdir")
 if not downloadsdir:
    downloadsdir = "~/Downloads"
 downloadsdir = os.path.expanduser(downloadsdir)
 if not os.path.isdir(downloadsdir):
    msg(f"Downloads directory {downloadsdir} does not exist")
    sys.exit(1)
-# Get the target recoll webqueue directory, into which we are going to move the downloaded files.
+if len(sys.argv) == 2:
    mydir = sys.argv[1]
 elif len(sys.argv) == 1:
    mydir = downloadsdir
 else:
    usage()
 if not os.path.isdir(mydir):
    usage()
 # Get target webqueue recoll directory from recoll configuration
 webqueuedir = config.getConfParam("webqueuedir")
 if not webqueuedir:
    if _mswindows:
@ -142,11 +125,10 @@ if not webqueuedir:
 webqueuedir = os.path.expanduser(webqueuedir)
 os.makedirs(webqueuedir, exist_ok = True)
-
+# logdeb("webqueuedir is %s" % webqueuedir)
 #logdeb(f"recoll confdir [{configdir}] downloadsdir [{downloadsdir}] webqueuedir [{webqueuedir}]")
 # Get the lists of all files created by the browser addon
-mfiles, cfiles = list_all_files(downloadsdir)
+mfiles, cfiles = list_all_files(mydir)
 # Only keep the last version
 mfiles = delete_previous_instances(mfiles, downloadsdir)
@ -161,7 +143,7 @@ cfiles = delete_previous_instances(cfiles, downloadsdir)
 # The old plugin created the data first, so we move data then meta
 for hash in cfiles.keys():
    if hash in mfiles.keys():
-        newname = "firefox-recoll-web-" + hash
+        newname = "firefox-recoll-web-"+hash
        shutil.move(os.path.join(downloadsdir, cfiles[hash]),
                    os.path.join(webqueuedir, newname))
        shutil.move(os.path.join(downloadsdir, mfiles[hash]),
--- a/src/filters/xlsxmltocsv.py
+++ b/src/filters/xlsxmltocsv.py
@ -23,6 +23,8 @@
 # the minimum version supported.
 from __future__ import print_function
 import sys
 import xml.sax
--- a/src/index/exefetcher.cpp
+++ b/src/index/exefetcher.cpp
@ -61,7 +61,8 @@ public:
 EXEDocFetcher::EXEDocFetcher(const EXEDocFetcher::Internal& _m)
 {
    m = new Internal(_m);
-    LOGDEB("EXEDocFetcher::EXEDocFetcher: fetch is " << stringsToString(m->sfetch) << "\n");
+    LOGDEB("EXEDocFetcher::EXEDocFetcher: fetch is " <<
           stringsToString(m->sfetch) << "\n");
 }
 bool EXEDocFetcher::fetch(RclConfig*, const Rcl::Doc& idoc, RawDoc& out)
@ -76,7 +77,8 @@ bool EXEDocFetcher::makesig(RclConfig*, const Rcl::Doc& idoc, string& sig)
 }
 // Lookup bckid in the config and create an appropriate fetcher.
-std::unique_ptr<EXEDocFetcher> exeDocFetcherMake(RclConfig *config, const string& bckid)
+std::unique_ptr<EXEDocFetcher> exeDocFetcherMake(RclConfig *config,
                                                 const string& bckid)
 {
    // The config we only read once, not gonna change.
    static ConfSimple *bconf;
--- a/src/index/exefetcher.h
+++ b/src/index/exefetcher.h
@ -40,8 +40,6 @@ public:
    class Internal;
    EXEDocFetcher(const Internal&);
    virtual ~EXEDocFetcher() {}
    EXEDocFetcher(const EXEDocFetcher&) = delete;
    EXEDocFetcher& operator=(const EXEDocFetcher&) = delete;
    virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out);
    /** Calls stat to retrieve file signature data */
@ -53,6 +51,7 @@ private:
 };
 // Lookup bckid in the config and create an appropriate fetcher.
-std::unique_ptr<EXEDocFetcher> exeDocFetcherMake(RclConfig *config, const std::string& bckid);
+std::unique_ptr<EXEDocFetcher> exeDocFetcherMake(RclConfig *config,
                                                 const std::string& bckid);
 #endif /* _EXEFETCHER_H_INCLUDED_ */
--- a/src/index/fetcher.h
+++ b/src/index/fetcher.h
@ -72,18 +72,18 @@ public:
     * @param idoc the data gathered from the index for this doc (udi/ipath)
     * @param sig output. 
     */
-    virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc, std::string& sig) = 0;
+    virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc,
                         std::string& sig) = 0;
    enum Reason{FetchOk, FetchNotExist, FetchNoPerm, FetchOther};
    virtual Reason testAccess(RclConfig*, const Rcl::Doc&) {
        return FetchOther;
    }
    DocFetcher() {}
    virtual ~DocFetcher() {}
    DocFetcher(const DocFetcher&) = delete;
    DocFetcher& operator=(const DocFetcher&) = delete;
 };
-/** Return an appropriate fetcher object given the backend string identifier inside idoc*/
+/** Return an appropriate fetcher object given the backend string 
-std::unique_ptr<DocFetcher> docFetcherMake(RclConfig *config, const Rcl::Doc& idoc);
+ * identifier inside idoc*/
 std::unique_ptr<DocFetcher> docFetcherMake(RclConfig *config,
                                           const Rcl::Doc& idoc);
 #endif /* _FETCHER_H_INCLUDED_ */
--- a/src/index/fsfetcher.h
+++ b/src/index/fsfetcher.h
@ -23,18 +23,14 @@
 /** 
 * The file-system fetcher: 
 */
-class FSDocFetcher : public DocFetcher {
+class FSDocFetcher : public DocFetcher{
 public:
    /** FSDocFetcher::fetch always returns a file name */
    virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out);
    /** Calls stat to retrieve file signature data */
    virtual bool makesig(RclConfig* cnf,const Rcl::Doc& idoc, std::string& sig);
    virtual DocFetcher::Reason testAccess(RclConfig* cnf, const Rcl::Doc& idoc);
    FSDocFetcher() {}
    virtual ~FSDocFetcher() {}
    FSDocFetcher(const FSDocFetcher&) = delete;
    FSDocFetcher& operator=(const FSDocFetcher&) = delete;
 };
 extern void fsmakesig(const struct PathStat *stp, std::string& out);
--- a/src/index/fsindexer.cpp
+++ b/src/index/fsindexer.cpp
@ -195,7 +195,6 @@ bool FsIndexer::index(int flags)
        m_walker.setMaxDepth(2);
    }
    bool walkok(true);
    for (const auto& topdir : m_tdl) {
        LOGDEB("FsIndexer::index: Indexing " << topdir << " into " <<
               getDbDir() << "\n");
@ -230,46 +229,29 @@ bool FsIndexer::index(int flags)
        if (m_walker.walk(topdir, *this) != FsTreeWalker::FtwOk) {
            LOGERR("FsIndexer::index: error while indexing " << topdir <<
                   ": " << m_walker.getReason() << "\n");
-            // DO NOT return: we need to flush the queues before the Db can be closed !
+            return false;
            walkok = false;
            break;
        }
    }
-    shutdownQueues(walkok);
+#ifdef IDX_THREADS
    if (m_haveInternQ) 
        m_iwqueue.waitIdle();
    if (m_haveSplitQ)
        m_dwqueue.waitIdle();
    m_db->waitUpdIdle();
 #endif // IDX_THREADS
    if (m_missing) {
        string missing;
        m_missing->getMissingDescription(missing);
        if (!missing.empty()) {
-            LOGINFO("FsIndexer::index missing helper program(s):\n" << missing << "\n");
+            LOGINFO("FsIndexer::index missing helper program(s):\n" <<
                    missing << "\n");
        }
        m_config->storeMissingHelperDesc(missing);
    }
-    LOGINFO("fsindexer: status: " << walkok << " index time:  " << chron.millis() << " mS\n");
+    LOGINFO("fsindexer index time:  " << chron.millis() << " mS\n");
-    return walkok;
+    return true;
 }
 void FsIndexer::shutdownQueues(bool ok)
 {
 #ifdef IDX_THREADS
    if (!ok) {
        // Error or more probably interrupt. Discard everything for fast shutdown
        if (m_haveInternQ)  {
            m_iwqueue.closeShop();
        }
        if (m_haveSplitQ) {
            m_dwqueue.closeShop();
        }
        m_db->closeQueue();
    }
    if (m_haveInternQ)  {
        m_iwqueue.waitIdle();
    }
    if (m_haveSplitQ) {
        m_dwqueue.waitIdle();
    }
    m_db->waitUpdIdle();
 #endif // IDX_THREADS
 }
 static bool matchesSkipped(
@ -377,7 +359,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
    FsTreeWalker walker;
    walker.setSkippedPaths(m_config->getSkippedPaths());
-    for (auto it = files.begin(); it != files.end(); ) {
+    for (list<string>::iterator it = files.begin(); it != files.end(); ) {
        LOGDEB2("FsIndexer::indexFiles: [" << *it << "]\n");
        m_config->setKeyDir(path_getfather(*it));
@ -421,14 +403,22 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
    ret = true;
 out:
-    shutdownQueues(ret);
+#ifdef IDX_THREADS
    if (m_haveInternQ) 
        m_iwqueue.waitIdle();
    if (m_haveSplitQ)
        m_dwqueue.waitIdle();
    m_db->waitUpdIdle();
 #endif // IDX_THREADS
    // Purge possible orphan documents
    if (ret == true) {
        LOGDEB("Indexfiles: purging orphans\n");
-        for (const auto& udi : m_purgeCandidates.getCandidates()) {
+        const vector<string>& purgecandidates = m_purgeCandidates.getCandidates();
-            LOGDEB("Indexfiles: purging orphans for " << udi << "\n");
+        for (vector<string>::const_iterator it = purgecandidates.begin();
-            m_db->purgeOrphans(udi);
+             it != purgecandidates.end(); it++) {
            LOGDEB("Indexfiles: purging orphans for " << *it << "\n");
            m_db->purgeOrphans(*it);
        }
 #ifdef IDX_THREADS
        m_db->waitUpdIdle();
@ -468,7 +458,13 @@ bool FsIndexer::purgeFiles(list<string>& files)
    ret = true;
 out:
-    shutdownQueues(ret);
+#ifdef IDX_THREADS
    if (m_haveInternQ) 
        m_iwqueue.waitIdle();
    if (m_haveSplitQ)
        m_dwqueue.waitIdle();
    m_db->waitUpdIdle();
 #endif // IDX_THREADS
    LOGDEB("FsIndexer::purgeFiles: done\n");
    return ret;
 }
@ -492,9 +488,10 @@ void FsIndexer::localfieldsfromconf()
    ConfSimple attrs;
    m_config->valueSplitAttributes(sfields, value, attrs);
    vector<string> nmlst = attrs.getNames(cstr_null);
-    for (const auto& anm : nmlst) {
+    for (vector<string>::const_iterator it = nmlst.begin();
-        string nm = m_config->fieldCanon(anm);
+         it != nmlst.end(); it++) {
-        attrs.get(anm, m_localfields[nm]);
+        string nm = m_config->fieldCanon(*it);
        attrs.get(*it, m_localfields[nm]);
        LOGDEB2("FsIndexer::localfieldsfromconf: [" << nm << "]->[" <<
                m_localfields[nm] << "]\n");
    }
@ -502,11 +499,12 @@ void FsIndexer::localfieldsfromconf()
 void FsIndexer::setlocalfields(const map<string, string>& fields, Rcl::Doc& doc)
 {
-    for (const auto& field : fields) {
+    for (map<string, string>::const_iterator it = fields.begin();
         it != fields.end(); it++) {
        // Being chosen by the user, localfields override values from
        // the filter. The key is already canonic (see
        // localfieldsfromconf())
-        doc.meta[field.first] = field.second;
+        doc.meta[it->first] = it->second;
    }
 }
@ -842,7 +840,9 @@ FsTreeWalker::Status FsIndexer::processonefile(
            }
        }
 #if defined(HAVE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
-        // See framagit issue 26. This is off by default and controlled by a command line switch.
+        // See framagit issue 26. If this appears to be a good idea
        // after all (not sure), we'll need a command line switch to
        // control it. For now it's compile-time only.
        if (m_cleancache) {
            int fd = open(fn.c_str(), O_RDONLY);
            if (fd >= 0) {
--- a/src/index/fsindexer.h
+++ b/src/index/fsindexer.h
@ -27,6 +27,7 @@
 #endif // IDX_THREADS
 class FIMissingStore;
 struct PathStat;
 class DbUpdTask;
 class InternfileTask;
@ -54,8 +55,6 @@ public:
     */
    FsIndexer(RclConfig *cnf, Rcl::Db *db);
    virtual ~FsIndexer();
    FsIndexer(const FsIndexer&) = delete;
    FsIndexer& operator=(const FsIndexer&) = delete;
    /** 
     * Top level file system tree index method for updating a given database.
@ -158,7 +157,6 @@ private:
    processonefile(RclConfig *config, const string &fn, 
                   const struct PathStat *,
                   const map<string,string>& localfields);
    void shutdownQueues(bool);
 };
 #endif /* _fsindexer_h_included_ */
--- a/src/index/idxstatus.h
+++ b/src/index/idxstatus.h
@ -57,8 +57,6 @@ class DbIxStatusUpdater {
 public:
    DbIxStatusUpdater(const RclConfig *config, bool nox11monitor);
    virtual ~DbIxStatusUpdater(){}
    DbIxStatusUpdater(const DbIxStatusUpdater&) = delete;
    DbIxStatusUpdater& operator=(const DbIxStatusUpdater&) = delete;
    enum Incr {IncrNone, IncrDocsDone = 0x1, IncrFilesDone = 0x2, IncrFileErrors = 0x4};
    // Change phase/fn and update
--- a/src/index/indexer.cpp
+++ b/src/index/indexer.cpp
@ -59,10 +59,12 @@ bool runWebFilesMoverScript(RclConfig *config)
    static string downloadsdir;
    if (downloadsdir.empty()) {
        if (!config->getConfParam("webdownloadsdir", downloadsdir)) {
-            downloadsdir = "~/Downloads";
+            downloadsdir = path_tildexpand("~/Downloads");
        }
        downloadsdir = path_tildexpand(downloadsdir);
    }
    vector<string> cmdvec;
    config->pythonCmd("recoll-we-move-files.py", cmdvec);
    /* Arrange to not actually run the script if the directory did not change */
    static time_t dirmtime;
    time_t ndirmtime = 0;
@ -70,17 +72,17 @@ bool runWebFilesMoverScript(RclConfig *config)
    if (path_fileprops(downloadsdir.c_str(), &st) == 0) {
        ndirmtime = st.pst_mtime;
    }
-    // If stat fails, presumably Downloads does not exist or is not accessible, dirmtime and
+    /* If stat fails, presumably Downloads does not exist or is not
-    // mdirmtime stay at 0, and we never execute the script, which is the right thing.
+       accessible, dirmtime and mdirmtime stay at 0, and we never
       execute the script, which is the right thing. */
    if (dirmtime != ndirmtime) {
-        // The script is going to change the directory, so updating dirmtime before it runs means
+        /* The script is going to change the directory, so updating
-        // that we are going to execute it one time too many (it will run without doing anything),
+           dirmtime before it runs means that we are going to execute
-        // but we can't set the mtime to after the run in case files are created during the run.
+           it one time too many (it will run without doing anything),
           but we can't set the mtime to after the run in case files
           are created during the run. */
        dirmtime = ndirmtime;
        vector<string> cmdvec;
        config->pythonCmd("recoll-we-move-files.py", cmdvec);
        ExecCmd cmd;
        cmd.putenv("RECOLL_CONFDIR", config->getConfDir());
        int status = cmd.doexec1(cmdvec);
        return status == 0;
    }
--- a/Show More
+++ b/Show More
		`@ -1,2 +0,0 @@`
			`usr/lib/python/-packages/recollchm/*`
			`usr/lib/python/-packages/recollchm-/`