Merge branch 'master' of https://opensourceprojects.eu/git/p/recoll1/code

2018-10-12 12:06:21 +02:00 · 2018-10-12 12:06:21 +02:00 · beebb1028b
commit beebb1028b
parent f737a6c584 8a09948745
151 changed files with 10323 additions and 3194 deletions
--- a/.gitignore
+++ b/.gitignore
@ -36,6 +36,8 @@ src/depcomp
 src/doc/user/usermanual.pdf
 src/doc/user/webhelp/docs/*
 src/doc/user/webhelp/xincluded-profiled.xml
+src/filters/conftree.py
+src/filters/rclconfig.py
 src/filters/rclexecm.pyc
 src/filters/rcllatinclass.pyc
 src/install-sh
--- a/packaging/debian/buildppa.sh
+++ b/packaging/debian/buildppa.sh
@ -6,9 +6,9 @@

 PPA_KEYID=D38B9201

-RCLVERS=1.23.8
+RCLVERS=1.24.1
 SCOPEVERS=1.20.2.4
-PPAVERS=1
+PPAVERS=2

 # 
 RCLSRC=/y/home/dockes/projets/fulltext/recoll/src
--- a/packaging/debian/debian/changelog
+++ b/packaging/debian/debian/changelog
@ -1,3 +1,9 @@
+recoll (1.25.0pre0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
+                
+  * Not a release: 1.25 development and testing
+    
+ -- Jean-Francois Dockes <jf@dockes.org>  Wed, 13 Jun 2018 08:38:00 +0200
+
 recoll (1.24.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
                
  * New release 1.24.1.
--- a/packaging/debian/debian/control
+++ b/packaging/debian/debian/control
@ -19,10 +19,10 @@ X-Python-Version: >= 2.7
 Vcs-Git: https://anonscm.debian.org/cgit/collab-maint/recoll.git
 Vcs-Browser: https://anonscm.debian.org/cgit/collab-maint/recoll.git
 Homepage: http://www.lesbonscomptes.com/recoll
-Standards-Version: 3.9.8
+Standards-Version: 4.1.4

 Package: recoll
-Architecture: any
+Architecture: all
 Depends: recollgui, recollcmd, ${misc:Depends}
 Description: Personal full text search package with a Qt GUI
 This package is a personal full text search package based on a very strong
@ -49,30 +49,40 @@ Description: Personal full text search package with a Qt GUI

 Package: recollcmd
 Architecture: any
-Breaks: recoll (<< 1.23.4)
-Replaces: recoll (<< 1.23.4)
-Depends: python, ${misc:Depends}, ${shlibs:Depends}
-Recommends: python-recoll, aspell, xdg-utils, xsltproc,
-            python-libxml2, python-libxslt1
+Breaks: recoll (<< 1.23.9)
+Replaces: recoll (<< 1.23.9)
+Depends: python, python3, ${misc:Depends}, ${shlibs:Depends}
+Recommends: aspell,
+            python-future,
+            python-libxml2,
+            python-libxslt1,
+            python-recoll,
+            python3-recoll,
+            xdg-utils,
+            xsltproc
 Suggests: antiword,
          ghostscript,
+          groff,
          libimage-exiftool-perl,
+          libinotifytools0,
          poppler-utils,
          pstotext,
          python-chm,
+          python-lzma,
+          python-mido,
          python-mutagen,
+          python-rarfile,
          unrtf,
-          untex
+          untex,
+          wv
 Description: Command line programs for recoll
 This package supports indexing and command line querying.
 
 Package: recollgui
 Architecture: any
-Breaks: recoll (<< 1.23.4)
-Replaces: recoll (<< 1.23.4)
-Depends: recollcmd (= ${binary:Version}),
-         ${misc:Depends},
-         ${shlibs:Depends}
+Breaks: recoll (<< 1.23.9)
+Replaces: recoll (<< 1.23.9)
+Depends: recollcmd (= ${binary:Version}), ${misc:Depends}, ${shlibs:Depends}
 Description: GUI program and elements for recoll
 Main recoll GUI for configuring, controlling and querying recoll indexes.

--- a/packaging/debian/debian/python-recoll.install
+++ b/packaging/debian/debian/python-recoll.install
@ -1,2 +1,2 @@
-usr/lib/python2*/*-packages/*.egg-info
+usr/lib/python2*/*-packages/Recoll*.egg-info
 usr/lib/python2*/*-packages/recoll/*
--- a/packaging/debian/debian/python3-recoll.install
+++ b/packaging/debian/debian/python3-recoll.install
@ -1,2 +1,2 @@
-usr/lib/python3*/*-packages/*.egg-info
+usr/lib/python3*/*-packages/Recoll*.egg-info
 usr/lib/python3*/*-packages/recoll/*
--- a/packaging/debian/debian/recollcmd.install
+++ b/packaging/debian/debian/recollcmd.install
@ -1,7 +1,9 @@
 usr/bin/recollindex
 usr/bin/recollq
 usr/bin/xadump
-usr/lib/recoll
+usr/lib/*/recoll
+usr/lib/python*/*-packages/recollchm/*
+usr/lib/python*/*-packages/recollchm-*/*
 usr/share/man
 usr/share/recoll/doc
 usr/share/recoll/examples
--- a/packaging/debian/debian/rules
+++ b/packaging/debian/debian/rules
@ -1,96 +1,54 @@
 #!/usr/bin/make -f
+# See debhelper(7) (uncomment to enable)
+# output every command that modifies files on the build system.
+#DH_VERBOSE = 1

-# Uncomment this to turn on verbose mode.
-#export DH_VERBOSE=1
+# see EXAMPLES in dpkg-buildflags(1) and read /usr/share/dpkg/*
+DPKG_EXPORT_BUILDFLAGS = 1
+include /usr/share/dpkg/default.mk

-export DEB_BUILD_MAINT_OPTIONS = hardening=+all
+# see FEATURE AREAS in dpkg-buildflags(1)
+#export DEB_BUILD_MAINT_OPTIONS = hardening=+all

-DEB_HOST_GNU_TYPE   ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
-DEB_BUILD_GNU_TYPE  ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
+# see ENVIRONMENT in dpkg-buildflags(1)
+# package maintainers to append CFLAGS
+#export DEB_CFLAGS_MAINT_APPEND  = -Wall -pedantic
+# package maintainers to append LDFLAGS
+#export DEB_LDFLAGS_MAINT_APPEND = -Wl,--as-needed

-CPPFLAGS:=$(shell dpkg-buildflags --get CPPFLAGS)
-CFLAGS:=$(shell dpkg-buildflags --get CFLAGS) $(CPPFLAGS)
-CXXFLAGS:=$(shell dpkg-buildflags --get CXXFLAGS) $(CPPFLAGS)
-LDFLAGS:=$(shell dpkg-buildflags --get LDFLAGS)
+# main packaging script based on dh7 syntax
+%:
+	dh $@ --parallel --with python2 --with python3 --with autotools-dev 
+
+override_dh_auto_configure:
+	dh_auto_configure -- --enable-recollq --enable-xadump

 build3vers := $(shell py3versions -sv)

-#build qt5 UI
-export QT_SELECT := qt5
-
-ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
-	CFLAGS += -O0
-else
-	CFLAGS += -O2
-endif
-
-config.status: configure
-	dh_testdir
-	./configure CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" \
-		    --host=$(DEB_HOST_GNU_TYPE) \
-		    --build=$(DEB_BUILD_GNU_TYPE) \
-		    --mandir=\$${prefix}/share/man \
-		    --prefix=/usr \
-		    --enable-recollq \
-		    --enable-xadump
-
-build: build-arch build-indep
-build-arch: build-stamp
-build-indep: build-stamp
-build-stamp: config.status
-	dh_testdir
-	$(MAKE)
-	touch $@
-
-clean:
-	dh_testdir
-	dh_testroot
-	rm -f build-stamp config.log
-	[ ! -f Makefile ] || $(MAKE) distclean
-	dh_clean Makefile
-
-install:
-	dh_testdir
-	dh_testroot
-	dh_prep
-	dh_installdirs
-	$(MAKE) STRIP=ls prefix=$(CURDIR)/debian/tmp/usr install
-	# Executable fixes
-	chmod +x $(CURDIR)/debian/tmp/usr/share/recoll/examples/rclmon.sh
-	chmod -x $(CURDIR)/debian/tmp/usr/share/recoll/filters/rclexec1.py
-	chmod -x $(CURDIR)/debian/tmp/usr/share/recoll/filters/rclxslt.py
-	find $(CURDIR) -type f -name '*.la' -exec rm -f '{}' \;
-	(cd python/recoll; python setup.py install \
+# Install the main package with dh_auto_install, then the Python
+# modules. setup.py installs under <root>/<prefix>, so --root must be
+# the staging directory itself (debian/tmp) when --prefix is /usr;
+# pointing it at debian/tmp/usr would put the files in debian/tmp/usr/usr.
+override_dh_auto_install:
+	dh_auto_install
+	(cd python/recoll; libdir=/usr/lib/$${DEB_BUILD_MULTIARCH} python \
+					   ./setup.py install \
 	                                   --install-layout=deb \
-	                                   --prefix=$(CURDIR)/debian/tmp/usr )
+					   --prefix=/usr \
+	                                   --root=$(CURDIR)/debian/tmp/ )
 	set -e && for i in $(build3vers); do \
-	  (cd python/recoll; python$$i ./setup.py install \
-	                                   --install-layout=deb \
-	                                   --prefix=$(CURDIR)/debian/tmp/usr ) ; \
+	(cd python/recoll; libdir=/usr/lib/$${DEB_BUILD_MULTIARCH} python$$i \
+					./setup.py install \
+	                                --install-layout=deb \
+					--prefix=/usr \
+	                                --root=$(CURDIR)/debian/tmp/ ) ; \
 	done
-
-binary-arch: build install
-	dh_testdir
-	dh_testroot
-	dh_installchangelogs ChangeLog
-	dh_installdocs README
-	dh_installman
-	dh_install --sourcedir=debian/tmp
-	dh_makeshlibs
-	dh_python2 -p python-recoll
-	dh_python3 -p python3-recoll
-	dh_link
-	dh_strip
-	dh_compress
-	dh_fixperms
-	dh_lintian
-	dh_installdeb
-	dh_shlibdeps
-	dh_gencontrol
-	dh_md5sums
-	dh_builddeb
-
-binary-indep: build install
-
-binary: binary-indep binary-arch
-.PHONY: build build-arch build-indep clean binary-indep binary-arch binary install
+	(cd python/pychm; python ./setup.py install \
+	                                   --install-layout=deb \
+					   --prefix=/usr \
+	                                   --root=$(CURDIR)/debian/tmp/ )
+	set -e && for i in $(build3vers); do \
+	(cd python/pychm; python$$i ./setup.py install \
+	                                --install-layout=deb \
+					--prefix=/usr \
+	                                --root=$(CURDIR)/debian/tmp/ ) ; \
+	done
+	find $(CURDIR) -type f -name '*.la' -exec rm -f '{}' \;
+	find $(CURDIR) -type f -name '*.pyc' -exec rm -f '{}' \;
+	rm -rf $(CURDIR)/debian/tmp/usr/lib/python*/*/*/__pycache__
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -58,8 +58,8 @@ bincimapmime/mime-printbody.cc \
 bincimapmime/mime-utils.h \
 bincimapmime/mime.cc \
 bincimapmime/mime.h \
-common/beaglequeuecache.cpp \
-common/beaglequeuecache.h \
+common/webstore.cpp \
+common/webstore.h \
 common/conf_post.h \
 common/cstr.cpp \
 common/cstr.h \
@ -76,10 +76,10 @@ common/unacpp.h \
 common/uproplist.h \
 common/utf8fn.cpp \
 common/utf8fn.h \
-index/beaglequeue.cpp \
-index/beaglequeue.h \
-index/bglfetcher.cpp \
-index/bglfetcher.h \
+index/webqueue.cpp \
+index/webqueue.h \
+index/webqueuefetcher.cpp \
+index/webqueuefetcher.h \
 index/checkretryfailed.cpp \
 index/checkretryfailed.h \
 index/exefetcher.cpp \
@ -236,7 +236,6 @@ utils/rclutil.h \
 utils/rclutil.cpp \
 utils/readfile.cpp \
 utils/readfile.h \
-utils/refcntr.h \
 utils/smallut.cpp \
 utils/smallut.h \
 utils/strmatcher.cpp \
@ -507,11 +506,30 @@ qtgui/xmltosd.cpp \
 qtgui/xmltosd.h \
 \
 python/README.txt \
-python/recoll/Makefile.in \
+python/pychm/AUTHORS \
+python/pychm/COPYING \
+python/pychm/MANIFEST.in \
+python/pychm/README-RECOLL.txt \
+python/pychm/pychm.egg-info \
+python/pychm/pychm.egg-info/PKG-INFO \
+python/pychm/pychm.egg-info/SOURCES.txt \
+python/pychm/pychm.egg-info/dependency_links.txt \
+python/pychm/pychm.egg-info/top_level.txt \
+python/pychm/recollchm \
+python/pychm/recollchm/__init__.py \
+python/pychm/recollchm/__pycache__ \
+python/pychm/recollchm/chm.py \
+python/pychm/recollchm/chmlib.py \
+python/pychm/recollchm/extra.c \
+python/pychm/recollchm/swig_chm.c \
+python/pychm/recollchm/swig_chm.i \
+python/pychm/setup.py.in \
+python/recoll/Makefile \
 python/recoll/pyrclextract.cpp \
 python/recoll/pyrecoll.cpp \
 python/recoll/pyrecoll.h \
 python/recoll/recoll/__init__.py \
+python/recoll/recoll/conftree.py \
 python/recoll/recoll/rclconfig.py \
 python/recoll/setup.py.in \
 python/samples/docdups.py \
@ -538,34 +556,67 @@ VERSION
 # php/00README.txt php/recoll/config.m4 php/recoll/make.sh 
 # php/recoll/php_recoll.h php/recoll/recoll.cpp php/sample/shell.php 

+OPTSFORPYTHON = $(shell test -f /etc/debian_version && echo --install-layout=deb)
+
 if MAKEPYTHON
+# Build/install the Recoll Python module with each Python major version
+# found in the PATH. A missing interpreter is skipped without failing
+# the recipe; a failing setup.py run aborts it.
-all-local: recollpython
+all-local:: recollpython
+install-exec-local:: recollpython-install
+clean-local:: recollpython-clean
 recollpython: librecoll.la
-	${MAKE} -C python/recoll libdir=$(libdir)
-install-exec-local: recollpython-install
+	(cd python/recoll; set -x; \
+        for v in 2 3;do \
+        if test -n "`which python$${v}`"; then \
+            libdir=$(libdir) python$${v} setup.py build || exit 1; \
+        fi; \
+        done \
+        )
 recollpython-install:
-	(cd python/recoll; \
-        if test -f /etc/debian_version ; then \
-            OPTSFORPYTHON=--install-layout=deb; \
-        fi; \
-        set -x; \
-        python setup.py install \
-         --prefix=${prefix} --root=$${DESTDIR:-/} $${OPTSFORPYTHON})
-clean-local: recollpython-clean
+	(cd python/recoll; set -x; \
+        for v in 2 3;do \
+        if test -n "`which python$${v}`"; then \
+            python$${v} setup.py install \
+             --prefix=${prefix} --root=$${DESTDIR:-/} $(OPTSFORPYTHON) \
+             || exit 1; \
+        fi; \
+        done \
+        )
 recollpython-clean:
+	rm -f python/recoll/*.pyc
+	rm -rf python/pychm/build
+	rm -rf python/pychm/recollchm.egg-info
+	rm -rf python/pychm/setup.py
+	rm -rf python/recoll/Recoll.egg-info
+	rm -rf python/recoll/__pycache__
 	rm -rf python/recoll/build
 endif

+if MAKEPYTHONCHM
+# Build/install the libchm Python wrapper (python/pychm) with each
+# Python major version found in the PATH. A missing interpreter is
+# skipped without failing the recipe; a failing setup.py run aborts it.
+all-local:: rclpychm
+install-exec-local:: rclpychm-install
+clean-local:: rclpychm-clean
+rclpychm:
+	(cd python/pychm; set -x; \
+        for v in 2 3;do \
+        if test -n "`which python$${v}`"; then \
+            python$${v} setup.py build || exit 1; \
+        fi; \
+        done \
+        )
+rclpychm-install:
+	(cd python/pychm; set -x; \
+        for v in 2 3;do \
+        if test -n "`which python$${v}`"; then \
+            python$${v} setup.py install \
+             --prefix=${prefix} --root=$${DESTDIR:-/} $(OPTSFORPYTHON) \
+             || exit 1; \
+        fi; \
+        done \
+        )
+rclpychm-clean:
+	rm -rf python/pychm/build
+	rm -rf python/pychm/dist/*
+endif
+
 if MAKEQT
-all-local: recollqt
+all-local:: recollqt
 recollqt: librecoll.la
 	(cd $(QTGUI); ${QMAKE} PREFIX=${prefix} recoll.pro)
 	$(MAKE) -C $(QTGUI) LFLAGS="$(LDFLAGS)" prefix=$(prefix) \
                exec_prefix=$(exec_prefix) libdir=$(libdir)
-clean-local: recollqt-clean
+clean-local:: recollqt-clean
 recollqt-clean:
 	-$(MAKE) -C $(QTGUI) clean
-install-exec-local: recollqt-install
+install-exec-local:: recollqt-install
 recollqt-install:
 	$(MAKE) -C $(QTGUI) LFLAGS="$(LDFLAGS)" INSTALL_ROOT=$(DESTDIR) \
                prefix=$(prefix) exec_prefix=$(exec_prefix) libdir=$(libdir) \
@ -588,9 +639,10 @@ filterdir = $(pkgdatadir)/filters
 filter_DATA = \
 desktop/hotrecoll.py \
 filters/rcl7z \
-filters/rclabw \
+filters/rclabw.py \
 filters/rclaptosidman \
 filters/rclaudio \
+filters/rclbasehandler.py \
 filters/rclbibtex.sh \
 filters/rclcheckneedretry.sh \
 filters/rclchm \
@ -602,9 +654,10 @@ filters/rclepub \
 filters/rclepub1 \
 filters/rclexec1.py \
 filters/rclexecm.py \
-filters/rclfb2 \
+filters/rclfb2.py \
 filters/rclgaim \
-filters/rclgnm \
+filters/rclgenxslt.py \
+filters/rclgnm.py \
 filters/rclics \
 filters/rclimg \
 filters/rclimg.py \
@ -617,17 +670,15 @@ filters/rcllyx \
 filters/rclman \
 filters/rclmidi.py \
 filters/rclpdf.py \
-filters/rclokulnote \
+filters/rclokulnote.py \
 filters/rclopxml.py \
 filters/rclppt.py \
-filters/rclps \
 filters/rclpurple \
 filters/rclpython \
 filters/rclrar \
 filters/rclrtf.py \
 filters/rclscribus \
 filters/rclshowinfo \
-filters/rclsiduxman \
 filters/rclsoff.py \
 filters/rclsoff-flat.py \
 filters/rclsvg.py \
@ -637,7 +688,6 @@ filters/rcltext.py \
 filters/rcluncomp \
 filters/rcluncomp.py \
 filters/rclwar \
-filters/rclwpd \
 filters/rclxls.py \
 filters/rclxml.py \
 filters/rclxmp.py \
@ -648,13 +698,16 @@ filters/ppt-dump.py \
 filters/xls-dump.py \
 filters/xlsxmltocsv.py \
 filters/msodump.zip \
+filters/recollepub.zip \
+python/recoll/recoll/conftree.py \
 python/recoll/recoll/rclconfig.py 

 install-data-hook: 
 	(cd $(DESTDIR)/$(filterdir); \
 	chmod a+x rcl* ppt-dump.py xls-dump.py xlsxmltocsv.py hotrecoll.py; \
 	chmod a+x recoll-we-move-files.py; \
-	chmod 0644 msodump.zip rclexecm.py rcllatinstops.zip rclconfig.py rclmidi.py)
+	chmod 0644 msodump.zip recollepub.zip rclexecm.py rcllatinstops.zip \
+        rclconfig.py conftree.py rclmidi.py)

 if MAKEUSERDOC
 rdocdir = $(pkgdatadir)/doc
--- a/src/VERSION
+++ b/src/VERSION
@ -1 +1 @@
-1.24.1
+1.25.0pre0
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@ -38,6 +38,7 @@
 #include <sstream>
 #include <cstdlib>
 #include <cstring>
+#include <unordered_map>

 #include "cstr.h"
 #include "pathut.h"
@ -70,6 +71,12 @@ bool o_uptodate_test_use_mtime = false;
 string RclConfig::o_localecharset; 
 string RclConfig::o_origcwd; 

+// We build this once. Used to ensure that the suffix used for a temp
+// file of a given MIME type is the FIRST one from the mimemap config
+// file. Previously it was the first in alphabetic (map) order, with
+// sometimes strange results.
+static unordered_map<string, string> mime_suffixes;
+
 // Compute the difference of 1st to 2nd sets and return as plus/minus
 // sets. Some args are std::set and some others stringToString()
 // strings for convenience
@ -316,6 +323,27 @@ RclConfig::RclConfig(const string *argcnf)
 	m_reason = string("No or bad mimemap file in: ") + cnferrloc;
 	return;
    }
+
+    // Maybe create the MIME to suffix association reverse map. Do it
+    // in file order so that we can control what suffix is used when
+    // there are several. This only uses the distributed file, not any
+    // local customization (too complicated).
+    if (mime_suffixes.empty()) {
+        ConfSimple mm(
+            path_cat(path_cat(m_datadir, "examples"), "mimemap").c_str());
+        vector<ConfLine> order = mm.getlines();
+        for (const auto& entry: order) {
+            if (entry.m_kind == ConfLine::CFL_VAR) {
+                LOGDEB1("CONFIG: " << entry.m_data << " -> " << entry.m_value <<
+                        endl);
+                // Remember: insert() only does anything for new keys,
+                // so we only have the first value in the map
+                mime_suffixes.insert(
+                    pair<string,string>(entry.m_value, entry.m_data));
+            }
+        }
+    }
+
    mimeconf = new ConfStack<ConfSimple>("mimeconf", m_cdirs, true);
    if (mimeconf == 0 || !mimeconf->ok()) {
 	m_reason = string("No/bad mimeconf in: ") + cnferrloc;
@ -753,14 +781,20 @@ string RclConfig::getMimeTypeFromSuffix(const string& suff) const

 string RclConfig::getSuffixFromMimeType(const string &mt) const
 {
-    string suffix;
-    vector<string>sfs = mimemap->getNames(cstr_null);
-    string mt1;
-    for (vector<string>::const_iterator it = sfs.begin(); 
-	 it != sfs.end(); it++) {
-	if (mimemap->get(*it, mt1, cstr_null))
-	    if (!stringicmp(mt, mt1))
-		return *it;
+    // First try from standard data, ensuring that we can control the value
+    // from the order in the configuration file.
+    auto rclsuff = mime_suffixes.find(mt);
+    if (rclsuff != mime_suffixes.end()) {
+        return rclsuff->second;
+    }
+    // Try again from local data. The map is in the wrong direction,
+    // have to walk it.
+    vector<string> sfs = mimemap->getNames(cstr_null);
+    for (const auto& suff : sfs) {
+        string mt1;
+	if (mimemap->get(suff, mt1, cstr_null) && !stringicmp(mt, mt1)) {
+            return suff;
+        }
    }
    return cstr_null;
 }
--- a/src/common/syngroups.cpp
+++ b/src/common/syngroups.cpp
@ -68,11 +68,11 @@ SynGroups::SynGroups()

 bool SynGroups::setfile(const string& fn)
 {
-    LOGDEB("SynGroups::setfile("  << (fn) << ")\n" );
+    LOGDEB("SynGroups::setfile(" << fn << ")\n");
    if (!m) {
        m = new Internal;
        if (!m) {
-            LOGERR("SynGroups:setfile:: new Internal failed: no mem ?\n" );
+            LOGERR("SynGroups:setfile:: new Internal failed: no mem ?\n");
            return false;
        }
    }
@ -86,7 +86,7 @@ bool SynGroups::setfile(const string& fn)
    ifstream input;
    input.open(fn.c_str(), ios::in);
    if (!input.is_open()) {
-	LOGERR("SynGroups:setfile:: could not open "  << (fn) << " errno "  << (errno) << "\n" );
+	LOGSYSERR("SynGroups:setfile", "open", fn);
 	return false;
    }	    

@ -101,7 +101,7 @@ bool SynGroups::setfile(const string& fn)
 	getline(input, cline);
 	if (!input.good()) {
 	    if (input.bad()) {
-                LOGERR("Syngroup::setfile("  << (fn) << "):Parse: input.bad()\n" );
+                LOGERR("Syngroup::setfile(" << fn << "):Parse: input.bad()\n");
 		return false;
 	    }
 	    // Must be eof ? But maybe we have a partial line which
@ -142,23 +142,25 @@ bool SynGroups::setfile(const string& fn)

 	vector<string> words;
 	if (!stringToStrings(line, words)) {
-	    LOGERR("SynGroups:setfile: "  << (fn) << ": bad line "  << (lnum) << ": "  << (line) << "\n" );
+	    LOGERR("SynGroups:setfile: " << fn << ": bad line " << lnum <<
+                   ": " << line << "\n");
 	    continue;
 	}

 	if (words.empty())
 	    continue;
 	if (words.size() == 1) {
-	    LOGERR("Syngroup::setfile("  << (fn) << "):single term group at line "  << (lnum) << " ??\n" );
+	    LOGERR("Syngroup::setfile(" << fn << "):single term group at line "
+                   << lnum << " ??\n");
 	    continue;
 	}

 	m->groups.push_back(words);
-	for (vector<string>::const_iterator it = words.begin();
-	     it != words.end(); it++) {
-	    m->terms[*it] = m->groups.size()-1;
+	for (const auto& word : words) {
+	    m->terms[word] = m->groups.size()-1;
 	}
-	LOGDEB1("SynGroups::setfile: group: ["  << (stringsToString(m->groups.back())) << "]\n" );
+	LOGDEB1("SynGroups::setfile: group: [" <<
+                stringsToString(m->groups.back()) << "]\n");
    }
    m->ok = true;
    return true;
@ -170,16 +172,15 @@ vector<string> SynGroups::getgroup(const string& term)
    if (!ok())
 	return ret;

-    std::unordered_map<string, unsigned int>::const_iterator it1 =
-        m->terms.find(term);
+    const auto it1 = m->terms.find(term);
    if (it1 == m->terms.end()) {
-	LOGDEB1("SynGroups::getgroup: ["  << (term) << "] not found in direct map\n" );
+	LOGDEB1("SynGroups::getgroup: [" << term<<"] not found in direct map\n");
 	return ret;
    }

    unsigned int idx = it1->second;
    if (idx >= m->groups.size()) {
-        LOGERR("SynGroups::getgroup: line index higher than line count !\n" );
+        LOGERR("SynGroups::getgroup: line index higher than line count !\n");
        return ret;
    }
    return m->groups[idx];
--- a/src/common/syngroups.h
+++ b/src/common/syngroups.h
@ -28,14 +28,17 @@ class SynGroups {
 public:
    SynGroups();
    ~SynGroups();
+    SynGroups(const SynGroups&) = delete;
+    SynGroups& operator=(const SynGroups&) = delete;
+    SynGroups(const SynGroups&&) = delete;
+    SynGroups& operator=(const SynGroups&&) = delete;
+
    bool setfile(const std::string& fname);
    std::vector<std::string> getgroup(const std::string& term);
    bool ok();
 private:
    class Internal;
    Internal *m;
-    SynGroups(const SynGroups&);
-    SynGroups& operator=(const SynGroups&);
 };

 #endif /* _SYNGROUPS_H_INCLUDED_ */
--- a/src/common/beaglequeuecache.cpp
+++ b/src/common/beaglequeuecache.cpp
@ -17,10 +17,11 @@

 #include "autoconfig.h"

+#include "webstore.h"
+
 #include <stdint.h>

 #include "cstr.h"
-#include "beaglequeuecache.h"
 #include "circache.h"
 #include "log.h"
 #include "rclconfig.h"
@ -29,42 +30,43 @@

 const string cstr_bgc_mimetype("mimetype");

-BeagleQueueCache::BeagleQueueCache(RclConfig *cnf) 
+WebStore::WebStore(RclConfig *cnf) 
 {
    string ccdir = cnf->getWebcacheDir();

    int maxmbs = 40;
    cnf->getConfParam("webcachemaxmbs", &maxmbs);
    if ((m_cache = new CirCache(ccdir)) == 0) {
-	LOGERR("BeagleQueueCache: cant create CirCache object\n" );
+	LOGERR("WebStore: cant create CirCache object\n" );
 	return;
    }
    if (!m_cache->create(int64_t(maxmbs)*1000*1024, CirCache::CC_CRUNIQUE)) {
-	LOGERR("BeagleQueueCache: cache file creation failed: "  << (m_cache->getReason()) << "\n" );
+	LOGERR("WebStore: cache file creation failed: " <<
+               m_cache->getReason() << "\n");
 	delete m_cache;
 	m_cache = 0;
 	return;
    }
 }

-BeagleQueueCache::~BeagleQueueCache()
+WebStore::~WebStore()
 {
    delete m_cache;
 }

 // Read  document from cache. Return the metadata as an Rcl::Doc
-// @param htt Beagle Hit Type 
-bool BeagleQueueCache::getFromCache(const string& udi, Rcl::Doc &dotdoc, 
+// @param htt Web Hit Type 
+bool WebStore::getFromCache(const string& udi, Rcl::Doc &dotdoc, 
 				    string& data, string *htt)
 {
    string dict;

    if (m_cache == 0) {
-	LOGERR("BeagleQueueCache::getFromCache: cache is null\n" );
+	LOGERR("WebStore::getFromCache: cache is null\n");
 	return false;
    }
    if (!m_cache->get(udi, dict, &data)) {
-	LOGDEB("BeagleQueueCache::getFromCache: get failed\n" );
+	LOGDEB("WebStore::getFromCache: get failed\n");
 	return false;
    }

--- a/src/common/beaglequeuecache.h
+++ b/src/common/beaglequeuecache.h
@ -14,11 +14,10 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
-#ifndef _beaglequeuecache_h_included_
-#define _beaglequeuecache_h_included_
+#ifndef _webstore_h_included_
+#define _webstore_h_included_

 #include <string>
-using std::string;

 class RclConfig;
 namespace Rcl {
@ -28,23 +27,24 @@ namespace Rcl {
 class CirCache;

 /**
- * Manage the CirCache for the Beagle Queue indexer. Separated from the main
+ * Manage the CirCache for the Web Queue indexer. Separated from the main
 * indexer code because it's also used for querying (getting the data for a
 * preview)
 */
-class BeagleQueueCache {
+class WebStore {
 public:
-    BeagleQueueCache(RclConfig *config);
-    ~BeagleQueueCache();
+    WebStore(RclConfig *config);
+    ~WebStore();

-    bool getFromCache(const string& udi, Rcl::Doc &doc, string& data,
-                      string *hittype = 0);
+    bool getFromCache(const std::string& udi, Rcl::Doc &doc, std::string& data,
+                      std::string *hittype = 0);
    // We could write proxies for all the circache ops, but why bother?
    CirCache *cc() {return m_cache;}

 private:
    CirCache *m_cache;
 };
-extern const string cstr_bgc_mimetype;

-#endif /* _beaglequeuecache_h_included_ */
+extern const std::string cstr_bgc_mimetype;
+
+#endif /* _webstore_h_included_ */
--- a/src/configure.ac
+++ b/src/configure.ac
@ -252,6 +252,13 @@ fi

 AM_CONDITIONAL(MAKEPYTHON, [test X$pythonEnabled = Xyes])

+# Disable building the libchm python wrapper
+AC_ARG_ENABLE(python-chm, AC_HELP_STRING([--disable-python-chm],
+    [Do not build the libchm Python wrapper.]),
+    pythonChmEnabled=$enableval, pythonChmEnabled=yes)
+
+AM_CONDITIONAL(MAKEPYTHONCHM, [test X$pythonChmEnabled = Xyes])
+

 AC_CHECK_FUNCS(mkdtemp)
 AC_CHECK_LIB([pthread], [pthread_create], [], [])
@ -523,6 +530,6 @@ AC_SUBST(RCLLIBVERSION)
 AC_CONFIG_FILES(Makefile)
 AC_CONFIG_FILES(common/rclversion.h)
 AC_CONFIG_FILES(python/recoll/setup.py)
-AC_CONFIG_FILES(python/recoll/Makefile)
+AC_CONFIG_FILES(python/pychm/setup.py)

 AC_OUTPUT
--- a/src/desktop/hotrecoll.py
+++ b/src/desktop/hotrecoll.py
@ -1,4 +1,7 @@
-#!/usr/bin/python
+#!/usr/bin/python2
+#
+# wnck does not have a python3 binding as far as I can see (or at
+# least it's not packaged by, e.g. Debian). So python2 only for now.
 #
 # This script should be linked to a keyboard shortcut. Under gnome,
 # you can do this from the main preferences menu, or directly execute
--- a/src/desktop/recoll-searchgui.desktop
+++ b/src/desktop/recoll-searchgui.desktop
@ -1,8 +1,10 @@
 [Desktop Entry]
-Categories=Utility;Filesystem;Database;
+Categories=Qt;Utility;Filesystem;Database;
 Comment=Find documents by specifying search terms
+Comment[ru]=Поиск документов по заданным условиям
 Exec=recoll
 GenericName=Local Text Search
+GenericName[ru]=Локальный текстовый поиск
 Icon=recoll
 Name=Recoll
 Terminal=false
--- a/src/doc/user/usermanual.html
+++ b/src/doc/user/usermanual.html
@ -10,7 +10,7 @@
  <link rel="stylesheet" type="text/css" href="docbook-xsl.css">
  <meta name="generator" content="DocBook XSL Stylesheets V1.79.1">
  <meta name="description" content=
-  "Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license can be found at the following location: GNU web site. This document introduces full text search notions and describes the installation and use of the Recoll application. This version describes Recoll 1.23.">
+  "Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license can be found at the following location: GNU web site. This document introduces full text search notions and describes the installation and use of the Recoll application. This version describes Recoll 1.24.">
 </head>
 <body bgcolor="white" text="black" link="#0000FF" vlink="#840084"
 alink="#0000FF">
@ -53,7 +53,7 @@ alink="#0000FF">
            and describes the installation and use of the
            <span class="application">Recoll</span> application.
            This version describes <span class=
-            "application">Recoll</span> 1.23.</p>
+            "application">Recoll</span> 1.24.</p>
          </div>
        </div>
      </div>
@ -376,9 +376,15 @@ alink="#0000FF">
                <dt><span class="sect2">6.3.1. <a href=
                "#RCL.INSTALL.BUILDING.PREREQS">Prerequisites</a></span></dt>
                <dt><span class="sect2">6.3.2. <a href=
-                "#RCL.INSTALL.BUILDING.BUILD">Building</a></span></dt>
+                "#RCL.INSTALL.BUILDING.BUILDING">Building</a></span></dt>
                <dt><span class="sect2">6.3.3. <a href=
-                "#RCL.INSTALL.BUILDING.INSTALL">Installation</a></span></dt>
+                "#RCL.INSTALL.BUILDING.INSTALL">Installing</a></span></dt>
+                <dt><span class="sect2">6.3.4. <a href=
+                "#RCL.INSTALL.BUILDING.PYTHON">Python API
+                package</a></span></dt>
+                <dt><span class="sect2">6.3.5. <a href=
+                "#RCL.INSTALL.BUILDING.SOLARIS">Building on
+                Solaris</a></span></dt>
              </dl>
            </dd>
            <dt><span class="sect1">6.4. <a href=
@ -428,7 +434,7 @@ alink="#0000FF">
      <p>This document introduces full text search notions and
      describes the installation and use of the <span class=
      "application">Recoll</span> application. It is updated for
-      <span class="application">Recoll</span> 1.23.</p>
+      <span class="application">Recoll</span> 1.24.</p>
      <p><span class="application">Recoll</span> was for a long
      time dedicated to Unix-like systems. It was only lately
      (2015) ported to <span class="application">MS-Windows</span>.
@ -2128,8 +2134,8 @@ alink="#0000FF">
        grow quite big, depending on the log level.</p>
        <p>When building <span class="application">Recoll</span>,
        the real time indexing support can be customised during
-        package <a class="link" href="#RCL.INSTALL.BUILDING.BUILD"
-        title="6.3.2.&nbsp;Building">configuration</a> with the
+        package <a class="link" href="#RCL.INSTALL.BUILDING" title=
+        "6.3.&nbsp;Building from source">configuration</a> with the
        <code class="option">--with[out]-fam</code> or <code class=
        "option">--with[out]-inotify</code> options. The default is
        currently to include <span class=
@ -6170,31 +6176,13 @@ recollindex -c "$confdir"
          here. A paragraph at the end of this section will explain
          a few differences and ways to write code compatible with
          both versions.</p>
-          <p>The Python interface can be found in the source
-          package, under <code class=
-          "filename">python/recoll</code>.</p>
-          <p>The <code class="filename">python/recoll/</code>
-          directory contains the usual <code class=
-          "filename">setup.py</code>. After configuring the main
-          <span class="application">Recoll</span> code, you can use
-          the script to build and install the Python module:</p>
-          <pre class="screen">
-          <strong class=
-"userinput"><code>cd recoll-xxx/python/recoll</code></strong>
-          <strong class=
-"userinput"><code>python setup.py build</code></strong>
-          <strong class=
-"userinput"><code>python setup.py install</code></strong>
-        </pre>
-          <p>As of <span class="application">Recoll</span> 1.19,
-          the module can be compiled for Python3.</p>
-          <p>The normal <span class="application">Recoll</span>
-          installer installs the Python2 API along with the main
-          code. The Python3 version must be explicitely built and
-          installed.</p>
-          <p>When installing from a repository, and depending on
-          the distribution, the Python API can sometimes be found
-          in a separate package.</p>
+          <p>There is a good chance that your system repository has
+          packages for the Recoll Python API, sometimes in a
+          package separate from the main one (maybe named something
+          like python-recoll). Else refer to the <a class="link"
+          href="#RCL.INSTALL.BUILDING" title=
+          "6.3.&nbsp;Building from source">Building from source
+          chapter</a>.</p>
          <p>As an introduction, the following small sample will
          run a query and list the title and url for each of the
          results. It would work with <span class=
@ -6216,6 +6204,13 @@ recollindex -c "$confdir"
        for doc in results:
        print(doc.url, doc.title)
        </pre>
+          <p>You can also take a look at the source for the
+          <a class="ulink" href=
+          "https://github.com/koniu/recoll-webui" target=
+          "_top">Recoll WebUI</a>, or the <a class="ulink" href=
+          "https://opensourceprojects.eu/p/upmpdcli/code/ci/c8c8e75bd181ad9db2df14da05934e53ca867a06/tree/src/mediaserver/cdplugins/uprcl/uprclfolders.py"
+          target="_top">upmpdcli local media server</a>, which are
+          both based on the Python API.</p>
        </div>
        <div class="sect2">
          <div class="titlepage">
@ -7575,8 +7570,8 @@ for i in range(nres):
            <div>
              <div>
                <h3 class="title"><a name=
-                "RCL.INSTALL.BUILDING.BUILD" id=
-                "RCL.INSTALL.BUILDING.BUILD"></a>6.3.2.&nbsp;Building</h3>
+                "RCL.INSTALL.BUILDING.BUILDING" id=
+                "RCL.INSTALL.BUILDING.BUILDING"></a>6.3.2.&nbsp;Building</h3>
              </div>
            </div>
          </div>
@ -7718,7 +7713,7 @@ for i in range(nres):
          <strong class=
 "userinput"><code>(practices usual hardship-repelling invocations)</code></strong>
        </pre>
-          <p>When building from source cloned from the BitBucket
+          <p>When building from source cloned from the git
          repository, you also need to install <span class=
          "application">autoconf</span>, <span class=
          "application">automake</span>, and <span class=
@ -7726,29 +7721,6 @@ for i in range(nres):
          <code class="literal">sh autogen.sh</code> in the top
          source directory before running <code class=
          "literal">configure</code>.</p>
-          <div class="sect3">
-            <div class="titlepage">
-              <div>
-                <div>
-                  <h4 class="title"><a name=
-                  "RCL.INSTALL.BUILDING.BUILD.SOLARIS" id=
-                  "RCL.INSTALL.BUILDING.BUILD.SOLARIS"></a>6.3.2.1.&nbsp;Building
-                  on Solaris</h4>
-                </div>
-              </div>
-            </div>
-            <p>We did not test building the GUI on Solaris for
-            recent versions. You will need at least Qt 4.4. There
-            are some hints on <a class="ulink" href=
-            "http://www.recoll.org/download-1.14.html" target=
-            "_top">an old web site page</a>, they may still be
-            valid.</p>
-            <p>Someone did test the 1.19 indexer and Python module
-            build, they do work, with a few minor glitches. Be sure
-            to use GNU <span class=
-            "command"><strong>make</strong></span> and <span class=
-            "command"><strong>install</strong></span>.</p>
-          </div>
        </div>
        <div class="sect2">
          <div class="titlepage">
@ -7756,7 +7728,7 @@ for i in range(nres):
              <div>
                <h3 class="title"><a name=
                "RCL.INSTALL.BUILDING.INSTALL" id=
-                "RCL.INSTALL.BUILDING.INSTALL"></a>6.3.3.&nbsp;Installation</h3>
+                "RCL.INSTALL.BUILDING.INSTALL"></a>6.3.3.&nbsp;Installing</h3>
              </div>
            </div>
          </div>
@ -7769,6 +7741,66 @@ for i in range(nres):
          to <code class="filename"><em class=
          "replaceable"><code>prefix</code></em>/share/recoll</code>.</p>
        </div>
+        <div class="sect2">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h3 class="title"><a name=
+                "RCL.INSTALL.BUILDING.PYTHON" id=
+                "RCL.INSTALL.BUILDING.PYTHON"></a>6.3.4.&nbsp;Python
+                API package</h3>
+              </div>
+            </div>
+          </div>
+          <p>The Python interface can be found in the source tree,
+          under the <code class="filename">python/recoll</code>
+          directory.</p>
+          <p>As of <span class="application">Recoll</span> 1.19,
+          the module can be compiled for Python3.</p>
+          <p>The normal <span class="application">Recoll</span>
+          build procedure (see above) installs the API package for
+          the default system version (python) along with the main
+          code. The package for other Python versions (e.g. python3
+          if the system default is python2) must be explicitly
+          built and installed.</p>
+          <p>The <code class="filename">python/recoll/</code>
+          directory contains the usual <code class=
+          "filename">setup.py</code>. After configuring and
+          building the main <span class="application">Recoll</span>
+          code, you can use the script to build and install the
+          Python module:</p>
+          <pre class="screen">
+          <strong class=
+"userinput"><code>cd recoll-xxx/python/recoll</code></strong>
+          <strong class=
+"userinput"><code>pythonX setup.py build</code></strong>
+          <strong class=
+"userinput"><code>sudo pythonX setup.py install</code></strong>
+        </pre>
+        </div>
+        <div class="sect2">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h3 class="title"><a name=
+                "RCL.INSTALL.BUILDING.SOLARIS" id=
+                "RCL.INSTALL.BUILDING.SOLARIS"></a>6.3.5.&nbsp;Building
+                on Solaris</h3>
+              </div>
+            </div>
+          </div>
+          <p>We did not test building the GUI on Solaris for recent
+          versions. You will need at least Qt 4.4. There are some
+          hints on <a class="ulink" href=
+          "http://www.recoll.org/download-1.14.html" target=
+          "_top">an old web site page</a>, they may still be
+          valid.</p>
+          <p>Someone did test the 1.19 indexer and Python module
+          build, they do work, with a few minor glitches. Be sure
+          to use GNU <span class=
+          "command"><strong>make</strong></span> and <span class=
+          "command"><strong>install</strong></span>.</p>
+        </div>
      </div>
      <div class="sect1">
        <div class="titlepage">
--- a/src/doc/user/usermanual.xml
+++ b/src/doc/user/usermanual.xml
@ -5,7 +5,7 @@

 <!ENTITY RCL "<application>Recoll</application>">
 <!ENTITY RCLAPPS "<ulink url='http://www.recoll.org/features.html#doctypes'>http://www.recoll.org/features.html</ulink>">
-<!ENTITY RCLVERSION "1.23">
+<!ENTITY RCLVERSION "1.24">
 <!ENTITY XAP "<application>Xapian</application>">
 <!ENTITY WIN "<application>Windows</application>">
 <!ENTITY FAQS "https://www.lesbonscomptes.com/recoll/faqsandhowtos/">
@ -1470,7 +1470,7 @@

      <para>When building &RCL;, the real time indexing support can be
      customised during package <link
-      linkend="RCL.INSTALL.BUILDING.BUILD">configuration</link> with
+      linkend="RCL.INSTALL.BUILDING">configuration</link> with
      the <option>--with[out]-fam</option> or
      <option>--with[out]-inotify</option> options.  The default is
      currently to include <application>inotify</application>
@ -4817,30 +4817,11 @@ recollindex -c "$confdir"
        paragraph at the end of this section will explain a few differences
        and ways to write code compatible with both versions.</para>

-        <para>The Python interface can be found in the source package,
-        under <filename>python/recoll</filename>.</para>
-
-        <para>The <filename>python/recoll/</filename> directory
-        contains the usual <filename>setup.py</filename>. After
-        configuring the main &RCL; code, you can use the script to
-        build and install the Python module:
-        <screen>
-          <userinput>cd recoll-xxx/python/recoll</userinput>
-          <userinput>python setup.py build</userinput>
-          <userinput>python setup.py install</userinput>
-        </screen>
-        </para> 
-
-        <para>As of &RCL; 1.19, the module can be compiled for
-        Python3.</para>
-
-        <para>The normal &RCL; installer installs the Python2
-        API along with the main code. The Python3 version must be
-        explicitely built and installed.</para>
-
-        <para>When installing from a repository, and depending on the
-        distribution, the Python API can sometimes be found in a
-        separate package.</para>
+        <para>There is a good chance that your system repository has
+        packages for the Recoll Python API, sometimes in a package separate
+        from the main one (maybe named something like python-recoll).
+        Otherwise, refer to the <link linkend="RCL.INSTALL.BUILDING">Building from
+        source chapter</link>.</para>

        <para>As an introduction, the following small sample will run a
        query and list the title and url for each of the results. It would
@ -4863,6 +4844,11 @@ recollindex -c "$confdir"
        print(doc.url, doc.title)
        ]]></programlisting>

+        <para>You can also take a look at the source for the <ulink
+        url="https://github.com/koniu/recoll-webui">Recoll
+        WebUI</ulink>, or the <ulink url="https://opensourceprojects.eu/p/upmpdcli/code/ci/c8c8e75bd181ad9db2df14da05934e53ca867a06/tree/src/mediaserver/cdplugins/uprcl/uprclfolders.py">upmpdcli local media server</ulink>, which are both
+        based on the Python API.</para>
+        
      </sect2>
      
      <sect2 id="RCL.PROGRAM.PYTHONAPI.ELEMENTS">
@ -5894,7 +5880,7 @@ for i in range(nres):

      </sect2>

-      <sect2 id="RCL.INSTALL.BUILDING.BUILD">
+      <sect2 id="RCL.INSTALL.BUILDING.BUILDING">
        <title>Building</title>

        <para>&RCL; has been built on Linux, FreeBSD, Mac OS X, and Solaris,
@ -6010,30 +5996,16 @@ for i in range(nres):
          <userinput>(practices usual hardship-repelling invocations)</userinput>
        </screen>

-        <para>When building from source cloned from the BitBucket repository,
+        <para>When building from source cloned from the git repository,
        you also need to install <application>autoconf</application>,
        <application>automake</application>, and
        <application>libtool</application> and you must execute <literal>sh
        autogen.sh</literal> in the top source directory before running
        <literal>configure</literal>.</para>
-        
-        <sect3 id="RCL.INSTALL.BUILDING.BUILD.SOLARIS">
-          <title>Building on Solaris</title>
-
-          <para>We did not test building the GUI on Solaris for recent
-          versions. You will need at least Qt 4.4. There are some hints
-          on <ulink url="http://www.recoll.org/download-1.14.html">an old
-          web site page</ulink>, they may still be valid.</para>
-
-          <para>Someone did test the 1.19 indexer and Python module build,
-          they do work, with a few minor glitches. Be sure to use
-          GNU <command>make</command> and <command>install</command>.</para> 
-        </sect3>
-
      </sect2>
-
+      
      <sect2 id="RCL.INSTALL.BUILDING.INSTALL">
-        <title>Installation</title>
+        <title>Installing</title>
        
        <para>Use <userinput>make install</userinput>
        in the root 
@ -6045,6 +6017,48 @@ for i in range(nres):
        </para>

      </sect2>
+
+      <sect2 id="RCL.INSTALL.BUILDING.PYTHON">
+        <title>Python API package</title>
+
+        <para>The Python interface can be found in the source tree,
+        under the <filename>python/recoll</filename> directory.</para>
+
+        <para>As of &RCL; 1.19, the module can be compiled for
+        Python3.</para>
+
+        <para>The normal &RCL; build procedure (see above) installs the API
+        package for the default system version (python) along with the main
+        code. The package for other Python versions (e.g. python3 if the
+        system default is python2) must be explicitly built and
+        installed.</para>
+
+        <para>The <filename>python/recoll/</filename> directory contains
+        the usual <filename>setup.py</filename>. After configuring and
+        building the main &RCL; code, you can use the script to build and
+        install the Python module:
+        <screen>
+          <userinput>cd recoll-xxx/python/recoll</userinput>
+          <userinput>pythonX setup.py build</userinput>
+          <userinput>sudo pythonX setup.py install</userinput>
+        </screen>
+        </para> 
+
+      </sect2>
+
+      <sect2 id="RCL.INSTALL.BUILDING.SOLARIS">
+        <title>Building on Solaris</title>
+
+        <para>We did not test building the GUI on Solaris for recent
+        versions. You will need at least Qt 4.4. There are some hints
+        on <ulink url="http://www.recoll.org/download-1.14.html">an old
+        web site page</ulink>, they may still be valid.</para>
+
+        <para>Someone did test the 1.19 indexer and Python module build,
+        they do work, with a few minor glitches. Be sure to use
+        GNU <command>make</command> and <command>install</command>.</para> 
+      </sect2>
+
    </sect1>

    <sect1 id="RCL.INSTALL.CONFIG">
--- a/src/filters/ppt-dump.py
+++ b/src/filters/ppt-dump.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 #
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
--- a/src/filters/rcl7z
+++ b/src/filters/rcl7z
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

 # 7-Zip file filter for Recoll

--- a/src/filters/rclabw.py
+++ b/src/filters/rclabw.py
@ -1,91 +1,28 @@
-#!/bin/sh
-# @(#$Id: rclabw,v 1.3 2008-10-08 08:27:34 dockes Exp $  (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# Extract text from an abiword file
-#================================================================
+#!/usr/bin/env python3
+# Copyright (C) 2014 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################

-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclabw"
-filetype=abiword
+from __future__ import print_function

+import sys
+import rclexecm
+import rclgenxslt

-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds xsltproc
-
-xsltproc --nonet --novalid - "$infile" <<EOF
-<?xml version="1.0"?>
+stylesheet_all = '''<?xml version="1.0"?>
 <xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:ab="http://www.abisource.com/awml.dtd" 
@ -173,7 +110,9 @@ xsltproc --nonet --novalid - "$infile" <<EOF
 </xsl:template>

 </xsl:stylesheet>
-EOF
+'''

-# exit normally
-exit 0
+if __name__ == '__main__':
+    proto = rclexecm.RclExecM()
+    extract = rclgenxslt.XSLTExtractor(proto, stylesheet_all)
+    rclexecm.main(proto, extract)
--- a/src/filters/rclaudio
+++ b/src/filters/rclaudio
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

 # Audio tag filter for Recoll, using mutagen

@ -164,6 +164,13 @@ tagdict = {
    '\xa9wrt' : 'COMPOSER',
    }

+def tobytes(s):
+    if type(s) == type(b''):
+        return s
+    if type(s) != type(u''):
+        s = str(s)
+    return s.encode('utf-8', errors='replace')
+    
 # mp3:      album, title, artist, genre, date, tracknumber
 # flac:     album, title, artist, genre, xxx, tracknumber
 # oggvorbis:album, title, artist, genre, date, tracknumber
@ -236,6 +243,7 @@ class AudioTagExtractor:
        filename = params["filename:"]
        mimetype = params["mimetype:"]
        self.filename = filename
+        #self.em.rclog("%s" % filename)
        try:
            mutf = File(filename)
        except Exception as err:
@ -247,6 +255,7 @@ class AudioTagExtractor:
        ###################
        # Extract audio parameters. Not all file types supply all or
        # even use the same property names...
+        # minf has natural str keys, and encoded values
        minf = {}
        for prop,dflt in [('sample_rate', 44100), ('channels', 2),
                          ('length', 0), ('bitrate', 0)]:
@ -258,7 +267,7 @@ class AudioTagExtractor:

        if minf['bitrate'] == 0 and minf['length'] > 0:
            br = int(os.path.getsize(filename)* 8 / minf['length'])
-            minf['bitrate'] = str(br)
+            minf['bitrate'] = br

        minf['duration'] = minf['length']
        del minf['length']
@ -274,41 +283,37 @@ class AudioTagExtractor:
                minf['bits_per_sample'] = 16

        for tag,val in minf.items():
-            minf[tag] = str(val)
-
-        #self.em.rclog("minf after audio %s\n" % minf)
-        
+            minf[tag] = tobytes(val)
+            
        ####################
        # Metadata tags. The names vary depending on the file type. We
        # just have a big translation dictionary for all
        for tag,val in mutf.items():
-            #self.em.rclog("Original tag: <%s>, val <%s>" % (tag, val))
            if tag.upper() in tagdict:
                tag = tag.upper()
            if tag in tagdict:
+                #self.em.rclog("Original tag: <%s>, type0 %s val <%s>" %
+                #              (tag, type(val), val))
+                # Some file types return lists of value (e.g. FLAC)
+                try:
+                    val = " ".join(val)
+                    #self.em.rclog("Joined tag: <%s>, type0 %s val <%s>" %
+                    #              (tag, type(val), val))
+                except:
+                    pass
                ntag = tagdict[tag].lower()
                #self.em.rclog("New tag: %s" % ntag)
                try:
-                    if isinstance(val, bool):
-                        val0 = str(val)
-                    else:
-                        try:
-                            val0 = val[0]
-                        except:
-                            val0 = val
-                    if val0:
-                        if type(val0) == type(u""):
-                            val0 = val0.encode('utf-8', errors='replace')
-                        else:
-                            val0 = str(val0)
-                        minf[ntag] = val0
-                        #self.em.rclog("Tag %s -> %s" % (ntag, val0))
+                    minf[ntag] = tobytes(val)
+                    #self.em.rclog("Tag %s -> %s" % (ntag, val))
                except Exception as err:
                    self.em.rclog("Error while extracting tag: %s"%err)
            else:
                #self.em.rclog("Unprocessed tag: %s, value %s"%(tag,val))
                pass

+        #self.em.rclog("minf after extract %s\n" % minf)
+
        # TPA,TPOS,disc DISCNUMBER/TOTALDISCS
        # TRCK,TRK,trkn TRACKNUMBER/TOTALTRACKS
        for what in ('disc', 'track'):
@ -322,16 +327,17 @@ class AudioTagExtractor:
                    else:
                        l = l.split(b'/')
                else:
-                    self.em.rclog("l is tuple: %s" %l)
+                    self.em.rclog("l is tuple: %s tp1 %s tp2 %s" %
+                                  (l, type(l[0]), type(l[1])))
                if len(l) == 2:
-                    minf[k] = str(l[0])
+                    minf[k] = l[0]
                    #self.em.rclog("minf[%s] = %s" % (k, minf[k]))
                    if l[1] != 0:
-                        minf['total' + what + 's'] = str(l[1])
+                        minf['total' + what + 's'] = l[1]

        if 'orchestra' in minf:
            val = minf['orchestra']
-            if val.startswith('orchestra='):
+            if val.startswith(b'orchestra='):
                minf['orchestra'] = val[10:]
                
        #self.em.rclog("minf after tags %s\n" % minf)
@ -340,7 +346,7 @@ class AudioTagExtractor:
        embdimg = self._embeddedImageFormat(mutf)
        if embdimg:
            #self.em.rclog("Embedded image format: %s" % embdimg)
-            minf["embdimg"] = embdimg
+            minf["embdimg"] = tobytes(embdimg)
        
        self.em.setmimetype("text/plain")
        self.em.setfield("charset", 'utf-8')
@ -353,7 +359,7 @@ class AudioTagExtractor:
                self.em.setfield('author', val)    

        try:
-            docdata = mutf.pprint().encode('utf-8', errors='replace')
+            docdata = tobytes(mutf.pprint())
        except Exception as err:
            self.em.rclog("Doc pprint error: %s" % err)

--- a/src/filters/rclbasehandler.py
+++ b/src/filters/rclbasehandler.py
@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+# Copyright (C) 2016 J.F.Dockes
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+# Base for extractor classes. With some common generic implementations
+# for the boilerplate functions.
+
+from __future__ import print_function
+
+import os
+import sys
+import rclexecm
+
+class RclBaseHandler(object):
+    def __init__(self, em):
+        self.em = em
+
+
+    def extractone(self, params):
+        #self.em.rclog("extractone %s %s" % (params["filename:"], \
+        #params["mimetype:"]))
+        if not "filename:" in params:
+            self.em.rclog("extractone: no file name")
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        fn = params["filename:"]
+
+        try:
+            html = self.html_text(fn)
+        except Exception as err:
+            self.em.rclog("RclBaseDumper: %s : %s" % (fn, err))
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+
+        self.em.setmimetype('text/html')
+        return (True, html, "", rclexecm.RclExecM.eofnext)
+        
+
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        self.currentindex = 0
+        return True
+
+    def getipath(self, params):
+        return self.extractone(params)
+
+    def getnext(self, params):
+        if self.currentindex >= 1:
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        else:
+            ret= self.extractone(params)
+            self.currentindex += 1
+            return ret
--- a/src/filters/rclchm
+++ b/src/filters/rclchm
@ -1,12 +1,9 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 """Extract Html files from a Microsoft Compiled Html Help file (.chm)
 Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""

 from __future__ import print_function

-# Note: this is not converted to Py3, libchm does not have a
-# Py3 wrapper at this point (2018-03)
-
 rclchm_html_mtype = "text/html"

 import sys
@ -18,21 +15,32 @@ if PY3:
    from urllib.parse import unquote as urllib_unquote
    from urllib.parse import urlparse as urlparse_urlparse
    from html.parser import HTMLParser
+    chmpackname = 'pychm3.egg'
 else:
    from urlparse import urlparse as urlparse_urlparse
    from urllib import unquote as urllib_unquote
    from HTMLParser import HTMLParser
-    
+    chmpackname = 'pychm2.egg'
+
 import subprocess

 import rclconfig
 import rclexecm

+# pychm has no official port to Python3, hence no package in the
+# standard place.  Recoll bundles a python3 port which we install out
+# of the standard python places. Look for it:
+#  sys.path[0] is for MSW, where we install the egg in the filters
+#  directory? TBD for now
 try:
+    # First try the system version if any
    from chm import chm,chmlib
 except:
-    print("RECFILTERROR HELPERNOTFOUND python:chm")
-    sys.exit(1);
+    try:
+        from recollchm import chm,chmlib
+    except:
+        print("RECFILTERROR HELPERNOTFOUND python:chm")
+        sys.exit(1);

 # Small helper routines
 def getfile(chmfile, path):
--- a/src/filters/rcldia
+++ b/src/filters/rcldia
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from __future__ import print_function

--- a/src/filters/rcldjvu.py
+++ b/src/filters/rcldjvu.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # Copyright (C) 2016 J.F.Dockes
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
--- a/src/filters/rcldoc.py
+++ b/src/filters/rcldoc.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 from __future__ import print_function

 import rclexecm
--- a/src/filters/rcldvi
+++ b/src/filters/rcldvi
@ -17,11 +17,11 @@
 # 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

 #
-# Extract text from a dvi file by either executing dvitops and rclps
-# or using catdvi. dvitops has given better results during tests, and is
-# chosen first if available, but the dvitops/rclps combination is much
-# slower than catdvi 
-# set variables
+# Extract text from a dvi file by either executing dvips and pstotext
+# or using catdvi. dvips has given better results during tests, and is
+# chosen first if available, but the dvips/pstotext combination is much
+# slower than catdvi. In any case, the program is not too good with
+# special characters (e.g. ligatures).
 LANG=C ; export LANG
 LC_ALL=C ; export LC_ALL
 progname="rcldvi"
@ -94,26 +94,25 @@ umask 77
 # !! Leave the following line unmodified !
 #ENDRECFILTCOMMONCODE

-# Find rclps. Note: this only works because we are always executed with a
-# full path
-rclps=`dirname $0`/rclps
-
+decoderdvips()
+{
+    dvips -f "$1" 2> /dev/null | pstotext | iconv -f cp1252 -t utf-8 -c -s
+}
+decodercatdvi()
+{
+    catdvi "$1"
+}
 decoder=""
 if iscmd dvips -a iscmd pstotext ; then
-  decoder=dvips
+  decoder=decoderdvips
 elif iscmd catdvi ; then
-  decoder=catdvi
+  decoder=decodercatdvi
 fi

 if test X$decoder = X ; then
   senderror HELPERNOTFOUND dvips or catdvi
 fi

-if test X$decoder = Xdvips ; then
-   $decoder -f < "$infile" 2> /dev/null | $rclps -
-   exit $?
-fi
-
 # The strange 'BEGIN' setup is to prevent 'file' from thinking this file
 # is an awk program
 $decoder "$infile" |
--- a/src/filters/rclepub
+++ b/src/filters/rclepub
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 """Extract Html content from an EPUB file (.epub)"""
 from __future__ import print_function

@ -12,6 +12,7 @@ import subprocess
 import rclexecm
 import rclconfig

+sys.path.append(sys.path[0]+"/recollepub.zip")
 try:
    import epub
 except:
@ -112,7 +113,7 @@ class rclEPUB:
        

    def getipath(self, params):
-        return self.extractone(params["ipath:"])
+        return self.extractone(params["ipath:"].decode('UTF-8'))

    def getnext(self, params):
        if self.catenate:
--- a/src/filters/rclepub1
+++ b/src/filters/rclepub1
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 """Extract Html content from an EPUB file (.chm), concatenating all sections"""
 from __future__ import print_function

@ -8,6 +8,7 @@ import re

 import rclexecm

+sys.path.append(sys.path[0]+"/recollepub.zip")
 try:
    import epub
 except:
--- a/src/filters/rclexec1.py
+++ b/src/filters/rclexec1.py
@ -30,18 +30,18 @@ from __future__ import print_function

 import subprocess
 import rclexecm
+from rclbasehandler import RclBaseHandler

 # This class has the code to execute the subprocess and call a
 # data-specific post-processor. Command and processor are supplied by
 # the object which we receive as a parameter, which in turn is defined
 # in the actual executable filter (e.g. rcldoc.py)
-class Executor:
+class Executor(RclBaseHandler):
    opt_ignxval = 1
    
    def __init__(self, em, flt):
-        self.em = em
+        super(Executor, self).__init__(em)
        self.flt = flt
-        self.currentindex = 0

    def runCmd(self, cmd, filename, postproc, opt):
        ''' Substitute parameters and execute command, process output
@ -109,19 +109,4 @@ class Executor:
            return (ok, data, "", rclexecm.RclExecM.eofnext)
        else:
            return (ok, "", "", rclexecm.RclExecM.eofnow)
-        
-    ###### File type handler api, used by rclexecm ---------->
-    def openfile(self, params):
-        self.currentindex = 0
-        return True
-
-    def getipath(self, params):
-        return self.extractone(params)
-        
-    def getnext(self, params):
-        if self.currentindex >= 1:
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-        else:
-            ret= self.extractone(params)
-            self.currentindex += 1
-            return ret
+     
--- a/src/filters/rclfb2
+++ b/src/filters/rclfb2
@ -1,139 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclopxml,v 1.3 2008-10-08 08:27:34 dockes Exp $  (C) 2004 J.F.Dockes
-#================================================================
-# Extract text from an fb2 ebook (xml)
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname=rclfb2
-filetype=fb2
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds xsltproc
-
-xsltproc --nonet --novalid - "$infile" <<EOF
-<?xml version="1.0"?>
-<xsl:stylesheet version="1.0"
-  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-  xmlns:fb="http://www.gribuser.ru/xml/fictionbook/2.0"
-  exclude-result-prefixes="fb"
-  >
-
-<xsl:output method="html" encoding="UTF-8"/>
-
-<xsl:template match="/fb:FictionBook">
- <html>
-  <xsl:apply-templates select="fb:description"/>
-  <xsl:apply-templates select="fb:body"/>
- </html>
-</xsl:template>
-
-<xsl:template match="fb:description">
-  <head>
-    <xsl:apply-templates select="fb:title-info"/>
-  </head><xsl:text>
-</xsl:text>
-</xsl:template>
-
-<xsl:template match="fb:description/fb:title-info">
-    <xsl:apply-templates select="fb:book-title"/>
-    <xsl:apply-templates select="fb:author"/>
-</xsl:template>
-
-<xsl:template match="fb:description/fb:title-info/fb:book-title">
-<title> <xsl:value-of select="."/> </title>
-</xsl:template>
-
-<xsl:template match="fb:description/fb:title-info/fb:author">
-  <meta>
-  <xsl:attribute name="name">author</xsl:attribute>
-  <xsl:attribute name="content">
-     <xsl:value-of select="fb:first-name"/><xsl:text> </xsl:text>
-     <xsl:value-of select="fb:middle-name"/><xsl:text> </xsl:text>
-     <xsl:value-of select="fb:last-name"/>
-  </xsl:attribute>
-  </meta>
-</xsl:template>
-
-<xsl:template match="fb:body">
- <body>
- <xsl:apply-templates select="fb:section"/>
- </body>
-</xsl:template>
-
-<xsl:template match="fb:body/fb:section">
-  <xsl:for-each select="fb:p">
-  <p><xsl:value-of select="."/></p>
-  </xsl:for-each>
-</xsl:template>
-
-</xsl:stylesheet>
-EOF
--- a/src/filters/rclfb2.py
+++ b/src/filters/rclfb2.py
@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+# Copyright (C) 2014 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################
+
+from __future__ import print_function
+
+import sys
+import rclexecm
+import rclxslt
+import rclgenxslt
+
+stylesheet_all = '''<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:fb="http://www.gribuser.ru/xml/fictionbook/2.0"
+  exclude-result-prefixes="fb"
+  >
+
+<xsl:output method="html" encoding="UTF-8"/>
+
+<xsl:template match="/fb:FictionBook">
+ <html>
+  <xsl:apply-templates select="fb:description"/>
+  <xsl:apply-templates select="fb:body"/>
+ </html>
+</xsl:template>
+
+<xsl:template match="fb:description">
+  <head>
+    <xsl:apply-templates select="fb:title-info"/>
+  </head><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="fb:description/fb:title-info">
+    <xsl:apply-templates select="fb:book-title"/>
+    <xsl:apply-templates select="fb:author"/>
+</xsl:template>
+
+<xsl:template match="fb:description/fb:title-info/fb:book-title">
+<title> <xsl:value-of select="."/> </title>
+</xsl:template>
+
+<xsl:template match="fb:description/fb:title-info/fb:author">
+  <meta>
+  <xsl:attribute name="name">author</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="fb:first-name"/><xsl:text> </xsl:text>
+     <xsl:value-of select="fb:middle-name"/><xsl:text> </xsl:text>
+     <xsl:value-of select="fb:last-name"/>
+  </xsl:attribute>
+  </meta>
+</xsl:template>
+
+<xsl:template match="fb:body">
+ <body>
+ <xsl:apply-templates select="fb:section"/>
+ </body>
+</xsl:template>
+
+<xsl:template match="fb:body/fb:section">
+  <xsl:for-each select="fb:p">
+  <p><xsl:value-of select="."/></p>
+  </xsl:for-each>
+</xsl:template>
+
+</xsl:stylesheet>
+'''
+
+if __name__ == '__main__':
+    # Script entry point: build the execm protocol object, wrap the
+    # FictionBook stylesheet in the generic single-stylesheet XSLT
+    # handler, and pass both to rclexecm.main().
+    protocol = rclexecm.RclExecM()
+    handler = rclgenxslt.XSLTExtractor(protocol, stylesheet_all)
+    rclexecm.main(protocol, handler)
--- a/src/filters/rclgenxslt.py
+++ b/src/filters/rclgenxslt.py
@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# Copyright (C) 2018 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################
+
+# Base class for simple (one stylesheet) xslt-based handlers
+
+from __future__ import print_function
+
+import sys
+import rclxslt
+import gzip
+from rclbasehandler import RclBaseHandler
+
+class XSLTExtractor(RclBaseHandler):
+    """Handler which converts a file by applying one XSLT stylesheet to it.
+
+    em         -- protocol object, passed on to RclBaseHandler.__init__().
+    stylesheet -- the XSLT stylesheet text to apply.
+    gzip       -- if true, the input file is read through gzip.open(),
+                  else it is read as is.
+    """
+    def __init__(self, em, stylesheet, gzip=False):
+        # The 'gzip' parameter name shadows the gzip module, but only
+        # inside this method: html_text() still sees the module. The name
+        # is kept because callers pass it as a keyword argument.
+        super(XSLTExtractor, self).__init__(em)
+        self.stylesheet = stylesheet
+        self.dogz = gzip
+
+    def html_text(self, fn):
+        """Read the file named fn and return the result of
+        rclxslt.apply_sheet_data() for our stylesheet and the file data.
+
+        Errors from opening, reading or decompressing the file are not
+        caught here and propagate to the caller.
+        """
+        # Select the opener, then read inside a 'with' block so that the
+        # file is closed even if read() raises (the previous code never
+        # closed it explicitly).
+        opener = gzip.open if self.dogz else open
+        with opener(fn, 'rb') as f:
+            data = f.read()
+        return rclxslt.apply_sheet_data(self.stylesheet, data)
--- a/src/filters/rclgnm
+++ b/src/filters/rclgnm
@ -1,191 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclsoff,v 1.12 2008-10-08 08:27:34 dockes Exp $  (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# Extract text from a gnumeric spreadsheet
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclgnumeric"
-filetype=gnumeric
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds xsltproc gunzip
-
-# We need a temporary file
-if test z"$RECOLL_TMPDIR" != z; then
-   ttdir=$RECOLL_TMPDIR
-elif test z"$TMPDIR" != z ; then
-   ttdir=$TMPDIR
-else
-   ttdir=/tmp
-fi
-tmpfile=$ttdir/rclgnm.XXXXXX
-
-tmpfile=`mktemp "$tmpfile"`
-if [ $? -ne 0 ]; then
-   senderror "$0: Can't create temp file, exiting..."
-fi
-
-cleanup()
-{
-    rm -f $tmpfile
-}
-    
-trap cleanup EXIT HUP QUIT INT TERM
-
-gunzip < $1 > $tmpfile || senderror "Cant uncompress input"
-xsltproc --novalid --nonet - $tmpfile <<EOF
-<?xml version="1.0"?>
-<xsl:stylesheet version="1.0"
-  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" 
-  xmlns:xlink="http://www.w3.org/1999/xlink" 
-  xmlns:dc="http://purl.org/dc/elements/1.1/" 
-  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" 
-  xmlns:ooo="http://openoffice.org/2004/office"
-  xmlns:gnm="http://www.gnumeric.org/v10.dtd"
-
-  exclude-result-prefixes="office xlink meta ooo dc"
-  >
-
-<xsl:output method="html" encoding="UTF-8"/>
-
-<xsl:template match="/">
-<html>
-  <head>
-   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
-   <xsl:apply-templates select="//office:document-meta/office:meta"/>
-  </head>
-
-  <body>
-    <xsl:apply-templates select="//gnm:Cells"/>
-    <xsl:apply-templates select="//gnm:Objects"/>
-  </body>
-</html>
-</xsl:template>
-
-<xsl:template match="//dc:date">
-   <meta>
-     <xsl:attribute name="name">date</xsl:attribute>
-     <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
-   </meta>
-</xsl:template>
-
-<xsl:template match="//dc:description">
-  <meta>
-    <xsl:attribute name="name">abstract</xsl:attribute>
-    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
-  </meta>
-</xsl:template>
-
-<xsl:template match="//meta:keyword">
-  <meta>
-    <xsl:attribute name="name">keywords</xsl:attribute>
-    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
-  </meta>
-</xsl:template>
-
-<xsl:template match="//dc:subject">
-  <meta>
-    <xsl:attribute name="name">keywords</xsl:attribute>
-    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
-  </meta>
-</xsl:template>
-
-<xsl:template match="//dc:title">
-  <title> <xsl:value-of select="."/> </title>
-</xsl:template>
-
-<xsl:template match="//meta:initial-creator">
-  <meta>
-    <xsl:attribute name="name">author</xsl:attribute>
-    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
-  </meta>
-</xsl:template>
-
-<xsl:template match="office:meta/*"/>
-
-<xsl:template match="gnm:Cell">
-  <p><xsl:value-of select="."/></p>
-</xsl:template>
-
-<xsl:template match="gnm:CellComment">
-  <blockquote><xsl:value-of select="@Text"/></blockquote>
-</xsl:template>
-
-</xsl:stylesheet>
-EOF
-
--- a/src/filters/rclgnm.py
+++ b/src/filters/rclgnm.py
@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+# Copyright (C) 2014 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################
+
+from __future__ import print_function
+
+import sys
+import rclexecm
+import rclgenxslt
+
+
+stylesheet_all = '''<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" 
+  xmlns:xlink="http://www.w3.org/1999/xlink" 
+  xmlns:dc="http://purl.org/dc/elements/1.1/" 
+  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" 
+  xmlns:ooo="http://openoffice.org/2004/office"
+  xmlns:gnm="http://www.gnumeric.org/v10.dtd"
+
+  exclude-result-prefixes="office xlink meta ooo dc"
+  >
+
+<xsl:output method="html" encoding="UTF-8"/>
+
+<xsl:template match="/">
+<html>
+  <head>
+   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+   <xsl:apply-templates select="//office:document-meta/office:meta"/>
+  </head>
+
+  <body>
+    <xsl:apply-templates select="//gnm:Cells"/>
+    <xsl:apply-templates select="//gnm:Objects"/>
+  </body>
+</html>
+</xsl:template>
+
+<xsl:template match="//dc:date">
+   <meta>
+     <xsl:attribute name="name">date</xsl:attribute>
+     <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+   </meta>
+</xsl:template>
+
+<xsl:template match="//dc:description">
+  <meta>
+    <xsl:attribute name="name">abstract</xsl:attribute>
+    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+  </meta>
+</xsl:template>
+
+<xsl:template match="//meta:keyword">
+  <meta>
+    <xsl:attribute name="name">keywords</xsl:attribute>
+    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+  </meta>
+</xsl:template>
+
+<xsl:template match="//dc:subject">
+  <meta>
+    <xsl:attribute name="name">keywords</xsl:attribute>
+    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+  </meta>
+</xsl:template>
+
+<xsl:template match="//dc:title">
+  <title> <xsl:value-of select="."/> </title>
+</xsl:template>
+
+<xsl:template match="//meta:initial-creator">
+  <meta>
+    <xsl:attribute name="name">author</xsl:attribute>
+    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+  </meta>
+</xsl:template>
+
+<xsl:template match="office:meta/*"/>
+
+<xsl:template match="gnm:Cell">
+  <p><xsl:value-of select="."/></p>
+</xsl:template>
+
+<xsl:template match="gnm:CellComment">
+  <blockquote><xsl:value-of select="@Text"/></blockquote>
+</xsl:template>
+
+</xsl:stylesheet>
+'''
+
+
+if __name__ == '__main__':
+    # Script entry point. gzip=True asks the generic XSLT handler to
+    # read the input file through gzip before applying the stylesheet.
+    protocol = rclexecm.RclExecM()
+    handler = rclgenxslt.XSLTExtractor(protocol, stylesheet_all, gzip=True)
+    rclexecm.main(protocol, handler)
+
--- a/src/filters/rclics
+++ b/src/filters/rclics
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 from __future__ import print_function

 # Read an ICS file, break it into "documents" which are events, todos,
--- a/src/filters/rclimg
+++ b/src/filters/rclimg
@ -1,4 +1,4 @@
-#!/usr/bin/env perl
+#!/usr/bin/perl
 # @(#$Id: rclimg,v 1.5 2008-10-09 06:41:21 dockes Exp $  (C) 2007 Cedric Scott
 #######################################################
 # This program is free software; you can redistribute it and/or modify
--- a/src/filters/rclimg.py
+++ b/src/filters/rclimg.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

 # Python-based Image Tag extractor for Recoll. This is less thorough
 # than the Perl-based rclimg script, but useful if you don't want to
@ -12,6 +12,7 @@ import sys
 import os
 import rclexecm
 import re
+from rclbasehandler import RclBaseHandler

 try:
    import pyexiv2
@ -41,31 +42,21 @@ meta_pyexiv2_keys = {
 exiv2_dates = ['Exif.Photo.DateTimeOriginal',
               'Exif.Image.DateTime', 'Exif.Photo.DateTimeDigitized']

-class ImgTagExtractor:
+class ImgTagExtractor(RclBaseHandler):
    def __init__(self, em):
-        self.em = em
-        self.currentindex = 0
+        super(ImgTagExtractor, self).__init__(em)

-    def extractone(self, params):
-        #self.em.rclog("extractone %s" % params["filename:"])
+    def html_text(self, filename):
        ok = False
-        if "filename:" not in params:
-            self.em.rclog("extractone: no file name")
-            return (ok, docdata, "", rclexecm.RclExecM.eofnow)
-        filename = params["filename:"]

-        try:
-            metadata = pyexiv2.ImageMetadata(filename)
-            metadata.read()
-            keys = metadata.exif_keys + metadata.iptc_keys + metadata.xmp_keys
-            mdic = {}
-            for k in keys:
-                # we skip numeric keys and undecoded makernote data
-                if k != 'Exif.Photo.MakerNote' and not khexre.match(k):
-                    mdic[k] = str(metadata[k].raw_value)
-        except Exception as err:
-            self.em.rclog("extractone: extract failed: [%s]" % err)
-            return (ok, "", "", rclexecm.RclExecM.eofnow)
+        metadata = pyexiv2.ImageMetadata(filename)
+        metadata.read()
+        keys = metadata.exif_keys + metadata.iptc_keys + metadata.xmp_keys
+        mdic = {}
+        for k in keys:
+            # we skip numeric keys and undecoded makernote data
+            if k != 'Exif.Photo.MakerNote' and not khexre.match(k):
+                mdic[k] = str(metadata[k].raw_value)

        docdata = b'<html><head>\n'

@ -101,25 +92,8 @@ class ImgTagExtractor:
                                     self.em.htmlescape(mdic[k]) + "<br />\n")
        docdata += b'</body></html>'

-        self.em.setmimetype("text/html")
+        return docdata

-        return (True, docdata, "", rclexecm.RclExecM.eofnext)
-
-    ###### File type handler api, used by rclexecm ---------->
-    def openfile(self, params):
-        self.currentindex = 0
-        return True
-
-    def getipath(self, params):
-        return self.extractone(params)
-        
-    def getnext(self, params):
-        if self.currentindex >= 1:
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-        else:
-            ret= self.extractone(params)
-            self.currentindex += 1
-            return ret

 if __name__ == '__main__':
    proto = rclexecm.RclExecM()
--- a/src/filters/rclinfo
+++ b/src/filters/rclinfo
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

 # Read a file in GNU info format and output its nodes as subdocs,
 # interfacing with recoll execm
--- a/src/filters/rclkar
+++ b/src/filters/rclkar
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

 # Read a .kar midi karaoke file and translate to recoll indexable format
 # This does not work with Python3 yet because python:midi doesn't 
@ -10,6 +10,7 @@ import os.path
 import string
 import re
 import codecs
+from rclbasehandler import RclBaseHandler

 try:
    import rcllatinclass
@ -51,7 +52,7 @@ if PY3:
 else:
    nullchar = chr(0)
    
-class KarTextExtractor:
+class KarTextExtractor(RclBaseHandler):
    # Afaik, the only charset encodings with null bytes are variations on
    # utf-16 and utf-32 and iso relatives. A hopefully comprehensive
    # list follows, compiled from iconv and python values. This is used for
@ -66,8 +67,7 @@ class KarTextExtractor:
          'utf_16_le', 'utf_32', 'utf_32_be', 'utf_32_le'))

    def __init__(self, em):
-        self.em = em
-        self.currentindex = 0
+        super(KarTextExtractor, self).__init__(em)
        self.encoding = ""
        self.defaultencoding = ""
        self.hadnulls = False
@ -182,16 +182,7 @@ class KarTextExtractor:
        return (encoding, confidence)

    
-    def extractone(self, params):
-        '''Process one file'''
-        docdata = ""
-        ok = False
-
-        if "filename:" not in params:
-            self.em.rclog("extractone: no mime or file name")
-            return (ok, docdata, "", rclexecm.RclExecM.eofnow)
-        filename = params["filename:"]
-
+    def html_text(self, filename):
        # Character encoding from file name ?
        self.encoding = self.encodingfromfilename(filename)
        if self.encoding:
@ -200,18 +191,8 @@ class KarTextExtractor:
            except:
                self.encoding = ""

-        # Mimetype not used for now
-        if "mimetype:" not in params:
-            mimetype = 'audio/x-midi'
-        else:
-            mimetype = params["mimetype:"]
-
        # Read in and midi-decode the file
-        try:
-            stream = midi.read_midifile(filename)
-        except Exception as err:
-            self.em.rclog("extractone: read_midifile failed: [%s]" % err)
-            return (ok, docdata, "", rclexecm.RclExecM.eofnow)
+        stream = midi.read_midifile(filename)

        title = None
        author = None
@ -262,7 +243,6 @@ class KarTextExtractor:
            lyrics += self.nulltrunc(edata)
            lyricsN += edata

-        
        # Try to guess the encoding. First do it with the data
        # possibly containing nulls. If we get one of the accepted
        # nullbyte encodings, go with this, else repeat with the
@ -305,28 +285,8 @@ class KarTextExtractor:
        lyrics = self.reencode(lyrics)
        language = self.reencode(language)
        
-        self.em.setmimetype("text/html")
-        docdata = htmltemplate % (title, author, language, lyrics)
+        return htmltemplate % (title, author, language, lyrics)

-        ok = True
-        return (ok, docdata, "", rclexecm.RclExecM.eofnext)
-
-    ###### File type handler api, used by rclexecm. Some stuff makes little
-    # sense because we only have one doc per file.
-    def openfile(self, params):
-        self.currentindex = 0
-        return True
-
-    def getipath(self, params):
-        return self.extractone(params)
-        
-    def getnext(self, params):
-        if self.currentindex >= 1:
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-        else:
-            ret= self.extractone(params)
-            self.currentindex += 1
-            return ret

 proto = rclexecm.RclExecM()
 extract = KarTextExtractor(proto)
--- a/src/filters/rcllatinclass.py
+++ b/src/filters/rcllatinclass.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 """Try to guess a text's language and character set by checking how it matches lists of
 common words. This is not a primary method of detection because it's slow and unreliable, but it
 may be a help in discrimating, for exemple, before european languages using relatively close
--- a/src/filters/rclokulnote
+++ b/src/filters/rclokulnote
@ -1,130 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclsoff,v 1.12 2008-10-08 08:27:34 dockes Exp $  (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# Extract text from a gnumeric spreadsheet
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclgnumeric"
-filetype=gnumeric
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds xsltproc 
-    
-xsltproc --novalid --nonet - "$infile" <<EOF
-<?xml version="1.0"?>
-<xsl:stylesheet version="1.0"
-  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
-
-<xsl:output method="html" encoding="UTF-8"/>
-<xsl:strip-space elements="*" />
-
-
-<xsl:template match="/">
-<html>
-  <head>
-   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
-   <title>
-     Okular notes about: <xsl:value-of select="/documentInfo/@url" />
-   </title>
-  </head>
-  <body>
-    <xsl:apply-templates />
-  </body>
-</html>
-</xsl:template>
-
-<xsl:template match="node()">
-  <xsl:apply-templates select="@* | node() "/>
-</xsl:template>
-
-<xsl:template match="text()">
-  <p><xsl:value-of select="."/></p>
-<xsl:text >
-</xsl:text>
-</xsl:template>
-
-<xsl:template match="@contents|@author">
-  <p><xsl:value-of select="." /></p>
-<xsl:text >
-</xsl:text>
-</xsl:template>
-
-<xsl:template match="@*"/>
-
-</xsl:stylesheet>
-EOF
-
--- a/src/filters/rclokulnote.py
+++ b/src/filters/rclokulnote.py
@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+# Copyright (C) 2014 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################
+from __future__ import print_function
+
+import sys
+import rclexecm
+import rclgenxslt
+
+stylesheet_all = '''<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+<xsl:output method="html" encoding="UTF-8"/>
+<xsl:strip-space elements="*" />
+
+<xsl:template match="/">
+<html>
+  <head>
+   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+   <title>
+     Okular notes about: <xsl:value-of select="/documentInfo/@url" />
+   </title>
+  </head>
+  <body>
+    <xsl:apply-templates />
+  </body>
+</html>
+</xsl:template>
+
+<xsl:template match="node()">
+  <xsl:apply-templates select="@* | node() "/>
+</xsl:template>
+
+<xsl:template match="text()">
+  <p><xsl:value-of select="."/></p>
+<xsl:text >
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="@contents|@author">
+  <p><xsl:value-of select="." /></p>
+<xsl:text >
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="@*"/>
+
+</xsl:stylesheet>
+'''
+
+if __name__ == '__main__':
+    # Script entry point: build the execm protocol object, wrap the
+    # Okular notes stylesheet in the generic single-stylesheet XSLT
+    # handler, and pass both to rclexecm.main().
+    protocol = rclexecm.RclExecM()
+    handler = rclgenxslt.XSLTExtractor(protocol, stylesheet_all)
+    rclexecm.main(protocol, handler)
+
--- a/src/filters/rclopxml.py
+++ b/src/filters/rclopxml.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # Copyright (C) 2015 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
--- a/src/filters/rclpdf.py
+++ b/src/filters/rclpdf.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # Copyright (C) 2014 J.F.Dockes
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@ -91,13 +91,12 @@ class PDFExtractor:
                # error at once
                return

-        cf = rclconfig.RclConfig()
-        self.confdir = cf.getConfDir()
-
+        self.config = rclconfig.RclConfig()
+        self.confdir = self.config.getConfDir()
        # The user can set a list of meta tags to be extracted from
        # the XMP metadata packet. These are specified as
        # (xmltag,rcltag) pairs
-        self.extrameta = cf.getConfParam("pdfextrameta")
+        self.extrameta = self.config.getConfParam("pdfextrameta")
        if self.extrameta:
            self._initextrameta()

@ -119,7 +118,7 @@ class PDFExtractor:
        # either the presence of a file in the config dir (historical)
        # or a set config variable.
        self.ocrpossible = False
-        cf_doocr = cf.getConfParam("pdfocr")
+        cf_doocr = self.config.getConfParam("pdfocr")
        if cf_doocr or os.path.isfile(os.path.join(self.confdir, "ocrpdf")):
            self.tesseract = rclexecm.which("tesseract")
            if self.tesseract:
@ -134,7 +133,7 @@ class PDFExtractor:
        # so it can be disabled in the configuration.
        self.attextractdone = False
        self.attachlist = []
-        cf_attach = cf.getConfParam("pdfattach")
+        cf_attach = self.config.getConfParam("pdfattach")
        if cf_attach:
            self.pdftk = rclexecm.which("pdftk")
        if self.pdftk:
@ -224,18 +223,28 @@ class PDFExtractor:
    # environment and hope for the best.
    def guesstesseractlang(self):
        tesseractlang = ""
-        pdflangfile = os.path.join(os.path.dirname(self.filename), ".ocrpdflang")
+
+        # First look for a language def file in the file's directory 
+        pdflangfile = os.path.join(os.path.dirname(self.filename),
+                                   b".ocrpdflang")
        if os.path.isfile(pdflangfile):
            tesseractlang = open(pdflangfile, "r").read().strip()
        if tesseractlang:
            return tesseractlang

+        # Then look for a global option. The normal way now that we
+        # have config reading capability in the handlers is to use the
+        # config. Then, for backwards compat, environment variable and
+        # file inside the configuration directory
+        tesseractlang = self.config.getConfParam("pdfocrlang")
+        if tesseractlang:
+            return tesseractlang
        tesseractlang = os.environ.get("RECOLL_TESSERACT_LANG", "");
        if tesseractlang:
            return tesseractlang
-        
-        tesseractlang = \
-                      open(os.path.join(self.confdir, "ocrpdf"), "r").read().strip()
+        # Backwards compat: language name stored in <confdir>/ocrpdf. Use a
+        # literal of the same type as confdir: under Python 3 os.path.join
+        # raises TypeError when str and bytes components are mixed.
+        ocrname = b"ocrpdf" if isinstance(self.confdir, bytes) else "ocrpdf"
+        pdflangfile = os.path.join(self.confdir, ocrname)
+        if os.path.isfile(pdflangfile):
+            # Context manager so the handle is closed even if read() fails
+            with open(pdflangfile, "r") as f:
+                tesseractlang = f.read().strip()
        if tesseractlang:
            return tesseractlang

@ -285,7 +294,7 @@ class PDFExtractor:
            except Exception as e:
                self.em.rclog("tesseract failed: %s" % e)

-            errlines = out.split('\n')
+            errlines = out.split(b'\n')
            if len(errlines) > 2:
                self.em.rclog("Tesseract error: %s" % out)

--- a/src/filters/rclppt.py
+++ b/src/filters/rclppt.py
@ -1,8 +1,6 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

 # Recoll PPT text extractor
-# Mso-dumper is not compatible with Python3. We use sys.executable to
-# start the actual extractor, so we need to use python2 too.

 from __future__ import print_function

--- a/src/filters/rclps
+++ b/src/filters/rclps
@ -1,135 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclps,v 1.10 2007-06-08 13:51:09 dockes Exp $  (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# Extract text from a postscript file by executing pstotext or ps2ascii. 
-#
-# The default is to use pstotext which can deal with accents, but in a
-# partially broken way (it always outputs iso8859-1, when it should use utf.
-#
-# OTOH, ps2ascii is much faster, comes with ghostscript, and sometimes work
-# better (ie: on some openoffice output files).
-#
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclps"
-decoder=pstotext
-#decoder=ps2ascii
-filetype=postscript
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds $decoder iconv awk
-
-# output the result
-# The strange 'BEGIN' setup is to prevent 'file' from thinking this file
-# is an awk program
-$decoder "$infile" |
-awk 'BEGIN'\
-' {
-  printf("<html><head><title></title>\n")
-  printf("<meta http-equiv=\"Content-Type\" content=\"text/html;charset=UTF-8\">\n")
-  printf("</head>\n<body><p>");
-  doescape = 1
-  cont = ""
-}
-{
-    $0 = cont $0
-    cont = ""
-
-    if ($0 == "\f") {
-       print "</p>\n<hr>\n\f<p>"
-       next
-    } else if ($0 ~ /$/) {
-      # Note : soft-hyphen is iso8859 0xad
-      # Break at last whitespace
-      match($0, "[ \t][^ \t]+$")
-      line = substr($0, 0, RSTART)
-      cont = substr($0, RSTART, RLENGTH)
-      $0 = line
-      gsub("", "", cont)
-    }
-
-    if(doescape > 0) {
-      gsub(/&/, "\\&amp;", $0)
-      gsub(/</, "\\&lt;", $0)
-      gsub(/>/, "\\&gt;", $0)
-    }
-    print $0 "<br>"
-}
-END {
-    print "</p></body></html>"
-}' | iconv -f iso-8859-1 -t UTF-8 -c -s 
-
--- a/src/filters/rclpython
+++ b/src/filters/rclpython
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # -*- coding: iso-8859-1 -*-
 """
    MoinMoin - Python source parser and colorizer
--- a/src/filters/rclrar
+++ b/src/filters/rclrar
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

 # Rar file filter for Recoll
 # Adapted from the Zip archive filter by mroark.
--- a/src/filters/rclrtf.py
+++ b/src/filters/rclrtf.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 from __future__ import print_function

 import rclexecm
--- a/src/filters/rclsiduxman
+++ b/src/filters/rclsiduxman
@ -1,92 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclsiduxman,v 1.1 2008-06-09 09:12:05 dockes Exp $  (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# Strip the menu part from sidux manual pages to improve search precision
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclsiduxman"
-filetype="sidux manual htm"
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds sed
-# Delete everything from <div id="menu"> to <div id="main-page">
-# This prints an additional blank line at top which does not matter
-sed -n -e '1,/<div id="menu">/{x;p' -e '}' \
-    -e '/<div id="main-page">/,$p' < "$infile"
-
-# exit normally
-exit 0
--- a/src/filters/rclsoff-flat.py
+++ b/src/filters/rclsoff-flat.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # Copyright (C) 2014 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
@ -22,6 +22,7 @@ import sys
 import rclexecm
 import rclxslt
 from zipfile import ZipFile
+from rclbasehandler import RclBaseHandler

 stylesheet_meta = '''<?xml version="1.0"?>
 <xsl:stylesheet version="1.0"
@ -139,24 +140,14 @@ stylesheet_content  = '''<?xml version="1.0"?>
 </xsl:stylesheet>
 '''

-class OOExtractor:
+class OOExtractor(RclBaseHandler):
    def __init__(self, em):
-        self.em = em
-        self.currentindex = 0
+        super(OOExtractor, self).__init__(em)

-    def extractone(self, params):
-        if "filename:" not in params:
-            self.em.rclog("extractone: no mime or file name")
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-        fn = params["filename:"]
-
-        try:
-            f = open(fn, 'rb')
-            data = f.read()
-            f.close()
-        except Exception as err:
-            self.em.rclog("open failed: %s" % err)
-            return (False, "", "", rclexecm.RclExecM.eofnow)
+    def html_text(self, fn):
+        # Read the whole input file; 'with' closes it even if read() raises
+        with open(fn, 'rb') as f:
+            data = f.read()

        docdata = b'<html>\n<head>\n<meta http-equiv="Content-Type"' \
                  b'content="text/html; charset=UTF-8">\n'
@ -172,31 +163,12 @@ class OOExtractor:

        docdata += b'</head><body>'

-        try:
-            res = rclxslt.apply_sheet_data(stylesheet_content, data)
-            docdata += res
-            docdata += b'</body></html>'
-        except Exception as err:
-            self.em.rclog("bad data in %s: %s" % (fn, err))
-            return (False, "", "", rclexecm.RclExecM.eofnow)
+        res = rclxslt.apply_sheet_data(stylesheet_content, data)
+        docdata += res
+        docdata += b'</body></html>'

-        return (True, docdata, "", rclexecm.RclExecM.eofnext)
+        return docdata
    
-    ###### File type handler api, used by rclexecm ---------->
-    def openfile(self, params):
-        self.currentindex = 0
-        return True
-
-    def getipath(self, params):
-        return self.extractone(params)
-        
-    def getnext(self, params):
-        if self.currentindex >= 1:
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-        else:
-            ret= self.extractone(params)
-            self.currentindex += 1
-            return ret

 if __name__ == '__main__':
    proto = rclexecm.RclExecM()
--- a/src/filters/rclsoff.py
+++ b/src/filters/rclsoff.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # Copyright (C) 2014 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
--- a/src/filters/rclsvg.py
+++ b/src/filters/rclsvg.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # Copyright (C) 2014 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
@ -15,12 +15,11 @@
 #   Free Software Foundation, Inc.,
 #   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 ######################################
-
 from __future__ import print_function

 import sys
 import rclexecm
-import rclxslt
+import rclgenxslt

 stylesheet_all = '''<?xml version="1.0"?>
 <xsl:stylesheet version="1.0"
@ -100,43 +99,7 @@ stylesheet_all = '''<?xml version="1.0"?>
 </xsl:stylesheet>
 '''

-class SVGExtractor:
-    def __init__(self, em):
-        self.em = em
-        self.currentindex = 0
-
-    def extractone(self, params):
-        if "filename:" not in params:
-            self.em.rclog("extractone: no mime or file name")
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-        fn = params["filename:"]
-
-        try:
-            data = open(fn, 'rb').read()
-            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
-        except Exception as err:
-            self.em.rclog("%s: bad data: " % (fn, err))
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-
-        return (True, docdata, "", rclexecm.RclExecM.eofnext)
-    
-    ###### File type handler api, used by rclexecm ---------->
-    def openfile(self, params):
-        self.currentindex = 0
-        return True
-
-    def getipath(self, params):
-        return self.extractone(params)
-        
-    def getnext(self, params):
-        if self.currentindex >= 1:
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-        else:
-            ret= self.extractone(params)
-            self.currentindex += 1
-            return ret
-
 if __name__ == '__main__':
    proto = rclexecm.RclExecM()
-    extract = SVGExtractor(proto)
+    extract = rclgenxslt.XSLTExtractor(proto, stylesheet_all)
    rclexecm.main(proto, extract)
--- a/src/filters/rcltar
+++ b/src/filters/rcltar
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

 # Tar-file filter for Recoll
 # Thanks to Recoll user Martin Ziegler
--- a/src/filters/rcltext.py
+++ b/src/filters/rcltext.py
@ -1,4 +1,19 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
+# Copyright (C) 2016 J.F.Dockes
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

 # Wrapping a text file. Recoll does it internally in most cases, but
 # this is for use by another filter.
@ -7,46 +22,19 @@ from __future__ import print_function

 import rclexecm
 import sys
+from rclbasehandler import RclBaseHandler

-class TxtDump:
+class TxtDump(RclBaseHandler):
    def __init__(self, em):
-        self.em = em
+        super(TxtDump, self).__init__(em)

-    def extractone(self, params):
-        #self.em.rclog("extractone %s %s" % (params["filename:"], \
-        #params["mimetype:"]))
-        if not "filename:" in params:
-            self.em.rclog("extractone: no file name")
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-
-        fn = params["filename:"]
+    def html_text(self, fn):
        # No charset, so recoll will have to use its config to guess it
-        txt = b'<html><head><title></title></head><body><pre>'
-        try:
-            f = open(fn, "rb")
-            txt += self.em.htmlescape(f.read())
-        except Exception as err:
-            self.em.rclog("TxtDump: %s : %s" % (fn, err))
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-            
-        txt += b'</pre></body></html>'
-        return (True, txt, "", rclexecm.RclExecM.eofnext)
-        
-    ###### File type handler api, used by rclexecm ---------->
-    def openfile(self, params):
-        self.currentindex = 0
-        return True
-
-    def getipath(self, params):
-        return self.extractone(params)
-        
-    def getnext(self, params):
-        if self.currentindex >= 1:
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-        else:
-            ret= self.extractone(params)
-            self.currentindex += 1
-            return ret
+        html = b'<html><head><title></title></head><body><pre>'
+        with open(fn, "rb") as f:  # was never closed: leaked one fd per file
+            html += self.em.htmlescape(f.read())
+        html += b'</pre></body></html>'
+        return html

 if __name__ == '__main__':
    proto = rclexecm.RclExecM()
--- a/src/filters/rcltxtlines.py
+++ b/src/filters/rcltxtlines.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 """Index text lines as document (execm handler sample). This exists
 to demonstrate the execm interface and is not meant to be useful or
 efficient"""
--- a/src/filters/rcluncomp.py
+++ b/src/filters/rcluncomp.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 from __future__ import print_function

 import rclexecm
--- a/src/filters/rclwar
+++ b/src/filters/rclwar
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

 # WAR web archive filter for recoll. War file are gzipped tar files

--- a/src/filters/rclwpd
+++ b/src/filters/rclwpd
@ -1,87 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclwpd,v 1.1 2007-08-26 13:34:59 dockes Exp $  (C) 2004 J.F.Dockes
-# Some inspiration from estraier
-#================================================================
-# convert wordperfect documents to html, by  executing the wpd2html program:
-#    http://libwpd.sourceforge.net/download.html
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclwpd"
-filetype=wpd
-
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds wpd2html
-
-# output the result. wpd2html output doesn't seem to need any adjustment?
-
-wpd2html  "$infile" 2> /dev/null 
--- a/src/filters/rclxls.py
+++ b/src/filters/rclxls.py
@ -1,8 +1,6 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

 # Extractor for Excel files.
-# Mso-dumper is not compatible with Python3. We use sys.executable to
-# start the actual extractor, so we need to use python2 too.

 import rclexecm
 import rclexec1
--- a/src/filters/rclxml.py
+++ b/src/filters/rclxml.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # Copyright (C) 2014 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
@ -18,7 +18,7 @@

 import sys
 import rclexecm
-import rclxslt
+import rclgenxslt

 stylesheet_all = '''<?xml version="1.0"?>
 <xsl:stylesheet version="1.0"
@ -56,43 +56,7 @@ stylesheet_all = '''<?xml version="1.0"?>
 </xsl:stylesheet>
 '''

-class XMLExtractor:
-    def __init__(self, em):
-        self.em = em
-        self.currentindex = 0
-
-    def extractone(self, params):
-        if "filename:" not in params:
-            self.em.rclog("extractone: no mime or file name")
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-        fn = params["filename:"]
-
-        try:
-            data = open(fn, 'rb').read()
-            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
-        except Exception as err:
-            self.em.rclog("%s: bad data: " % (fn, err))
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-
-        return (True, docdata, "", rclexecm.RclExecM.eofnext)
-    
-    ###### File type handler api, used by rclexecm ---------->
-    def openfile(self, params):
-        self.currentindex = 0
-        return True
-
-    def getipath(self, params):
-        return self.extractone(params)
-        
-    def getnext(self, params):
-        if self.currentindex >= 1:
-            return (False, "", "", rclexecm.RclExecM.eofnow)
-        else:
-            ret= self.extractone(params)
-            self.currentindex += 1
-            return ret
-
 if __name__ == '__main__':
    proto = rclexecm.RclExecM()
-    extract = XMLExtractor(proto)
+    extract = rclgenxslt.XSLTExtractor(proto, stylesheet_all)
    rclexecm.main(proto, extract)
--- a/src/filters/rclxmp.py
+++ b/src/filters/rclxmp.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # Copyright (C) 2016 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
--- a/src/filters/rclxslt.py
+++ b/src/filters/rclxslt.py
@ -31,40 +31,42 @@ if PY2:
    except:
        print("RECFILTERROR HELPERNOTFOUND python:libxml2/python:libxslt1")
        sys.exit(1);
+    def _apply_sheet_doc(sheet, doc):
+        styledoc = libxml2.readMemory(sheet, len(sheet), '', '',
+                                      options=libxml2.XML_PARSE_NONET)
+        style = libxslt.parseStylesheetDoc(styledoc)
+        result = style.applyStylesheet(doc, None)
+        res = ""
+        try:
+            res = style.saveResultToString(result)
+        except Exception as err:
+            # print("saveResultToString got exception: %s"%err)
+            pass
+        style.freeStylesheet()
+        doc.freeDoc()
+        result.freeDoc()
+        return res
    def apply_sheet_data(sheet, data):
-        styledoc = libxml2.parseMemory(sheet, len(sheet))
-        style = libxslt.parseStylesheetDoc(styledoc)
-        doc = libxml2.parseMemory(data, len(data))
-        result = style.applyStylesheet(doc, None)
-        res = style.saveResultToString(result)
-        style.freeStylesheet()
-        doc.freeDoc()
-        result.freeDoc()
-        return res
+        doc = libxml2.readMemory(data, len(data), '', '',
+                                 options=libxml2.XML_PARSE_NONET)
+        return _apply_sheet_doc(sheet, doc)
    def apply_sheet_file(sheet, fn):
-        styledoc = libxml2.parseMemory(sheet, len(sheet))
-        style = libxslt.parseStylesheetDoc(styledoc)
-        doc = libxml2.parseFile(fn)
-        result = style.applyStylesheet(doc, None)
-        res = style.saveResultToString(result)
-        style.freeStylesheet()
-        doc.freeDoc()
-        result.freeDoc()
-        return res
+        doc = libxml2.readFile(fn, '', options=libxml2.XML_PARSE_NONET)
+        return _apply_sheet_doc(sheet, doc)
 else:
    try:
        from lxml import etree
    except:
        print("RECFILTERROR HELPERNOTFOUND python3:lxml")
        sys.exit(1);
+    def _apply_sheet_doc(sheet, doc):
+        styledoc = etree.fromstring(sheet)
+        transform = etree.XSLT(styledoc)
+        return bytes(transform(doc))
    def apply_sheet_data(sheet, data):
-        styledoc = etree.fromstring(sheet)
-        transform = etree.XSLT(styledoc)
        doc = etree.fromstring(data)
-        return bytes(transform(doc))
+        return _apply_sheet_doc(sheet, doc)
    def apply_sheet_file(sheet, fn):
-        styledoc = etree.fromstring(sheet)
-        transform = etree.XSLT(styledoc)
        doc = etree.parse(fn)
-        return bytes(transform(doc))
+        return _apply_sheet_doc(sheet, doc)

--- a/src/filters/rclzip
+++ b/src/filters/rclzip
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # Copyright (C) 2014 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
@ -28,6 +28,7 @@ from zipfile import ZipFile

 try:
    from recoll import rclconfig
+    from recoll import conftree
    hasrclconfig = True
 except:
    hasrclconfig = False
@ -118,10 +119,14 @@ class ZipExtractor:
        if hasrclconfig:
            config = rclconfig.RclConfig()
            config.setKeyDir(os.path.dirname(filename))
+            if config.getConfParam("zipUseSkippedNames"):
+                skipped = config.getConfParam("skippedNames")
+                if skipped is not None:  # unset parameter: nothing to add
+                    self.em.rclog("skippedNames: %s" % skipped)
+                    self.skiplist += conftree.stringToStrings(skipped)
            skipped = config.getConfParam("zipSkippedNames")
            if skipped is not None:
-                self.skiplist = skipped.split(" ")
-
+                self.skiplist += conftree.stringToStrings(skipped)
        try:
            if rclexecm.PY3:
                # Note: py3 ZipFile wants an str file name, which
--- a/src/filters/recollepub.zip
+++ b/src/filters/recollepub.zip
--- a/src/filters/xls-dump.py
+++ b/src/filters/xls-dump.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 #
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
--- a/src/filters/xlsxmltocsv.py
+++ b/src/filters/xlsxmltocsv.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # Copyright (C) 2015 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
--- a/src/index/fetcher.cpp
+++ b/src/index/fetcher.cpp
@ -16,13 +16,11 @@
 */
 #include "autoconfig.h"

-
 #include "log.h"
 #include "rclconfig.h"
-
 #include "fetcher.h"
 #include "fsfetcher.h"
-#include "bglfetcher.h"
+#include "webqueuefetcher.h"
 #include "exefetcher.h"

 DocFetcher *docFetcherMake(RclConfig *config, const Rcl::Doc& idoc)
@ -37,7 +35,7 @@ DocFetcher *docFetcherMake(RclConfig *config, const Rcl::Doc& idoc)
 	return new FSDocFetcher;
 #ifndef DISABLE_WEB_INDEXER
    } else if (!backend.compare("BGL")) {
-	return new BGLDocFetcher;
+	return new WQDocFetcher;
 #endif
    } else {
        DocFetcher *f = exeDocFetcherMake(config, backend);
--- a/src/index/indexer.cpp
+++ b/src/index/indexer.cpp
@ -27,7 +27,7 @@
 #include "indexer.h"
 #include "fsindexer.h"
 #ifndef DISABLE_WEB_INDEXER
-#include "beaglequeue.h"
+#include "webqueue.h"
 #endif
 #include "mimehandler.h"
 #include "pathut.h"
@ -132,7 +132,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
    if (m_doweb && (typestorun & IxTWebQueue)) {
        runWebFilesMoverScript(m_config);
        deleteZ(m_webindexer);
-        m_webindexer = new BeagleQueueIndexer(m_config, &m_db, m_updater);
+        m_webindexer = new WebQueueIndexer(m_config, &m_db, m_updater);
        if (!m_webindexer || !m_webindexer->index()) {
 	    m_db.close();
            addIdxReason("indexer", "Web index creation failed. See log");
@ -208,7 +208,7 @@ bool ConfIndexer::indexFiles(list<string>& ifiles, int flag)

    if (m_doweb && !myfiles.empty() && !(flag & IxFNoWeb)) {
        if (!m_webindexer)
-            m_webindexer = new BeagleQueueIndexer(m_config, &m_db, m_updater);
+            m_webindexer = new WebQueueIndexer(m_config, &m_db, m_updater);
        if (m_webindexer) {
            ret = ret && m_webindexer->indexFiles(myfiles);
        } else {
@ -267,7 +267,7 @@ bool ConfIndexer::purgeFiles(list<string> &files, int flag)
 #ifndef DISABLE_WEB_INDEXER
    if (m_doweb && !myfiles.empty() && !(flag & IxFNoWeb)) {
        if (!m_webindexer)
-            m_webindexer = new BeagleQueueIndexer(m_config, &m_db, m_updater);
+            m_webindexer = new WebQueueIndexer(m_config, &m_db, m_updater);
        if (m_webindexer) {
            ret = ret && m_webindexer->purgeFiles(myfiles);
        } else {
--- a/src/index/indexer.h
+++ b/src/index/indexer.h
@ -29,7 +29,7 @@
 #include "idxstatus.h"

 class FsIndexer;
-class BeagleQueueIndexer;
+class WebQueueIndexer;

 /** Callback to say what we're doing. If the update func returns false, we
 * stop as soon as possible without corrupting state */
@ -118,7 +118,7 @@ class ConfIndexer {
    Rcl::Db    m_db;
    FsIndexer *m_fsindexer; 
    bool                m_doweb;
-    BeagleQueueIndexer *m_webindexer; 
+    WebQueueIndexer *m_webindexer; 
    DbIxStatusUpdater  *m_updater;
    string              m_reason;

--- a/src/index/recollindex.cpp
+++ b/src/index/recollindex.cpp
@ -49,7 +49,7 @@ using namespace std;
 #include "cancelcheck.h"
 #include "rcldb.h"
 #ifndef DISABLE_WEB_INDEXER
-#include "beaglequeue.h"
+#include "webqueue.h"
 #endif
 #include "recollindex.h"
 #include "fsindexer.h"
--- a/src/index/beaglequeue.cpp
+++ b/src/index/beaglequeue.cpp
@ -16,6 +16,8 @@
 */
 #include "autoconfig.h"

+#include "webqueue.h"
+
 #include <string.h>
 #include <errno.h>
 #include "safesysstat.h"
@ -26,8 +28,7 @@
 #include "rclutil.h"
 #include "log.h"
 #include "fstreewalk.h"
-#include "beaglequeue.h"
-#include "beaglequeuecache.h"
+#include "webstore.h"
 #include "circache.h"
 #include "smallut.h"
 #include "fileudi.h"
@ -44,12 +45,13 @@

 using namespace std;

-// Beagle creates a file named .xxx (where xxx is the name for the main file
-// in the queue), to hold external metadata (http or created by Beagle).
-// This class reads the .xxx, dotfile, and turns it into an Rcl::Doc holder
-class BeagleDotFile {
+// The browser plugin creates a file named .xxx (where xxx is the name
+// for the main file in the queue), to hold external metadata (http or
+// created by the plugin).  This class reads the .xxx, dotfile, and turns
+// it into an Rcl::Doc holder
+class WebQueueDotFile {
 public:
-    BeagleDotFile(RclConfig *conf, const string& fn)
+    WebQueueDotFile(RclConfig *conf, const string& fn)
        : m_conf(conf), m_fn(fn)
    {}

@ -62,7 +64,7 @@ public:
        m_input.getline(cline, LL-1);
        if (!m_input.good()) {
            if (m_input.bad()) {
-                LOGERR("beagleDotFileRead: input.bad()\n" );
+                LOGERR("WebQueueDotFileRead: input.bad()\n" );
            }
            return false;
        }
@ -72,18 +74,18 @@ public:
            ll--;
        }
        line.assign(cline, ll);
-        LOGDEB2("BeagleDotFile:readLine: ["  << (line) << "]\n" );
+        LOGDEB2("WebQueueDotFile:readLine: ["  << (line) << "]\n" );
        return true;
    }

-    // Process a beagle dot file and set interesting stuff in the doc
+    // Process a Web queue dot file and set interesting stuff in the doc
    bool toDoc(Rcl::Doc& doc)
    {
        string line;

 	m_input.open(m_fn.c_str(), ios::in);
        if (!m_input.good()) {
-            LOGERR("BeagleDotFile: open failed for ["  << (m_fn) << "]\n" );
+            LOGERR("WebQueueDotFile: open failed for ["  << (m_fn) << "]\n" );
            return false;
        }

@ -173,24 +175,24 @@ public:

 // Initialize. Compute paths and create a temporary directory that will be
 // used by internfile()
-BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
+WebQueueIndexer::WebQueueIndexer(RclConfig *cnf, Rcl::Db *db,
                                       DbIxStatusUpdater *updfunc)
    : m_config(cnf), m_db(db), m_cache(0), m_updater(updfunc), 
      m_nocacheindex(false)
 {
    m_queuedir = m_config->getWebQueueDir();
    path_catslash(m_queuedir);
-    m_cache = new BeagleQueueCache(cnf);
+    m_cache = new WebStore(cnf);
 }

-BeagleQueueIndexer::~BeagleQueueIndexer()
+WebQueueIndexer::~WebQueueIndexer()
 {
-    LOGDEB("BeagleQueueIndexer::~\n" );
+    LOGDEB("WebQueueIndexer::~\n" );
    deleteZ(m_cache);
 }

 // Index document stored in the cache. 
-bool BeagleQueueIndexer::indexFromCache(const string& udi)
+bool WebQueueIndexer::indexFromCache(const string& udi)
 {
    if (!m_db)
        return false;
@ -202,12 +204,12 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
    string hittype;

    if (!m_cache || !m_cache->getFromCache(udi, dotdoc, data, &hittype)) {
-	LOGERR("BeagleQueueIndexer::indexFromCache: cache failed\n" );
+	LOGERR("WebQueueIndexer::indexFromCache: cache failed\n" );
        return false;
    }

    if (hittype.empty()) {
-        LOGERR("BeagleIndexer::index: cc entry has no hit type\n" );
+        LOGERR("WebQueueIndexer::index: cc entry has no hit type\n" );
        return false;
    }
        
@ -224,11 +226,11 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
        try {
            fis = interner.internfile(doc);
        } catch (CancelExcept) {
-            LOGERR("BeagleQueueIndexer: interrupted\n" );
+            LOGERR("WebQueueIndexer: interrupted\n" );
            return false;
        }
        if (fis != FileInterner::FIDone) {
-            LOGERR("BeagleQueueIndexer: bad status from internfile\n" );
+            LOGERR("WebQueueIndexer: bad status from internfile\n" );
            return false;
        }

@ -242,7 +244,7 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
    }
 }

-void BeagleQueueIndexer::updstatus(const string& udi)
+void WebQueueIndexer::updstatus(const string& udi)
 {
    if (m_updater) {
        ++(m_updater->status.docsdone);
@ -253,18 +255,18 @@ void BeagleQueueIndexer::updstatus(const string& udi)
    }
 }

-bool BeagleQueueIndexer::index()
+bool WebQueueIndexer::index()
 {
    if (!m_db)
        return false;
-    LOGDEB("BeagleQueueIndexer::processqueue: ["  << (m_queuedir) << "]\n" );
+    LOGDEB("WebQueueIndexer::processqueue: ["  << (m_queuedir) << "]\n" );
    m_config->setKeyDir(m_queuedir);
    if (!path_makepath(m_queuedir, 0700)) {
-	LOGERR("BeagleQueueIndexer:: can't create queuedir ["  << (m_queuedir) << "] errno "  << (errno) << "\n" );
+	LOGERR("WebQueueIndexer:: can't create queuedir ["  << (m_queuedir) << "] errno "  << (errno) << "\n" );
 	return false;
    }
    if (!m_cache || !m_cache->cc()) {
-        LOGERR("BeagleQueueIndexer: cache initialization failed\n" );
+        LOGERR("WebQueueIndexer: cache initialization failed\n" );
        return false;
    }
    CirCache *cc = m_cache->cc();
@ -282,7 +284,7 @@ bool BeagleQueueIndexer::index()
        do {
            string udi;
            if (!cc->getCurrentUdi(udi)) {
-                LOGERR("BeagleQueueIndexer:: cache file damaged\n" );
+                LOGERR("WebQueueIndexer:: cache file damaged\n" );
                break;
            }
            if (udi.empty())
@ -295,7 +297,7 @@ bool BeagleQueueIndexer::index()
                    indexFromCache(udi);
                    updstatus(udi);
                } catch (CancelExcept) {
-                    LOGERR("BeagleQueueIndexer: interrupted\n" );
+                    LOGERR("WebQueueIndexer: interrupted\n" );
                    return false;
                }
            }
@ -307,17 +309,17 @@ bool BeagleQueueIndexer::index()
    FsTreeWalker walker(FsTreeWalker::FtwNoRecurse);
    walker.addSkippedName(".*");
    FsTreeWalker::Status status = walker.walk(m_queuedir, *this);
-    LOGDEB("BeagleQueueIndexer::processqueue: done: status "  << (status) << "\n" );
+    LOGDEB("WebQueueIndexer::processqueue: done: status "  << (status) << "\n" );
    return true;
 }

 // Index a list of files (sent by the real time monitor)
-bool BeagleQueueIndexer::indexFiles(list<string>& files)
+bool WebQueueIndexer::indexFiles(list<string>& files)
 {
-    LOGDEB("BeagleQueueIndexer::indexFiles\n" );
+    LOGDEB("WebQueueIndexer::indexFiles\n" );

    if (!m_db) {
-        LOGERR("BeagleQueueIndexer::indexfiles no db??\n" );
+        LOGERR("WebQueueIndexer::indexfiles no db??\n" );
        return false;
    }
    for (list<string>::iterator it = files.begin(); it != files.end();) {
@ -326,7 +328,7 @@ bool BeagleQueueIndexer::indexFiles(list<string>& files)
        }
        string father = path_getfather(*it);
        if (father.compare(m_queuedir)) {
-            LOGDEB("BeagleQueueIndexer::indexfiles: skipping ["  << *it << "] (nq)\n" );
+            LOGDEB("WebQueueIndexer::indexfiles: skipping ["  << *it << "] (nq)\n" );
            it++; continue;
        }
        // Pb: we are often called with the dot file, before the
@ -342,11 +344,11 @@ bool BeagleQueueIndexer::indexFiles(list<string>& files)
        }
        struct stat st;
        if (path_fileprops(*it, &st) != 0) {
-            LOGERR("BeagleQueueIndexer::indexfiles: cant stat ["  << *it << "]\n" );
+            LOGERR("WebQueueIndexer::indexfiles: cant stat ["  << *it << "]\n" );
            it++; continue;
        }
 	if (!S_ISREG(st.st_mode)) {
-	    LOGDEB("BeagleQueueIndexer::indexfiles: skipping ["  << *it << "] (nr)\n" );
+	    LOGDEB("WebQueueIndexer::indexfiles: skipping ["  << *it << "] (nr)\n" );
            it++; continue;
 	}

@ -360,7 +362,7 @@ bool BeagleQueueIndexer::indexFiles(list<string>& files)
 }

 FsTreeWalker::Status 
-BeagleQueueIndexer::processone(const string &path,
+WebQueueIndexer::processone(const string &path,
                               const struct stat *stp,
                               FsTreeWalker::CbFlag flg)
 {
@ -374,9 +376,9 @@ BeagleQueueIndexer::processone(const string &path,

    string dotpath = path_cat(path_getfather(path), 
                              string(".") + path_getsimple(path));
-    LOGDEB("BeagleQueueIndexer: prc1: ["  << (path) << "]\n" );
+    LOGDEB("WebQueueIndexer: prc1: ["  << (path) << "]\n" );

-    BeagleDotFile dotfile(m_config, dotpath);
+    WebQueueDotFile dotfile(m_config, dotpath);
    Rcl::Doc dotdoc;
    string udi, udipath;
    if (!dotfile.toDoc(dotdoc))
@ -388,7 +390,7 @@ BeagleQueueIndexer::processone(const string &path,
    udipath = path_cat(dotdoc.meta[Rcl::Doc::keybght], url_gpath(dotdoc.url));
    make_udi(udipath, cstr_null, udi);

-    LOGDEB("BeagleQueueIndexer: prc1: udi ["  << (udi) << "]\n" );
+    LOGDEB("WebQueueIndexer: prc1: udi ["  << (udi) << "]\n" );
    char ascdate[30];
    sprintf(ascdate, "%ld", long(stp->st_mtime));

@ -410,7 +412,7 @@ BeagleQueueIndexer::processone(const string &path,
    } else {
        Rcl::Doc doc;
        // Store the dotdoc fields in the future doc. In case someone wants
-        // to use beagle-generated fields like beagle:inurl
+        // to use fields generated by the browser plugin like inurl
        doc.meta = dotdoc.meta;

        FileInterner interner(path, stp, m_config,
@ -420,11 +422,11 @@ BeagleQueueIndexer::processone(const string &path,
        try {
            fis = interner.internfile(doc);
        } catch (CancelExcept) {
-            LOGERR("BeagleQueueIndexer: interrupted\n" );
+            LOGERR("WebQueueIndexer: interrupted\n" );
            goto out;
        }
        if (fis != FileInterner::FIDone && fis != FileInterner::FIAgain) {
-            LOGERR("BeagleQueueIndexer: bad status from internfile\n" );
+            LOGERR("WebQueueIndexer: bad status from internfile\n" );
            // TOBEDONE: internfile can return FIAgain here if it is
            // paging a big text file, we should loop. Means we're
            // only indexing the first page for text/plain files
@ -457,11 +459,11 @@ BeagleQueueIndexer::processone(const string &path,
        string fdata;
        file_to_string(path, fdata);
        if (!m_cache || !m_cache->cc()) {
-            LOGERR("BeagleQueueIndexer: cache initialization failed\n" );
+            LOGERR("WebQueueIndexer: cache initialization failed\n" );
            goto out;
        }
        if (!m_cache->cc()->put(udi, &dotfile.m_fields, fdata, 0)) {
-            LOGERR("BeagleQueueIndexer::prc1: cache_put failed; "  << (m_cache->cc()->getReason()) << "\n" );
+            LOGERR("WebQueueIndexer::prc1: cache_put failed; "  << (m_cache->cc()->getReason()) << "\n" );
            goto out;
        }
    }
--- a/src/index/beaglequeue.h
+++ b/src/index/beaglequeue.h
@ -14,18 +14,17 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
-#ifndef _beaglequeue_h_included_
-#define _beaglequeue_h_included_
+#ifndef _webqueue_h_included_
+#define _webqueue_h_included_

 #include <list>

 /**
- * Process the Beagle indexing queue. 
+ * Process the WEB indexing queue. 
 *
- * Beagle MUST NOT be running, else mayhem will ensue. 
- *
- * This is mainly written to reuse the Beagle Firefox plug-in (which
- * copies visited pages and bookmarks to the queue).
+ * This was originally written to reuse the Beagle Firefox plug-in (which
+ * copied visited pages and bookmarks to the queue), long dead and replaced by a
+ * recoll-specific plugin.
 */

 #include "fstreewalk.h"
@ -34,16 +33,16 @@
 class DbIxStatusUpdater;
 class CirCache;
 class RclConfig;
-class BeagleQueueCache;
+class WebStore;
 namespace Rcl {
    class Db;
 }

-class BeagleQueueIndexer : public FsTreeWalkerCB {
+class WebQueueIndexer : public FsTreeWalkerCB {
 public:
-    BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
+    WebQueueIndexer(RclConfig *cnf, Rcl::Db *db,
                       DbIxStatusUpdater *updfunc = 0);
-    ~BeagleQueueIndexer();
+    ~WebQueueIndexer();

    /** This is called by the top indexer in recollindex. 
     *  Does the walking and the talking */
@ -68,7 +67,7 @@ public:
 private:
    RclConfig *m_config;
    Rcl::Db   *m_db;
-    BeagleQueueCache  *m_cache;
+    WebStore  *m_cache;
    string     m_queuedir;
    DbIxStatusUpdater *m_updater;
    bool       m_nocacheindex;
@ -77,4 +76,4 @@ private:
    void updstatus(const string& udi);
 };

-#endif /* _beaglequeue_h_included_ */
+#endif /* _webqueue_h_included_ */
--- a/src/index/webqueuefetcher.cpp
+++ b/src/index/webqueuefetcher.cpp
@ -16,23 +16,26 @@
 */
 #include "autoconfig.h"

+#include "webqueuefetcher.h"
+
 #include <mutex>

 #include "rcldoc.h"
 #include "fetcher.h"
-#include "bglfetcher.h"
 #include "log.h"
-#include "beaglequeuecache.h"
+#include "webstore.h"

-// We use a single beagle cache object to access beagle data. We protect it 
+using std::string;
+
+// We use a single WebStore object to access the data. We protect it
 // against multiple thread access.
 static std::mutex o_beagler_mutex;

-bool BGLDocFetcher::fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out)
+bool WQDocFetcher::fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out)
 {
    string udi;
    if (!idoc.getmeta(Rcl::Doc::keyudi, &udi) || udi.empty()) {
-	LOGERR("BGLDocFetcher:: no udi in idoc\n" );
+	LOGERR("WQDocFetcher:: no udi in idoc\n" );
 	return false;
    }
    Rcl::Doc dotdoc;
@ -41,24 +44,23 @@ bool BGLDocFetcher::fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out)
 	// Retrieve from our webcache (beagle data). The beagler
 	// object is created at the first call of this routine and
 	// deleted when the program exits.
-	static BeagleQueueCache o_beagler(cnf);
+	static WebStore o_beagler(cnf);
 	if (!o_beagler.getFromCache(udi, dotdoc, out.data)) {
-	    LOGINFO("BGLDocFetcher::fetch: failed for ["  << (udi) << "]\n" );
+	    LOGINFO("WQDocFetcher::fetch: failed for [" << udi << "]\n");
 	    return false;
 	}
    }
    if (dotdoc.mimetype.compare(idoc.mimetype)) {
-	LOGINFO("BGLDocFetcher:: udi ["  << (udi) << "], mimetp mismatch: in: ["  << (idoc.mimetype) << "], bgl ["  << (dotdoc.mimetype) << "]\n" );
+	LOGINFO("WQDocFetcher:: udi [" << udi << "], mimetp mismatch: in: [" <<
+                idoc.mimetype << "], bgl [" << dotdoc.mimetype << "]\n");
    }
    out.kind = RawDoc::RDK_DATA;
    return true;
 }
    
-bool BGLDocFetcher::makesig(RclConfig* cnf, const Rcl::Doc& idoc, string& sig)
+bool WQDocFetcher::makesig(RclConfig* cnf, const Rcl::Doc& idoc, string& sig)
 {
-    // Bgl sigs are empty
+    // Web queue sigs are empty
    sig.clear();
    return true;
 }
-
-
--- a/src/index/webqueuefetcher.h
+++ b/src/index/webqueuefetcher.h
@ -14,18 +14,19 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
-#ifndef _BGLFETCHER_H_INCLUDED_
-#define _BGLFETCHER_H_INCLUDED_
+#ifndef _WEBQUEUEFETCHER_H_INCLUDED_
+#define _WEBQUEUEFETCHER_H_INCLUDED_
+
 #include "fetcher.h"

 /** 
- * The Beagle cache fetcher: 
+ * The WEB queue cache fetcher: 
 */
-class BGLDocFetcher : public DocFetcher{
+class WQDocFetcher : public DocFetcher{
    virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out);
    virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc,
                         std::string& sig);
-    virtual ~BGLDocFetcher() {}
+    virtual ~WQDocFetcher() {}
 };

-#endif /* _BGLFETCHER_H_INCLUDED_ */
+#endif /* _WEBQUEUEFETCHER_H_INCLUDED_ */
--- a/src/internfile/internfile.cpp
+++ b/src/internfile/internfile.cpp
@ -47,6 +47,7 @@ using namespace std;
 #include "copyfile.h"
 #include "fetcher.h"
 #include "extrameta.h"
+#include "uncomp.h"

 // The internal path element separator. This can't be the same as the rcldb 
 // file to ipath separator : "|"
@ -188,7 +189,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
 	    int maxkbs = -1;
 	    if (!m_cfg->getConfParam("compressedfilemaxkbs", &maxkbs) ||
 		maxkbs < 0 || !stp || int(stp->st_size / 1024) < maxkbs) {
-		if (!m_uncomp.uncompressfile(m_fn, ucmd, m_tfile)) {
+		if (!m_uncomp->uncompressfile(m_fn, ucmd, m_tfile)) {
 		    return;
 		}
 		LOGDEB1("FileInterner:: after ucomp: tfile " << m_tfile <<"\n");
@ -293,8 +294,8 @@ void FileInterner::init(const string &data, RclConfig *cnf,
 	result = df->set_document_data(m_mimetype, data.c_str(), data.length());
    } else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
 	TempFile temp = dataToTempFile(data, m_mimetype);
-	if (temp && 
-	    (result = df->set_document_file(m_mimetype, temp->filename()))) {
+	if (temp.ok() && 
+	    (result = df->set_document_file(m_mimetype, temp.filename()))) {
 	    m_tmpflgs[m_handlers.size()] = true;
 	    m_tempfiles.push_back(temp);
 	}
@ -312,7 +313,8 @@ void FileInterner::init(const string &data, RclConfig *cnf,
 void FileInterner::initcommon(RclConfig *cnf, int flags)
 {
    m_cfg = cnf;
-    m_uncomp = m_forPreview = ((flags & FIF_forPreview) != 0);
+    m_forPreview = ((flags & FIF_forPreview) != 0);
+    m_uncomp = new Uncomp(m_forPreview);
    // Initialize handler stack.
    m_handlers.reserve(MAXHANDLERS);
    for (unsigned int i = 0; i < MAXHANDLERS; i++)
@ -373,10 +375,10 @@ bool FileInterner::makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig)

 FileInterner::~FileInterner()
 {
-    for (vector<RecollFilter*>::iterator it = m_handlers.begin();
-	 it != m_handlers.end(); it++) {
-        returnMimeHandler(*it);
+    for (auto& entry: m_handlers) {
+        returnMimeHandler(entry);
    }
+    delete m_uncomp;
    // m_tempfiles will take care of itself
 }

@ -386,14 +388,14 @@ FileInterner::~FileInterner()
 TempFile FileInterner::dataToTempFile(const string& dt, const string& mt)
 {
    // Create temp file with appropriate suffix for mime type
-    TempFile temp(new TempFileInternal(m_cfg->getSuffixFromMimeType(mt)));
-    if (!temp->ok()) {
+    TempFile temp(m_cfg->getSuffixFromMimeType(mt));
+    if (!temp.ok()) {
 	LOGERR("FileInterner::dataToTempFile: cant create tempfile: " <<
-               temp->getreason() << "\n");
+               temp.getreason() << "\n");
 	return TempFile();
    }
    string reason;
-    if (!stringtofile(dt, temp->filename(), reason)) {
+    if (!stringtofile(dt, temp.filename(), reason)) {
 	LOGERR("FileInterner::dataToTempFile: stringtofile: " <<reason << "\n");
 	return TempFile();
    }
@ -723,8 +725,8 @@ int FileInterner::addHandler()
 	setres = newflt->set_document_data(mimetype,txt->c_str(),txt->length());
    } else if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
 	TempFile temp = dataToTempFile(*txt, mimetype);
-	if (temp && 
-	    (setres = newflt->set_document_file(mimetype, temp->filename()))) {
+	if (temp.ok() && 
+	    (setres = newflt->set_document_file(mimetype, temp.filename()))) {
 	    m_tmpflgs[m_handlers.size()] = true;
 	    m_tempfiles.push_back(temp);
 	    // Hack here, but really helps perfs: if we happen to
@ -765,7 +767,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc,const string& ipath)
    LOGDEB("FileInterner::internfile. ipath [" << ipath << "]\n");

    // Get rid of possible image tempfile from older call
-    m_imgtmp.reset();
+    m_imgtmp = TempFile();

    if (m_handlers.size() < 1) {
 	// Just means the constructor failed
@ -916,9 +918,8 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc,const string& ipath)
 bool FileInterner::tempFileForMT(TempFile& otemp, RclConfig* cnf, 
                                 const string& mimetype)
 {
-    TempFile temp(new TempFileInternal(
-                      cnf->getSuffixFromMimeType(mimetype)));
-    if (!temp->ok()) {
+    TempFile temp(cnf->getSuffixFromMimeType(mimetype));
+    if (!temp.ok()) {
        LOGERR("FileInterner::tempFileForMT: can't create temp file\n");
        return false;
    }
@ -970,7 +971,7 @@ bool FileInterner::topdocToFile(
        if (!tempFileForMT(temp, cnf, idoc.mimetype)) {
            return false;
        }
-        filename = temp->filename();
+        filename = temp.filename();
    } else {
        filename = tofile.c_str();
    }
@ -985,7 +986,7 @@ bool FileInterner::topdocToFile(
                return false;
            }
        }
-        fn = temp ? temp->filename() : rawdoc.data;
+        fn = temp.ok() ? temp.filename() : rawdoc.data;
        if (!copyfile(fn.c_str(), filename, reason)) {
            LOGERR("FileInterner::idocToFile: copyfile: " << reason << "\n");
            return false;
@ -1040,7 +1041,7 @@ bool FileInterner::interntofile(TempFile& otemp, const string& tofile,
        if (!tempFileForMT(temp, m_cfg, mimetype)) {
            return false;
        }
-	filename = temp->filename();
+	filename = temp.filename();
    } else {
 	filename = tofile.c_str();
    }
@ -1106,9 +1107,8 @@ bool FileInterner::maybeUncompressToTemp(TempFile& temp, const string& fn,
                " kbs\n");
        return false;
    }
-    temp = 
-      TempFile(new TempFileInternal(cnf->getSuffixFromMimeType(doc.mimetype)));
-    if (!temp->ok()) {
+    temp = TempFile(cnf->getSuffixFromMimeType(doc.mimetype));
+    if (!temp.ok()) {
        LOGERR("FileInterner: cant create temporary file\n");
        return false;
    }
@ -1123,9 +1123,9 @@ bool FileInterner::maybeUncompressToTemp(TempFile& temp, const string& fn,
    // reason for this, but it's not nice here. Have to move, the
    // uncompressed file, hopefully staying on the same dev.
    string reason;
-    if (!renameormove(uncomped.c_str(), temp->filename(), reason)) {
+    if (!renameormove(uncomped.c_str(), temp.filename(), reason)) {
        LOGERR("FileInterner::maybeUncompress: move [" << uncomped <<
-               "] -> [" << temp->filename() << "] failed: " << reason << "\n");
+               "] -> [" << temp.filename() << "] failed: " << reason << "\n");
        return false;
    }
    return true;
--- a/src/internfile/internfile.h
+++ b/src/internfile/internfile.h
@ -28,14 +28,15 @@ using std::map;
 using std::set;

 #include "mimehandler.h"
-#include "uncomp.h"
 #include "pathut.h"
+#include "rclutil.h"

 class RclConfig;
 namespace Rcl {
    class Doc;
 }

+class Uncomp;
 struct stat;

 /** Storage for missing helper program info. We want to keep this out of the 
@ -277,7 +278,7 @@ class FileInterner {
    string                 m_reason;
    FIMissingStore        *m_missingdatap{nullptr};

-    Uncomp                 m_uncomp;
+    Uncomp                 *m_uncomp{nullptr};

    bool                   m_noxattrs; // disable xattrs usage
    bool                   m_direct; // External app did the extraction
--- a/src/internfile/mh_mail.cpp
+++ b/src/internfile/mh_mail.cpp
@ -260,8 +260,8 @@ bool MimeHandlerMail::processAttach()
            att->m_charset << "] fn [" << att->m_filename << "]\n");

    // Erase current content and replace
-    m_metaData[cstr_dj_keycontent] = string();
    string& body = m_metaData[cstr_dj_keycontent];
+    body.clear();
    att->m_part->getBody(body, 0, att->m_part->bodylength);
    {
 	string decoded;
@ -285,10 +285,15 @@ bool MimeHandlerMail::processAttach()

    // Special case for text/plain content. Internfile should deal
    // with this but it expects text/plain to be utf-8 already, so we
-    // handle the transcoding if needed
+    // handle the transcoding if needed. Same kind of issue for the MD5
    if (m_metaData[cstr_dj_keymt] == cstr_textplain) {
-	if (!txtdcode("MimeHandlerMail::processAttach"))
+	if (!txtdcode("MimeHandlerMail::processAttach")) {
 	    body.clear();
+        } else if (!m_forPreview) {
+            string md5, xmd5;
+            MD5String(body, md5);
+            m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
+        }
    }

    // Ipath
--- a/src/internfile/mh_mbox.cpp
+++ b/src/internfile/mh_mbox.cpp
@ -23,16 +23,6 @@
 #include "safesysstat.h"
 #include <time.h>

-#if defined(_WIN32)
-#define USING_STD_REGEX
-#endif
-
-#ifdef USING_STD_REGEX
-#include <regex>
-#else
-#include <regex.h>
-#endif
-
 #include <cstring>
 #include <map>
 #include <mutex>
@ -363,7 +353,7 @@ static inline void stripendnl(line_type& line, int& ll)
 // This was added as an alternative format. By the way it also fools "mail" and
 // emacs-vm, Recoll is not alone
 // Update: 2009-11-27: word after From may be quoted string: From "john bull"
-static const  char *frompat =  
+static const string frompat{
 "^From[ ]+([^ ]+|\"[^\"]+\")[ ]+"    // 'From (toto@tutu|"john bull") '
 "[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Fri Oct 26
 "[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+"             // Time, seconds optional
@ -374,45 +364,15 @@ static const  char *frompat =
 "[[:alpha:]]{3},[ ]+[0-3]?[0-9][ ]+[[:alpha:]]{3}[ ]+" // Mon, 8 May
 "[12][0-9][0-9][0-9][ ]+"                              // Year
 "[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?"                  // Time, secs optional
-    ;
+    };

 // Extreme thunderbird brokiness. Will sometimes use From lines
 // exactly like: From ^M (From followed by space and eol). We only
 // test for this if QUIRKS_TBIRD is set
-static const char *miniTbirdFrom = "^From $";
-#ifndef USING_STD_REGEX
-static regex_t fromregex;
-static regex_t minifromregex;
-#define M_regexec(A,B,C,D,E) regexec(&(A),B,C,D,E)
-#else
-basic_regex<char> fromregex;
-basic_regex<char> minifromregex;
-#define REG_NOSUB std::regex_constants::nosubs
-#define REG_EXTENDED std::regex_constants::extended
-#define M_regexec(A, B, C, D, E) (!regex_match(B,A))
+static const string miniTbirdFrom{"^From $"};

-#endif
-
-static bool regcompiled;
-static std::mutex o_regex_mutex;
-
-static void compileregexes()
-{
-    std::unique_lock<std::mutex> locker(o_regex_mutex);
-    // As the initial test of regcompiled is unprotected the value may
-    // have changed while we were waiting for the lock. Test again now
-    // that we are alone.
-    if (regcompiled)
-	return;
-#ifndef USING_STD_REGEX
-    regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED);
-    regcomp(&minifromregex, miniTbirdFrom, REG_NOSUB|REG_EXTENDED);
-#else
-    fromregex = basic_regex<char>(frompat, REG_NOSUB | REG_EXTENDED);
-    minifromregex = basic_regex<char>(miniTbirdFrom, REG_NOSUB | REG_EXTENDED);
-#endif
-    regcompiled = true;
-}
+static SimpleRegexp fromregex(frompat, SimpleRegexp::SRE_NOSUB);
+static SimpleRegexp minifromregex(miniTbirdFrom, SimpleRegexp::SRE_NOSUB);

 bool MimeHandlerMbox::next_document()
 {
@ -432,13 +392,11 @@ bool MimeHandlerMbox::next_document()
 	LOGDEB("MimeHandlerMbox::next_document: can't preview folders!\n");
 	return false;
    }
-    LOGDEB0("MimeHandlerMbox::next_document: fn " << (m_fn) << ", msgnum " << (m_msgnum) << " mtarg " << (mtarg) << " \n");
+    LOGDEB0("MimeHandlerMbox::next_document: fn " << m_fn << ", msgnum " <<
+            m_msgnum << " mtarg " << mtarg << " \n");
    if (mtarg == 0)
 	mtarg = -1;

-    if (!regcompiled) {
-	compileregexes();
-    }

    // If we are called to retrieve a specific message, seek to bof
    // (then scan up to the message). This is for the case where the
@ -452,14 +410,14 @@ bool MimeHandlerMbox::next_document()
    if (mtarg > 0) {
        mbhoff_type off;
        line_type line;
-        LOGDEB0("MimeHandlerMbox::next_doc: mtarg " << (mtarg) << " m_udi[" << (m_udi) << "]\n");
+        LOGDEB0("MimeHandlerMbox::next_doc: mtarg " << mtarg << " m_udi[" <<
+                m_udi << "]\n");
        if (!m_udi.empty() && 
            (off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 && 
            fseeko(fp, (off_t)off, SEEK_SET) >= 0 && 
            fgets(line, LL, fp) &&
-            (!M_regexec(fromregex, line, 0, 0, 0) || 
-	     ((m_quirks & MBOXQUIRK_TBIRD) && 
-	      !M_regexec(minifromregex, line, 0, 0, 0)))	) {
+            (fromregex(line) || ((m_quirks & MBOXQUIRK_TBIRD) && 
+                                 minifromregex(line)))	) {
                LOGDEB0("MimeHandlerMbox: Cache: From_ Ok\n");
                fseeko(fp, (off_t)off, SEEK_SET);
                m_msgnum = mtarg -1;
@ -487,7 +445,8 @@ bool MimeHandlerMbox::next_document()
 	m_lineno++;
 	int ll;
 	stripendnl(line, ll);
-	LOGDEB2("mhmbox:next: hadempty " << (hademptyline) << " lineno " << (m_lineno) << " ll " << (ll) << " Line: [" << (line) << "]\n");
+	LOGDEB2("mhmbox:next: hadempty " << hademptyline << " lineno " <<
+                m_lineno << " ll " << ll << " Line: [" << line << "]\n");
 	if (hademptyline) {
 	    if (ll > 0) {
 		// Non-empty line with empty line flag set, reset flag
@ -501,11 +460,12 @@ bool MimeHandlerMbox::next_document()
 		/* The 'F' compare is redundant but it improves performance
 		   A LOT */
 		if (line[0] == 'F' && (
-		    !M_regexec(fromregex, line, 0, 0, 0) || 
-		    ((m_quirks & MBOXQUIRK_TBIRD) && 
-		     !M_regexec(minifromregex, line, 0, 0, 0)))
+                        fromregex(line) || 
+                        ((m_quirks & MBOXQUIRK_TBIRD) && minifromregex(line)))
 		    ) {
-		    LOGDEB0("MimeHandlerMbox: msgnum " << (m_msgnum) << ", From_ at line " << (m_lineno) << ": [" << (line) << "]\n");
+		    LOGDEB0("MimeHandlerMbox: msgnum " << m_msgnum <<
+                            ", From_ at line " << m_lineno << ": [" << line
+                            << "]\n");
 		    if (storeoffsets)
 			m_offsets.push_back(message_end);
 		    m_msgnum++;
@ -528,13 +488,15 @@ bool MimeHandlerMbox::next_document()
 	    line[ll+1] = 0;
 	    msgtxt += line;
 	    if (msgtxt.size() > max_mbox_member_size) {
-		LOGERR("mh_mbox: huge message (more than " << (max_mbox_member_size/(1024*1024)) << " MB) inside " << (m_fn) << ", giving up\n");
+		LOGERR("mh_mbox: huge message (more than " <<
+                       max_mbox_member_size/(1024*1024) << " MB) inside " <<
+                       m_fn << ", giving up\n");
 		return false;
 	    }
 	}
    }
-    LOGDEB2("Message text length " << (msgtxt.size()) << "\n");
-    LOGDEB2("Message text: [" << (msgtxt) << "]\n");
+    LOGDEB2("Message text length " << msgtxt.size() << "\n");
+    LOGDEB2("Message text: [" << msgtxt << "]\n");
    char buf[20];
    // m_msgnum was incremented when hitting the next From_ or eof, so the data
    // is for m_msgnum - 1
--- a/src/internfile/uncomp.cpp
+++ b/src/internfile/uncomp.cpp
@ -35,6 +35,12 @@ using std::vector;

 Uncomp::UncompCache Uncomp::o_cache;

+Uncomp::Uncomp(bool docache) // docache -> m_docache, tested by ~Uncomp()
+	: m_docache(docache)
+{
+    LOGDEB0("Uncomp::Uncomp: m_docache: " << m_docache << "\n");
+}
+
 bool Uncomp::uncompressfile(const string& ifn, 
 			    const vector<string>& cmdv, string& tfile)
 {
@ -57,7 +63,8 @@ bool Uncomp::uncompressfile(const string& ifn,
    }
    // Make sure tmp dir is empty. we guarantee this to filters
    if (!m_dir || !m_dir->ok() || !m_dir->wipe()) {
-	LOGERR("uncompressfile: can't clear temp dir "  << (m_dir->dirname()) << "\n" );
+	LOGERR("uncompressfile: can't clear temp dir " << // m_dir may be null
+               (m_dir ? m_dir->dirname() : "(null)") << "\n");
 	return false;
    }

@ -66,12 +73,14 @@ bool Uncomp::uncompressfile(const string& ifn,
    int pc;
    long long availmbs;
    if (!fsocc(m_dir->dirname(), &pc, &availmbs)) {
-        LOGERR("uncompressfile: can't retrieve avail space for "  << (m_dir->dirname()) << "\n" );
+        LOGERR("uncompressfile: can't retrieve avail space for " <<
+               m_dir->dirname() << "\n");
        // Hope for the best
    } else {
 	long long fsize = path_filesize(ifn);
        if (fsize < 0) {
-            LOGERR("uncompressfile: stat input file "  << (ifn) << " errno "  << (errno) << "\n" );
+            LOGERR("uncompressfile: stat input file " << ifn << " errno " <<
+                   errno << "\n");
            return false;
        }
        // We need at least twice the file size for the uncompressed
@ -83,7 +92,9 @@ bool Uncomp::uncompressfile(const string& ifn,
        long long filembs = fsize / (1024 * 1024); 
        
        if (availmbs < 2 * filembs + 1) {
-            LOGERR("uncompressfile. "  << (lltodecstr(availmbs)) << " MBs available in "  << (m_dir->dirname()) << " not enough to uncompress "  << (ifn) << " of size "  << (lltodecstr(filembs)) << " mbs\n" );
+            LOGERR("uncompressfile. " << availmbs << " MBs available in " <<
+                   m_dir->dirname() << " not enough to uncompress " <<
+                   ifn << " of size "  << filembs << " MBs\n");
            return false;
        }
    }
@ -107,9 +118,10 @@ bool Uncomp::uncompressfile(const string& ifn,
    ExecCmd ex;
    int status = ex.doexec(cmd, args, 0, &tfile);
    if (status || tfile.empty()) {
-	LOGERR("uncompressfile: doexec: failed for ["  << (ifn) << "] status 0x"  << (status) << "\n" );
+	LOGERR("uncompressfile: doexec: failed for [" << ifn << "] status 0x" <<
+               status << "\n");
 	if (!m_dir->wipe()) {
-	    LOGERR("uncompressfile: wipedir failed\n" );
+	    LOGERR("uncompressfile: wipedir failed\n");
 	}
 	return false;
    }
@ -122,6 +134,8 @@ bool Uncomp::uncompressfile(const string& ifn,

 Uncomp::~Uncomp()
 {
+    LOGDEB0("Uncomp::~Uncomp: m_docache: " << m_docache << " m_dir " <<
+            (m_dir?m_dir->dirname():"(null)") << "\n");
    if (m_docache) {
        std::unique_lock<std::mutex> lock(o_cache.m_lock);
 	delete o_cache.m_dir;
@ -133,4 +147,12 @@ Uncomp::~Uncomp()
    }
 }

-
+void Uncomp::clearcache()
+{
+    LOGDEB0("Uncomp::clearcache\n");
+    std::unique_lock<std::mutex> lock(o_cache.m_lock); // same lock as ~Uncomp()
+    delete o_cache.m_dir;
+    o_cache.m_dir = 0; // ~UncompCache() deletes m_dir again: must not dangle
+    o_cache.m_tfile.clear();
+    o_cache.m_srcpath.clear();
+}
--- a/src/internfile/uncomp.h
+++ b/src/internfile/uncomp.h
@ -27,10 +27,7 @@
 /// Uncompression script interface.
 class Uncomp {
 public:
-    Uncomp(bool docache = false)
-	: m_dir(0), m_docache(docache)
-    {
-    }
+    explicit Uncomp(bool docache = false);
    ~Uncomp();

    /** Uncompress the input file into a temporary one, by executing the
@ -41,25 +38,22 @@ public:
    bool uncompressfile(const std::string& ifn, 
 			const std::vector<std::string>& cmdv,
 			std::string& tfile);
-
+    static void clearcache();
+    
 private:
-    TempDir *m_dir;
+    TempDir *m_dir{0};
    std::string   m_tfile;
    std::string   m_srcpath;
    bool m_docache;

    class UncompCache {
    public:
-	UncompCache()
-	    : m_dir(0)
-	{
-	}
-	~UncompCache()
-	{
+	UncompCache() {}
+	~UncompCache() {
 	    delete m_dir;
 	}
        std::mutex m_lock;
-	TempDir *m_dir;
+	TempDir *m_dir{0};
 	std::string   m_tfile;
 	std::string   m_srcpath;
    };
--- a/src/python/pychm/AUTHORS
+++ b/src/python/pychm/AUTHORS
@ -0,0 +1,28 @@
+Author
+------
+
+Rubens Ramos <rubensr@users.sourceforge.net>
+
+Maintainer
+----------
+
+Mikhail Gusarov <dottedmag@dottedmag.net>
+
+Python3 port minor changes
+--------------------------
+
+Jean-Francois Dockes <jf@dockes.org>
+
+Acknowledgements
+----------------
+
+This work would not have been possible without the existence of chmlib,
+developed by Jed Wing, and a lot of the python code used to parse the contents
+tree and to decode the index files was heavily based on the code implemented by
+Razvan Cojocaru <razvanco@gmx.net> for the xCHM viewer.
+
+Bug reports
+-----------
+
+can3p, Chang (changshu), Hristo Iliev, Carlos Liu, Torsten Marek, Dmitri
+(nebraskin), Fredrik de Vibe, Glenn Washburn
--- a/src/python/pychm/COPYING
+++ b/src/python/pychm/COPYING
@ -0,0 +1,281 @@
+	    	GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
--- a/src/python/pychm/MANIFEST.in
+++ b/src/python/pychm/MANIFEST.in
@ -0,0 +1,2 @@
+include COPYING
+include chm/swig_chm.i
--- a/src/python/pychm/README-RECOLL.txt
+++ b/src/python/pychm/README-RECOLL.txt
@ -0,0 +1,11 @@
+May 2018:
+
+pychm has no python3 version. The pull request I submitted for the port is
+sitting there, and so is the Debian bug.
+
+https://github.com/dottedmag/pychm/pull/5
+
+Which is why Recoll bundles pychm, enhanced for Python3, for now. The
+source repo is here:
+
+https://github.com/medoc92/pychm
--- a/src/python/pychm/pychm.egg-info/PKG-INFO
+++ b/src/python/pychm/pychm.egg-info/PKG-INFO
@ -0,0 +1,15 @@
+Metadata-Version: 1.0
+Name: pychm
+Version: 0.8.4.1+git
+Summary: Python package to handle CHM files
+Home-page: https://github.com/dottedmag/pychm
+Author: Mikhail Gusarov
+Author-email: dottedmag@dottedmag.net
+License: GPL
+Description: 
+        The chm package provides three modules, chm, chmlib and extra, which provide
+        access to the API implemented by the C library chmlib and some additional
+        classes and functions. They are used to access MS-ITSS encoded files -
+        Compressed Html Help files (.chm).
+        
+Platform: UNKNOWN
--- a/src/python/pychm/pychm.egg-info/SOURCES.txt
+++ b/src/python/pychm/pychm.egg-info/SOURCES.txt
@ -0,0 +1,10 @@
+setup.py
+/home/dockes/projets/fulltext/recoll/src/python/pychm/../../python/pychm/chm/__init__.py
+/home/dockes/projets/fulltext/recoll/src/python/pychm/../../python/pychm/chm/chm.py
+/home/dockes/projets/fulltext/recoll/src/python/pychm/../../python/pychm/chm/chmlib.py
+/home/dockes/projets/fulltext/recoll/src/python/pychm/../../python/pychm/pychm.egg-info/PKG-INFO
+/home/dockes/projets/fulltext/recoll/src/python/pychm/../../python/pychm/pychm.egg-info/SOURCES.txt
+/home/dockes/projets/fulltext/recoll/src/python/pychm/../../python/pychm/pychm.egg-info/dependency_links.txt
+/home/dockes/projets/fulltext/recoll/src/python/pychm/../../python/pychm/pychm.egg-info/top_level.txt
+/home/dockes/projets/fulltext/recoll/src/python/pychm/chm/extra.c
+/home/dockes/projets/fulltext/recoll/src/python/pychm/chm/swig_chm.c
--- a/src/python/pychm/pychm.egg-info/dependency_links.txt
+++ b/src/python/pychm/pychm.egg-info/dependency_links.txt
@ -0,0 +1 @@
+
--- a/src/python/pychm/pychm.egg-info/top_level.txt
+++ b/src/python/pychm/pychm.egg-info/top_level.txt
@ -0,0 +1 @@
+chm
--- a/src/python/pychm/recollchm/init.py
+++ b/src/python/pychm/recollchm/init.py
@ -0,0 +1,32 @@
+# Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
+#
+# pychm is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program; see the file COPYING.  If not,
+# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA
+#
+
+'''
+   chm - A package to manipulate CHM files
+
+   The chm package provides four modules: chm, chmlib, extra and
+   _chmlib. _chmlib and chmlib are very low level libraries generated
+   from SWIG interface files, and are simple wrappers around the API
+   defined by the C library chmlib.
+   The extra module adds full-text search support.
+   The chm module provides some higher level classes to simplify
+   access to the CHM files information.
+'''
+__all__ = ["chm", "chmlib", "_chmlib", "extra"]
+__version__ = "0.8.4.1+git"
+__revision__ = "$Id$"
--- a/src/python/pychm/recollchm/chm.py
+++ b/src/python/pychm/recollchm/chm.py
@ -0,0 +1,502 @@
+# Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
+#
+# Based on code by:
+# Copyright (C) 2003  Razvan Cojocaru <razvanco@gmx.net>
+#
+# pychm is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program; see the file COPYING.  If not,
+# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA
+
+'''
+   chm - A high-level front end for the chmlib python module.
+
+   The chm module provides high level access to the functionality
+   included in chmlib. It encapsulates functions in the CHMFile class, and
+   provides some additional features, such as the ability to obtain
+   the contents tree of a CHM archive.
+
+'''
+
+from . import chmlib
+from . import extra
+import array
+import os.path
+import sys
+
+charset_table = {  # charset id -> Python codec name, None if unmapped
+    0: 'iso8859_1',  # ANSI_CHARSET
+    238: 'iso8859_2',  # EASTEUROPE_CHARSET
+    178: 'iso8859_6',  # ARABIC_CHARSET
+    161: 'iso8859_7',  # GREEK_CHARSET
+    177: 'iso8859_8',  # HEBREW_CHARSET
+    162: 'iso8859_9',  # TURKISH_CHARSET
+    222: 'iso8859_11',  # THAI_CHARSET - hmm not in python 2.2...
+    186: 'iso8859_13',  # BALTIC_CHARSET
+    204: 'cp1251',  # RUSSIAN_CHARSET
+    255: 'cp437',  # OEM_CHARSET
+    128: 'cp932',  # SHIFTJIS_CHARSET
+    134: 'cp936',  # GB2312_CHARSET
+    129: 'cp949',  # HANGUL_CHARSET
+    136: 'cp950',  # CHINESEBIG5_CHARSET
+    1: None,  # DEFAULT_CHARSET
+    2: None,  # SYMBOL_CHARSET
+    130: None,  # JOHAB_CHARSET
+    163: None,  # VIETNAMESE_CHARSET
+    77: None,  # MAC_CHARSET
+}
+
+locale_table = {  # presumably Windows LCID -> (codec|None, language, group)
+    0x0436: ('iso8859_1', "Afrikaans", "Western Europe & US"),
+    0x041c: ('iso8859_2', "Albanian", "Central Europe"),
+    0x0401: ('iso8859_6', "Arabic_Saudi_Arabia", "Arabic"),
+    0x0801: ('iso8859_6', "Arabic_Iraq", "Arabic"),
+    0x0c01: ('iso8859_6', "Arabic_Egypt", "Arabic"),
+    0x1001: ('iso8859_6', "Arabic_Libya", "Arabic"),
+    0x1401: ('iso8859_6', "Arabic_Algeria", "Arabic"),
+    0x1801: ('iso8859_6', "Arabic_Morocco", "Arabic"),
+    0x1c01: ('iso8859_6', "Arabic_Tunisia", "Arabic"),
+    0x2001: ('iso8859_6', "Arabic_Oman", "Arabic"),
+    0x2401: ('iso8859_6', "Arabic_Yemen", "Arabic"),
+    0x2801: ('iso8859_6', "Arabic_Syria", "Arabic"),
+    0x2c01: ('iso8859_6', "Arabic_Jordan", "Arabic"),
+    0x3001: ('iso8859_6', "Arabic_Lebanon", "Arabic"),
+    0x3401: ('iso8859_6', "Arabic_Kuwait", "Arabic"),
+    0x3801: ('iso8859_6', "Arabic_UAE", "Arabic"),
+    0x3c01: ('iso8859_6', "Arabic_Bahrain", "Arabic"),
+    0x4001: ('iso8859_6', "Arabic_Qatar", "Arabic"),
+    0x042b: (None, "Armenian", "Armenian"),
+    0x042c: ('iso8859_9', "Azeri_Latin", "Turkish"),
+    0x082c: ('cp1251', "Azeri_Cyrillic", "Cyrillic"),
+    0x042d: ('iso8859_1', "Basque", "Western Europe & US"),
+    0x0423: ('cp1251', "Belarusian", "Cyrillic"),
+    0x0402: ('cp1251', "Bulgarian", "Cyrillic"),
+    0x0403: ('iso8859_1', "Catalan", "Western Europe & US"),
+    0x0404: ('cp950', "Chinese_Taiwan", "Traditional Chinese"),
+    0x0804: ('cp936', "Chinese_PRC", "Simplified Chinese"),
+    0x0c04: ('cp950', "Chinese_Hong_Kong", "Traditional Chinese"),
+    0x1004: ('cp936', "Chinese_Singapore", "Simplified Chinese"),
+    0x1404: ('cp950', "Chinese_Macau", "Traditional Chinese"),
+    0x041a: ('iso8859_2', "Croatian", "Central Europe"),
+    0x0405: ('iso8859_2', "Czech", "Central Europe"),
+    0x0406: ('iso8859_1', "Danish", "Western Europe & US"),
+    0x0413: ('iso8859_1', "Dutch_Standard", "Western Europe & US"),
+    0x0813: ('iso8859_1', "Dutch_Belgian", "Western Europe & US"),
+    0x0409: ('iso8859_1', "English_United_States", "Western Europe & US"),
+    0x0809: ('iso8859_1', "English_United_Kingdom", "Western Europe & US"),
+    0x0c09: ('iso8859_1', "English_Australian", "Western Europe & US"),
+    0x1009: ('iso8859_1', "English_Canadian", "Western Europe & US"),
+    0x1409: ('iso8859_1', "English_New_Zealand", "Western Europe & US"),
+    0x1809: ('iso8859_1', "English_Irish", "Western Europe & US"),
+    0x1c09: ('iso8859_1', "English_South_Africa", "Western Europe & US"),
+    0x2009: ('iso8859_1', "English_Jamaica", "Western Europe & US"),
+    0x2409: ('iso8859_1', "English_Caribbean", "Western Europe & US"),
+    0x2809: ('iso8859_1', "English_Belize", "Western Europe & US"),
+    0x2c09: ('iso8859_1', "English_Trinidad", "Western Europe & US"),
+    0x3009: ('iso8859_1', "English_Zimbabwe", "Western Europe & US"),
+    0x3409: ('iso8859_1', "English_Philippines", "Western Europe & US"),
+    0x0425: ('iso8859_13', "Estonian", "Baltic",),
+    0x0438: ('iso8859_1', "Faeroese", "Western Europe & US"),
+    0x0429: ('iso8859_6', "Farsi", "Arabic"),
+    0x040b: ('iso8859_1', "Finnish", "Western Europe & US"),
+    0x040c: ('iso8859_1', "French_Standard", "Western Europe & US"),
+    0x080c: ('iso8859_1', "French_Belgian", "Western Europe & US"),
+    0x0c0c: ('iso8859_1', "French_Canadian", "Western Europe & US"),
+    0x100c: ('iso8859_1', "French_Swiss", "Western Europe & US"),
+    0x140c: ('iso8859_1', "French_Luxembourg", "Western Europe & US"),
+    0x180c: ('iso8859_1', "French_Monaco", "Western Europe & US"),
+    0x0437: (None, "Georgian", "Georgian"),
+    0x0407: ('iso8859_1', "German_Standard", "Western Europe & US"),
+    0x0807: ('iso8859_1', "German_Swiss", "Western Europe & US"),
+    0x0c07: ('iso8859_1', "German_Austrian", "Western Europe & US"),
+    0x1007: ('iso8859_1', "German_Luxembourg", "Western Europe & US"),
+    0x1407: ('iso8859_1', "German_Liechtenstein", "Western Europe & US"),
+    0x0408: ('iso8859_7', "Greek", "Greek"),
+    0x040d: ('iso8859_8', "Hebrew", "Hebrew"),
+    0x0439: (None, "Hindi", "Indic"),
+    0x040e: ('iso8859_2', "Hungarian", "Central Europe"),
+    0x040f: ('iso8859_1', "Icelandic", "Western Europe & US"),
+    0x0421: ('iso8859_1', "Indonesian", "Western Europe & US"),
+    0x0410: ('iso8859_1', "Italian_Standard", "Western Europe & US"),
+    0x0810: ('iso8859_1', "Italian_Swiss", "Western Europe & US"),
+    0x0411: ('cp932', "Japanese", "Japanese"),
+    0x043f: ('cp1251', "Kazakh", "Cyrillic"),
+    0x0457: (None, "Konkani", "Indic"),
+    0x0412: ('cp949', "Korean", "Korean"),
+    0x0426: ('iso8859_13', "Latvian", "Baltic",),
+    0x0427: ('iso8859_13', "Lithuanian", "Baltic",),
+    0x042f: ('cp1251', "Macedonian", "Cyrillic"),
+    0x043e: ('iso8859_1', "Malay_Malaysia", "Western Europe & US"),
+    0x083e: ('iso8859_1', "Malay_Brunei_Darussalam", "Western Europe & US"),
+    0x044e: (None, "Marathi", "Indic"),
+    0x0414: ('iso8859_1', "Norwegian_Bokmal", "Western Europe & US"),
+    0x0814: ('iso8859_1', "Norwegian_Nynorsk", "Western Europe & US"),
+    0x0415: ('iso8859_2', "Polish", "Central Europe"),
+    0x0416: ('iso8859_1', "Portuguese_Brazilian", "Western Europe & US"),
+    0x0816: ('iso8859_1', "Portuguese_Standard", "Western Europe & US"),
+    0x0418: ('iso8859_2', "Romanian", "Central Europe"),
+    0x0419: ('cp1251', "Russian", "Cyrillic"),
+    0x044f: (None, "Sanskrit", "Indic"),
+    0x081a: ('iso8859_2', "Serbian_Latin", "Central Europe"),
+    0x0c1a: ('cp1251', "Serbian_Cyrillic", "Cyrillic"),
+    0x041b: ('iso8859_2', "Slovak", "Central Europe"),
+    0x0424: ('iso8859_2', "Slovenian", "Central Europe"),
+    0x040a: ('iso8859_1', "Spanish_Trad_Sort", "Western Europe & US"),
+    0x080a: ('iso8859_1', "Spanish_Mexican", "Western Europe & US"),
+    0x0c0a: ('iso8859_1', "Spanish_Modern_Sort", "Western Europe & US"),
+    0x100a: ('iso8859_1', "Spanish_Guatemala", "Western Europe & US"),
+    0x140a: ('iso8859_1', "Spanish_Costa_Rica", "Western Europe & US"),
+    0x180a: ('iso8859_1', "Spanish_Panama", "Western Europe & US"),
+    0x1c0a: ('iso8859_1', "Spanish_Dominican_Repub", "Western Europe & US"),
+    0x200a: ('iso8859_1', "Spanish_Venezuela", "Western Europe & US"),
+    0x240a: ('iso8859_1', "Spanish_Colombia", "Western Europe & US"),
+    0x280a: ('iso8859_1', "Spanish_Peru", "Western Europe & US"),
+    0x2c0a: ('iso8859_1', "Spanish_Argentina", "Western Europe & US"),
+    0x300a: ('iso8859_1', "Spanish_Ecuador", "Western Europe & US"),
+    0x340a: ('iso8859_1', "Spanish_Chile", "Western Europe & US"),
+    0x380a: ('iso8859_1', "Spanish_Uruguay", "Western Europe & US"),
+    0x3c0a: ('iso8859_1', "Spanish_Paraguay", "Western Europe & US"),
+    0x400a: ('iso8859_1', "Spanish_Bolivia", "Western Europe & US"),
+    0x440a: ('iso8859_1', "Spanish_El_Salvador", "Western Europe & US"),
+    0x480a: ('iso8859_1', "Spanish_Honduras", "Western Europe & US"),
+    0x4c0a: ('iso8859_1', "Spanish_Nicaragua", "Western Europe & US"),
+    0x500a: ('iso8859_1', "Spanish_Puerto_Rico", "Western Europe & US"),
+    0x0441: ('iso8859_1', "Swahili", "Western Europe & US"),
+    0x041d: ('iso8859_1', "Swedish", "Western Europe & US"),
+    0x081d: ('iso8859_1', "Swedish_Finland", "Western Europe & US"),
+    0x0449: (None, "Tamil", "Indic"),
+    0x0444: ('cp1251', "Tatar", "Cyrillic"),
+    0x041e: ('iso8859_11', "Thai", "Thai"),
+    0x041f: ('iso8859_9', "Turkish", "Turkish"),
+    0x0422: ('cp1251', "Ukrainian", "Cyrillic"),
+    0x0420: ('iso8859_6', "Urdu", "Arabic"),
+    0x0443: ('iso8859_9', "Uzbek_Latin", "Turkish"),
+    0x0843: ('cp1251', "Uzbek_Cyrillic", "Cyrillic"),
+    0x042a: (None, "Vietnamese", "Vietnamese")
+}
+
+
+class CHMFile:
+    "A class to manage access to CHM files."
+    filename = ""
+    file = None
+    title = ""
+    home = "/"
+    index = None
+    topics = None
+    encoding = None
+    lcid = None
+    binaryindex = None
+
+    def __init__(self):
+        self.searchable = 0  # updated by GetArchiveInfo
+
+    def LoadCHM(self, archiveName):
+        '''Loads a CHM archive, closing any archive already open.
+        This function will also call GetArchiveInfo to obtain information
+        such as the index file name and the topics file. It returns 1 on
+        success, and 0 if it fails.
+        '''
+        if self.file is not None:  # filename is never None ('' when closed)
+            self.CloseCHM()
+
+        self.file = chmlib.chm_open(archiveName)
+        if self.file is None:
+            return 0
+
+        self.filename = archiveName
+        self.GetArchiveInfo()
+
+        return 1
+
+    def CloseCHM(self):
+        '''Closes the CHM archive.
+        This function will close the CHM file, if it is open. All variables
+        are also reset.
+        '''
+        if self.filename is not None:
+            chmlib.chm_close(self.file)
+            self.file = None
+            self.filename = ''
+            self.title = ""
+            self.home = "/"
+            self.index = None
+            self.topics = None
+            self.encoding = None
+
+    def GetArchiveInfo(self):
+        '''Obtains information on CHM archive.
+        This function checks the /#SYSTEM file inside the CHM archive to
+        obtain the index, home page, topics, encoding and title. It is called
+        from LoadCHM.
+        '''
+
+        self.searchable = extra.is_searchable(self.file)
+        self.lcid = None
+
+        result, ui = chmlib.chm_resolve_object(self.file, b'/#SYSTEM')
+        if (result != chmlib.CHM_RESOLVE_SUCCESS):
+            sys.stderr.write('GetArchiveInfo: #SYSTEM does not exist\n')
+            return 0
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 4, ui.length)
+        if (size == 0):
+            sys.stderr.write('GetArchiveInfo: file size = 0\n')
+            return 0
+
+        buff = array.array('B', text)
+
+        index = 0
+        while (index < size):
+            cursor = buff[index] + (buff[index+1] * 256)
+
+            if (cursor == 0):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.topics = b'/' + text[index:index+cursor-1]
+            elif (cursor == 1):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.index = b'/' + text[index:index+cursor-1]
+            elif (cursor == 2):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.home = b'/' + text[index:index+cursor-1]
+            elif (cursor == 3):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.title = text[index:index+cursor-1]
+            elif (cursor == 4):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.lcid = buff[index] + (buff[index+1] * 256)
+            elif (cursor == 6):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                tmp = text[index:index+cursor-1]
+                if not self.topics:
+                    tmp1 = b'/' + tmp + b'.hhc'
+                    tmp2 = b'/' + tmp + b'.hhk'
+                    res1, ui1 = chmlib.chm_resolve_object(self.file, tmp1)
+                    res2, ui2 = chmlib.chm_resolve_object(self.file, tmp2)
+                    if not self.topics and res1 == chmlib.CHM_RESOLVE_SUCCESS:
+                        self.topics = b'/' + tmp + b'.hhc'
+                    if not self.index and res2 == chmlib.CHM_RESOLVE_SUCCESS:
+                        self.index = b'/' + tmp + b'.hhk'
+            elif (cursor == 16):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.encoding = text[index:index+cursor-1]
+            else:
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+            index += cursor
+
+        self.GetWindowsInfo()
+
+        if not self.lcid:
+            self.lcid = extra.get_lcid(self.file)
+
+        return 1
+
+    def GetTopicsTree(self):
+        '''Reads and returns the topics tree.
+        This auxiliary function reads and returns the topics tree file
+        contents for the CHM archive.
+        '''
+        if self.topics is None:
+            return None
+
+        if self.topics:
+            res, ui = chmlib.chm_resolve_object(self.file, self.topics)
+            if (res != chmlib.CHM_RESOLVE_SUCCESS):
+                return None
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 0, ui.length)
+        if (size == 0):
+            sys.stderr.write('GetTopicsTree: file size = 0\n')
+            return None
+        return text
+
+    def GetIndex(self):
+        '''Reads and returns the index tree.
+        This auxiliary function reads and returns the index tree file
+        contents for the CHM archive.
+        '''
+        if self.index is None:
+            return None
+
+        if self.index:
+            res, ui = chmlib.chm_resolve_object(self.file, self.index)
+            if (res != chmlib.CHM_RESOLVE_SUCCESS):
+                return None
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 0, ui.length)
+        if (size == 0):
+            sys.stderr.write('GetIndex: file size = 0\n')
+            return None
+        return text
+
+    def ResolveObject(self, document):
+        '''Tries to locate a document in the archive.
+        This function tries to locate the document inside the archive. It
+        returns a tuple where the first element is zero if the function
+        was successful, and the second is the UnitInfo for that document.
+        The UnitInfo is used to retrieve the document contents
+        '''
+        if self.file:
+            # path = os.path.abspath(document) # wtf?? the index contents
+                                               # are independant of the os !
+            path = document
+            return chmlib.chm_resolve_object(self.file, path)
+        else:
+            return (1, None)
+
+    def RetrieveObject(self, ui, start=-1, length=-1):
+        '''Retrieves the contents of a document.
+        This function takes a UnitInfo and two optional arguments, the first
+        being the start address and the second is the length. These define
+        the amount of data to be read from the archive.
+        '''
+        if self.file and ui:
+            if length == -1:
+                len = ui.length
+            else:
+                len = length
+            if start == -1:
+                st = 0
+            else:
+                st = long(start)
+            return chmlib.chm_retrieve_object(self.file, ui, st, len)
+        else:
+            return (0, '')
+
+    def Search(self, text, wholewords=0, titleonly=0):
+        '''Performs full-text search on the archive.
+        The first parameter is the word to look for, the second
+        indicates if the search should be for whole words only, and
+        the third parameter indicates if the search should be
+        restricted to page titles.
+        This method will return a tuple, the first item
+        indicating if the search results were partial, and the second
+        item being a dictionary containing the results.'''
+        if text and text != '' and self.file:
+            return extra.search(self.file, text, wholewords, titleonly)
+        else:
+            return None
+
+    def IsSearchable(self):
+        '''Indicates if the full-text search is available for this
+        archive - this flag is updated when GetArchiveInfo is called
+        (from the result of extra.is_searchable) and is 0 before that.'''
+        return self.searchable
+
+    def GetEncoding(self):
+        '''Returns a string that can be used with the codecs python package
+        to encode or decode the files in the chm archive. If an error is
+        found, or if it is not possible to find the encoding, None is
+        returned.'''
+        if self.encoding:
+            vals = self.encoding.split(b',')
+            if len(vals) > 2:
+                try:
+                    return charset_table[int(vals[2])]
+                except KeyError:
+                    pass
+        return None
+
+    def GetLCID(self):
+        '''Returns the archive Locale ID'''
+        if self.lcid in locale_table:
+            return locale_table[self.lcid]
+        else:
+            return None
+
+    def GetDWORD(self, buff, idx=0):
+        '''Internal method.
+        Reads a double word (4 bytes) from a buffer.
+        '''
+        result = buff[idx] + (buff[idx+1] << 8) + (buff[idx+2] << 16) + \
+            (buff[idx+3] << 24)
+
+        if result == 0xFFFFFFFF:
+            result = 0
+
+        return result
+
+    def GetString(self, text, idx):
+        '''Internal method.
+        Retrieves a string from the #STRINGS buffer.
+        '''
+        next = text.find(b'\x00', idx)
+        chunk = text[idx:next]
+        return chunk
+
+    def GetWindowsInfo(self):
+        '''Gets information from the #WINDOWS file.
+        Checks the #WINDOWS file to see if it has any info that was
+        not found in #SYSTEM (topics, index or default page.
+        '''
+        result, ui = chmlib.chm_resolve_object(self.file, b'/#WINDOWS')
+        if (result != chmlib.CHM_RESOLVE_SUCCESS):
+            return -1
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 0, 8)
+        if (size < 8):
+            return -2
+
+        buff = array.array('B', text)
+        num_entries = self.GetDWORD(buff, 0)
+        entry_size = self.GetDWORD(buff, 4)
+
+        if num_entries < 1:
+            return -3
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 8, entry_size)
+        if (size < entry_size):
+            return -4
+
+        buff = array.array('B', text)
+        toc_index = self.GetDWORD(buff, 0x60)
+        idx_index = self.GetDWORD(buff, 0x64)
+        dft_index = self.GetDWORD(buff, 0x68)
+
+        result, ui = chmlib.chm_resolve_object(self.file, b'/#STRINGS')
+        if (result != chmlib.CHM_RESOLVE_SUCCESS):
+            return -5
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 0, ui.length)
+        if (size == 0):
+            return -6
+
+        if (not self.topics):
+            self.topics = self.GetString(text, toc_index)
+            if not self.topics.startswith(b"/"):
+                self.topics = b"/" + self.topics
+
+        if (not self.index):
+            self.index = self.GetString(text, idx_index)
+            if not self.index.startswith(b"/"):
+                self.index = b"/" + self.index
+
+        if (dft_index != 0):
+            self.home = self.GetString(text, dft_index)
+            if not self.home.startswith(b"/"):
+                self.home = b"/" + self.home
--- a/src/python/pychm/recollchm/chmlib.py
+++ b/src/python/pychm/recollchm/chmlib.py
@ -0,0 +1,180 @@
+# This file was automatically generated by SWIG (http://www.swig.org).
+# Version 3.0.10
+#
+# Do not make changes to this file unless you know what you are doing--modify
+# the SWIG interface file instead.
+
+
+
+
+
+# Locate and import the compiled extension module "_chmlib". The way it
+# is looked up depends on the version of the running interpreter.
+from sys import version_info as _swig_python_version_info
+if _swig_python_version_info >= (2, 7, 0):
+    def swig_import_helper():
+        # Try "<package>._chmlib" first, then a top-level "_chmlib".
+        import importlib
+        pkg = __name__.rpartition('.')[0]
+        mname = '.'.join((pkg, '_chmlib')).lstrip('.')
+        try:
+            return importlib.import_module(mname)
+        except ImportError:
+            return importlib.import_module('_chmlib')
+    _chmlib = swig_import_helper()
+    del swig_import_helper
+elif _swig_python_version_info >= (2, 6, 0):
+    def swig_import_helper():
+        # Look for "_chmlib" next to this file, falling back to a
+        # plain import when it is not found there.
+        from os.path import dirname
+        import imp
+        fp = None
+        try:
+            fp, pathname, description = imp.find_module('_chmlib', [dirname(__file__)])
+        except ImportError:
+            import _chmlib
+            return _chmlib
+        if fp is not None:
+            try:
+                _mod = imp.load_module('_chmlib', fp, pathname, description)
+            finally:
+                fp.close()
+            return _mod
+    _chmlib = swig_import_helper()
+    del swig_import_helper
+else:
+    import _chmlib
+del _swig_python_version_info
+try:
+    _swig_property = property
+except NameError:
+    pass  # Python < 2.2 doesn't have 'property'.
+
+# Make the builtins module available as "__builtin__" on Python 2 and 3.
+try:
+    import builtins as __builtin__
+except ImportError:
+    import __builtin__
+
+# Attribute setter shared by the proxy classes. "thisown" and "this" are
+# handled specially, other names go through the class's
+# __swig_setmethods__ table. When "static" is true, a name that is not
+# in the table raises AttributeError instead of being stored.
+def _swig_setattr_nondynamic(self, class_type, name, value, static=1):
+    if (name == "thisown"):
+        return self.this.own(value)
+    if (name == "this"):
+        if type(value).__name__ == 'SwigPyObject':
+            self.__dict__[name] = value
+            return
+    method = class_type.__swig_setmethods__.get(name, None)
+    if method:
+        return method(self, value)
+    if (not static):
+        if _newclass:
+            object.__setattr__(self, name, value)
+        else:
+            self.__dict__[name] = value
+    else:
+        raise AttributeError("You cannot add attributes to %s" % self)
+
+
+# Same as _swig_setattr_nondynamic with static=0, so unknown attribute
+# names are stored on the instance instead of raising.
+def _swig_setattr(self, class_type, name, value):
+    return _swig_setattr_nondynamic(self, class_type, name, value, 0)
+
+
+# Attribute getter shared by the proxy classes: "thisown" is answered by
+# self.this.own(), other names through the class's __swig_getmethods__
+# table. AttributeError is raised for names found in neither.
+def _swig_getattr(self, class_type, name):
+    if (name == "thisown"):
+        return self.this.own()
+    method = class_type.__swig_getmethods__.get(name, None)
+    if method:
+        return method(self)
+    raise AttributeError("'%s' object has no attribute '%s'" % (class_type.__name__, name))
+
+
+# repr() for proxy objects: "<module.Class; proxy of ... >". The
+# "proxy of" part is left out when self.this cannot be represented.
+def _swig_repr(self):
+    try:
+        strthis = "proxy of " + self.this.__repr__()
+    except __builtin__.Exception:
+        strthis = ""
+    return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,)
+
+# Base class for the proxy classes: "object" when it is available
+# (_newclass = 1), otherwise an empty classic class (_newclass = 0).
+try:
+    _object = object
+    _newclass = 1
+except __builtin__.Exception:
+    class _object:
+        pass
+    _newclass = 0
+
+CHM_UNCOMPRESSED = _chmlib.CHM_UNCOMPRESSED
+CHM_COMPRESSED = _chmlib.CHM_COMPRESSED
+CHM_MAX_PATHLEN = _chmlib.CHM_MAX_PATHLEN
+# Proxy for the object created by _chmlib.new_chmUnitInfo. The start,
+# length, space and path attributes are forwarded to the matching
+# _chmlib.chmUnitInfo_*_get / _set functions.
+class chmUnitInfo(_object):
+    __swig_setmethods__ = {}
+    __setattr__ = lambda self, name, value: _swig_setattr(self, chmUnitInfo, name, value)
+    __swig_getmethods__ = {}
+    __getattr__ = lambda self, name: _swig_getattr(self, chmUnitInfo, name)
+    __repr__ = _swig_repr
+    __swig_setmethods__["start"] = _chmlib.chmUnitInfo_start_set
+    __swig_getmethods__["start"] = _chmlib.chmUnitInfo_start_get
+    if _newclass:
+        start = _swig_property(_chmlib.chmUnitInfo_start_get, _chmlib.chmUnitInfo_start_set)
+    __swig_setmethods__["length"] = _chmlib.chmUnitInfo_length_set
+    __swig_getmethods__["length"] = _chmlib.chmUnitInfo_length_get
+    if _newclass:
+        length = _swig_property(_chmlib.chmUnitInfo_length_get, _chmlib.chmUnitInfo_length_set)
+    __swig_setmethods__["space"] = _chmlib.chmUnitInfo_space_set
+    __swig_getmethods__["space"] = _chmlib.chmUnitInfo_space_get
+    if _newclass:
+        space = _swig_property(_chmlib.chmUnitInfo_space_get, _chmlib.chmUnitInfo_space_set)
+    __swig_setmethods__["path"] = _chmlib.chmUnitInfo_path_set
+    __swig_getmethods__["path"] = _chmlib.chmUnitInfo_path_get
+    if _newclass:
+        path = _swig_property(_chmlib.chmUnitInfo_path_get, _chmlib.chmUnitInfo_path_set)
+
+    def __init__(self):
+        # Attach the newly created native object as self.this.
+        this = _chmlib.new_chmUnitInfo()
+        try:
+            self.this.append(this)
+        except __builtin__.Exception:
+            self.this = this
+    __swig_destroy__ = _chmlib.delete_chmUnitInfo
+    __del__ = lambda self: None
+chmUnitInfo_swigregister = _chmlib.chmUnitInfo_swigregister
+chmUnitInfo_swigregister(chmUnitInfo)
+
+
+# Module-level wrappers. Each "def" only documents the argument list:
+# the name is rebound to the _chmlib function right after it, so calls
+# go straight to the extension module.
+def chm_open(filename):
+    return _chmlib.chm_open(filename)
+chm_open = _chmlib.chm_open
+
+def chm_close(h):
+    return _chmlib.chm_close(h)
+chm_close = _chmlib.chm_close
+CHM_PARAM_MAX_BLOCKS_CACHED = _chmlib.CHM_PARAM_MAX_BLOCKS_CACHED
+
+def chm_set_param(h, paramType, paramVal):
+    return _chmlib.chm_set_param(h, paramType, paramVal)
+chm_set_param = _chmlib.chm_set_param
+CHM_RESOLVE_SUCCESS = _chmlib.CHM_RESOLVE_SUCCESS
+CHM_RESOLVE_FAILURE = _chmlib.CHM_RESOLVE_FAILURE
+
+def chm_resolve_object(h, objPath):
+    return _chmlib.chm_resolve_object(h, objPath)
+chm_resolve_object = _chmlib.chm_resolve_object
+
+def chm_retrieve_object(h, ui, addr, len):
+    return _chmlib.chm_retrieve_object(h, ui, addr, len)
+chm_retrieve_object = _chmlib.chm_retrieve_object
+CHM_ENUMERATE_NORMAL = _chmlib.CHM_ENUMERATE_NORMAL
+CHM_ENUMERATE_META = _chmlib.CHM_ENUMERATE_META
+CHM_ENUMERATE_SPECIAL = _chmlib.CHM_ENUMERATE_SPECIAL
+CHM_ENUMERATE_FILES = _chmlib.CHM_ENUMERATE_FILES
+CHM_ENUMERATE_DIRS = _chmlib.CHM_ENUMERATE_DIRS
+CHM_ENUMERATE_ALL = _chmlib.CHM_ENUMERATE_ALL
+CHM_ENUMERATOR_FAILURE = _chmlib.CHM_ENUMERATOR_FAILURE
+CHM_ENUMERATOR_CONTINUE = _chmlib.CHM_ENUMERATOR_CONTINUE
+CHM_ENUMERATOR_SUCCESS = _chmlib.CHM_ENUMERATOR_SUCCESS
+
+def chm_enumerate(h, what, e, context):
+    return _chmlib.chm_enumerate(h, what, e, context)
+chm_enumerate = _chmlib.chm_enumerate
+
+def chm_enumerate_dir(h, prefix, what, e, context):
+    return _chmlib.chm_enumerate_dir(h, prefix, what, e, context)
+chm_enumerate_dir = _chmlib.chm_enumerate_dir
+# This file is compatible with both classic and new-style classes.
+
+
--- a/src/python/pychm/recollchm/extra.c
+++ b/src/python/pychm/recollchm/extra.c
@ -0,0 +1,803 @@
+/*
+ * extra.c - full-text search support for pychm
+ *
+ * Copyright (C) 2004 Rubens Ramos <rubensr@users.sourceforge.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Author: Rubens Ramos <rubensr@users.sourceforge.net>
+ *
+ * Heavily based on work done by:
+ * Pabs <pabs@zip.to> - chmdeco
+ * Razvan Cojocaru <razvanco@gmx.net> - xCHM
+ *
+ */
+
+#include "chm_lib.h"
+#ifdef __PYTHON__
+#include "Python.h"
+#else
+#include <stdio.h>
+#define PyObject void
+#endif
+
+/*
+ * Local declaration of SwigPyObject. The Python entry points below cast
+ * the object they receive to this type and use its "ptr" member as the
+ * struct chmFile pointer.
+ * NOTE(review): presumably this has to match the layout of the object
+ * created by the SWIG generated wrapper - confirm against the SWIG
+ * version used to build _chmlib.
+ */
+typedef struct {
+  PyObject_HEAD
+  void *ptr;
+  void *ty;
+  int own;
+  PyObject *next;
+#ifdef SWIGPYTHON_BUILTIN
+  PyObject *dict;
+#endif
+} SwigPyObject;
+
+#include <stdlib.h>
+
+#if defined(_WIN32) || defined(__WIN32__)
+#       if defined(_MSC_VER)
+#               if defined(STATIC_LINKED)
+#                       define MODEXPORT(a) a
+#                       define MODIMPORT(a) extern a
+#               else
+#                       define MODEXPORT(a) __declspec(dllexport) a
+#                       define MODIMPORT(a) extern a
+#               endif
+#define uint64_t unsigned long long
+#define uint32_t unsigned int
+#define uint16_t unsigned short
+#define uint8_t  unsigned char
+#define size_t   int
+#define strcasecmp _stricmp
+#define strncasecmp _strnicmp
+#       else
+#               if defined(__BORLANDC__)
+#                       define MODEXPORT(a) a _export
+#                       define MODIMPORT(a) a _export
+#               else
+#                       define MODEXPORT(a) a
+#                       define MODIMPORT(a) a
+#               endif
+#       endif
+#else
+#       define MODEXPORT(a) a
+#       define MODIMPORT(a) a
+#include <inttypes.h>
+#include <strings.h>
+#endif
+
+#define false 0
+#define true 1
+
+#define FTS_HEADER_LEN 0x32
+#define TOPICS_ENTRY_LEN 16
+#define COMMON_BUF_LEN 1025
+
+#define FREE(x) free (x); x = NULL
+
+/* Reads a 16 bit little-endian value from the two bytes at b. */
+static uint16_t
+get_uint16 (uint8_t* b) {
+  const uint16_t low = b[0];
+  const uint16_t high = b[1];
+
+  return (uint16_t) (low | (high << 8));
+}
+
+/*
+ * Reads a 32 bit little-endian value from the four bytes at b.
+ * Each byte is widened to uint32_t before shifting: a plain b[3]<<24 is
+ * computed as int and overflows it when the top bit of b[3] is set.
+ */
+static uint32_t
+get_uint32 (uint8_t* b) {
+  return (uint32_t) b[0]      |
+    ((uint32_t) b[1] << 8)    |
+    ((uint32_t) b[2] << 16)   |
+    ((uint32_t) b[3] << 24);
+}
+
+/*
+ * Decodes a variable length integer: every byte contributes its low 7
+ * bits, least significant group first, and a set top bit means that
+ * another byte follows.
+ *
+ * buffer: first byte of the encoded value.
+ * length: receives the number of bytes that were consumed.
+ *
+ * Returns the decoded value. Groups that would land beyond bit 63 are
+ * consumed but ignored.
+ */
+static uint64_t
+be_encint (unsigned char *buffer, size_t *length)
+{
+  uint64_t result = 0;
+  int shift=0;
+  *length = 0;
+
+  do {
+    /* Widen before shifting: as an int shift, everything past 31 bits
+       was lost or undefined. */
+    if (shift < 64)
+      result |= (uint64_t) ((*buffer) & 0x7f) << shift;
+    shift += 7;
+    *length = *length + 1;
+
+  } while (*(buffer++) & 0x80);
+
+  return result;
+}
+
+/*
+  Finds the first unset bit in memory. Returns the number of set bits found.
+
+  The scan starts at bit *bit of *byte and moves from bit 7 down to bit 0
+  of each byte, then on to the next byte. The unset bit that ends the run
+  is consumed as well. On return *bit is the position of the next unread
+  bit and *length the number of bytes that were completely consumed.
+
+  NOTE: no buffer size is passed in, so the scan cannot detect the end of
+  the buffer, and -1 is never returned.
+*/
+static int
+ffus (unsigned char* byte, int* bit, size_t *length) {
+  int bits = 0;
+  *length = 0;
+
+  /* Count the set bits, stepping to the next byte after bit 0. */
+  while(*byte & (1 << *bit)){
+    if(*bit)
+      --(*bit);
+    else {
+      ++byte;
+      ++(*length);
+      *bit = 7;
+    }
+    ++bits;
+  }
+
+  /* Step over the unset bit that terminated the run. */
+  if(*bit)
+    --(*bit);
+  else {
+    ++(*length);
+    *bit = 7;
+  }
+
+  return bits;
+}
+
+
+/*
+  Decodes one bit-packed integer from the stream at byte, starting at bit
+  position *bit (7 is the most significant bit of a byte).
+
+  The value starts with a run of set bits ended by an unset bit (read by
+  ffus), followed by n = r + (count ? count-1 : 0) value bits, where
+  count is the length of that run. When count is not zero, bit n of the
+  result is set as well.
+
+  s:      only the value 2 is supported.
+  r:      number of value bits used when the run of set bits is empty.
+  length: receives the number of bytes completely consumed; *bit is
+          updated to the next unread bit.
+
+  Returns the decoded value, or ~0 when bit is NULL, *bit is above 7 or
+  s is not 2. No buffer size is passed in, so reads are not bounds
+  checked here.
+*/
+static uint64_t
+sr_int(unsigned char* byte, int* bit,
+       unsigned char s, unsigned char r, size_t *length)
+{
+  uint64_t ret;
+  unsigned char mask;
+  int n, n_bits, num_bits, base, count;
+  size_t fflen;
+
+  *length = 0;
+
+  if(!bit || *bit > 7 || s != 2)
+    return ~(uint64_t)0;
+  ret = 0;
+
+  /* Length of the leading run of set bits. */
+  count = ffus(byte, bit, &fflen);
+  *length += fflen;
+  byte += *length;
+
+  /* n counts the value bits still to read, n_bits keeps the total. */
+  n_bits = n = r + (count ? count-1 : 0) ;
+
+  while (n > 0) {
+    /* num_bits is one less than the number of bits taken from the
+       current byte, base is the position of the lowest of them. */
+    num_bits = n > *bit ? *bit : n-1;
+    base = n > *bit ? 0 : *bit - (n-1);
+
+    /* mask = the num_bits+1 lowest bits set. */
+    switch (num_bits){
+    case 0:
+      mask = 1;
+      break;
+    case 1:
+      mask = 3;
+      break;
+    case 2:
+      mask = 7;
+      break;
+    case 3:
+      mask = 0xf;
+      break;
+    case 4:
+      mask = 0x1f;
+      break;
+    case 5:
+      mask = 0x3f;
+      break;
+    case 6:
+      mask = 0x7f;
+      break;
+    case 7:
+      mask = 0xff;
+      break;
+    default:
+      mask = 0xff;
+      break;
+    }
+
+    /* Append the selected bits to the low end of the result. */
+    mask <<= base;
+    ret = (ret << (num_bits+1)) |
+      (uint64_t)((*byte & mask) >> base);
+
+    if( n > *bit ){
+      /* The rest of this byte was used up: go on with the next one. */
+      ++byte;
+      ++(*length);
+      n -= *bit+1;
+      *bit = 7;
+    } else {
+      *bit -= n;
+      n = 0;
+    }
+  }
+
+  if(count)
+    ret |= (uint64_t)1 << n_bits;
+
+  return ret;
+}
+
+
+/*
+ * Walks the index nodes of the full-text search tree stored in the
+ * object described by ui and returns the offset of the leaf node that
+ * may contain text.
+ *
+ * initial_offset: offset of the root node.
+ * buff_size:      size of one node in bytes.
+ * tree_depth:     depth of the tree; tree_depth - 1 index levels are
+ *                 descended.
+ *
+ * Every index entry is: length byte, prefix byte, length-1 characters,
+ * a 32 bit offset of the child node and 16 more bits. A word is made of
+ * the first "prefix" characters of the previous word followed by the
+ * characters of the entry.
+ *
+ * Returns the leaf offset, or 0 when it cannot be found, when memory
+ * runs out or when a node is malformed.
+ */
+static uint32_t
+get_leaf_node_offset(struct chmFile *chmfile,
+                     const char *text,
+                     uint32_t initial_offset,
+                     uint32_t buff_size,
+                     uint16_t tree_depth,
+                     struct chmUnitInfo *ui)
+{
+  unsigned char word_len;
+  unsigned char pos;
+  uint16_t free_space;
+  char *word = NULL;
+  char *grown;
+  size_t cur_len = 0;           /* length of the word built so far */
+  size_t new_len;
+  uint32_t test_offset = 0;
+  uint32_t used;
+  uint32_t i;
+  uint32_t result = 0;
+  unsigned char *buffer = malloc (buff_size);
+
+  if (NULL == buffer)
+    return 0;
+
+  while (--tree_depth) {
+    /* The offset did not change during the last level: give up. */
+    if (initial_offset == test_offset)
+      goto done;
+
+    test_offset = initial_offset;
+    if (chm_retrieve_object (chmfile, ui, buffer,
+                             initial_offset, buff_size) == 0)
+      goto done;
+
+    free_space = get_uint16 (buffer);
+    /* More free space than node: malformed, and the subtraction below
+       would wrap around. */
+    if (free_space > buff_size)
+      goto done;
+    used = buff_size - free_space;
+
+    /* Entries start right after the free space field of every node, so
+       the cursor has to be reset for each level. */
+    i = sizeof(uint16_t);
+
+    while (i < used) {
+
+      word_len = *(buffer + i);
+      /* A zero length would underflow the copy size below, and the
+         child offset must lie inside the node. */
+      if (word_len == 0 ||
+          (uint64_t) i + word_len + 1 + sizeof(uint32_t) > buff_size)
+        goto done;
+      pos = *(buffer + i + 1);
+
+      /* The prefix has to come from the previous word. */
+      if (pos > cur_len)
+        goto done;
+
+      new_len = (size_t) pos + word_len - 1;
+      grown = realloc (word, new_len + 1);
+      if (NULL == grown)
+        goto done;
+      word = grown;
+      memcpy (word + pos, buffer + i + 2, word_len - 1);
+      word[new_len] = 0;
+      cur_len = new_len;
+
+      if (strcasecmp (text, word) <= 0) {
+        initial_offset = get_uint32 (buffer + i + word_len + 1);
+        break;
+      }
+
+      i += word_len + sizeof (unsigned char) + sizeof(uint32_t) +
+        sizeof(uint16_t);
+    }
+  }
+
+  if(initial_offset != test_offset)
+    result = initial_offset;
+
+ done:
+  /* Single exit, so the word and the node buffer are always released. */
+  free (word);
+  free (buffer);
+
+  return result;
+}
+
+/*
+ * Processes the word location codes (WLC) of one word of the full-text
+ * index and records every document they refer to.
+ *
+ * wlc_count, wlc_size, wlc_offset: number of entries, size in bytes and
+ *     offset of the WLC block inside the object described by uimain.
+ * ds/dr, cs/cr, ls/lr: parameters handed to sr_int for the document
+ *     index, the code count and the location codes.
+ * uitbl, uistrings, topics, urlstr: the /#URLTBL, /#STRINGS, /#TOPICS
+ *     and /#URLSTR objects, used to find the title and the URL of a
+ *     document.
+ * dict: with __PYTHON__, the dictionary that receives title -> URL.
+ *     Without it, the pairs are printed instead.
+ *
+ * Returns true when all entries were processed, false on a read error,
+ * lack of memory or malformed data.
+ */
+static int
+pychm_process_wlc (struct chmFile *chmfile,
+                   uint64_t wlc_count, uint64_t wlc_size,
+                   uint32_t wlc_offset, unsigned char ds,
+                   unsigned char dr, unsigned char cs,
+                   unsigned char cr, unsigned char ls,
+                   unsigned char lr, struct chmUnitInfo *uimain,
+                   struct chmUnitInfo* uitbl,
+                   struct chmUnitInfo *uistrings,
+                   struct chmUnitInfo* topics,
+                   struct chmUnitInfo *urlstr,
+                   PyObject *dict)
+{
+  uint32_t stroff, urloff;
+  uint64_t i, j, count;
+  uint64_t got;
+  size_t length;
+  int wlc_bit = 7;
+  size_t off = 0;
+  uint64_t index = 0;
+  int result = false;
+  unsigned char entry[TOPICS_ENTRY_LEN];
+  unsigned char combuf[COMMON_BUF_LEN];
+  unsigned char *buffer = malloc (wlc_size);
+  char *url = NULL;
+  char *topic = NULL;
+
+  if (NULL == buffer)
+    return false;
+
+  if (chm_retrieve_object(chmfile, uimain, buffer,
+                          wlc_offset, wlc_size) == 0)
+    goto done;
+
+  for (i = 0; i < wlc_count; ++i) {
+
+    /* Every entry starts on a byte boundary. */
+    if(wlc_bit != 7) {
+      ++off;
+      wlc_bit = 7;
+    }
+
+    /* Never decode past the end of the WLC block. */
+    if (off >= wlc_size)
+      goto done;
+
+    /* Document indexes are stored as differences. */
+    index += sr_int(buffer + off, &wlc_bit, ds, dr, &length);
+    off += length;
+
+    if(chm_retrieve_object(chmfile, topics, entry,
+                           index * TOPICS_ENTRY_LEN,
+                           TOPICS_ENTRY_LEN) < TOPICS_ENTRY_LEN)
+      goto done;
+
+    stroff = get_uint32 (entry + 4);
+
+    free (topic);
+    got = chm_retrieve_object (chmfile, uistrings, combuf,
+                               stroff, COMMON_BUF_LEN - 1);
+    if (got == 0) {
+      topic = strdup ("Untitled in index");
+    } else {
+      /* Terminate at what was really read, so that no stale bytes of
+         the buffer become part of the string. */
+      combuf[got] = 0;
+      topic = strdup ((char *)combuf);
+    }
+
+    urloff = get_uint32 (entry + 8);
+
+    if(chm_retrieve_object (chmfile, uitbl, combuf,
+                            urloff, 12) < 12)
+      goto done;
+
+    urloff = get_uint32 (combuf + 8);
+
+    got = chm_retrieve_object (chmfile, urlstr, combuf,
+                               urloff + 8, COMMON_BUF_LEN - 1);
+    if (got == 0)
+      goto done;
+    combuf[got] = 0;
+
+    free (url);
+    url = strdup ((char *)combuf);
+
+    if (url && topic) {
+#ifdef __PYTHON__
+      /* PyDict_SetItem does not take over the references to key and
+         value: they have to be released here. */
+#if PY_MAJOR_VERSION >= 3
+      PyObject *key = PyBytes_FromStringAndSize(topic, strlen(topic));
+      PyObject *val = PyBytes_FromStringAndSize(url, strlen(url));
+#else
+      PyObject *key = PyString_FromString (topic);
+      PyObject *val = PyString_FromString (url);
+#endif
+      if (key && val)
+        PyDict_SetItem(dict, key, val);
+      Py_XDECREF(key);
+      Py_XDECREF(val);
+#else
+      printf ("%s ==> %s\n", url, topic);
+#endif
+    }
+
+    if (off >= wlc_size)
+      goto done;
+    count = sr_int (buffer + off, &wlc_bit, cs, cr, &length);
+    off += length;
+
+    /* The location codes are only skipped over. */
+    for (j = 0; j < count; ++j) {
+      if (off >= wlc_size)
+        goto done;
+      sr_int (buffer + off, &wlc_bit, ls, lr, &length);
+      off += length;
+    }
+  }
+
+  result = true;
+
+ done:
+  /* Single exit, so nothing is leaked on the error paths. */
+  free (topic);
+  free (url);
+  free (buffer);
+
+  return result;
+}
+
+/*
+ * Searches the full-text index (/$FIftiMain) of an archive for text.
+ *
+ * whole_words: when not zero, only the word equal to text (ignoring
+ *              case) is reported; otherwise every word starting with
+ *              text is.
+ * titles_only: when not zero, words whose title flag is not set are
+ *              skipped.
+ * dict:        receives the hits, see pychm_process_wlc.
+ *
+ * Returns -1 when text is NULL, false when the archive has no usable
+ * index or a node cannot be read, and otherwise the "partial" flag: the
+ * result of pychm_process_wlc for a whole word match, true as soon as a
+ * prefix matched otherwise.
+ */
+static int
+chm_search (struct chmFile *chmfile,
+            const char *text, int whole_words,
+            int titles_only, PyObject *dict)
+{
+  unsigned char header[FTS_HEADER_LEN];
+  unsigned char doc_index_s;
+  unsigned char doc_index_r;
+  unsigned char code_count_s;
+  unsigned char code_count_r;
+  unsigned char loc_codes_s;
+  unsigned char loc_codes_r;
+  unsigned char word_len, pos;
+  unsigned char *buffer;
+  char *word = NULL;
+  char *grown;
+  size_t cur_len = 0;           /* length of the word built so far */
+  size_t new_len;
+  size_t text_len;
+  uint32_t node_offset;
+  uint32_t next_offset;
+  uint32_t node_len;
+  uint32_t used;
+  uint16_t tree_depth;
+  uint32_t i;
+  uint16_t free_space;
+  uint64_t wlc_count, wlc_size;
+  uint32_t wlc_offset;
+  unsigned char title;
+  size_t encsz;
+  struct chmUnitInfo ui, uitopics, uiurltbl, uistrings, uiurlstr;
+  int partial = false;
+  int advance;
+
+  if (NULL == text)
+    return -1;
+
+  if (chm_resolve_object (chmfile, "/$FIftiMain", &ui) !=
+      CHM_RESOLVE_SUCCESS ||
+      chm_resolve_object (chmfile, "/#TOPICS", &uitopics) !=
+      CHM_RESOLVE_SUCCESS ||
+      chm_resolve_object (chmfile, "/#STRINGS", &uistrings) !=
+      CHM_RESOLVE_SUCCESS ||
+      chm_resolve_object (chmfile, "/#URLTBL", &uiurltbl) !=
+      CHM_RESOLVE_SUCCESS ||
+      chm_resolve_object (chmfile, "/#URLSTR", &uiurlstr) !=
+      CHM_RESOLVE_SUCCESS)
+    return false;
+
+  /* All header fields used below must really have been read. */
+  if(chm_retrieve_object(chmfile, &ui, header, 0, FTS_HEADER_LEN) <
+     FTS_HEADER_LEN)
+    return false;
+
+  doc_index_s = header[0x1E];
+  doc_index_r = header[0x1F];
+  code_count_s = header[0x20];
+  code_count_r = header[0x21];
+  loc_codes_s = header[0x22];
+  loc_codes_r = header[0x23];
+
+  if(doc_index_s != 2 || code_count_s != 2 || loc_codes_s != 2) {
+    return false;
+  }
+
+  node_offset = get_uint32 (header + 0x14);
+  node_len = get_uint32 (header + 0x2e);
+  tree_depth = get_uint16 (header + 0x18);
+
+  /* A leaf node starts with 8 bytes of header. */
+  if (node_len < sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint16_t))
+    return false;
+
+  node_offset = get_leaf_node_offset (chmfile, text, node_offset, node_len,
+                                      tree_depth, &ui);
+
+  if (!node_offset)
+    return false;
+
+  buffer = malloc (node_len);
+  if (NULL == buffer)
+    return false;
+
+  text_len = strlen (text);
+
+  do {
+
+    if (chm_retrieve_object (chmfile, &ui, buffer,
+                             node_offset, node_len) == 0) {
+      partial = false;
+      goto done;
+    }
+
+    /* The first 4 bytes of a leaf hold the offset of the next leaf. */
+    next_offset = get_uint32 (buffer);
+    free_space = get_uint16 (buffer + 6);
+    /* More free space than node: malformed, and the subtraction below
+       would wrap around. */
+    if (free_space > node_len)
+      goto done;
+    used = node_len - free_space;
+
+    i = sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint16_t);
+
+    encsz = 0;
+
+    while (i < used) {
+      word_len = *(buffer + i);
+      /* A zero length would underflow the copy size below; the
+         characters and the title flag must lie inside the node. */
+      if (word_len == 0 || (uint64_t) i + 2 + word_len > node_len)
+        goto done;
+      pos = *(buffer + i + 1);
+
+      /* The prefix has to come from the previous word. */
+      if (pos > cur_len)
+        goto done;
+
+      new_len = (size_t) pos + word_len - 1;
+      grown = realloc (word, new_len + 1);
+      if (NULL == grown)
+        goto done;
+      word = grown;
+      memcpy (word + pos, buffer + i + 2, word_len - 1);
+      word[new_len] = 0;
+      cur_len = new_len;
+
+      i += 2 + word_len;
+      title = *(buffer + i - 1);
+
+      if (i >= node_len)
+        goto done;
+      wlc_count = be_encint (buffer + i, &encsz);
+      i += encsz;
+
+      /* Room for the 32 bit offset, 16 more bits and at least one byte
+         of the encoded size is required. */
+      if ((uint64_t) i + sizeof(uint32_t) + sizeof(uint16_t) >= node_len)
+        goto done;
+      wlc_offset = get_uint32 (buffer + i);
+
+      i += sizeof(uint32_t) + sizeof(uint16_t);
+      wlc_size =  be_encint (buffer + i, &encsz);
+      i += encsz;
+
+      if (!title && titles_only)
+        continue;
+
+      if (whole_words && !strcasecmp(text, word)) {
+        partial = pychm_process_wlc (chmfile, wlc_count, wlc_size,
+                                     wlc_offset, doc_index_s,
+                                     doc_index_r,code_count_s,
+                                     code_count_r, loc_codes_s,
+                                     loc_codes_r, &ui, &uiurltbl,
+                                     &uistrings, &uitopics,
+                                     &uiurlstr, dict);
+        goto done;
+      }
+
+      if (!whole_words) {
+        if (!strncasecmp (word, text, text_len)) {
+          partial = true;
+          pychm_process_wlc (chmfile, wlc_count, wlc_size,
+                             wlc_offset, doc_index_s,
+                             doc_index_r,code_count_s,
+                             code_count_r, loc_codes_s,
+                             loc_codes_r, &ui, &uiurltbl,
+                             &uistrings, &uitopics,
+                             &uiurlstr, dict);
+
+        } else if (strncasecmp (text, word, text_len) < -1)
+          /* NOTE(review): the comparison with -1 (rather than 0) is
+             kept as found - confirm the intent. */
+          break;
+      }
+
+    }
+
+    /* Go on with the next leaf while the last word still starts with
+       text. word is NULL when no entry was read at all, and a leaf that
+       points to itself must not be read again. */
+    advance = !whole_words &&
+      word != NULL &&
+      !strncmp (word, text, text_len) &&
+      next_offset &&
+      next_offset != node_offset;
+    node_offset = next_offset;
+  } while (advance);
+
+ done:
+  /* Single exit, so the word and the node buffer are always released. */
+  free (word);
+  free (buffer);
+
+  return partial;
+}
+
+/* An object of the archive and the offset inside it at which
+   chm_get_lcid reads a 32 bit locale id. */
+typedef struct {
+  const char *file;
+  int offset;
+} Langrec;
+
+/* Tried in this order by chm_get_lcid.
+   NOTE(review): only the first name starts with '/', like every other
+   path given to chm_resolve_object in this file - confirm that the two
+   other names can be resolved as written. */
+static Langrec lang_files[] = {
+  {"/$FIftiMain",               0x7E},
+  {"$WWKeywordLinks/BTree",     0x34},
+  {"$WWAssociativeLinks/BTree", 0x34}
+};
+
+/* Number of entries in lang_files. */
+#define LANG_FILES_SIZE (sizeof(lang_files)/sizeof(Langrec))
+
+/*
+ * Looks for the locale id of an archive in the objects listed in
+ * lang_files, in that order.
+ *
+ * Returns the 32 bit little-endian value found at the listed offset of
+ * the first object that can be resolved and read, or -1 when there is
+ * none.
+ */
+static int
+chm_get_lcid (struct chmFile *chmfile) {
+  struct chmUnitInfo ui;
+  uint8_t raw[4];
+  size_t i;
+
+  for (i=0; i<LANG_FILES_SIZE; i++) {
+
+    if (chm_resolve_object (chmfile, lang_files[i].file, &ui) !=
+        CHM_RESOLVE_SUCCESS)
+      continue;
+
+    /* All 4 bytes must have been read, and they are decoded explicitly
+       so that the result does not depend on the byte order of the
+       host. */
+    if (chm_retrieve_object (chmfile, &ui, raw,
+                             lang_files[i].offset, sizeof(raw)) ==
+        (int) sizeof(raw))
+      return (int) get_uint32 (raw);
+  }
+
+  return -1;
+}
+
+#ifdef __PYTHON__
+
+/*
+ * Python entry point: is_searchable(chmfile) -> 1 or 0.
+ *
+ * Yields 1 only when every internal object named in `needed` can be
+ * resolved in the archive; the first one that cannot be resolved ends the
+ * check with 0.  `chmfile` must be the SWIG pointer object wrapping a
+ * struct chmFile; if the argument tuple cannot be parsed a TypeError is
+ * raised and NULL returned.
+ */
+static PyObject *
+is_searchable (PyObject *self, PyObject *args) {
+  static const char *const needed[] = {
+    "/$FIftiMain",
+    "/#TOPICS",
+    "/#STRINGS",
+    "/#URLTBL",
+    "/#URLSTR"
+  };
+  struct chmUnitInfo ui;
+  struct chmFile *file;
+  PyObject *obj0;
+  size_t k;
+
+  if (!PyArg_ParseTuple (args, "O:is_searchable", &obj0)) {
+    PyErr_SetString(PyExc_TypeError, "Expected chmfile (not CHMFile!)");
+    return NULL;
+  }
+
+  /* Unwrap the C handle carried by the SWIG object. */
+  file = (struct chmFile *) ((SwigPyObject*)(obj0))->ptr;
+
+  for (k = 0; k < sizeof(needed) / sizeof(needed[0]); k++) {
+    if (chm_resolve_object (file, needed[k], &ui) != CHM_RESOLVE_SUCCESS)
+      return Py_BuildValue ("i", 0);
+  }
+
+  return Py_BuildValue ("i", 1);
+}
+
+/*
+ * Python entry point: search(chmfile, text, whole_words, titles_only)
+ *                     -> (partial, dict)
+ *
+ * chmfile      SWIG pointer object wrapping a struct chmFile.
+ * text         search string; a bytes object under Python 3 ("S" format),
+ *              a str under Python 2 ("s" format).
+ * whole_words  int flag forwarded to chm_search().
+ * titles_only  int flag forwarded to chm_search().
+ *
+ * Returns a 2-tuple made of the int returned by chm_search() and the
+ * dictionary that chm_search() was given to fill in.  Raises TypeError
+ * when the arguments cannot be parsed and MemoryError when the dictionary
+ * cannot be allocated.
+ */
+static PyObject *
+search (PyObject *self, PyObject *args) {
+  char *text;
+  int whole_words = 0;
+  int titles_only = 0;
+  int partial;
+  struct chmFile *file;
+  PyObject *obj0;
+  PyObject *dict;
+
+#if PY_MAJOR_VERSION >= 3
+  PyObject *obj1;
+  if (PyArg_ParseTuple (args, "OSii:search", &obj0, &obj1,
+#else
+  if (PyArg_ParseTuple (args, "Osii:search", &obj0, &text,
+#endif
+                        &whole_words, &titles_only)) {
+
+#if PY_MAJOR_VERSION >= 3
+    /* "S" guarantees a bytes object, so this cannot fail. */
+    text = PyBytes_AsString(obj1);
+#endif
+    dict = PyDict_New();
+
+    if (dict) {
+      file = (struct chmFile *) ((SwigPyObject*)(obj0))->ptr;
+
+      partial = chm_search (file,
+                            text, whole_words, titles_only, dict);
+      /* "N" hands our only reference to the tuple.  The former "O" took
+         an additional reference that was never released, leaking one
+         dictionary per call. */
+      return Py_BuildValue ("(iN)", partial, dict);
+
+    } else {
+      PyErr_NoMemory();
+      return NULL;
+    }
+  } else {
+    PyErr_SetString(PyExc_TypeError,
+                    "Expected chmfile (not CHMFile!), string, int, int");
+    return NULL;
+  }
+}
+
+/*
+ * Python entry point: get_lcid(chmfile) -> int or None.
+ *
+ * Forwards to chm_get_lcid() and converts its result: the value -1 is
+ * reported to Python as None, anything else as an int.  `chmfile` must be
+ * the SWIG pointer object wrapping a struct chmFile; if the argument tuple
+ * cannot be parsed a TypeError is raised and NULL returned.
+ */
+static PyObject *
+get_lcid (PyObject *self, PyObject *args) {
+  PyObject *obj0;
+  struct chmFile *file;
+  int code;
+
+  if (!PyArg_ParseTuple (args, "O:get_lcid", &obj0)) {
+    PyErr_SetString(PyExc_TypeError,"Expected a chmfile (not a CHMFile!)");
+    return NULL;
+  }
+
+  /* Unwrap the C handle carried by the SWIG object. */
+  file = (struct chmFile *) ((SwigPyObject*)(obj0))->ptr;
+  code = chm_get_lcid (file);
+
+  if (code == -1) {
+    /* No LCID available: hand back a new reference to None. */
+    Py_INCREF(Py_None);
+    return Py_None;
+  }
+
+  return Py_BuildValue ("i", code);
+}
+
+/* Method table of the "extra" module: maps each Python-visible name to its
+   C implementation.  All entries take positional arguments only
+   (METH_VARARGS); the all-NULL entry terminates the table. */
+static PyMethodDef
+IndexMethods[] = {
+  {"get_lcid", get_lcid, METH_VARARGS,
+   "Returns LCID (Locale ID) for archive."},
+  {"search", search, METH_VARARGS,
+   "Perform Full-Text search."},
+  {"is_searchable", is_searchable, METH_VARARGS,
+   "Return 1 if it is possible to search the archive, 0 otherwise."},
+  {NULL, NULL, 0, NULL}
+};
+
+
+/* Python 3 only: module definition for "extra" (no docstring, size -1,
+   methods taken from IndexMethods, remaining slots unused).
+   INITERROR is what the module init function executes on failure; it
+   differs because the init function returns a PyObject * under Python 3
+   and void under Python 2. */
+#if PY_MAJOR_VERSION >= 3
+static struct PyModuleDef moduledef = {
+        PyModuleDef_HEAD_INIT,
+        "extra",
+        NULL,
+        -1,
+        IndexMethods,
+        NULL,
+        NULL,
+        NULL,
+        NULL
+};
+
+#define INITERROR return NULL
+
+#else /* python < 3 */
+
+#define INITERROR return
+
+#endif /* python 3/2 */
+
+
+/* Module initialisation entry point: PyInit_extra() when built for
+   Python 3, initextra() when built for Python 2.  Creates the module from
+   IndexMethods; on failure executes INITERROR, otherwise (Python 3 only)
+   returns the new module object. */
+#if PY_MAJOR_VERSION >= 3
+PyObject* PyInit_extra(void)
+#else
+void initextra (void)
+#endif
+{
+    PyObject *module;
+#if PY_MAJOR_VERSION >= 3
+    module = PyModule_Create(&moduledef);
+#else
+    module = Py_InitModule ("extra", IndexMethods);
+#endif
+    if (module == NULL)
+        INITERROR;
+
+#if PY_MAJOR_VERSION >= 3
+    return module;
+#endif
+}
+
+#else
+
+/*
+ * Stand-alone interactive driver, compiled only when __PYTHON__ is not
+ * defined.
+ *
+ * Usage: <program> <filename>
+ *
+ * Opens the archive, prints its LCID, then repeatedly reads
+ * "<whole_words> <titles_only> <string>" from standard input and runs
+ * chm_search() with those values, printing the returned flag.  The loop
+ * ends at end of input or on the first line that does not supply all three
+ * fields.
+ *
+ * Returns 0 after a normal session or after printing the usage line, and
+ * -1 when the archive cannot be opened.
+ */
+int
+main (int argc, char **argv) {
+  struct chmFile *file;
+  char text[255];
+  int whole_words, titles_only;
+  int partial;
+  int lcid;
+
+  if (argc != 2) {
+    printf ("\n%s <filename>\n", argv[0]);
+    return 0;
+  }
+
+  file = chm_open (argv[1]);
+  if (!file)
+    return -1;
+
+  lcid = chm_get_lcid (file);
+  /* %X expects an unsigned value. */
+  printf ("\nLCID= %d (%08X)\n", lcid, (unsigned int) lcid);
+
+  for (;;) {
+    printf ("\n<whole_words> <titles_only> <string>\n");
+    printf ("> ");
+
+    /* The width 254 keeps the word (plus its NUL) inside text[255].
+       Demanding exactly 3 conversions also stops on EOF, which scanf
+       reports as a negative (hence "true") value. */
+    if (scanf ("%d %d %254s", &whole_words, &titles_only, text) != 3)
+      break;
+
+    partial = chm_search (file,
+                          text, whole_words, titles_only, NULL);
+    printf ("Partial = %d\n", partial);
+  }
+
+  chm_close (file);
+  return 0;
+}
+
+#endif
--- a/src/python/pychm/recollchm/swig_chm.c
+++ b/src/python/pychm/recollchm/swig_chm.c
--- a/src/python/pychm/recollchm/swig_chm.i
+++ b/src/python/pychm/recollchm/swig_chm.i
@ -0,0 +1,225 @@
+%module chmlib
+%begin %{
+#define SWIG_PYTHON_STRICT_BYTE_CHAR
+%}
+
+%include "typemaps.i"
+%include "cstring.i"
+
+%{
+/*
+ Copyright (C) 2003 Rubens Ramos <rubensr@users.sourceforge.net>
+
+ Based on code by:
+ Copyright (C) 2003  Razvan Cojocaru <razvanco@gmx.net>
+
+ pychm is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public
+ License along with this program; see the file COPYING.  If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA
+
+ $Id$
+*/
+
+#include "chm_lib.h"
+#include <stdio.h>
+
+/* Python callable currently registered for enumeration (NULL until the
+   first successful my_set_callback()). */
+static PyObject *my_callback = NULL;
+
+/*
+ * Register `arg` as the enumeration callback.
+ *
+ * Raises TypeError and returns NULL when `arg` is not callable.  Otherwise
+ * a reference to `arg` is taken, the reference to the previously stored
+ * callback (if any) is dropped, and a new reference to None is returned.
+ * `dummy` is not used.
+ */
+static PyObject *
+my_set_callback(PyObject *dummy, PyObject *arg)
+{
+    if (PyCallable_Check(arg) == 0) {
+      PyErr_SetString(PyExc_TypeError, "parameter must be callable");
+      return NULL;
+    }
+
+    /* Take the new reference first, then let go of the old one. */
+    Py_XINCREF(arg);
+    Py_XDECREF(my_callback);
+    my_callback = arg;
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/*
+ * C-side trampoline installed as the CHM_ENUMERATOR: wraps its three
+ * arguments as Python objects and calls the callable stored in my_callback
+ * with them.
+ *
+ * Returns 1 when the Python call produced a result and 0 when the wrappers
+ * or the argument tuple could not be built or the callback raised.  The
+ * value returned by the Python callback itself is not inspected.
+ *
+ * Every temporary is released on every path, and the call result is only
+ * released after it has been checked, so a callback that raises no longer
+ * makes this function dereference a NULL pointer.
+ */
+int dummy_enumerator (struct chmFile *h,
+                      struct chmUnitInfo *ui,
+                      void *context) {
+    PyObject *arglist = NULL;
+    PyObject *result = NULL;
+    PyObject *py_h;
+    PyObject *py_ui;
+    PyObject *py_c;
+
+    py_h  = SWIG_NewPointerObj((void *) h, SWIGTYPE_p_chmFile, 0);
+    py_ui = SWIG_NewPointerObj((void *) ui, SWIGTYPE_p_chmUnitInfo, 0);
+    /* The following was: py_c  = PyCObject_AsVoidPtr(context); which did
+       not make sense because the function takes a PyObject * and returns a
+       void *, not the reverse. This was probably never used?? In doubt,
+       replace with a call which makes sense and hope for the best... */
+    py_c = PyCapsule_New(context, "context", NULL);
+
+    /* Time to call the callback */
+    if (py_h && py_ui && py_c)
+      arglist = Py_BuildValue("(OOO)", py_h, py_ui, py_c);
+    if (arglist) {
+      /* PyObject_CallObject is the supported equivalent of the deprecated
+         PyEval_CallObject. */
+      result = PyObject_CallObject(my_callback, arglist);
+      Py_DECREF(arglist);
+    }
+
+    /* The tuple held its own references; drop ours whether or not it was
+       built. */
+    Py_XDECREF(py_h);
+    Py_XDECREF(py_ui);
+    Py_XDECREF(py_c);
+
+    if (result == NULL)
+      return 0; /* Pass error back */
+
+    Py_DECREF(result);
+    return 1;
+ }
+%}
+
+/* Input typemap for CHM_ENUMERATOR arguments: the Python object supplied
+   by the caller is stored through my_set_callback() (which rejects
+   non-callables), and the C function actually passed on is always
+   dummy_enumerator, which forwards to that stored callable. */
+%typemap(in) CHM_ENUMERATOR {
+  if (!my_set_callback(self, $input)) goto fail;
+  $1 = dummy_enumerator;
+}
+
+/* Input typemap for "void *context" arguments: the Python object supplied
+   by the caller is wrapped in a new capsule named "context", and the
+   capsule object itself is what gets passed as the C pointer.
+   NOTE(review): nothing visible here releases that capsule afterwards, and
+   dummy_enumerator wraps the pointer it receives in a second capsule;
+   confirm this is intended. */
+%typemap(in) void *context {
+  if (!($1 = PyCapsule_New($input, "context", NULL))) goto fail;
+}
+
+/* Input typemap for "struct chmUnitInfo *OutValue": the argument is not
+   taken from Python (numinputs=0); a zero-filled struct is allocated with
+   calloc() and passed to the C function to be filled in.
+   NOTE(review): the calloc() result is not checked for NULL. */
+%typemap(in, numinputs=0) struct chmUnitInfo *OutValue (struct chmUnitInfo *temp = (struct chmUnitInfo *) calloc(1, sizeof(struct chmUnitInfo))) {
+  $1 = temp;
+}
+
+/* Output typemap for "struct chmUnitInfo *OutValue": wraps the filled-in
+   struct as a SWIG pointer object and adds it to the Python result.  If
+   there is no result yet (or it is None) the wrapper becomes the result;
+   otherwise the existing result is turned into a tuple if necessary and
+   the wrapper is appended to it. */
+%typemap(argout) struct chmUnitInfo *OutValue {
+  PyObject *o, *o2, *o3;
+  /* Last argument 1: presumably the SWIG ownership flag, so that Python
+     frees the struct -- TODO confirm. */
+  o = SWIG_NewPointerObj((void *) $1, SWIGTYPE_p_chmUnitInfo, 1);
+  if ((!$result) || ($result == Py_None)) {
+    $result = o;
+  } else {
+    if (!PyTuple_Check($result)) {
+      /* Note: this inner o2 shadows the outer declaration. */
+      PyObject *o2 = $result;
+      $result = PyTuple_New(1);
+      PyTuple_SetItem($result,0,o2);
+    }
+    o3 = PyTuple_New(1);
+    PyTuple_SetItem(o3,0,o);
+    o2 = $result;
+    /* Concatenation builds a new tuple; both operands are then released. */
+    $result = PySequence_Concat(o2,o3);
+    Py_DECREF(o2);
+    Py_DECREF(o3);
+  }
+}
+
+/* Check typemap for "unsigned char *OUTPUT": allocates the output buffer
+   once the other arguments have been converted.  The size comes from the
+   wrapper's local variable arg5 -- presumably the fifth argument (len) of
+   chm_retrieve_object, the only declaration in this file with an OUTPUT
+   parameter; the typemap would not work for a function with a different
+   argument layout.  The buffer is released by the matching argout
+   typemap. */
+%typemap(check) unsigned char *OUTPUT {
+  /* nasty hack */
+#ifdef __cplusplus
+   $1 = ($1_ltype) new char[arg5];
+#else
+   $1 = ($1_ltype) malloc(arg5);
+#endif
+   if ($1 == NULL) SWIG_fail;
+}
+
+/* Output typemap for "unsigned char *OUTPUT": converts arg5 bytes of the
+   buffer into a Python object, appends it to the result through
+   t_output_helper(), then frees the buffer allocated by the check
+   typemap.
+   NOTE(review): the conversion always uses the requested length arg5, not
+   the byte count returned by the wrapped function -- confirm callers trim
+   the data themselves. */
+%typemap(argout,fragment="t_output_helper") unsigned char *OUTPUT {
+   PyObject *o;
+   o = SWIG_FromCharPtrAndSize((const char*)$1, arg5);
+/*   o = PyString_FromStringAndSize($1, arg5);*/
+   $result = t_output_helper($result,o);
+#ifdef __cplusplus
+   delete [] $1;
+#else
+   free($1);
+#endif
+}
+
+/* 64-bit integer types used by the declarations below: the compiler
+   specific __int64 when WIN32 is defined, long long otherwise. */
+#ifdef WIN32
+typedef unsigned __int64 LONGUINT64;
+typedef __int64          LONGINT64;
+#else
+typedef unsigned long long LONGUINT64;
+typedef long long          LONGINT64;
+#endif
+
+/* the two available spaces in a CHM file                      */
+/* N.B.: The format supports arbitrarily many spaces, but only */
+/*       two appear to be used at present.                     */
+#define CHM_UNCOMPRESSED (0)
+#define CHM_COMPRESSED   (1)
+
+/* structure representing an ITS (CHM) file stream             */
+/* (declared only; its layout is not exposed to this interface) */
+struct chmFile;
+
+/* structure representing an element from an ITS file stream   */
+#define CHM_MAX_PATHLEN  256
+struct chmUnitInfo
+{
+    LONGUINT64         start;
+    LONGUINT64         length;
+    int                space;   /* presumably CHM_UNCOMPRESSED or CHM_COMPRESSED */
+    char               path[CHM_MAX_PATHLEN+1];   /* +1: room for a terminating NUL */
+};
+
+/* open an ITS archive */
+struct chmFile* chm_open(const char *filename);
+
+/* close an ITS archive */
+void chm_close(struct chmFile *h);
+
+/* methods for setting tuning parameters for a particular file */
+#define CHM_PARAM_MAX_BLOCKS_CACHED 0
+void chm_set_param(struct chmFile *h,
+                   int paramType,
+                   int paramVal);
+
+/* resolve a particular object from the archive */
+/* The parameter name OutValue selects the chmUnitInfo typemaps defined  */
+/* earlier in this file: the struct is allocated by the wrapper and      */
+/* handed back to Python as part of the result.                          */
+#define CHM_RESOLVE_SUCCESS (0)
+#define CHM_RESOLVE_FAILURE (1)
+int chm_resolve_object(struct chmFile *h,
+                       const char *objPath,
+                       struct chmUnitInfo *OutValue);
+
+/* retrieve part of an object from the archive */
+/* The parameter name OUTPUT selects the "unsigned char *OUTPUT"         */
+/* typemaps defined earlier in this file, which size the buffer from the */
+/* fifth argument (len) and return its contents to Python.               */
+LONGINT64 chm_retrieve_object(struct chmFile *h,
+                              struct chmUnitInfo *ui,
+                              unsigned char *OUTPUT,
+                              LONGUINT64 addr,
+                              LONGINT64 len);
+
+/* enumerate the objects in the .chm archive */
+/* Arguments of type CHM_ENUMERATOR and "void *context" go through the   */
+/* input typemaps defined earlier in this file: Python passes a callable */
+/* and an arbitrary object, while the C library is always given          */
+/* dummy_enumerator.                                                     */
+typedef int (*CHM_ENUMERATOR)(struct chmFile *h,
+                              struct chmUnitInfo *ui,
+                              void *context);
+/* flags for the "what" argument; CHM_ENUMERATE_ALL (31) is the bitwise  */
+/* OR of the five flags above it                                         */
+#define CHM_ENUMERATE_NORMAL    (1)
+#define CHM_ENUMERATE_META      (2)
+#define CHM_ENUMERATE_SPECIAL   (4)
+#define CHM_ENUMERATE_FILES     (8)
+#define CHM_ENUMERATE_DIRS      (16)
+#define CHM_ENUMERATE_ALL       (31)
+/* values an enumerator callback may return */
+#define CHM_ENUMERATOR_FAILURE  (0)
+#define CHM_ENUMERATOR_CONTINUE (1)
+#define CHM_ENUMERATOR_SUCCESS  (2)
+int chm_enumerate(struct chmFile *h,
+                  int what,
+                  CHM_ENUMERATOR e,
+                  void *context);
+
+/* same as chm_enumerate, with an additional path prefix argument */
+int chm_enumerate_dir(struct chmFile *h,
+                      const char *prefix,
+                      int what,
+                      CHM_ENUMERATOR e,
+                      void *context);
--- a/src/python/pychm/setup.py.in
+++ b/src/python/pychm/setup.py.in
@ -0,0 +1,36 @@
+# Build script template for the "recollchm" Python package: two pure-Python
+# modules plus two C extensions linked against libchm.
+# The @srcdir@ placeholders are presumably substituted by the configure
+# step that turns this .in file into setup.py -- TODO confirm.
+from setuptools import setup, Extension
+
+long_description = '''
+Version of the chm package modified to support Python 3 and bundled with Recoll.
+The chm package provides three modules, chm, chmlib and extra, which provide
+access to the API implemented by the C library chmlib and some additional
+classes and functions. They are used to access MS-ITSS encoded files -
+Compressed Html Help files (.chm).
+'''
+
+# For shadow builds: references to the source tree
+import os
+# 'top' is two directory levels above the source directory; 'pytop' is the
+# source directory itself.
+top = os.path.join('@srcdir@', '..', '..')
+pytop = '@srcdir@'
+
+setup(name="recollchm",
+      version="0.8.4.1+git",
+      description="Python package to handle CHM files",
+      author="Rubens Ramos",
+      author_email="rubensr@users.sourceforge.net",
+      maintainer="Mikhail Gusarov",
+      maintainer_email="dottedmag@dottedmag.net",
+      url="https://github.com/dottedmag/pychm",
+      license="GPL",
+      long_description=long_description,
+      # Python modules are looked up under <top>/python/pychm.
+      package_dir = {'' : os.path.join(top, 'python', 'pychm')},
+      py_modules=["recollchm.chm", "recollchm.chmlib"],
+      # _chmlib: built from swig_chm.c; extra: built from extra.c with
+      # __PYTHON__ defined, which selects the Python-module half of that file
+      # instead of its stand-alone main().
+      ext_modules=[Extension("recollchm._chmlib",
+                             [os.path.join(pytop, "recollchm/swig_chm.c")],
+                             libraries=["chm"],
+                             extra_compile_args=["-DSWIG_COBJECT_TYPES"]),
+                   Extension("recollchm.extra",
+                             [os.path.join(pytop, "recollchm/extra.c")],
+                             extra_compile_args=["-D__PYTHON__"],
+                             libraries=["chm"])]
+      )
--- a/Show More
+++ b/Show More