merged internal-xsl branch

2019-01-30 08:47:50 +01:00 · 2019-01-30 08:47:50 +01:00 · c0d87a3995
commit c0d87a3995
parent 134bc3fe60 7e672cee71
60 changed files with 11095 additions and 703 deletions
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -2,6 +2,8 @@
 CXXFLAGS ?= @CXXFLAGS@
 LIBXAPIAN=@LIBXAPIAN@
 XAPIANCXXFLAGS=@XAPIANCXXFLAGS@
+XSLT_CFLAGS=@XSLT_CFLAGS@
+XSLT_LINKADD=@XSLT_LINKADD@
 LIBICONV=@LIBICONV@
 INCICONV=@INCICONV@
 LIBFAM = @LIBFAM@
@ -29,8 +31,10 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
    $(COMMONCPPFLAGS) \
    $(INCICONV) \
    $(XAPIANCXXFLAGS) \
+    $(XSLT_CFLAGS) \
    $(X_CFLAGS) \
    -DRECOLL_DATADIR=\"${pkgdatadir}\" \
+    -DREADFILE_ENABLE_ZLIB -DREADFILE_ENABLE_MINIZ -DREADFILE_ENABLE_MD5 \
    -D_GNU_SOURCE \
    $(DEFS)

@ -121,6 +125,8 @@ internfile/mh_symlink.h \
 internfile/mh_text.cpp \
 internfile/mh_text.h \
 internfile/mh_unknown.h \
+internfile/mh_xslt.cpp \
+internfile/mh_xslt.h \
 internfile/mimehandler.cpp \
 internfile/mimehandler.h \
 internfile/myhtmlparse.cpp \
@ -224,6 +230,8 @@ utils/md5ut.cpp \
 utils/md5ut.h \
 utils/mimeparse.cpp \
 utils/mimeparse.h \
+utils/miniz.cpp \
+utils/miniz.h \
 utils/netcon.cpp \
 utils/netcon.h \
 utils/pathut.cpp \
@ -262,7 +270,7 @@ AM_YFLAGS = -d
 librecoll_la_LDFLAGS = -release $(VERSION) \
    -Wl,--no-undefined -Wl,--warn-unresolved-symbols

-librecoll_la_LIBADD = $(LIBXAPIAN) $(LIBICONV) $(LIBTHREADS)
+librecoll_la_LIBADD = $(XSLT_LINKADD) $(LIBXAPIAN) $(LIBICONV) $(LIBTHREADS)

 # There is probably a better way to do this. The KIO needs to be linked
 # with librecoll, but librecoll is installed into a non-standard place
@ -640,6 +648,18 @@ sampleconf/mimeview
 filterdir = $(pkgdatadir)/filters
 filter_DATA = \
 desktop/hotrecoll.py \
+filters/abiword.xsl \
+filters/fb2.xsl \
+filters/gnumeric.xsl \
+filters/msodump.zip \
+filters/okular-note.xsl \
+filters/opendoc-body.xsl \
+filters/opendoc-flat.xsl \
+filters/opendoc-meta.xsl \
+filters/openxml-xls-body.xsl \
+filters/openxml-word-body.xsl \
+filters/openxml-meta.xsl \
+filters/ppt-dump.py \
 filters/rcl7z \
 filters/rclabw.py \
 filters/rclaptosidman \
@ -671,19 +691,19 @@ filters/rcllatinstops.zip \
 filters/rcllyx \
 filters/rclman \
 filters/rclmidi.py \
-filters/rclpdf.py \
-filters/rclps \
 filters/rclokulnote.py \
 filters/rclopxml.py \
+filters/rclpdf.py \
 filters/rclppt.py \
+filters/rclps \
 filters/rclpurple \
 filters/rclpython \
 filters/rclrar \
 filters/rclrtf.py \
 filters/rclscribus \
 filters/rclshowinfo \
-filters/rclsoff.py \
 filters/rclsoff-flat.py \
+filters/rclsoff.py \
 filters/rclsvg.py \
 filters/rcltar \
 filters/rcltex \
@ -697,11 +717,11 @@ filters/rclxmp.py \
 filters/rclxslt.py \
 filters/rclzip \
 filters/recoll-we-move-files.py \
-filters/ppt-dump.py \
+filters/recollepub.zip \
+filters/svg.xsl \
 filters/xls-dump.py \
 filters/xlsxmltocsv.py \
-filters/msodump.zip \
-filters/recollepub.zip \
+filters/xml.xsl \
 python/recoll/recoll/conftree.py \
 python/recoll/recoll/rclconfig.py 

--- a/src/VERSION
+++ b/src/VERSION
@ -1 +1 @@
-1.25.0pre0
+1.25.1
--- a/src/common/autoconfig-win.h
+++ b/src/common/autoconfig-win.h
@ -112,7 +112,10 @@ overriden in the c++ code by ifdefs _WIN32 anyway  */
 #define PACKAGE_NAME "Recoll"

 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "Recoll 1.24.1"
+#define PACKAGE_STRING "Recoll 1.25.1"
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "1.25.1"

 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "recoll"
@ -120,9 +123,6 @@ overriden in the c++ code by ifdefs _WIN32 anyway  */
 /* Define to the home page for this package. */
 #define PACKAGE_URL ""

-/* Define to the version of this package. */
-#define PACKAGE_VERSION "1.24.1"
-
 /* putenv parameter is const */
 /* #undef PUTENV_ARG_CONST */

--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@ -394,6 +394,7 @@ bool RclConfig::updateMainConfig()

    setKeyDir(cstr_null);

+    // TextSplit customization
    bool bvalue = false;
    if (getConfParam("nocjk", &bvalue) && bvalue == true) {
        TextSplit::cjkProcessing(false);
@ -405,16 +406,18 @@ bool RclConfig::updateMainConfig()
            TextSplit::cjkProcessing(true);
        }
    }
-
    bvalue = false;
    if (getConfParam("nonumbers", &bvalue) && bvalue == true) {
        TextSplit::noNumbers();
    }
-
    bvalue = false;
    if (getConfParam("dehyphenate", &bvalue)) {
        TextSplit::deHyphenate(bvalue);
    }
+    bvalue = false;
+    if (getConfParam("backslashasletter", &bvalue)) {
+        TextSplit::backslashAsLetter(bvalue);
+    }

    bvalue = true;
    if (getConfParam("skippedPathsFnmPathname", &bvalue) && bvalue == false) {
@ -1623,7 +1626,12 @@ string RclConfig::findFilter(const string &icmd) const
    // Prepend $datadir/filters
    temp = path_cat(m_datadir, "filters");
    PATH = temp + path_PATHsep() + PATH;
-
+#ifdef _WIN32
+    // Windows only: use the bundled Python
+    temp = path_cat(m_datadir, "filters");
+    temp = path_cat(temp, "python");
+    PATH = temp + path_PATHsep() + PATH;
+#endif
    // Prepend possible configuration parameter?
    if (getConfParam(string("filtersdir"), temp)) {
        temp = path_tildexpand(temp);
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@ -137,6 +137,14 @@ public:
 };
 static const CharClassInit charClassInitInstance;

+// Select how the text splitter classifies the backslash character:
+// as a lowercase letter when 'on' is true, as white space otherwise.
+// Set from the "backslashasletter" configuration variable.
+void TextSplit::backslashAsLetter(bool on)
+{
+    charclasses[int('\\')] = on ? A_LLETTER : SPACE;
+}
+
 static inline int whatcc(unsigned int c)
 {
    if (c <= 127) {
--- a/src/common/textsplit.h
+++ b/src/common/textsplit.h
@ -59,6 +59,11 @@ public:
 	o_deHyphenate = on;
    }

+    // Process backslashes as letters? Default is off, but it may be
+    // useful for searching for tex commands. Config variable:
+    // backslashasletter
+    static void backslashAsLetter(bool on);
+    
    enum Flags {
        // Default: will return spans and words (a_b, a, b)
        TXTS_NONE = 0, 
--- a/src/common/utf8fn.cpp
+++ b/src/common/utf8fn.cpp
@ -7,6 +7,11 @@ using namespace std;

 string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple)
 {
+#ifdef _WIN32
+    // On windows file names are read as UTF16 wchar_t and converted to UTF-8
+    // while scanning directories
+    return ifn;
+#else
    string charset = config->getDefCharset(true);
    string utf8fn; 
    int ercnt;
@ -21,4 +26,5 @@ string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple)
    LOGDEB1("compute_utf8fn: transcoded from ["  << lfn << "] to ["  <<
            utf8fn << "] ("  << charset << "->"  << "UTF-8)\n");
    return utf8fn;
+#endif
 }
--- a/src/configure.ac
+++ b/src/configure.ac
@ -321,6 +321,21 @@ XAPIANCXXFLAGS=`$XAPIAN_CONFIG --cxxflags`
 #echo LIBXAPIANSTATICEXTRA: $LIBXAPIANSTATICEXTRA
 #echo XAPIANCXXFLAGS: $XAPIANCXXFLAGS

+
+XSLT_CONFIG=${XSLT_CONFIG:-no}
+if test "$XSLT_CONFIG" = "no"; then 
+    AC_PATH_PROG(XSLT_CONFIG0, [xslt-config], no)
+    XSLT_CONFIG=$XSLT_CONFIG0
+fi
+if test "$XSLT_CONFIG" = "no" ; then
+   AC_MSG_ERROR([Cannot find xslt-config command in $PATH. Is
+libxslt installed ?])
+   exit 1
+fi
+
+# Query the xslt-config which was selected above (possibly set by the
+# user through the XSLT_CONFIG variable), not the first one in the PATH.
+XSLT_CFLAGS=`$XSLT_CONFIG --cflags`
+XSLT_LINKADD=`$XSLT_CONFIG --libs`
+
 AC_ARG_ENABLE(xadump, 
    AC_HELP_STRING([--enable-xadump],
   [Enable building the xadump low level Xapian access program.]),
@ -527,6 +542,8 @@ AC_SUBST(QMAKE_DISABLE_ZEITGEIST)
 AC_SUBST(LIBQZEITGEIST)
 AC_SUBST(RCLVERSION)
 AC_SUBST(RCLLIBVERSION)
+AC_SUBST(XSLT_CFLAGS)
+AC_SUBST(XSLT_LINKADD)

 # All object files depend on localdefs which has the cc flags. Avoid
 # changing it unless necessary
--- a/src/filters/abiword.xsl
+++ b/src/filters/abiword.xsl
@ -0,0 +1,88 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:ab="http://www.abisource.com/awml.dtd" 
+  exclude-result-prefixes="ab"
+  >
+
+<xsl:output method="html" encoding="UTF-8"/>
+
+<xsl:template match="/">
+<html>
+  <head>
+    <xsl:apply-templates select="ab:abiword/ab:metadata"/>
+  </head>
+  <body>
+
+    <!-- This is for the older abiword format with no namespaces -->
+    <xsl:for-each select="abiword/section">
+      <xsl:apply-templates select="p"/>
+    </xsl:for-each>
+
+    <!-- Newer namespaced format -->
+    <xsl:for-each select="ab:abiword/ab:section">
+      <xsl:for-each select="ab:p">
+        <p><xsl:value-of select="."/></p><xsl:text>
+        </xsl:text>
+      </xsl:for-each>
+    </xsl:for-each>
+
+  </body>
+</html>
+</xsl:template>
+
+<xsl:template match="p">
+  <p><xsl:value-of select="."/></p><xsl:text>
+      </xsl:text>
+</xsl:template>
+
+<xsl:template match="ab:metadata">
+    <xsl:for-each select="ab:m">
+      <xsl:choose>
+        <xsl:when test="@key = 'dc.creator'">
+	  <meta>
+	    <xsl:attribute name="name">author</xsl:attribute>
+	    <xsl:attribute name="content">
+	    <xsl:value-of select="."/>
+	    </xsl:attribute>
+          </meta><xsl:text>
+	    </xsl:text>
+        </xsl:when>
+        <xsl:when test="@key = 'abiword.keywords'">
+	  <meta>
+	    <xsl:attribute name="name">keywords</xsl:attribute>
+	    <xsl:attribute name="content">
+	    <xsl:value-of select="."/>
+	    </xsl:attribute>
+          </meta><xsl:text>
+	    </xsl:text>
+        </xsl:when>
+        <xsl:when test="@key = 'dc.subject'">
+	  <meta>
+	    <xsl:attribute name="name">keywords</xsl:attribute>
+	    <xsl:attribute name="content">
+	    <xsl:value-of select="."/>
+	    </xsl:attribute>
+          </meta><xsl:text>
+	    </xsl:text>
+        </xsl:when>
+        <xsl:when test="@key = 'dc.description'">
+	  <meta>
+	    <xsl:attribute name="name">abstract</xsl:attribute>
+	    <xsl:attribute name="content">
+	    <xsl:value-of select="."/>
+	    </xsl:attribute>
+          </meta><xsl:text>
+	    </xsl:text>
+        </xsl:when>
+        <xsl:when test="@key = 'dc.title'">
+	  <title><xsl:value-of select="."/></title><xsl:text>
+	    </xsl:text>
+        </xsl:when>
+        <xsl:otherwise>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:for-each>
+</xsl:template>
+
+</xsl:stylesheet>
--- a/src/filters/fb2.xsl
+++ b/src/filters/fb2.xsl
@ -0,0 +1,56 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:fb="http://www.gribuser.ru/xml/fictionbook/2.0"
+  exclude-result-prefixes="fb"
+  >
+
+<xsl:output method="html" encoding="UTF-8"/>
+
+<xsl:template match="/fb:FictionBook">
+ <html>
+  <xsl:apply-templates select="fb:description"/>
+  <xsl:apply-templates select="fb:body"/>
+ </html>
+</xsl:template>
+
+<xsl:template match="fb:description">
+  <head>
+    <xsl:apply-templates select="fb:title-info"/>
+  </head><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="fb:description/fb:title-info">
+    <xsl:apply-templates select="fb:book-title"/>
+    <xsl:apply-templates select="fb:author"/>
+</xsl:template>
+
+<xsl:template match="fb:description/fb:title-info/fb:book-title">
+<title> <xsl:value-of select="."/> </title>
+</xsl:template>
+
+<xsl:template match="fb:description/fb:title-info/fb:author">
+  <meta>
+  <xsl:attribute name="name">author</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="fb:first-name"/><xsl:text> </xsl:text>
+     <xsl:value-of select="fb:middle-name"/><xsl:text> </xsl:text>
+     <xsl:value-of select="fb:last-name"/>
+  </xsl:attribute>
+  </meta>
+</xsl:template>
+
+<xsl:template match="fb:body">
+ <body>
+ <xsl:apply-templates select="fb:section"/>
+ </body>
+</xsl:template>
+
+<!-- Output the text of a top-level section. FictionBook sections can be
+     nested (e.g. parts holding chapters), so select all descendant
+     paragraphs, in document order, not only the direct children. -->
+<xsl:template match="fb:body/fb:section">
+  <xsl:for-each select=".//fb:p">
+  <p><xsl:value-of select="."/></p>
+  </xsl:for-each>
+</xsl:template>
+
+</xsl:stylesheet>
--- a/src/filters/gnumeric.xsl
+++ b/src/filters/gnumeric.xsl
@ -0,0 +1,79 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" 
+  xmlns:xlink="http://www.w3.org/1999/xlink" 
+  xmlns:dc="http://purl.org/dc/elements/1.1/" 
+  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" 
+  xmlns:ooo="http://openoffice.org/2004/office"
+  xmlns:gnm="http://www.gnumeric.org/v10.dtd"
+
+  exclude-result-prefixes="office xlink meta ooo dc"
+  >
+
+<xsl:output method="html" encoding="UTF-8"/>
+
+<xsl:template match="/">
+<html>
+  <head>
+   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+   <xsl:apply-templates select="//office:document-meta/office:meta"/>
+  </head>
+
+  <body>
+    <xsl:apply-templates select="//gnm:Cells"/>
+    <xsl:apply-templates select="//gnm:Objects"/>
+  </body>
+</html>
+</xsl:template>
+
+<xsl:template match="//dc:date">
+   <meta>
+     <xsl:attribute name="name">date</xsl:attribute>
+     <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+   </meta>
+</xsl:template>
+
+<xsl:template match="//dc:description">
+  <meta>
+    <xsl:attribute name="name">abstract</xsl:attribute>
+    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+  </meta>
+</xsl:template>
+
+<xsl:template match="//meta:keyword">
+  <meta>
+    <xsl:attribute name="name">keywords</xsl:attribute>
+    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+  </meta>
+</xsl:template>
+
+<xsl:template match="//dc:subject">
+  <meta>
+    <xsl:attribute name="name">keywords</xsl:attribute>
+    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+  </meta>
+</xsl:template>
+
+<xsl:template match="//dc:title">
+  <title> <xsl:value-of select="."/> </title>
+</xsl:template>
+
+<xsl:template match="//meta:initial-creator">
+  <meta>
+    <xsl:attribute name="name">author</xsl:attribute>
+    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+  </meta>
+</xsl:template>
+
+<xsl:template match="office:meta/*"/>
+
+<xsl:template match="gnm:Cell">
+  <p><xsl:value-of select="."/></p>
+</xsl:template>
+
+<xsl:template match="gnm:CellComment">
+  <blockquote><xsl:value-of select="@Text"/></blockquote>
+</xsl:template>
+
+</xsl:stylesheet>
--- a/src/filters/okular-note.xsl
+++ b/src/filters/okular-note.xsl
@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+<xsl:output method="html" encoding="UTF-8"/>
+<xsl:strip-space elements="*" />
+
+<xsl:template match="/">
+<html>
+  <head>
+   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+   <title>
+     Okular notes about: <xsl:value-of select="/documentInfo/@url" />
+   </title>
+  </head>
+  <body>
+    <xsl:apply-templates />
+  </body>
+</html>
+</xsl:template>
+
+<xsl:template match="node()">
+  <xsl:apply-templates select="@* | node() "/>
+</xsl:template>
+
+<xsl:template match="text()">
+  <p><xsl:value-of select="."/></p>
+<xsl:text >
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="@contents|@author">
+  <p><xsl:value-of select="." /></p>
+<xsl:text >
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="@*"/>
+
+</xsl:stylesheet>
--- a/src/filters/opendoc-body.xsl
+++ b/src/filters/opendoc-body.xsl
@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
+  exclude-result-prefixes="text"
+>
+
+<xsl:output method="html" encoding="UTF-8"/>
+
+<xsl:template match="text:p">
+  <p><xsl:apply-templates/></p><xsl:text>
+  </xsl:text>
+</xsl:template>
+
+<xsl:template match="text:h">
+<p><xsl:apply-templates/></p><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="text:s">
+<xsl:text> </xsl:text>
+</xsl:template>
+
+<xsl:template match="text:line-break">
+<br />
+</xsl:template>
+
+<xsl:template match="text:tab">
+<xsl:text>    </xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
--- a/src/filters/opendoc-flat.xsl
+++ b/src/filters/opendoc-flat.xsl
@ -0,0 +1,109 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" 
+  xmlns:xlink="http://www.w3.org/1999/xlink" 
+  xmlns:dc="http://purl.org/dc/elements/1.1/" 
+  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" 
+  xmlns:ooo="http://openoffice.org/2004/office"
+  xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
+  exclude-result-prefixes="office xlink meta ooo dc text"
+  >
+
+  <xsl:output method="html" encoding="UTF-8"/>
+
+  <xsl:template match="/">
+    <html>
+      <head>
+        <xsl:apply-templates select="/office:document/office:meta" />
+      </head>
+      <body>
+        <xsl:apply-templates select="/office:document/office:body" />
+      </body></html>
+  </xsl:template>
+
+
+  <xsl:template match="/office:document/office:meta">
+    <xsl:apply-templates select="dc:title"/>
+    <xsl:apply-templates select="dc:description"/>
+    <xsl:apply-templates select="dc:subject"/>
+    <xsl:apply-templates select="meta:keyword"/>
+    <xsl:apply-templates select="dc:creator"/>
+  </xsl:template>
+
+  <xsl:template match="/office:document/office:body">
+    <!-- Paragraphs and headings are emitted together, in document order.
+         The text:s, text:line-break and text:tab elements are rendered by
+         their own templates while the content of each paragraph or heading
+         is processed, so they are not selected a second time here. -->
+    <xsl:apply-templates select=".//text:p | .//text:h" />
+  </xsl:template>
+
+  <xsl:template match="dc:title">
+    <title> <xsl:value-of select="."/> </title><xsl:text>
+  </xsl:text>
+  </xsl:template>
+
+  <xsl:template match="dc:description">
+    <meta>
+      <xsl:attribute name="name">abstract</xsl:attribute>
+      <xsl:attribute name="content">
+        <xsl:value-of select="."/>
+      </xsl:attribute>
+      </meta><xsl:text>
+    </xsl:text>
+  </xsl:template>
+
+  <xsl:template match="dc:subject">
+    <meta>
+      <xsl:attribute name="name">keywords</xsl:attribute>
+      <xsl:attribute name="content">
+        <xsl:value-of select="."/>
+      </xsl:attribute>
+      </meta><xsl:text>
+    </xsl:text>
+  </xsl:template>
+
+  <xsl:template match="dc:creator">
+    <meta>
+      <xsl:attribute name="name">author</xsl:attribute>
+      <xsl:attribute name="content">
+        <xsl:value-of select="."/>
+      </xsl:attribute>
+      </meta><xsl:text>
+    </xsl:text>
+  </xsl:template>
+
+  <xsl:template match="meta:keyword">
+    <meta>
+      <xsl:attribute name="name">keywords</xsl:attribute>
+      <xsl:attribute name="content">
+        <xsl:value-of select="."/>
+      </xsl:attribute>
+      </meta><xsl:text>
+    </xsl:text>
+  </xsl:template>
+
+  <xsl:template match="office:body//text:p">
+    <p><xsl:apply-templates/></p><xsl:text>
+  </xsl:text>
+  </xsl:template>
+
+  <xsl:template match="office:body//text:h">
+    <p><xsl:apply-templates/></p><xsl:text>
+  </xsl:text>
+  </xsl:template>
+
+  <xsl:template match="office:body//text:s">
+    <xsl:text> </xsl:text>
+  </xsl:template>
+
+  <xsl:template match="office:body//text:line-break">
+    <br />
+  </xsl:template>
+
+  <xsl:template match="office:body//text:tab">
+    <xsl:text>    </xsl:text>
+  </xsl:template>
+
+</xsl:stylesheet>
--- a/src/filters/opendoc-meta.xsl
+++ b/src/filters/opendoc-meta.xsl
@ -0,0 +1,67 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" 
+  xmlns:xlink="http://www.w3.org/1999/xlink" 
+  xmlns:dc="http://purl.org/dc/elements/1.1/" 
+  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" 
+  xmlns:ooo="http://openoffice.org/2004/office"
+  exclude-result-prefixes="office xlink meta ooo dc"
+  >
+
+<xsl:output method="html" encoding="UTF-8"/>
+
+<xsl:template match="/office:document-meta">
+  <xsl:apply-templates select="office:meta/dc:description"/>
+  <xsl:apply-templates select="office:meta/dc:subject"/>
+  <xsl:apply-templates select="office:meta/dc:title"/>
+  <xsl:apply-templates select="office:meta/meta:keyword"/>
+  <xsl:apply-templates select="office:meta/dc:creator"/>
+</xsl:template>
+
+<xsl:template match="dc:title">
+<title> <xsl:value-of select="."/> </title><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="dc:description">
+  <meta>
+  <xsl:attribute name="name">abstract</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="dc:subject">
+  <meta>
+  <xsl:attribute name="name">keywords</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="dc:creator">
+  <meta>
+  <xsl:attribute name="name">author</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="meta:keyword">
+  <meta>
+  <xsl:attribute name="name">keywords</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
--- a/src/filters/openxml-meta.xsl
+++ b/src/filters/openxml-meta.xsl
@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+<xsl:stylesheet 
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
+ xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:dcterms="http://purl.org/dc/terms/"
+ xmlns:dcmitype="http://purl.org/dc/dcmitype/"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+<!--  <xsl:output method="text"/> -->
+  <xsl:output omit-xml-declaration="yes"/>
+
+  <xsl:template match="cp:coreProperties">
+    <xsl:text>&#10;</xsl:text>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+    <xsl:text>&#10;</xsl:text>
+    <xsl:apply-templates/>
+  </xsl:template>
+
+  <xsl:template match="dc:creator">
+    <meta>
+    <xsl:attribute name="name">
+      <!-- <xsl:value-of select="name()"/> pour sortir tous les meta avec 
+       le meme nom que dans le xml (si on devenait dc-natif) -->
+      <xsl:text>author</xsl:text> 
+    </xsl:attribute>
+    <xsl:attribute name="content">
+       <xsl:value-of select="."/>
+    </xsl:attribute>
+    </meta>
+    <xsl:text>&#10;</xsl:text>
+  </xsl:template>
+
+  <xsl:template match="dcterms:modified">
+    <meta>
+    <xsl:attribute name="name">
+      <xsl:text>date</xsl:text> 
+    </xsl:attribute>
+    <xsl:attribute name="content">
+       <xsl:value-of select="."/>
+    </xsl:attribute>
+    </meta>
+    <xsl:text>&#10;</xsl:text>
+  </xsl:template>
+
+  <xsl:template match="*">
+  </xsl:template>
+
+</xsl:stylesheet>
--- a/src/filters/openxml-word-body.xsl
+++ b/src/filters/openxml-word-body.xsl
@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
+                xmlns:ve="http://schemas.openxmlformats.org/markup-compatibility/2006"
+                xmlns:o="urn:schemas-microsoft-com:office:office"
+                xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
+                xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
+                xmlns:v="urn:schemas-microsoft-com:vml"
+                xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
+                xmlns:w10="urn:schemas-microsoft-com:office:word"
+                xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
+                xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml">
+
+  <xsl:output omit-xml-declaration="yes"/>
+
+  <xsl:template match="/">
+    <div>
+      <xsl:apply-templates/> 
+    </div>
+  </xsl:template>
+
+  <xsl:template match="w:p">
+    <p>
+      <xsl:value-of select="."/>
+    </p>
+  </xsl:template>
+
+</xsl:stylesheet>
--- a/src/filters/openxml-xls-body.xsl
+++ b/src/filters/openxml-xls-body.xsl
@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
+                xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
+
+  <xsl:output omit-xml-declaration="yes"/>
+
+  <xsl:template match="/">
+    <div>
+      <xsl:apply-templates/> 
+    </div>
+  </xsl:template>
+
+  <xsl:template match="x:t">
+    <p>
+      <xsl:value-of select="."/>
+    </p>
+  </xsl:template>
+
+</xsl:stylesheet>
--- a/src/filters/rcl7z
+++ b/src/filters/rcl7z
@ -12,10 +12,9 @@ import fnmatch
 import rclexecm

 try:
-    import pylzma
    from py7zlib import Archive7z
 except:
-    print("RECFILTERROR HELPERNOTFOUND python:pylzma")
+    print("RECFILTERROR HELPERNOTFOUND python:py7zlib")
    sys.exit(1);

 try:
--- a/src/filters/rclchm
+++ b/src/filters/rclchm
@ -15,12 +15,10 @@ if PY3:
    from urllib.parse import unquote as urllib_unquote
    from urllib.parse import urlparse as urlparse_urlparse
    from html.parser import HTMLParser
-    chmpackname = 'pychm3.egg'
 else:
    from urlparse import urlparse as urlparse_urlparse
    from urllib import unquote as urllib_unquote
    from HTMLParser import HTMLParser
-    chmpackname = 'pychm2.egg'

 import subprocess

@ -28,12 +26,12 @@ import rclconfig
 import rclexecm

 # pychm has no official port to Python3, hence no package in the
-# standard place.  Recoll bundles a python3 port which we install out
-# of the standard python places. Look for it:
-#  sys.path[0] is for MSW, where we install the egg in the filters
-#  directory? TBD for now
+# standard place. Linux Recoll bundles a python3 port which is identical
+# to pychm, but named recollchm to avoid conflicts because it is installed
+# as a normal python package (in /usr/lib/pythonxx/dist-packages,
+# not recoll/filters). No such issues on Windows.
 try:
-    # First try the system version if any
+    # First try the system (or recoll-local on Windows) version if any
    from chm import chm,chmlib
 except:
    try:
--- a/src/filters/rclexec1.py
+++ b/src/filters/rclexec1.py
@ -61,7 +61,7 @@ class Executor(RclBaseHandler):
            return True, postproc.wrapData()
        else:
            try:
-                fullcmd = cmd + [filename]
+                fullcmd = cmd + [rclexecm.subprocfile(filename)]
                proc = subprocess.Popen(fullcmd,
                                        stdout = subprocess.PIPE)
                stdout = proc.stdout
--- a/src/filters/rclexecm.py
+++ b/src/filters/rclexecm.py
@ -29,13 +29,25 @@ import shutil
 import getopt
 import rclconfig

-PY3 = sys.version > '3'
+PY3 = (sys.version > '3')
+_mswindows = (sys.platform == "win32")

 def makebytes(data):
    if type(data) == type(u''):
        return data.encode("UTF-8")
    return data

+def subprocfile(fn):
+    # On Windows PY3 the list2cmdline() method in subprocess assumes that
+    # all args are str, and we receive file names as UTF-8. So we need
+    # to convert.
+    # On Unix all list elements get converted to bytes in the C
+    # _posixsubprocess module, nothing to do
+    if PY3 and _mswindows:
+        return fn.decode('UTF-8')
+    else:
+        return fn
+
 my_config = rclconfig.RclConfig()

 ############################################
@ -77,7 +89,10 @@ class RclExecM:
            self.errfout = sys.stderr
        
    def rclog(self, s, doexit = 0, exitvalue = 1):
-        print("RCLMFILT: %s: %s" % (self.myname, s), file=self.errfout)
+        # On windows, and I think that it changed quite recently (Qt change?)
+        # we get stdout as stderr. So don't write at all
+        if sys.platform != "win32":
+            print("RCLMFILT: %s: %s" % (self.myname, s), file=self.errfout)
        if doexit:
            sys.exit(exitvalue)

--- a/src/filters/rclimg
+++ b/src/filters/rclimg
@ -140,7 +140,7 @@ sub readparam {

 # JFD: replaced the "use" call with a runtime load with error checking,
 # for compat with the missing filter detection code.
-#use Image::ExifTool qw(:Public);
+use Image::ExifTool qw(:Public);
 eval {require Image::ExifTool; Image::ExifTool->import(qw(:Public));}; 
 if ($@) {
 	print "RECFILTERROR HELPERNOTFOUND Perl::Image::ExifTool\n";
--- a/src/filters/rclpdf.py
+++ b/src/filters/rclpdf.py
@ -479,7 +479,8 @@ class PDFExtractor:
            print("RECFILTERROR HELPERNOTFOUND pdftotext")
            sys.exit(1);

-        self.filename = params["filename:"]
+        self.filename = rclexecm.subprocfile(params["filename:"])
+
        #self.em.rclog("openfile: [%s]" % self.filename)
        self.currentindex = -1
        self.attextractdone = False
--- a/src/filters/rcluncomp.py
+++ b/src/filters/rcluncomp.py
@ -42,7 +42,8 @@ outdir = sys.argv[3]
 try:
    cmd = [sevenz, "e", "-bd", "-y", "-o" + outdir, infile]
    subprocess.check_output(cmd, stderr = subprocess.PIPE)
-    outputname = glob.glob(os.path.join(outdir, "*"))
+    # Don't use os.path.join, we always want to use '/'
+    outputname = glob.glob(outdir + "/*")
    # There should be only one file in there..
    print(outputname[0])
 except Exception as err:
--- a/src/filters/svg.xsl
+++ b/src/filters/svg.xsl
@ -0,0 +1,76 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:svg="http://www.w3.org/2000/svg"
+  xmlns:dc="http://purl.org/dc/elements/1.1/"
+  exclude-result-prefixes="svg"
+  >
+
+<xsl:output method="html" encoding="UTF-8"/>
+
+<xsl:template match="/">
+  <html>
+  <head>
+  <xsl:apply-templates select="svg:svg/svg:title"/>
+  <xsl:apply-templates select="svg:svg/svg:desc"/>
+  <xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:creator"/>
+  <xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:subject"/>
+  <xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:description"/>
+  </head>
+  <body>
+  <xsl:apply-templates select="//svg:text"/>
+  </body>
+  </html>
+</xsl:template>
+
+<xsl:template match="svg:desc"> 
+  <meta>
+  <xsl:attribute name="name">keywords</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="dc:creator"> 
+  <meta>
+  <xsl:attribute name="name">author</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="dc:subject"> 
+  <meta>
+  <xsl:attribute name="name">keywords</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="dc:description"> 
+  <meta>
+  <xsl:attribute name="name">description</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="svg:title"> 
+  <title><xsl:value-of select="."/></title><xsl:text>
+  </xsl:text>
+</xsl:template>
+	    
+<xsl:template match="svg:text"> 
+  <p><xsl:value-of select="."/></p><xsl:text>
+  </xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
--- a/src/filters/xml.xsl
+++ b/src/filters/xml.xsl
@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+		xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+  <xsl:output method="html" encoding="UTF-8"/>
+
+  <xsl:template match="/">
+    <html>
+      <head>
+	<xsl:if test="//*[local-name() = 'title']">
+	  <title>
+	    <xsl:value-of select="//*[local-name() = 'title'][1]"/>
+	  </title>
+	</xsl:if>
+      </head>
+      <body>
+	<xsl:apply-templates/>
+      </body>
+    </html>
+  </xsl:template>
+
+  <xsl:template match="text()">
+    <xsl:if test="string-length(normalize-space(.)) &gt; 0">
+      <p><xsl:value-of select="."/></p>
+      <xsl:text>
+      </xsl:text>
+    </xsl:if>
+  </xsl:template>
+
+  <xsl:template match="*">
+    <xsl:apply-templates/>
+  </xsl:template>
+
+</xsl:stylesheet>
--- a/src/internfile/mh_html.cpp
+++ b/src/internfile/mh_html.cpp
@ -29,17 +29,15 @@

 #include <iostream>

-#ifndef NO_NAMESPACES
 using namespace std;
-#endif /* NO_NAMESPACES */
-

 bool MimeHandlerHtml::set_document_file_impl(const string& mt, const string &fn)
 {
    LOGDEB0("textHtmlToDoc: " << fn << "\n");
    string otext;
-    if (!file_to_string(fn, otext)) {
-	LOGINFO("textHtmlToDoc: cant read: " << fn << "\n");
+    string reason;
+    if (!file_to_string(fn, otext, &reason)) {
+        LOGERR("textHtmlToDoc: cant read: " << fn << ": " << reason << "\n");
 	return false;
    }
    m_filename = fn;
--- a/src/internfile/mh_mail.h
+++ b/src/internfile/mh_mail.h
@ -38,9 +38,7 @@ public:
    MimeHandlerMail(RclConfig *cnf, const std::string &id);
    virtual ~MimeHandlerMail();
    virtual bool is_data_input_ok(DataInput input) const {
-        if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
-            return true;
-        return false;
+        return (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING);
    }
    virtual bool next_document();
    virtual bool skip_to_document(const std::string& ipath);
--- a/src/internfile/mh_xslt.cpp
+++ b/src/internfile/mh_xslt.cpp
@ -0,0 +1,316 @@
+/* Copyright (C) 2005 J.F.Dockes 
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#include "autoconfig.h"
+
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxslt/transform.h>
+#include <libxslt/xsltInternals.h>
+#include <libxslt/xsltutils.h>
+
+#include "cstr.h"
+#include "mh_xslt.h"
+#include "log.h"
+#include "smallut.h"
+#include "md5ut.h"
+#include "rclconfig.h"
+#include "readfile.h"
+
+using namespace std;
+
+// Open question: do we need explicit libxml2 initialisation? If so, it
+// would have to be called once from recollinit, as it is not reentrant:
+//     xmlInitParser();
+//     LIBXML_TEST_VERSION;
+// A matching xmlCleanupParser() call is probably not needed.
+        
+
+/**
+ * File scanner sink feeding the scanned data to a libxml2 push parser.
+ *
+ * init() creates the parser context, data() pushes each chunk to it, and
+ * getDoc() terminates the parse and hands the resulting document to the
+ * caller, who becomes responsible for freeing it. Anything which was not
+ * handed out (parse never finished, or failed) is freed by the destructor.
+ */
+class FileScanXML : public FileScanDo {
+public:
+    FileScanXML(const string& fn) : m_fn(fn) {}
+    // The object owns the parser context: copying would free it twice.
+    FileScanXML(const FileScanXML&) = delete;
+    FileScanXML& operator=(const FileScanXML&) = delete;
+    virtual ~FileScanXML() {
+        release();
+    }
+
+    /**
+     * Terminate the parse and return the document.
+     * @return the parsed document, now owned by the caller (to be freed
+     *   with xmlFreeDoc() or given to an owner like a style sheet), or
+     *   nullptr if there is no parser context or the parse failed.
+     */
+    xmlDocPtr getDoc() {
+        if (ctxt == nullptr) {
+            LOGERR("FileScanXML: getDoc: no parser context\n");
+            return nullptr;
+        }
+        int ret;
+        if ((ret = xmlParseChunk(ctxt, nullptr, 0, 1))) {
+            LOGERR("FileScanXML: final xmlParseChunk failed with error " <<
+                   ret << " error: " << lastErrorMessage() << "\n");
+            return nullptr;
+        }
+        // Transfer ownership: the destructor must not free what the
+        // caller now holds.
+        xmlDocPtr doc = ctxt->myDoc;
+        ctxt->myDoc = nullptr;
+        return doc;
+    }
+
+    /** Start a scan: (re)create the push parser context.
+     * @return false if the context could not be created. */
+    virtual bool init(int64_t size, string *) {
+        LOGDEB1("FileScanXML: init: size " << size << endl);
+        // Do not leak the state of a previous scan if we are reused.
+        release();
+        ctxt = xmlCreatePushParserCtxt(nullptr, nullptr, nullptr, 0,
+                                       m_fn.c_str());
+        if (ctxt == nullptr) {
+            LOGERR("FileScanXML: xmlCreatePushParserCtxt failed\n");
+            return false;
+        }
+        return true;
+    }
+    
+    /** Push one chunk of data to the parser.
+     * @param buf data, not necessarily NUL-terminated.
+     * @param cnt number of valid bytes in buf.
+     * @return false on parse error. */
+    virtual bool data(const char *buf, int cnt, string*) {
+        LOGDEB1("FileScanXML: data: cnt " << cnt << endl);
+        if (ctxt == nullptr) {
+            LOGERR("FileScanXML: data: no parser context\n");
+            return false;
+        }
+        int ret;
+        if ((ret = xmlParseChunk(ctxt, buf, cnt, 0))) {
+            // buf is not a C string: only log the bytes which we own.
+            LOGERR("FileScanXML: xmlParseChunk failed with error " <<
+                   ret << " for [" <<
+                   ((buf && cnt > 0) ? string(buf, cnt) : string()) <<
+                   "] error " << lastErrorMessage() << "\n");
+            return false;
+        }
+        LOGDEB1("xmlParseChunk ok (sent " << cnt << " bytes)\n");
+        return true;
+    }
+
+private:
+    // Printable text for the last libxml2 error, never null.
+    static const char *lastErrorMessage() {
+        const xmlError *error = xmlGetLastError();
+        if (error == nullptr) {
+            return " null return from xmlGetLastError()";
+        }
+        return error->message ? error->message : " null error message";
+    }
+
+    // Free the parser context and the document it may still own
+    // (xmlFreeParserCtxt() does not free the document by itself).
+    void release() {
+        if (ctxt) {
+            if (ctxt->myDoc) {
+                xmlFreeDoc(ctxt->myDoc);
+                ctxt->myDoc = nullptr;
+            }
+            xmlFreeParserCtxt(ctxt);
+            ctxt = nullptr;
+        }
+    }
+
+    xmlParserCtxtPtr ctxt{nullptr};
+    string m_fn;
+};
+
+/**
+ * Private data and helpers for MimeHandlerXslt: the compiled style
+ * sheets, the names of the archive members which they apply to (if
+ * any), and the HTML result of the last conversion.
+ */
+class MimeHandlerXslt::Internal {
+public:
+    Internal(MimeHandlerXslt *_p)
+        : p(_p) {}
+
+    // Free the style sheets which were successfully compiled.
+    ~Internal() {
+        if (nullptr != metaOrAllSS) {
+            xsltFreeStylesheet(metaOrAllSS);
+        }
+        if (nullptr != bodySS) {
+            xsltFreeStylesheet(bodySS);
+        }
+    }
+
+    // Load and compile the style sheet named ssnm from the filters directory.
+    xsltStylesheet *prepare_stylesheet(const string& ssnm);
+
+    // Convert the input (file fn if not empty, else data) into 'result'.
+    bool process_doc_or_string(bool forpv, const string& fn, const string& data);
+
+    // Parse the input, or the given member of it, and apply one style sheet.
+    bool apply_stylesheet(
+        const string& fn, const string& member, const string& data,
+        xsltStylesheet *ssp, string& result, string *md5p);
+
+    // Back pointer to the handler object.
+    MimeHandlerXslt *p;
+    // Set by the handler constructor when all needed style sheets loaded.
+    bool ok{false};
+    // Member name and style sheet for the metadata, or, when there is no
+    // body style sheet, the single style sheet for the whole document.
+    string metamember;
+    xsltStylesheet *metaOrAllSS{nullptr};
+    // Member name and style sheet for the body.
+    string bodymember;
+    xsltStylesheet *bodySS{nullptr};
+    // Output of the last conversion.
+    string result;
+    // Directory where the style sheets are looked up.
+    string filtersdir;
+};
+
+// Dispose of the private implementation object allocated by the constructor.
+MimeHandlerXslt::~MimeHandlerXslt()
+{
+    delete m;
+}
+
+/**
+ * Build a handler from the handler definition words.
+ *
+ * @param params either 2 words: "xslt stylesheetall", or 5 words:
+ *   "xslt metamember metastylesheet bodymember bodystylesheet". The
+ *   handler is only usable (m->ok) if all the named style sheets could
+ *   be loaded.
+ */
+MimeHandlerXslt::MimeHandlerXslt(RclConfig *cnf, const std::string& id,
+                                 const std::vector<std::string>& params)
+    : RecollFilter(cnf, id), m(new Internal(this))
+{
+    LOGDEB("MimeHandlerXslt: params: " << stringsToString(params) << endl);
+    m->filtersdir = path_cat(cnf->getDatadir(), "filters");
+
+    xmlSubstituteEntitiesDefault(0);
+    xmlLoadExtDtdDefaultValue = 0;
+
+    switch (params.size()) {
+    case 2:
+        // Single style sheet producing the whole document.
+        m->metaOrAllSS = m->prepare_stylesheet(params[1]);
+        m->ok = (nullptr != m->metaOrAllSS);
+        break;
+    case 5:
+        // Separate members and style sheets for metadata and body.
+        m->metamember = params[1];
+        m->metaOrAllSS = m->prepare_stylesheet(params[2]);
+        m->bodymember = params[3];
+        m->bodySS =  m->prepare_stylesheet(params[4]);
+        m->ok = (nullptr != m->metaOrAllSS && nullptr != m->bodySS);
+        break;
+    default:
+        LOGERR("MimeHandlerXslt: constructor with wrong param vector: " <<
+               stringsToString(params) << endl);
+        break;
+    }
+}
+
+/**
+ * Load and compile a style sheet.
+ *
+ * @param ssnm style sheet file name, relative to the filters directory.
+ * @return the compiled style sheet, to be freed with xsltFreeStylesheet(),
+ *   or nullptr if the file could not be read, parsed or compiled.
+ */
+xsltStylesheet *MimeHandlerXslt::Internal::prepare_stylesheet(const string& ssnm)
+{
+    string ssfn = path_cat(filtersdir, ssnm);
+    FileScanXML XMLstyle(ssfn);
+    string reason;
+    if (!file_scan(ssfn, &XMLstyle, &reason)) {
+        LOGERR("MimeHandlerXslt: file_scan failed for style sheet " <<
+               ssfn << " : " << reason << endl);
+        return nullptr;
+    }
+    xmlDoc *stl = XMLstyle.getDoc();
+    if (stl == nullptr) {
+        LOGERR("MimeHandlerXslt: getDoc failed for style sheet " <<
+               ssfn << endl);
+        return nullptr;
+    }
+    xsltStylesheet *ss = xsltParseStylesheetDoc(stl);
+    if (ss == nullptr) {
+        // The style sheet only takes ownership of the document when the
+        // compilation succeeds: on failure it is still ours to free.
+        LOGERR("MimeHandlerXslt: xsltParseStylesheetDoc failed for style "
+               "sheet " << ssfn << endl);
+        xmlFreeDoc(stl);
+    }
+    return ss;
+}
+
+/**
+ * Parse an XML input and transform it with the given style sheet.
+ *
+ * @param fn input file name. If empty, the input is taken from 'data'.
+ * @param member if not empty, name of the member to extract from the
+ *   input and parse, else the input itself is parsed.
+ * @param data input data, used when fn is empty.
+ * @param ssp compiled style sheet to apply.
+ * @param[out] result serialized output of the transformation.
+ * @param md5p passed to the scanning function when processing the whole
+ *   input (no member); may be null.
+ * @return false if the input could not be read or parsed, or if the
+ *   transformation or its serialization failed.
+ */
+bool MimeHandlerXslt::Internal::apply_stylesheet(
+    const string& fn, const string& member, const string& data,
+    xsltStylesheet *ssp, string& result, string *md5p)
+{
+    if (nullptr == ssp) {
+        LOGERR("MimeHandlerXslt::set_document_: no style sheet\n");
+        return false;
+    }
+    FileScanXML XMLdoc(fn);
+    string reason;
+    bool res;
+    if (!fn.empty()) {
+        if (member.empty()) {
+            res = file_scan(fn, &XMLdoc, 0, -1, &reason, md5p);
+        } else {
+            res = file_scan(fn, member, &XMLdoc, &reason);
+        }
+    } else {
+        if (member.empty()) {
+            res = string_scan(data.c_str(), data.size(), &XMLdoc, &reason, md5p);
+        } else {
+            res = string_scan(data.c_str(), data.size(), member, &XMLdoc,
+                                                      &reason);
+        }
+    }
+    if (!res) {
+        LOGERR("MimeHandlerXslt::set_document_: file_scan failed for "<<
+               fn << " " << member << " : " << reason << endl);
+        return false;
+    }
+
+    xmlDocPtr doc = XMLdoc.getDoc();
+    if (nullptr == doc) {
+        LOGERR("MimeHandlerXslt::set_document_: no parsed doc\n");
+        return false;
+    }
+    xmlDocPtr transformed = xsltApplyStylesheet(ssp, doc, nullptr);
+    if (nullptr == transformed) {
+        LOGERR("MimeHandlerXslt::set_document_: xslt transform failed\n");
+        xmlFreeDoc(doc);
+        return false;
+    }
+
+    // Serialize with the output settings (xsl:output) of the style sheet
+    // which was actually applied, not those of the metadata one.
+    xmlChar *outstr = nullptr;
+    int outlen = 0;
+    bool saved =
+        xsltSaveResultToString(&outstr, &outlen, transformed, ssp) >= 0;
+    if (saved) {
+        // An empty result tree yields a null string: this is not an error.
+        if (outstr && outlen > 0) {
+            result.assign(reinterpret_cast<const char*>(outstr), outlen);
+        } else {
+            result.clear();
+        }
+    } else {
+        LOGERR("MimeHandlerXslt::set_document_: xslt save result failed\n");
+    }
+    if (outstr) {
+        xmlFree(outstr);
+    }
+    xmlFreeDoc(transformed);
+    xmlFreeDoc(doc);
+    return saved;
+}
+
+/**
+ * Convert the input document to HTML, stored in 'result'.
+ *
+ * With a single style sheet, it is applied to the whole input and its
+ * output is the result (the MD5 is also stored in the handler metadata
+ * unless processing for preview). With two style sheets, the outputs of
+ * the metadata and body ones, each applied to its own member of the
+ * input, are wrapped into an HTML head and body.
+ *
+ * @param forpreview true if the conversion is for preview.
+ * @param fn input file name. If empty the input is taken from 'data'.
+ * @param data input data, used when fn is empty.
+ * @return false if no style sheet is available or a conversion failed.
+ */
+bool MimeHandlerXslt::Internal::process_doc_or_string(
+    bool forpreview, const string& fn, const string& data)
+{
+    if (nullptr == metaOrAllSS && nullptr == bodySS) {
+        LOGERR("MimeHandlerXslt::set_document_file_impl: both ss empty??\n");
+        return false;
+    }
+    p->m_metaData[cstr_dj_keycharset] = cstr_utf8;
+    if (nullptr == bodySS) {
+        string md5;
+        if (apply_stylesheet(fn, string(), data, metaOrAllSS, result,
+                             forpreview ? nullptr : &md5)) {
+            if (!forpreview) {
+                p->m_metaData[cstr_dj_keymd5] = md5;
+            }
+            return true;
+        }
+        return false;
+    } else {
+        // Note the space before "content": attributes must be separated
+        // by white space for the tag to be well-formed HTML.
+        result = "<html>\n<head>\n<meta http-equiv=\"Content-Type\""
+            " content=\"text/html; charset=UTF-8\">";
+        string part;
+        if (!apply_stylesheet(fn,metamember, data, metaOrAllSS, part, nullptr)) {
+            return false;
+        }
+        result += part;
+        result += "</head>\n<body>\n";
+        if (!apply_stylesheet(fn, bodymember, data, bodySS, part, nullptr)) {
+            return false;
+        }
+        result += part;
+        result += "</body></html>";
+    }
+    return true;
+}
+
+// Convert the document stored in file fn. On success the result is kept
+// for the next call to next_document().
+bool MimeHandlerXslt::set_document_file_impl(const std::string& mt, 
+                                             const std::string &fn)
+{
+    LOGDEB0("MimeHandlerXslt::set_document_file_: fn: " << fn << endl);
+    if (!m || !m->ok) {
+        return false;
+    }
+    if (!m->process_doc_or_string(m_forPreview, fn, string())) {
+        return false;
+    }
+    m_havedoc = true;
+    return true;
+}
+
+// Convert the document held in memory in txt. On success the result is
+// kept for the next call to next_document().
+bool MimeHandlerXslt::set_document_string_impl(const string& mt, 
+                                               const string& txt)
+{
+    LOGDEB0("MimeHandlerXslt::set_document_string_\n");
+    if (!m || !m->ok) {
+        return false;
+    }
+    if (!m->process_doc_or_string(m_forPreview, string(), txt)) {
+        return false;
+    }
+    m_havedoc = true;
+    return true;
+}
+
+// Return the single converted document: store its MIME type (text/html)
+// and its content, moved out of the conversion result, into the metadata.
+// Further calls fail until a new document is set.
+bool MimeHandlerXslt::next_document()
+{
+    if (!m || !m->ok || !m_havedoc) {
+        return false;
+    }
+    m_havedoc = false;
+    m_metaData[cstr_dj_keymt] = cstr_texthtml;
+    // Swap instead of copying: the result is not needed any more.
+    m_metaData[cstr_dj_keycontent].swap(m->result);
+    LOGDEB1("MimeHandlerXslt::next_document: result: [" <<
+            m_metaData[cstr_dj_keycontent] << "]\n");
+    return true;
+}
+
+// Forget the current document: drop the conversion result and the
+// "document available" state.
+void MimeHandlerXslt::clear_impl()
+{
+    m->result.clear();
+    m_havedoc = false;
+}
--- a/src/internfile/mh_xslt.h
+++ b/src/internfile/mh_xslt.h
@ -0,0 +1,49 @@
+/* Copyright (C) 2018 J.F.Dockes
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#ifndef _MH_XSLT_H_INCLUDED_
+#define _MH_XSLT_H_INCLUDED_
+
+#include <string>
+
+#include "mimehandler.h"
+
+/**
+ * Handler for XML-based document types, converted to HTML by applying
+ * one or two XSLT style sheets (see the constructor for the parameters).
+ * Accepts input both as a file name and as in-memory data.
+ */
+class MimeHandlerXslt : public RecollFilter {
+ public:
+    /**
+     * @param params handler definition words: either
+     *   "xslt stylesheetall" or
+     *   "xslt metamember metastylesheet bodymember bodystylesheet".
+     */
+    MimeHandlerXslt(RclConfig *cnf, const std::string& id,
+                    const std::vector<std::string>& params);
+    virtual ~MimeHandlerXslt();
+
+    // The object owns its private data through a raw pointer deleted by
+    // the destructor: copying would delete it twice.
+    MimeHandlerXslt(const MimeHandlerXslt&) = delete;
+    MimeHandlerXslt& operator=(const MimeHandlerXslt&) = delete;
+
+    virtual bool next_document() override;
+    virtual void clear_impl() override;
+
+    virtual bool is_data_input_ok(DataInput input) const override {
+        return (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING);
+    }
+
+protected:
+    virtual bool set_document_file_impl(const std::string& mt, 
+                                        const std::string& file_path);
+    virtual bool set_document_string_impl(const std::string& mt,
+                                          const std::string& data);
+
+    class Internal;
+private:
+    // Private implementation data, allocated by the constructor.
+    Internal *m{nullptr};
+};
+
+
+#endif /* _MH_XSLT_H_INCLUDED_ */
--- a/src/internfile/mimehandler.cpp
+++ b/src/internfile/mimehandler.cpp
@ -41,6 +41,7 @@ using namespace std;
 #include "mh_symlink.h"
 #include "mh_unknown.h"
 #include "mh_null.h"
+#include "mh_xslt.h"

 // Performance help: we use a pool of already known and created
 // handlers. There can be several instances for a given mime type
@ -137,11 +138,17 @@ void clearMimeHandlerCache()

 /** For mime types set as "internal" in mimeconf: 
  * create appropriate handler object. */
-static RecollFilter *mhFactory(RclConfig *config, const string &mime,
+static RecollFilter *mhFactory(RclConfig *config, const string &mimeOrParams,
 				bool nobuild, string& id)
 {
-    LOGDEB2("mhFactory(" << mime << ")\n");
-    string lmime(mime);
+    LOGDEB1("mhFactory(" << mimeOrParams << ")\n");
+    vector<string> lparams;
+    stringToStrings(mimeOrParams, lparams);
+    if (lparams.empty()) {
+        // ??
+        return nullptr;
+    }
+    string lmime(lparams[0]);
    stringtolower(lmime);
    if (cstr_textplain == lmime) {
 	LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText\n");
@ -160,11 +167,11 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
 	MD5String("MimeHandlerMail", id);
 	return nobuild ? 0 : new MimeHandlerMail(config, id);
    } else if ("inode/symlink" == lmime) {
-	LOGDEB2("mhFactory(" << mime << "): ret MimeHandlerSymlink\n");
+	LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerSymlink\n");
 	MD5String("MimeHandlerSymlink", id);
 	return nobuild ? 0 : new MimeHandlerSymlink(config, id);
    } else if ("application/x-zerosize" == lmime) {
-	LOGDEB("mhFactory(" << mime << "): ret MimeHandlerNull\n");
+	LOGDEB("mhFactory(" << lmime << "): returning MimeHandlerNull\n");
 	MD5String("MimeHandlerNull", id);
 	return nobuild ? 0 : new MimeHandlerNull(config, id);
    } else if (lmime.find("text/") == 0) {
@ -175,7 +182,11 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
        // exec) but still opening with a specific editor.
 	LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText(x)\n");
 	MD5String("MimeHandlerText", id);
-        return nobuild ? 0 : new MimeHandlerText(config, id); 
+        return nobuild ? 0 : new MimeHandlerText(config, id);
+    } else if ("xsltproc" == lmime) {
+        // XML Types processed with one or several xslt style sheets.
+        MD5String(mimeOrParams, id);
+        return nobuild ? 0 : new MimeHandlerXslt(config, id, lparams);
    } else {
 	// We should not get there. It means that "internal" was set
 	// as a handler in mimeconf for a mime type we actually can't
@ -262,7 +273,7 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,

 /* Get handler/filter object for given mime type: */
 RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg, 
-			      bool filtertypes)
+                             bool filtertypes)
 {
    LOGDEB("getMimeHandler: mtype [" << mtype << "] filtertypes " <<
           filtertypes << "\n");
@ -291,7 +302,7 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
 	}
 	bool internal = !stringlowercmp("internal", handlertype);
 	if (internal) {
-	    // For internal types let the factory compute the id
+	    // For internal types let the factory compute the cache id
 	    mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, true, id);
 	} else {
 	    // exec/execm: use the md5 of the def line
@ -304,16 +315,15 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
 	    goto out;

 	LOGDEB2("getMimeHandler: " << mtype << " not in cache\n");
-
-	// Not in cache. 
 	if (internal) {
 	    // If there is a parameter after "internal" it's the mime
-	    // type to use. This is so that we can have bogus mime
-	    // types like text/x-purple-html-log (for ie: specific
-	    // icon) and still use the html filter on them. This is
-	    // partly redundant with the localfields/rclaptg, but
-	    // better and the latter will probably go away at some
-	    // point in the future.
+	    // type to use, or the further qualifier (e.g. style sheet
+	    // name for xslt types). This is so that we can have bogus
+	    // mime types like text/x-purple-html-log (for ie:
+	    // specific icon) and still use the html filter on
+	    // them. This is partly redundant with the
+	    // localfields/rclaptg, but better? (and the latter will
+	    // probably go away at some point in the future?).
 	    LOGDEB2("handlertype internal, cmdstr [" << cmdstr << "]\n");
 	    h = mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, false, id);
 	    goto out;
@ -336,14 +346,10 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
 		goto out;
            }
 	}
-    }
-
-    // We get here if there was no specific error, but there is no
-    // identified mime type, or no handler associated.
-
-    // Finally, unhandled files are either ignored or their name and
-    // generic metadata is indexed, depending on configuration
-    {
+    } else {
+        // No identified mime type, or no handler associated.
+        // Unhandled files are either ignored or their name and
+        // generic metadata is indexed, depending on configuration
 	bool indexunknown = false;
 	cfg->getConfParam("indexallfilenames", &indexunknown);
 	if (indexunknown) {
--- a/src/internfile/uncomp.cpp
+++ b/src/internfile/uncomp.cpp
@ -118,15 +118,15 @@ bool Uncomp::uncompressfile(const string& ifn,
    ExecCmd ex;
    int status = ex.doexec(cmd, args, 0, &tfile);
    if (status || tfile.empty()) {
-	LOGERR("uncompressfile: doexec: failed for [" << ifn << "] status 0x" <<
-               status << "\n");
+	LOGERR("uncompressfile: doexec: " << cmd << " " <<
+               stringsToString(args) << " failed for [" <<
+               ifn << "] status 0x" << status << "\n");
 	if (!m_dir->wipe()) {
 	    LOGERR("uncompressfile: wipedir failed\n");
 	}
 	return false;
    }
-    if (tfile[tfile.length() - 1] == '\n')
-	tfile.erase(tfile.length() - 1, 1);
+    rtrimstring(tfile, "\n\r");
    m_tfile = tfile;
    m_srcpath = ifn;
    return true;
--- a/src/python/recoll/recoll/rclconfig.py
+++ b/src/python/recoll/recoll/rclconfig.py
@ -12,7 +12,7 @@ import conftree

 class RclDynConf:
    def __init__(self, fname):
-        self.data = ConfSimple(fname)
+        self.data = conftree.ConfSimple(fname)

    def getStringList(self, sk):
        nms = self.data.getNames(sk)
@ -95,6 +95,6 @@ class RclExtraDbs:
    
 if __name__ == '__main__':
    config = RclConfig()
-    print(config.getConfParam("topdirs"))
+    print("topdirs = %s" % config.getConfParam("topdirs"))
    extradbs = RclExtraDbs(config)
    print(extradbs.getActDbs())
--- a/src/qtgui/rclm_view.cpp
+++ b/src/qtgui/rclm_view.cpp
@ -341,7 +341,7 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
    // If using an actual file, check that it exists, and if it is
    // compressed, we may need an uncompressed version
    if (!fn.empty() && theconfig->mimeViewerNeedsUncomp(doc.mimetype)) {
-        if (access(fn.c_str(), R_OK) != 0) {
+        if (!path_readable(fn)) {
            QMessageBox::warning(0, "Recoll", 
                                 tr("Can't access file: ") + u8s2qs(fn));
            return;
@ -445,9 +445,13 @@ void RclMain::execViewer(const map<string, string>& subs, bool enterHistory,
 #endif
    QStatusBar *stb = statusBar();
    if (stb) {
-	string fcharset = theconfig->getDefCharset(true);
 	string prcmd;
+#ifdef _WIN32
+        prcmd = ncmd;
+#else
+	string fcharset = theconfig->getDefCharset(true);
 	transcode(ncmd, prcmd, fcharset, "UTF-8");
+#endif
 	QString msg = tr("Executing: [") + 
 	    QString::fromUtf8(prcmd.c_str()) + "]";
 	stb->showMessage(msg, 10000);
--- a/src/qtgui/restable.cpp
+++ b/src/qtgui/restable.cpp
@ -375,7 +375,9 @@ QVariant RecollModel::data(const QModelIndex& index, int role) const

    string data = m_getters[index.column()](colname, doc);

+#ifndef _WIN32
    // Special case url, because it may not be utf-8. URL-encode in this case.
+    // Not on windows, where we always read the paths as Unicode.
    if (!colname.compare("url")) {
        int ecnt;
        string data1;
@ -383,6 +385,7 @@ QVariant RecollModel::data(const QModelIndex& index, int role) const
            data = url_encode(data);
        }
    }
+#endif

    list<string> lr;
    g_hiliter.plaintorich(data, lr, m_hdata);
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -257,9 +257,8 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
        Xapian::DB_CREATE_OR_OVERWRITE;

 #ifdef _WIN32
-    // Xapian is quite bad at erasing partial db which can
-    // occur because of open file deletion errors on
-    // Windows. 
+    // On Windows, Xapian is quite bad at erasing partial db which can
+    // occur because of open file deletion errors.
    if (mode == DbTrunc) {
        if (path_exists(path_cat(dir, "iamchert"))) {
            wipedir(dir);
@ -268,9 +267,21 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
    }
 #endif
    
-    if (::access(dir.c_str(), 0) == 0) {
-        // Existing index
+    if (path_exists(dir)) {
+        // Existing index. 
        xwdb = Xapian::WritableDatabase(dir, action);
+        if (action == Xapian::DB_CREATE_OR_OVERWRITE ||
+            xwdb.get_doccount() == 0) {
+            // New or empty index. Set the "store text" option
+            // according to configuration. The metadata record will be
+            // written further down.
+            m_storetext = o_index_storedoctext;
+            LOGDEB("Db:: index " << (m_storetext?"stores":"does not store") <<
+                   " document text\n");
+        } else {
+            // Existing non empty. Get the option from the index.
+            storesDocText(xwdb);
+        }
    } else {
        // New index. If possible, and depending on config, use a stub
        // to force using Chert. No sense in doing this if we are
@ -301,23 +312,22 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
        LOGINF("Rcl::Db::openWrite: new index will " << (m_storetext?"":"not ")
               << "store document text\n");
 #else
-        // Old Xapian (chert only) or newer (no chert). Use the
+        // Old Xapian (chert only) or much newer (no chert). Use the
        // default index backend and let the user decide of the
        // abstract generation method. The configured default is to
        // store the text.
        xwdb = Xapian::WritableDatabase(dir, action);
        m_storetext = o_index_storedoctext;
 #endif
-        // Set the storetext value inside the index descriptor (new
-        // with recoll 1.24, maybe we'll have other stuff to store in
-        // there in the future).
+    }
+
+    // If the index is empty, write the data format version, 
+    // and the storetext option value inside the index descriptor (new
+    // with recoll 1.24, maybe we'll have other stuff to store in
+    // there in the future).
+    if (xwdb.get_doccount() == 0) {
        string desc = string("storetext=") + (m_storetext ? "1" : "0") + "\n";
        xwdb.set_metadata(cstr_RCL_IDX_DESCRIPTOR_KEY, desc);
-    }
-    
-    // If the index is empty, write the data format version at once
-    // to avoid stupid error messages:
-    if (xwdb.get_doccount() == 0) {
        xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY, cstr_RCL_IDX_VERSION);
    }

@ -328,21 +338,26 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
 #endif
 }

-void Db::Native::openRead(const string& dir)
+void Db::Native::storesDocText(Xapian::Database& db)
 {
-    m_iswritable = false;
-    xrdb = Xapian::Database(dir);
-    string desc = xrdb.get_metadata(cstr_RCL_IDX_DESCRIPTOR_KEY);
+    string desc = db.get_metadata(cstr_RCL_IDX_DESCRIPTOR_KEY);
    ConfSimple cf(desc, 1);
    string val;
    m_storetext = false;
    if (cf.get("storetext", val) && stringToBool(val)) {
        m_storetext = true;
    }
-    LOGDEB("Db::openRead: index " << (m_storetext?"stores":"does not store") <<
+    LOGDEB("Db:: index " << (m_storetext?"stores":"does not store") <<
           " document text\n");
 }

+void Db::Native::openRead(const string& dir)
+{
+    m_iswritable = false;
+    xrdb = Xapian::Database(dir);
+    storesDocText(xrdb);
+}
+
 /* See comment in class declaration: return all subdocuments of a
 * document given by its unique id. */
 bool Db::Native::subDocs(const string &udi, int idxi, 
--- a/src/rcldb/rcldb_p.h
+++ b/src/rcldb/rcldb_p.h
@ -105,6 +105,10 @@ class Db::Native {

    void openWrite(const std::string& dir, Db::OpenMode mode);
    void openRead(const string& dir);
+
+    // Determine if an existing index is of the full-text-storing kind
+    // by looking at the index metadata. Stores the result in m_storetext
+    void storesDocText(Xapian::Database&);
    
    // Final steps of doc update, part which need to be single-threaded
    bool addOrUpdateWrite(const string& udi, const string& uniterm, 
--- a/src/sampleconf/mimeconf
+++ b/src/sampleconf/mimeconf
@ -79,38 +79,38 @@ application/postscript = exec rclps
 application/sql = internal text/plain
 application/vnd.ms-excel = execm rclxls.py
 application/vnd.ms-powerpoint = execm rclppt.py
-application/vnd.oasis.opendocument.text = execm rclsoff.py
-application/vnd.oasis.opendocument.text-template = execm rclsoff.py
-application/vnd.oasis.opendocument.presentation = execm rclsoff.py 
-application/vnd.oasis.opendocument.spreadsheet = execm rclsoff.py
-application/vnd.oasis.opendocument.graphics = execm rclsoff.py
-application/vnd.oasis.opendocument.presentation-flat-xml = execm rclsoff-flat.py
-application/vnd.oasis.opendocument.text-flat-xml = execm rclsoff-flat.py
-application/vnd.oasis.opendocument.spreadsheet-flat-xml = execm rclsoff-flat.py
+application/vnd.oasis.opendocument.text = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.oasis.opendocument.text-template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.oasis.opendocument.presentation = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.oasis.opendocument.spreadsheet = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.oasis.opendocument.graphics = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.oasis.opendocument.presentation-flat-xml = internal xsltproc opendoc-flat.xsl
+application/vnd.oasis.opendocument.text-flat-xml = internal xsltproc opendoc-flat.xsl
+application/vnd.oasis.opendocument.spreadsheet-flat-xml = internal xsltproc opendoc-flat.xsl
 application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
- execm rclopxml.py
+ internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
 application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
- execm rclopxml.py
+ internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
 application/vnd.openxmlformats-officedocument.presentationml.template = \
 execm rclopxml.py
 application/vnd.openxmlformats-officedocument.presentationml.presentation = \
 execm rclopxml.py
 application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
- execm rclopxml.py
+ internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
 application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
- execm rclopxml.py
-application/vnd.sun.xml.calc = execm rclsoff.py
-application/vnd.sun.xml.calc.template = execm rclsoff.py
-application/vnd.sun.xml.draw = execm rclsoff.py
-application/vnd.sun.xml.draw.template = execm rclsoff.py
-application/vnd.sun.xml.impress = execm rclsoff.py
-application/vnd.sun.xml.impress.template = execm rclsoff.py
-application/vnd.sun.xml.math = execm rclsoff.py
-application/vnd.sun.xml.writer = execm rclsoff.py
-application/vnd.sun.xml.writer.global = execm rclsoff.py
-application/vnd.sun.xml.writer.template = execm rclsoff.py
+ internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
+application/vnd.sun.xml.calc = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.calc.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.draw = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.draw.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.impress = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.impress.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.math = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.writer = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.writer.global = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.writer.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
 application/vnd.wordperfect = exec wpd2html;mimetype=text/html
-application/x-abiword = execm rclabw.py
+application/x-abiword = internal xsltproc abiword.xsl
 application/x-awk = internal text/plain
 application/x-chm = execm rclchm
 application/x-dia-diagram = execm rcldia;mimetype=text/plain
@ -118,12 +118,12 @@ application/x-dvi = exec rcldvi
 application/x-flac = execm rclaudio
 application/x-gnote = execm rclxml.py
 application/x-gnuinfo = execm rclinfo
-application/x-gnumeric = execm rclgnm.py
+application/x-gnumeric = internal xsltproc gnumeric.xsl
 application/x-kword = exec rclkwd
 application/x-lyx = exec rcllyx
 application/x-mimehtml = internal message/rfc822
 #application/x-mobipocket-ebook = execm rclmobi
-application/x-okular-notes = execm rclokulnote.py
+application/x-okular-notes = internal xsltproc okular-note.xsl
 application/x-perl = internal text/plain
 # Returned by xdg-mime for .php. Future-proofing                   
 application/x-php = internal text/plain
@ -149,7 +149,7 @@ image/jpeg = execm rclimg
 image/png = execm rclimg
 image/tiff = execm rclimg
 image/vnd.djvu = execm rcldjvu.py
-image/svg+xml = execm rclsvg.py
+image/svg+xml = internal xsltproc svg.xsl
 image/x-xcf = execm rclimg
 image/x-nikon-nef = execm rclimg
 inode/symlink = internal
@ -168,7 +168,7 @@ text/css = internal text/plain
 application/javascript = internal text/plain
 text/x-bibtex = exec rclbibtex.sh ; mimetype = text/plain
 text/x-csv = internal text/plain
-text/x-fictionbook = execm rclfb2.py
+text/x-fictionbook = internal xsltproc fb2.xsl
 text/x-gaim-log = exec rclgaim
 text/x-html-aptosid-man = exec rclaptosidman
 text/x-lua = internal
@ -190,7 +190,7 @@ text/x-tex = exec rcltex
 # All parameter and tag names, attribute values etc, are indexed as
 # text. rclxml.py tries to just index the text content.
 #application/xml = execm rclxml.py
-#text/xml = execm rclxml.py
+#text/xml = internal xsltproc xml.xsl
 application/xml = internal text/plain
 text/xml = internal text/plain

--- a/src/utils/conftree.cpp
+++ b/src/utils/conftree.cpp
@ -507,6 +507,13 @@ int ConfSimple::eraseKey(const string& sk)
    return write();
 }

+int ConfSimple::clear()
+{
+    m_submaps.clear();
+    m_order.clear();
+    return write();
+}
+
 // Walk the tree, calling user function at each node
 ConfSimple::WalkerCode
 ConfSimple::sortwalk(WalkerCode(*walker)(void *, const string&, const string&),
@ -692,6 +699,13 @@ bool ConfSimple::commentsAsXML(ostream& out)
            }
            break;
        }
+        case ConfLine::CFL_SK:
+            out << "<subkey>" << it->m_data << "</subkey>" << endl;
+            break;
+        case ConfLine::CFL_VAR:
+            out << "<varsetting>" << it->m_data << " = " <<
+                it->m_value << "</varsetting>" << endl;
+            break;
        default:
            break;
        }
--- a/src/utils/conftree.h
+++ b/src/utils/conftree.h
@ -159,10 +159,7 @@ public:
    void reparse(const std::string& in);

    /** Clear all content */
-    void clear() {
-        m_submaps.clear();
-        m_order.clear();
-    }
+    int clear();

    /**
     * Get string value for named parameter, from specified subsection (looks 
--- a/src/utils/fstreewalk.cpp
+++ b/src/utils/fstreewalk.cpp
@ -36,6 +36,7 @@
 #include "log.h"
 #include "pathut.h"
 #include "fstreewalk.h"
+#include "transcode.h"

 using namespace std;

@ -310,6 +311,20 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
    return FtwOk;
 }

+#ifdef _WIN32
+#define DIRENT _wdirent
+#define DIRHDL _WDIR
+#define OPENDIR _wopendir
+#define CLOSEDIR _wclosedir
+#define READDIR _wreaddir
+#else
+#define DIRENT dirent
+#define DIRHDL DIR
+#define OPENDIR opendir
+#define CLOSEDIR closedir
+#define READDIR readdir
+#endif
+
 // Note that the 'norecurse' flag is handled as part of the directory read. 
 // This means that we always go into the top 'walk()' parameter if it is a 
 // directory, even if norecurse is set. Bug or Feature ?
@ -341,24 +356,25 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,

    // This is a directory, read it and process entries:

+#ifndef _WIN32
    // Detect if directory already seen. This could just be several
    // symlinks pointing to the same place (if FtwFollow is set), it
    // could also be some other kind of cycle. In any case, there is
    // no point in entering again.
    // For now, we'll ignore the "other kind of cycle" part and only monitor
    // this is FtwFollow is set
-#ifndef _WIN32
    if (data->options & FtwFollow) {
 	DirId dirid(stp->st_dev, stp->st_ino);
 	if (data->donedirs.find(dirid) != data->donedirs.end()) {
-	    LOGINFO("Not processing ["  << (top) << "] (already seen as other path)\n" );
+	    LOGINFO("Not processing [" << top <<
+                    "] (already seen as other path)\n");
 	    return status;
 	}
 	data->donedirs.insert(dirid);
    }
 #endif
-    
-    DIR *d = opendir(top.c_str());
+    SYSPATH(top, systop);
+    DIRHDL *d = OPENDIR(systop);
    if (d == 0) {
 	data->logsyserr("opendir", top);
 	switch (errno) {
@ -376,42 +392,38 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
 	}
    }

-    struct dirent *ent;
-    while ((ent = readdir(d)) != 0) {
+    struct DIRENT *ent;
+    while ((ent = READDIR(d)) != 0) {
        string fn;
        struct stat st;
+#ifdef _WIN32
+        string sdname;
+        if (!wchartoutf8(ent->d_name, sdname)) {
+            LOGERR("wchartoutf8 failed in " << top << endl);
+            continue;
+        }
+        const char *dname = sdname.c_str();
+#else
+        const char *dname = ent->d_name;
+#endif
 	// Maybe skip dotfiles
-	if ((data->options & FtwSkipDotFiles) && ent->d_name[0] == '.')
+	if ((data->options & FtwSkipDotFiles) && dname[0] == '.')
 	    continue;
 	// Skip . and ..
-	if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) 
+	if (!strcmp(dname, ".") || !strcmp(dname, "..")) 
 	    continue;

 	// Skipped file names match ?
 	if (!data->skippedNames.empty()) {
-	    if (inSkippedNames(ent->d_name))
+	    if (inSkippedNames(dname))
 		continue;
 	}
-
-        fn = path_cat(top, ent->d_name);
-#ifdef _WIN32
-        // readdir gets the useful attrs, no inode indirection on windows,
-        // spare the path_fileprops() call, but make sure we mimick it.
-        memset(&st, 0, sizeof(st));
-        st.st_mtime = ent->d_mtime;
-        st.st_size = ent->d_size;
-        st.st_mode = ent->d_mode;
-        // ctime is really creation time on Windows. Just use mtime
-        // for all. We only use ctime on Unix to catch xattr changes
-        // anyway.
-        st.st_ctime = st.st_mtime;
-#else
+        fn = path_cat(top, dname);
        int statret =  path_fileprops(fn.c_str(), &st, data->options&FtwFollow);
        if (statret == -1) {
            data->logsyserr("stat", fn);
            continue;
        }
-#endif

        if (!data->skippedPaths.empty()) {
            // We do not check the ancestors. This means that you can have
@ -461,7 +473,7 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,

 out:
    if (d)
-	closedir(d);
+	CLOSEDIR(d);
    return status;
 }
 	
--- a/src/utils/md5ut.cpp
+++ b/src/utils/md5ut.cpp
@ -20,15 +20,17 @@
 #include <string.h>

 #include "md5ut.h"
-
 #include "readfile.h"

 using namespace std;

-class FileScanMd5 : public FileScanDo {
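+// This class must not be named FileScanMd5: readfile.cpp defines a
+// different class with that name, and two distinct global classes
+// sharing one name break the one-definition rule. The vtables then
+// get mixed up and mh_xslt crashes while calling a virtual function
+// (seen with gcc 6.3 and 7.3)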
+class FileScanMd5loc : public FileScanDo {
 public:
-    FileScanMd5(string& d) : digest(d) {}
-    virtual bool init(size_t size, string *)
+    FileScanMd5loc(string& d) : digest(d) {}
+    virtual bool init(int64_t, string *)
    {
 	MD5Init(&ctx);
 	return true;
@ -44,7 +46,7 @@ public:

 bool MD5File(const string& filename, string &digest, string *reason)
 {
-    FileScanMd5 md5er(digest);
+    FileScanMd5loc md5er(digest);
    if (!file_scan(filename, &md5er, reason))
 	return false;
    // We happen to know that digest and md5er.digest are the same object
--- a/src/utils/miniz.cpp
+++ b/src/utils/miniz.cpp
--- a/src/utils/miniz.h
+++ b/src/utils/miniz.h
--- a/src/utils/pathut.cpp
+++ b/src/utils/pathut.cpp
@ -24,13 +24,19 @@
 #include <stdio.h>
 #include <math.h>
 #include <errno.h>
+#include <dirent.h>

 #ifdef _WIN32
-#include "dirent.h"
 #include "safefcntl.h"
 #include "safeunistd.h"
 #include "safewindows.h"
 #include "safesysstat.h"
+#include "transcode.h"
+
+#define STAT _wstat
+#define LSTAT _wstat
+#define STATBUF _stat
+#define ACCESS _waccess

 #else // Not windows ->
 #include <fcntl.h>
@ -39,10 +45,13 @@
 #include <pwd.h>
 #include <sys/file.h>
 #include <sys/stat.h>
-#include <dirent.h>
 #include <sys/statvfs.h>
 #include <sys/types.h>

+#define STAT stat
+#define LSTAT lstat
+#define STATBUF stat
+#define ACCESS access
 #endif

 #include <cstdlib>
@ -506,8 +515,9 @@ bool path_makepath(const string& ipath, int mode)

 bool path_isdir(const string& path)
 {
-    struct stat st;
-    if (lstat(path.c_str(), &st) < 0) {
+    struct STATBUF st;
+    SYSPATH(path, syspath);
+    if (LSTAT(syspath, &st) < 0) {
        return false;
    }
    if (S_ISDIR(st.st_mode)) {
@ -518,8 +528,9 @@ bool path_isdir(const string& path)

 long long path_filesize(const string& path)
 {
-    struct stat st;
-    if (stat(path.c_str(), &st) < 0) {
+    struct STATBUF st;
+    SYSPATH(path, syspath);
+    if (STAT(syspath, &st) < 0) {
        return -1;
    }
    return (long long)st.st_size;
@ -531,8 +542,9 @@ int path_fileprops(const std::string path, struct stat *stp, bool follow)
        return -1;
    }
    memset(stp, 0, sizeof(struct stat));
-    struct stat mst;
-    int ret = follow ? stat(path.c_str(), &mst) : lstat(path.c_str(), &mst);
+    struct STATBUF mst;
+    SYSPATH(path, syspath);
+    int ret = follow ? STAT(syspath, &mst) : LSTAT(syspath, &mst);
    if (ret != 0) {
        return ret;
    }
@ -551,7 +563,13 @@ int path_fileprops(const std::string path, struct stat *stp, bool follow)

 bool path_exists(const string& path)
 {
-    return access(path.c_str(), 0) == 0;
+    SYSPATH(path, syspath);
+    return ACCESS(syspath, 0) == 0;
+}
+bool path_readable(const string& path)
+{
+    SYSPATH(path, syspath);
+    return ACCESS(syspath, R_OK) == 0;
 }

 // Allowed punctuation in the path part of an URI according to RFC2396
--- a/src/utils/pathut.h
+++ b/src/utils/pathut.h
@ -87,10 +87,20 @@ extern int path_fileprops(const std::string path, struct stat *stp,
 /// Returns true if last elt could be checked to exist. False may mean that
 /// the file/dir does not exist or that an error occurred.
 extern bool path_exists(const std::string& path);
+/// Same but must be readable
+extern bool path_readable(const std::string& path);

 /// Return separator for PATH environment variable
 extern std::string path_PATHsep();

+#ifdef _WIN32
+#define SYSPATH(PATH, SPATH) wchar_t PATH ## _buf[2048];      \
+    utf8towchar(PATH, PATH ## _buf, 2048);                    \
+    wchar_t *SPATH = PATH ## _buf;
+#else
+#define SYSPATH(PATH, SPATH) const char *SPATH = PATH.c_str()
+#endif
+
 /// Dump directory
 extern bool readdir(const std::string& dir, std::string& reason,
                    std::set<std::string>& entries);
--- a/src/utils/rclutil.cpp
+++ b/src/utils/rclutil.cpp
@ -164,10 +164,15 @@ const string& path_pkgdatadir()
 // into either utf-8 if transcoding succeeds, or url-encoded
 bool printableUrl(const string& fcharset, const string& in, string& out)
 {
+#ifdef _WIN32
+    // On windows our paths are always utf-8
+    out = in;
+#else
    int ecnt = 0;
    if (!transcode(in, out, fcharset, "UTF-8", &ecnt) || ecnt) {
        out = url_encode(in, 7);
    }
+#endif
    return true;
 }

--- a/src/utils/readfile.cpp
+++ b/src/utils/readfile.cpp
@ -14,43 +14,67 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
-#ifndef TEST_READFILE
 #ifdef BUILDING_RECOLL
 #include "autoconfig.h"
 #else
 #include "config.h"
 #endif

+#include "readfile.h"
+
 #include <errno.h>
 #include <sys/types.h>
+
 #ifdef _WIN32
 #include "safefcntl.h"
 #include "safesysstat.h"
 #include "safeunistd.h"
+#include "transcode.h"
+#define OPEN _wopen
+
 #else
 #define O_BINARY 0
 #include <fcntl.h>
 #include <sys/stat.h>
 #include <unistd.h>
+#define OPEN open
+
 #endif
+
 #include <string>

-#include "readfile.h"
 #include "smallut.h"
+#include "pathut.h"

-using std::string;
+#ifdef READFILE_ENABLE_MD5
+#include "md5.h"
+#endif

+#ifdef MDU_INCLUDE_LOG
+#include MDU_INCLUDE_LOG
+#else
+#include "log.h"
+#endif
+
+using namespace std;
+
+///////////////
+// Implementation of basic interface: read whole file to memory buffer
 class FileToString : public FileScanDo {
 public:
    FileToString(string& data) : m_data(data) {}
-    string& m_data;
-    bool init(size_t size, string *reason) {
+
+    // Note: the fstat() + reserve() (in init()) calls divide cpu
+    // usage almost by 2 on both linux i586 and macosx (compared to
+    // just append()) Also tried a version with mmap, but it's
+    // actually slower on the mac and not faster on linux.
+    virtual bool init(int64_t size, string *reason) {
        if (size > 0) {
            m_data.reserve(size);
        }
        return true;
    }
-    bool data(const char *buf, int cnt, string *reason) {
+    virtual bool data(const char *buf, int cnt, string *reason) {
        try {
            m_data.append(buf, cnt);
        } catch (...) {
@ -59,248 +83,558 @@ public:
        }
        return true;
    }
+
+    string& m_data;
 };

+bool file_to_string(const string& fn, string& data, int64_t offs, size_t cnt,
+                    string *reason)
+{
+    FileToString accum(data);
+    return file_scan(fn, &accum, offs, cnt, reason
+#ifdef READFILE_ENABLE_MD5
+                     , nullptr
+#endif
+        );
+}
+
 bool file_to_string(const string& fn, string& data, string *reason)
 {
    return file_to_string(fn, data, 0, size_t(-1), reason);
 }
-bool file_to_string(const string& fn, string& data, int64_t offs, size_t cnt,
-                    string *reason)
+
+
+/////////////
+//  Callback/filtering interface
+
+// Abstract base class for both source (origin) and filter
+// (midstream) elements. Both have a downstream element, to which they
+// hand the data. The downstream pointer is not owned.
+class FileScanUpstream {
+public:
+    // Polymorphic base: keep destruction through a base pointer safe.
+    virtual ~FileScanUpstream() {}
+
+    // Set the element which will receive our output (may be null).
+    virtual void setDownstream(FileScanDo *down) {
+        m_down = down;
+    }
+    // Return the current downstream element, or null if there is none.
+    virtual FileScanDo *out() {
+        return m_down;
+    }
+protected:
+    FileScanDo *m_down{nullptr};
+};
+
+// Source element.
+class FileScanSource : public FileScanUpstream {
+public:
+    FileScanSource(FileScanDo *down) {
+        setDownstream(down);
+    }
+    virtual bool scan() = 0;
+};
+
+// Inside element of a transformation pipe. The idea is that elements
+// which don't recognize the data get themselves out of the pipe
+// (pop()). Typically, only one of the decompression modules
+// (e.g. gzip/bzip2/xz...) would remain. For now there is only gzip,
+// it pops itself if the data does not have the right magic number
+class FileScanFilter : public FileScanDo, public FileScanUpstream {
+public:
+    virtual void insertAtSink(FileScanDo *sink, FileScanUpstream *upstream) {
+        setDownstream(sink);
+        if (m_down) {
+            m_down->setUpstream(this);
+        }
+        setUpstream(upstream);
+        if (m_up) {
+            m_up->setDownstream(this);
+        }
+    }
+
+    // Remove myself from the pipe. 
+    virtual void pop() {
+        if (m_down) {
+            m_down->setUpstream(m_up);
+        }
+        if (m_up) {
+            m_up->setDownstream(m_down);
+        }
+    }
+
+    virtual void setUpstream(FileScanUpstream *up) override {
+        m_up = up;
+    }
+
+private:
+    FileScanUpstream *m_up{nullptr};
+};
+
+
+#if defined(READFILE_ENABLE_ZLIB)
+#include <zlib.h>
+
+// Decompression filter. The first data chunk is checked for the gzip
+// magic number: if it is absent, the filter removes itself from the
+// pipe and passes the data unchanged, else all data is inflated before
+// being handed to the downstream element.
+class GzFilter : public FileScanFilter {
+public:
+    virtual ~GzFilter() {
+        if (m_initdone) {
+            inflateEnd(&m_stream);
+        }
+    }
+
+    // Nothing to prepare here: just forward the size hint downstream.
+    virtual bool init(int64_t size, string *reason) override {
+        LOGDEB1("GzFilter::init\n");
+        if (out()) {
+            return out()->init(size, reason);
+        }
+        return true;
+    }
+
+    // Inflate one chunk of input and push the result downstream.
+    // @param buf input data (compressed, or raw on the first call if
+    //   this is not gzip data).
+    // @param cnt input byte count.
+    // @param reason error message accumulator, may be null.
+    // @return false on zlib or downstream error.
+    virtual bool data(const char *buf, int cnt, string *reason) override {
+        LOGDEB1("GzFilter::data: cnt " << cnt << endl);
+
+        int error;
+        m_stream.next_in = (Bytef*)buf;
+        m_stream.avail_in = cnt;
+        
+        if (m_initdone == false) {
+            // We do not support a first read cnt < 2. This quite
+            // probably can't happen with a compressed file (size>2)
+            // except if we're reading a tty which is improbable. So
+            // assume this is a regular file.
+            const unsigned char *ubuf = (const unsigned char *)buf;
+            if ((cnt < 2) || ubuf[0] != 0x1f || ubuf[1] != 0x8b) {
+                LOGDEB1("GzFilter::data: not gzip. out() is " << out() << "\n");
+                pop();
+                if (out()) {
+                    return out()->data(buf, cnt, reason);
+                } else {
+                    return false;
+                }
+            }
+            m_stream.opaque = nullptr;
+            m_stream.zalloc = alloc_func;
+            m_stream.zfree = free_func;
+            m_stream.next_out = (Bytef*)m_obuf;
+            m_stream.avail_out = m_obs;
+            // 15+32: maximum window size, automatic zlib/gzip header
+            // detection.
+            if ((error = inflateInit2(&m_stream, 15+32)) != Z_OK) {
+                LOGERR("inflateInit2 error: " << error << endl);
+                if (reason) {
+                    *reason += " Zlib inflateinit failed";
+                    if (m_stream.msg && *m_stream.msg) {
+                        *reason += string(": ") + m_stream.msg;
+                    }
+                }
+                return false;
+            }
+            m_initdone = true;
+        }
+        
+        // Keep calling inflate() while there is input left, or while
+        // the output buffer came back full (more output may be pending
+        // inside zlib even though the input is exhausted).
+        for (;;) {
+            m_stream.next_out = (Bytef*)m_obuf;
+            m_stream.avail_out = m_obs;
+            error = inflate(&m_stream, Z_SYNC_FLUSH);
+            if (error == Z_BUF_ERROR) {
+                // Not fatal: no progress is possible without more
+                // input. Wait for the next chunk.
+                break;
+            }
+            if (error < Z_OK || error == Z_NEED_DICT) {
+                // Z_NEED_DICT can't be satisfied here, and retrying
+                // would loop forever.
+                LOGERR("inflate error: " << error << endl);
+                if (reason) {
+                    *reason += " Zlib inflate failed";
+                    if (m_stream.msg && *m_stream.msg) {
+                        *reason += string(": ") + m_stream.msg;
+                    }
+                }
+                return false;
+            }
+            if (out() &&
+                !out()->data(m_obuf, m_obs - m_stream.avail_out, reason)) {
+                return false;
+            }
+            if (error == Z_STREAM_END) {
+                // End of compressed stream: inflate() would not consume
+                // any trailing input, so stop instead of spinning.
+                break;
+            }
+            if (m_stream.avail_in == 0 && m_stream.avail_out != 0) {
+                // Input consumed and output fully flushed.
+                break;
+            }
+        }
+        return true;
+    }
+    
+    // Memory management callbacks for zlib: plain malloc/free.
+    static voidpf alloc_func(voidpf opaque, uInt items, uInt size) {
+        return malloc(items * size);
+    }
+    static void free_func(voidpf opaque, voidpf address) {
+        free(address);
+    }
+
+    // True once inflateInit2() succeeded (inflateEnd() is then needed)
+    bool m_initdone{false};
+    z_stream m_stream;
+    // Output buffer for inflate(), and its size.
+    char m_obuf[10000];
+    const int m_obs{10000};
+};
+#endif // GZ
+
+#ifdef READFILE_ENABLE_MD5
+
+class FileScanMd5 : public FileScanFilter {
+public:
+    FileScanMd5(string& d) : digest(d) {}
+    virtual bool init(int64_t size, string *reason) override {
+        LOGDEB1("FileScanMd5: init\n");
+	MD5Init(&ctx);
+        if (out()) {
+            return out()->init(size, reason);
+        }
+	return true;
+    }
+    virtual bool data(const char *buf, int cnt, string *reason) override {
+        LOGDEB1("FileScanMd5: data. cnt " << cnt << endl);
+	MD5Update(&ctx, (const unsigned char*)buf, cnt);
+        if (out() && !out()->data(buf, cnt, reason)) {
+            return false;
+        }
+	return true;
+    }
+    bool finish() {
+        LOGDEB1("FileScanMd5: finish\n");
+        MD5Final(digest, &ctx);
+        return true;
+    }
+    string &digest;
+    MD5_CTX ctx;
+};
+#endif // MD5
+
+// Source taking data from a regular file, or from stdin if the file
+// name is empty.
+class FileScanSourceFile : public FileScanSource {
+public:
+    // @param next downstream element receiving the data.
+    // @param fn file name, empty for stdin.
+    // @param startoffs offset of the first byte to deliver.
+    // @param cnttoread max byte count to deliver, -1 for no limit.
+    // @param reason error message accumulator.
+    FileScanSourceFile(FileScanDo *next, const string& fn, int64_t startoffs,
+                       int64_t cnttoread, string *reason)
+        : FileScanSource(next), m_fn(fn), m_startoffs(startoffs),
+          m_cnttoread(cnttoread), m_reason(reason) { }
+
+    // Read the input and feed it to the downstream element.
+    // @return false if opening, seeking or reading fails, or if the
+    //   downstream init() or data() call returns false.
+    virtual bool scan() {
+        LOGDEB1("FileScanSourceFile: reading " << m_fn << " offs " <<
+               m_startoffs<< " cnt " << m_cnttoread << " out " << out() << endl);
+        const int RDBUFSZ = 8192;
+        bool ret = false;
+        bool noclosing = true;
+        int fd = 0;
+        struct stat st;
+        // Initialize st_size: if fn.empty() , the fstat() call won't happen.
+        st.st_size = 0;
+        // These are declared up front so that the 'goto out' error
+        // exits below never jump over an initialization.
+        char buf[RDBUFSZ];
+        int64_t curoffs = 0;
+        int64_t totread = 0;
+
+        // If we have a file name, open it, else use stdin.
+        if (!m_fn.empty()) {
+            SYSPATH(m_fn, realpath);
+            fd = OPEN(realpath, O_RDONLY | O_BINARY);
+            if (fd < 0 || fstat(fd, &st) < 0) {
+                catstrerror(m_reason, "open/stat", errno);
+                // Don't leak the descriptor if only fstat() failed
+                if (fd >= 0) {
+                    close(fd);
+                }
+                return false;
+            }
+            noclosing = false;
+        }
+
+#if defined O_NOATIME && O_NOATIME != 0
+        if (fcntl(fd, F_SETFL, O_NOATIME) < 0) {
+            // perror("fcntl");
+        }
+#endif
+        if (out()) {
+            // Size hint for the sink: requested count if set, else the
+            // file size, else 0 (unknown).
+            int64_t sizehint = 0;
+            if (m_cnttoread != -1 && m_cnttoread) {
+                sizehint = m_cnttoread + 1;
+            } else if (st.st_size > 0) {
+                sizehint = st.st_size + 1;
+            }
+            if (!out()->init(sizehint, m_reason)) {
+                goto out;
+            }
+        }
+
+        if (m_startoffs > 0 && !m_fn.empty()) {
+            if (lseek(fd, m_startoffs, SEEK_SET) != m_startoffs) {
+                catstrerror(m_reason, "lseek", errno);
+                goto out;
+            }
+            curoffs = m_startoffs;
+        }
+
+        for (;;) {
+            size_t toread = RDBUFSZ;
+            // Not seekable (stdin): read and discard up to the start
+            // offset, taking care not to read past it.
+            if (m_startoffs > 0 && curoffs < m_startoffs) {
+                toread = size_t(MIN(RDBUFSZ, m_startoffs - curoffs));
+            }
+
+            if (m_cnttoread != -1) {
+                toread = MIN(toread, (uint64_t)(m_cnttoread - totread));
+            }
+            ssize_t n = static_cast<ssize_t>(read(fd, buf, toread));
+            if (n < 0) {
+                catstrerror(m_reason, "read", errno);
+                goto out;
+            }
+            if (n == 0) {
+                break;
+            }
+            curoffs += n;
+            if (curoffs - n < m_startoffs) {
+                // This chunk was before the start offset: skip it.
+                continue;
+            }
+            if (out() && !out()->data(buf, n, m_reason)) {
+                goto out;
+            }
+            totread += n;
+            if (m_cnttoread > 0 && totread >= m_cnttoread) {
+                break;
+            }
+        }
+
+        ret = true;
+    out:
+        if (fd >= 0 && !noclosing) {
+            close(fd);
+        }
+        return ret;
+    }
+    
+protected:
+    string m_fn;
+    int64_t m_startoffs;
+    int64_t m_cnttoread;
+    string *m_reason;
+};
+
+
+#if defined(READFILE_ENABLE_MINIZ)
+#include "miniz.h"
+
+// Source taking data from a ZIP archive member. The archive is either
+// a file (first constructor) or a memory buffer (second constructor).
+class FileScanSourceZip : public FileScanSource {
+public:
+    // Read member 'member' from the archive file 'fn'.
+    FileScanSourceZip(FileScanDo *next, const string& fn,
+                      const string& member, string *reason)
+        : FileScanSource(next), m_fn(fn), m_member(member),
+          m_reason(reason) {}
+
+    // Read member 'member' from the in-memory archive data/cnt. Only
+    // the pointer is stored: the buffer must stay valid during scan().
+    FileScanSourceZip(const char *data, size_t cnt, FileScanDo *next,
+                      const string& member, string *reason)
+        : FileScanSource(next), m_data(data), m_cnt(cnt), m_member(member),
+          m_reason(reason) {}
+
+    // Locate the member, then extract it to the downstream element.
+    // @return false if the archive can't be opened, the member can't
+    //   be found or read, or the downstream element reports an error.
+    virtual bool scan() {
+        bool ret = false;
+        mz_zip_archive zip;
+        mz_zip_zero_struct(&zip);
+        void *opaque = this;
+
+        bool ret1;
+        if (m_fn.empty()) {
+            ret1 = mz_zip_reader_init_mem(&zip, m_data, m_cnt, 0);
+        } else {
+            SYSPATH(m_fn, realpath);
+            ret1 = mz_zip_reader_init_file(&zip, realpath, 0);
+        }
+        if (!ret1) {
+            if (m_reason) {
+                *m_reason += "mz_zip_reader_init_xx() failed: ";
+                *m_reason +=
+                    string(mz_zip_get_error_string(zip.m_last_error));
+            }
+            return false;
+        }
+
+        mz_uint32 file_index;
+        // The _v2 call returns a boolean status (the index comes back
+        // through the last parameter), not an index or -1.
+        if (!mz_zip_reader_locate_file_v2(&zip, m_member.c_str(), NULL, 0,
+                                          &file_index)) {
+            if (m_reason) {
+                *m_reason += "mz_zip_reader_locate_file() failed: ";
+                *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
+            }
+            goto out;
+        }
+
+        mz_zip_archive_file_stat zstat;
+        if (!mz_zip_reader_file_stat(&zip, file_index, &zstat)) {
+            if (m_reason) {
+                *m_reason += "mz_zip_reader_file_stat() failed: ";
+                *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
+            }
+            goto out;
+        }
+        if (out()) {
+            // The uncompressed size is known: use it as size hint
+            if (!out()->init(zstat.m_uncomp_size, m_reason)) {
+                goto out;
+            }
+        }
+                
+        if (!mz_zip_reader_extract_to_callback(
+                &zip, file_index, write_cb, opaque, 0)) {
+            if (m_reason) {
+                *m_reason += "mz_zip_reader_extract_to_callback() failed: ";
+                *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
+            }
+            goto out;
+        }
+        
+        ret = true;
+    out:
+        mz_zip_reader_end(&zip);
+        return ret;
+    }
+
+    // Extraction callback: forward the chunk to the downstream element.
+    // Returning a count different from n signals an error to miniz.
+    static size_t write_cb(void *pOpaque, mz_uint64 file_ofs,
+                           const void *pBuf, size_t n) {
+        const char *cp = (const char*)pBuf;
+        LOGDEB1("write_cb: ofs " << file_ofs << " cnt " << n << " data: " <<
+                string(cp, n) << endl);
+        FileScanSourceZip *ths = (FileScanSourceZip *)pOpaque;
+        if (ths->out()) {
+            if (!ths->out()->data(cp, n, ths->m_reason)) {
+                return (size_t)-1;
+            }
+        }
+        return n;
+    }
+    
+protected:
+    // In-memory archive. Unused (null/0) when reading from a file.
+    const char *m_data{nullptr};
+    size_t m_cnt{0};
+    // Archive file name. Empty when reading from memory.
+    string m_fn;
+    // Name of the archive member to extract
+    string m_member;
+    string *m_reason;
+};
+
+bool file_scan(const std::string& filename, const std::string& membername,
+               FileScanDo* doer, std::string *reason)
 {
-    FileToString accum(data);
-    return file_scan(fn, &accum, offs, cnt, reason);
+    if (membername.empty()) {
+        return file_scan(filename, doer, 0, -1, reason
+#ifdef READFILE_ENABLE_MD5
+, nullptr
+#endif
+            );
+    } else {
+            FileScanSourceZip source(doer, filename, membername, reason);
+            return source.scan();
+    }
+}
+
+bool string_scan(const char *data, size_t cnt, const std::string& membername,
+                 FileScanDo* doer, std::string *reason)
+{
+    if (membername.empty()) {
+        return string_scan(data, cnt, doer, reason
+#ifdef READFILE_ENABLE_MD5
+, nullptr
+#endif
+            );                           
+    } else {
+        FileScanSourceZip source(data, cnt, doer, membername, reason);
+        return source.scan();
+    }
+}
+
+#endif // READFILE_ENABLE_MINIZ
+
+bool file_scan(const string& fn, FileScanDo* doer, int64_t startoffs,
+               int64_t cnttoread, string *reason
+#ifdef READFILE_ENABLE_MD5
+               , string *md5p
+#endif
+    )
+{
+    LOGDEB1("file_scan: doer " << doer << endl);
+#if defined(READFILE_ENABLE_ZLIB)
+    bool nodecomp = startoffs != 0;
+#endif
+    if (startoffs < 0) {
+        startoffs = 0;
+    }
+    
+    FileScanSourceFile source(doer, fn, startoffs, cnttoread, reason);
+    FileScanUpstream *up = &source;
+    up = up;
+    
+#if defined(READFILE_ENABLE_ZLIB)
+    GzFilter gzfilter;
+    if (!nodecomp) {
+        gzfilter.insertAtSink(doer, up);
+        up = &gzfilter;
+    }
+#endif
+
+#ifdef READFILE_ENABLE_MD5
+    // We compute the MD5 on the uncompressed data, so insert this
+    // filter just before the sink (downstream of the decompressor).
+    string digest;
+    FileScanMd5 md5filter(digest);
+    if (md5p) {
+        md5filter.insertAtSink(doer, up);
+        up = &md5filter;
+    }
+#endif
+    
+    bool ret = source.scan();
+
+#ifdef READFILE_ENABLE_MD5
+    if (md5p) {
+        md5filter.finish();
+        MD5HexPrint(digest, *md5p);
+    }
+#endif
+    return ret;
 }

 bool file_scan(const string& fn, FileScanDo* doer, string *reason)
 {
-    return file_scan(fn, doer, 0, size_t(-1), reason);
+    return file_scan(fn, doer, 0, -1, reason
+#ifdef READFILE_ENABLE_MD5
+, nullptr
+#endif
+        );                           
 }

-const int RDBUFSZ = 8192;
-// Note: the fstat() + reserve() (in init()) calls divide cpu usage almost by 2
-// on both linux i586 and macosx (compared to just append())
-// Also tried a version with mmap, but it's actually slower on the mac and not
-// faster on linux.
-bool file_scan(const string& fn, FileScanDo* doer, int64_t startoffs,
-               size_t cnttoread, string *reason)
-{
-    if (startoffs < 0) {
-        *reason += " file_scan: negative startoffs not allowed";
-        return false;
-    }

-    bool ret = false;
-    bool noclosing = true;
-    int fd = 0;
-    struct stat st;
-    // Initialize st_size: if fn.empty() , the fstat() call won't happen.
-    st.st_size = 0;
+class FileScanSourceBuffer : public FileScanSource {
+public:
+    FileScanSourceBuffer(FileScanDo *next, const char *data, size_t cnt,
+                         string *reason)
+        : FileScanSource(next), m_data(data), m_cnt(cnt), m_reason(reason) {}

-    // If we have a file name, open it, else use stdin.
-    if (!fn.empty()) {
-        fd = open(fn.c_str(), O_RDONLY | O_BINARY);
-        if (fd < 0 || fstat(fd, &st) < 0) {
-            catstrerror(reason, "open/stat", errno);
-            return false;
+    virtual bool scan() {
+        if (out()) {
+            if (!out()->init(m_cnt, m_reason)) {
+                return false;
+            }
+            return out()->data(m_data, m_cnt, m_reason);
+        } else {
+            return true;
        }
-        noclosing = false;
    }
+    
+protected:
+    const char *m_data{nullptr};
+    size_t m_cnt{0};
+    string *m_reason{nullptr};
+};

-#if defined O_NOATIME && O_NOATIME != 0
-    if (fcntl(fd, F_SETFL, O_NOATIME) < 0) {
-        // perror("fcntl");
+bool string_scan(const char *data, size_t cnt, FileScanDo* doer,
+                 std::string *reason
+#ifdef READFILE_ENABLE_MD5
+                 , std::string *md5p
+#endif
+    )
+{
+    FileScanSourceBuffer source(doer, data, cnt, reason);
+    FileScanUpstream *up = &source;
+    up = up;
+    
+#ifdef READFILE_ENABLE_MD5
+    string digest;
+    FileScanMd5 md5filter(digest);
+    if (md5p) {
+        md5filter.insertAtSink(doer, up);
+        up = &md5filter;
    }
 #endif
+    
+    bool ret = source.scan();

-    if (cnttoread != (size_t) - 1 && cnttoread) {
-        doer->init(cnttoread + 1, reason);
-    } else if (st.st_size > 0) {
-        doer->init(size_t(st.st_size + 1), reason);
-    } else {
-        doer->init(0, reason);
-    }
-
-    int64_t curoffs = 0;
-    if (startoffs > 0 && !fn.empty()) {
-        if (lseek(fd, startoffs, SEEK_SET) != startoffs) {
-            catstrerror(reason, "lseek", errno);
-            return false;
-        }
-        curoffs = startoffs;
-    }
-
-    char buf[RDBUFSZ];
-    size_t totread = 0;
-    for (;;) {
-        size_t toread = RDBUFSZ;
-        if (startoffs > 0 && curoffs < startoffs) {
-            toread = size_t(MIN(RDBUFSZ, startoffs - curoffs));
-        }
-
-        if (cnttoread != size_t(-1)) {
-            toread = MIN(toread, cnttoread - totread);
-        }
-        ssize_t n = static_cast<ssize_t>(read(fd, buf, toread));
-        if (n < 0) {
-            catstrerror(reason, "read", errno);
-            goto out;
-        }
-        if (n == 0) {
-            break;
-        }
-
-        curoffs += n;
-        if (curoffs - n < startoffs) {
-            continue;
-        }
-
-        if (!doer->data(buf, n, reason)) {
-            goto out;
-        }
-        totread += n;
-        if (cnttoread > 0 && totread >= cnttoread) {
-            break;
-        }
-    }
-
-    ret = true;
-out:
-    if (fd >= 0 && !noclosing) {
-        close(fd);
+#ifdef READFILE_ENABLE_MD5
+    if (md5p) {
+        md5filter.finish();
+        MD5HexPrint(digest, *md5p);
    }
+#endif
    return ret;
 }

-#else // Test
-#include "autoconfig.h"
-
-#include <stdio.h>
-#include <sys/types.h>
-#include "safesysstat.h"
-#include <stdlib.h>
-
-#include <string>
-#include <iostream>
-using namespace std;
-
-#include "readfile.h"
-#include "fstreewalk.h"
-
-using namespace std;
-
-class myCB : public FsTreeWalkerCB {
-public:
-    FsTreeWalker::Status processone(const string& path,
-                                    const struct stat *st,
-                                    FsTreeWalker::CbFlag flg) {
-        if (flg == FsTreeWalker::FtwDirEnter) {
-            //cout << "[Entering " << path << "]" << endl;
-        } else if (flg == FsTreeWalker::FtwDirReturn) {
-            //cout << "[Returning to " << path << "]" << endl;
-        } else if (flg == FsTreeWalker::FtwRegular) {
-            //cout << path << endl;
-            string s, reason;
-            if (!file_to_string(path, s, &reason)) {
-                cerr << "Failed: " << reason << " : " << path << endl;
-            } else {
-                //cout <<
-                //"================================================" << endl;
-                cout << path << endl;
-                //      cout << s;
-            }
-            reason.clear();
-        }
-        return FsTreeWalker::FtwOk;
-    }
-};
-
-static int     op_flags;
-#define OPT_MOINS 0x1
-#define OPT_c     0x2
-#define OPT_o     0x4
-
-static const char *thisprog;
-static char usage [] =
-    "trreadfile [-o offs] [-c cnt] topdirorfile\n\n"
-    ;
-static void
-Usage(void)
-{
-    fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
-    exit(1);
-}
-
-int main(int argc, const char **argv)
-{
-    int64_t offs = 0;
-    size_t cnt = size_t(-1);
-    thisprog = argv[0];
-    argc--;
-    argv++;
-
-    while (argc > 0 && **argv == '-') {
-        (*argv)++;
-        if (!(**argv))
-            /* Cas du "adb - core" */
-        {
-            Usage();
-        }
-        while (**argv)
-            switch (*(*argv)++) {
-            case 'c':
-                op_flags |= OPT_c;
-                if (argc < 2) {
-                    Usage();
-                }
-                cnt = atoll(*(++argv));
-                argc--;
-                goto b1;
-            case 'o':
-                op_flags |= OPT_o;
-                if (argc < 2) {
-                    Usage();
-                }
-                offs = strtoull(*(++argv), 0, 0);
-                argc--;
-                goto b1;
-            default:
-                Usage();
-                break;
-            }
-b1:
-        argc--;
-        argv++;
-    }
-
-    if (argc != 1) {
-        Usage();
-    }
-    string top = *argv++;
-    argc--;
-    cerr << "filename " << top << " offs " << offs << " cnt " << cnt << endl;
-
-    struct stat st;
-    if (!top.empty() && stat(top.c_str(), &st) < 0) {
-        perror("stat");
-        exit(1);
-    }
-    if (!top.empty() && S_ISDIR(st.st_mode)) {
-        FsTreeWalker walker;
-        myCB cb;
-        walker.walk(top, cb);
-        if (walker.getErrCnt() > 0) {
-            cout << walker.getReason();
-        }
-    } else {
-        string s, reason;
-        if (!file_to_string(top, s, offs, cnt, &reason)) {
-            cerr << reason << endl;
-            exit(1);
-        } else {
-            cout << s;
-        }
-    }
-    exit(0);
-}
-#endif //TEST_READFILE
--- a/src/utils/readfile.h
+++ b/src/utils/readfile.h
@ -21,30 +21,85 @@

 #include <string>

-/**
- * Read file in chunks, calling an accumulator for each chunk. Can be used
- * for reading in a file, computing an md5...
- */
+class FileScanUpstream;
+
+/** Data sink for the file reader. */
 class FileScanDo {
 public:
    virtual ~FileScanDo() {}
-    virtual bool init(size_t size, std::string *reason) = 0;
-    virtual bool data(const char *buf, int cnt, std::string* reason) = 0;
+    /* Initialize and allocate. 
+     * @param size if set, lower bound of data size.
+     * @param reason[output] set to error message in case of error.
+     * @return false for error (file_scan will return), true if ok.
+     */
+    virtual bool init(int64_t size, std::string *reason) = 0;
+    /* Process chunk of data
+     * @param buf  the data buffer.
+     * @param cnt byte count.
+     * @param reason[output] set to error message in case of error.
+     * @return false for error (file_scan will return), true if ok.
+     */
+    virtual bool data(const char *buf, int cnt, std::string *reason) = 0;
+    
+    virtual void setUpstream(FileScanUpstream*) {}
 };
-bool file_scan(const std::string& filename, FileScanDo* doer, std::string *reason = 0);
-/* Same but only process count cnt from offset offs. Set cnt to size_t(-1)
- * for no limit */
-bool file_scan(const std::string& fn, FileScanDo* doer, int64_t offs, size_t cnt,
-               std::string *reason = 0);
+
+/** Open and read file, calling the FileScanDo data() method for each chunk.
+ *
+ * @param fn File name. Use empty value for stdin.
+ *
+ * @param doer the data processor. The init() method will be called
+ * initially with a lower bound of the data size (may be used to
+ * reserve a buffer), or with a 0 size if nothing is known about the
+ * size. The data() method will be called for every chunk of data
+ * read.
+ * @param startoffs Start offset. If not zero, will disable decompression
+ *             (set to -1 to start at 0 with no decompression).
+ * @param cnttoread Max bytes in output. Set to -1 for no limit.
+ * @param[output] md5p If not null, points to a string to store the hex ascii
+ *     md5 of the uncompressed data.
+ * @param[output] reason If not null, points to a string for storing an
+ *     error message if the return value is false.
+ * @return true if the operation ended normally, else false.
+ */
+bool file_scan(const std::string& fn, FileScanDo* doer, int64_t startoffs,
+               int64_t cnttoread, std::string *reason
+#ifdef READFILE_ENABLE_MD5
+               , std::string *md5p
+#endif
+    );
+
+/** Same as above, without offset/cnt/md5 */
+bool file_scan(const std::string& filename, FileScanDo* doer,
+               std::string *reason);
+
+/** Same as file_scan, from a memory buffer. No libz processing */
+bool string_scan(const char *data, size_t cnt, FileScanDo* doer, 
+                 std::string *reason
+#ifdef READFILE_ENABLE_MD5
+                 , std::string *md5p
+#endif
+    );
+
+#if defined(READFILE_ENABLE_MINIZ)
+/* Process a zip archive member */
+bool file_scan(const std::string& filename, const std::string& membername,
+               FileScanDo* doer, std::string *reason);
+bool string_scan(const char* data, size_t cnt, const std::string& membername,
+                 FileScanDo* doer, std::string *reason);
+#endif

 /**
 * Read file into string.
 * @return true for ok, false else
 */
-bool file_to_string(const std::string& filename, std::string& data, std::string *reason = 0);
+bool file_to_string(const std::string& filename, std::string& data,
+                    std::string *reason = 0);

-/** Read file chunk into string. Set cnt to size_t(-1) for whole file */
+/** Read file chunk into string. Set cnt to -1 for going to
+ * eof, offs to -1 for going from the start without decompression */
 bool file_to_string(const std::string& filename, std::string& data,
                    int64_t offs, size_t cnt, std::string *reason = 0);

+
 #endif /* _READFILE_H_INCLUDED_ */
--- a/src/utils/transcode.cpp
+++ b/src/utils/transcode.cpp
@ -21,14 +21,16 @@
 #include <string>
 #include <iostream>
 #include <mutex>
-using std::string;

 #include <errno.h>
 #include <iconv.h>
+#include <wchar.h>

 #include "transcode.h"
 #include "log.h"

+using namespace std;
+
 // We gain approximately 25% exec time for word at a time conversions by
 // caching the iconv_open thing. 
 //
@ -42,7 +44,7 @@ using std::string;
 bool transcode(const string &in, string &out, const string &icode,
 	       const string &ocode, int *ecnt)
 {
-    LOGDEB2("Transcode: "  << (icode) << " -> "  << (ocode) << "\n" );
+    LOGDEB2("Transcode: " << icode << " -> " << ocode << "\n");
 #ifdef ICONV_CACHE_OPEN
    static iconv_t ic = (iconv_t)-1;
    static string cachedicode;
@ -100,8 +102,9 @@ bool transcode(const string &in, string &out, const string &icode,
 		" : " + strerror(errno);
 #endif
 	    if (errno == EILSEQ) {
-		LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n" );
-		LOGDEB1(" Input consumed "  << (ip - in) << " output produced "  << (out.length() + OBSIZ - osiz) << "\n" );
+		LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n");
+		LOGDEB1(" Input consumed " << ip - in << " output produced " <<
+                        out.length() + OBSIZ - osiz << "\n");
 		out.append(obuf, OBSIZ - osiz);
 		out += "?";
 		mecnt++;
@ -144,14 +147,67 @@ error:
    }

    if (mecnt)
-	LOGDEB("transcode: ["  << (icode) << "]->["  << (ocode) << "] "  << (mecnt) << " errors\n" );
+	LOGDEB("transcode: [" << icode << "]->[" << ocode << "] " <<
+               mecnt << " errors\n");
    if (ecnt)
 	*ecnt = mecnt;
    return ret;
 }

+/** Convert a null-terminated wide character string to UTF-8.
+ * @param in null-terminated wchar_t input.
+ * @param[output] out receives the UTF-8 text. Previous content is erased.
+ * @return false if the converter could not be opened, or if iconv reported
+ *     an error other than a full output buffer, else true.
+ */
+bool wchartoutf8(const wchar_t *in, std::string& out)
+{
+    // The conversion descriptor is opened on first use and kept for
+    // later calls.
+    static iconv_t ic = (iconv_t)-1;
+    if (ic == (iconv_t)-1) {
+        if ((ic = iconv_open("UTF-8", "WCHAR_T")) == (iconv_t)-1) {
+            LOGERR("wchartoutf8: iconv_open failed\n");
+            return false;
+        }
+    }
+    const int OBSIZ = 8192;
+    char obuf[OBSIZ], *op;
+    out.erase();
+    // iconv counts its input in bytes: use the real size of wchar_t,
+    // which is platform-dependent, instead of assuming 2.
+    size_t isiz = sizeof(wchar_t) * wcslen(in);
+    out.reserve(isiz);
+    const char *ip = (const char *)in;

-#else
+    while (isiz > 0) {
+        size_t osiz;
+        op = obuf;
+        osiz = OBSIZ;
+
+        // E2BIG only means that obuf is full: flush it and go on with
+        // the rest of the input. Any other error is fatal.
+        if (iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1
+            && errno != E2BIG) {
+            LOGERR("wchartoutf8: iconv error, errno: " << errno << endl);
+            return false;
+        }
+        out.append(obuf, OBSIZ - osiz);
+    }
+    return true;
+}
+
+/** Convert an UTF-8 string to a null-terminated wide character string.
+ * @param in UTF-8 input.
+ * @param[output] out destination buffer.
+ * @param obytescap capacity of the out buffer, in bytes (not in wchar_t
+ *     units). Must leave room for the terminating null character.
+ * @return false if the buffer cannot even hold the terminator, if the
+ *     converter could not be opened, or if iconv reported an error
+ *     (including an output buffer too small for the result), else true.
+ */
+bool utf8towchar(const std::string& in, wchar_t *out, size_t obytescap)
+{
+    // Without this check the capacity computation below would wrap
+    // around for tiny buffers and let iconv write past the end of out.
+    if (out == nullptr || obytescap < sizeof(wchar_t)) {
+        LOGERR("utf8towchar: output buffer too small\n");
+        return false;
+    }
+    // The conversion descriptor is opened on first use and kept for
+    // later calls.
+    static iconv_t ic = (iconv_t)-1;
+    if (ic == (iconv_t)-1) {
+        if ((ic = iconv_open("WCHAR_T", "UTF-8")) == (iconv_t)-1) {
+            LOGERR("utf8towchar: iconv_open failed\n");
+            return false;
+        }
+    }
+    size_t isiz = in.size();
+    const char *ip = in.c_str();
+    // Reserve room for one terminating wide character.
+    size_t osiz = obytescap - sizeof(wchar_t);
+    char *op = (char *)out;
+    if (iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1) {
+        LOGERR("utf8towchar: iconv error, errno: " << errno << endl);
+        return false;
+    }
+    // Terminate with a full wide null character, whatever the size of
+    // wchar_t is on this platform.
+    for (size_t i = 0; i < sizeof(wchar_t); i++) {
+        *op++ = 0;
+    }
+    return true;
+}
+
+#else // -> TEST

 #include <stdio.h>
 #include <stdlib.h>
@ -222,4 +278,3 @@ int main(int argc, char **argv)
    exit(0);
 }
 #endif
-
--- a/src/utils/transcode.h
+++ b/src/utils/transcode.h
@ -36,4 +36,9 @@ extern bool transcode(const std::string &in, std::string &out,
 		      const std::string &ocode, 
 		      int *ecnt = 0);

+#ifdef _WIN32
+extern bool wchartoutf8(const wchar_t *in, std::string& out);
+extern bool utf8towchar(const std::string& in, wchar_t *out, size_t obytescap);
+#endif
+
 #endif /* _TRANSCODE_H_INCLUDED_ */
--- a/src/windows/dirent.c
+++ b/src/windows/dirent.c
@ -1,154 +0,0 @@
-/*
-
-    Implementation of POSIX directory browsing functions and types for Win32.
-
-    Author:  Kevlin Henney (kevlin@acm.org, kevlin@curbralan.com)
-    History: Created March 1997. Updated June 2003 and July 2012.
-    Rights:  See end of file.
-
-*/
-
-#include <dirent.h>
-#include <errno.h>
-#include <io.h> /* _findfirst and _findnext set errno iff they return -1 */
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-typedef ptrdiff_t handle_type; /* C99's intptr_t not sufficiently portable */
-
-struct DIR
-{
-    handle_type         handle; /* -1 for failed rewind */
-    struct _finddata_t  info;
-    struct dirent       result; /* d_name null iff first time */
-    char                *name;  /* null-terminated char string */
-};
-
-DIR *opendir(const char *name)
-{
-    DIR *dir = 0;
-
-    if(name && name[0])
-    {
-        size_t base_length = strlen(name);
-        const char *all = /* search pattern must end with suitable wildcard */
-            strchr("/\\", name[base_length - 1]) ? "*" : "/*";
-
-        if((dir = (DIR *) malloc(sizeof *dir)) != 0 &&
-           (dir->name = (char *) malloc(base_length + strlen(all) + 1)) != 0)
-        {
-            strcat(strcpy(dir->name, name), all);
-
-            if((dir->handle =
-                (handle_type) _findfirst(dir->name, &dir->info)) != -1)
-            {
-                dir->result.d_name = 0;
-            }
-            else /* rollback */
-            {
-                free(dir->name);
-                free(dir);
-                dir = 0;
-            }
-        }
-        else /* rollback */
-        {
-            free(dir);
-            dir   = 0;
-            errno = ENOMEM;
-        }
-    }
-    else
-    {
-        errno = EINVAL;
-    }
-
-    return dir;
-}
-
-int closedir(DIR *dir)
-{
-    int result = -1;
-
-    if(dir)
-    {
-        if(dir->handle != -1)
-        {
-            result = _findclose(dir->handle);
-        }
-
-        free(dir->name);
-        free(dir);
-    }
-
-    if(result == -1) /* map all errors to EBADF */
-    {
-        errno = EBADF;
-    }
-
-    return result;
-}
-
-struct dirent *readdir(DIR *dir)
-{
-    struct dirent *result = 0;
-
-    if(dir && dir->handle != -1)
-    {
-        if(!dir->result.d_name || _findnext(dir->handle, &dir->info) != -1)
-        {
-            result         = &dir->result;
-            result->d_mtime = dir->info.time_write;
-            result->d_size = dir->info.size;
-            result->d_name = dir->info.name;
-            if (dir->info.attrib & _A_SUBDIR)
-                result->d_mode = S_IFDIR;
-            else
-                result->d_mode = S_IFREG;
-        }
-    }
-    else
-    {
-        errno = EBADF;
-    }
-
-    return result;
-}
-
-void rewinddir(DIR *dir)
-{
-    if(dir && dir->handle != -1)
-    {
-        _findclose(dir->handle);
-        dir->handle = (handle_type) _findfirst(dir->name, &dir->info);
-        dir->result.d_name = 0;
-    }
-    else
-    {
-        errno = EBADF;
-    }
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-/*
-
-    Copyright Kevlin Henney, 1997, 2003, 2012. All rights reserved.
-
-    Permission to use, copy, modify, and distribute this software and its
-    documentation for any purpose is hereby granted without fee, provided
-    that this copyright and permissions notice appear in all copies and
-    derivatives.
-    
-    This software is supplied "as is" without express or implied warranty.
-
-    But that said, if there are any problems please get in touch.
-
-*/
--- a/src/windows/dirent.h
+++ b/src/windows/dirent.h
@ -1,57 +0,0 @@
-#ifndef DIRENT_INCLUDED
-#define DIRENT_INCLUDED
-
-/*
-
-    Declaration of POSIX directory browsing functions and types for Win32.
-
-    Author:  Kevlin Henney (kevlin@acm.org, kevlin@curbralan.com)
-    History: Created March 1997. Updated June 2003.
-    Rights:  See end of file.
-    
-*/
-#include <sys/stat.h>
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-typedef struct DIR DIR;
-
-struct dirent
-{
-    char *d_name;
-    // The native call we use, findfirst/next return file attributes at once,
-    // no need for a separate stat() call in most cases
-    // Note that ctime is actually creation time. No use for posix.
-    time_t d_mtime;
-    off_t d_size;
-    int   d_mode; // S_IFREG or S_IFDIR only
-};
-
-DIR           *opendir(const char *);
-int           closedir(DIR *);
-struct dirent *readdir(DIR *);
-void          rewinddir(DIR *);
-
-/*
-
-    Copyright Kevlin Henney, 1997, 2003. All rights reserved.
-
-    Permission to use, copy, modify, and distribute this software and its
-    documentation for any purpose is hereby granted without fee, provided
-    that this copyright and permissions notice appear in all copies and
-    derivatives.
-    
-    This software is supplied "as is" without express or implied warranty.
-
-    But that said, if there are any problems please get in touch.
-
-*/
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
--- a/src/windows/execmd_w.cpp
+++ b/src/windows/execmd_w.cpp
@ -32,6 +32,7 @@
 #include <psapi.h>
 #include "smallut.h"
 #include "pathut.h"
+#include "transcode.h"

 using namespace std;

@ -752,7 +753,7 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
        return false;
    }

-    STARTUPINFO siStartInfo;
+    STARTUPINFOW siStartInfo;
    BOOL bSuccess = FALSE;

    // Set up members of the PROCESS_INFORMATION structure. 
@ -760,8 +761,8 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,

    // Set up members of the STARTUPINFO structure. 
    // This structure specifies the STDIN and STDOUT handles for redirection.
-    ZeroMemory(&siStartInfo, sizeof(STARTUPINFO));
-    siStartInfo.cb = sizeof(STARTUPINFO);
+    ZeroMemory(&siStartInfo, sizeof(siStartInfo));
+    siStartInfo.cb = sizeof(siStartInfo);
    if (m->m_flags & EXF_SHOWWINDOW) {
        siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
        if (m->m_flags & EXF_MAXIMIZED) {
@ -782,12 +783,15 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,

    // Create the child process. 
    // Need a writable buffer for the command line, for some reason.
-    LOGDEB1("ExecCmd:startExec: cmdline ["  << (cmdline) << "]\n" );
+    LOGDEB("ExecCmd:startExec: cmdline [" << cmdline << "]\n");
+#if 0
    LPSTR buf = (LPSTR)malloc(cmdline.size() + 1);
    memcpy(buf, cmdline.c_str(), cmdline.size());
    buf[cmdline.size()] = 0;
-    bSuccess = CreateProcess(NULL,
-                             buf, // command line 
+#endif
+    SYSPATH(cmdline, wcmdline);
+    bSuccess = CreateProcessW(NULL,
+                             wcmdline, // command line 
                             NULL,          // process security attributes 
                             NULL,          // primary thread security attrs 
                             TRUE,         // handles are inherited 
@ -798,9 +802,10 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
                             &m->m_piProcInfo);  // PROCESS_INFORMATION 
    if (!bSuccess) {
        printError("ExecCmd::doexec: CreateProcess");
-    } 
+    }
+    
    free(envir);
-    free(buf);
+//    free(buf);
    // Close child-side handles else we'll never see eofs
    if (!CloseHandle(hOutputWrite))
        printError("CloseHandle");
--- a/src/windows/mimeconf
+++ b/src/windows/mimeconf
@ -47,54 +47,43 @@ application/x-lzma = uncompress python rcluncomp.py 7z %f %t
 # each filter, see the exemples below (ie: msword)
 [index]
 application/msword = execm python rcldoc.py
-application/pdf = execm python rclpdf.py
 application/vnd.ms-excel = execm python rclxls.py
 application/vnd.ms-powerpoint = execm python rclppt.py
-application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
- execm python rclopxml.py
-application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
- execm python rclopxml.py
-application/vnd.openxmlformats-officedocument.presentationml.template = \
- execm python rclopxml.py
-application/vnd.openxmlformats-officedocument.presentationml.presentation = \
- execm python rclopxml.py
-application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
- execm python rclopxml.py
-application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
- execm python rclopxml.py

-application/vnd.oasis.opendocument.text = execm python rclsoff.py
-application/vnd.oasis.opendocument.text-template = execm python rclsoff.py
-application/vnd.oasis.opendocument.presentation = execm python rclsoff.py 
-application/vnd.oasis.opendocument.spreadsheet = execm python rclsoff.py
-application/vnd.oasis.opendocument.graphics = execm python rclsoff.py
-application/vnd.sun.xml.calc = execm python rclsoff.py
-application/vnd.sun.xml.calc.template = execm python rclsoff.py
-application/vnd.sun.xml.draw = execm python rclsoff.py
-application/vnd.sun.xml.draw.template = execm python rclsoff.py
-application/vnd.sun.xml.impress = execm python rclsoff.py
-application/vnd.sun.xml.impress.template = execm python rclsoff.py
-application/vnd.sun.xml.math = execm python rclsoff.py
-application/vnd.sun.xml.writer = execm python rclsoff.py
-application/vnd.sun.xml.writer.global = execm python rclsoff.py
-application/vnd.sun.xml.writer.template = execm python rclsoff.py
+application/pdf = execm python rclpdf.py
+
+application/vnd.oasis.opendocument.text = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.oasis.opendocument.text-template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.oasis.opendocument.presentation = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl 
+application/vnd.oasis.opendocument.spreadsheet = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.oasis.opendocument.graphics = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.calc = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.calc.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.draw = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.draw.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.impress = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.impress.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.math = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.writer = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.writer.global = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.writer.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl

 application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html
-application/x-abiword = execm python rclabw.py
-text/x-fictionbook = execm python rclfb2.py
+application/x-abiword = internal xsltproc abiword.xsl
+text/x-fictionbook = internal xsltproc fb2.xsl

 application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
- execm python rclopxml.py
+ internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
 application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
- execm python rclopxml.py
+ internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
 application/vnd.openxmlformats-officedocument.presentationml.template = \
 execm python rclopxml.py
 application/vnd.openxmlformats-officedocument.presentationml.presentation = \
 execm python rclopxml.py
 application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
- execm python rclopxml.py
+ internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
 application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
- execm python rclopxml.py
+ internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl

 application/epub+zip = execm python rclepub
 # Returned by xdg-mime for .js. Future-proofing
@ -121,17 +110,18 @@ application/x-shellscript = internal text/plain
 #application/x-tar = execm python rcltar
 application/x-webarchive = execm python rclwar
 application/x-7z-compressed = execm python rcl7z
+application/zip = execm python rclzip;charset=default
 audio/mpeg = execm python rclaudio
 audio/mp4 = execm python rclaudio
 audio/aac = execm python rclaudio
 audio/x-karaoke = execm python rclkar
-image/gif = execm python rclimg.py
-image/jp2 = execm python rclimg.py
-image/jpeg = execm python rclimg.py
-image/png = execm python rclimg.py
-image/tiff = execm python rclimg.py
-image/svg+xml = execm python rclsvg.py
-#image/x-xcf = execm perl rclimg
+image/gif = execm rclimg.exe
+image/jp2 = execm rclimg.exe
+image/jpeg = execm rclimg.exe
+image/png = execm rclimg.exe
+image/tiff = execm rclimg.exe
+image/svg+xml = internal xsltproc svg.xsl
+#image/x-xcf = execm rclimg.exe
 inode/symlink = internal
 application/x-zerosize = internal
 inode/x-empty = internal application/x-zerosize
--- a/src/windows/mkinstdir.sh
+++ b/src/windows/mkinstdir.sh
@ -24,36 +24,39 @@ test -d $DESTDIR || mkdir $DESTDIR || fatal cant create $DESTDIR

 # Recoll src tree
 RCL=c:/recoll/src/
+RCLW=$RCL/windows/
+# Recoll dependencies
+RCLDEPS=c:/recolldeps/

 ReleaseBuild=y

-UNRTF=c:/recolldeps/unrtf
-ANTIWORD=c:/recolldeps/antiword
-PYXSLT=C:/recolldeps/pyxslt
-PYEXIV2=C:/recolldeps/pyexiv2
-#LIBXAPIAN=c:/temp/xapian-core-1.2.21/.libs/libxapian-22.dll
-LIBXAPIAN=c:/recolldeps/xapian-core-1.4.5/.libs/libxapian-30.dll
-MUTAGEN=C:/recolldeps/mutagen-1.32/
-EPUB=C:/recolldeps/epub-0.5.2
-FUTURE=C:/recolldeps/python2-future
-ZLIB=c:/recolldeps/zlib-1.2.8
-POPPLER=c:/recolldeps/poppler-0.36/
-LIBWPD=c:/recolldeps/libwpd/libwpd-0.10.0/
-LIBREVENGE=c:/recolldeps/libwpd/librevenge-0.0.1.jfd/
-CHM=c:/recolldeps/pychm
-
-# Where to find libgcc_s_dw2-1.dll for progs which need it copied
-gccpath=`which gcc`
-MINGWBIN=`dirname $gccpath`
+PYTHON=${RCLDEPS}py-python3
+UNRTF=${RCLDEPS}unrtf
+ANTIWORD=${RCLDEPS}antiword
+PYXSLT=${RCLDEPS}pyxslt
+PYEXIV2=${RCLDEPS}pyexiv2
+LIBXAPIAN=${RCLDEPS}xapian-core-1.4.5/.libs/libxapian-30.dll
+MUTAGEN=${RCLDEPS}mutagen-1.32/
+EPUB=${RCLDEPS}epub-0.5.2
+FUTURE=${RCLDEPS}python2-future
+ZLIB=${RCLDEPS}zlib-1.2.8
+POPPLER=${RCLDEPS}poppler-0.36/
+LIBWPD=${RCLDEPS}libwpd/libwpd-0.10.0/
+LIBREVENGE=${RCLDEPS}libwpd/librevenge-0.0.1.jfd/
+CHM=${RCLDEPS}pychm
+MISC=${RCLDEPS}misc

 # Where to copy the Qt Dlls from:
 QTBIN=C:/Qt/Qt5.8.0/5.8/mingw53_32/bin
+QTGCCBIN=C:/qt/Qt5.8.0/Tools/mingw530_32/bin/
+# Where to find libgcc_s_dw2-1.dll for progs which need it copied
+MINGWBIN=$QTBIN
+PATH=$MINGWBIN:$QTGCCBIN:$PATH
+export PATH

 # Qt arch
 QTA=Desktop_Qt_5_8_0_MinGW_32bit

-RCLW=$RCL/windows/
-
 if test X$ReleaseBuild = X'y'; then
    qtsdir=release
 else
@ -73,9 +76,14 @@ RCLS=$RCLW/build-rclstartw-${QTA}-${qtsdir}/${qtsdir}/rclstartw.exe

 ################
 # Script:
-
 FILTERS=$DESTDIR/Share/filters

+# Print the arguments as an error message on stderr and abort the script.
+# The message is quoted so that it is not subject to word splitting or
+# file name expansion.
+fatal()
+{
+    echo "$*" 1>&2
+    exit 1
+}
+
 # checkcopy. 
 chkcp()
 {
@ -115,7 +123,12 @@ copyzlib()
 {
    chkcp $ZLIB/zlib1.dll $DESTDIR
 }
-
+# Copy the whole Python tree from $PYTHON to the python subdirectory of
+# the filters directory. Any failure aborts the packaging.
+copypython()
+{
+    mkdir -p $FILTERS/python || fatal mkdir $FILTERS/python failed
+    cp -rp $PYTHON/* $FILTERS/python || fatal copy of $PYTHON failed
+    # chkcp to check that the Python interpreter is where we think it is
+    chkcp $PYTHON/python.exe $FILTERS/python/python.exe
+}
 copyrecoll()
 {
 #    bindir=$RCL/windows/$PLATFORM/$CONFIGURATION/
@ -127,6 +140,7 @@ copyrecoll()
    chkcp $RCLIDX $DESTDIR
    chkcp $RCLQ $DESTDIR 
    chkcp $RCLS $DESTDIR 
+    chkcp $MINGWBIN/libgcc_s_dw2-1.dll $DESTDIR

    chkcp $RCL/COPYING                  $DESTDIR/COPYING.txt
    chkcp $RCL/doc/user/usermanual.html $DESTDIR/Share/doc
@ -143,7 +157,9 @@ copyrecoll()

    chkcp $RCL/python/recoll/recoll/rclconfig.py $FILTERS
    chkcp $RCL/python/recoll/recoll/conftree.py $FILTERS
-    chkcp $RCL/filters/*       $FILTERS 
+    chkcp $RCL/filters/*       $FILTERS
+    rm $FILTERS/rclimg $FILTERS/rclimg.py
+    chkcp $RCLDEPS/rclimg/rclimg.exe $FILTERS
    chkcp $RCL/qtgui/mtpics/*  $DESTDIR/Share/images
    chkcp $RCL/qtgui/i18n/*.qm $DESTDIR/Share/translations
 }
@ -169,9 +185,8 @@ copyunrtf()
    chkcp  $bindir/unrtf.exe               $FILTERS
    chkcp  $UNRTF/outputs/*.conf           $FILTERS/Share
    chkcp  $UNRTF/outputs/SYMBOL.charmap   $FILTERS/Share
-    # libiconv2 is not present in qt, get it from mingw direct. is C, should
-    # be compatible
-    chkcp c:/MinGW/bin/libiconv-2.dll $FILTERS
+    # libiconv-2 originally comes from mingw
+    chkcp $MISC/libiconv-2.dll $FILTERS
 }

 copymutagen()
@ -187,6 +202,10 @@ copyepub()
    # chkcp to check that epub is where we think it is
    chkcp $EPUB/build/lib/epub/opf.py $FILTERS/epub
 }
+
+# We used to copy the future module to the filters dir, but it is now
+# part of the origin Python tree in recolldeps. (2 dirs:
+# site-packages/builtins, site-packages/future)
 copyfuture()
 {
    cp -rp $FUTURE/future $FILTERS/
@ -246,6 +265,18 @@ for d in doc examples filters images translations; do
        fatal mkdir $d failed
 done

+
+# First check that the config is ok
+ cmp -s $RCL/common/autoconfig.h $RCL/common/autoconfig-win.h || \
+    fatal autoconfig.h and autoconfig-win.h differ
+VERSION=`cat $RCL/VERSION`
+CFVERS=`grep PACKAGE_VERSION $RCL/common/autoconfig.h | \
+cut -d ' ' -f 3 | sed -e 's/"//g'`
+test "$VERSION" = "$CFVERS" ||
+    fatal Versions in VERSION and autoconfig.h differ
+
+echo Packaging version $CFVERS
+
 # copyrecoll must stay before copyqt so that windeployqt can do its thing
 copyrecoll
 copyqt
@ -255,9 +286,10 @@ copypoppler
 copyantiword
 copyunrtf
 copyxslt
-copyfuture
+#copyfuture
 copymutagen
 copyepub
-copypyexiv2
+#copypyexiv2
 copywpd
-copychm
+#copychm
+copypython
--- a/src/windows/qmkrecoll/librecoll.pro
+++ b/src/windows/qmkrecoll/librecoll.pro
@ -14,6 +14,13 @@ DEFINES -= UNICODE
 DEFINES -= _UNICODE
 DEFINES += _MBCS
 DEFINES += PSAPI_VERSION=1
+DEFINES += READFILE_ENABLE_MINIZ
+DEFINES += READFILE_ENABLE_MD5
+DEFINES += READFILE_ENABLE_ZLIB
+
+# This is necessary to avoid an undefined impl__xmlFree.
+# See comment in libxml/xmlexports.h
+DEFINES += LIBXML_STATIC

 SOURCES += \
 ../../aspell/rclaspell.cpp \
@ -50,6 +57,7 @@ SOURCES += \
 ../../internfile/mh_mail.cpp \
 ../../internfile/mh_mbox.cpp \
 ../../internfile/mh_text.cpp \
+../../internfile/mh_xslt.cpp \
 ../../internfile/mimehandler.cpp \
 ../../internfile/myhtmlparse.cpp \
 ../../internfile/txtdcode.cpp \
@ -105,6 +113,7 @@ SOURCES += \
 ../../utils/md5.cpp \
 ../../utils/md5ut.cpp \
 ../../utils/mimeparse.cpp \
+../../utils/miniz.cpp \
 ../../utils/pathut.cpp \
 ../../utils/pxattr.cpp \
 ../../utils/rclionice.cpp \
@ -114,8 +123,7 @@ SOURCES += \
 ../../utils/strmatcher.cpp \
 ../../utils/transcode.cpp \
 ../../utils/wipedir.cpp \
-../../windows/strptime.cpp \
-../../windows/dirent.c
+../../windows/strptime.cpp

 INCLUDEPATH += ../../common ../../index ../../internfile ../../query \
            ../../unac ../../utils ../../aspell ../../rcldb ../../qtgui \
@ -129,10 +137,16 @@ windows {
    contains(QMAKE_CC, cl){
        # Visual Studio
    }
-  LIBS += c:/temp/xapian-core-1.4.5/.libs/libxapian-30.dll \
-      c:/temp/zlib-1.2.8/zlib1.dll -liconv -lshlwapi -lpsapi -lkernel32
+  LIBS += C:/recolldeps/libxslt/libxslt-1.1.29/win32/bin.mingw/libxslt.a \
+          C:/recolldeps/libxml2/libxml2-2.9.4+dfsg1/win32/bin.mingw/libxml2.a \
+          c:/recolldeps/xapian-core-1.4.5/.libs/libxapian-30.dll \
+          c:/recolldeps/zlib-1.2.8/zlib1.dll \
+          -liconv -lshlwapi -lpsapi -lkernel32
  INCLUDEPATH += ../../windows \
-            C:/temp/xapian-core-1.4.5/include
+          C:/recolldeps/xapian-core-1.4.5/include \
+          C:/recolldeps/libxslt/libxslt-1.1.29/ \
+          C:/recolldeps/libxml2/libxml2-2.9.4+dfsg1/include
+
 }

 unix {
--- a/src/windows/rclstartw.cpp
+++ b/src/windows/rclstartw.cpp
@ -19,6 +19,9 @@
 #include <shellapi.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include "safewindows.h"
+#include "pathut.h"
+#include "transcode.h"

 using namespace std;

@ -41,6 +44,10 @@ int op_flags;

 int main(int argc, char *argv[])
 {
+    int wargc;
+    wchar_t **wargv = CommandLineToArgvW(GetCommandLineW(), &wargc);
+
+    // Yes we could use wargv
    thisprog = argv[0];
    argc--; argv++;
    int imode = 0;
@ -62,7 +69,9 @@ int main(int argc, char *argv[])
    if (argc != 1) {
        Usage();
    }
-    char *fn = strdup(argv[0]);
+
+    wchar_t *wfn = wargv[1];
+
    // Do we need this ?
    //https://msdn.microsoft.com/en-us/library/windows/desktop/bb762153%28v=vs.85%29.aspx
    //CoInitializeEx(NULL, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE);
@ -73,9 +82,10 @@ int main(int argc, char *argv[])
    default: wmode = SW_SHOWNORMAL;  break;
    }
    
-    int ret = (int)ShellExecute(NULL, "open", fn, NULL, NULL, wmode);
+    int ret = (int)ShellExecuteW(NULL, L"open", wfn, NULL, NULL, wmode);
    if (ret) {
        fprintf(stderr, "ShellExecute returned %d\n", ret);
    }
+    LocalFree(wargv);
    return ret;
 }
--- a/src/windows/recoll-setup.iss
+++ b/src/windows/recoll-setup.iss
@ -2,7 +2,7 @@
 ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!

 #define MyAppName "Recoll"
-#define MyAppVersion "1.24.1-20180517-96c6fd"
+#define MyAppVersion "1.25.0-20190125-540140bd"
 #define MyAppPublisher "Recoll.org"
 #define MyAppURL "http://www.recoll.org"
 #define MyAppExeName "recoll.exe"