From a4fd4ee5bec20ca038670803484a5d5792c28250 Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes <jfd@recoll.org>
Date: Mon, 21 Mar 2016 12:55:31 +0100
Subject: [PATCH] moved code around to make smallut and pathut less
 recoll-specific and reusable. No actual changes

---
 src/Makefile.am                |    3 +
 src/common/rclconfig.cpp       |    3 +-
 src/common/rclinit.cpp         |    4 +-
 src/common/rclinit.h           |   27 +-
 src/index/beaglequeue.cpp      |    1 +
 src/index/fsindexer.cpp        |    1 +
 src/index/recollindex.cpp      |    1 +
 src/internfile/uncomp.h        |    1 +
 src/python/recoll/pyrecoll.cpp |    1 +
 src/qtgui/preview_load.h       |    1 +
 src/qtgui/recoll.h             |    1 +
 src/query/reslistpager.cpp     |    1 +
 src/rcldb/rcldb.cpp            |   16 +
 src/rcldb/rcldoc.cpp           |   28 +
 src/rcldb/rcldoc.h             |   28 +-
 src/unac/unac.c                |    4 +-
 src/utils/circache.cpp         |   22 +
 src/utils/cpuconf.cpp          |    6 +
 src/utils/hldata.cpp           |   78 +++
 src/utils/hldata.h             |    1 +
 src/utils/pathut.cpp           |  964 +++++++++++------------------
 src/utils/pathut.h             |  132 +---
 src/utils/rclutil.cpp          |  411 +++++++++++++
 src/utils/rclutil.h            |  112 ++++
 src/utils/smallut.cpp          | 1058 ++++++++++++++++----------------
 src/utils/smallut.h            |  409 ++++++------
 26 files changed, 1821 insertions(+), 1493 deletions(-)
 create mode 100644 src/utils/hldata.cpp
 create mode 100644 src/utils/rclutil.cpp
 create mode 100644 src/utils/rclutil.h
diff --git a/src/Makefile.am b/src/Makefile.am
index e32b4e66..6b0f25b7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -207,6 +207,7 @@ utils/fileudi.h \
 utils/fstreewalk.cpp \
 utils/fstreewalk.h \
 utils/hldata.h \
+utils/hldata.cpp \
 utils/idfile.cpp \
 utils/idfile.h \
 utils/md5.cpp \
@@ -224,6 +225,8 @@ utils/pxattr.cpp \
 utils/pxattr.h \
 utils/rclionice.cpp \
 utils/rclionice.h \
+utils/rclutil.h \
+utils/rclutil.cpp \
 utils/readfile.cpp \
 utils/readfile.h \
 utils/refcntr.h \
diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp
index 638308ed..a26de74b 100644
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@@ -39,6 +39,7 @@
 
 #include "cstr.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "rclconfig.h"
 #include "conftree.h"
 #include "debuglog.h"
@@ -144,7 +145,7 @@ RclConfig::RclConfig(const string *argcnf)
     }
 
     // Compute our data dir name, typically /usr/local/share/recoll
-    m_datadir = path_sharedatadir();
+    m_datadir = path_pkgdatadir();
     // We only do the automatic configuration creation thing for the default
     // config dir, not if it was specified through -c or RECOLL_CONFDIR
     bool autoconfdir = false;
diff --git a/src/common/rclinit.cpp b/src/common/rclinit.cpp
index 145a7520..97a08fed 100644
--- a/src/common/rclinit.cpp
+++ b/src/common/rclinit.cpp
@@ -32,6 +32,7 @@
 #include "rclconfig.h"
 #include "rclinit.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "unac.h"
 #include "smallut.h"
 #include "execmd.h"
@@ -318,6 +319,8 @@ RclConfig *recollinit(RclInitFlags flags,
     // Init smallut and pathut static values
     pathut_init_mt();
     smallut_init_mt();
+    rclutil_init_mt();
+    
     // Init execmd.h static PATH and PATHELT splitting
     {string bogus;
         ExecCmd::which("nosuchcmd", bogus);
@@ -389,4 +392,3 @@ bool recoll_ismainthread()
     return pthread_equal(pthread_self(), mainthread_id);
 }
 
-
diff --git a/src/common/rclinit.h b/src/common/rclinit.h
index 5a41f67e..470da903 100644
--- a/src/common/rclinit.h
+++ b/src/common/rclinit.h
@@ -18,40 +18,39 @@
 #define _RCLINIT_H_INCLUDED_
 
 #include <string>
-#ifndef NO_NAMESPACES
-using std::string;
-#endif
 
 class RclConfig;
 /**
  * Initialize by reading configuration, opening log file, etc.
- * 
+ *
  * This must be called from the main thread before starting any others. It sets
  * up the global signal handling. other threads must call recoll_threadinit()
  * when starting.
  *
- * @param flags   misc modifiers. These are currently only used to customize 
+ * @param flags   misc modifiers. These are currently only used to customize
  *      the log file and verbosity.
  * @param cleanup function to call before exiting (atexit)
- * @param sigcleanup function to call on terminal signal (INT/HUP...) This 
- *       should typically set a flag which tells the program (recoll, 
- *       recollindex etc.. to exit as soon as possible (after closing the db, 
+ * @param sigcleanup function to call on terminal signal (INT/HUP...) This
+ *       should typically set a flag which tells the program (recoll,
+ *       recollindex etc.. to exit as soon as possible (after closing the db,
  *       etc.). cleanup will then be called by exit().
  * @param reason in case of error: output string explaining things
  * @param argcnf Configuration directory name from the command line (overriding
  *               default and environment
  * @return the parsed configuration.
  */
-enum RclInitFlags {RCLINIT_NONE=0, RCLINIT_DAEMON=1, RCLINIT_IDX=2};
+enum RclInitFlags {RCLINIT_NONE = 0, RCLINIT_DAEMON = 1, RCLINIT_IDX = 2};
 extern RclConfig *recollinit(RclInitFlags flags,
-			     void (*cleanup)(void), void (*sigcleanup)(int), 
-			     string &reason, const string *argcnf = 0);
-inline RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int), 
-			     string &reason, const string *argcnf = 0) {
+                             void (*cleanup)(void), void (*sigcleanup)(int),
+                             std::string& reason, const std::string *argcnf = 0);
+inline RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int),
+                             std::string& reason,
+                             const std::string *argcnf = 0)
+{
     return recollinit(RCLINIT_NONE, cleanup, sigcleanup, reason, argcnf);
 }
 
-// Threads need to call this to block signals.  
+// Threads need to call this to block signals.
 // The main thread handles all signals.
 extern void recoll_threadinit();
 
diff --git a/src/index/beaglequeue.cpp b/src/index/beaglequeue.cpp
index f6217f70..cf78b717 100644
--- a/src/index/beaglequeue.cpp
+++ b/src/index/beaglequeue.cpp
@@ -23,6 +23,7 @@
 
 #include "cstr.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "debuglog.h"
 #include "fstreewalk.h"
 #include "beaglequeue.h"
diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp
index 55fc338f..6b6ea995 100644
--- a/src/index/fsindexer.cpp
+++ b/src/index/fsindexer.cpp
@@ -28,6 +28,7 @@
 
 #include "cstr.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "conftree.h"
 #include "rclconfig.h"
 #include "fstreewalk.h"
diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp
index 977c9cce..6f9a452f 100644
--- a/src/index/recollindex.cpp
+++ b/src/index/recollindex.cpp
@@ -42,6 +42,7 @@ using namespace std;
 #include "smallut.h"
 #include "chrono.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "rclmon.h"
 #include "x11mon.h"
 #include "cancelcheck.h"
diff --git a/src/internfile/uncomp.h b/src/internfile/uncomp.h
index 1f2f4d6b..ab7c55a4 100644
--- a/src/internfile/uncomp.h
+++ b/src/internfile/uncomp.h
@@ -21,6 +21,7 @@
 #include <string>
 
 #include "pathut.h"
+#include "rclutil.h"
 #include "ptmutex.h"
 
 /// Uncompression script interface.
diff --git a/src/python/recoll/pyrecoll.cpp b/src/python/recoll/pyrecoll.cpp
index e414dbeb..56466d22 100644
--- a/src/python/recoll/pyrecoll.cpp
+++ b/src/python/recoll/pyrecoll.cpp
@@ -32,6 +32,7 @@ using namespace std;
 #include "searchdata.h"
 #include "rclquery.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "wasatorcl.h"
 #include "debuglog.h"
 #include "pathut.h"
diff --git a/src/qtgui/preview_load.h b/src/qtgui/preview_load.h
index d1e8dad4..328c4ba1 100644
--- a/src/qtgui/preview_load.h
+++ b/src/qtgui/preview_load.h
@@ -23,6 +23,7 @@
 
 #include "rcldoc.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "rclconfig.h"
 
 /* 
diff --git a/src/qtgui/recoll.h b/src/qtgui/recoll.h
index 429dab88..b4f035a3 100644
--- a/src/qtgui/recoll.h
+++ b/src/qtgui/recoll.h
@@ -21,6 +21,7 @@
 
 #include "rclconfig.h"
 #include "rcldb.h"
+#include "rclutil.h"
 #include "ptmutex.h"
 
 #include <QString>
diff --git a/src/query/reslistpager.cpp b/src/query/reslistpager.cpp
index fdcfad14..7468a5ab 100644
--- a/src/query/reslistpager.cpp
+++ b/src/query/reslistpager.cpp
@@ -34,6 +34,7 @@ using std::list;
 #include "debuglog.h"
 #include "rclconfig.h"
 #include "smallut.h"
+#include "rclutil.h"
 #include "plaintorich.h"
 #include "mimehandler.h"
 
diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
index c9c412c9..6a50ffa4 100644
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@@ -42,6 +42,7 @@ using namespace std;
 #include "unacpp.h"
 #include "conftree.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "smallut.h"
 #include "chrono.h"
 #include "utf8iter.h"
@@ -126,6 +127,21 @@ static inline string make_parentterm(const string& udi)
     return pterm;
 }
 
+// Cut s at the last Utf8Iter byte position below maxlen; no-op if s fits.
+static void utf8truncate(string& s, int maxlen)
+{
+    const string::size_type limit = string::size_type(maxlen);
+    if (s.size() <= limit) {
+        return;
+    }
+    string::size_type pos = 0;
+    // Assumes getBpos() never decreases while advancing: stop at the limit.
+    for (Utf8Iter it(s); it++ != string::npos && it.getBpos() < limit;) {
+        pos = it.getBpos();
+    }
+    s.erase(pos);
+}
+
 Db::Native::Native(Db *db) 
     : m_rcldb(db), m_isopen(false), m_iswritable(false),
       m_noversionwrite(false)
diff --git a/src/rcldb/rcldoc.cpp b/src/rcldb/rcldoc.cpp
index 2dcaa05f..3858529f 100644
--- a/src/rcldb/rcldoc.cpp
+++ b/src/rcldb/rcldoc.cpp
@@ -14,9 +14,11 @@
  *   Free Software Foundation, Inc.,
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
+#include "autoconfig.h"
 
 #include "rcldoc.h"
 #include "debuglog.h"
+#include "rclutil.h"
 
 namespace Rcl {
     const string Doc::keyabs("abstract");
@@ -70,5 +72,31 @@ namespace Rcl {
         if (dotext)
             LOGDEB(("Rcl::Doc::dump: text: \n[%s]\n", text.c_str()));
     }
+
+    // Copy ensuring no shared string data, for threading issues: every
+    // string is rebuilt from an iterator range instead of being assigned.
+    void Doc::copyto(Doc *d) const
+    {
+        d->url.assign(url.begin(), url.end());
+        d->idxurl.assign(idxurl.begin(), idxurl.end());
+        d->idxi = idxi;
+        d->ipath.assign(ipath.begin(), ipath.end());
+        d->mimetype.assign(mimetype.begin(), mimetype.end());
+        d->fmtime.assign(fmtime.begin(), fmtime.end());
+        d->dmtime.assign(dmtime.begin(), dmtime.end());
+        d->origcharset.assign(origcharset.begin(), origcharset.end());
+        map_ss_cp_noshr(meta, &d->meta);
+        d->syntabs = syntabs;
+        d->pcbytes.assign(pcbytes.begin(), pcbytes.end());
+        d->fbytes.assign(fbytes.begin(), fbytes.end());
+        d->dbytes.assign(dbytes.begin(), dbytes.end());
+        d->sig.assign(sig.begin(), sig.end());
+        d->text.assign(text.begin(), text.end());
+        d->pc = pc;
+        d->xdocid = xdocid;
+        d->haspages = haspages;
+        d->haschildren = haschildren;
+        d->onlyxattr = onlyxattr;
+    }
 }
 
diff --git a/src/rcldb/rcldoc.h b/src/rcldb/rcldoc.h
index 120d9ae8..08a08dd7 100644
--- a/src/rcldb/rcldoc.h
+++ b/src/rcldb/rcldoc.h
@@ -163,33 +163,11 @@ class Doc {
 	onlyxattr = false;
     }
     // Copy ensuring no shared string data, for threading issues.
-    void copyto(Doc *d) const {
-	d->url.assign(url.begin(), url.end());
-        d->idxurl.assign(idxurl.begin(), idxurl.end());
-        d->idxi = idxi;
-	d->ipath.assign(ipath.begin(), ipath.end());
-	d->mimetype.assign(mimetype.begin(), mimetype.end());
-	d->fmtime.assign(fmtime.begin(), fmtime.end());
-	d->dmtime.assign(dmtime.begin(), dmtime.end());
-	d->origcharset.assign(origcharset.begin(), origcharset.end());
-        map_ss_cp_noshr(meta, &d->meta);
-	d->syntabs = syntabs;
-	d->pcbytes.assign(pcbytes.begin(), pcbytes.end());
-	d->fbytes.assign(fbytes.begin(), fbytes.end());
-	d->dbytes.assign(dbytes.begin(), dbytes.end());
-	d->sig.assign(sig.begin(), sig.end());
-        d->text.assign(text.begin(), text.end());
-	d->pc = pc;
-	d->xdocid = xdocid;
-	d->idxi = idxi;
-	d->haspages = haspages;
-	d->haschildren = haschildren;
-	d->onlyxattr = onlyxattr;
-    }
+    void copyto(Doc *d) const;
+
     Doc()
 	: idxi(0), syntabs(false), pc(0), xdocid(0),
-	  haspages(false), haschildren(false), onlyxattr(false)
-    {
+	  haspages(false), haschildren(false), onlyxattr(false) {
     }
     /** Get value for named field. If value pointer is 0, just test existence */
     bool getmeta(const string& nm, string *value = 0) const
diff --git a/src/unac/unac.c b/src/unac/unac.c
index 1c310831..d1c1bd44 100644
--- a/src/unac/unac.c
+++ b/src/unac/unac.c
@@ -32,10 +32,12 @@
 #include <iostream>
 #include UNORDERED_MAP_INCLUDE
 
-using std::string;
 
 #include "smallut.h"
 
+using std::string;
+using std::vector;
+
 /* 
    Storage for the exception translations. These are chars which
    should not be translated according to what UnicodeData says, but
diff --git a/src/utils/circache.cpp b/src/utils/circache.cpp
index e3dc8fe0..5a54038e 100644
--- a/src/utils/circache.cpp
+++ b/src/utils/circache.cpp
@@ -71,6 +71,28 @@ typedef unsigned char UCHAR;
 typedef unsigned int UINT;
 typedef unsigned long ULONG;
 
+/** Temp buffer with automatic deallocation */
+struct TempBuf {
+    TempBuf() : m_buf(0) {}
+    TempBuf(int n) : m_buf((char *)malloc(n)) {}
+    ~TempBuf() {
+        free(m_buf); // free(0) is a no-op, no test needed
+    }
+    /** Resize. @return new address, or 0 on failure: the previous buffer
+     *  is then kept (not leaked) and still freed by the destructor. */
+    char *setsize(int n) {
+        char *nbuf = (char *)realloc(m_buf, n);
+        if (nbuf != 0 || n == 0) {
+            m_buf = nbuf; // n == 0: realloc may have freed the old block
+        }
+        return nbuf;
+    }
+    char *buf() {
+        return m_buf;
+    }
+    char *m_buf;
+};
+
 static bool inflateToDynBuf(void *inp, UINT inlen, void **outpp, UINT *outlenp);
 
 /*
diff --git a/src/utils/cpuconf.cpp b/src/utils/cpuconf.cpp
index 378d0c38..c715a1b5 100644
--- a/src/utils/cpuconf.cpp
+++ b/src/utils/cpuconf.cpp
@@ -18,10 +18,16 @@
 #ifndef TEST_CPUCONF
 
 #include "autoconfig.h"
+
+#include <stdlib.h>
+
 #include "cpuconf.h"
 #include "execmd.h"
 #include "smallut.h"
 
+using std::string;
+using std::vector;
+
 #if defined(__gnu_linux__) 
 bool getCpuConf(CpuConf& conf)
 {
diff --git a/src/utils/hldata.cpp b/src/utils/hldata.cpp
new file mode 100644
index 00000000..44fcef94
--- /dev/null
+++ b/src/utils/hldata.cpp
@@ -0,0 +1,78 @@
+/* Copyright (C) 2016 J.F.Dockes
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#include "autoconfig.h"
+
+#include <stdio.h>
+
+#include "hldata.h"
+
+using std::string;
+using std::map;
+
+void HighlightData::toString(string& out)
+{   // Appends a readable dump of uterms, terms, groups and slacks to out.
+    out.append("\nUser terms (orthograph): ");
+    for (std::set<string>::const_iterator it = uterms.begin();
+            it != uterms.end(); it++) {
+        out.append(" [").append(*it).append("]");
+    }
+    out.append("\nUser terms to Query terms:");
+    for (map<string, string>::const_iterator it = terms.begin();
+            it != terms.end(); it++) {
+        out.append("[").append(it->first).append("]->[");
+        out.append(it->second).append("] ");
+    }
+    out.append("\nGroups: ");
+    char cbuf[200]; // scratch for the sprintf calls below (integers only)
+    sprintf(cbuf, "Groups size %d grpsugidx size %d ugroups size %d",
+            int(groups.size()), int(grpsugidx.size()), int(ugroups.size()));
+    out.append(cbuf);
+
+    size_t ugidx = (size_t) - 1; // largest size_t: no ugroups entry shown yet
+    for (unsigned int i = 0; i < groups.size(); i++) {
+        // NOTE(review): assumes grpsugidx/slacks hold >= groups.size() items
+        if (ugidx != grpsugidx[i]) { // index changed: print that ugroups entry
+            ugidx = grpsugidx[i];
+            out.append("\n(");
+            for (unsigned int j = 0; j < ugroups[ugidx].size(); j++) {
+                out.append("[").append(ugroups[ugidx][j]).append("] ");
+            }
+            out.append(") ->");
+        }
+        out.append(" {"); // then the members of groups[i] and its slacks[i]
+        for (unsigned int j = 0; j < groups[i].size(); j++) {
+            out.append("[").append(groups[i][j]).append("]");
+        }
+        sprintf(cbuf, "%d", slacks[i]);
+        out.append("}").append(cbuf);
+    }
+    out.append("\n");
+}
+
+void HighlightData::append(const HighlightData& hl)
+{   // Merges the contents of hl into this object.
+    uterms.insert(hl.uterms.begin(), hl.uterms.end());
+    terms.insert(hl.terms.begin(), hl.terms.end());
+    size_t ugsz0 = ugroups.size(); // ugroups size before hl's are appended
+    ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end());
+
+    groups.insert(groups.end(), hl.groups.begin(), hl.groups.end());
+    slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end());
+    for (std::vector<size_t>::const_iterator it = hl.grpsugidx.begin();
+            it != hl.grpsugidx.end(); it++) {
+        grpsugidx.push_back(*it + ugsz0); // shift by the pre-append size
+    }
+}
diff --git a/src/utils/hldata.h b/src/utils/hldata.h
index 168e6896..93766d8a 100644
--- a/src/utils/hldata.h
+++ b/src/utils/hldata.h
@@ -4,6 +4,7 @@
 #include <vector>
 #include <string>
 #include <set>
+#include <map>
 
 /** Store data about user search terms and their expansions. This is used
  * mostly for highlighting result text and walking the matches, generating 
diff --git a/src/utils/pathut.cpp b/src/utils/pathut.cpp
index 468efd79..fd4d4466 100644
--- a/src/utils/pathut.cpp
+++ b/src/utils/pathut.cpp
@@ -16,38 +16,44 @@
  */
 
 #ifndef TEST_PATHUT
+#ifdef BUILDING_RECOLL
 #include "autoconfig.h"
+#else
+#include "config.h"
+#endif
 
 #include <stdio.h>
+#ifdef _WIN32
+#include "dirent.h"
 #include "safefcntl.h"
 #include "safeunistd.h"
-#include "dirent.h"
-#include "cstr.h"
-#ifdef _WIN32
 #include "safewindows.h"
+#include "safesysstat.h"
 #else
+#include <fcntl.h>
+#include <unistd.h>
 #include <sys/param.h>
 #include <pwd.h>
 #include <sys/file.h>
+#include <sys/stat.h>
+#include <dirent.h>
 #endif
 #include <math.h>
 #include <errno.h>
 #include <sys/types.h>
-#include "safesysstat.h"
-#include "ptmutex.h"
 
 // Let's include all files where statfs can be defined and hope for no
 // conflict...
-#ifdef HAVE_SYS_MOUNT_H 
+#ifdef HAVE_SYS_MOUNT_H
 #include <sys/mount.h>
 #endif
-#ifdef HAVE_SYS_STATFS_H 
+#ifdef HAVE_SYS_STATFS_H
 #include <sys/statfs.h>
 #endif
-#ifdef HAVE_SYS_STATVFS_H 
+#ifdef HAVE_SYS_STATVFS_H
 #include <sys/statvfs.h>
 #endif
-#ifdef HAVE_SYS_VFS_H 
+#ifdef HAVE_SYS_VFS_H
 #include <sys/vfs.h>
 #endif
 
@@ -60,9 +66,6 @@
 #include <vector>
 
 #include "pathut.h"
-#include "transcode.h"
-#include "wipedir.h"
-#include "md5ut.h"
 
 using namespace std;
 
@@ -71,8 +74,9 @@ using namespace std;
 void path_slashize(string& s)
 {
     for (string::size_type i = 0; i < s.size(); i++) {
-        if (s[i] == '\\')
+        if (s[i] == '\\') {
             s[i] = '/';
+        }
     }
 }
 static bool path_strlookslikedrive(const string& s)
@@ -82,14 +86,16 @@ static bool path_strlookslikedrive(const string& s)
 
 static bool path_hasdrive(const string& s)
 {
-    if (s.size() >= 2 && isalpha(s[0]) && s[1] == ':')
+    if (s.size() >= 2 && isalpha(s[0]) && s[1] == ':') {
         return true;
+    }
     return false;
 }
 static bool path_isdriveabs(const string& s)
 {
-    if (s.size() >= 3 && isalpha(s[0]) && s[1] == ':' && s[2] == '/')
+    if (s.size() >= 3 && isalpha(s[0]) && s[1] == ':' && s[2] == '/') {
         return true;
+    }
     return false;
 }
 
@@ -109,7 +115,7 @@ string path_tchartoutf8(TCHAR *text)
     int size = WideCharToMultiByte(CP_UTF8, 0, text, -1, NULL, 0, NULL, NULL);
     if (size > 0) {
         buffer.resize(size);
-        WideCharToMultiByte(CP_UTF8, 0, text, -1, 
+        WideCharToMultiByte(CP_UTF8, 0, text, -1,
                             &buffer[0], int(buffer.size()), NULL, NULL);
     } else {
         return string();
@@ -130,17 +136,18 @@ string path_thisexecpath()
     PathRemoveFileSpec(text);
 #endif
     string path = path_tchartoutf8(text);
-    if (path.empty())
+    if (path.empty()) {
         path = "c:/";
+    }
 
     return path;
 }
 
 string path_wingettempfilename(TCHAR *pref)
 {
-    TCHAR buf[(MAX_PATH +1)*sizeof(TCHAR)];
-    TCHAR dbuf[(MAX_PATH +1)*sizeof(TCHAR)];
-    GetTempPath(MAX_PATH+1, dbuf);
+    TCHAR buf[(MAX_PATH + 1)*sizeof(TCHAR)];
+    TCHAR dbuf[(MAX_PATH + 1)*sizeof(TCHAR)];
+    GetTempPath(MAX_PATH + 1, dbuf);
     GetTempFileName(dbuf, pref, 0, buf);
     // Windows will have created a temp file, we delete it.
     string filename = path_tchartoutf8(buf);
@@ -150,10 +157,11 @@ string path_wingettempfilename(TCHAR *pref)
 }
 #endif
 
-
-bool fsocc(const string &path, int *pc, long long *avmbs)
+#if defined(HAVE_SYS_MOUNT_H) || defined(HAVE_SYS_STATFS_H) || \
+    defined(HAVE_SYS_STATVFS_H) || defined(HAVE_SYS_VFS_H)
+bool fsocc(const string& path, int *pc, long long *avmbs)
 {
-    static const int FSOCC_MB = 1024*1024;
+    static const int FSOCC_MB = 1024 * 1024;
 #ifdef _WIN32
     ULARGE_INTEGER freebytesavail;
     ULARGE_INTEGER totalbytes;
@@ -161,21 +169,23 @@ bool fsocc(const string &path, int *pc, long long *avmbs)
                             &totalbytes, NULL)) {
         return false;
     }
-    if (pc)
+    if (pc) {
         *pc = int((100 * freebytesavail.QuadPart) / totalbytes.QuadPart);
-    if (avmbs)
+    }
+    if (avmbs) {
         *avmbs = int(totalbytes.QuadPart / FSOCC_MB);
+    }
     return true;
 #else
 #ifdef sun
     struct statvfs buf;
     if (statvfs(path.c_str(), &buf) != 0) {
-	return false;
+        return false;
     }
 #else
     struct statfs buf;
     if (statfs(path.c_str(), &buf) != 0) {
-	return false;
+        return false;
     }
 #endif
 
@@ -184,72 +194,26 @@ bool fsocc(const string &path, int *pc, long long *avmbs)
 #define FSOCC_USED (double(buf.f_blocks - buf.f_bfree))
 #define FSOCC_TOTAVAIL (FSOCC_USED + double(buf.f_bavail))
     if (FSOCC_TOTAVAIL > 0) {
-	fpc = 100.0 * FSOCC_USED / FSOCC_TOTAVAIL;
+        fpc = 100.0 * FSOCC_USED / FSOCC_TOTAVAIL;
     }
-    if (pc)
+    if (pc) {
         *pc = int(fpc);
+    }
     if (avmbs) {
-	*avmbs = 0;
-	if (buf.f_bsize > 0) {
-	    int ratio = buf.f_bsize > FSOCC_MB ? buf.f_bsize / FSOCC_MB :
-		FSOCC_MB / buf.f_bsize;
+        *avmbs = 0;
+        if (buf.f_bsize > 0) {
+            int ratio = buf.f_bsize > FSOCC_MB ? buf.f_bsize / FSOCC_MB :
+                        FSOCC_MB / buf.f_bsize;
 
-	    *avmbs = buf.f_bsize > FSOCC_MB ? 
-                ((long long)buf.f_bavail) * ratio :
-		((long long)buf.f_bavail) / ratio;
-	}
+            *avmbs = buf.f_bsize > FSOCC_MB ?
+                     ((long long)buf.f_bavail) * ratio :
+                     ((long long)buf.f_bavail) / ratio;
+        }
     }
     return true;
 #endif
 }
-
-const string& tmplocation()
-{
-    static string stmpdir;
-    if (stmpdir.empty()) {
-        const char *tmpdir = getenv("RECOLL_TMPDIR");
-        if (tmpdir == 0) 
-            tmpdir = getenv("TMPDIR");
-        if (tmpdir == 0) 
-            tmpdir = getenv("TMP");
-        if (tmpdir == 0) 
-            tmpdir = getenv("TEMP");
-        if (tmpdir == 0) {
-#ifdef _WIN32
-            TCHAR bufw[(MAX_PATH+1)*sizeof(TCHAR)];
-            GetTempPath(MAX_PATH+1, bufw);
-            stmpdir = path_tchartoutf8(bufw);
-#else
-            stmpdir = "/tmp";
-#endif
-        } else {
-            stmpdir = tmpdir;
-        }
-        stmpdir = path_canon(stmpdir);
-    }
-
-    return stmpdir;
-}
-
-// Location for sample config, filters, etc. (e.g. /usr/share/recoll/)
-const string& path_sharedatadir()
-{
-    static string datadir;
-    if (datadir.empty()) {
-#ifdef _WIN32
-        datadir = path_cat(path_thisexecpath(), "Share");
-#else
-        const char *cdatadir = getenv("RECOLL_DATADIR");
-        if (cdatadir == 0) {
-            // If not in environment, use the compiled-in constant. 
-            datadir = RECOLL_DATADIR;
-        } else {
-            datadir = cdatadir;
-        }
-#endif
-    }
-    return datadir;
-}
+#endif // we have found an appropriate include file
 
 string path_PATHsep()
 {
@@ -262,150 +226,17 @@ string path_PATHsep()
 #endif
 }
 
-bool maketmpdir(string& tdir, string& reason)
-{
-#ifndef _WIN32
-    tdir = path_cat(tmplocation(), "rcltmpXXXXXX");
-
-    char *cp = strdup(tdir.c_str());
-    if (!cp) {
-	reason = "maketmpdir: out of memory (for file name !)\n";
-	tdir.erase();
-	return false;
-    }
-
-    // There is a race condition between name computation and
-    // mkdir. try to make sure that we at least don't shoot ourselves
-    // in the foot
-#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
-    static PTMutexInit mlock;
-    PTMutexLocker lock(mlock);
-#endif
-
-    if (!
-#ifdef HAVE_MKDTEMP
-	mkdtemp(cp)
-#else
-	mktemp(cp)
-#endif // HAVE_MKDTEMP
-	) {
-	free(cp);
-	reason = "maketmpdir: mktemp failed for [" + tdir + "] : " +
-	    strerror(errno);
-	tdir.erase();
-	return false;
-    }	
-    tdir = cp;
-    free(cp);
-#else // _WIN32
-    // There is a race condition between name computation and
-    // mkdir. try to make sure that we at least don't shoot ourselves
-    // in the foot
-    static PTMutexInit mlock;
-    PTMutexLocker lock(mlock);
-    tdir = path_wingettempfilename(TEXT("rcltmp"));
-#endif
-
-    // At this point the directory does not exist yet except if we used
-    // mkdtemp
-
-#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
-    if (mkdir(tdir.c_str(), 0700) < 0) {
-	reason = string("maketmpdir: mkdir ") + tdir + " failed";
-	tdir.erase();
-	return false;
-    }
-#endif
-
-    return true;
-}
-
-TempFileInternal::TempFileInternal(const string& suffix)
-    : m_noremove(false)
-{
-    // Because we need a specific suffix, can't use mkstemp
-    // well. There is a race condition between name computation and
-    // file creation. try to make sure that we at least don't shoot
-    // our own selves in the foot. maybe we'll use mkstemps one day.
-    static PTMutexInit mlock;
-    PTMutexLocker lock(mlock);
-
-#ifndef _WIN32
-    string filename = path_cat(tmplocation(), "rcltmpfXXXXXX");
-    char *cp = strdup(filename.c_str());
-    if (!cp) {
-	m_reason = "Out of memory (for file name !)\n";
-	return;
-    }
-
-    // Using mkstemp this way is awful (bot the suffix adding and
-    // using mkstemp() instead of mktemp just to avoid the warnings)
-    int fd;
-    if ((fd = mkstemp(cp)) < 0) {
-	free(cp);
-	m_reason = "TempFileInternal: mkstemp failed\n";
-	return;
-    }
-    close(fd);
-    unlink(cp);
-    filename = cp;
-    free(cp);
-#else
-    string filename = path_wingettempfilename(TEXT("recoll"));
-#endif
-
-    m_filename = filename + suffix;
-    if (close(open(m_filename.c_str(), O_CREAT|O_EXCL, 0600)) != 0) {
-	m_reason = string("Could not open/create") + m_filename;
-	m_filename.erase();
-    }
-}
-
-TempFileInternal::~TempFileInternal()
-{
-    if (!m_filename.empty() && !m_noremove)
-	unlink(m_filename.c_str());
-}
-
-TempDir::TempDir()
-{
-    if (!maketmpdir(m_dirname, m_reason)) {
-	m_dirname.erase();
-	return;
-    }
-}
-
-TempDir::~TempDir()
-{
-    if (!m_dirname.empty()) {
-	(void)wipedir(m_dirname, true, true);
-	m_dirname.erase();
-    }
-}
-
-bool TempDir::wipe()
-{
-    if (m_dirname.empty()) {
-	m_reason = "TempDir::wipe: no directory !\n";
-	return false;
-    }
-    if (wipedir(m_dirname, false, true)) {
-	m_reason = "TempDir::wipe: wipedir failed\n";
-	return false;
-    }
-    return true;
-}
-
-void path_catslash(string &s)
+void path_catslash(string& s)
 {
 #ifdef _WIN32
     path_slashize(s);
 #endif
-    if (s.empty() || s[s.length() - 1] != '/')
-	s += '/';
+    if (s.empty() || s[s.length() - 1] != '/') {
+        s += '/';
+    }
 }
 
-string path_cat(const string &s1, const string &s2)
+string path_cat(const string& s1, const string& s2)
 {
     string res = s1;
     path_catslash(res);
@@ -413,7 +244,7 @@ string path_cat(const string &s1, const string &s2)
     return res;
 }
 
-string path_getfather(const string &s)
+string path_getfather(const string& s)
 {
     string father = s;
 #ifdef _WIN32
@@ -421,62 +252,69 @@ string path_getfather(const string &s)
 #endif
 
     // ??
-    if (father.empty())
-	return "./";
+    if (father.empty()) {
+        return "./";
+    }
 
-    if (path_isroot(father))
+    if (path_isroot(father)) {
         return father;
-    
+    }
+
     if (father[father.length() - 1] == '/') {
-	// Input ends with /. Strip it, root special case was tested above
-	father.erase(father.length()-1);
+        // Input ends with /. Strip it, root special case was tested above
+        father.erase(father.length() - 1);
     }
 
     string::size_type slp = father.rfind('/');
-    if (slp == string::npos)
-	return "./";
+    if (slp == string::npos) {
+        return "./";
+    }
 
     father.erase(slp);
     path_catslash(father);
     return father;
 }
 
-string path_getsimple(const string &s)
+string path_getsimple(const string& s)
 {
     string simple = s;
 #ifdef _WIN32
     path_slashize(simple);
 #endif
 
-    if (simple.empty())
-	return simple;
+    if (simple.empty()) {
+        return simple;
+    }
 
     string::size_type slp = simple.rfind('/');
-    if (slp == string::npos)
-	return simple;
+    if (slp == string::npos) {
+        return simple;
+    }
 
-    simple.erase(0, slp+1);
+    simple.erase(0, slp + 1);
     return simple;
 }
 
-string path_basename(const string &s, const string &suff)
+string path_basename(const string& s, const string& suff)
 {
     string simple = path_getsimple(s);
     string::size_type pos = string::npos;
     if (suff.length() && simple.length() > suff.length()) {
-	pos = simple.rfind(suff);
-	if (pos != string::npos && pos + suff.length() == simple.length())
-	    return simple.substr(0, pos);
-    } 
+        pos = simple.rfind(suff);
+        if (pos != string::npos && pos + suff.length() == simple.length()) {
+            return simple.substr(0, pos);
+        }
+    }
     return simple;
 }
 
 string path_suffix(const string& s)
 {
     string::size_type dotp = s.rfind('.');
-    if (dotp == string::npos)
-	return string();
-    return s.substr(dotp+1);
+    if (dotp == string::npos) {
+        return string();
+    }
+    return s.substr(dotp + 1);
 }
 
 string path_home()
@@ -507,11 +345,12 @@ string path_home()
 
     struct passwd *entry = getpwuid(uid);
     if (entry == 0) {
-	const char *cp = getenv("HOME");
-	if (cp)
-	    return cp;
-	else 
-	return "/";
+        const char *cp = getenv("HOME");
+        if (cp) {
+            return cp;
+        } else {
+            return "/";
+        }
     }
 
     string homedir = entry->pw_dir;
@@ -539,29 +378,31 @@ string path_homedata()
 #endif
 }
 
-string path_tildexpand(const string &s) 
+string path_tildexpand(const string& s)
 {
-    if (s.empty() || s[0] != '~')
-	return s;
+    if (s.empty() || s[0] != '~') {
+        return s;
+    }
     string o = s;
 #ifdef _WIN32
     path_slashize(o);
 #endif
-    
+
     if (s.length() == 1) {
-	o.replace(0, 1, path_home());
-    } else if  (s[1] == '/') {
-	o.replace(0, 2, path_home());
+        o.replace(0, 1, path_home());
+    } else if (s[1] == '/') {
+        o.replace(0, 2, path_home());
     } else {
-	string::size_type pos = s.find('/');
+        string::size_type pos = s.find('/');
         string::size_type l = (pos == string::npos) ? s.length() - 1 : pos - 1;
 #ifdef _WIN32
         // Dont know what this means. Just replace with HOME
-        o.replace(0, l+1, path_home());
+        o.replace(0, l + 1, path_home());
 #else
-	struct passwd *entry = getpwnam(s.substr(1, l).c_str());
-	if (entry)
-	    o.replace(0, l+1, entry->pw_dir);
+        struct passwd *entry = getpwnam(s.substr(1, l).c_str());
+        if (entry) {
+            o.replace(0, l + 1, entry->pw_dir);
+        }
 #endif
     }
     return o;
@@ -569,39 +410,42 @@ string path_tildexpand(const string &s)
 
 bool path_isroot(const string& path)
 {
-    if (path.size() == 1 && path[0] == '/')
+    if (path.size() == 1 && path[0] == '/') {
         return true;
+    }
 #ifdef _WIN32
     if (path.size() == 3 && isalpha(path[0]) && path[1] == ':' &&
-        (path[2] == '/' || path[2] == '\\'))
+            (path[2] == '/' || path[2] == '\\')) {
         return true;
+    }
 #endif
     return false;
 }
 
-bool path_isabsolute(const string &path)
+bool path_isabsolute(const string& path)
 {
     if (!path.empty() && (path[0] == '/'
 #ifdef _WIN32
                           || path_isdriveabs(path)
 #endif
-            )) {
+                         )) {
         return true;
-    } 
+    }
     return false;
 }
-    
-string path_absolute(const string &is)
+
+string path_absolute(const string& is)
 {
-    if (is.length() == 0)
-	return is;
+    if (is.length() == 0) {
+        return is;
+    }
     string s = is;
     if (!path_isabsolute(s)) {
-	char buf[MAXPATHLEN];
-	if (!getcwd(buf, MAXPATHLEN)) {
-	    return string();
-	}
-	s = path_cat(string(buf), s);
+        char buf[MAXPATHLEN];
+        if (!getcwd(buf, MAXPATHLEN)) {
+            return string();
+        }
+        s = path_cat(string(buf), s);
 #ifdef _WIN32
         path_slashize(s);
 #endif
@@ -610,10 +454,11 @@ string path_absolute(const string &is)
 }
 
 #include <smallut.h>
-string path_canon(const string &is, const string* cwd)
+string path_canon(const string& is, const string* cwd)
 {
-    if (is.length() == 0)
-	return is;
+    if (is.length() == 0) {
+        return is;
+    }
     string s = is;
 #ifdef _WIN32
     path_slashize(s);
@@ -624,34 +469,35 @@ string path_canon(const string &is, const string* cwd)
 #endif
 
     if (!path_isabsolute(s)) {
-	char buf[MAXPATHLEN];
-	const char *cwdp = buf;
-	if (cwd) {
-	    cwdp = cwd->c_str();
-	} else {
-	    if (!getcwd(buf, MAXPATHLEN)) {
-		return string();
-	    }
-	}
-	s = path_cat(string(cwdp), s); 
+        char buf[MAXPATHLEN];
+        const char *cwdp = buf;
+        if (cwd) {
+            cwdp = cwd->c_str();
+        } else {
+            if (!getcwd(buf, MAXPATHLEN)) {
+                return string();
+            }
+        }
+        s = path_cat(string(cwdp), s);
     }
     vector<string> elems;
     stringToTokens(s, elems, "/");
     vector<string> cleaned;
-    for (vector<string>::const_iterator it = elems.begin(); 
-	 it != elems.end(); it++){
-	if (*it == "..") {
-	    if (!cleaned.empty())
-		cleaned.pop_back();
-	} else if (it->empty() || *it == ".") {
-	} else {
-	    cleaned.push_back(*it);
-	}
+    for (vector<string>::const_iterator it = elems.begin();
+            it != elems.end(); it++) {
+        if (*it == "..") {
+            if (!cleaned.empty()) {
+                cleaned.pop_back();
+            }
+        } else if (it->empty() || *it == ".") {
+        } else {
+            cleaned.push_back(*it);
+        }
     }
     string ret;
     if (!cleaned.empty()) {
-	for (vector<string>::const_iterator it = cleaned.begin(); 
-	     it != cleaned.end(); it++) {
+        for (vector<string>::const_iterator it = cleaned.begin();
+                it != cleaned.end(); it++) {
             ret += "/";
 #ifdef _WIN32
             if (it == cleaned.begin() && path_strlookslikedrive(*it)) {
@@ -659,10 +505,10 @@ string path_canon(const string &is, const string* cwd)
                 ret.clear();
             }
 #endif
-	    ret += *it;
-	}
+            ret += *it;
+        }
     } else {
-	ret = "/";
+        ret = "/";
     }
     return ret;
 }
@@ -673,21 +519,22 @@ bool makepath(const string& ipath)
     vector<string> elems;
     stringToTokens(path, elems, "/");
     path = "/";
-    for (vector<string>::const_iterator it = elems.begin(); 
-	 it != elems.end(); it++){
+    for (vector<string>::const_iterator it = elems.begin();
+            it != elems.end(); it++) {
 #ifdef _WIN32
-        if (it == elems.begin() && path_strlookslikedrive(*it))
+        if (it == elems.begin() && path_strlookslikedrive(*it)) {
             path = "";
+        }
 #endif
-	path += *it;
-	// Not using path_isdir() here, because this cant grok symlinks
-	// If we hit an existing file, no worry, mkdir will just fail.
-	if (access(path.c_str(), 0) != 0) {
-	    if (mkdir(path.c_str(), 0700) != 0)  {
-		return false;
-	    }
-	}
-	path += "/";
+        path += *it;
+        // Not using path_isdir() here: it uses lstat() and rejects symlinks.
+        // If we hit an existing file, no worry, mkdir will just fail.
+        if (access(path.c_str(), 0) != 0) {
+            if (mkdir(path.c_str(), 0700) != 0)  {
+                return false;
+            }
+        }
+        path += "/";
     }
     return true;
 }
@@ -695,30 +542,35 @@ bool makepath(const string& ipath)
 bool path_isdir(const string& path)
 {
     struct stat st;
-    if (lstat(path.c_str(), &st) < 0) 
-	return false;
-    if (S_ISDIR(st.st_mode))
-	return true;
+    if (lstat(path.c_str(), &st) < 0) {
+        return false;
+    }
+    if (S_ISDIR(st.st_mode)) {
+        return true;
+    }
     return false;
 }
 
 long long path_filesize(const string& path)
 {
     struct stat st;
-    if (stat(path.c_str(), &st) < 0) 
-	return -1;
+    if (stat(path.c_str(), &st) < 0) {
+        return -1;
+    }
     return (long long)st.st_size;
 }
 
 int path_fileprops(const std::string path, struct stat *stp, bool follow)
 {
-    if (!stp)
+    if (!stp) {
         return -1;
+    }
     memset(stp, 0, sizeof(struct stat));
     struct stat mst;
     int ret = follow ? stat(path.c_str(), &mst) : lstat(path.c_str(), &mst);
-    if (ret != 0)
+    if (ret != 0) {
         return ret;
+    }
     stp->st_size = mst.st_size;
     stp->st_mode = mst.st_mode;
     stp->st_mtime = mst.st_mtime;
@@ -751,7 +603,7 @@ bool path_exists(const string& path)
 29 )
 2A *
 2B +
-2C , 
+2C ,
 2D -
 2E .
 2F /
@@ -788,32 +640,32 @@ string url_encode(const string& url, string::size_type offs)
     string out = url.substr(0, offs);
     const char *cp = url.c_str();
     for (string::size_type i = offs; i < url.size(); i++) {
-	unsigned int c;
-	const char *h = "0123456789ABCDEF";
-	c = cp[i];
-	if (c <= 0x20 || 
-	   c >= 0x7f || 
-	   c == '"' ||
-	   c == '#' ||
-	   c == '%' ||
-	   c == ';' ||
-	   c == '<' ||
-	   c == '>' ||
-	   c == '?' ||
-	   c == '[' ||
-	   c == '\\' ||
-	   c == ']' ||
-	   c == '^' ||
-	   c == '`' ||
-	   c == '{' ||
-	   c == '|' ||
-	   c == '}' ) {
-	    out += '%';
-	    out += h[(c >> 4) & 0xf];
-	    out += h[c & 0xf];
-	} else {
-	    out += char(c);
-	}
+        unsigned int c;
+        const char *h = "0123456789ABCDEF";
+        c = cp[i];
+        if (c <= 0x20 ||
+                c >= 0x7f ||
+                c == '"' ||
+                c == '#' ||
+                c == '%' ||
+                c == ';' ||
+                c == '<' ||
+                c == '>' ||
+                c == '?' ||
+                c == '[' ||
+                c == '\\' ||
+                c == ']' ||
+                c == '^' ||
+                c == '`' ||
+                c == '{' ||
+                c == '|' ||
+                c == '}') {
+            out += '%';
+            out += h[(c >> 4) & 0xf];
+            out += h[c & 0xf];
+        } else {
+            out += char(c);
+        }
     }
     return out;
 }
@@ -822,45 +674,24 @@ string url_gpath(const string& url)
 {
     // Remove the access schema part (or whatever it's called)
     string::size_type colon = url.find_first_of(":");
-    if (colon == string::npos || colon == url.size() - 1)
+    if (colon == string::npos || colon == url.size() - 1) {
         return url;
+    }
     // If there are non-alphanum chars before the ':', then there
     // probably is no scheme. Whatever...
     for (string::size_type i = 0; i < colon; i++) {
-        if (!isalnum(url.at(i)))
+        if (!isalnum(url.at(i))) {
             return url;
+        }
     }
 
     // In addition we canonize the path to remove empty host parts
     // (for compatibility with older versions of recoll where file://
     // was hardcoded, but the local path was used for doc
     // identification.
-    return path_canon(url.substr(colon+1));
+    return path_canon(url.substr(colon + 1));
 }
 
-string url_gpathS(const string& url)
-{
-#ifdef _WIN32
-    string u = url_gpath(url);
-    string nu;
-    if (path_hasdrive(u)) {
-        nu.append(1, '/');
-        nu.append(1, u[0]);
-        if (path_isdriveabs(u)) {
-            nu.append(u.substr(2));
-        } else {
-            // This should be an error really
-            nu.append(1, '/');
-            nu.append(u.substr(2));
-        }
-    }
-    return nu;
-#else
-    return url_gpath(url);
-#endif
-}
-
-
 string url_parentfolder(const string& url)
 {
     // In general, the parent is the directory above the full path
@@ -872,28 +703,20 @@ string url_parentfolder(const string& url)
         parenturl = url_gpath(url);
     }
     return isfileurl ? string("file://") + parenturl :
-        string("http://") + parenturl;
+           string("http://") + parenturl;
 }
 
 
-string path_defaultrecollconfsubdir()
-{
-#ifdef _WIN32
-    return "Recoll";
-#else
-    return ".recoll";
-#endif
-}
-
 // Convert to file path if url is like file:
 // Note: this only works with our internal pseudo-urls which are not
 // encoded/escaped
 string fileurltolocalpath(string url)
 {
-    if (url.find("file://") == 0)
+    if (url.find("file://") == 0) {
         url = url.substr(7, string::npos);
-    else
+    } else {
         return string();
+    }
 
 #ifdef _WIN32
     // Absolute file urls are like: file:///c:/mydir/...
@@ -908,23 +731,26 @@ string fileurltolocalpath(string url)
     // part after # if it is preceded by .html
     string::size_type pos;
     if ((pos = url.rfind(".html#")) != string::npos) {
-        url.erase(pos+5);
+        url.erase(pos + 5);
     } else if ((pos = url.rfind(".htm#")) != string::npos) {
-        url.erase(pos+4);
+        url.erase(pos + 4);
     }
 
     return url;
 }
 
+static const string cstr_fileu("file://");
+
 string path_pathtofileurl(const string& path)
 {
-	// We're supposed to receive a canonic absolute path, but on windows we
-	// may need to add a '/' in front of the drive spec
-	string url(cstr_fileu);
-	if (path.empty() || path[0] != '/')
-		url.push_back('/');
-	url += path;
-	return url;
+    // We're supposed to receive a canonic absolute path, but on windows we
+    // may need to add a '/' in front of the drive spec
+    string url(cstr_fileu);
+    if (path.empty() || path[0] != '/') {
+        url.push_back('/');
+    }
+    url += path;
+    return url;
 }
 
 bool urlisfileurl(const string& url)
@@ -932,17 +758,6 @@ bool urlisfileurl(const string& url)
     return url.find("file://") == 0;
 }
 
-// Printable url: this is used to transcode from the system charset
-// into either utf-8 if transcoding succeeds, or url-encoded
-bool printableUrl(const string &fcharset, const string &in, string &out)
-{
-    int ecnt = 0;
-    if (!transcode(in, out, fcharset, "UTF-8", &ecnt) || ecnt) {
-	out = url_encode(in, 7);
-    }
-    return true;
-}
-
 bool readdir(const string& dir, string& reason, set<string>& entries)
 {
     struct stat st;
@@ -951,37 +766,40 @@ bool readdir(const string& dir, string& reason, set<string>& entries)
     DIR *d = 0;
     statret = lstat(dir.c_str(), &st);
     if (statret == -1) {
-	msg << "readdir: cant stat " << dir << " errno " <<  errno;
-	goto out;
+        msg << "readdir: cant stat " << dir << " errno " <<  errno;
+        goto out;
     }
     if (!S_ISDIR(st.st_mode)) {
-	msg << "readdir: " << dir <<  " not a directory";
-	goto out;
+        msg << "readdir: " << dir <<  " not a directory";
+        goto out;
     }
     if (access(dir.c_str(), R_OK) < 0) {
-	msg << "readdir: no read access to " << dir;
-	goto out;
+        msg << "readdir: no read access to " << dir;
+        goto out;
     }
 
     d = opendir(dir.c_str());
     if (d == 0) {
-	msg << "readdir: cant opendir " << dir << ", errno " << errno;
-	goto out;
+        msg << "readdir: cant opendir " << dir << ", errno " << errno;
+        goto out;
     }
 
     struct dirent *ent;
     while ((ent = readdir(d)) != 0) {
-	if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) 
-	    continue;
-	entries.insert(ent->d_name);
+        if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) {
+            continue;
+        }
+        entries.insert(ent->d_name);
     }
 
 out:
-    if (d)
-	closedir(d);
+    if (d) {
+        closedir(d);
+    }
     reason = msg.str();
-    if (reason.empty())
-	return true;
+    if (reason.empty()) {
+        return true;
+    }
     return false;
 }
 
@@ -992,36 +810,40 @@ out:
 // alone.
 Pidfile::~Pidfile()
 {
-    if (m_fd >= 0)
-	::close(m_fd);
+    if (m_fd >= 0) {
+        ::close(m_fd);
+    }
     m_fd = -1;
 }
 
 pid_t Pidfile::read_pid()
 {
     int fd = ::open(m_path.c_str(), O_RDONLY);
-    if (fd == -1)
-	return (pid_t)-1;
+    if (fd == -1) {
+        return (pid_t) - 1;
+    }
 
     char buf[16];
     int i = read(fd, buf, sizeof(buf) - 1);
     ::close(fd);
-    if (i <= 0)
-	return (pid_t)-1;
+    if (i <= 0) {
+        return (pid_t) - 1;
+    }
     buf[i] = '\0';
     char *endptr;
     pid_t pid = strtol(buf, &endptr, 10);
-    if (endptr != &buf[i])
-	return (pid_t)-1;
+    if (endptr != &buf[i]) {
+        return (pid_t) - 1;
+    }
     return pid;
 }
 
 int Pidfile::flopen()
 {
     const char *path = m_path.c_str();
-    if ((m_fd = ::open(path, O_RDWR|O_CREAT, 0644)) == -1) {
-	m_reason = "Open failed: [" + m_path + "]: " + strerror(errno);
-	return -1;
+    if ((m_fd = ::open(path, O_RDWR | O_CREAT, 0644)) == -1) {
+        m_reason = "Open failed: [" + m_path + "]: " + strerror(errno);
+        return -1;
     }
 
 #ifdef sun
@@ -1031,34 +853,34 @@ int Pidfile::flopen()
     lockdata.l_type = F_WRLCK;
     lockdata.l_whence = SEEK_SET;
     if (fcntl(m_fd, F_SETLK,  &lockdata) != 0) {
-	int serrno = errno;
-	(void)::close(m_fd);
-	errno = serrno;
-	m_reason = "fcntl lock failed";
-	return -1;
+        int serrno = errno;
+        (void)::close(m_fd);
+        errno = serrno;
+        m_reason = "fcntl lock failed";
+        return -1;
     }
 #else
 #ifdef _WIN32
-	return 0;
+    return 0;
 #else
     int operation = LOCK_EX | LOCK_NB;
     if (flock(m_fd, operation) == -1) {
-	int serrno = errno;
-	(void)::close(m_fd);
-	errno = serrno;
-	m_reason = "flock failed";
-	return -1;
+        int serrno = errno;
+        (void)::close(m_fd);
+        errno = serrno;
+        m_reason = "flock failed";
+        return -1;
     }
 #endif // ! win32
 #endif // ! sun
 
     if (ftruncate(m_fd, 0) != 0) {
-	/* can't happen [tm] */
-	int serrno = errno;
-	(void)::close(m_fd);
-	errno = serrno;
-	m_reason = "ftruncate failed";
-	return -1;
+        /* can't happen [tm] */
+        int serrno = errno;
+        (void)::close(m_fd);
+        errno = serrno;
+        m_reason = "ftruncate failed";
+        return -1;
     }
     return 0;
 }
@@ -1066,7 +888,7 @@ int Pidfile::flopen()
 pid_t Pidfile::open()
 {
     if (flopen() < 0) {
-	return read_pid();
+        return read_pid();
     }
     return (pid_t)0;
 }
@@ -1075,15 +897,15 @@ int Pidfile::write_pid()
 {
     /* truncate to allow multiple calls */
     if (ftruncate(m_fd, 0) == -1) {
-	m_reason = "ftruncate failed";
-	return -1;
+        m_reason = "ftruncate failed";
+        return -1;
     }
     char pidstr[20];
     sprintf(pidstr, "%u", int(getpid()));
     lseek(m_fd, 0, 0);
     if (::write(m_fd, pidstr, strlen(pidstr)) != (ssize_t)strlen(pidstr)) {
-	m_reason = "write failed";
-	return -1;
+        m_reason = "write failed";
+        return -1;
     }
     return 0;
 }
@@ -1098,79 +920,10 @@ int Pidfile::remove()
     return unlink(m_path.c_str());
 }
 
-
-// Freedesktop standard paths for cache directory (thumbnails are now in there)
-static const string& xdgcachedir()
-{
-    static string xdgcache;
-    if (xdgcache.empty()) {
-	const char *cp = getenv("XDG_CACHE_HOME");
-	if (cp == 0) 
-	    xdgcache = path_cat(path_home(), ".cache");
-	else
-	    xdgcache = string(cp);
-    }
-    return xdgcache;
-}
-static const string& thumbnailsdir()
-{
-    static string thumbnailsd;
-    if (thumbnailsd.empty()) {
-	thumbnailsd = path_cat(xdgcachedir(), "thumbnails");
-	if (access(thumbnailsd.c_str(), 0) != 0) {
-	    thumbnailsd = path_cat(path_home(), ".thumbnails");
-	}
-    }
-    return thumbnailsd;
-}
-
-// Place for 256x256 files
-static const string thmbdirlarge = "large";
-// 128x128
-static const string thmbdirnormal = "normal";
-
-static void thumbname(const string& url, string& name)
-{
-    string digest;
-    string l_url = url_encode(url);
-    MD5String(l_url, digest);
-    MD5HexPrint(digest, name);
-    name += ".png";
-}
-
-bool thumbPathForUrl(const string& url, int size, string& path)
-{
-    string name;
-    thumbname(url, name);
-    if (size <= 128) {
-	path = path_cat(thumbnailsdir(), thmbdirnormal);
-	path = path_cat(path, name);
-	if (access(path.c_str(), R_OK) == 0) {
-	    return true;
-	}
-    } 
-    path = path_cat(thumbnailsdir(), thmbdirlarge);
-    path = path_cat(path, name);
-    if (access(path.c_str(), R_OK) == 0) {
-	return true;
-    }
-
-    // File does not exist. Path corresponds to the large version at this point,
-    // fix it if needed.
-    if (size <= 128) {
-	path = path_cat(path_home(), thmbdirnormal);
-	path = path_cat(path, name);
-    }
-    return false;
-}
-
 // Call funcs that need static init (not initially reentrant)
 void pathut_init_mt()
 {
     path_home();
-    tmplocation();
-    thumbnailsdir();
-    path_sharedatadir();
 }
 
 
@@ -1185,8 +938,9 @@ void path_to_thumb(const string& _input)
 {
     string input(_input);
     // Make absolute path if needed
-    if (input[0] != '/')
+    if (input[0] != '/') {
         input = path_absolute(input);
+    }
 
     input = string("file://") + path_canon(input);
 
@@ -1197,126 +951,133 @@ void path_to_thumb(const string& _input)
 }
 
 const char *tstvec[] = {"", "/", "/dir", "/dir/", "/dir1/dir2",
-			 "/dir1/dir2",
-			"./dir", "./dir1/", "dir", "../dir", "/dir/toto.c",
-			"/dir/.c", "/dir/toto.txt", "toto.txt1"
-};
+                        "/dir1/dir2",
+                        "./dir", "./dir1/", "dir", "../dir", "/dir/toto.c",
+                        "/dir/.c", "/dir/toto.txt", "toto.txt1"
+                       };
 
 const string ttvec[] = {"/dir", "", "~", "~/sub", "~root", "~root/sub",
-		 "~nosuch", "~nosuch/sub"};
+                        "~nosuch", "~nosuch/sub"
+                       };
 int nttvec = sizeof(ttvec) / sizeof(string);
 
 const char *thisprog;
 
 int main(int argc, const char **argv)
 {
-    thisprog = *argv++;argc--;
+    thisprog = *argv++;
+    argc--;
 
     string s;
     vector<string>::const_iterator it;
 #if 0
-    for (unsigned int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
-	cout << tstvec[i] << " Father " << path_getfather(tstvec[i]) << endl;
+    for (unsigned int i = 0; i < sizeof(tstvec) / sizeof(char *); i++) {
+        cout << tstvec[i] << " Father " << path_getfather(tstvec[i]) << endl;
     }
-    for (unsigned int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
-	cout << tstvec[i] << " Simple " << path_getsimple(tstvec[i]) << endl;
+    for (unsigned int i = 0; i < sizeof(tstvec) / sizeof(char *); i++) {
+        cout << tstvec[i] << " Simple " << path_getsimple(tstvec[i]) << endl;
     }
-    for (unsigned int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
-	cout << tstvec[i] << " Basename " << 
-	    path_basename(tstvec[i], ".txt") << endl;
+    for (unsigned int i = 0; i < sizeof(tstvec) / sizeof(char *); i++) {
+        cout << tstvec[i] << " Basename " <<
+             path_basename(tstvec[i], ".txt") << endl;
     }
 #endif
 
 #if 0
     for (int i = 0; i < nttvec; i++) {
-	cout << "tildexp: '" << ttvec[i] << "' -> '" << 
-	    path_tildexpand(ttvec[i]) << "'" << endl;
+        cout << "tildexp: '" << ttvec[i] << "' -> '" <<
+             path_tildexpand(ttvec[i]) << "'" << endl;
     }
 #endif
 
 #if 0
-    const string canontst[] = {"/dir1/../../..", "/////", "", 
-			       "/dir1/../../.././/////dir2///////",
-			       "../../", 
-			       "../../../../../../../../../../"
-    };
+    const string canontst[] = {"/dir1/../../..", "/////", "",
+                               "/dir1/../../.././/////dir2///////",
+                               "../../",
+                               "../../../../../../../../../../"
+                              };
     unsigned int nttvec = sizeof(canontst) / sizeof(string);
     for (unsigned int i = 0; i < nttvec; i++) {
-	cout << "canon: '" << canontst[i] << "' -> '" << 
-	    path_canon(canontst[i]) << "'" << endl;
+        cout << "canon: '" << canontst[i] << "' -> '" <<
+             path_canon(canontst[i]) << "'" << endl;
     }
-#endif    
+#endif
 #if 0
     if (argc != 2) {
-	cerr << "Usage: trpathut <dir> <pattern>" << endl;
-	exit(1);
+        cerr << "Usage: trpathut <dir> <pattern>" << endl;
+        exit(1);
     }
-    string dir = *argv++;argc--;
-    string pattern =  *argv++;argc--;
+    string dir = *argv++;
+    argc--;
+    string pattern =  *argv++;
+    argc--;
     vector<string> matched = path_dirglob(dir, pattern);
-    for (it = matched.begin(); it != matched.end();it++) {
-	cout << *it << endl;
+    for (it = matched.begin(); it != matched.end(); it++) {
+        cout << *it << endl;
     }
 #endif
 
 #if 0
     if (argc != 1) {
-	fprintf(stderr, "Usage: fsocc: trpathut <path>\n");
-	exit(1);
+        fprintf(stderr, "Usage: fsocc: trpathut <path>\n");
+        exit(1);
     }
-  string path = *argv++;argc--;
+    string path = *argv++;
+    argc--;
 
-  int pc;
-  long long blocks;
-  if (!fsocc(path, &pc, &blocks)) {
-      fprintf(stderr, "fsocc failed\n");
-      return 1;
-  }
-  printf("pc %d, megabytes %ld\n", pc, blocks);
+    int pc;
+    long long blocks;
+    if (!fsocc(path, &pc, &blocks)) {
+        fprintf(stderr, "fsocc failed\n");
+        return 1;
+    }
+    printf("pc %d, megabytes %ld\n", pc, blocks);
 #endif
 
 #if 0
-  Pidfile pidfile("/tmp/pathutpidfile");
-  pid_t pid;
-  if ((pid = pidfile.open()) != 0) {
-      cerr << "open failed. reason: " << pidfile.getreason() << 
-	  " return " << pid << endl;
-      exit(1);
-  }
-  pidfile.write_pid();
-  sleep(10);
-  pidfile.close();
-  pidfile.remove();
+    Pidfile pidfile("/tmp/pathutpidfile");
+    pid_t pid;
+    if ((pid = pidfile.open()) != 0) {
+        cerr << "open failed. reason: " << pidfile.getreason() <<
+             " return " << pid << endl;
+        exit(1);
+    }
+    pidfile.write_pid();
+    sleep(10);
+    pidfile.close();
+    pidfile.remove();
 #endif
 
 #if 0
-  if (argc > 1) {
-      cerr <<  "Usage: thumbpath <filepath>" << endl;
-      exit(1);
-  }
-  string input;
-  if (argc == 1) {
-      input = *argv++;
-      if (input.empty())  {
-          cerr << "Usage: thumbpath <filepath>" << endl;
-          exit(1);
-      }
-      path_to_thumb(input);
-  } else {
-      while (getline(cin, input))
-          path_to_thumb(input);
-  }
+    if (argc > 1) {
+        cerr <<  "Usage: thumbpath <filepath>" << endl;
+        exit(1);
+    }
+    string input;
+    if (argc == 1) {
+        input = *argv++;
+        if (input.empty())  {
+            cerr << "Usage: thumbpath <filepath>" << endl;
+            exit(1);
+        }
+        path_to_thumb(input);
+    } else {
+        while (getline(cin, input)) {
+            path_to_thumb(input);
+        }
+    }
 
-  
-  exit(0);
+
+    exit(0);
 #endif
 
 #if 0
     if (argc != 1) {
-	cerr << "Usage: trpathut <filename>" << endl;
-	exit(1);
+        cerr << "Usage: trpathut <filename>" << endl;
+        exit(1);
     }
-    string fn = *argv++;argc--;
+    string fn = *argv++;
+    argc--;
     string ext = path_suffix(fn);
     cout << "Suffix: [" << ext << "]" << endl;
     return 0;
@@ -1324,10 +1085,11 @@ int main(int argc, const char **argv)
 
 #if 1
     if (argc != 1) {
-	cerr << "Usage: trpathut url" << endl;
-	exit(1);
+        cerr << "Usage: trpathut url" << endl;
+        exit(1);
     }
-    string url = *argv++;argc--;
+    string url = *argv++;
+    argc--;
 
     cout << "File: [" << fileurltolocalpath(url) << "]\n";
     return 0;
diff --git a/src/utils/pathut.h b/src/utils/pathut.h
index 10991edf..cf5ecd8d 100644
--- a/src/utils/pathut.h
+++ b/src/utils/pathut.h
@@ -16,63 +16,54 @@
  */
 #ifndef _PATHUT_H_INCLUDED_
 #define _PATHUT_H_INCLUDED_
-#include "autoconfig.h"
 
 #include <string>
 #include <vector>
 #include <set>
 
-#include MEMORY_INCLUDE
+// Must be called in main thread before starting other threads
+extern void pathut_init_mt();
 
 /// Add a / at the end if none there yet.
-extern void path_catslash(std::string &s);
+extern void path_catslash(std::string& s);
 /// Concatenate 2 paths
-extern std::string path_cat(const std::string &s1, const std::string &s2);
+extern std::string path_cat(const std::string& s1, const std::string& s2);
 /// Get the simple file name (get rid of any directory path prefix
-extern std::string path_getsimple(const std::string &s);
+extern std::string path_getsimple(const std::string& s);
 /// Simple file name + optional suffix stripping
-extern std::string path_basename(const std::string &s, 
-				 const std::string &suff = std::string());
+extern std::string path_basename(const std::string& s,
+                                 const std::string& suff = std::string());
 /// Component after last '.'
-extern std::string path_suffix(const std::string &s);
+extern std::string path_suffix(const std::string& s);
 /// Get the father directory
-extern std::string path_getfather(const std::string &s);
+extern std::string path_getfather(const std::string& s);
 /// Get the current user's home directory
 extern std::string path_home();
-/// Expand ~ at the beginning of std::string 
-extern std::string path_tildexpand(const std::string &s);
+/// Expand ~ at the beginning of std::string
+extern std::string path_tildexpand(const std::string& s);
 /// Use getcwd() to make absolute path if needed. Beware: ***this can fail***
 /// we return an empty path in this case.
-extern std::string path_absolute(const std::string &s);
+extern std::string path_absolute(const std::string& s);
 /// Clean up path by removing duplicated / and resolving ../ + make it absolute
-extern std::string path_canon(const std::string &s, const std::string *cwd=0);
+extern std::string path_canon(const std::string& s, const std::string *cwd = 0);
 /// Use glob(3) to return the file names matching pattern inside dir
-extern std::vector<std::string> path_dirglob(const std::string &dir, 
-				   const std::string pattern);
+extern std::vector<std::string> path_dirglob(const std::string& dir,
+        const std::string pattern);
 /// Encode according to rfc 1738
-extern std::string url_encode(const std::string& url, 
-			      std::string::size_type offs = 0);
-/// Transcode to utf-8 if possible or url encoding, for display.
-extern bool printableUrl(const std::string &fcharset, 
-			 const std::string &in, std::string &out);
+extern std::string url_encode(const std::string& url,
+                              std::string::size_type offs = 0);
 //// Convert to file path if url is like file://. This modifies the
 //// input (and returns a copy for convenience)
 extern std::string fileurltolocalpath(std::string url);
 /// Test for file:/// url
 extern bool urlisfileurl(const std::string& url);
-/// 
+///
 extern std::string url_parentfolder(const std::string& url);
 
 /// Return the host+path part of an url. This is not a general
 /// routine, it does the right thing only in the recoll context
 extern std::string url_gpath(const std::string& url);
 
-/// Same but, in the case of a Windows local path, also turn "c:/" into
-/// "/c/" This should be used only for splitting the path in rcldb, it
-/// would better be local in there, but I prefer to keep all the
-/// system-specific path stuff in pathut
-extern std::string url_gpathS(const std::string& url);
-
 /// Stat parameter and check if it's a directory
 extern bool path_isdir(const std::string& path);
 
@@ -92,37 +83,28 @@ extern int path_fileprops(const std::string path, struct stat *stp,
 
 /// Check that path is traversable and last element exists
 /// Returns true if last elt could be checked to exist. False may mean that
-/// the file/dir does not exist or that an error occurred. 
+/// the file/dir does not exist or that an error occurred.
 extern bool path_exists(const std::string& path);
 
 /// Return separator for PATH environment variable
 extern std::string path_PATHsep();
 
 /// Dump directory
-extern bool readdir(const std::string& dir, std::string& reason, 
-		    std::set<std::string>& entries);
+extern bool readdir(const std::string& dir, std::string& reason,
+                    std::set<std::string>& entries);
 
 /** A small wrapper around statfs et al, to return percentage of disk
-    occupation */
-bool fsocc(const std::string &path, int *pc, // Percent occupied
-	   long long *avmbs = 0 // Mbs available to non-superuser. Mb=1024*1024
-	   );
-
-/// Retrieve the temp dir location: $RECOLL_TMPDIR else $TMPDIR else /tmp
-extern const std::string& tmplocation();
-
-/// Create temporary directory (inside the temp location)
-extern bool maketmpdir(std::string& tdir, std::string& reason);
+    occupation
+    @param[out] pc percent occupied
+    @param[out] avmbs Mbs available to non-superuser. Mb=1024*1024
+*/
+bool fsocc(const std::string& path, int *pc, long long *avmbs = 0);
 
 /// mkdir -p
 extern bool makepath(const std::string& path);
 
-/// Sub-directory for default recoll config (e.g: .recoll)
-extern std::string path_defaultrecollconfsubdir();
 /// Where we create the user data subdirs
 extern std::string path_homedata();
-/// e.g. /usr/share/recoll. Depends on OS and config
-extern const std::string& path_sharedatadir();
 /// Test if path is absolute
 extern bool path_isabsolute(const std::string& s);
 
@@ -137,58 +119,12 @@ extern std::string path_pathtofileurl(const std::string& path);
 void path_slashize(std::string& s);
 #endif
 
-/// Temporary file class
-class TempFileInternal {
-public:
-    TempFileInternal(const std::string& suffix);
-    ~TempFileInternal();
-    const char *filename() 
-    {
-	return m_filename.c_str();
-    }
-    const std::string &getreason() 
-    {
-	return m_reason;
-    }
-    void setnoremove(bool onoff)
-    {
-	m_noremove = onoff;
-    }
-    bool ok() 
-    {
-	return !m_filename.empty();
-    }
-private:
-    std::string m_filename;
-    std::string m_reason;
-    bool m_noremove;
-};
-
-typedef STD_SHARED_PTR<TempFileInternal> TempFile;
-
-/// Temporary directory class. Recursively deleted by destructor.
-class TempDir {
-public:
-    TempDir();
-    ~TempDir();
-    const char *dirname() {return m_dirname.c_str();}
-    const std::string &getreason() {return m_reason;}
-    bool ok() {return !m_dirname.empty();}
-    /// Recursively delete contents but not self.
-    bool wipe();
-private:
-    std::string m_dirname;
-    std::string m_reason;
-    TempDir(const TempDir &) {}
-    TempDir& operator=(const TempDir &) {return *this;};
-};
-
 /// Lock/pid file class. This is quite close to the pidfile_xxx
 /// utilities in FreeBSD with a bit more encapsulation. I'd have used
 /// the freebsd code if it was available elsewhere
 class Pidfile {
 public:
-    Pidfile(const std::string& path)	: m_path(path), m_fd(-1) {}
+    Pidfile(const std::string& path)    : m_path(path), m_fd(-1) {}
     ~Pidfile();
     /// Open/create the pid file.
     /// @return 0 if ok, > 0 for pid of existing process, -1 for other error.
@@ -200,7 +136,9 @@ public:
     int close();
     /// Delete the pid file
     int remove();
-    const std::string& getreason() {return m_reason;}
+    const std::string& getreason() {
+        return m_reason;
+    }
 private:
     std::string m_path;
     int    m_fd;
@@ -209,14 +147,4 @@ private:
     int flopen();
 };
 
-
-
-// Freedesktop thumbnail standard path routine
-// On return, path will have the appropriate value in all cases,
-// returns true if the file already exists
-extern bool thumbPathForUrl(const std::string& url, int size, std::string& path);
-
-// Must be called in main thread before starting other threads
-extern void pathut_init_mt();
-
 #endif /* _PATHUT_H_INCLUDED_ */
diff --git a/src/utils/rclutil.cpp b/src/utils/rclutil.cpp
new file mode 100644
index 00000000..cd4778b8
--- /dev/null
+++ b/src/utils/rclutil.cpp
@@ -0,0 +1,411 @@
+/* Copyright (C) 2016 J.F.Dockes
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#ifndef TEST_RCLUTIL
+#include "autoconfig.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "safefcntl.h"
+#include "safeunistd.h"
+#include "dirent.h"
+#include "cstr.h"
+#ifdef _WIN32
+#include "safewindows.h"
+#else
+#include <sys/param.h>
+#include <pwd.h>
+#include <sys/file.h>
+#endif
+#include <math.h>
+#include <errno.h>
+#include <sys/types.h>
+#include "safesysstat.h"
+#include "ptmutex.h"
+
+#include "rclutil.h"
+#include "pathut.h"
+#include "wipedir.h"
+#include "transcode.h"
+#include "md5ut.h"
+
+using namespace std;
+
+
+// Copy s into *d, rebuilding each string from an iterator range (no sharing)
+void map_ss_cp_noshr(const map<string, string> s, map<string, string> *d)
+{
+    map<string, string>::const_iterator cur = s.begin();
+    for (; cur != s.end(); ++cur) {
+        d->insert(make_pair(string(cur->first.begin(), cur->first.end()),
+                            string(cur->second.begin(), cur->second.end())));
+    }
+}
+
+string path_defaultrecollconfsubdir()
+{
+#ifndef _WIN32
+    return ".recoll";   // Everything except Windows
+#else
+    return "Recoll";    // Windows
+#endif
+}
+
+// Sample config, filters, etc. location (e.g. /usr/share/recoll/). Cached.
+const string& path_pkgdatadir()
+{
+    static string pkgdir;
+    if (pkgdir.empty()) {
+#ifdef _WIN32
+        pkgdir = path_cat(path_thisexecpath(), "Share");
+#else
+        // The environment has precedence over the compiled-in constant.
+        const char *fromenv = getenv("RECOLL_DATADIR");
+        if (fromenv != 0) {
+            pkgdir = fromenv;
+        } else {
+            pkgdir = RECOLL_DATADIR;
+        }
+#endif
+    }
+    return pkgdir;
+}
+
+// Transcode 'in' from fcharset to UTF-8 for display, else url-encode it.
+bool printableUrl(const string& fcharset, const string& in, string& out)
+{
+    int nerrors = 0;
+    const bool converted = transcode(in, out, fcharset, "UTF-8", &nerrors);
+    if (!converted || nerrors != 0) {
+        out = url_encode(in, 7);
+    }
+    return true;    // Never reports a failure
+}
+
+// Windows: "c:/x" -> "/c/x" (empty result if path_hasdrive() rejects the
+// path). Elsewhere: same as url_gpath().
+string url_gpathS(const string& url)
+{
+#ifndef _WIN32
+    return url_gpath(url);
+#else
+    const string winpath = url_gpath(url);
+    if (!path_hasdrive(winpath)) {
+        return string();
+    }
+    string out("/");
+    out += winpath[0];
+    if (!path_isdriveabs(winpath)) {
+        // Not drive-absolute: this should be an error really
+        out += '/';
+    }
+    out += winpath.substr(2);
+    return out;
+#endif
+}
+
+// Temporary files location: the first one set of $RECOLL_TMPDIR, $TMPDIR,
+// $TMP, $TEMP, else a system default. Canonized, computed once and cached.
+const string& tmplocation()
+{
+    static string stmpdir;
+    if (!stmpdir.empty()) {
+        return stmpdir;
+    }
+    static const char *const envnames[] = {
+        "RECOLL_TMPDIR", "TMPDIR", "TMP", "TEMP"
+    };
+    const unsigned int nnames = sizeof(envnames) / sizeof(envnames[0]);
+    const char *fromenv = 0;
+    for (unsigned int i = 0; fromenv == 0 && i < nnames; i++) {
+        fromenv = getenv(envnames[i]);
+    }
+    if (fromenv != 0) {
+        stmpdir = fromenv;
+    } else {
+#ifdef _WIN32
+        TCHAR bufw[(MAX_PATH + 1)*sizeof(TCHAR)];
+        GetTempPath(MAX_PATH + 1, bufw);
+        stmpdir = path_tchartoutf8(bufw);
+#else
+        stmpdir = "/tmp";
+#endif
+    }
+    stmpdir = path_canon(stmpdir);
+    return stmpdir;
+}
+
+// Create a new temporary directory inside tmplocation(). On success tdir
+// holds its path. On failure tdir is emptied, reason receives an
+// explanation, and false is returned.
+bool maketmpdir(string& tdir, string& reason)
+{
+#ifndef _WIN32
+    tdir = path_cat(tmplocation(), "rcltmpXXXXXX");
+    char *cp = strdup(tdir.c_str());
+    if (!cp) {
+        reason = "maketmpdir: out of memory (for file name !)\n";
+        tdir.erase();
+        return false;
+    }
+
+    // There is a race condition between name computation and
+    // mkdir. try to make sure that we at least don't shoot ourselves
+    // in the foot
+#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
+    static PTMutexInit mlock;
+    PTMutexLocker lock(mlock);
+#endif
+    if (!
+#ifdef HAVE_MKDTEMP
+            mkdtemp(cp)
+#else
+            mktemp(cp)
+#endif // HAVE_MKDTEMP
+       ) {
+        // Format the message before free(), which may change errno.
+        reason = "maketmpdir: mktemp failed for [" + tdir + "] : " +
+                 strerror(errno);
+        free(cp);
+        tdir.erase();
+        return false;
+    }
+    tdir = cp;
+    free(cp);
+#else // _WIN32
+    // Same race as above between name computation and mkdir: at least
+    // serialize our own threads.
+    static PTMutexInit mlock;
+    PTMutexLocker lock(mlock);
+    tdir = path_wingettempfilename(TEXT("rcltmp"));
+#endif
+
+    // At this point the directory does not exist yet except if we used
+    // mkdtemp
+#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
+    if (mkdir(tdir.c_str(), 0700) < 0) {
+        reason = string("maketmpdir: mkdir ") + tdir + " failed";
+        tdir.erase();
+        return false;
+    }
+#endif
+
+    return true;
+}
+
+TempFileInternal::TempFileInternal(const string& suffix)
+    : m_noremove(false)
+{
+    // Create an empty temporary file with a name ending in 'suffix'. On
+    // failure m_filename stays empty and m_reason is set. Needing a suffix
+    // prevents using mkstemp directly (maybe mkstemps one day), so there is
+    // a name computation/creation race: at least serialize our own threads.
+    static PTMutexInit mlock;
+    PTMutexLocker lock(mlock);
+
+#ifndef _WIN32
+    string filename = path_cat(tmplocation(), "rcltmpfXXXXXX");
+    char *cp = strdup(filename.c_str());
+    if (!cp) {
+        m_reason = "Out of memory (for file name !)\n";
+        return;
+    }
+
+    // Using mkstemp this way is awful (both the suffix adding and
+    // using mkstemp() instead of mktemp just to avoid the warnings)
+    int fd;
+    if ((fd = mkstemp(cp)) < 0) {
+        free(cp);
+        m_reason = "TempFileInternal: mkstemp failed\n";
+        return;
+    }
+    close(fd);
+    unlink(cp);
+    filename = cp;
+    free(cp);
+#else
+    string filename = path_wingettempfilename(TEXT("recoll"));
+#endif
+
+    m_filename = filename + suffix;
+    if (close(open(m_filename.c_str(), O_CREAT | O_EXCL, 0600)) != 0) {
+        m_reason = string("Could not open/create") + m_filename;
+        m_filename.erase();
+    }
+}
+
+TempFileInternal::~TempFileInternal()
+{
+    if (!(m_filename.empty() || m_noremove)) { // Created, and not to be kept
+        unlink(m_filename.c_str());
+    }
+}
+
+TempDir::TempDir()
+{
+    if (maketmpdir(m_dirname, m_reason)) {
+        return;
+    }
+    m_dirname.erase();  // Failure: an empty name is what ok() tests for
+}
+
+TempDir::~TempDir()
+{
+    if (!m_dirname.empty()) {   // Empty name: creation failed, nothing to do
+        (void)wipedir(m_dirname, true, true);   // Best effort: result ignored
+        m_dirname.erase();
+    }
+}
+
+// Delete the directory contents, not the directory. false + m_reason on error
+bool TempDir::wipe()
+{
+    if (m_dirname.empty()) {
+        m_reason = "TempDir::wipe: no directory !\n";
+    } else if (wipedir(m_dirname, false, true) != 0) {
+        m_reason = "TempDir::wipe: wipedir failed\n";
+    } else {
+        return true;
+    }
+    return false;
+}
+
+// Freedesktop standard cache directory (thumbnails are now in there):
+// $XDG_CACHE_HOME if set, else .cache inside path_home(). Computed on the
+// first call, then cached.
+static const string& xdgcachedir()
+{
+    static string cachedir;
+    if (!cachedir.empty()) {
+        return cachedir;
+    }
+    const char *fromenv = getenv("XDG_CACHE_HOME");
+    cachedir = fromenv ? string(fromenv)
+                       : path_cat(path_home(), ".cache");
+    return cachedir;
+}
+// Thumbnails directory: <cache>/thumbnails if it exists, else ~/.thumbnails
+static const string& thumbnailsdir()
+{
+    static string thumbsdir;
+    if (thumbsdir.empty()) {
+        const string modern = path_cat(xdgcachedir(), "thumbnails");
+        const bool found = access(modern.c_str(), 0) == 0;
+        thumbsdir = found ? modern : path_cat(path_home(), ".thumbnails");
+    }
+    return thumbsdir;
+}
+
+// Place for 256x256 files
+static const string thmbdirlarge = "large";
+// 128x128
+static const string thmbdirnormal = "normal";
+
+// Thumbnail file name: hex MD5 of the url-encoded url, with a .png suffix.
+static void thumbname(const string& url, string& name)
+{
+    string encoded(url_encode(url)), md5;
+    MD5String(encoded, md5);
+    MD5HexPrint(md5, name);
+    name += ".png";
+}
+
+// Compute the thumbnail path for url. Sizes <= 128 try the "normal" file
+// first, then the "large" one. Returns true if a readable file was found;
+// else false, path then being where a file of the wanted size would go.
+bool thumbPathForUrl(const string& url, int size, string& path)
+{
+    string name;
+    thumbname(url, name);
+    if (size <= 128) {
+        path = path_cat(path_cat(thumbnailsdir(), thmbdirnormal), name);
+        if (access(path.c_str(), R_OK) == 0) {
+            return true;
+        }
+    }
+    path = path_cat(path_cat(thumbnailsdir(), thmbdirlarge), name);
+    if (access(path.c_str(), R_OK) == 0) {
+        return true;
+    }
+    // File does not exist. Path corresponds to the large version at this
+    // point: fix it if needed. The result must stay inside thumbnailsdir(),
+    // like the paths probed above, not directly under the home directory.
+    if (size <= 128) {
+        path = path_cat(path_cat(thumbnailsdir(), thmbdirnormal), name);
+    }
+    return false;
+}
+
+void rclutil_init_mt()
+{
+    path_pkgdatadir();  // Each call computes and caches a static string,
+    tmplocation();      // so that later calls only read it. Presumably to
+    thumbnailsdir();    // be run before starting threads - TODO confirm
+}
+
+#else // TEST_RCLUTIL
+
+void path_to_thumb(const string& _input)
+{
+    string input(_input);
+    // Test helper: print the thumbnail path for a file. Make path absolute
+    if (input[0] != '/') {
+        input = path_absolute(input);
+    }
+    // Turn the canonic path into a file:// url
+    input = string("file://") + path_canon(input);
+    string path;
+    // Size 7 is <= 128, so the "normal" thumbnail is wanted. The
+    // found/not found result is ignored: the path is printed in all cases
+    thumbPathForUrl(input, 7, path);
+    cout << path << endl;
+}
+
+const char *thisprog;
+
+int main(int argc, const char **argv)
+{
+    thisprog = *argv++;
+    argc--;
+
+    string s;
+    vector<string>::const_iterator it;
+    // The thumbpath driver below is compiled out: main does nothing more.
+#if 0
+    if (argc > 1) {
+        cerr <<  "Usage: thumbpath <filepath>" << endl;
+        exit(1);
+    }
+    string input;
+    if (argc == 1) {
+        input = *argv++;
+        if (input.empty())  {
+            cerr << "Usage: thumbpath <filepath>" << endl;
+            exit(1);
+        }
+        path_to_thumb(input);
+    } else {
+        while (getline(cin, input)) {
+            path_to_thumb(input);
+        }
+    }
+    exit(0);
+#endif
+}
+
+#endif // TEST_RCLUTIL
+
diff --git a/src/utils/rclutil.h b/src/utils/rclutil.h
new file mode 100644
index 00000000..3084b9a3
--- /dev/null
+++ b/src/utils/rclutil.h
@@ -0,0 +1,112 @@
+/* Copyright (C) 2016 J.F.Dockes
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#ifndef _RCLUTIL_H_INCLUDED_
+#define _RCLUTIL_H_INCLUDED_
+#include "autoconfig.h"
+
+// Misc stuff not generic enough to get into smallut or pathut
+
+#include <map>
+#include <string>
+#include MEMORY_INCLUDE
+
+
+extern void rclutil_init_mt();
+
+/// Sub-directory for default recoll config (e.g: .recoll)
+extern std::string path_defaultrecollconfsubdir();
+
+/// e.g. /usr/share/recoll. Depends on OS and config
+extern const std::string& path_pkgdatadir();
+
+/// Transcode to utf-8 if possible or url encoding, for display.
+extern bool printableUrl(const std::string& fcharset,
+                         const std::string& in, std::string& out);
+/// Like url_gpath() but, for a Windows local path, also turn "c:/" into
+/// "/c/". This should be used only for splitting the path in rcldb.
+extern std::string url_gpathS(const std::string& url);
+
+/// Temp dir location: $RECOLL_TMPDIR, $TMPDIR, $TMP, $TEMP, else OS default
+extern const std::string& tmplocation();
+
+/// Create temporary directory (inside the temp location)
+extern bool maketmpdir(std::string& tdir, std::string& reason);
+
+/// Temporary file, created by the constructor, removed by the destructor.
+class TempFileInternal {
+public:
+    TempFileInternal(const std::string& suffix);
+    ~TempFileInternal();
+    const char *filename() const {      // Empty string if creation failed
+        return m_filename.c_str();
+    }
+    const std::string& getreason() const {  // Error message after a failure
+        return m_reason;
+    }
+    void setnoremove(bool onoff) {      // true: the destructor keeps the file
+        m_noremove = onoff;
+    }
+    bool ok() const {                   // false if creation failed
+        return !m_filename.empty();
+    }
+private:
+    std::string m_filename;
+    std::string m_reason;
+    bool m_noremove;
+};
+
+typedef STD_SHARED_PTR<TempFileInternal> TempFile;
+
+/// Temporary directory class. Recursively deleted by destructor.
+class TempDir {
+public:
+    TempDir();
+    ~TempDir();
+    const char *dirname() const {       // Empty string if creation failed
+        return m_dirname.c_str();
+    }
+    const std::string& getreason() const {  // Error message after a failure
+        return m_reason;
+    }
+    bool ok() const {                   // false if creation failed
+        return !m_dirname.empty();
+    }
+    /// Recursively delete contents but not self.
+    bool wipe();
+private:
+    std::string m_dirname;
+    std::string m_reason;
+    // Not copyable: a copy would delete the same directory a second time.
+    // Declared, deliberately not defined, so that any use fails to build.
+    TempDir(const TempDir&);
+    TempDir& operator=(const TempDir&);
+};
+
+// Freedesktop thumbnail standard path routine
+// On return, path will have the appropriate value in all cases,
+// returns true if the file already exists
+extern bool thumbPathForUrl(const std::string& url, int size,
+                            std::string& path);
+
+// Duplicate map<string,string> while ensuring no shared string data (to pass
+// to other thread):
+void map_ss_cp_noshr(const std::map<std::string, std::string> s,
+                     std::map<std::string, std::string> *d);
+
+
+#endif /* _RCLUTIL_H_INCLUDED_ */
diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp
index 8a4bb550..0700ffc7 100644
--- a/src/utils/smallut.cpp
+++ b/src/utils/smallut.cpp
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004 J.F.Dockes
+/* Copyright (C) 2004-2016 J.F.Dockes
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
  *   the Free Software Foundation; either version 2 of the License, or
@@ -16,7 +16,11 @@
  */
 
 #ifndef TEST_SMALLUT
+#ifdef BUILDING_RECOLL
 #include "autoconfig.h"
+#else
+#include "config.h"
+#endif
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -34,23 +38,10 @@
 #include UNORDERED_SET_INCLUDE
 
 #include "smallut.h"
-#include "utf8iter.h"
-#include "hldata.h"
-#include "cstr.h"
 
 using namespace std;
 
-void map_ss_cp_noshr(const map<string,string> s, map<string,string> *d)
-{
-    for (map<string,string>::const_iterator it= s.begin();
-         it != s.end(); it++) {
-        d->insert(
-            pair<string,string>(string(it->first.begin(), it->first.end()),
-                                string(it->second.begin(), it->second.end())));
-    }
-}
-
-int stringicmp(const string & s1, const string& s2) 
+int stringicmp(const string& s1, const string& s2)
 {
     string::const_iterator it1 = s1.begin();
     string::const_iterator it2 = s2.begin();
@@ -58,25 +49,27 @@ int stringicmp(const string & s1, const string& s2)
     char c1, c2;
 
     if (size1 < size2) {
-	while (it1 != s1.end()) { 
-	    c1 = ::toupper(*it1);
-	    c2 = ::toupper(*it2);
-	    if (c1 != c2) {
-		return c1 > c2 ? 1 : -1;
-	    }
-	    ++it1; ++it2;
-	}
-	return size1 == size2 ? 0 : -1;
+        while (it1 != s1.end()) {
+            c1 = ::toupper(*it1);
+            c2 = ::toupper(*it2);
+            if (c1 != c2) {
+                return c1 > c2 ? 1 : -1;
+            }
+            ++it1;
+            ++it2;
+        }
+        return size1 == size2 ? 0 : -1;
     } else {
-	while (it2 != s2.end()) { 
-	    c1 = ::toupper(*it1);
-	    c2 = ::toupper(*it2);
-	    if (c1 != c2) {
-		return c1 > c2 ? 1 : -1;
-	    }
-	    ++it1; ++it2;
-	}
-	return size1 == size2 ? 0 : 1;
+        while (it2 != s2.end()) {
+            c1 = ::toupper(*it1);
+            c2 = ::toupper(*it2);
+            if (c1 != c2) {
+                return c1 > c2 ? 1 : -1;
+            }
+            ++it1;
+            ++it2;
+        }
+        return size1 == size2 ? 0 : 1;
     }
 }
 void stringtolower(string& io)
@@ -84,8 +77,8 @@ void stringtolower(string& io)
     string::iterator it = io.begin();
     string::iterator ite = io.end();
     while (it != ite) {
-	*it = ::tolower(*it);
-	it++;
+        *it = ::tolower(*it);
+        it++;
     }
 }
 string stringtolower(const string& i)
@@ -97,20 +90,21 @@ string stringtolower(const string& i)
 extern int stringisuffcmp(const string& s1, const string& s2)
 {
     string::const_reverse_iterator r1 = s1.rbegin(), re1 = s1.rend(),
-	r2 = s2.rbegin(), re2 = s2.rend();
+                                   r2 = s2.rbegin(), re2 = s2.rend();
     while (r1 != re1 && r2 != re2) {
-	char c1 = ::toupper(*r1);
-	char c2 = ::toupper(*r2);
-	if (c1 != c2) {
-	    return c1 > c2 ? 1 : -1;
-	}
-	++r1; ++r2;
+        char c1 = ::toupper(*r1);
+        char c2 = ::toupper(*r2);
+        if (c1 != c2) {
+            return c1 > c2 ? 1 : -1;
+        }
+        ++r1;
+        ++r2;
     }
     return 0;
 }
 
 //  s1 is already lowercase
-int stringlowercmp(const string & s1, const string& s2) 
+int stringlowercmp(const string& s1, const string& s2)
 {
     string::const_iterator it1 = s1.begin();
     string::const_iterator it2 = s2.begin();
@@ -118,28 +112,30 @@ int stringlowercmp(const string & s1, const string& s2)
     char c2;
 
     if (size1 < size2) {
-	while (it1 != s1.end()) { 
-	    c2 = ::tolower(*it2);
-	    if (*it1 != c2) {
-		return *it1 > c2 ? 1 : -1;
-	    }
-	    ++it1; ++it2;
-	}
-	return size1 == size2 ? 0 : -1;
+        while (it1 != s1.end()) {
+            c2 = ::tolower(*it2);
+            if (*it1 != c2) {
+                return *it1 > c2 ? 1 : -1;
+            }
+            ++it1;
+            ++it2;
+        }
+        return size1 == size2 ? 0 : -1;
     } else {
-	while (it2 != s2.end()) { 
-	    c2 = ::tolower(*it2);
-	    if (*it1 != c2) {
-		return *it1 > c2 ? 1 : -1;
-	    }
-	    ++it1; ++it2;
-	}
-	return size1 == size2 ? 0 : 1;
+        while (it2 != s2.end()) {
+            c2 = ::tolower(*it2);
+            if (*it1 != c2) {
+                return *it1 > c2 ? 1 : -1;
+            }
+            ++it1;
+            ++it2;
+        }
+        return size1 == size2 ? 0 : 1;
     }
 }
 
 //  s1 is already uppercase
-int stringuppercmp(const string & s1, const string& s2) 
+int stringuppercmp(const string& s1, const string& s2)
 {
     string::const_iterator it1 = s1.begin();
     string::const_iterator it2 = s2.begin();
@@ -147,45 +143,47 @@ int stringuppercmp(const string & s1, const string& s2)
     char c2;
 
     if (size1 < size2) {
-	while (it1 != s1.end()) { 
-	    c2 = ::toupper(*it2);
-	    if (*it1 != c2) {
-		return *it1 > c2 ? 1 : -1;
-	    }
-	    ++it1; ++it2;
-	}
-	return size1 == size2 ? 0 : -1;
+        while (it1 != s1.end()) {
+            c2 = ::toupper(*it2);
+            if (*it1 != c2) {
+                return *it1 > c2 ? 1 : -1;
+            }
+            ++it1;
+            ++it2;
+        }
+        return size1 == size2 ? 0 : -1;
     } else {
-	while (it2 != s2.end()) { 
-	    c2 = ::toupper(*it2);
-	    if (*it1 != c2) {
-		return *it1 > c2 ? 1 : -1;
-	    }
-	    ++it1; ++it2;
-	}
-	return size1 == size2 ? 0 : 1;
+        while (it2 != s2.end()) {
+            c2 = ::toupper(*it2);
+            if (*it1 != c2) {
+                return *it1 > c2 ? 1 : -1;
+            }
+            ++it1;
+            ++it2;
+        }
+        return size1 == size2 ? 0 : 1;
     }
 }
 
 // Compare charset names, removing the more common spelling variations
-bool samecharset(const string &cs1, const string &cs2)
+bool samecharset(const string& cs1, const string& cs2)
 {
     string mcs1, mcs2;
     // Remove all - and _, turn to lowecase
-    for (unsigned int i = 0; i < cs1.length();i++) {
-	if (cs1[i] != '_' && cs1[i] != '-') {
-	    mcs1 += ::tolower(cs1[i]);
-	}
+    for (unsigned int i = 0; i < cs1.length(); i++) {
+        if (cs1[i] != '_' && cs1[i] != '-') {
+            mcs1 += ::tolower(cs1[i]);
+        }
     }
-    for (unsigned int i = 0; i < cs2.length();i++) {
-	if (cs2[i] != '_' && cs2[i] != '-') {
-	    mcs2 += ::tolower(cs2[i]);
-	}
+    for (unsigned int i = 0; i < cs2.length(); i++) {
+        if (cs2[i] != '_' && cs2[i] != '-') {
+            mcs2 += ::tolower(cs2[i]);
+        }
     }
     return mcs1 == mcs2;
 }
 
-template <class T> bool stringToStrings(const string &s, T &tokens, 
+template <class T> bool stringToStrings(const string& s, T& tokens,
                                         const string& addseps)
 {
     string current;
@@ -193,350 +191,351 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
     enum states {SPACE, TOKEN, INQUOTE, ESCAPE};
     states state = SPACE;
     for (unsigned int i = 0; i < s.length(); i++) {
-	switch (s[i]) {
-        case '"': 
-	    switch(state) {
-            case SPACE: 
-		state=INQUOTE; continue;
-            case TOKEN: 
-	        current += '"';
-		continue;
-            case INQUOTE: 
+        switch (s[i]) {
+        case '"':
+            switch (state) {
+            case SPACE:
+                state = INQUOTE;
+                continue;
+            case TOKEN:
+                current += '"';
+                continue;
+            case INQUOTE:
                 tokens.insert(tokens.end(), current);
-		current.clear();
-		state = SPACE;
-		continue;
+                current.clear();
+                state = SPACE;
+                continue;
             case ESCAPE:
-	        current += '"';
-	        state = INQUOTE;
+                current += '"';
+                state = INQUOTE;
                 continue;
-	    }
-	    break;
-        case '\\': 
-	    switch(state) {
-            case SPACE: 
-            case TOKEN: 
+            }
+            break;
+        case '\\':
+            switch (state) {
+            case SPACE:
+            case TOKEN:
                 current += '\\';
-                state=TOKEN; 
+                state = TOKEN;
                 continue;
-            case INQUOTE: 
+            case INQUOTE:
                 state = ESCAPE;
                 continue;
             case ESCAPE:
                 current += '\\';
                 state = INQUOTE;
                 continue;
-	    }
-	    break;
+            }
+            break;
 
-        case ' ': 
-        case '\t': 
-        case '\n': 
-        case '\r': 
-	    switch(state) {
-            case SPACE: 
+        case ' ':
+        case '\t':
+        case '\n':
+        case '\r':
+            switch (state) {
+            case SPACE:
                 continue;
-            case TOKEN: 
-		tokens.insert(tokens.end(), current);
-		current.clear();
-		state = SPACE;
-		continue;
-            case INQUOTE: 
+            case TOKEN:
+                tokens.insert(tokens.end(), current);
+                current.clear();
+                state = SPACE;
+                continue;
+            case INQUOTE:
             case ESCAPE:
                 current += s[i];
                 continue;
-	    }
-	    break;
+            }
+            break;
 
         default:
             if (!addseps.empty() && addseps.find(s[i]) != string::npos) {
-                switch(state) {
+                switch (state) {
                 case ESCAPE:
                     state = INQUOTE;
                     break;
-                case INQUOTE: 
+                case INQUOTE:
                     break;
-                case SPACE: 
+                case SPACE:
                     tokens.insert(tokens.end(), string(1, s[i]));
                     continue;
-                case TOKEN: 
+                case TOKEN:
                     tokens.insert(tokens.end(), current);
                     current.erase();
                     tokens.insert(tokens.end(), string(1, s[i]));
                     state = SPACE;
                     continue;
                 }
-            } else switch(state) {
+            } else switch (state) {
                 case ESCAPE:
                     state = INQUOTE;
                     break;
-                case SPACE: 
+                case SPACE:
                     state = TOKEN;
                     break;
-                case TOKEN: 
-                case INQUOTE: 
+                case TOKEN:
+                case INQUOTE:
                     break;
                 }
-	    current += s[i];
-	}
+            current += s[i];
+        }
     }
-    switch(state) {
-    case SPACE: 
-	break;
-    case TOKEN: 
-	tokens.insert(tokens.end(), current);
-	break;
-    case INQUOTE: 
+    switch (state) {
+    case SPACE:
+        break;
+    case TOKEN:
+        tokens.insert(tokens.end(), current);
+        break;
+    case INQUOTE:
     case ESCAPE:
-	return false;
+        return false;
     }
     return true;
 }
 
-template bool stringToStrings<list<string> >(const string &, 
-					     list<string> &, const string&);
-template bool stringToStrings<vector<string> >(const string &, 
-					       vector<string> &,const string&);
-template bool stringToStrings<set<string> >(const string &,
-					    set<string> &, const string&);
+template bool stringToStrings<list<string> >(const string&,
+        list<string>&, const string&);
+template bool stringToStrings<vector<string> >(const string&,
+        vector<string>&, const string&);
+template bool stringToStrings<set<string> >(const string&,
+        set<string>&, const string&);
 template bool stringToStrings<STD_UNORDERED_SET<string> >
-(const string &, STD_UNORDERED_SET<string> &, const string&);
+(const string&, STD_UNORDERED_SET<string>&, const string&);
 
-template <class T> void stringsToString(const T &tokens, string &s) 
+template <class T> void stringsToString(const T& tokens, string& s)
 {
     for (typename T::const_iterator it = tokens.begin();
-	 it != tokens.end(); it++) {
-	bool hasblanks = false;
-	if (it->find_first_of(" \t\n") != string::npos)
-	    hasblanks = true;
-	if (it != tokens.begin())
-	    s.append(1, ' ');
-	if (hasblanks)
-	    s.append(1, '"');
-	for (unsigned int i = 0; i < it->length(); i++) {
-	    char car = it->at(i);
-	    if (car == '"') {
-		s.append(1, '\\');
-		s.append(1, car);
-	    } else {
-		s.append(1, car);
-	    }
-	}
-	if (hasblanks)
-	    s.append(1, '"');
+            it != tokens.end(); it++) {
+        bool hasblanks = false;
+        if (it->find_first_of(" \t\n") != string::npos) {
+            hasblanks = true;
+        }
+        if (it != tokens.begin()) {
+            s.append(1, ' ');
+        }
+        if (hasblanks) {
+            s.append(1, '"');
+        }
+        for (unsigned int i = 0; i < it->length(); i++) {
+            char car = it->at(i);
+            if (car == '"') {
+                s.append(1, '\\');
+                s.append(1, car);
+            } else {
+                s.append(1, car);
+            }
+        }
+        if (hasblanks) {
+            s.append(1, '"');
+        }
     }
 }
-template void stringsToString<list<string> >(const list<string> &, string &);
-template void stringsToString<vector<string> >(const vector<string> &,string &);
-template void stringsToString<set<string> >(const set<string> &, string &);
-template <class T> string stringsToString(const T &tokens)
+template void stringsToString<list<string> >(const list<string>&, string&);
+template void stringsToString<vector<string> >(const vector<string>&, string&);
+template void stringsToString<set<string> >(const set<string>&, string&);
+template <class T> string stringsToString(const T& tokens)
 {
     string out;
     stringsToString<T>(tokens, out);
     return out;
 }
-template string stringsToString<list<string> >(const list<string> &);
-template string stringsToString<vector<string> >(const vector<string> &);
-template string stringsToString<set<string> >(const set<string> &);
+template string stringsToString<list<string> >(const list<string>&);
+template string stringsToString<vector<string> >(const vector<string>&);
+template string stringsToString<set<string> >(const set<string>&);
 
-template <class T> void stringsToCSV(const T &tokens, string &s, 
-				     char sep)
+template <class T> void stringsToCSV(const T& tokens, string& s,
+                                     char sep)
 {
     s.erase();
     for (typename T::const_iterator it = tokens.begin();
-	 it != tokens.end(); it++) {
-	bool needquotes = false;
-	if (it->empty() || 
-	    it->find_first_of(string(1, sep) + "\"\n") != string::npos)
-	    needquotes = true;
-	if (it != tokens.begin())
-	    s.append(1, sep);
-	if (needquotes)
-	    s.append(1, '"');
-	for (unsigned int i = 0; i < it->length(); i++) {
-	    char car = it->at(i);
-	    if (car == '"') {
-		s.append(2, '"');
-	    } else {
-		s.append(1, car);
-	    }
-	}
-	if (needquotes)
-	    s.append(1, '"');
+            it != tokens.end(); it++) {
+        bool needquotes = false;
+        if (it->empty() ||
+                it->find_first_of(string(1, sep) + "\"\n") != string::npos) {
+            needquotes = true;
+        }
+        if (it != tokens.begin()) {
+            s.append(1, sep);
+        }
+        if (needquotes) {
+            s.append(1, '"');
+        }
+        for (unsigned int i = 0; i < it->length(); i++) {
+            char car = it->at(i);
+            if (car == '"') {
+                s.append(2, '"');
+            } else {
+                s.append(1, car);
+            }
+        }
+        if (needquotes) {
+            s.append(1, '"');
+        }
     }
 }
-template void stringsToCSV<list<string> >(const list<string> &, string &, char);
-template void stringsToCSV<vector<string> >(const vector<string> &,string &, 
-					    char);
+template void stringsToCSV<list<string> >(const list<string>&, string&, char);
+template void stringsToCSV<vector<string> >(const vector<string>&, string&,
+        char);
 
 void stringToTokens(const string& str, vector<string>& tokens,
-		    const string& delims, bool skipinit)
+                    const string& delims, bool skipinit)
 {
     string::size_type startPos = 0, pos;
 
     // Skip initial delims, return empty if this eats all.
-    if (skipinit && 
-	(startPos = str.find_first_not_of(delims, 0)) == string::npos) {
-	return;
+    if (skipinit &&
+            (startPos = str.find_first_not_of(delims, 0)) == string::npos) {
+        return;
     }
-    while (startPos < str.size()) { 
+    while (startPos < str.size()) {
         // Find next delimiter or end of string (end of token)
         pos = str.find_first_of(delims, startPos);
 
         // Add token to the vector and adjust start
-	if (pos == string::npos) {
-	    tokens.push_back(str.substr(startPos));
-	    break;
-	} else if (pos == startPos) {
-	    // Dont' push empty tokens after first
-	    if (tokens.empty())
-		tokens.push_back(string());
-	    startPos = ++pos;
-	} else {
-	    tokens.push_back(str.substr(startPos, pos - startPos));
-	    startPos = ++pos;
-	}
+        if (pos == string::npos) {
+            tokens.push_back(str.substr(startPos));
+            break;
+        } else if (pos == startPos) {
+            // Don't push empty tokens after the first
+            if (tokens.empty()) {
+                tokens.push_back(string());
+            }
+            startPos = ++pos;
+        } else {
+            tokens.push_back(str.substr(startPos, pos - startPos));
+            startPos = ++pos;
+        }
     }
 }
 
-bool stringToBool(const string &s)
+bool stringToBool(const string& s)
 {
-    if (s.empty())
-	return false;
-    if (isdigit(s[0])) {
-	int val = atoi(s.c_str());
-	return val ? true : false;
+    if (s.empty()) {
+        return false;
+    }
+    if (isdigit(s[0])) {
+        int val = atoi(s.c_str());
+        return val ? true : false;
+    }
+    if (s.find_first_of("yYtT") == 0) {
+        return true;
     }
-    if (s.find_first_of("yYtT") == 0)
-	return true;
     return false;
 }
 
-void trimstring(string &s, const char *ws)
+void trimstring(string& s, const char *ws)
 {
     string::size_type pos = s.find_first_not_of(ws);
     if (pos == string::npos) {
-	s.clear();
-	return;
+        s.clear();
+        return;
     }
     s.replace(0, pos, string());
 
     pos = s.find_last_not_of(ws);
-    if (pos != string::npos && pos != s.length()-1)
-	s.replace(pos+1, string::npos, string());
+    if (pos != string::npos && pos != s.length() - 1) {
+        s.replace(pos + 1, string::npos, string());
+    }
 }
 
 // Remove some chars and replace them with spaces
-string neutchars(const string &str, const string &chars)
+string neutchars(const string& str, const string& chars)
 {
     string out;
     neutchars(str, out, chars);
     return out;
 }
-void neutchars(const string &str, string &out, const string& chars)
+void neutchars(const string& str, string& out, const string& chars)
 {
     string::size_type startPos, pos;
 
-    for (pos = 0;;) { 
+    for (pos = 0;;) {
         // Skip initial chars, break if this eats all.
-        if ((startPos = str.find_first_not_of(chars, pos)) == string::npos)
-	    break;
+        if ((startPos = str.find_first_not_of(chars, pos)) == string::npos) {
+            break;
+        }
         // Find next delimiter or end of string (end of token)
         pos = str.find_first_of(chars, startPos);
         // Add token to the output. Note: token cant be empty here
-	if (pos == string::npos) {
-	    out += str.substr(startPos);
-	} else {
-	    out += str.substr(startPos, pos - startPos) + " ";
-	}
+        if (pos == string::npos) {
+            out += str.substr(startPos);
+        } else {
+            out += str.substr(startPos, pos - startPos) + " ";
+        }
     }
 }
 
 
 /* Truncate a string to a given maxlength, avoiding cutting off midword
  * if reasonably possible. Note: we could also use textsplit, stopping when
- * we have enough, this would be cleanly utf8-aware but would remove 
+ * we have enough, this would be cleanly utf8-aware but would remove
  * punctuation */
 static const string cstr_SEPAR = " \t\n\r-:.;,/[]{}";
-string truncate_to_word(const string &input, string::size_type maxlen)
+string truncate_to_word(const string& input, string::size_type maxlen)
 {
     string output;
     if (input.length() <= maxlen) {
-	output = input;
+        output = input;
     } else {
-	output = input.substr(0, maxlen);
-	string::size_type space = output.find_last_of(cstr_SEPAR);
-	// Original version only truncated at space if space was found after
-	// maxlen/2. But we HAVE to truncate at space, else we'd need to do
-	// utf8 stuff to avoid truncating at multibyte char. In any case,
-	// not finding space means that the text probably has no value.
-	// Except probably for Asian languages, so we may want to fix this 
-	// one day
-	if (space == string::npos) {
-	    output.erase();
-	} else {
-	    output.erase(space);
-	}
+        output = input.substr(0, maxlen);
+        string::size_type space = output.find_last_of(cstr_SEPAR);
+        // Original version only truncated at space if space was found after
+        // maxlen/2. But we HAVE to truncate at space, else we'd need to do
+        // utf8 stuff to avoid truncating at multibyte char. In any case,
+        // not finding space means that the text probably has no value.
+        // Except probably for Asian languages, so we may want to fix this
+        // one day
+        if (space == string::npos) {
+            output.erase();
+        } else {
+            output.erase(space);
+        }
     }
     return output;
 }
 
-void utf8truncate(string &s, int maxlen)
-{
-    if (s.size() <= string::size_type(maxlen))
-	return;
-    Utf8Iter iter(s);
-    string::size_type pos = 0;
-    while (iter++ != string::npos) 
-	if (iter.getBpos() < string::size_type(maxlen))
-	    pos = iter.getBpos();
-
-    s.erase(pos);
-}
-
 // Escape things that would look like markup
-string escapeHtml(const string &in)
+string escapeHtml(const string& in)
 {
     string out;
     for (string::size_type pos = 0; pos < in.length(); pos++) {
-	switch(in.at(pos)) {
-	case '<':
-	    out += "&lt;";
-	    break;
-	case '&':
-	    out += "&amp;";
-	    break;
-	default:
-	    out += in.at(pos);
-	}
+        switch (in.at(pos)) {
+        case '<':
+            out += "&lt;";
+            break;
+        case '&':
+            out += "&amp;";
+            break;
+        default:
+            out += in.at(pos);
+        }
     }
     return out;
 }
 
-string escapeShell(const string &in)
+string escapeShell(const string& in)
 {
     string out;
     out += "\"";
     for (string::size_type pos = 0; pos < in.length(); pos++) {
-	switch(in.at(pos)) {
-	case '$':
-	    out += "\\$";
-	    break;
-	case '`':
-	    out += "\\`";
-	    break;
-	case '"':
-	    out += "\\\"";
-	    break;
-	case '\n':
-	    out += "\\\n";
-	    break;
-	case '\\':
-	    out += "\\\\";
-	    break;
-	default:
-	    out += in.at(pos);
-	}
+        switch (in.at(pos)) {
+        case '$':
+            out += "\\$";
+            break;
+        case '`':
+            out += "\\`";
+            break;
+        case '"':
+            out += "\\\"";
+            break;
+        case '\n':
+            out += "\\\n";
+            break;
+        case '\\':
+            out += "\\\\";
+            break;
+        default:
+            out += in.at(pos);
+        }
     }
     out += "\"";
     return out;
@@ -547,26 +546,26 @@ string escapeShell(const string &in)
 bool pcSubst(const string& in, string& out, const map<char, string>& subs)
 {
     string::const_iterator it;
-    for (it = in.begin(); it != in.end();it++) {
-	if (*it == '%') {
-	    if (++it == in.end()) {
-		out += '%';
-		break;
-	    }
-	    if (*it == '%') {
-		out += '%';
-		continue;
-	    }
-	    map<char,string>::const_iterator tr;
-	    if ((tr = subs.find(*it)) != subs.end()) {
-		out += tr->second;
-	    } else {
-		// We used to do "out += *it;" here but this does not make
+    for (it = in.begin(); it != in.end(); it++) {
+        if (*it == '%') {
+            if (++it == in.end()) {
+                out += '%';
+                break;
+            }
+            if (*it == '%') {
+                out += '%';
+                continue;
+            }
+            map<char, string>::const_iterator tr;
+            if ((tr = subs.find(*it)) != subs.end()) {
+                out += tr->second;
+            } else {
+                // We used to do "out += *it;" here but this does not make
                 // sense
-	    }
-	} else {
-	    out += *it;
-	}
+            }
+        } else {
+            out += *it;
+        }
     }
     return true;
 }
@@ -576,15 +575,15 @@ bool pcSubst(const string& in, string& out, const map<string, string>& subs)
     out.erase();
     string::size_type i;
     for (i = 0; i < in.size(); i++) {
-	if (in[i] == '%') {
-	    if (++i == in.size()) {
-		out += '%';
-		break;
-	    }
-	    if (in[i] == '%') {
-		out += '%';
-		continue;
-	    }
+        if (in[i] == '%') {
+            if (++i == in.size()) {
+                out += '%';
+                break;
+            }
+            if (in[i] == '%') {
+                out += '%';
+                continue;
+            }
             string key = "";
             if (in[i] == '(') {
                 if (++i == in.size()) {
@@ -594,25 +593,25 @@ bool pcSubst(const string& in, string& out, const map<string, string>& subs)
                 string::size_type j = in.find_first_of(")", i);
                 if (j == string::npos) {
                     // ??concatenate remaining part and stop
-                    out += in.substr(i-2);
+                    out += in.substr(i - 2);
                     break;
                 }
-                key = in.substr(i, j-i);
+                key = in.substr(i, j - i);
                 i = j;
             } else {
                 key = in[i];
             }
-	    map<string,string>::const_iterator tr;
-	    if ((tr = subs.find(key)) != subs.end()) {
-		out += tr->second;
-	    } else {
+            map<string, string>::const_iterator tr;
+            if ((tr = subs.find(key)) != subs.end()) {
+                out += tr->second;
+            } else {
                 // Substitute to nothing, that's the reasonable thing to do
                 // instead of keeping the %(key)
                 // out += key.size()==1? key : string("(") + key + string(")");
-	    }
-	} else {
-	    out += in[i];
-	}
+            }
+        } else {
+            out += in[i];
+        }
     }
     return true;
 }
@@ -622,14 +621,15 @@ inline static int ulltorbuf(unsigned long long val, char *rbuf)
     for (idx = 0; val; idx++) {
         rbuf[idx] = '0' + val % 10;
         val /= 10;
-    } while (val);
+    }
+    while (val);
     rbuf[idx] = 0;
     return idx;
 }
 
 inline static void ullcopyreverse(const char *rbuf, string& buf, int idx)
 {
-    buf.reserve(idx+1);
+    buf.reserve(idx + 1);
     for (int i = idx - 1; i >= 0; i--) {
         buf.push_back(rbuf[i]);
     }
@@ -659,14 +659,16 @@ void lltodecstr(long long val, string& buf)
     }
 
     bool neg = val < 0;
-    if (neg)
+    if (neg) {
         val = -val;
+    }
 
     char rbuf[30];
     int idx = ulltorbuf(val, rbuf);
 
-    if (neg)
+    if (neg) {
         rbuf[idx++] = '-';
+    }
     rbuf[idx] = 0;
 
     ullcopyreverse(rbuf, buf, idx);
@@ -691,93 +693,96 @@ string ulltodecstr(unsigned long long val)
 string displayableBytes(off_t size)
 {
     const char *unit;
-    
+
     double roundable = 0;
     if (size < 1000) {
-	unit = " B ";
-	roundable = double(size);
+        unit = " B ";
+        roundable = double(size);
     } else if (size < 1E6) {
-	unit = " KB ";
-	roundable = double(size) / 1E3;
+        unit = " KB ";
+        roundable = double(size) / 1E3;
     } else if (size < 1E9) {
-	unit = " MB ";
-	roundable = double(size) / 1E6;
+        unit = " MB ";
+        roundable = double(size) / 1E6;
     } else {
-	unit = " GB ";
-	roundable = double(size) / 1E9;
+        unit = " GB ";
+        roundable = double(size) / 1E9;
     }
     size = off_t(round(roundable));
     return lltodecstr(size).append(unit);
 }
 
-string breakIntoLines(const string& in, unsigned int ll, 
-		      unsigned int maxlines)
+string breakIntoLines(const string& in, unsigned int ll,
+                      unsigned int maxlines)
 {
     string query = in;
     string oq;
     unsigned int nlines = 0;
     while (query.length() > 0) {
-	string ss = query.substr(0, ll);
-	if (ss.length() == ll) {
-	    string::size_type pos = ss.find_last_of(" ");
-	    if (pos == string::npos) {
-		pos = query.find_first_of(" ");
-		if (pos != string::npos)
-		    ss = query.substr(0, pos+1);
-		else 
-		    ss = query;
-	    } else {
-		ss = ss.substr(0, pos+1);
-	    }
-	}
-	// This cant happen, but anyway. Be very sure to avoid an infinite loop
-	if (ss.length() == 0) {
-	    oq = query;
-	    break;
-	}
-	oq += ss + "\n";
-	if (nlines++ >= maxlines) {
-	    oq += " ... \n";
-	    break;
-	}
-	query= query.substr(ss.length());
+        string ss = query.substr(0, ll);
+        if (ss.length() == ll) {
+            string::size_type pos = ss.find_last_of(" ");
+            if (pos == string::npos) {
+                pos = query.find_first_of(" ");
+                if (pos != string::npos) {
+                    ss = query.substr(0, pos + 1);
+                } else {
+                    ss = query;
+                }
+            } else {
+                ss = ss.substr(0, pos + 1);
+            }
+        }
+        // This can't happen, but anyway. Be very sure to avoid an infinite loop
+        if (ss.length() == 0) {
+            oq = query;
+            break;
+        }
+        oq += ss + "\n";
+        if (nlines++ >= maxlines) {
+            oq += " ... \n";
+            break;
+        }
+        query = query.substr(ss.length());
     }
     return oq;
 }
 
 // Date is Y[-M[-D]]
-static bool parsedate(vector<string>::const_iterator& it, 
-              vector<string>::const_iterator end, DateInterval *dip)
+static bool parsedate(vector<string>::const_iterator& it,
+                      vector<string>::const_iterator end, DateInterval *dip)
 {
     dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0;
-    if (it->length() > 4 || !it->length() || 
-        it->find_first_not_of("0123456789") != string::npos) {
+    if (it->length() > 4 || !it->length() ||
+            it->find_first_not_of("0123456789") != string::npos) {
         return false;
     }
     if (it == end || sscanf(it++->c_str(), "%d", &dip->y1) != 1) {
         return false;
     }
-    if (it == end || *it == "/")
+    if (it == end || *it == "/") {
         return true;
+    }
     if (*it++ != "-") {
         return false;
     }
 
-    if (it->length() > 2 || !it->length() || 
-        it->find_first_not_of("0123456789") != string::npos) {
+    if (it->length() > 2 || !it->length() ||
+            it->find_first_not_of("0123456789") != string::npos) {
         return false;
     }
     if (it == end || sscanf(it++->c_str(), "%d", &dip->m1) != 1) {
         return false;
     }
-    if (it == end || *it == "/")
+    if (it == end || *it == "/") {
         return true;
+    }
     if (*it++ != "-") {
         return false;
     }
 
-    if (it->length() > 2 || !it->length() || 
-        it->find_first_not_of("0123456789") != string::npos) {
+    if (it->length() > 2 || !it->length() ||
+            it->find_first_not_of("0123456789") != string::npos) {
         return false;
     }
     if (it == end || sscanf(it++->c_str(), "%d", &dip->d1) != 1) {
@@ -790,7 +795,7 @@ static bool parsedate(vector<string>::const_iterator& it,
 // Called with the 'P' already processed. Period ends at end of string
 // or at '/'. We dont' do a lot effort at validation and will happily
 // accept 10Y1Y4Y (the last wins)
-static bool parseperiod(vector<string>::const_iterator& it, 
+static bool parseperiod(vector<string>::const_iterator& it,
                         vector<string>::const_iterator end, DateInterval *dip)
 {
     dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0;
@@ -802,17 +807,29 @@ static bool parseperiod(vector<string>::const_iterator& it,
         if (sscanf(it++->c_str(), "%d", &value) != 1) {
             return false;
         }
-        if (it == end || it->empty())
+        if (it == end || it->empty()) {
             return false;
+        }
         switch (it->at(0)) {
-        case 'Y': case 'y': dip->y1 = value;break;
-        case 'M': case 'm': dip->m1 = value;break;
-        case 'D': case 'd': dip->d1 = value;break;
-        default: return false;
+        case 'Y':
+        case 'y':
+            dip->y1 = value;
+            break;
+        case 'M':
+        case 'm':
+            dip->m1 = value;
+            break;
+        case 'D':
+        case 'd':
+            dip->d1 = value;
+            break;
+        default:
+            return false;
         }
         it++;
-        if (it == end)
+        if (it == end) {
             return true;
+        }
         if (*it == "/") {
             return true;
         }
@@ -823,10 +840,11 @@ static bool parseperiod(vector<string>::const_iterator& it,
 #ifdef _WIN32
 int setenv(const char *name, const char *value, int overwrite)
 {
-    if(!overwrite) {
+    if (!overwrite) {
         const char *cp = getenv(name);
-        if (cp)
+        if (cp) {
             return -1;
+        }
     }
     return _putenv_s(name, value);
 }
@@ -845,10 +863,11 @@ time_t portable_timegm(struct tm *tm)
     setenv("TZ", "", 1);
     tzset();
     ret = mktime(tm);
-    if (tz)
+    if (tz) {
         setenv("TZ", tz, 1);
-    else
+    } else {
         unsetenv("TZ");
+    }
     tzset();
     return ret;
 }
@@ -857,12 +876,12 @@ time_t portable_timegm(struct tm *tm)
 static void cerrdip(const string& s, DateInterval *dip)
 {
     cerr << s << dip->y1 << "-" << dip->m1 << "-" << dip->d1 << "/"
-         << dip->y2 << "-" << dip->m2 << "-" << dip->d2 
+         << dip->y2 << "-" << dip->m2 << "-" << dip->d2
          << endl;
 }
 #endif
 
-// Compute date + period. Won't work out of the unix era. 
+// Compute date + period. Won't work out of the unix era.
 // or pre-1970 dates. Just convert everything to unixtime and
 // seconds (with average durations for months/years), add and convert
 // back
@@ -873,7 +892,7 @@ static bool addperiod(DateInterval *dp, DateInterval *pp)
     // timegm sort it out
     memset(&tm, 0, sizeof(tm));
     tm.tm_year = dp->y1 - 1900 + pp->y1;
-    tm.tm_mon = dp->m1 + pp->m1 -1;
+    tm.tm_mon = dp->m1 + pp->m1 - 1;
     tm.tm_mday = dp->d1 + pp->d1;
     time_t tres = mktime(&tm);
     localtime_r(&tres, &tm);
@@ -886,10 +905,19 @@ static bool addperiod(DateInterval *dp, DateInterval *pp)
 int monthdays(int mon, int year)
 {
     switch (mon) {
-    // We are returning a few two many 29 days februaries, no problem
-    case 2: return (year % 4) == 0 ? 29 : 28;
-    case 1:case 3:case 5:case 7: case 8:case 10:case 12: return 31;
-    default: return 30;
+    // We return 29 days for a few non-leap Februaries (e.g. 1900), no problem
+    case 2:
+        return (year % 4) == 0 ? 29 : 28;
+    case 1:
+    case 3:
+    case 5:
+    case 7:
+    case 8:
+    case 10:
+    case 12:
+        return 31;
+    default:
+        return 30;
     }
 }
 bool parsedateinterval(const string& s, DateInterval *dip)
@@ -898,14 +926,15 @@ bool parsedateinterval(const string& s, DateInterval *dip)
     dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0;
     DateInterval p1, p2, d1, d2;
     p1 = p2 = d1 = d2 = *dip;
-    bool hasp1 = false, hasp2 = false, hasd1 = false, hasd2 = false, 
-        hasslash = false;
+    bool hasp1 = false, hasp2 = false, hasd1 = false, hasd2 = false,
+         hasslash = false;
 
     if (!stringToStrings(s, vs, "PYMDpymd-/")) {
         return false;
     }
-    if (vs.empty())
+    if (vs.empty()) {
         return false;
+    }
 
     vector<string>::const_iterator it = vs.begin();
     if (*it == "P" || *it == "p") {
@@ -943,7 +972,7 @@ secondelt:
             if (!parseperiod(it, vs.end(), &p2)) {
                 return false;
             }
-        hasp2 = true;
+            hasp2 = true;
         } else {
             if (!parsedate(it, vs.end(), &d2)) {
                 return false;
@@ -983,7 +1012,7 @@ secondelt:
     // If there is no explicit period, an incomplete date indicates a
     // period of the size of the uncompleted elements. Ex: 1999
     // actually means 1999/P12M
-    // 
+    //
     // If there is a period, the incomplete date should be extended
     // to the beginning or end of the unspecified portion. Ex: 1999/
     // means 1999-01-01/ and /1999 means /1999-12-31
@@ -1042,10 +1071,12 @@ secondelt:
 
 void catstrerror(string *reason, const char *what, int _errno)
 {
-    if (!reason)
-	return;
-    if (what)
-	reason->append(what);
+    if (!reason) {
+        return;
+    }
+    if (what) {
+        reason->append(what);
+    }
 
     reason->append(": errno: ");
 
@@ -1059,16 +1090,16 @@ void catstrerror(string *reason, const char *what, int _errno)
     // Note: sun strerror is noted mt-safe ??
     reason->append(strerror(_errno));
 #else
-#define ERRBUFSZ 200    
+#define ERRBUFSZ 200
     char errbuf[ERRBUFSZ];
-    // There are 2 versions of strerror_r. 
+    // There are 2 versions of strerror_r.
     // - The GNU one returns a pointer to the message (maybe
     //   static storage or supplied buffer).
     // - The POSIX one always stores in supplied buffer and
     //   returns 0 on success. As the possibility of error and
     //   error code are not specified, we're basically doomed
     //   cause we can't use a test on the 0 value to know if we
-    //   were returned a pointer... 
+    //   were returned a pointer...
     // Also couldn't find an easy way to disable the gnu version without
     // changing the cxxflags globally, so forget it. Recent gnu lib versions
     // normally default to the posix version.
@@ -1080,59 +1111,6 @@ void catstrerror(string *reason, const char *what, int _errno)
 #endif
 }
 
-void HighlightData::toString(std::string& out)
-{
-    out.append("\nUser terms (orthograph): ");
-    for (std::set<std::string>::const_iterator it = uterms.begin();
-	 it != uterms.end(); it++) {
-	out.append(" [").append(*it).append("]");
-    }
-    out.append("\nUser terms to Query terms:");
-    for (map<string, string>::const_iterator it = terms.begin();
-	 it != terms.end(); it++) {
-	out.append("[").append(it->first).append("]->[");
-	out.append(it->second).append("] ");
-    }
-    out.append("\nGroups: ");
-    char cbuf[200];
-    sprintf(cbuf, "Groups size %d grpsugidx size %d ugroups size %d",
-	    int(groups.size()), int(grpsugidx.size()), int(ugroups.size()));
-    out.append(cbuf);
-
-    size_t ugidx = (size_t)-1;
-    for (unsigned int i = 0; i < groups.size(); i++) {
-	if (ugidx != grpsugidx[i]) {
-	    ugidx = grpsugidx[i];
-	    out.append("\n(");
-	    for (unsigned int j = 0; j < ugroups[ugidx].size(); j++) {
-		out.append("[").append(ugroups[ugidx][j]).append("] ");
-	    }
-	    out.append(") ->");
-	}
-	out.append(" {");
-	for (unsigned int j = 0; j < groups[i].size(); j++) {
-	    out.append("[").append(groups[i][j]).append("]");
-	}
-	sprintf(cbuf, "%d", slacks[i]);
-	out.append("}").append(cbuf);
-    }
-    out.append("\n");
-}
-
-void HighlightData::append(const HighlightData& hl)
-{
-    uterms.insert(hl.uterms.begin(), hl.uterms.end());
-    terms.insert(hl.terms.begin(), hl.terms.end());
-    size_t ugsz0 = ugroups.size();
-    ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end());
-
-    groups.insert(groups.end(), hl.groups.begin(), hl.groups.end());
-    slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end());
-    for (std::vector<size_t>::const_iterator it = hl.grpsugidx.begin(); 
-	 it != hl.grpsugidx.end(); it++) {
-	grpsugidx.push_back(*it + ugsz0);
-    }
-}
 
 static const char *vlang_to_code[] = {
     "be", "cp1251",
@@ -1159,21 +1137,24 @@ static const char *vlang_to_code[] = {
     "uk", "koi8-u",
 };
 
+static const string cstr_cp1252("CP1252");
+
 string langtocode(const string& lang)
 {
     static STD_UNORDERED_MAP<string, string> lang_to_code;
     if (lang_to_code.empty()) {
-	for (unsigned int i = 0; 
-	     i < sizeof(vlang_to_code) / sizeof(char *); i += 2) {
-	    lang_to_code[vlang_to_code[i]] = vlang_to_code[i+1];
-	}
+        for (unsigned int i = 0;
+                i < sizeof(vlang_to_code) / sizeof(char *); i += 2) {
+            lang_to_code[vlang_to_code[i]] = vlang_to_code[i + 1];
+        }
     }
-    STD_UNORDERED_MAP<string,string>::const_iterator it = 
-	lang_to_code.find(lang);
+    STD_UNORDERED_MAP<string, string>::const_iterator it =
+        lang_to_code.find(lang);
 
     // Use cp1252 by default...
-    if (it == lang_to_code.end())
-	return cstr_cp1252;
+    if (it == lang_to_code.end()) {
+        return cstr_cp1252;
+    }
 
     return it->second;
 }
@@ -1182,16 +1163,19 @@ string localelang()
 {
     const char *lang = getenv("LANG");
 
-    if (lang == 0 || *lang == 0 || !strcmp(lang, "C") || !strcmp(lang, "POSIX"))
-	return "en";
+    if (lang == 0 || *lang == 0 || !strcmp(lang, "C") ||
+            !strcmp(lang, "POSIX")) {
+        return "en";
+    }
     string locale(lang);
     string::size_type under = locale.find_first_of("_");
-    if (under == string::npos)
-	return locale;
+    if (under == string::npos) {
+        return locale;
+    }
     return locale.substr(0, under);
 }
 
-// Initialization for static stuff to be called from main thread before going 
+// Initialization for static stuff to be called from main thread before going
 // multiple
 void smallut_init_mt()
 {
@@ -1242,7 +1226,7 @@ int nsuffpairs = sizeof(suffpairs) / sizeof(struct spair);
 // Periods test strings
 const char* periods[] = {
     "2001",    // Year 2001
-    "2001/",  // 2001 or later 
+    "2001/",  // 2001 or later
     "2001/P3Y", // 2001 -> 2004 or 2005, ambiguous
     "2001-01-01/P3Y", // 01-2001 -> 01 2004
     "2001-03-03/2001-05-01", // Explicit one
@@ -1256,47 +1240,53 @@ const char *thisprog;
 static void cerrdip(const string& s, DateInterval *dip)
 {
     cerr << s << dip->y1 << "-" << dip->m1 << "-" << dip->d1 << "/"
-         << dip->y2 << "-" << dip->m2 << "-" << dip->d2 
+         << dip->y2 << "-" << dip->m2 << "-" << dip->d2
          << endl;
 }
 
 int main(int argc, char **argv)
 {
-    thisprog = *argv++;argc--;
+    thisprog = *argv++;
+    argc--;
 
 #if 1
-    if (argc <=0 ) {
+    if (argc <= 0) {
         cerr << "Usage: smallut <stringtosplit>" << endl;
         exit(1);
     }
-    string s = *argv++;argc--;
+    string s = *argv++;
+    argc--;
     vector<string> vs;
     stringToTokens(s, vs, "/");
-    for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++)
+    for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++) {
         cerr << "[" << *it << "] ";
+    }
     cerr << endl;
     exit(0);
 #elif 0
-    if (argc <=0 ) {
+    if (argc <= 0) {
         cerr << "Usage: smallut <stringtosplit>" << endl;
         exit(1);
     }
-    string s = *argv++;argc--;
+    string s = *argv++;
+    argc--;
     vector<string> vs;
     if (!stringToStrings(s, vs, ":-()")) {
         cerr << "Bad entry" << endl;
         exit(1);
     }
-    for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++)
+    for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++) {
         cerr << "[" << *it << "] ";
+    }
     cerr << endl;
     exit(0);
 #elif 0
-    if (argc <=0 ) {
+    if (argc <= 0) {
         cerr << "Usage: smallut <dateinterval>" << endl;
         exit(1);
     }
-    string s = *argv++;argc--;
+    string s = *argv++;
+    argc--;
     DateInterval di;
     if (!parsedateinterval(s, &di)) {
         cerr << "Parse failed" << endl;
@@ -1316,34 +1306,34 @@ int main(int argc, char **argv)
     exit(0);
 #elif 0
     for (int i = 0; i < npairs; i++) {
-	{
-	    int c = stringicmp(pairs[i].s1, pairs[i].s2);
-	    printf("'%s' %s '%s' ", pairs[i].s1, 
-		   c == 0 ? "==" : c < 0 ? "<" : ">", pairs[i].s2);
-	}
-	{
-	    int cl = stringlowercmp(pairs[i].s1, pairs[i].s2);
-	    printf("L '%s' %s '%s' ", pairs[i].s1, 
-		   cl == 0 ? "==" : cl < 0 ? "<" : ">", pairs[i].s2);
-	}
-	{
-	    int cu = stringuppercmp(pairs[i].s1, pairs[i].s2);
-	    printf("U '%s' %s '%s' ", pairs[i].s1, 
-		   cu == 0 ? "==" : cu < 0 ? "<" : ">", pairs[i].s2);
-	}
-	printf("\n");
+        {
+            int c = stringicmp(pairs[i].s1, pairs[i].s2);
+            printf("'%s' %s '%s' ", pairs[i].s1,
+                   c == 0 ? "==" : c < 0 ? "<" : ">", pairs[i].s2);
+        }
+        {
+            int cl = stringlowercmp(pairs[i].s1, pairs[i].s2);
+            printf("L '%s' %s '%s' ", pairs[i].s1,
+                   cl == 0 ? "==" : cl < 0 ? "<" : ">", pairs[i].s2);
+        }
+        {
+            int cu = stringuppercmp(pairs[i].s1, pairs[i].s2);
+            printf("U '%s' %s '%s' ", pairs[i].s1,
+                   cu == 0 ? "==" : cu < 0 ? "<" : ">", pairs[i].s2);
+        }
+        printf("\n");
     }
 #elif 0
     for (int i = 0; i < nsuffpairs; i++) {
-	int c = stringisuffcmp(suffpairs[i].s1, suffpairs[i].s2);
-	printf("[%s] %s [%s] \n", suffpairs[i].s1, 
-	       c == 0 ? "matches" : c < 0 ? "<" : ">", suffpairs[i].s2);
+        int c = stringisuffcmp(suffpairs[i].s1, suffpairs[i].s2);
+        printf("[%s] %s [%s] \n", suffpairs[i].s1,
+               c == 0 ? "matches" : c < 0 ? "<" : ">", suffpairs[i].s2);
     }
 #elif 0
     std::string testit("\303\251l\303\251gant");
     for (int sz = 10; sz >= 0; sz--) {
-	utf8truncate(testit, sz);
-	cout << testit << endl;
+        utf8truncate(testit, sz);
+        cout << testit << endl;
     }
 #elif 0
     std::string testit("ligne\ndeuxieme ligne\r3eme ligne\r\n");
@@ -1385,30 +1375,30 @@ int main(int argc, char **argv)
     string sshort("ABC");
     string slong("ABCD");
     string sshortsmaller("ABB");
-    
-    vector<pair<string,string> > cmps;
-    cmps.push_back(pair<string,string>(sshort,sshort));
-    cmps.push_back(pair<string,string>(sshort,slong));
-    cmps.push_back(pair<string,string>(slong,sshort));
-    cmps.push_back(pair<string,string>(sshortsmaller,sshort));
-    cmps.push_back(pair<string,string>(sshort, sshortsmaller));
 
-    for (vector<pair<string,string> >::const_iterator it = cmps.begin();
-         it != cmps.end(); it++) {
-        cout << it->first << " " << it->second << " " << 
-            stringicmp(it->first, it->second) << endl;
+    vector<pair<string, string> > cmps;
+    cmps.push_back(pair<string, string>(sshort, sshort));
+    cmps.push_back(pair<string, string>(sshort, slong));
+    cmps.push_back(pair<string, string>(slong, sshort));
+    cmps.push_back(pair<string, string>(sshortsmaller, sshort));
+    cmps.push_back(pair<string, string>(sshort, sshortsmaller));
+
+    for (vector<pair<string, string> >::const_iterator it = cmps.begin();
+            it != cmps.end(); it++) {
+        cout << it->first << " " << it->second << " " <<
+             stringicmp(it->first, it->second) << endl;
     }
     cout << endl;
-    for (vector<pair<string,string> >::const_iterator it = cmps.begin();
-         it != cmps.end(); it++) {
-        cout << it->first << " " << it->second << " " << 
-            stringlowercmp(stringtolower(it->first), it->second) << endl;
+    for (vector<pair<string, string> >::const_iterator it = cmps.begin();
+            it != cmps.end(); it++) {
+        cout << it->first << " " << it->second << " " <<
+             stringlowercmp(stringtolower(it->first), it->second) << endl;
     }
     cout << endl;
-    for (vector<pair<string,string> >::const_iterator it = cmps.begin();
-         it != cmps.end(); it++) {
-        cout << it->first << " " << it->second << " " << 
-            stringuppercmp(it->first, it->second) << endl;
+    for (vector<pair<string, string> >::const_iterator it = cmps.begin();
+            it != cmps.end(); it++) {
+        cout << it->first << " " << it->second << " " <<
+             stringuppercmp(it->first, it->second) << endl;
     }
 
 #endif
diff --git a/src/utils/smallut.h b/src/utils/smallut.h
index 9b2a4b04..976d26ca 100644
--- a/src/utils/smallut.h
+++ b/src/utils/smallut.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004 J.F.Dockes
+/* Copyright (C) 2004-2016 J.F.Dockes
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
  *   the Free Software Foundation; either version 2 of the License, or
@@ -17,226 +17,20 @@
 #ifndef _SMALLUT_H_INCLUDED_
 #define _SMALLUT_H_INCLUDED_
 
-#include <stdlib.h>
+#include <sys/types.h>
 
 #include <string>
 #include <vector>
 #include <map>
 #include <set>
 
-using std::string;
-using std::vector;
-using std::map;
-using std::set;
+// Miscellaneous mostly string-oriented small utilities
+// Note that none of the following code knows about utf-8.
 
-// Note these are all ascii routines
-extern int stringicmp(const string& s1, const string& s2);
-// For find_if etc.
-struct StringIcmpPred {
-    StringIcmpPred(const string& s1) 
-        : m_s1(s1) 
-    {}
-    bool operator()(const string& s2) {
-        return stringicmp(m_s1, s2) == 0;
-    }
-    const string& m_s1;
-};
-
-extern int stringlowercmp(const string& alreadylower, const string& s2);
-extern int stringuppercmp(const string& alreadyupper, const string& s2); 
-
-extern void stringtolower(string& io);
-extern string stringtolower(const string& io);
-
-// Is one string the end part of the other ?
-extern int stringisuffcmp(const string& s1, const string& s2);
-
-// Divine language from locale
-extern std::string localelang();
-// Divine 8bit charset from language
-extern std::string langtocode(const string& lang);
-
-// Compare charset names, removing the more common spelling variations
-extern bool samecharset(const string &cs1, const string &cs2);
-
-// Parse date interval specifier into pair of y,m,d dates.  The format
-// for the time interval is based on a subset of iso 8601 with 
-// the addition of open intervals, and removal of all time indications.
-// 'P' is the Period indicator, it's followed by a length in
-// years/months/days (or any subset thereof)
-// Dates: YYYY-MM-DD YYYY-MM YYYY
-// Periods: P[nY][nM][nD] where n is an integer value. 
-// At least one of YMD must be specified
-// The separator for the interval is /. Interval examples
-// YYYY/ (from YYYY) YYYY-MM-DD/P3Y (3 years after date) etc.
-// This returns a pair of y,m,d dates.
-struct DateInterval {
-    int y1;int m1;int d1; int y2;int m2;int d2;
-};
-extern bool parsedateinterval(const string&s, DateInterval *di);
-extern int monthdays(int mon, int year);
-
-/**
- * Parse input string into list of strings. 
- *
- * Token delimiter is " \t\n" except inside dquotes. dquote inside
- * dquotes can be escaped with \ etc...
- * Input is handled a byte at a time, things will work as long as space tab etc.
- * have the ascii values and can't appear as part of a multibyte char. utf-8 ok
- * but so are the iso-8859-x and surely others. addseps do have to be 
- * single-bytes
- */
-template <class T> bool stringToStrings(const string& s, T &tokens, 
-					const string& addseps = "");
-
-/**
- * Inverse operation:
- */
-template <class T> void stringsToString(const T &tokens, string &s);
-template <class T> std::string stringsToString(const T &tokens);
-
-/**
- * Strings to CSV string. tokens containing the separator are quoted (")
- * " inside tokens is escaped as "" ([word "quote"] =>["word ""quote"""]
- */
-template <class T> void stringsToCSV(const T &tokens, string &s, 
-					char sep = ',');
-
-/**
- * Split input string. No handling of quoting
- */
-extern void stringToTokens(const string &s, vector<string> &tokens, 
-			   const string &delims = " \t", bool skipinit=true);
-
-/** Convert string to boolean */
-extern bool stringToBool(const string &s);
-
-/** Remove instances of characters belonging to set (default {space,
-    tab}) at beginning and end of input string */
-extern void trimstring(string &s, const char *ws = " \t");
-
-/** Escape things like < or & by turning them into entities */
-extern string escapeHtml(const string &in);
-
-/** Replace some chars with spaces (ie: newline chars). This is not utf8-aware
- *  so chars should only contain ascii */
-extern string neutchars(const string &str, const string &chars);
-extern void neutchars(const string &str, string& out, const string &chars);
-
-/** Turn string into something that won't be expanded by a shell. In practise
- *  quote with double-quotes and escape $`\ */
-extern string escapeShell(const string &str);
-
-/** Truncate a string to a given maxlength, avoiding cutting off midword
- *  if reasonably possible. */
-extern string truncate_to_word(const string &input, string::size_type maxlen);
-
-/** Truncate in place in an utf8-legal way */
-extern void utf8truncate(string &s, int maxlen);
-
-void ulltodecstr(unsigned long long val, string& buf);
-void lltodecstr(long long val, string& buf);
-string lltodecstr(long long val);
-string ulltodecstr(unsigned long long val);
-
-/** Convert byte count into unit (KB/MB...) appropriate for display */
-string displayableBytes(off_t size);
-
-/** Break big string into lines */
-string breakIntoLines(const string& in, unsigned int ll = 100, 
-		      unsigned int maxlines= 50);
-/** Small utility to substitute printf-like percents cmds in a string */
-bool pcSubst(const string& in, string& out, const map<char, string>& subs);
-/** Substitute printf-like percents and also %(key) */
-bool pcSubst(const string& in, string& out, const map<string, string>& subs);
-
-/** Append system error message */
-void catstrerror(string *reason, const char *what, int _errno);
-
-/** Portable timegm. MS C has _mkgmtime, but there is a bug in Gminw which
- * makes it inaccessible */
-struct tm;
-time_t portable_timegm(struct tm *tm);
-
-/** Temp buffer with automatic deallocation */
-struct TempBuf {
-    TempBuf() 
-        : m_buf(0)
-    {}
-    TempBuf(int n)
-    {
-        m_buf = (char *)malloc(n);
-    }
-    ~TempBuf()
-    { 
-        if (m_buf)
-            free(m_buf);
-    }
-    char *setsize(int n) { return (m_buf = (char *)realloc(m_buf, n)); }
-    char *buf() {return m_buf;}
-    char *m_buf;
-};
-
-inline void leftzeropad(string& s, unsigned len)
-{
-    if (s.length() && s.length() < len)
-	s = s.insert(0, len - s.length(), '0');
-}
-
-// Duplicate map<string,string> while ensuring no shared string data (to pass
-// to other thread):
-void map_ss_cp_noshr(const std::map<std::string,std::string> s,
-                      std::map<std::string,std::string> *d);
-
-// Code for static initialization of an stl map. Somewhat like Boost.assign. 
-// Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c
-// Example use: map<int, int> m = create_map<int, int> (1,2) (3,4) (5,6) (7,8);
-
-template <typename T, typename U>
-class create_map
-{
-private:
-    std::map<T, U> m_map;
-public:
-    create_map(const T& key, const U& val)
-    {
-        m_map[key] = val;
-    }
-
-    create_map<T, U>& operator()(const T& key, const U& val)
-    {
-        m_map[key] = val;
-        return *this;
-    }
-
-    operator std::map<T, U>()
-    {
-        return m_map;
-    }
-};
-template <typename T>
-class create_vector
-{
-private:
-    std::vector<T> m_vector;
-public:
-    create_vector(const T& val)
-    {
-        m_vector.push_back(val);
-    }
-
-    create_vector<T>& operator()(const T& val)
-    {
-        m_vector.push_back(val);
-        return *this;
-    }
-
-    operator std::vector<T>()
-    {
-        return m_vector;
-    }
-};
+// Call this before going multithread.
+void smallut_init_mt();
 
+#ifndef SMALLUT_DISABLE_MACROS
 #ifndef MIN
 #define MIN(A,B) (((A)<(B)) ? (A) : (B))
 #endif
@@ -246,7 +40,194 @@ public:
 #ifndef deleteZ
 #define deleteZ(X) {delete X;X = 0;}
 #endif
+#endif /* SMALLUT_DISABLE_MACROS */
 
-void smallut_init_mt();
+// Case-insensitive compare. ASCII ONLY !
+extern int stringicmp(const std::string& s1, const std::string& s2);
+
+// For find_if etc.
+struct StringIcmpPred {
+    // Only a reference is kept: s1 must outlive the predicate object.
+    StringIcmpPred(const std::string& s1) : m_s1(s1) {}
+    // True if s2 equals the reference string according to stringicmp().
+    bool operator()(const std::string& s2) const {
+        return stringicmp(m_s1, s2) == 0;
+    }
+    const std::string& m_s1;
+};
+
+extern int stringlowercmp(const std::string& alreadylower,
+                          const std::string& s2);
+extern int stringuppercmp(const std::string& alreadyupper,
+                          const std::string& s2);
+
+extern void stringtolower(std::string& io);
+extern std::string stringtolower(const std::string& io);
+
+// Is one string the end part of the other ?
+extern int stringisuffcmp(const std::string& s1, const std::string& s2);
+
+// Divine language from locale
+extern std::string localelang();
+// Divine 8bit charset from language
+extern std::string langtocode(const std::string& lang);
+
+// Compare charset names, removing the more common spelling variations
+extern bool samecharset(const std::string& cs1, const std::string& cs2);
+
+// Parse date interval specifier into pair of y,m,d dates.  The format
+// for the time interval is based on a subset of iso 8601 with
+// the addition of open intervals, and removal of all time indications.
+// 'P' is the Period indicator, it's followed by a length in
+// years/months/days (or any subset thereof)
+// Dates: YYYY-MM-DD YYYY-MM YYYY
+// Periods: P[nY][nM][nD] where n is an integer value.
+// At least one of YMD must be specified
+// The separator for the interval is /. Interval examples
+// YYYY/ (from YYYY) YYYY-MM-DD/P3Y (3 years after date) etc.
+// This returns a pair of y,m,d dates.
+struct DateInterval {
+    int y1; // first date of the pair: year
+    int m1; // first date of the pair: month
+    int d1; // first date of the pair: day
+    int y2; // second date of the pair: year
+    int m2; // second date of the pair: month
+    int d2; // second date of the pair: day
+};
+extern bool parsedateinterval(const std::string& s, DateInterval *di);
+extern int monthdays(int mon, int year);
+
+/**
+ * Parse input string into list of strings.
+ *
+ * Token delimiter is " \t\n" except inside dquotes. dquote inside
+ * dquotes can be escaped with \ etc...
+ * Input is handled a byte at a time, things will work as long as
+ * space tab etc. have the ascii values and can't appear as part of a
+ * multibyte char. utf-8 ok but so are the iso-8859-x and surely
+ * others. addseps do have to be single-bytes
+ */
+template <class T> bool stringToStrings(const std::string& s, T& tokens,
+                                        const std::string& addseps = "");
+
+/**
+ * Inverse operation:
+ */
+template <class T> void stringsToString(const T& tokens, std::string& s);
+template <class T> std::string stringsToString(const T& tokens);
+
+/**
+ * Strings to CSV string. tokens containing the separator are quoted (")
+ * " inside tokens is escaped as "" ([word "quote"] => ["word ""quote"""])
+ */
+template <class T> void stringsToCSV(const T& tokens, std::string& s,
+                                     char sep = ',');
+
+/**
+ * Split input string. No handling of quoting
+ */
+extern void stringToTokens(const std::string& s,
+                           std::vector<std::string>& tokens,
+                           const std::string& delims = " \t",
+                           bool skipinit = true);
+
+/** Convert string to boolean */
+extern bool stringToBool(const std::string& s);
+
+/** Remove instances of characters belonging to set (default {space,
+    tab}) at beginning and end of input string */
+extern void trimstring(std::string& s, const char *ws = " \t");
+
+/** Escape things like < or & by turning them into entities */
+extern std::string escapeHtml(const std::string& in);
+
+/** Replace some chars with spaces (e.g. newline chars). */
+extern std::string neutchars(const std::string& str, const std::string& chars);
+extern void neutchars(const std::string& str, std::string& out,
+                      const std::string& chars);
+
+/** Turn string into something that won't be expanded by a shell. In practice
+ *  quote with double-quotes and escape $`\ */
+extern std::string escapeShell(const std::string& str);
+
+/** Truncate a string to a given maxlength, avoiding cutting off midword
+ *  if reasonably possible. */
+extern std::string truncate_to_word(const std::string& input,
+                                    std::string::size_type maxlen);
+
+void ulltodecstr(unsigned long long val, std::string& buf);
+void lltodecstr(long long val, std::string& buf);
+std::string lltodecstr(long long val);
+std::string ulltodecstr(unsigned long long val);
+
+/** Convert byte count into unit (KB/MB...) appropriate for display */
+std::string displayableBytes(off_t size);
+
+/** Break big string into lines */
+std::string breakIntoLines(const std::string& in, unsigned int ll = 100,
+                           unsigned int maxlines = 50);
+
+/** Small utility to substitute printf-like percents cmds in a string */
+bool pcSubst(const std::string& in, std::string& out,
+             const std::map<char, std::string>& subs);
+/** Substitute printf-like percents and also %(key) */
+bool pcSubst(const std::string& in, std::string& out,
+             const std::map<std::string, std::string>& subs);
+
+/** Append system error message */
+void catstrerror(std::string *reason, const char *what, int _errno);
+
+/** Portable timegm. MS C has _mkgmtime, but there is a bug in MinGW which
+ * makes it inaccessible */
+struct tm;
+time_t portable_timegm(struct tm *tm);
+
+// Left-pad a non-empty s with '0' up to len chars; empty s is left untouched.
+inline void leftzeropad(std::string& s, unsigned len) {
+    if (!s.empty() && s.length() < len) {
+        s.insert(0, len - s.length(), '0');
+    }
+}
+
+// Code for static initialization of an stl map. Somewhat like Boost.assign.
+// Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c
+// Example use: map<int, int> m = create_map<int, int> (1,2) (3,4) (5,6) (7,8);
+
+template <typename T, typename U>
+class create_map {
+private:
+    std::map<T, U> m_map; // entries accumulated so far
+public:
+    create_map(const T& key, const U& val) { // starts with one entry
+        m_map[key] = val;
+    }
+
+    create_map<T, U>& operator()(const T& key, const U& val) {
+        m_map[key] = val; // replaces any value already stored for key
+        return *this; // lets further (key, val) calls be chained
+    }
+
+    operator std::map<T, U>() { // implicit conversion to the map type
+        return m_map; // returned by value: the caller gets a copy
+    }
+};
+template <typename T>
+class create_vector {
+private:
+    std::vector<T> m_vector; // elements accumulated so far, in call order
+public:
+    create_vector(const T& val) { // starts with one element
+        m_vector.push_back(val);
+    }
+
+    create_vector<T>& operator()(const T& val) {
+        m_vector.push_back(val); // appended after the existing elements
+        return *this; // lets further (val) calls be chained
+    }
+
+    operator std::vector<T>() { // implicit conversion to the vector type
+        return m_vector; // returned by value: the caller gets a copy
+    }
+};
 
 #endif /* _SMALLUT_H_INCLUDED_ */