Merged the changes from the current windows port

2015-09-25 16:14:27 +02:00 · 2015-09-25 16:14:27 +02:00 · 5330685ec1
commit 5330685ec1
parent bf779aca37 99adeae6ff
149 changed files with 10645 additions and 1575 deletions
--- a/.hgignore
+++ b/.hgignore
@ -11,6 +11,16 @@ libtool
 *.lo
 *~
 \#*
+*.obj
+*.sdf
+*.tlog
+*.lib
+*.idb
+*.log
+*.pdb
+.vs
+*.exe
+*.ilk
 ptrans
 src/aclocal.m4
 src/compile
@ -82,12 +92,7 @@ src/qtgui/recoll
 src/qtgui/recoll.app
 src/qtgui/recoll.pro
 src/query/alldeps
-src/query/location.hh
-src/query/position.hh
 src/query/recollq
-src/query/stack.hh
-src/query/wasaparse.cpp
-src/query/wasaparse.hpp
 src/sampleconf/rclmon.sh
 src/sampleconf/recoll.conf
 src/utils/alldeps
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -22,7 +22,8 @@ COMMONCPPFLAGS = -I. \
    -I$(top_srcdir)/rcldb \
    -I$(top_srcdir)/unac \
    -I$(top_srcdir)/utils \
-    -I$(top_srcdir)/xaposix
+    -I$(top_srcdir)/xaposix \
+    -DBUILDING_RECOLL

 AM_CPPFLAGS = -Wall -Wno-unused \
    $(COMMONCPPFLAGS) \
--- a/src/bincimapmime/convert.cc
+++ b/src/bincimapmime/convert.cc
@ -47,7 +47,7 @@ BincStream::~BincStream(void)
 }

 //------------------------------------------------------------------------
-string BincStream::popString(unsigned int size)
+string BincStream::popString(std::string::size_type size)
 {
  if (size > nstr.length())
    size = nstr.length();
--- a/src/bincimapmime/convert.h
+++ b/src/bincimapmime/convert.h
@ -25,6 +25,7 @@
 */
 #ifndef convert_h_included
 #define convert_h_included
+#include <stddef.h>
 #include <string>
 #include <vector>
 #include <iomanip>
@ -93,7 +94,7 @@ namespace Binc {
    std::string tmp;
    for (std::string::const_iterator i = s.begin();
 	 i != s.end() && i + 1 != s.end(); i += 2) {
-      int n;
+      ptrdiff_t n;
      unsigned char c = *i;
      unsigned char d = *(i + 1);
      
@ -122,7 +123,7 @@ namespace Binc {
    for (std::string::const_iterator i = s_in.begin(); i != s_in.end(); ++i) {
      unsigned char c = (unsigned char)*i;
      if (c <= 31 || c >= 127 || c == '\"' || c == '\\')
-	return "{" + toString(s_in.length()) + "}\r\n" + s_in;
+	return "{" + toString((unsigned long)s_in.length()) + "}\r\n" + s_in;
    }
    
    return "\"" + s_in + "\"";
@ -145,7 +146,7 @@ namespace Binc {
  //----------------------------------------------------------------------
  inline void chomp(std::string &s_in, const std::string &chars = " \t\r\n")
  {
-    int n = s_in.length();
+    std::string::size_type n = s_in.length();
    while (n > 1 && chars.find(s_in[n - 1]) != std::string::npos)
      s_in.resize(n-- - 1);
  }
@ -290,7 +291,7 @@ namespace Binc {
    BincStream &operator << (char t);

    //--
-    std::string popString(unsigned int size);
+    std::string popString(std::string::size_type size);

    //--
    char popChar(void);
--- a/src/bincimapmime/mime-inputsource.h
+++ b/src/bincimapmime/mime-inputsource.h
@ -25,7 +25,7 @@
 */
 #ifndef mime_inputsource_h_included
 #define mime_inputsource_h_included
-
+#include "autoconfig.h"
 // Data source for MIME parser

 // Note about large files: we might want to change the unsigned int
@ -49,7 +49,7 @@ namespace Binc {
    inline MimeInputSource(int fd, unsigned int start = 0);
    virtual inline ~MimeInputSource(void);

-    virtual inline size_t fillRaw(char *raw, size_t nbytes);
+    virtual inline ssize_t fillRaw(char *raw, size_t nbytes);
    virtual inline void reset(void);

    virtual inline bool fillInputBuffer(void);
@ -87,7 +87,7 @@ namespace Binc {
  {
  }

-  inline size_t MimeInputSource::fillRaw(char *raw, size_t nbytes)
+  inline ssize_t MimeInputSource::fillRaw(char *raw, size_t nbytes)
  {
      return read(fd, raw, nbytes);
  }
@ -179,7 +179,7 @@ namespace Binc {
    class MimeInputSourceStream : public MimeInputSource {
  public:
    inline MimeInputSourceStream(istream& s, unsigned int start = 0);
-    virtual inline size_t fillRaw(char *raw, size_t nb);
+    virtual inline ssize_t fillRaw(char *raw, size_t nb);
    virtual inline void reset(void);
  private:
      istream& s;
@ -191,7 +191,7 @@ namespace Binc {
  {
  }

-  inline size_t MimeInputSourceStream::fillRaw(char *raw, size_t nb)
+  inline ssize_t MimeInputSourceStream::fillRaw(char *raw, size_t nb)
  {
    // Why can't streams tell how many characters were actually read
    // when hitting eof ?
@ -199,16 +199,16 @@ namespace Binc {
    s.seekg(0, ios::end);
    std::streampos lst = s.tellg();
    s.seekg(st);
-    size_t nbytes = lst - st;
+    size_t nbytes = size_t(lst - st);
    if (nbytes > nb) {
 	nbytes = nb;
    }
    if (nbytes <= 0) {
-	return (size_t)-1;
+	return (ssize_t)-1;
    }

    s.read(raw, nbytes);
-    return nbytes;
+    return static_cast<ssize_t>(nbytes);
  }

  inline void MimeInputSourceStream::reset(void)
--- a/src/bincimapmime/mime-parsefull.cc
+++ b/src/bincimapmime/mime-parsefull.cc
@ -306,9 +306,9 @@ void Binc::MimePart::parseMessageRFC822(vector<Binc::MimePart> *members,
 bool Binc::MimePart::skipUntilBoundary(const string &delimiter,
 				       unsigned int *nlines, bool *eof)
 {
-  int endpos = delimiter.length();
+  string::size_type endpos = delimiter.length();
  char *delimiterqueue = 0;
-  int delimiterpos = 0;
+  string::size_type delimiterpos = 0;
  const char *delimiterStr = delimiter.c_str();
  if (delimiter != "") {
    delimiterqueue = new char[endpos];
@ -340,7 +340,7 @@ bool Binc::MimePart::skipUntilBoundary(const string &delimiter,
      delimiterpos = 0;
      
    if (compareStringToQueue(delimiterStr, delimiterqueue,
-			     delimiterpos, endpos)) {
+			     delimiterpos, int(endpos))) {
      foundBoundary = true;
      break;
    }
@ -451,7 +451,7 @@ void Binc::MimePart::parseMultipart(const string &boundary,
  skipUntilBoundary(delimiter, nlines, eof);

  if (!eof)
-    *boundarysize = delimiter.size();
+    *boundarysize = int(delimiter.size());

  postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);

@ -484,7 +484,7 @@ void Binc::MimePart::parseMultipart(const string &boundary,
    skipUntilBoundary(delimiter, nlines, eof);

    if (!*eof)
-      *boundarysize = delimiter.size();
+      *boundarysize = int(delimiter.size());

    postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
  }
@ -528,7 +528,7 @@ void Binc::MimePart::parseSinglePart(const string &toboundary,
  //    *boundarysize = _toboundary.length();

  char *boundaryqueue = 0;
-  int endpos = _toboundary.length();
+  size_t endpos = _toboundary.length();
  if (toboundary != "") {
    boundaryqueue = new char[endpos];
    memset(boundaryqueue, 0, endpos);
@ -540,7 +540,7 @@ void Binc::MimePart::parseSinglePart(const string &toboundary,
  string line;
  bool toboundaryIsEmpty = (toboundary == "");
  char c;
-  int boundarypos = 0;
+  string::size_type boundarypos = 0;
  while (mimeSource->getChar(&c)) {
    if (c == '\n') { ++*nbodylines; ++*nlines; }

@ -553,8 +553,8 @@ void Binc::MimePart::parseSinglePart(const string &toboundary,
      boundarypos = 0;
      
    if (compareStringToQueue(_toboundaryStr, boundaryqueue,
-			     boundarypos, endpos)) {
-      *boundarysize = _toboundary.length();
+			     boundarypos, int(endpos))) {
+      *boundarysize = static_cast<int>(_toboundary.length());
      break;
    }
  }
--- a/src/bincimapmime/mime-parseonlyheader.cc
+++ b/src/bincimapmime/mime-parseonlyheader.cc
@ -119,7 +119,7 @@ int Binc::MimePart::doParseOnlyHeader(MimeInputSource *ms,
      if (c == '\n') ++nlines;
      if (c == ':') break;
      if (c == '\n') {
-	for (int i = name.length() - 1; i >= 0; --i)
+	for (string::size_type i = name.length(); i > 0; --i) // unsigned index: "i >= 0" would never end
 	  mimeSource->ungetChar();

 	quit = true;
--- a/src/common/autoconfig-win.h
+++ b/src/common/autoconfig-win.h
@ -0,0 +1,186 @@
+/* Manually edited version of autoconfig.h for windows. Many things are
+overridden in the c++ code by ifdefs _WIN32 anyway  */
+#ifndef _AUTOCONFIG_H_INCLUDED
+#define _AUTOCONFIG_H_INCLUDED
+/* Define if building universal (internal helper macro) */
+/* #undef AC_APPLE_UNIVERSAL_BUILD */
+
+/* Path to the aspell api include file */
+/* #undef ASPELL_INCLUDE "aspell-local.h" */
+
+/* Path to the aspell program */
+/* #define ASPELL_PROG "/usr/bin/aspell" */
+
+/* No X11 session monitoring support */
+#define DISABLE_X11MON
+
+/* Path to the fam api include file */
+/* #undef FAM_INCLUDE */
+
+/* Path to the file program */
+#define FILE_PROG "/usr/bin/file"
+
+/* "Have C++0x" */
+#undef HAVE_CXX0X_UNORDERED
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the `dl' library (-ldl). */
+#define HAVE_LIBDL 1
+
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#define HAVE_LIBPTHREAD 1
+
+/* Define to 1 if you have the `z' library (-lz). */
+#define HAVE_LIBZ 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the `mkdtemp' function. */
+/* #undef HAVE_MKDTEMP */
+
+/* Define to 1 if you have the `posix_spawn,' function. */
+/* #undef HAVE_POSIX_SPAWN_ */
+
+/* Define to 1 if you have the `setrlimit' function. */
+#define HAVE_SETRLIMIT 1
+
+/* Has std::shared_ptr */
+#define HAVE_SHARED_PTR_STD
+
+/* Has std::tr1::shared_ptr */
+/* #undef HAVE_SHARED_PTR_TR1 */
+
+/* Define to 1 if you have the <spawn.h> header file. */
+#define HAVE_SPAWN_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/mount.h> header file. */
+/* #undef HAVE_SYS_MOUNT_H */
+
+/* Define to 1 if you have the <sys/param.h,> header file. */
+/* #undef HAVE_SYS_PARAM_H_ */
+
+/* Define to 1 if you have the <sys/statfs.h> header file. */
+/* #undef HAVE_SYS_STATFS_H */
+
+/* Define to 1 if you have the <sys/statvfs.h> header file. */
+/* #undef HAVE_SYS_STATVFS_H */
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <sys/vfs.h> header file. */
+/* #undef HAVE_SYS_VFS_H */
+
+/* "Have tr1" */
+/* #undef HAVE_TR1_UNORDERED */
+
+/* Define to 1 if you have the <unistd.h> header file. */
+/* #undef HAVE_UNISTD_H */
+
+/* Use multiple threads for indexing */
+#define IDX_THREADS 1
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#define LT_OBJDIR ".libs/"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT ""
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "Recoll"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "Recoll 1.22.0"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "recoll"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "1.22.0"
+
+/* putenv parameter is const */
+/* #undef PUTENV_ARG_CONST */
+
+/* iconv parameter 2 is const char** */
+#define RCL_ICONV_INBUF_CONST 1
+
+/* Real time monitoring option */
+#undef RCL_MONITOR
+
+/* Split camelCase words */
+/* #undef RCL_SPLIT_CAMELCASE */
+
+/* Compile the aspell interface */
+/* #undef RCL_USE_ASPELL */
+
+/* Compile the fam interface */
+/* #undef RCL_USE_FAM */
+
+/* Compile the inotify interface */
+#define RCL_USE_INOTIFY 1
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Use posix_spawn() */
+/* #undef USE_POSIX_SPAWN */
+
+/* Enable using the system's 'file' command to id mime if we fail internally
+   */
+/* #undef USE_SYSTEM_FILE_COMMAND */
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+/* #  undef WORDS_BIGENDIAN */
+# endif
+#endif
+
+/* Define to 1 if the X Window System is missing or not being used. */
+/* #undef X_DISPLAY_MISSING */
+
+/* Enable large inode numbers on Mac OS X 10.5.  */
+#ifndef _DARWIN_USE_64_BIT_INODE
+# define _DARWIN_USE_64_BIT_INODE 1
+#endif
+
+/* Number of bits in a file offset, on hosts where this is settable. */
+/* #undef _FILE_OFFSET_BITS */
+
+/* Define for large files, on AIX-style hosts. */
+/* #undef _LARGE_FILES */
+
+#define DISABLE_WEB_INDEXER
+
+#include "conf_post.h"
+#endif // already included
--- a/src/common/beaglequeuecache.cpp
+++ b/src/common/beaglequeuecache.cpp
@ -35,7 +35,7 @@ BeagleQueueCache::BeagleQueueCache(RclConfig *cnf)
        ccdir = "webcache";
    ccdir = path_tildexpand(ccdir);
    // If not an absolute path, compute relative to config dir
-    if (ccdir.at(0) != '/')
+    if (!path_isabsolute(ccdir))
        ccdir = path_cat(cnf->getConfDir(), ccdir);

    int maxmbs = 40;
--- a/src/common/conf_post.h
+++ b/src/common/conf_post.h
@ -26,3 +26,42 @@
 #  define STD_SHARED_PTR    RefCntr
 #endif

+#ifdef _WIN32
+#include "safewindows.h"
+typedef int pid_t;
+inline int readlink(const char *cp, void *buf, int cnt) {
+	return -1;
+}
+#define HAVE_STRUCT_TIMESPEC
+#define strdup _strdup
+#define timegm _mkgmtime
+#ifdef _MSC_VER
+// Use the MSVC *_s variants so that the caller's struct tm (B) is filled and returned, as with POSIX
+#define gmtime_r(A, B) (gmtime_s((B), (A)) == 0 ? (B) : 0)
+#define localtime_r(A,B) (localtime_s((B), (A)) == 0 ? (B) : 0)
+#define PATH_MAX MAX_PATH
+#define MAXPATHLEN PATH_MAX
+typedef int mode_t;
+#endif
+
+typedef DWORD32 u_int32_t;
+typedef DWORD64 u_int64_t;
+typedef unsigned __int8 u_int8_t;
+typedef int ssize_t;
+#define strncasecmp _strnicmp
+#define strcasecmp _stricmp
+#define ftruncate _chsize_s
+#define chdir _chdir
+
+#define R_OK 4
+#define W_OK 2
+#define X_OK 4
+#define RECOLL_DATADIR "C:\\recoll\\"
+#define S_ISLNK(X) false
+#define lstat stat
+#define fseeko _fseeki64 
+#define ftello (off_t)_ftelli64
+#define timegm _mkgmtime
+#endif
+
+
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@ -19,11 +19,13 @@

 #include <stdio.h>
 #include <errno.h>
+#ifndef _WIN32
 #include <langinfo.h>
+#include <sys/param.h>
+#endif
 #include <limits.h>
 #include "safesysstat.h"
 #include "safeunistd.h"
-#include <sys/param.h>
 #ifdef __FreeBSD__
 #include <osreldate.h>
 #endif
@ -45,6 +47,7 @@
 #include "readfile.h"
 #include "fstreewalk.h"
 #include "cpuconf.h"
+#include "execmd.h"

 using namespace std;

@ -120,7 +123,8 @@ void RclConfig::zeroMe() {

 bool RclConfig::isDefaultConfig() const
 {
-    string defaultconf = path_cat(path_canon(path_home()), ".recoll/");
+    string defaultconf = path_cat(path_homedata(),
+                                  path_defaultrecollconfsubdir());
    string specifiedconf = path_canon(m_confdir);
    path_catslash(specifiedconf);
    return !defaultconf.compare(specifiedconf);
@ -146,14 +150,7 @@ RclConfig::RclConfig(const string *argcnf)
    }

    // Compute our data dir name, typically /usr/local/share/recoll
-    const char *cdatadir = getenv("RECOLL_DATADIR");
-    if (cdatadir == 0) {
-	// If not in environment, use the compiled-in constant. 
-	m_datadir = RECOLL_DATADIR;
-    } else {
-	m_datadir = cdatadir;
-    }
-
+    m_datadir = path_sharedatadir();
    // We only do the automatic configuration creation thing for the default
    // config dir, not if it was specified through -c or RECOLL_CONFDIR
    bool autoconfdir = false;
@ -172,7 +169,7 @@ RclConfig::RclConfig(const string *argcnf)
 	    m_confdir = path_canon(cp);
 	} else {
 	    autoconfdir = true;
-	    m_confdir = path_cat(path_home(), ".recoll/");
+	    m_confdir = path_cat(path_homedata(), path_defaultrecollconfsubdir());
 	}
    }

@ -200,6 +197,7 @@ RclConfig::RclConfig(const string *argcnf)
    // is called from the main thread at once, by constructing a config
    // from recollinit
    if (o_localecharset.empty()) {
+#ifndef _WIN32
 	const char *cp;
 	cp = nl_langinfo(CODESET);
 	// We don't keep US-ASCII. It's better to use a superset
@ -217,6 +215,9 @@ RclConfig::RclConfig(const string *argcnf)
 	    // Use cp1252 instead of iso-8859-1, it's a superset.
 	    o_localecharset = string(cstr_cp1252);
 	}
+#else
+        o_localecharset = "UTF-8";
+#endif
 	LOGDEB1(("RclConfig::getDefCharset: localecharset [%s]\n",
 		 o_localecharset.c_str()));
    }
@ -635,7 +636,7 @@ bool RclConfig::inStopSuffixes(const string& fni)
 	     it != stoplist.end(); it++) {
 	    STOPSUFFIXES->insert(SfString(stringtolower(*it)));
 	    if (m_maxsufflen < it->length())
-		m_maxsufflen = it->length();
+		m_maxsufflen = int(it->length());
 	}
    }

@ -1154,7 +1155,7 @@ string RclConfig::getConfdirPath(const char *varname, const char *dflt) const
    } else {
 	result = path_tildexpand(result);
 	// If not an absolute path, compute relative to config dir
-	if (result.at(0) != '/') {
+	if (!path_isabsolute(result)) {
 	    result = path_cat(getConfDir(), result);
 	}
    }
@ -1212,7 +1213,7 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
 	    // This call always succeeds because the key comes from getNames()
 	    if (m_ptrans->get(*it, npath, dbdir)) { 
 		path = path.replace(0, it->size(), npath);
-		url = "file://" + path;
+		url = path_pathtofileurl(path);
 	    }
 	    break;
 	}
@ -1305,45 +1306,45 @@ vector<string> RclConfig::getDaemSkippedPaths() const
 }


-// Look up an executable filter.  We look in $RECOLL_FILTERSDIR,
-// filtersdir in config file, then let the system use the PATH
+// Look up an executable filter.  We add $RECOLL_FILTERSDIR,
+// and filtersdir from the config file to the PATH, then use execmd::which()
 string RclConfig::findFilter(const string &icmd) const
 {
    // If the path is absolute, this is it
-    if (icmd[0] == '/')
+    if (path_isabsolute(icmd))
 	return icmd;

-    string cmd;
-    const char *cp;
+    const char *cp = getenv("PATH");
+    if (!cp) //??
+        cp = "";
+    string PATH(cp);

-    // Filters dir from environment ?
+    // For historical reasons: check in personal config directory
+    PATH = getConfDir() + path_PATHsep() + PATH;
+
+    string temp;
+    // Prepend $datadir/filters
+    temp = path_cat(m_datadir, "filters");
+    PATH = temp + path_PATHsep() + PATH;
+
+    // Prepend possible configuration parameter?
+    if (getConfParam(string("filtersdir"), temp)) {
+        temp = path_tildexpand(temp);
+        PATH = temp + path_PATHsep() + PATH;
+    }
+
+    // Prepend possible environment variable
    if ((cp = getenv("RECOLL_FILTERSDIR"))) {
-	cmd = path_cat(cp, icmd);
-	if (access(cmd.c_str(), X_OK) == 0)
-	    return cmd;
-    } 
-    // Filters dir as configuration parameter?
-    if (getConfParam(string("filtersdir"), cmd)) {
-	cmd = path_cat(cmd, icmd);
-	if (access(cmd.c_str(), X_OK) == 0)
-	    return cmd;
+        PATH = string(cp) + path_PATHsep() + PATH;
    } 

-    // Filters dir as datadir subdir. Actually the standard case, but
-    // this is normally the same value found in config file (previous step)
-    cmd = path_cat(m_datadir, "filters");
-    cmd = path_cat(cmd, icmd);
-    if (access(cmd.c_str(), X_OK) == 0)
-	return cmd;
-
-    // Last resort for historical reasons: check in personal config
-    // directory
-    cmd = path_cat(getConfDir(), icmd);
-    if (access(cmd.c_str(), X_OK) == 0)
-	return cmd;
-
-    // Let the shell try to find it...
-    return icmd;
+    string cmd;
+    if (ExecCmd::which(icmd, cmd, PATH.c_str())) {
+        return cmd;
+    } else {
+        // Let the shell try to find it...
+        return icmd;
+    }
 }

 /** 
--- a/src/common/rclinit.cpp
+++ b/src/common/rclinit.cpp
@ -17,6 +17,9 @@
 #include "autoconfig.h"

 #include <stdio.h>
+#ifdef _WIN32
+#include "safewindows.h"
+#endif
 #include <signal.h>
 #include <locale.h>
 #include <pthread.h>
@ -33,8 +36,6 @@
 #include "smallut.h"
 #include "execmd.h"

-static const int catchedSigs[] = {SIGINT, SIGQUIT, SIGTERM, SIGUSR1, SIGUSR2};
-
 static pthread_t mainthread_id;

 static void siglogreopen(int)
@ -43,23 +44,17 @@ static void siglogreopen(int)
 	DebugLog::reopen();
 }

-RclConfig *recollinit(RclInitFlags flags, 
-		      void (*cleanup)(void), void (*sigcleanup)(int), 
-		      string &reason, const string *argcnf)
+#ifndef _WIN32
+// We would like to block SIGCHLD globally, but we can't because
+// QT uses it. Have to block it inside execmd.cpp
+static const int catchedSigs[] = {SIGINT, SIGQUIT, SIGTERM, SIGUSR1, SIGUSR2};
+void initAsyncSigs(void (*sigcleanup)(int))
 {
-    if (cleanup)
-	atexit(cleanup);
-
    // We ignore SIGPIPE always. All pieces of code which can write to a pipe
    // must check write() return values.
+#ifndef _WIN32
    signal(SIGPIPE, SIG_IGN);
-    
-    // Make sure the locale is set. This is only for converting file names 
-    // to utf8 for indexing.
-    setlocale(LC_CTYPE, "");
-
-    // We would like to block SIGCHLD globally, but we can't because
-    // QT uses it. Have to block it inside execmd.cpp
+#endif

    // Install app signal handler
    if (sigcleanup) {
@ -75,11 +70,91 @@ RclConfig *recollinit(RclInitFlags flags,
 	    }
    }

+    // Install log rotate sig handler
+    {
+	struct sigaction action;
+	action.sa_handler = siglogreopen;
+	action.sa_flags = 0;
+	sigemptyset(&action.sa_mask);
+	if (signal(SIGHUP, SIG_IGN) != SIG_IGN) {
+	    if (sigaction(SIGHUP, &action, 0) < 0) {
+		perror("Sigaction failed");
+	    }
+	}
+    }
+}
+#else
+
+// Windows signals etc.
+//
+// ^C can be caught by the signal() emulation, but not ^Break
+// apparently, which is why we use the native approach too
+//
+// When a keyboard interrupt occurs, windows creates a thread inside
+// the process and calls the handler. The process exits when the
+// handler returns or after at most 10 seconds
+//
+// In practice, only recollindex sets sigcleanup(), and the routine
+// just sets a global termination flag. So we just call it and sleep,
+// hoping that cleanup does not take more than what Windows will let
+// us live.
+
+static void (*l_sigcleanup)(int);
+
+static BOOL WINAPI CtrlHandler(DWORD fdwCtrlType) // console control handler, installed by initAsyncSigs()
+{
+    if (l_sigcleanup == 0) // no application cleanup routine was registered
+        return FALSE;
+
+    switch(fdwCtrlType) { 
+    case CTRL_C_EVENT: 
+    case CTRL_CLOSE_EVENT: 
+    case CTRL_BREAK_EVENT: 
+    case CTRL_LOGOFF_EVENT: 
+    case CTRL_SHUTDOWN_EVENT:
+        l_sigcleanup(SIGINT); // every listed event is reported to the application as SIGINT
+        Sleep(10000); // stay inside the handler so that the cleanup gets time to run (see the comment above)
+        return TRUE;
+    default: 
+        return FALSE; 
+    } 
+} 
+ 
+static const int catchedSigs[] = {SIGINT, SIGTERM};
+void initAsyncSigs(void (*sigcleanup)(int)) // Windows version; sigcleanup may be null
+{
+    // Install app signal handler
+    if (sigcleanup) {
+        l_sigcleanup = sigcleanup; // remembered for CtrlHandler()
+	for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) {
+	    if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) { // signals which were already ignored stay ignored
+		signal(catchedSigs[i], sigcleanup);
+	    }
+        }
+    }
+    SetConsoleCtrlHandler((PHANDLER_ROUTINE)CtrlHandler, TRUE); // always installed; CtrlHandler() returns FALSE while no cleanup routine is set
+}
+
+#endif
+
+RclConfig *recollinit(RclInitFlags flags, 
+		      void (*cleanup)(void), void (*sigcleanup)(int), 
+		      string &reason, const string *argcnf)
+{
+    if (cleanup)
+	atexit(cleanup);
+
+    // Make sure the locale is set. This is only for converting file names 
+    // to utf8 for indexing.
+    setlocale(LC_CTYPE, "");
+
    DebugLog::getdbl()->setloglevel(DEBDEB1);
    DebugLog::setfilename("stderr");
    if (getenv("RECOLL_LOGDATE"))
        DebugLog::getdbl()->logdate(1);

+    initAsyncSigs(sigcleanup);
+    
    RclConfig *config = new RclConfig(argcnf);
    if (!config || !config->ok()) {
 	reason = "Configuration could not be built:\n";
@ -105,7 +180,7 @@ RclConfig *recollinit(RclInitFlags flags,
    if (!logfilename.empty()) {
 	logfilename = path_tildexpand(logfilename);
 	// If not an absolute path or , compute relative to config dir
-	if (logfilename.at(0) != '/' && 
+	if (!path_isabsolute(logfilename) && 
 	    !DebugLog::DebugLog::isspecialname(logfilename.c_str())) {
 	    logfilename = path_cat(config->getConfDir(), logfilename);
 	}
@ -115,18 +190,6 @@ RclConfig *recollinit(RclInitFlags flags,
 	int lev = atoi(loglevel.c_str());
 	DebugLog::getdbl()->setloglevel(lev);
    }
-    // Install log rotate sig handler
-    {
-	struct sigaction action;
-	action.sa_handler = siglogreopen;
-	action.sa_flags = 0;
-	sigemptyset(&action.sa_mask);
-	if (signal(SIGHUP, SIG_IGN) != SIG_IGN) {
-	    if (sigaction(SIGHUP, &action, 0) < 0) {
-		perror("Sigaction failed");
-	    }
-	}
-    }

    // Make sure the locale charset is initialized (so that multiple
    // threads don't try to do it at once).
@ -139,14 +202,20 @@ RclConfig *recollinit(RclInitFlags flags,
    // Init smallut and pathut static values
    pathut_init_mt();
    smallut_init_mt();
-
+    // Init execmd.h static PATH and PATHELT splitting
+    {string bogus;
+        ExecCmd::which("nosuchcmd", bogus);
+    }
+    
    // Init Unac translation exceptions
    string unacex;
    if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty()) 
 	unac_set_except_translations(unacex.c_str());

 #ifndef IDX_THREADS
+#ifndef _WIN32
    ExecCmd::useVfork(true);
+#endif
 #else
    // Keep threads init behind log init, but make sure it's done before
    // we do the vfork choice ! The latter is not used any more actually, 
@ -156,11 +225,15 @@ RclConfig *recollinit(RclInitFlags flags,
    bool novfork;
    config->getConfParam("novfork", &novfork);
    if (novfork) {
+#ifndef _WIN32
 	LOGDEB0(("rclinit: will use fork() for starting commands\n"));
        ExecCmd::useVfork(false);
+#endif
    } else {
+#ifndef _WIN32
 	LOGDEB0(("rclinit: will use vfork() for starting commands\n"));
 	ExecCmd::useVfork(true);
+#endif
    }
 #endif

@ -179,10 +252,11 @@ RclConfig *recollinit(RclInitFlags flags,
    return config;
 }

-// Signals are handled by the main thread. All others should call this routine
-// to block possible signals
+// Signals are handled by the main thread. All others should call this
+// routine to block possible signals
 void recoll_threadinit()
 {
+#ifndef _WIN32
    sigset_t sset;
    sigemptyset(&sset);

@ -190,6 +264,14 @@ void recoll_threadinit()
 	sigaddset(&sset, catchedSigs[i]);
    sigaddset(&sset, SIGHUP);
    pthread_sigmask(SIG_BLOCK, &sset, 0);
+#else
+    // Not sure that this is needed at all or correct under windows.
+    for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) {
+        if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) {
+            signal(catchedSigs[i], SIG_IGN);
+        }
+    }
+#endif
 }

 bool recoll_ismainthread()
--- a/src/common/syngroups.cpp
+++ b/src/common/syngroups.cpp
@ -110,7 +110,7 @@ bool SynGroups::setfile(const string& fn)
 	lnum++;

        {
-            int ll = strlen(cline);
+            size_t ll = strlen(cline);
            while (ll > 0 && (cline[ll-1] == '\n' || cline[ll-1] == '\r')) {
                cline[ll-1] = 0;
                ll--;
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@ -142,6 +142,8 @@ static inline int whatcc(unsigned int c)
 	} else {
 	    vector<unsigned int>::iterator it = 
 		lower_bound(vpuncblocks.begin(), vpuncblocks.end(), c);
+		if (it == vpuncblocks.end())
+			return LETTER;
 	    if (c == *it)
 		return SPACE;
 	    if ((it - vpuncblocks.begin()) % 2 == 1) {
@ -217,11 +219,11 @@ bool          TextSplit::o_deHyphenate = false;
 // Final term checkpoint: do some checking (the kind which is simpler
 // to do here than in the main loop), then send term to our client.
 inline bool TextSplit::emitterm(bool isspan, string &w, int pos, 
-				int btstart, int btend)
+				size_t btstart, size_t btend)
 {
    LOGDEB2(("TextSplit::emitterm: [%s] pos %d\n", w.c_str(), pos));

-    unsigned int l = w.length();
+    int l = int(w.length());

 #ifdef TEXTSPLIT_STATS
    // Update word length statistics. Do this before we filter out
@ -230,7 +232,7 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
 	m_stats.newsamp(m_wordChars);
 #endif

-    if (l > 0 && l < (unsigned)m_maxWordLength) {
+    if (l > 0 && l < m_maxWordLength) {
 	// 1 byte word: we index single ascii letters and digits, but
 	// nothing else. We might want to turn this into a test for a
 	// single utf8 character instead ?
@ -245,9 +247,9 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
 	    }
 	}
 	if (pos != m_prevpos || l != m_prevlen) {
-	    bool ret = takeword(w, pos, btstart, btend);
+	    bool ret = takeword(w, pos, int(btstart), int(btend));
 	    m_prevpos = pos;
-	    m_prevlen = w.length();
+	    m_prevlen = int(w.length());
 	    return ret;
 	}
 	LOGDEB2(("TextSplit::emitterm:dup: [%s] pos %d\n", w.c_str(), pos));
@ -293,7 +295,7 @@ bool TextSplit::span_is_acronym(string *acronym)

 // Generate terms from span. Have to take into account the
 // flags: ONLYSPANS, NOSPANS, noNumbers
-bool TextSplit::words_from_span(int bp)
+bool TextSplit::words_from_span(size_t bp)
 {
 #if 0
    cerr << "Span: [" << m_span << "] " << " w_i_s size: " << 
@ -305,10 +307,10 @@ bool TextSplit::words_from_span(int bp)
    }
    cerr << endl;
 #endif
-    unsigned int spanwords = m_words_in_span.size();
+    int spanwords = int(m_words_in_span.size());
    int pos = m_spanpos;
    // Byte position of the span start
-    int spboffs = bp - m_span.size();
+    size_t spboffs = bp - m_span.size();

    if (o_deHyphenate && spanwords == 2 && 
 	m_span[m_words_in_span[0].second] == '-') {
@ -322,13 +324,13 @@ bool TextSplit::words_from_span(int bp)
 		     m_spanpos, spboffs, spboffs + m_words_in_span[1].second);
    }

-    for (unsigned int i = 0; 
+    for (int i = 0; 
         i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords); 
         i++, pos++) {

        int deb = m_words_in_span[i].first;

-        for (unsigned int j = ((m_flags&TXTS_ONLYSPANS) ? spanwords-1 : i);
+        for (int j = ((m_flags&TXTS_ONLYSPANS) ? spanwords-1 : i);
             j < ((m_flags&TXTS_NOSPANS) ? i+1 : spanwords);
             j++) {

@ -362,11 +364,11 @@ bool TextSplit::words_from_span(int bp)
 * @param spanerase Set if the current span is at its end. Process it.
 * @param bp        The current BYTE position in the stream
 */
-inline bool TextSplit::doemit(bool spanerase, int bp)
+inline bool TextSplit::doemit(bool spanerase, size_t bp)
 {
    LOGDEB2(("TextSplit::doemit: sper %d bp %d spp %d spanwords %u wS %d wL %d "
            "inn %d span [%s]\n",
-            spanerase, bp, m_spanpos, m_words_in_span.size(), 
+             spanerase, int(bp), m_spanpos, unsigned(m_words_in_span.size()), 
            m_wordStart, m_wordLen, m_inNumber, m_span.c_str()));

    if (m_wordLen) {
@ -404,8 +406,8 @@ inline bool TextSplit::doemit(bool spanerase, int bp)
 	    case '\'':
 		m_span.resize(m_span.length()-1);
                if (m_words_in_span.size() &&
-                    m_words_in_span.back().second > m_span.size())
-                    m_words_in_span.back().second = m_span.size();
+                    m_words_in_span.back().second > int(m_span.size()))
+                    m_words_in_span.back().second = int(m_span.size());
 		if (--bp < 0) 
 		    bp = 0;
 		break;
@ -422,7 +424,7 @@ inline bool TextSplit::doemit(bool spanerase, int bp)

    } else {
    
-	m_wordStart = m_span.length();
+	m_wordStart = int(m_span.length());

    }

@ -830,16 +832,16 @@ bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
 	}

 	// Take note of byte offset for this character.
-	boffs[nchars-1] = it.getBpos();
+	boffs[nchars-1] = int(it.getBpos());

 	// Output all new ngrams: they begin at each existing position
 	// and end after the new character. onlyspans->only output
 	// maximum words, nospans=> single chars
 	if (!(m_flags & TXTS_ONLYSPANS) || nchars == o_CJKNgramLen) {
-	    unsigned int btend = it.getBpos() + it.getBlen();
-	    unsigned int loopbeg = (m_flags & TXTS_NOSPANS) ? nchars-1 : 0;
-	    unsigned int loopend = (m_flags & TXTS_ONLYSPANS) ? 1 : nchars;
-	    for (unsigned int i = loopbeg; i < loopend; i++) {
+	    int btend = int(it.getBpos() + it.getBlen());
+	    int loopbeg = (m_flags & TXTS_NOSPANS) ? nchars-1 : 0;
+	    int loopend = (m_flags & TXTS_ONLYSPANS) ? 1 : nchars;
+	    for (int i = loopbeg; i < loopend; i++) {
 		if (!takeword(it.buffer().substr(boffs[i], 
 						       btend-boffs[i]),
 				m_wordpos - (nchars-i-1), boffs[i], btend)) {
@ -860,7 +862,7 @@ bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
    // If onlyspans is set, there may be things to flush in the buffer
    // first
    if ((m_flags & TXTS_ONLYSPANS) && nchars > 0 && nchars != o_CJKNgramLen)  {
-	unsigned int btend = it.getBpos(); // Current char is out
+	int btend = int(it.getBpos()); // Current char is out
 	if (!takeword(it.buffer().substr(boffs[0], btend-boffs[0]),
 			    m_wordpos - nchars,
 			    boffs[0], btend)) {
--- a/src/common/textsplit.h
+++ b/src/common/textsplit.h
@ -184,7 +184,7 @@ private:
    // Current span. Might be jf.dockes@wanadoo.f
    std::string        m_span; 

-    std::vector <std::pair<unsigned int, unsigned int> > m_words_in_span;
+    std::vector <std::pair<int, int> > m_words_in_span;

    // Current word: no punctuation at all in there. Byte offset
    // relative to the current span and byte length
@ -201,7 +201,7 @@ private:
    // It may happen that our cleanup would result in emitting the
    // same term twice. We try to avoid this
    int           m_prevpos;
-    unsigned int  m_prevlen;
+    int           m_prevlen;

 #ifdef TEXTSPLIT_STATS
    // Stats counters. These are processed in TextSplit rather than by a 
@ -215,11 +215,11 @@ private:
    // This processes cjk text:
    bool cjk_to_words(Utf8Iter *it, unsigned int *cp);

-    bool emitterm(bool isspan, std::string &term, int pos, int bs, int be);
-    bool doemit(bool spanerase, int bp);
+    bool emitterm(bool isspan, std::string &term, int pos, size_t bs,size_t be);
+    bool doemit(bool spanerase, size_t bp);
    void discardspan();
    bool span_is_acronym(std::string *acronym);
-    bool words_from_span(int bp);
+    bool words_from_span(size_t bp);
 };

 #endif /* _TEXTSPLIT_H_INCLUDED_ */
--- a/src/configure.ac
+++ b/src/configure.ac
@ -408,13 +408,13 @@ AC_ARG_ENABLE(recollq,
   need for Qt). This is done by default if --disable-qtgui is set but this
   option enables forcing it.]),
        enableRECOLLQ=$enableval, enableRECOLLQ="no")
-
-if text X"$enableRECOLLQ" != X ; then
+if test X"$enableRECOLLQ" != X ; then
    AM_CONDITIONAL(MAKECMDLINE, [test X$enableRECOLLQ = Xyes])
 else
    AM_CONDITIONAL(MAKECMDLINE, [test X$enableQT = Xno])
 fi

+
 if test X$enableQT = Xyes ; then

  if test X$QTDIR != X ; then
@ -586,6 +586,5 @@ AC_SUBST(RCLLIBVERSION)
 AC_CONFIG_FILES(Makefile)
 AC_CONFIG_FILES(common/rclversion.h)
 AC_CONFIG_FILES(python/recoll/setup.py)
-AC_CONFIG_FILES(sampleconf/recoll.conf)

 AC_OUTPUT
--- a/src/filters/rcldoc.py
+++ b/src/filters/rcldoc.py
@ -0,0 +1,165 @@
+#!/usr/bin/env python
+
+import rclexecm
+import rclexec1
+import re
+import sys
+import os
+
+# Post-processor for the output of antiword: create the HTML header and
+# tail, process continuation (hyphenated) lines, escape HTML special
+# characters, accumulate the data.
+class WordProcessData:
+    def __init__(self, em):
+        # Protocol object. Used here for htmlescape() and setmimetype()
+        self.em = em
+        # Accumulated HTML output
+        self.out = ""
+        # Beginning of a word which was cut at the end of the previous
+        # line. It is prepended to the next line.
+        self.cont = ""
+        # Set when the first non-empty line is seen (HTML header emitted)
+        self.gotdata = False
+        # Line with continued word (ending in -)
+        # we strip the - which is not nice for actually hyphenated word.
+        # What to do ?
+        # Raw strings: the backslashes are for the regexp engine, not
+        # for Python string escape processing.
+        self.patcont = re.compile(r'''[\w][-]$''')
+        # Pattern for breaking continuation at last word start
+        self.patws = re.compile(r'''([\s])([\w]+)(-)$''')
+
+    def takeLine(self, line):
+        '''Process one line of antiword output (without its end of line)
+        and append the resulting HTML to self.out'''
+        if not self.gotdata:
+            # Skip empty lines until we get actual data
+            if line == "":
+                return
+            # Note the space after "Content-Type": the attributes must be
+            # separated for the tag to be well-formed.
+            self.out = '<html><head><title></title>' + \
+                       '<meta http-equiv="Content-Type" ' + \
+                       'content="text/html;charset=UTF-8">' + \
+                       '</head><body><p>'
+            self.gotdata = True
+
+        if self.cont:
+            line = self.cont + line
+            self.cont = ""
+
+        # A lone form feed is a page break.
+        # NOTE(review): rclexec1.Executor strips the lines before calling
+        # us, which also removes form feeds, so this may never match when
+        # run from there. To be confirmed.
+        if line == "\f":
+            self.out += "</p><hr><p>"
+            return
+
+        if self.patcont.search(line):
+            # Break at last whitespace
+            match = self.patws.search(line)
+            if match:
+                # Keep the word start (without the hyphen) for the next
+                # line, output what comes before it.
+                self.cont = line[match.start(2):match.end(2)]
+                line = line[0:match.start(1)]
+            else:
+                # No whitespace before the last word: carry the whole
+                # line (hyphen included) over to the next one.
+                self.cont = line
+                line = ""
+
+        if line:
+            self.out += self.em.htmlescape(line) + "<br>"
+        else:
+            self.out += "<br>"
+
+    def wrapData(self):
+        '''Close the HTML document if one was started, set the output
+        MIME type and return the accumulated data'''
+        if self.gotdata:
+            self.out += "</p></body></html>"
+        self.em.setmimetype("text/html")
+        return self.out
+
+# Null data accumulator. We use this when antiword has failed, and the
+# data actually comes from rclrtf, rcltext or wvWare, which all
+# output HTML
+class WordPassData:
+    def __init__(self, em):
+        # Accumulated output
+        self.out = ""
+        # Protocol object, used for setmimetype()
+        self.em = em
+
+    def takeLine(self, line):
+        '''Append one line of HTML to the output, untouched'''
+        # rclexec1.Executor gives us the lines without their end of line.
+        # Put a separator back, else the last word of a line gets glued
+        # to the first word of the next one.
+        self.out += line + "\n"
+
+    def wrapData(self):
+        '''Set the output MIME type and return the accumulated data'''
+        self.em.setmimetype("text/html")
+        return self.out
+        
+
+# Filter for msword docs. Try antiword, and if this fails, check for
+# an rtf or text document (.doc are sometimes like this...). Also try
+# wvWare if the doc is actually a word doc
+class WordFilter:
+    def __init__(self, em, td):
+        # Protocol object, used for logging and handed to the
+        # post-processors
+        self.em = em
+        # Number of commands already returned for the current file
+        self.ntry = 0
+        # Directory where the other filter scripts are to be found
+        self.execdir = td
+
+    def reset(self):
+        '''Get ready for a new file'''
+        self.ntry = 0
+            
+    def hasControlChars(self, data):
+        '''Return True if the byte string contains control characters
+        other than newline, tab, form feed and carriage return'''
+        # bytearray() yields integer values with both Python 2 and 3
+        for c in bytearray(data):
+            # 9: \t, 10: \n, 12: \f, 13: \r
+            if c < 32 and c not in (9, 10, 12, 13):
+                return True
+        return False
+
+    def mimetype(self, fn):
+        '''Guess the actual type of the file from its first 100 bytes.
+        Return a MIME type, or an empty string if the file can't be
+        read'''
+        # Both signatures are byte strings, like the data read from the
+        # file opened in binary mode
+        rtfprolog = b"{\\rtf1"
+        docprolog = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
+        try:
+            # 'with' makes sure that the file is closed when we are done
+            with open(fn, "rb") as f:
+                data = f.read(100)
+        except Exception:
+            return ""
+        if data[0:6] == rtfprolog:
+            return "text/rtf"
+        elif data[0:8] == docprolog:
+            return "application/msword"
+        elif self.hasControlChars(data):
+            return "application/octet-stream"
+        else:
+            return "text/plain"
+
+    def getCmd(self, fn):
+        '''Return command to execute, and postprocessor, according to
+        our state: first try antiword, then others depending on mime
+        identification. Do 2 tries at most'''
+        if self.ntry == 0:
+            self.ntry = 1
+            cmd = rclexecm.which("antiword")
+            if cmd:
+                return ([cmd, "-t", "-i", "1", "-m", "UTF-8"],
+                        WordProcessData(self.em))
+            else:
+                return ([],None)
+        elif self.ntry == 1:
+            self.ntry = 2
+            # antiword failed. Check for an rtf file, or text and
+            # process accordingly. If the doc is actually msword, try
+            # wvWare.
+            mt = self.mimetype(fn)
+            self.em.rclog("rcldoc.py: actual MIME type %s" % mt)
+            if mt == "text/plain":
+                # "python" is the name of the interpreter to run, it
+                # has to be a string
+                return (["python", os.path.join(self.execdir, "rcltext.py")],
+                       WordPassData(self.em))
+            elif mt == "text/rtf":
+                cmd = ["python", os.path.join(self.execdir, "rclrtf.py"),
+                       "-s"]
+                self.em.rclog("rcldoc.py: returning cmd %s" % cmd)
+                return (cmd, WordPassData(self.em))
+            elif mt == "application/msword":
+                cmd = rclexecm.which("wvWare")
+                if cmd:
+                    return ([cmd, "--nographics", "--charset=utf-8"],
+                            WordPassData(self.em))
+                else:
+                    return ([],None)    
+            else:
+                return ([],None)
+        else:
+            return ([],None)
+
+# Script entry point: check for the helper program, then build the
+# protocol object, the command supplier and the executor, and hand
+# them to the common main routine.
+if __name__ == '__main__':
+    # Remember where we execute filters from, in case we need to exec another
+    execdir = os.path.dirname(sys.argv[0])
+    # Check that we have antiword. We could fallback to wvWare, but
+    # this is not what the old filter did.
+    if not rclexecm.which("antiword"):
+        # Tell the caller which helper program is missing
+        print("RECFILTERROR HELPERNOTFOUND antiword")
+        sys.exit(1)
+    proto = rclexecm.RclExecM()
+    filter = WordFilter(proto, execdir)
+    extract = rclexec1.Executor(proto, filter)
+    rclexecm.main(proto, extract)
--- a/src/filters/rclexec1.py
+++ b/src/filters/rclexec1.py
@ -0,0 +1,112 @@
+#################################
+# Copyright (C) 2014 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+########################################################
+
+# Common code for replacing the old shell scripts with Python execm
+# ones: this implements the basic functions for a filter which
+# executes a command to translate a simple file (like rclword with
+# antiword).
+#
+# This was motivated by the Windows port: to replace the use of the shell
+# and Unix utilities (awk, etc.). We can't just execute Python scripts,
+# this would be too slow. So this helps implementing a permanent script
+# to repeatedly execute single commands.
+
+import subprocess
+import rclexecm
+
+# This class has the code to execute the subprocess and call a
+# data-specific post-processor. Command and processor are supplied by
+# the object which we receive as a parameter, which in turn is defined
+# in the actual executable filter (e.g. rcldoc.py)
+class Executor:
+    # Option flag which getCmd() can return as third element: do not
+    # treat a non-zero exit status of the command as an error
+    opt_ignxval = 1
+    
+    def __init__(self, em, flt):
+        # Protocol object, used for logging
+        self.em = em
+        # Command supplier: must have reset() and getCmd(filename)
+        self.flt = flt
+        # Number of documents already returned for the current file
+        self.currentindex = 0
+
+    def runCmd(self, cmd, filename, postproc, opt):
+        ''' Substitute parameters and execute command, process output
+        with the specific postprocessor and return the complete text.
+        We expect cmd as a list of command name + arguments.
+        Returns an (ok, data) pair.'''
+
+        try:
+            fullcmd = cmd + [filename]
+            proc = subprocess.Popen(fullcmd,
+                                    stdout = subprocess.PIPE)
+            stdout = proc.stdout
+        except subprocess.CalledProcessError as err:
+            self.em.rclog("extractone: Popen(%s) error: %s" % (fullcmd, err))
+            return (False, "")
+        except OSError as err:
+            self.em.rclog("extractone: Popen(%s) OS error: %s" % (fullcmd, err))
+            return (False, "")
+
+        # The post-processor gets the lines stripped of leading and
+        # trailing white space (end of line included)
+        for line in stdout:
+            postproc.takeLine(line.strip())
+
+        proc.wait()
+        if (opt & self.opt_ignxval) == 0 and proc.returncode:
+            self.em.rclog("extractone: [%s] returncode %d" % \
+                          (filename, proc.returncode))
+            return False, postproc.wrapData()
+        else:
+            return True, postproc.wrapData()
+
+    def extractone(self, params):
+        '''Run the commands supplied by the filter object on the file
+        named by params["filename:"], until one succeeds or none is
+        left. Returns an (ok, data, ipath, eof) tuple.'''
+        #self.em.rclog("extractone %s %s" % (params["filename:"], \
+        # params["mimetype:"]))
+        self.flt.reset()
+        ok = False
+        # 'in' works with all Python versions, has_key() is gone from
+        # Python 3
+        if "filename:" not in params:
+            self.em.rclog("extractone: no mime or file name")
+            return (ok, "", "", rclexecm.RclExecM.eofnow)
+
+        fn = params["filename:"]
+        while True:
+            # getCmd() returns (command, postprocessor[, options]). An
+            # empty command means that there is nothing left to try
+            cmdseq = self.flt.getCmd(fn)
+            cmd = cmdseq[0]
+            postproc = cmdseq[1]
+            opt = cmdseq[2] if len(cmdseq) == 3 else 0
+            if cmd:
+                ok, data = self.runCmd(cmd, fn, postproc, opt)
+                if ok:
+                    break
+            else:
+                break
+        if ok:
+            return (ok, data, "", rclexecm.RclExecM.eofnext)
+        else:
+            return (ok, "", "", rclexecm.RclExecM.eofnow)
+        
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        self.currentindex = 0
+        return True
+
+    def getipath(self, params):
+        return self.extractone(params)
+        
+    def getnext(self, params):
+        # There is a single document per file: signal eof on the
+        # second call
+        if self.currentindex >= 1:
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        else:
+            ret= self.extractone(params)
+            self.currentindex += 1
+            return ret
--- a/src/filters/rclexecm.py
+++ b/src/filters/rclexecm.py
@ -1,10 +1,34 @@
-#!/usr/bin/env python
+#################################
+# Copyright (C) 2014 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+########################################################
+## Recoll multifilter communication module and utilities
+
+from __future__ import print_function

-###########################################
-## Generic recoll multifilter communication code
 import sys
 import os
+import tempfile
+import shutil
+import getopt

+############################################
+# RclExecM implements the communication protocol with the recollindex
+# process. It calls the object specific to the document type to
+# actually get the data.
 class RclExecM:
    noteof  = 0
    eofnext = 1
@ -27,9 +51,13 @@ class RclExecM:
        else:
            self.maxmembersize = 50 * 1024
        self.maxmembersize = self.maxmembersize * 1024
+        if sys.platform == "win32":
+            import msvcrt
+            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+            msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)

    def rclog(self, s, doexit = 0, exitvalue = 1):
-        print >> sys.stderr, "RCLMFILT:", self.myname, ":", s
+        print("RCLMFILT: %s: %s" % (self.myname, s), file=sys.stderr)
        if doexit:
            sys.exit(exitvalue)

@ -87,29 +115,29 @@ class RclExecM:
                self.rclog("GOT UNICODE for ipath [%s]" % (ipath,))
                docdata = docdata.encode("UTF-8")

-            print "Document:", len(docdata)
+            print("Document: %d" % len(docdata))
            sys.stdout.write(docdata)

            if len(ipath):
-                print "Ipath:", len(ipath)
+                print("Ipath: %d" % len(ipath))
                sys.stdout.write(ipath)

            if len(self.mimetype):
-                print "Mimetype:", len(self.mimetype)
+                print("Mimetype: %d" % len(self.mimetype))
                sys.stdout.write(self.mimetype)

        # If we're at the end of the contents, say so
        if iseof == RclExecM.eofnow:
-            print "Eofnow: 0"
+            print("Eofnow: 0")
        elif iseof == RclExecM.eofnext:
-            print "Eofnext: 0"
+            print("Eofnext: 0")
        if iserror == RclExecM.subdocerror:
-            print "Subdocerror: 0"
+            print("Subdocerror: 0")
        elif iserror == RclExecM.fileerror:
-            print "Fileerror: 0"
+            print("Fileerror: 0")
  
        # End of message
-        print
+        print()
        sys.stdout.flush()
        #self.rclog("done writing data")

@ -168,67 +196,161 @@ class RclExecM:
            self.processmessage(processor, params)


-  
+# Helper routine to test for program accessibility: return the path to
+# the executable file for 'program', or None if it can't be found. The
+# directory of the running script is searched first, then the PATH. A
+# program name with a directory part is only checked as is.
+def which(program):
+    def is_exe(fpath):
+        # isfile() and not exists(): directories usually have the
+        # execute permission too, but they can't be run
+        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
+    def ext_candidates(fpath):
+        # The name itself, then with each extension from PATHEXT
+        # (a Windows thing)
+        yield fpath
+        for ext in os.environ.get("PATHEXT", "").split(os.pathsep):
+            # Splitting an empty PATHEXT yields one empty extension,
+            # which would just repeat the candidate above
+            if ext:
+                yield fpath + ext
+
+    def path_candidates():
+        yield os.path.dirname(sys.argv[0])
+        # PATH may be unset: no directories to search then
+        if "PATH" in os.environ:
+            for path in os.environ["PATH"].split(os.pathsep):
+                yield path
+            
+    fpath, fname = os.path.split(program)
+    if fpath:
+        if is_exe(program):
+            return program
+    else:
+        for path in path_candidates():
+            exe_file = os.path.join(path, program)
+            for candidate in ext_candidates(exe_file):
+                if is_exe(candidate):
+                    return candidate
+    return None
+
+# Temp dir helper: creates a private temporary directory on first use
+# and tries to remove it when the object is deleted.
+class SafeTmpDir:
+    def __init__(self, em):
+        # Protocol object, used for logging
+        self.em = em
+        # Top directory, created by mkdtemp(). Empty until getpath() is
+        # called
+        self.toptmp = ""
+        # Subdirectory of toptmp: this is the path which we hand out
+        self.tmpdir = ""
+
+    def __del__(self):
+        # Nothing to do if getpath() was never called
+        try:
+            if self.toptmp:
+                # The second rmtree() argument is ignore_errors
+                shutil.rmtree(self.tmpdir, True)
+                os.rmdir(self.toptmp)
+        except Exception as err:
+            self.em.rclog("delete dir failed for " + self.toptmp)
+
+    def getpath(self):
+        '''Return the path of the temporary directory. It is created
+        during the first call, under the directory named by the
+        RECOLL_TMPDIR environment variable if it is set, else in the
+        default location used by tempfile.mkdtemp()'''
+        if not self.tmpdir:
+            envrcltmp = os.getenv('RECOLL_TMPDIR')
+            if envrcltmp:
+                self.toptmp = tempfile.mkdtemp(prefix='rcltmp', dir=envrcltmp)
+            else:
+                self.toptmp = tempfile.mkdtemp(prefix='rcltmp')
+
+            self.tmpdir = os.path.join(self.toptmp, 'rclsofftmp')
+            os.makedirs(self.tmpdir)
+
+        return self.tmpdir
+   
+
 # Common main routine for all python execm filters: either run the
 # normal protocol engine or a local loop to test without recollindex
 def main(proto, extract):
    if len(sys.argv) == 1:
        proto.mainloop(extract)
-    else:
-        # Got a file name parameter: TESTING without an execm parent
-        # Loop on all entries or get specific ipath
-        def mimetype_with_file(f):
-            cmd = 'file -i "' + f + '"'
-            fileout = os.popen(cmd).read()
-            lst = fileout.split(':')
-            mimetype = lst[len(lst)-1].strip()
-            lst = mimetype.split(';')
-            return lst[0].strip()
-        def mimetype_with_xdg(f):
-            cmd = 'xdg-mime query filetype "' + f + '"'
-            return os.popen(cmd).read().strip()
-        params = {'filename:': sys.argv[1]}
-        # Some filters (e.g. rclaudio) need/get a MIME type from the indexer
-        mimetype = mimetype_with_xdg(sys.argv[1])
-        params['mimetype:'] = mimetype
-        if not extract.openfile(params):
-            print "Open error"
-            sys.exit(1)
-        ipath = ""
-        if len(sys.argv) == 3:
-            ipath = sys.argv[2]
+        # mainloop does not return. Just in case
+        sys.exit(1)

-        if ipath != "":
-            params['ipath:'] = ipath
-            ok, data, ipath, eof = extract.getipath(params)
-            if ok:
-                print "== Found entry for ipath %s (mimetype [%s]):" % \
-                      (ipath, proto.mimetype)
-                if isinstance(data, unicode):
-                    bdata = data.encode("UTF-8")
-                else:
-                    bdata = data
+
+    # Not running the main loop: either acting as single filter (when called
+    # from other filter for example), or debugging
+    def usage():
+        print("Usage: rclexecm.py [-d] [-s] [-i ipath] [filename]",
+              file=sys.stderr)
+        sys.exit(1)
+        
+    actAsSingle = False
+    debugDumpData = False
+    ipath = ""
+
+    args = sys.argv[1:]
+    opts, args = getopt.getopt(args, "hdsi:")
+    for opt, arg in opts:
+        if opt in ['-h']:
+            usage()
+        elif opt in ['-s']:
+            actAsSingle = True
+        elif opt in ['-i']:
+            ipath = arg
+        elif opt in ['-d']:
+            debugDumpData = True
+        else:
+            print("unknown option %s\n"%opt, file=sys.stderr)
+            usage()
+
+    if len(args) != 1:
+        usage()
+        
+    def mimetype_with_file(f):
+        cmd = 'file -i "' + f + '"'
+        fileout = os.popen(cmd).read()
+        lst = fileout.split(':')
+        mimetype = lst[len(lst)-1].strip()
+        lst = mimetype.split(';')
+        return lst[0].strip()
+
+    def mimetype_with_xdg(f):
+        cmd = 'xdg-mime query filetype "' + f + '"'
+        return os.popen(cmd).read().strip()
+
+    def debprint(s):
+        if not actAsSingle:
+            print(s)
+            
+    params = {'filename:': args[0]}
+    # Some filters (e.g. rclaudio) need/get a MIME type from the indexer
+    mimetype = mimetype_with_xdg(args[0])
+    params['mimetype:'] = mimetype
+
+    if not extract.openfile(params):
+        print("Open error", file=sys.stderr)
+        sys.exit(1)
+
+    if ipath != "" or actAsSingle:
+        params['ipath:'] = ipath
+        ok, data, ipath, eof = extract.getipath(params)
+        if ok:
+            debprint("== Found entry for ipath %s (mimetype [%s]):" % \
+                  (ipath, proto.mimetype))
+            if isinstance(data, unicode):
+                bdata = data.encode("UTF-8")
+            else:
+                bdata = data
+            if debugDumpData or actAsSingle:
                sys.stdout.write(bdata)
-                print
-            else:
-                print "Got error, eof %d"%eof
+                print()
            sys.exit(0)
+        else:
+            print("Got error, eof %d"%eof, file=sys.stderr)
+            sys.exit(1)

-        ecnt = 0   
-        while 1:
-            ok, data, ipath, eof = extract.getnext(params)
-            if ok:
-                ecnt = ecnt + 1
-                print "== Entry %d ipath %s (mimetype [%s]):" % \
-                      (ecnt, ipath, proto.mimetype)
-                if isinstance(data, unicode):
-                    bdata = data.encode("UTF-8")
-                else:
-                    bdata = data
-                #sys.stdout.write(bdata)
-                print
-                if eof != RclExecM.noteof:
-                    break
+    ecnt = 0   
+    while 1:
+        ok, data, ipath, eof = extract.getnext(params)
+        if ok:
+            ecnt = ecnt + 1
+            debprint("== Entry %d ipath %s (mimetype [%s]):" % \
+                  (ecnt, ipath, proto.mimetype))
+            if isinstance(data, unicode):
+                bdata = data.encode("UTF-8")
            else:
-                print "Not ok, eof %d" % eof
-                break
+                bdata = data
+            if debugDumpData:
+                sys.stdout.write(bdata)
+                print()
+            if eof != RclExecM.noteof:
+                sys.exit(0)
+        else:
+            print("Not ok, eof %d" % eof, file=sys.stderr)
+            sys.exit(1)
+        # Not sure this makes sense, but going on looping certainly does not
+        if actAsSingle:
+            sys.exit(0)
--- a/src/filters/rclimg
+++ b/src/filters/rclimg
@ -147,6 +147,9 @@ if ($@) {
 	exit(1);
 }

+binmode(STDIN)      || die "cannot binmode STDIN";
+binmode(STDOUT)     || die "cannot binmode STDOUT";
+
 #print STDERR "RCLIMG: Starting\n";
 $| = 1;
 while (1) {
--- a/src/filters/rcllatinclass.py
+++ b/src/filters/rcllatinclass.py
@ -109,7 +109,7 @@ class European8859TextClassifier:


 if __name__ == "__main__":
-    f = open(sys.argv[1])
+    f = open(sys.argv[1], "rb")
    rawtext = f.read()
    f.close()

--- a/src/filters/rclnull
+++ b/src/filters/rclnull
@ -1,9 +0,0 @@
-#!/bin/sh
-# It may make sense in some cases to set this null filter (no output)
-# instead of using recoll_noindex or leaving the default filter in
-# case one doesn't want to install it: this will avoid endless retries
-# to reindex the affected files, as recoll will think it has succeeded
-# indexing them. Downside: the files won't be indexed when one
-# actually installs the real filter, will need a -z
-
-exit 0
--- a/src/filters/rclopxml.py
+++ b/src/filters/rclopxml.py
@ -0,0 +1,224 @@
+#!/usr/bin/env python
+# Copyright (C) 2015 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################
+from __future__ import print_function
+
+import sys
+import rclexecm
+import rclxslt
+import fnmatch
+from zipfile import ZipFile
+
+# Style sheet applied to docProps/core.xml: it outputs an HTML meta
+# element for the content type, one named "author" for dc:creator and
+# one named "date" for dcterms:modified. Other elements produce nothing.
+meta_stylesheet = '''<?xml version="1.0"?>
+<xsl:stylesheet 
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
+ xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:dcterms="http://purl.org/dc/terms/"
+ xmlns:dcmitype="http://purl.org/dc/dcmitype/"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+<!--  <xsl:output method="text"/> -->
+  <xsl:output omit-xml-declaration="yes"/>
+
+  <xsl:template match="cp:coreProperties">
+    <xsl:text>&#10;</xsl:text>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+    <xsl:text>&#10;</xsl:text>
+    <xsl:apply-templates/>
+  </xsl:template>
+
+  <xsl:template match="dc:creator">
+    <meta>
+    <xsl:attribute name="name">
+      <!-- <xsl:value-of select="name()"/> pour sortir tous les meta avec 
+       le meme nom que dans le xml (si on devenait dc-natif) -->
+      <xsl:text>author</xsl:text> 
+    </xsl:attribute>
+    <xsl:attribute name="content">
+       <xsl:value-of select="."/>
+    </xsl:attribute>
+    </meta>
+    <xsl:text>&#10;</xsl:text>
+  </xsl:template>
+
+  <xsl:template match="dcterms:modified">
+    <meta>
+    <xsl:attribute name="name">
+      <xsl:text>date</xsl:text> 
+    </xsl:attribute>
+    <xsl:attribute name="content">
+       <xsl:value-of select="."/>
+    </xsl:attribute>
+    </meta>
+    <xsl:text>&#10;</xsl:text>
+  </xsl:template>
+
+  <xsl:template match="*">
+  </xsl:template>
+
+</xsl:stylesheet>
+'''
+
+# Per document type values, substituted inside content_stylesheet by
+# OXExtractor.computestylesheet(). The names are built from a prefix
+# ('word', 'xl' or 'pp') and one of the suffixes '_tagmatch' (element
+# of which the text is output), '_xmlns_decls' (namespace declarations)
+# and '_moretemplates' (additional templates).
+word_tagmatch = 'w:p'
+word_xmlns_decls = '''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
+xmlns:ve="http://schemas.openxmlformats.org/markup-compatibility/2006"
+xmlns:o="urn:schemas-microsoft-com:office:office"
+xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
+xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
+xmlns:v="urn:schemas-microsoft-com:vml"
+xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
+xmlns:w10="urn:schemas-microsoft-com:office:word"
+xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
+xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml"
+'''
+word_moretemplates = ''
+
+
+xl_tagmatch = 'x:t'
+xl_xmlns_decls='''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
+xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
+ '''
+xl_moretemplates = ''
+
+pp_tagmatch = 'a:t'
+pp_xmlns_decls = '''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
+xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" 
+xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" 
+xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"
+'''
+# TODO: we would like to suppress the text output for all elements
+# except a:t, but it is unclear how to do it. At least get rid of these:
+pp_moretemplates = '''<xsl:template match="p:attrName">
+</xsl:template>
+'''
+
+# Style sheet skeleton for the document contents. The @XMLNS_DECLS@,
+# @TAGMATCH@ and @MORETEMPLATES@ placeholders are replaced by
+# OXExtractor.computestylesheet() according to the document type. The
+# result outputs a div, with a paragraph for each matched element.
+content_stylesheet = '''<?xml version="1.0"?>
+<xsl:stylesheet @XMLNS_DECLS@ >
+
+ <xsl:output omit-xml-declaration="yes"/>
+
+ <xsl:template match="/">
+  <div>
+  <xsl:apply-templates/> 
+  </div>
+</xsl:template>
+
+ <xsl:template match="@TAGMATCH@">
+  <p>
+  <xsl:value-of select="."/>
+  </p>
+ </xsl:template>
+
+@MORETEMPLATES@
+
+</xsl:stylesheet>
+'''
+
+# Extractor for the Office Open XML formats: the document is a zip
+# archive, from which we read the metadata and content XML members, and
+# convert them to HTML with XSLT.
+class OXExtractor:
+    def __init__(self, em):
+        # Protocol object, used for logging
+        self.em = em
+        # Number of documents already returned for the current file
+        self.currentindex = 0
+
+    # Replace values inside data style sheet, depending on type of doc
+    def computestylesheet(self, nm):
+        decls = globals()[nm + '_xmlns_decls']
+        stylesheet = content_stylesheet.replace('@XMLNS_DECLS@', decls)
+        tagmatch = globals()[nm + '_tagmatch']
+        stylesheet = stylesheet.replace('@TAGMATCH@', tagmatch)
+        moretmpl = globals()[nm + '_moretemplates']
+        stylesheet = stylesheet.replace('@MORETEMPLATES@', moretmpl)
+
+        return stylesheet
+    
+    # Sort key for the slide member names: order by the number at the
+    # end of the name (slide2 before slide11), then by name.
+    def _slidekey(self, name):
+        import re
+        m = re.search(r'(\d+)\.xml$', name)
+        if m:
+            return (int(m.group(1)), name)
+        return (0, name)
+
+    def extractone(self, params):
+        '''Extract metadata and text from the file named by
+        params["filename:"]. Returns an (ok, data, ipath, eof) tuple,
+        where data is an HTML document'''
+        # 'in' works with all Python versions, has_key() is gone from
+        # Python 3
+        if "filename:" not in params:
+            self.em.rclog("extractone: no mime or file name")
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        fn = params["filename:"]
+
+        try:
+            zip = ZipFile(fn)
+        except Exception as err:
+            self.em.rclog("unzip failed: " + str(err))
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+
+        # From here on, make sure that the archive gets closed whatever
+        # happens
+        try:
+            docdata = '<html><head>'
+
+            try:
+                metadata = zip.read("docProps/core.xml")
+                if metadata:
+                    res = rclxslt.apply_sheet_data(meta_stylesheet, metadata)
+                    docdata += res
+            except Exception:
+                # To be checked. I'm under the impression that I get this
+                # when nothing matches?
+                # self.em.rclog("no/bad metadata in %s" % fn)
+                pass
+
+            docdata += '</head><body>'
+
+            # We don't know which kind of document this is: try the
+            # members for each kind, a missing one is not an error
+            try:
+                content= zip.read('word/document.xml')
+                stl = self.computestylesheet('word')
+                docdata += rclxslt.apply_sheet_data(stl, content)
+            except Exception:
+                pass
+
+            try:
+                content = zip.read('xl/sharedStrings.xml')
+                stl = self.computestylesheet('xl')
+                docdata += rclxslt.apply_sheet_data(stl, content)
+            except Exception:
+                pass
+
+            try:
+                stl = self.computestylesheet('pp')
+                slides = [nm for nm in zip.namelist()
+                          if fnmatch.fnmatch(nm, 'ppt/slides/slide*.xml')]
+                # Numeric sort: a plain one would yield slide1 slide11
+                # slide2
+                for slide in sorted(slides, key=self._slidekey):
+                    content = zip.read(slide)
+                    docdata += rclxslt.apply_sheet_data(stl, content)
+            except Exception:
+                pass
+
+            docdata += '</body></html>'
+        finally:
+            zip.close()
+
+        return (True, docdata, "", rclexecm.RclExecM.eofnext)
+    
+
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        self.currentindex = 0
+        return True
+
+    def getipath(self, params):
+        return self.extractone(params)
+        
+    def getnext(self, params):
+        # There is a single document per file: signal eof on the
+        # second call
+        if self.currentindex >= 1:
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        else:
+            ret= self.extractone(params)
+            self.currentindex += 1
+            return ret
+
+# Script entry point: build the protocol object and the extractor and
+# hand them to the common main routine.
+if __name__ == '__main__':
+    proto = rclexecm.RclExecM()
+    extract = OXExtractor(proto)
+    rclexecm.main(proto, extract)
--- a/src/filters/rclppt.py
+++ b/src/filters/rclppt.py
@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+import rclexecm
+import rclexec1
+import re
+import sys
+import os
+
+# Processing the output from ppt-dump.py
+class PPTProcessData:
+    """Collect the text lines dumped by ppt-dump.py into an HTML document.
+
+    The text is HTML-escaped and placed inside a single <pre> element.
+    """
+
+    # Document prologue, emitted once, before the first piece of text.
+    _htmlhead = '''<html><head>''' + \
+                '''<meta http-equiv="Content-Type" ''' + \
+                '''content="text/html;charset=UTF-8">''' + \
+                '''</head><body><pre>'''
+
+    def __init__(self, em):
+        # em must provide htmlescape(); it is the object the filter got.
+        self.em = em
+        self.out = ""
+        self.gotdata = False
+
+    def takeLine(self, line):
+        """Append one line of helper output, escaped, to the document.
+
+        Every line is terminated with a newline, the way RTFProcessData in
+        rclrtf.py does for its own input. This assumes that the executor
+        hands over lines without their terminator (TODO confirm in
+        rclexec1): without the separator, consecutive lines would run
+        together inside <pre> and glue words across line boundaries.
+        """
+        if not self.gotdata:
+            self.out += self._htmlhead
+            self.gotdata = True
+        self.out += self.em.htmlescape(line) + "\n"
+
+    def wrapData(self):
+        """Return the complete HTML document."""
+        if not self.gotdata:
+            # No text was received at all: still produce a well-formed
+            # (empty) document rather than bare closing tags.
+            self.out += self._htmlhead
+            self.gotdata = True
+        return self.out + '''</pre></body></html>'''
+
+class PPTFilter:
+    """Supplies the ppt-dump.py command line (this object is handed to
+    rclexec1.Executor by the script entry point)."""
+    def __init__(self, em):
+        self.em = em
+        self.ntry = 0
+
+    def reset(self):
+        # Make getCmd() hand out the command again.
+        self.ntry = 0
+
+    def getCmd(self, fn):
+        """Return the command to run, or ([], None) if there is none.
+
+        A command is returned at most once between two reset() calls,
+        and only if ppt-dump.py can be located. fn is not used here.
+        """
+        if self.ntry:
+            return ([], None)
+        self.ntry = 1
+        dumper = rclexecm.which("ppt-dump.py")
+        if not dumper:
+            return ([], None)
+        # ppt-dump.py often exits 1 with valid data. Ignore exit value
+        argv = ["python", dumper, "--no-struct-output", "--dump-text"]
+        return (argv, PPTProcessData(self.em), rclexec1.Executor.opt_ignxval)
+
+if __name__ == '__main__':
+    if rclexecm.which("ppt-dump.py"):
+        protocol = rclexecm.RclExecM()
+        rclexecm.main(protocol,
+                      rclexec1.Executor(protocol, PPTFilter(protocol)))
+    else:
+        # Helper script not found: report it on stdout and exit with an
+        # error status.
+        print("RECFILTERROR HELPERNOTFOUND ppt-dump.py")
+        sys.exit(1)
--- a/src/filters/rclrtf.py
+++ b/src/filters/rclrtf.py
@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+import rclexecm
+import rclexec1
+import re
+import sys
+import os
+
+# Post-processing of the HTML produced by unrtf
+class RTFProcessData:
+    """Copy unrtf output through, replacing its charset declaration."""
+    def __init__(self, em):
+        self.em = em
+        self.out = ""
+        self.gothead = 0
+        self.patendhead = re.compile('</head>')
+        self.patcharset = re.compile('^<meta http-equiv=')
+
+    def takeLine(self, line):
+        """Take one output line (a newline is appended to each kept line).
+
+        Some versions of unrtf put out a garbled charset line: until the
+        end of the head section is seen, lines starting with
+        '<meta http-equiv=' are dropped, and a UTF-8 declaration is
+        inserted just before the line containing '</head>'. Everything
+        else is passed untouched.
+        """
+        if self.gothead:
+            self.out += line + "\n"
+            return
+        if self.patendhead.search(line):
+            self.out += '<meta http-equiv="Content-Type" ' + \
+                        'content="text/html;charset=UTF-8">' + "\n"
+            self.gothead = 1
+        elif self.patcharset.search(line):
+            # Charset line from unrtf: discarded.
+            return
+        self.out += line + "\n"
+
+    def wrapData(self):
+        """Return the accumulated document."""
+        return self.out
+
+class RTFFilter:
+    """Supplies the unrtf command line (this object is handed to
+    rclexec1.Executor by the script entry point)."""
+    def __init__(self, em):
+        self.em = em
+        self.ntry = 0
+
+    def reset(self):
+        # Make getCmd() hand out the command again.
+        self.ntry = 0
+
+    def getCmd(self, fn):
+        """Return (command, output processor), or ([], None) if none.
+
+        A command is returned at most once between two reset() calls,
+        and only if unrtf can be located. fn is not used here.
+        """
+        if self.ntry:
+            return ([], None)
+        self.ntry = 1
+        unrtf = rclexecm.which("unrtf")
+        if not unrtf:
+            return ([], None)
+        return ([unrtf, "--nopict", "--html"], RTFProcessData(self.em))
+
+if __name__ == '__main__':
+    # The helper needed by this filter is unrtf: name it in the report, so
+    # that the program which is actually missing can be identified.
+    if not rclexecm.which("unrtf"):
+        print("RECFILTERROR HELPERNOTFOUND unrtf")
+        sys.exit(1)
+    proto = rclexecm.RclExecM()
+    filter = RTFFilter(proto)
+    extract = rclexec1.Executor(proto, filter)
+    rclexecm.main(proto, extract)
--- a/src/filters/rclsoff.py
+++ b/src/filters/rclsoff.py
@ -0,0 +1,189 @@
+#!/usr/bin/env python
+# Copyright (C) 2014 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################
+
+import sys
+import rclexecm
+import rclxslt
+from zipfile import ZipFile
+
+stylesheet_meta = '''<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" 
+  xmlns:xlink="http://www.w3.org/1999/xlink" 
+  xmlns:dc="http://purl.org/dc/elements/1.1/" 
+  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" 
+  xmlns:ooo="http://openoffice.org/2004/office"
+  exclude-result-prefixes="office xlink meta ooo dc"
+  >
+
+<xsl:output method="html" encoding="UTF-8"/>
+
+<xsl:template match="/office:document-meta">
+  <xsl:apply-templates select="office:meta/dc:description"/>
+  <xsl:apply-templates select="office:meta/dc:subject"/>
+  <xsl:apply-templates select="office:meta/dc:title"/>
+  <xsl:apply-templates select="office:meta/meta:keyword"/>
+  <xsl:apply-templates select="office:meta/dc:creator"/>
+</xsl:template>
+
+<xsl:template match="dc:title">
+<title> <xsl:value-of select="."/> </title><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="dc:description">
+  <meta>
+  <xsl:attribute name="name">abstract</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="dc:subject">
+  <meta>
+  <xsl:attribute name="name">keywords</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="dc:creator">
+  <meta>
+  <xsl:attribute name="name">author</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="meta:keyword">
+  <meta>
+  <xsl:attribute name="name">keywords</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
+'''
+
+stylesheet_content  = '''<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
+  exclude-result-prefixes="text"
+>
+
+<xsl:output method="html" encoding="UTF-8"/>
+
+<xsl:template match="text:p">
+  <p><xsl:apply-templates/></p><xsl:text>
+  </xsl:text>
+</xsl:template>
+
+<xsl:template match="text:h">
+<p><xsl:apply-templates/></p><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="text:s">
+<xsl:text> </xsl:text>
+</xsl:template>
+
+<xsl:template match="text:line-break">
+<br />
+</xsl:template>
+
+<xsl:template match="text:tab">
+<xsl:text>    </xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
+'''
+
+class OOExtractor:
+    """Convert a zip-packaged document holding meta.xml and content.xml
+    into HTML, using the two stylesheets defined in this module."""
+    def __init__(self, em):
+        self.em = em
+        self.currentindex = 0
+
+    def extractone(self, params):
+        """Extract the document named by params["filename:"].
+
+        Returns a (success, data, ipath, eof-indicator) tuple. Metadata
+        (meta.xml) is optional and its transformation is best effort; a
+        missing or bad content.xml makes the extraction fail.
+        """
+        if "filename:" not in params:
+            self.em.rclog("extractone: no mime or file name")
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        fn = params["filename:"]
+
+        try:
+            zipf = ZipFile(fn)
+        except Exception as err:
+            self.em.rclog("unzip failed: " + str(err))
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+
+        try:
+            # The head section is left open here: the title and meta
+            # elements generated from meta.xml belong inside it.
+            docdata = '<html><head><meta http-equiv="Content-Type" ' \
+                      'content="text/html; charset=UTF-8">'
+
+            try:
+                metadata = zipf.read("meta.xml")
+                if metadata:
+                    docdata += rclxslt.apply_sheet_data(stylesheet_meta,
+                                                        metadata)
+            except Exception:
+                # To be checked. I'm under the impression that I get this
+                # when nothing matches?
+                #self.em.rclog("no/bad metadata in %s" % fn)
+                pass
+
+            docdata += '</head><body>'
+
+            try:
+                content = zipf.read("content.xml")
+                if content:
+                    docdata += rclxslt.apply_sheet_data(stylesheet_content,
+                                                        content)
+            except Exception as err:
+                self.em.rclog("bad data in %s: %s" % (fn, err))
+                return (False, "", "", rclexecm.RclExecM.eofnow)
+        finally:
+            # Release the archive on every path, this process goes on to
+            # handle other files.
+            zipf.close()
+
+        docdata += '</body></html>'
+        return (True, docdata, "", rclexecm.RclExecM.eofnext)
+
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        # Rewind the document counter for a new file.
+        self.currentindex = 0
+        return True
+
+    def getipath(self, params):
+        return self.extractone(params)
+
+    def getnext(self, params):
+        # A file yields exactly one document: later calls report eof.
+        if self.currentindex >= 1:
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        else:
+            ret = self.extractone(params)
+            self.currentindex += 1
+            return ret
+
+# Script entry point: hand an OOExtractor to the rclexecm main loop.
+if __name__ == '__main__':
+    protocol = rclexecm.RclExecM()
+    rclexecm.main(protocol, OOExtractor(protocol))
--- a/src/filters/rclsvg.py
+++ b/src/filters/rclsvg.py
@ -0,0 +1,140 @@
+#!/usr/bin/env python
+# Copyright (C) 2014 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################
+
+import sys
+import rclexecm
+import rclxslt
+
+stylesheet_all = '''<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:svg="http://www.w3.org/2000/svg"
+  xmlns:dc="http://purl.org/dc/elements/1.1/"
+  exclude-result-prefixes="svg"
+  >
+
+<xsl:output method="html" encoding="UTF-8"/>
+
+<xsl:template match="/">
+  <html>
+  <head>
+  <xsl:apply-templates select="svg:svg/svg:title"/>
+  <xsl:apply-templates select="svg:svg/svg:desc"/>
+  <xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:creator"/>
+  <xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:subject"/>
+  <xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:description"/>
+  </head>
+  <body>
+  <xsl:apply-templates select="//svg:text"/>
+  </body>
+  </html>
+</xsl:template>
+
+<xsl:template match="svg:desc"> 
+  <meta>
+  <xsl:attribute name="name">keywords</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="dc:creator"> 
+  <meta>
+  <xsl:attribute name="name">author</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="dc:subject"> 
+  <meta>
+  <xsl:attribute name="name">keywords</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="dc:description"> 
+  <meta>
+  <xsl:attribute name="name">description</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="."/>
+  </xsl:attribute>
+  </meta><xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="svg:title"> 
+  <title><xsl:value-of select="."/></title><xsl:text>
+  </xsl:text>
+</xsl:template>
+	    
+<xsl:template match="svg:text"> 
+  <p><xsl:value-of select="."/></p><xsl:text>
+  </xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
+'''
+
+class SVGExtractor:
+    """Convert an SVG file to HTML by applying stylesheet_all to it."""
+    def __init__(self, em):
+        self.em = em
+        self.currentindex = 0
+
+    def extractone(self, params):
+        """Transform the file named by params["filename:"].
+
+        Returns a (success, data, ipath, eof-indicator) tuple. Read or
+        transformation errors are logged and reported as a failure.
+        """
+        if "filename:" not in params:
+            self.em.rclog("extractone: no mime or file name")
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        fn = params["filename:"]
+
+        try:
+            # The context manager closes the file even if read() fails.
+            with open(fn, 'rb') as f:
+                data = f.read()
+            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
+        except Exception as err:
+            # Two values, two conversions: the message used to have a
+            # single %s, which raised TypeError in this very handler.
+            self.em.rclog("%s: bad data: %s" % (fn, err))
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+
+        # NOTE(review): success is flagged eofnow here, whereas rclsoff.py
+        # and rcltext.py return eofnext with their data. Confirm against
+        # rclexecm which one is intended.
+        return (True, docdata, "", rclexecm.RclExecM.eofnow)
+
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        # Rewind the document counter for a new file.
+        self.currentindex = 0
+        return True
+
+    def getipath(self, params):
+        return self.extractone(params)
+
+    def getnext(self, params):
+        # A file yields exactly one document: later calls report eof.
+        if self.currentindex >= 1:
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        else:
+            ret = self.extractone(params)
+            self.currentindex += 1
+            return ret
+
+# Script entry point: hand an SVGExtractor to the rclexecm main loop.
+if __name__ == '__main__':
+    protocol = rclexecm.RclExecM()
+    rclexecm.main(protocol, SVGExtractor(protocol))
--- a/src/filters/rcltext.py
+++ b/src/filters/rcltext.py
@ -0,0 +1,51 @@
+#!/usr/bin/env python
+
+import rclexecm
+import sys
+
+# Wrapping a text file. Recoll does it internally in most cases, but
+# there is a reason this exists, just can't remember it ...
+class TxtDump:
+    """Wrap the contents of a text file into a minimal HTML document."""
+    def __init__(self, em):
+        self.em = em
+        # Also set here, not only in openfile(), like the other
+        # extractors do: getnext() reads it.
+        self.currentindex = 0
+
+    def extractone(self, params):
+        """Return the file named by params["filename:"] as escaped HTML.
+
+        The result is a (success, data, ipath, eof-indicator) tuple.
+        """
+        #self.em.rclog("extractone %s %s" % (params["filename:"], \
+        #params["mimetype:"]))
+        if "filename:" not in params:
+            self.em.rclog("extractone: no file name")
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+
+        fn = params["filename:"]
+        # No charset, so recoll will have to use its config to guess it
+        txt = '<html><head><title></title></head><body><pre>'
+        try:
+            # The context manager closes the file on every path, this
+            # process goes on to handle other files.
+            with open(fn, "rb") as f:
+                txt += self.em.htmlescape(f.read())
+        except Exception as err:
+            self.em.rclog("TxtDump: %s : %s" % (fn, err))
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+
+        txt += '</pre></body></html>'
+        return (True, txt, "", rclexecm.RclExecM.eofnext)
+
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        # Rewind the document counter for a new file.
+        self.currentindex = 0
+        return True
+
+    def getipath(self, params):
+        return self.extractone(params)
+
+    def getnext(self, params):
+        # A file yields exactly one document: later calls report eof.
+        if self.currentindex >= 1:
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        else:
+            ret = self.extractone(params)
+            self.currentindex += 1
+            return ret
+
+# Script entry point: hand a TxtDump to the rclexecm main loop.
+if __name__ == '__main__':
+    protocol = rclexecm.RclExecM()
+    rclexecm.main(protocol, TxtDump(protocol))
--- a/src/filters/rclxls.py
+++ b/src/filters/rclxls.py
@ -0,0 +1,65 @@
+#!/usr/bin/env python
+
+import rclexecm
+import rclexec1
+import xlsxmltocsv
+import re
+import sys
+import os
+import xml.sax
+
+# Processing the output from xls-dump.py
+class XLSProcessData:
+    """Collect the XML dumped by xls-dump.py and convert it to HTML.
+
+    The XML is accumulated line by line, then run through
+    xlsxmltocsv.XlsXmlHandler; the handler's text output is escaped and
+    placed inside a <pre> element.
+    """
+    def __init__(self, em):
+        # em must provide htmlescape(); it is the object the filter got.
+        self.em = em
+        self.out = ""
+        self.gotdata = False
+        self.xmldata = ""
+
+    def takeLine(self, line):
+        """Store one line of XML; start the HTML document on the first.
+
+        Lines are joined with a newline. This assumes that the executor
+        hands them over without their terminator, as RTFProcessData in
+        rclrtf.py also does (TODO confirm in rclexec1). The extra
+        whitespace is harmless to the XML parser, while gluing lines
+        together could merge text that was separated by a line break.
+        """
+        if not self.gotdata:
+            self.out += '''<html><head>''' + \
+                        '''<meta http-equiv="Content-Type" ''' + \
+                        '''content="text/html;charset=UTF-8">''' + \
+                        '''</head><body><pre>'''
+            self.gotdata = True
+        self.xmldata += line + "\n"
+
+    def wrapData(self):
+        """Parse the stored XML and return the complete HTML document.
+
+        Errors from xml.sax.parseString() propagate to the caller.
+        """
+        handler = xlsxmltocsv.XlsXmlHandler()
+        xml.sax.parseString(self.xmldata, handler)
+        self.out += self.em.htmlescape(handler.output)
+        return self.out + '''</pre></body></html>'''
+
+class XLSFilter:
+    """Supplies the xls-dump.py command line (this object is handed to
+    rclexec1.Executor by the script entry point)."""
+    def __init__(self, em):
+        self.em = em
+        self.ntry = 0
+
+    def reset(self):
+        # Make getCmd() hand out the command again.
+        self.ntry = 0
+
+    def getCmd(self, fn):
+        """Return the command to run, or ([], None) if there is none.
+
+        A command is returned at most once between two reset() calls,
+        and only if xls-dump.py can be located. fn is not used here.
+        """
+        if self.ntry:
+            return ([], None)
+        self.ntry = 1
+        dumper = rclexecm.which("xls-dump.py")
+        if not dumper:
+            return ([], None)
+        # xls-dump.py often exits 1 with valid data. Ignore exit value
+        argv = ["python", dumper, "--dump-mode=canonical-xml",
+                "--utf-8", "--catch"]
+        return (argv, XLSProcessData(self.em), rclexec1.Executor.opt_ignxval)
+
+if __name__ == '__main__':
+    # The helper run by XLSFilter.getCmd() is xls-dump.py: that is the
+    # one to look for, and to name when it is missing.
+    if not rclexecm.which("xls-dump.py"):
+        print("RECFILTERROR HELPERNOTFOUND xls-dump.py")
+        sys.exit(1)
+    proto = rclexecm.RclExecM()
+    filter = XLSFilter(proto)
+    extract = rclexec1.Executor(proto, filter)
+    rclexecm.main(proto, extract)
--- a/src/filters/rclxml.py
+++ b/src/filters/rclxml.py
@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# Copyright (C) 2014 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################
+
+import sys
+import rclexecm
+import rclxslt
+
+stylesheet_all = '''<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+		xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+  <xsl:output method="html" encoding="UTF-8"/>
+
+  <xsl:template match="/">
+    <html>
+      <head>
+	<xsl:if test="//*[local-name() = 'title']">
+	  <title>
+	    <xsl:value-of select="//*[local-name() = 'title'][1]"/>
+	  </title>
+	</xsl:if>
+      </head>
+      <body>
+	<xsl:apply-templates/>
+      </body>
+    </html>
+  </xsl:template>
+
+  <xsl:template match="text()">
+    <xsl:if test="string-length(normalize-space(.)) &gt; 0">
+      <p><xsl:value-of select="."/></p>
+      <xsl:text>
+      </xsl:text>
+    </xsl:if>
+  </xsl:template>
+
+  <xsl:template match="*">
+    <xsl:apply-templates/>
+  </xsl:template>
+
+</xsl:stylesheet>
+'''
+
+class XMLExtractor:
+    """Convert an XML file to HTML by applying stylesheet_all to it."""
+    def __init__(self, em):
+        self.em = em
+        self.currentindex = 0
+
+    def extractone(self, params):
+        """Transform the file named by params["filename:"].
+
+        Returns a (success, data, ipath, eof-indicator) tuple. Read or
+        transformation errors are logged and reported as a failure.
+        """
+        if "filename:" not in params:
+            self.em.rclog("extractone: no mime or file name")
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        fn = params["filename:"]
+
+        try:
+            # The context manager closes the file even if read() fails.
+            with open(fn, 'rb') as f:
+                data = f.read()
+            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
+        except Exception as err:
+            # Two values, two conversions: the message used to have a
+            # single %s, which raised TypeError in this very handler.
+            self.em.rclog("%s: bad data: %s" % (fn, err))
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+
+        # NOTE(review): success is flagged eofnow here, whereas rclsoff.py
+        # and rcltext.py return eofnext with their data. Confirm against
+        # rclexecm which one is intended.
+        return (True, docdata, "", rclexecm.RclExecM.eofnow)
+
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        # Rewind the document counter for a new file.
+        self.currentindex = 0
+        return True
+
+    def getipath(self, params):
+        return self.extractone(params)
+
+    def getnext(self, params):
+        # A file yields exactly one document: later calls report eof.
+        if self.currentindex >= 1:
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        else:
+            ret = self.extractone(params)
+            self.currentindex += 1
+            return ret
+
+# Script entry point: hand an XMLExtractor to the rclexecm main loop.
+if __name__ == '__main__':
+    protocol = rclexecm.RclExecM()
+    rclexecm.main(protocol, XMLExtractor(protocol))
--- a/src/filters/rclxslt.py
+++ b/src/filters/rclxslt.py
@ -0,0 +1,52 @@
+# Copyright (C) 2014 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################
+
+# Helper module for xslt-based filters
+
+import sys
+
+# The libxml2/libxslt bindings are external to Python: if they cannot be
+# loaded, report the missing helper on stdout and exit with an error.
+try:
+    import libxml2
+    import libxslt
+except Exception:
+    # print() with a single argument gives the same output under the
+    # print statement rules, and matches the other filters' usage.
+    print("RECFILTERROR HELPERNOTFOUND python:libxml2/python:libxslt1")
+    sys.exit(1)
+
+libxml2.substituteEntitiesDefault(1)
+
+def apply_sheet_data(sheet, data):
+    """Apply an XSLT stylesheet to an XML document held in memory.
+
+    sheet is the stylesheet text and data the document text. Returns
+    the transformation result serialized by saveResultToString().
+    Errors raised by the libxml2/libxslt bindings propagate to the
+    caller.
+    """
+    styledoc = libxml2.parseMemory(sheet, len(sheet))
+    style = libxslt.parseStylesheetDoc(styledoc)
+    doc = None
+    result = None
+    try:
+        doc = libxml2.parseMemory(data, len(data))
+        result = style.applyStylesheet(doc, None)
+        return style.saveResultToString(result)
+    finally:
+        # These objects are released explicitly, and this must also
+        # happen when the input is bad: callers catch the error and the
+        # process goes on to handle other files.
+        style.freeStylesheet()
+        if doc is not None:
+            doc.freeDoc()
+        if result is not None:
+            result.freeDoc()
+
+def apply_sheet_file(sheet, fn):
+    """Apply an XSLT stylesheet to the XML file named fn.
+
+    sheet is the stylesheet text. Returns the transformation result
+    serialized by saveResultToString(). Errors raised by the
+    libxml2/libxslt bindings propagate to the caller.
+    """
+    styledoc = libxml2.parseMemory(sheet, len(sheet))
+    style = libxslt.parseStylesheetDoc(styledoc)
+    doc = None
+    result = None
+    try:
+        doc = libxml2.parseFile(fn)
+        result = style.applyStylesheet(doc, None)
+        return style.saveResultToString(result)
+    finally:
+        # These objects are released explicitly, and this must also
+        # happen when the input is bad: callers catch the error and the
+        # process goes on to handle other files.
+        style.freeStylesheet()
+        if doc is not None:
+            doc.freeDoc()
+        if result is not None:
+            result.freeDoc()
+
--- a/src/filters/xlsxmltocsv.py
+++ b/src/filters/xlsxmltocsv.py
@ -15,10 +15,13 @@ else:
    dquote = '"'
    
 class XlsXmlHandler(xml.sax.handler.ContentHandler):
+    def __init__(self):
+        self.output = ""
+        
    def startElement(self, name, attrs):
        if name == "worksheet":
            if "name" in attrs:
-                print("%s" % attrs["name"].encode("UTF-8"))
+                self.output += "%s\n" % attrs["name"].encode("UTF-8")
        elif name == "row":
            self.cells = dict()
        elif name == "label-cell" or name == "number-cell":
@ -30,7 +33,7 @@ class XlsXmlHandler(xml.sax.handler.ContentHandler):
                self.cells[int(attrs["col"])] = value
            else:
                #??
-                sys.stdout.write("%s%s"%(value.encode("UTF-8"),sepstring))
+                self.output += "%s%s" % (value.encode("UTF-8"), sepstring)
        elif name == "formula-cell":
            if "formula-result" in attrs and "col" in attrs:
                self.cells[int(attrs["col"])] = \
@ -40,17 +43,21 @@ class XlsXmlHandler(xml.sax.handler.ContentHandler):
        if name == "row":
            curidx = 0
            for idx, value in self.cells.iteritems():
-                sys.stdout.write(sepstring * (idx - curidx))
-                sys.stdout.write('%s%s%s' % (dquote, value, dquote))
+                self.output += sepstring * (idx - curidx)
+                self.output += "%s%s%s" % (dquote, value, dquote)
                curidx = idx
-            sys.stdout.write("\n")
+            self.output += "\n"
        elif name == "worksheet":
-            print("")
+            self.output += "\n"

-try:
-    xml.sax.parse(sys.stdin, XlsXmlHandler())
-except BaseException as err:
-    error("xml-parse: %s\n" % (str(sys.exc_info()[:2]),))
-    sys.exit(1)

-sys.exit(0)
+if __name__ == '__main__':
+    try:
+        handler = XlsXmlHandler()
+        xml.sax.parse(sys.stdin, handler)
+        print(handler.output)
+    except BaseException as err:
+        error("xml-parse: %s\n" % (str(sys.exc_info()[:2]),))
+        sys.exit(1)
+
+    sys.exit(0)
--- a/src/index/fetcher.cpp
+++ b/src/index/fetcher.cpp
@ -14,9 +14,8 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
-#ifdef HAVE_CONFIG_H
 #include "autoconfig.h"
-#endif
+

 #include "debuglog.h"

@ -34,8 +33,10 @@ DocFetcher *docFetcherMake(const Rcl::Doc& idoc)
    idoc.getmeta(Rcl::Doc::keybcknd, &backend);
    if (backend.empty() || !backend.compare("FS")) {
 	return new FSDocFetcher;
+#ifndef DISABLE_WEB_INDEXER
    } else if (!backend.compare("BGL")) {
 	return new BGLDocFetcher;
+#endif
    } else {
 	LOGERR(("DocFetcherFactory: unknown backend [%s]\n", backend.c_str()));
 	return 0;
--- a/src/index/fsindexer.cpp
+++ b/src/index/fsindexer.cpp
@ -43,7 +43,6 @@
 #include "fileudi.h"
 #include "cancelcheck.h"
 #include "rclinit.h"
-#include "execmd.h"
 #include "extrameta.h"

 using namespace std;
@ -145,13 +144,11 @@ FsIndexer::~FsIndexer()
    void *status;
    if (m_haveInternQ) {
 	status = m_iwqueue.setTerminateAndWait();
-	LOGDEB0(("FsIndexer: internfile wrkr status: %ld (1->ok)\n", 
-		 long(status)));
+	LOGDEB0(("FsIndexer: internfile wrkr status: %p (1->ok)\n", status));
    }
    if (m_haveSplitQ) {
 	status = m_dwqueue.setTerminateAndWait();
-	LOGDEB0(("FsIndexer: dbupd worker status: %ld (1->ok)\n", 
-		 long(status)));
+	LOGDEB0(("FsIndexer: dbupd worker status: %p (1->ok)\n", status));
    }
    delete m_stableconfig;
 #endif // IDX_THREADS
@ -259,7 +256,7 @@ static bool matchesSkipped(const vector<string>& tdl,
    string canonpath = path_canon(path);
    string mpath = canonpath;
    string topdir;
-    while (mpath.length() > 1) {
+    while (!path_isroot(mpath)) { // we assume root not in skipped paths.
        for (vector<string>::const_iterator it = tdl.begin();  
             it != tdl.end(); it++) {
            // the topdirs members are already canonized.
@ -281,7 +278,7 @@ static bool matchesSkipped(const vector<string>& tdl,
        mpath = path_getfather(mpath);
        // getfather normally returns a path ending with /, canonic
        // paths don't (except for '/' itself).
-        if (!mpath.empty() && mpath[mpath.size()-1] == '/')
+        if (!path_isroot(mpath) && mpath[mpath.size()-1] == '/')
            mpath.erase(mpath.size()-1);
        // should not be necessary, but lets be prudent. If the
        // path did not shorten, something is seriously amiss
@ -330,7 +327,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
 {
    LOGDEB(("FsIndexer::indexFiles\n"));
    m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
-    int ret = false;
+    bool ret = false;

    if (!init())
        return false;
@ -703,7 +700,7 @@ FsIndexer::processonefile(RclConfig *config,
    }

    LOGDEB0(("processone: processing: [%s] %s\n", 
-             displayableBytes(stp->st_size).c_str(), fn.c_str()));
+             displayableBytes(off_t(stp->st_size)).c_str(), fn.c_str()));

    string utf8fn = compute_utf8fn(config, fn);

@ -772,7 +769,7 @@ FsIndexer::processonefile(RclConfig *config,
 	    if (doc.fmtime.empty())
 		doc.fmtime = ascdate;
 	    if (doc.url.empty())
-		doc.url = cstr_fileu + fn;
+		doc.url = path_pathtofileurl(fn);
 	    const string *fnp = 0;
 	    if (doc.ipath.empty()) {
                if (!doc.peekmeta(Rcl::Doc::keyfn, &fnp) || fnp->empty())
@ -868,7 +865,7 @@ FsIndexer::processonefile(RclConfig *config,
                fileDoc.meta[Rcl::Doc::keytcfn] = utf8fn;
 	    fileDoc.haschildren = true;
 	    fileDoc.mimetype = mimetype;
-	    fileDoc.url = cstr_fileu + fn;
+	    fileDoc.url = path_pathtofileurl(fn);
 	    if (m_havelocalfields) 
 		setlocalfields(localfields, fileDoc);
 	    char cbuf[100]; 
--- a/src/index/indexer.cpp
+++ b/src/index/indexer.cpp
@ -14,9 +14,7 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
-#ifdef HAVE_CONFIG_H
 #include "autoconfig.h"
-#endif

 #include <stdio.h>
 #include <errno.h>
@ -27,7 +25,9 @@
 #include "debuglog.h"
 #include "indexer.h"
 #include "fsindexer.h"
+#ifndef DISABLE_WEB_INDEXER
 #include "beaglequeue.h"
+#endif
 #include "mimehandler.h"
 #include "pathut.h"

@ -46,7 +46,9 @@ ConfIndexer::ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc)
 ConfIndexer::~ConfIndexer()
 {
     deleteZ(m_fsindexer);
+#ifndef DISABLE_WEB_INDEXER
     deleteZ(m_beagler);
+#endif
 }

 // Determine if this is likely the first time that the user runs
@ -107,7 +109,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
            return false;
        }
    }
-
+#ifndef DISABLE_WEB_INDEXER
    if (m_dobeagle && (typestorun & IxTBeagleQueue)) {
        deleteZ(m_beagler);
        m_beagler = new BeagleQueueIndexer(m_config, &m_db, m_updater);
@ -116,7 +118,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
            return false;
        }
    }
-
+#endif
    if (typestorun == IxTAll) {
        // Get rid of all database entries that don't exist in the
        // filesystem anymore. Only if all *configured* indexers ran.
@ -173,6 +175,7 @@ bool ConfIndexer::indexFiles(list<string>& ifiles, int flag)
        ret = m_fsindexer->indexFiles(myfiles, flag);
    LOGDEB2(("ConfIndexer::indexFiles: fsindexer returned %d, "
            "%d files remainining\n", ret, myfiles.size()));
+#ifndef DISABLE_WEB_INDEXER

    if (m_dobeagle && !myfiles.empty() && !(flag & IxFNoWeb)) {
        if (!m_beagler)
@ -183,7 +186,7 @@ bool ConfIndexer::indexFiles(list<string>& ifiles, int flag)
            ret = false;
        }
    }
-
+#endif
    // The close would be done in our destructor, but we want status here
    if (!m_db.close()) {
 	LOGERR(("ConfIndexer::index: error closing database in %s\n", 
@ -255,6 +258,7 @@ bool ConfIndexer::purgeFiles(std::list<string> &files, int flag)
    if (m_fsindexer)
        ret = m_fsindexer->purgeFiles(myfiles);

+#ifndef DISABLE_WEB_INDEXER
    if (m_dobeagle && !myfiles.empty() && !(flag & IxFNoWeb)) {
        if (!m_beagler)
            m_beagler = new BeagleQueueIndexer(m_config, &m_db, m_updater);
@ -264,6 +268,7 @@ bool ConfIndexer::purgeFiles(std::list<string> &files, int flag)
            ret = false;
        }
    }
+#endif

    // The close would be done in our destructor, but we want status here
    if (!m_db.close()) {
--- a/src/index/indexer.h
+++ b/src/index/indexer.h
@ -16,20 +16,18 @@
 */
 #ifndef _INDEXER_H_INCLUDED_
 #define _INDEXER_H_INCLUDED_
+#include "rclconfig.h"

 #include <string>
 #include <list>
 #include <map>
 #include <vector>

-#ifndef NO_NAMESPACES
 using std::string;
 using std::list;
 using std::map;
 using std::vector;
-#endif

-#include "rclconfig.h"
 #include "rcldb.h"
 #include "rcldoc.h"
 #ifdef IDX_THREADS
--- a/src/index/mimetype.cpp
+++ b/src/index/mimetype.cpp
@ -140,6 +140,7 @@ string mimetype(const string &fn, const struct stat *stp,

    string mtype;

+#ifndef _WIN32
    // Extended attribute has priority on everything, as per:
    // http://freedesktop.org/wiki/CommonExtendedAttributes
    if (pxattr::get(fn, "mime_type", &mtype)) {
@ -150,6 +151,7 @@ string mimetype(const string &fn, const struct stat *stp,
 	    return mtype;
 	}
    }
+#endif

    if (cfg == 0)  {
 	LOGERR(("Mimetype: null config ??\n"));
@ -177,7 +179,6 @@ string mimetype(const string &fn, const struct stat *stp,
    if (mtype.empty() && stp)
 	mtype = mimetypefromdata(cfg, fn, usfc);

- out:
    return mtype;
 }

--- a/src/index/mimetype.h
+++ b/src/index/mimetype.h
@ -17,10 +17,10 @@
 #ifndef _MIMETYPE_H_INCLUDED_
 #define _MIMETYPE_H_INCLUDED_

+#include "safesysstat.h"
 #include <string>

 class RclConfig;
-struct stat;

 /**
 * Try to determine a mime type for file. 
--- a/src/index/rclmonrcv.cpp
+++ b/src/index/rclmonrcv.cpp
@ -469,7 +469,7 @@ bool RclFAM::getEvent(RclMonEvent& ev, int msecs)
    MONDEB(("RclFAM::getEvent: FAMNextEvent returned\n"));
    
    map<int,string>::const_iterator it;
-    if ((fe.filename[0] != '/') && 
+    if ((!path_isabsolute(fe.filename)) && 
 	(it = m_idtopath.find(fe.fr.reqnum)) != m_idtopath.end()) {
 	ev.m_path = path_cat(it->second, fe.filename);
    } else {
--- a/src/index/recollindex.cpp
+++ b/src/index/recollindex.cpp
@ -14,16 +14,18 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
-#ifdef HAVE_CONFIG_H
 #include "autoconfig.h"
-#endif

 #include <stdio.h>
 #include <signal.h>
 #include <errno.h>
+#ifndef _WIN32
 #include <sys/time.h>
 #include <sys/resource.h>
-#include <fcntl.h>
+#else
+#include <direct.h>
+#endif
+#include "safefcntl.h"
 #include "safeunistd.h"

 #include <iostream>
@ -42,10 +44,14 @@ using namespace std;
 #include "x11mon.h"
 #include "cancelcheck.h"
 #include "rcldb.h"
+#ifndef DISABLE_WEB_INDEXER
 #include "beaglequeue.h"
+#endif
 #include "recollindex.h"
 #include "fsindexer.h"
+#ifndef _WIN32
 #include "rclionice.h"
+#endif
 #include "execmd.h"
 #include "checkretryfailed.h"

@ -133,6 +139,7 @@ class MyUpdater : public DbIxStatusUpdater {
 	    return false;
 	}

+#ifndef DISABLE_X11MON
 	// If we are in the monitor, we also need to check X11 status
 	// during the initial indexing pass (else the user could log
 	// out and the indexing would go on, not good (ie: if the user
@ -142,7 +149,7 @@ class MyUpdater : public DbIxStatusUpdater {
 	    stopindexing = true;
 	    return false;
 	}
-
+#endif
 	return true;
    }

@ -177,11 +184,13 @@ static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset)

 void rclIxIonice(const RclConfig *config)
 {
+#ifndef _WIN32
    string clss, classdata;
    if (!config->getConfParam("monioniceclass", clss) || clss.empty())
 	clss = "3";
    config->getConfParam("monioniceclassdata", classdata);
    rclionice(clss, classdata);
+#endif
 }

 class MakeListWalkerCB : public FsTreeWalkerCB {
@ -273,7 +282,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)

    for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
 	*it = path_tildexpand(*it);
-        if (!it->size() || (*it)[0] != '/') {
+        if (!it->size() || !path_isabsolute(*it)) {
            if ((*it)[0] == '~') {
                cerr << "Tilde expansion failed: " << *it << endl;
                LOGERR(("recollindex: tilde expansion failed: %s\n",
@ -285,7 +294,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
            }
            return false;
        }
-        if (access(it->c_str(), 0) < 0) {
+        if (!path_exists(*it)) {
            nonexist.push_back(*it);
        }
    }
@ -295,7 +304,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
    if (config->getConfParam("skippedPaths", &tdl, true)) {
        for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
            *it = path_tildexpand(*it);
-            if (access(it->c_str(), 0) < 0) {
+            if (!path_exists(*it)) {
                nonexist.push_back(*it);
            }
        }
@ -304,7 +313,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
    if (config->getConfParam("daemSkippedPaths", &tdl, true)) {
        for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
            *it = path_tildexpand(*it);
-            if (access(it->c_str(), 0) < 0) {
+            if (!path_exists(*it)) {
                nonexist.push_back(*it);
            }
        }
@ -393,8 +402,10 @@ int main(int argc, char **argv)
    // The reexec struct is used by the daemon to shed memory after
    // the initial indexing pass and to restart when the configuration
    // changes
+#ifndef _WIN32
    o_reexec = new ReExec;
    o_reexec->init(argc, argv);
+#endif

    thisprog = argv[0];
    argc--; argv++;
@ -463,7 +474,9 @@ int main(int argc, char **argv)
 	cerr << "Configuration problem: " << reason << endl;
 	exit(1);
    }
+#ifndef _WIN32
    o_reexec->atexit(cleanup);
+#endif

    vector<string> nonexist;
    if (!checktopdirs(config, nonexist))
@ -511,9 +524,11 @@ int main(int argc, char **argv)
    if (op_flags & OPT_k) {
        indexerFlags &= ~ConfIndexer::IxFNoRetryFailed; 
    } else {
+#ifndef _WIN32
        if (checkRetryFailed(config, false)) {
            indexerFlags &= ~ConfIndexer::IxFNoRetryFailed; 
        }
+#endif
    }

    Pidfile pidfile(config->getPidfile());
@ -522,12 +537,13 @@ int main(int argc, char **argv)
    // Log something at LOGINFO to reset the trace file. Else at level
    // 3 it's not even truncated if all docs are up to date.
    LOGINFO(("recollindex: starting up\n"));
-
+#ifndef _WIN32
    if (setpriority(PRIO_PROCESS, 0, 20) != 0) {
        LOGINFO(("recollindex: can't setpriority(), errno %d\n", errno));
    }
    // Try to ionice. This does not work on all platforms
    rclIxIonice(config);
+#endif

    if (op_flags & (OPT_i|OPT_e)) {
 	lockorexit(&pidfile);
@ -596,15 +612,17 @@ int main(int argc, char **argv)
 	lockorexit(&pidfile);
 	if (!(op_flags&OPT_D)) {
 	    LOGDEB(("recollindex: daemonizing\n"));
+#ifndef _WIN32
 	    if (daemon(0,0) != 0) {
 	      fprintf(stderr, "daemon() failed, errno %d\n", errno);
 	      LOGERR(("daemon() failed, errno %d\n", errno));
 	      exit(1);
 	    }
+#endif
 	}
 	// Need to rewrite pid, it changed
 	pidfile.write_pid();
-
+#ifndef _WIN32
        // Not too sure if I have to redo the nice thing after daemon(),
        // can't hurt anyway (easier than testing on all platforms...)
        if (setpriority(PRIO_PROCESS, 0, 20) != 0) {
@ -612,6 +630,7 @@ int main(int argc, char **argv)
        }
 	// Try to ionice. This does not work on all platforms
 	rclIxIonice(config);
+#endif

 	if (sleepsecs > 0) {
 	    LOGDEB(("recollindex: sleeping %d\n", sleepsecs));
@ -633,12 +652,15 @@ int main(int argc, char **argv)
 			"not going into monitor mode\n"));
 		exit(1);
 	    } else {
+#ifndef _WIN32
                // Record success of indexing pass with failed files retries.
                if (!(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
                    checkRetryFailed(config, true);
                }
+#endif
            }
 	    deleteZ(confindexer);
+#ifndef _WIN32
 	    o_reexec->insertArgs(vector<string>(1, "-n"));
 	    LOGINFO(("recollindex: reexecuting with -n after initial full pass\n"));
 	    // Note that -n will be inside the reexec when we come
@ -646,6 +668,7 @@ int main(int argc, char **argv)
 	    // starting a config change exec to ensure that we do a
 	    // purging pass in this case.
 	    o_reexec->reexec();
+#endif
 	}
        if (updater) {
 	    updater->status.phase = DbIxStatus::DBIXS_MONITOR;
@ -672,11 +695,12 @@ int main(int argc, char **argv)
 	makeIndexerOrExit(config, inPlaceReset);
 	bool status = confindexer->index(rezero, ConfIndexer::IxTAll, 
                                         indexerFlags);
-
+#ifndef _WIN32
        // Record success of indexing pass with failed files retries.
        if (status && !(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
            checkRetryFailed(config, true);
        }
+#endif
 	if (!status) 
 	    cerr << "Indexing failed" << endl;
        if (!confindexer->getReason().empty())
--- a/src/internfile/Filter.h
+++ b/src/internfile/Filter.h
@ -108,7 +108,7 @@ namespace Dijon
 	 */
 	virtual bool set_document_data(const std::string& mtype, 
 				       const char *data_ptr, 
-				       unsigned int data_length) = 0;
+				       size_t data_length) = 0;

 	/** (Re)initializes the filter with the given data.
 	 * Call next_document() to position the filter onto the first document.
@ -140,7 +140,7 @@ namespace Dijon
 	    stat() calls The value is stored inside metaData, docsize
 	    key
 	*/
-	virtual void set_docsize(size_t size) = 0;
+	virtual void set_docsize(off_t size) = 0;

 	// Going from one nested document to the next.

--- a/src/internfile/extrameta.cpp
+++ b/src/internfile/extrameta.cpp
@ -46,7 +46,7 @@ void reapXAttrs(const RclConfig* cfg, const string& path,
 		map<string, string>& xfields)
 {
    LOGDEB2(("reapXAttrs: [%s]\n", path.c_str()));
-    
+#ifndef _WIN32
    // Retrieve xattrs names from files and mapping table from config
    vector<string> xnames;
    if (!pxattr::list(path, &xnames)) {
@ -79,6 +79,7 @@ void reapXAttrs(const RclConfig* cfg, const string& path,
 	xfields[key] = value;
 	LOGDEB2(("reapXAttrs: [%s] -> [%s]\n", key.c_str(), value.c_str()));
    }
+#endif
 }

 void docFieldsFromXattrs(RclConfig *cfg, const map<string, string>& xfields, 
@ -93,6 +94,7 @@ void docFieldsFromXattrs(RclConfig *cfg, const map<string, string>& xfields,
 void reapMetaCmds(RclConfig* cfg, const string& path, 
 		  map<string, string>& cfields)
 {
+#ifndef _WIN32
    const vector<MDReaper>& reapers = cfg->getMDReapers();
    if (reapers.empty())
 	return;
@ -111,6 +113,7 @@ void reapMetaCmds(RclConfig* cfg, const string& path,
 	    cfields[rp->fieldname] =  output;
 	}
    }
+#endif
 }

 // Set fields from external commands
--- a/src/internfile/internfile.cpp
+++ b/src/internfile/internfile.cpp
@ -169,7 +169,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
            l_mime = *imime;
    }

-    size_t docsize = stp->st_size;
+    off_t docsize = stp->st_size;

    if (!l_mime.empty()) {
 	// Has mime: check for a compressed file. If so, create a
@ -216,7 +216,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
    m_mimetype = l_mime;
    RecollFilter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview);

-    if (!df or df->is_unknown()) {
+    if (!df || df->is_unknown()) {
 	// No real handler for this type, for now :( 
 	LOGDEB(("FileInterner:: unprocessed mime: [%s] [%s]\n", 
 		l_mime.c_str(), f.c_str()));
@ -629,7 +629,7 @@ void FileInterner::popHandler()
 {
    if (m_handlers.empty())
 	return;
-    int i = m_handlers.size() - 1;
+    size_t i = m_handlers.size() - 1;
    if (m_tmpflgs[i]) {
 	m_tempfiles.pop_back();
 	m_tmpflgs[i] = false;
--- a/src/internfile/mh_exec.cpp
+++ b/src/internfile/mh_exec.cpp
@ -17,10 +17,10 @@
 #include "autoconfig.h"

 #include <sys/types.h>
-#include <sys/wait.h>
+#include <time.h>
+#include "safesyswait.h"

 #include <list>
-using namespace std;

 #include "cstr.h"
 #include "execmd.h"
@ -32,6 +32,8 @@ using namespace std;
 #include "md5ut.h"
 #include "rclconfig.h"

+using namespace std;
+
 // This is called periodically by ExeCmd when it is waiting for data,
 // or when it does receive some. We may choose to interrupt the
 // command.
--- a/src/internfile/mh_execm.cpp
+++ b/src/internfile/mh_execm.cpp
@ -1,4 +1,4 @@
-/* Copyright (C) 2005 J.F.Dockes 
+	/* Copyright (C) 2005 J.F.Dockes 
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
@ -32,7 +32,7 @@ using namespace std;
 #include "idfile.h"

 #include <sys/types.h>
-#include <sys/wait.h>
+#include "safesyswait.h"

 bool MimeHandlerExecMultiple::startCmd()
 {
@ -90,6 +90,9 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
        LOGERR(("MHExecMultiple: getline error\n"));
        return false;
    }
+    
+    LOGDEB1(("MHEM:rde: line [%s]\n", ibuf.c_str()));
+
    // Empty line (end of message) ?
    if (!ibuf.compare("\n")) {
        LOGDEB(("MHExecMultiple: Got empty line\n"));
@ -163,7 +166,7 @@ bool MimeHandlerExecMultiple::next_document()
 	return false;
    }

-    if (m_cmd.getChildPid() < 0 && !startCmd()) {
+    if (m_cmd.getChildPid() <= 0 && !startCmd()) {
        return false;
    }

--- a/src/internfile/mh_mail.cpp
+++ b/src/internfile/mh_mail.cpp
@ -14,13 +14,15 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
+#include "autoconfig.h"

 #include <stdio.h>
 #include <fcntl.h>
 #include <errno.h>
-#include <unistd.h>
+#include "safeunistd.h"
 #include <time.h>
 #include <cstdlib>
+#include "safesysstat.h"

 #include <map>
 #include <sstream>
--- a/src/internfile/mh_mbox.cpp
+++ b/src/internfile/mh_mbox.cpp
@ -22,7 +22,11 @@
 #include <sys/types.h>
 #include "safesysstat.h"
 #include <time.h>
+#ifndef _WIN32
 #include <regex.h>
+#else 
+#include <regex>
+#endif

 #include <cstring>
 #include <map>
@ -70,6 +74,14 @@ static PTMutexInit o_mcache_mutex;
 * offsets for all message "From_" lines follow. The format is purely
 * binary, values are not even byte-swapped to be proc-idependant.
 */
+
+#ifdef _WIN32
+// MSVC rejects "char blk1[o_b1size]" (apparently o_b1size is not seen as a constant expression): this literal must stay equal to MboxCache::o_b1size
+#define M_o_b1size 1024
+#else
+#define M_o_b1size o_b1size
+#endif
+
 class MboxCache {
 public:
    typedef MimeHandlerMbox::mbhoff_type mbhoff_type;
@ -98,7 +110,7 @@ public:
        }
        FpKeeper keeper(&fp);

-        char blk1[o_b1size];
+        char blk1[M_o_b1size];
        if (fread(blk1, 1, o_b1size, fp) != o_b1size) {
            LOGDEB0(("MboxCache::get_offsets: read blk1 errno %d\n", errno));
            return -1;
@ -116,7 +128,7 @@ public:
            return -1;
        }
        mbhoff_type offset = -1;
-        int ret;
+        size_t ret;
        if ((ret = fread(&offset, 1, sizeof(mbhoff_type), fp))
            != sizeof(mbhoff_type)) {
            LOGDEB0(("MboxCache::get_offsets: read ret %d errno %d\n", 
@ -184,7 +196,7 @@ public:
                m_dir = "mboxcache";
            m_dir = path_tildexpand(m_dir);
            // If not an absolute path, compute relative to config dir
-            if (m_dir.at(0) != '/')
+            if (!path_isabsolute(m_dir))
                m_dir = path_cat(config->getConfDir(), m_dir);
            m_ok = true;
        }
@ -226,7 +238,6 @@ private:
 };

 const size_t MboxCache::o_b1size = 1024;
-
 static class MboxCache o_mcache;

 static const string cstr_keyquirks("mhmboxquirks");
@ -307,7 +318,7 @@ bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
 typedef char line_type[LL+10];
 static inline void stripendnl(line_type& line, int& ll)
 {
-    ll = strlen(line);
+    ll = int(strlen(line));
    while (ll > 0) {
 	if (line[ll-1] == '\n' || line[ll-1] == '\r') {
 	    line[ll-1] = 0;
@ -376,9 +387,20 @@ static const  char *frompat =
 // exactly like: From ^M (From followed by space and eol). We only
 // test for this if QUIRKS_TBIRD is set
 static const char *miniTbirdFrom = "^From $";
-
+// Portability shim for the From_ line regular expressions.
+// M_regexec(A,B,C,D,E) keeps the POSIX regexec() contract on every
+// platform: it evaluates to 0 when string B matches compiled expression
+// A, non-zero otherwise, so that call sites can test !M_regexec(...).
+#ifndef _WIN32
 static regex_t fromregex;
 static regex_t minifromregex;
+#define M_regexec(A,B,C,D,E) regexec(&(A),B,C,D,E)
+#else
+static basic_regex<char> fromregex;
+static basic_regex<char> minifromregex;
+#define REG_ICASE std::regex_constants::icase
+#define REG_NOSUB std::regex_constants::nosubs
+#define REG_EXTENDED std::regex_constants::extended
+// regexec() returns 0 on success and looks for a match anywhere in the
+// string. The std:: equivalent is regex_search() (regex_match() would
+// demand that the whole line matches), negated to yield 0 on a match.
+#define M_regexec(A, B, C, D, E) (!std::regex_search(B, A))
+
+#endif
+
 static bool regcompiled;
 static PTMutexInit o_regex_mutex;

@ -390,8 +412,13 @@ static void compileregexes()
    // that we are alone.
    if (regcompiled)
 	return;
+#ifndef _WIN32
    regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED);
    regcomp(&minifromregex, miniTbirdFrom, REG_NOSUB|REG_EXTENDED);
+#else
+    fromregex = basic_regex<char>(frompat, REG_NOSUB | REG_EXTENDED);
+    minifromregex = basic_regex<char>(miniTbirdFrom, REG_NOSUB | REG_EXTENDED);
+#endif
    regcompiled = true;
 }

@ -440,9 +467,9 @@ bool MimeHandlerMbox::next_document()
            (off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 && 
            fseeko(fp, (off_t)off, SEEK_SET) >= 0 && 
            fgets(line, LL, fp) &&
-            (!regexec(&fromregex, line, 0, 0, 0) || 
+            (!M_regexec(fromregex, line, 0, 0, 0) || 
 	     ((m_quirks & MBOXQUIRK_TBIRD) && 
-	      !regexec(&minifromregex, line, 0, 0, 0)))	) {
+	      !M_regexec(minifromregex, line, 0, 0, 0)))	) {
                LOGDEB0(("MimeHandlerMbox: Cache: From_ Ok\n"));
                fseeko(fp, (off_t)off, SEEK_SET);
                m_msgnum = mtarg -1;
@ -485,9 +512,9 @@ bool MimeHandlerMbox::next_document()
 		/* The 'F' compare is redundant but it improves performance
 		   A LOT */
 		if (line[0] == 'F' && (
-		    !regexec(&fromregex, line, 0, 0, 0) || 
+		    !M_regexec(fromregex, line, 0, 0, 0) || 
 		    ((m_quirks & MBOXQUIRK_TBIRD) && 
-		     !regexec(&minifromregex, line, 0, 0, 0)))
+		     !M_regexec(minifromregex, line, 0, 0, 0)))
 		    ) {
 		    LOGDEB1(("MimeHandlerMbox: msgnum %d, "
 		     "From_ at line %d: [%s]\n", m_msgnum, m_lineno, line));
--- a/src/internfile/mh_null.h
+++ b/src/internfile/mh_null.h
@ -0,0 +1,59 @@
+/* Copyright (C) 2004 J.F.Dockes
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#ifndef _MH_NULL_H_INCLUDED_
+#define _MH_NULL_H_INCLUDED_
+
+// It may make sense in some cases to set this null filter (no output)
+// instead of using recoll_noindex or leaving the default filter in
+// case one doesn't want to install it: this will avoid endless retries
+// to reindex the affected files, as recoll will think it has succeeded
+// indexing them. Downside: the files won't be indexed when one
+// actually installs the real filter, will need a -z
+// Actually used for empty files
+// Associated to application/x-zerosize, so use 
+// <mimetype> = internal application/x-zerosize
+// in mimeconf
+#include <string>
+#include "cstr.h"
+#include "mimehandler.h"
+
+/**
+ * Filter which produces no real output: it accepts any file name and
+ * yields exactly one document, with the content set to cstr_null and
+ * the mime type set to cstr_textplain.
+ */
+class MimeHandlerNull : public RecollFilter {
+ public:
+    MimeHandlerNull(RclConfig *cnf, const std::string& id) 
+	: RecollFilter(cnf, id) 
+    {
+    }
+    virtual ~MimeHandlerNull() 
+    {
+    }
+    // Hand the file over to the base class (its result is ignored) and
+    // arm the single pseudo-document. Always returns true.
+    // Names are std::-qualified: this is a header and must not depend on
+    // a using-declaration leaking from another include.
+    virtual bool set_document_file(const std::string& mt,
+                                   const std::string& fn) 
+    {
+	RecollFilter::set_document_file(mt, fn);
+	return m_havedoc = true;
+    }
+    // Deliver the single pseudo-document on the first call after
+    // set_document_file(), then report that nothing is left.
+    virtual bool next_document() 
+    {
+	if (!m_havedoc)
+	    return false;
+	m_havedoc = false; 
+	m_metaData[cstr_dj_keycontent] = cstr_null;
+	m_metaData[cstr_dj_keymt] = cstr_textplain;
+	return true;
+    }
+};
+
+#endif /* _MH_NULL_H_INCLUDED_ */
--- a/src/internfile/mh_symlink.h
+++ b/src/internfile/mh_symlink.h
@ -18,7 +18,7 @@
 #define _MH_SYMLINK_H_INCLUDED_

 #include <string>
-#include <unistd.h>
+#include "safeunistd.h"
 #include <errno.h>

 #include "cstr.h"
--- a/src/internfile/mh_text.cpp
+++ b/src/internfile/mh_text.cpp
@ -18,10 +18,13 @@

 #include <stdio.h>
 #include <errno.h>
+#include "safefcntl.h"
+#include <sys/types.h>
+#include "safesysstat.h"
+#include "safeunistd.h"

 #include <iostream>
 #include <string>
-using namespace std;

 #include "cstr.h"
 #include "mh_text.h"
@ -32,16 +35,23 @@ using namespace std;
 #include "pxattr.h"
 #include "pathut.h"

+using namespace std;
+
 const int MB = 1024*1024;
 const int KB = 1024;

 // Process a plain text file
 bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
 {
-    LOGDEB(("MimeHandlerText::set_document_file: [%s]\n", fn.c_str()));
+    // m_offs is an off_t, whose width is platform-dependent (e.g. a 32 bits
+    // long with MSVC): cast it so that the argument really matches %lld.
+    LOGDEB(("MimeHandlerText::set_document_file: [%s] offs %lld\n",
+            fn.c_str(), (long long)m_offs));

    RecollFilter::set_document_file(mt, fn);
+
    m_fn = fn;
+    // This should not be necessary, but it happens on msw that offset is large
+    // negative at this point, could not find the reason (still trying).
+    m_offs = 0;

    // file size for oversize check
    long long fsize = path_filesize(m_fn);
@ -51,9 +61,11 @@ bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
        return false;
    }

+#ifndef _WIN32
    // Check for charset defined in extended attribute as per:
    // http://freedesktop.org/wiki/CommonExtendedAttributes
    pxattr::get(m_fn, "charset", &m_charsetfromxattr);
+#endif

    // Max file size parameter: texts over this size are not indexed
    int maxmbs = 20;
--- a/src/internfile/mh_text.h
+++ b/src/internfile/mh_text.h
@ -19,7 +19,6 @@
 #include <sys/types.h>

 #include <string>
-using std::string;

 #include "mimehandler.h"

@ -30,22 +29,22 @@ using std::string;
 */
 class MimeHandlerText : public RecollFilter {
 public:
-    MimeHandlerText(RclConfig *cnf, const string& id) 
-        : RecollFilter(cnf, id), m_paging(false), m_offs(0) 
+    MimeHandlerText(RclConfig *cnf, const std::string& id) 
+        : RecollFilter(cnf, id), m_paging(false), m_offs(0), m_pagesz(0)
    {
    }
    virtual ~MimeHandlerText() 
    {
    }
-    virtual bool set_document_file(const string& mt, const string &file_path);
-    virtual bool set_document_string(const string&, const string&);
+    virtual bool set_document_file(const std::string& mt, const std::string &file_path);
+    virtual bool set_document_string(const std::string&, const std::string&);
    virtual bool is_data_input_ok(DataInput input) const {
 	if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
 	    return true;
 	return false;
    }
    virtual bool next_document();
-    virtual bool skip_to_document(const string& s);
+    virtual bool skip_to_document(const std::string& s);
    virtual void clear() 
    {
        m_paging = false;
@ -56,11 +55,11 @@ class MimeHandlerText : public RecollFilter {
    }
 private:
    bool   m_paging;
-    string m_text;
-    string m_fn;
+    std::string m_text;
+    std::string m_fn;
    off_t  m_offs; // Offset of next read in file if we're paging
    size_t m_pagesz;
-    string m_charsetfromxattr; 
+    std::string m_charsetfromxattr; 

    bool readnext();
 };
--- a/src/internfile/mimehandler.cpp
+++ b/src/internfile/mimehandler.cpp
@ -31,7 +31,6 @@ using namespace std;
 #include "rclconfig.h"
 #include "smallut.h"
 #include "md5ut.h"
-
 #include "mh_exec.h"
 #include "mh_execm.h"
 #include "mh_html.h"
@ -40,6 +39,7 @@ using namespace std;
 #include "mh_text.h"
 #include "mh_symlink.h"
 #include "mh_unknown.h"
+#include "mh_null.h"
 #include "ptmutex.h"

 // Performance help: we use a pool of already known and created
@ -163,6 +163,10 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
 	LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str()));
 	MD5String("MimeHandlerSymlink", id);
 	return nobuild ? 0 : new MimeHandlerSymlink(config, id);
+    } else if ("application/x-zerosize" == lmime) {
+	LOGDEB(("mhFactory(%s): ret MimeHandlerNull\n", mime.c_str()));
+	MD5String("MimeHandlerNull", id);
+	return nobuild ? 0 : new MimeHandlerNull(config, id);
    } else if (lmime.find("text/") == 0) {
        // Try to handle unknown text/xx as text/plain. This
        // only happen if the text/xx was defined as "internal" in
@ -206,7 +210,7 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
    }

    // Split command name and args, and build exec object
-    list<string> cmdtoks;
+    vector<string> cmdtoks;
    stringToStrings(cmdstr, cmdtoks);
    if (cmdtoks.empty()) {
 	LOGERR(("mhExecFactory: bad config line for [%s]: [%s]\n", 
@ -216,7 +220,22 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
    MimeHandlerExec *h = multiple ? 
 	new MimeHandlerExecMultiple(cfg, id) :
        new MimeHandlerExec(cfg, id);
-    list<string>::iterator it = cmdtoks.begin();
+    vector<string>::iterator it = cmdtoks.begin();
+
+    // Special-case python and perl on windows: we need to also locate the
+    // first argument which is the script name "python somescript.py". 
+    // On Unix, thanks to #!, we usually just run "somescript.py", but need
+    // the same change if we ever want to use the same command line as windows
+    if (!stringlowercmp("python", *it) || !stringlowercmp("perl", *it)) {
+        if (cmdtoks.size() < 2) {
+            LOGERR(("mhExecFactory: python/perl cmd: no script?. [%s]: [%s]\n", 
+                    mtype.c_str(), hs.c_str()));
+        } else {
+            // Only resolve the script path when there is a second token:
+            // stepping past a lone interpreter name would dereference the
+            // end of cmdtoks.
+            vector<string>::iterator it1(it);
+            ++it1;
+            *it1 = cfg->findFilter(*it1);
+        }
+    }
+            
    h->params.push_back(cfg->findFilter(*it++));
    h->params.insert(h->params.end(), it, cmdtoks.end());

--- a/src/internfile/mimehandler.h
+++ b/src/internfile/mimehandler.h
@ -86,12 +86,12 @@ public:
 	return false;
    }
    virtual bool set_document_data(const std::string& mtype, 
-				   const char *cp, unsigned int sz) 
+				   const char *cp, size_t sz) 
    {
 	return set_document_string(mtype, std::string(cp, sz));
    }

-    virtual void set_docsize(size_t size)
+    virtual void set_docsize(off_t size)
    {
 	char csize[30];
 	sprintf(csize, "%lld", (long long)size);
--- a/src/internfile/myhtmlparse.cpp
+++ b/src/internfile/myhtmlparse.cpp
@ -23,6 +23,10 @@
 * -----END-LICENCE-----
 */
 #include <time.h>
+#ifdef _WIN32
+// Local implementation in windows directory
+#include "strptime.h" 
+#endif
 #include <stdio.h>
 #include <algorithm>
 #include <cstring>
--- a/src/internfile/uncomp.cpp
+++ b/src/internfile/uncomp.cpp
@ -108,8 +108,12 @@ bool Uncomp::uncompressfile(const string& ifn,
    }

    // Execute command and retrieve output file name, check that it exists
+#ifndef _WIN32
    ExecCmd ex;
    int status = ex.doexec(cmd, args, 0, &tfile);
+#else
+	int status = -1;
+#endif
    if (status || tfile.empty()) {
 	LOGERR(("uncompressfile: doexec: failed for [%s] status 0x%x\n", 
 		ifn.c_str(), status));
--- a/src/qtgui/guiutils.cpp
+++ b/src/qtgui/guiutils.cpp
@ -14,7 +14,6 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
-#include <unistd.h>

 #include <algorithm>
 #include <cstdio>
--- a/src/qtgui/main.cpp
+++ b/src/qtgui/main.cpp
@ -14,11 +14,8 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
-
-
 #include "autoconfig.h"

-#include <unistd.h>
 #include <cstdlib>

 #include <qapplication.h>
@ -164,14 +161,6 @@ static void recollCleanup()
    LOGDEB2(("recollCleanup: done\n"));
 }

-static void sigcleanup(int)
-{
-    // We used to not call exit from here, because of the idxthread, but
-    // this is now gone, so...
-    recollNeedsExit = 1;
-    exit(1);
-}
-
 void applyStyleSheet(const QString& ssfname)
 {
    const char *cfname = (const char *)ssfname.toLocal8Bit();
@ -305,7 +294,7 @@ int main(int argc, char **argv)


    string reason;
-    theconfig = recollinit(recollCleanup, sigcleanup, reason, &a_config);
+    theconfig = recollinit(recollCleanup, 0, reason, &a_config);
    if (!theconfig || !theconfig->ok()) {
 	QString msg = "Configuration problem: ";
 	msg += QString::fromUtf8(reason.c_str());
--- a/src/qtgui/preview_w.cpp
+++ b/src/qtgui/preview_w.cpp
--- a/src/qtgui/rclm_idx.cpp
+++ b/src/qtgui/rclm_idx.cpp
@ -16,6 +16,8 @@
 */
 #include "autoconfig.h"

+#include <signal.h>
+
 #include <QMessageBox>
 #include <QTimer>

--- a/src/qtgui/rclm_view.cpp
+++ b/src/qtgui/rclm_view.cpp
@ -310,7 +310,7 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
 	istempfile = true;
 	rememberTempFile(temp);
 	fn = temp->filename();
-	url = string("file://") + fn;
+	url = path_pathtofileurl(fn);
    }

    // If using an actual file, check that it exists, and if it is
@ -335,7 +335,7 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
        if (temp) {
 	    rememberTempFile(temp);
            fn = temp->filename();
-            url = string("file://") + fn;
+            url = path_pathtofileurl(fn);
        }
    }

@ -430,16 +430,16 @@ void RclMain::startManual()
 void RclMain::startManual(const string& index)
 {
    Rcl::Doc doc;
-    doc.url = "file://";
-    doc.url = path_cat(doc.url, theconfig->getDatadir());
-    doc.url = path_cat(doc.url, "doc");
-    doc.url = path_cat(doc.url, "usermanual.html");
+    string path = theconfig->getDatadir();
+    path = path_cat(path, "doc");
+    path = path_cat(path, "usermanual.html");
    LOGDEB(("RclMain::startManual: help index is %s\n", 
 	    index.empty()?"(null)":index.c_str()));
    if (!index.empty()) {
-	doc.url += "#";
-	doc.url += index;
+	path += "#";
+	path += index;
    }
+    doc.url = path_pathtofileurl(path);
    doc.mimetype = "text/html";
    startNativeViewer(doc);
 }
--- a/src/qtgui/rclmain_w.cpp
+++ b/src/qtgui/rclmain_w.cpp
@ -16,9 +16,6 @@
 */
 #include "autoconfig.h"

-#include <fcntl.h>
-#include "safeunistd.h"
-
 #include <utility>
 #include MEMORY_INCLUDE

@ -138,10 +135,14 @@ void RclMain::init()

    // idxstatus file. Make sure it exists before trying to watch it
    // (case where we're started on an older index, or if the status
-    // file was deleted since indexing
-    ::close(::open(theconfig->getIdxStatusFile().c_str(), O_CREAT, 0600));
-    m_watcher.addPath(QString::fromLocal8Bit(
-			  theconfig->getIdxStatusFile().c_str()));
+    // file was deleted since indexing)
+    QString idxfn = 
+        QString::fromLocal8Bit(theconfig->getIdxStatusFile().c_str());
+    QFile qf(idxfn);
+    qf.open(QIODevice::ReadWrite);
+    qf.setPermissions(QFile::ReadOwner|QFile::WriteOwner);
+    qf.close();
+    m_watcher.addPath(idxfn);
    // At least some versions of qt4 don't display the status bar if
    // it's not created here.
    (void)statusBar();
@ -728,7 +729,7 @@ void RclMain::initiateQuery()
 	qApp->processEvents();
 	if (progress.wasCanceled()) {
 	    // Just get out of there asap. 
-	    _exit(1);
+	    exit(1);
 	}

 	qApp->processEvents();
--- a/src/qtgui/rtitool.cpp
+++ b/src/qtgui/rtitool.cpp
@ -19,6 +19,7 @@
 #include <stdio.h>
 #include "safesysstat.h"
 #include "safeunistd.h"
+#include <signal.h>

 #include <string>

--- a/src/qtgui/snippets_w.cpp
+++ b/src/qtgui/snippets_w.cpp
@ -16,7 +16,6 @@
 */
 #include "autoconfig.h"

-#include <unistd.h>
 #include <stdio.h>

 #include <string>
--- a/src/qtgui/spell_w.cpp
+++ b/src/qtgui/spell_w.cpp
@ -17,7 +17,6 @@
 #include "autoconfig.h"

 #include <stdio.h>
-#include <unistd.h>

 #include <algorithm>
 #include <list>
--- a/src/query/docseq.cpp
+++ b/src/query/docseq.cpp
@ -14,6 +14,8 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
+#include "autoconfig.h"
+
 #include "docseq.h"
 #include "filtseq.h"
 #include "sortseq.h"
--- a/src/query/docseqdb.cpp
+++ b/src/query/docseqdb.cpp
@ -14,17 +14,20 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
+#include "autoconfig.h"
+
 #include <math.h>
 #include <time.h>

 #include <list>
-using std::list;

 #include "docseqdb.h"
 #include "rcldb.h"
 #include "debuglog.h"
 #include "wasatorcl.h"

+using std::list;
+
 DocSequenceDb::DocSequenceDb(STD_SHARED_PTR<Rcl::Query> q, const string &t, 
 			     STD_SHARED_PTR<Rcl::SearchData> sdata) 
    : DocSequence(t), m_q(q), m_sdata(sdata), m_fsdata(sdata),
--- a/src/query/docseqhist.cpp
+++ b/src/query/docseqhist.cpp
@ -35,7 +35,7 @@ using std::list;
 bool RclDHistoryEntry::encode(string& value)
 {
    char chartime[30];
-    sprintf(chartime,"%ld", unixtime);
+    sprintf(chartime,"%lld", (long long)unixtime);
    string budi;
    base64_encode(udi, budi);
    value = string("U ") + string(chartime) + " " + budi;
@ -161,5 +161,5 @@ int DocSequenceHistory::getResCnt()
 {	
    if (m_hlist.empty())
 	m_hlist = getDocHistory(m_hist);
-    return m_hlist.size();
+    return int(m_hlist.size());
 }
--- a/src/query/docseqhist.h
+++ b/src/query/docseqhist.h
@ -16,6 +16,7 @@
 */
 #ifndef _DOCSEQHIST_H_INCLUDED_
 #define _DOCSEQHIST_H_INCLUDED_
+#include <time.h>

 #include "docseq.h"
 #include "dynconf.h"
@ -28,13 +29,13 @@ namespace Rcl {
 class RclDHistoryEntry : public DynConfEntry {
 public:
    RclDHistoryEntry() : unixtime(0) {}
-    RclDHistoryEntry(long t, const string& u) 
+    RclDHistoryEntry(time_t t, const string& u) 
 	: unixtime(t), udi(u) {}
    virtual ~RclDHistoryEntry() {}
    virtual bool decode(const string &value);
    virtual bool encode(string& value);
    virtual bool equal(const DynConfEntry& other);
-    long unixtime;
+    time_t unixtime;
    string udi;
 };

@ -57,7 +58,7 @@ private:
    Rcl::Db    *m_db;
    RclDynConf *m_hist;
    int         m_prevnum;
-    long        m_prevtime;
+    time_t      m_prevtime;
    std::string m_description; // This is just an nls translated 'doc history'
    std::list<RclDHistoryEntry> m_hlist;
    std::list<RclDHistoryEntry>::const_iterator m_it;
--- a/src/query/location.hh
+++ b/src/query/location.hh
@ -0,0 +1,187 @@
+// A Bison parser, made by GNU Bison 3.0.2.
+
+// Locations for Bison parsers in C++
+
+// Copyright (C) 2002-2013 Free Software Foundation, Inc.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+// As a special exception, you may create a larger work that contains
+// part or all of the Bison parser skeleton and distribute that work
+// under terms of your choice, so long as that work isn't itself a
+// parser generator using the skeleton or a modified version thereof
+// as a parser skeleton.  Alternatively, if you modify or redistribute
+// the parser skeleton itself, you may (at your option) remove this
+// special exception, which will cause the skeleton and the resulting
+// Bison output files to be licensed under the GNU General Public
+// License without this special exception.
+
+// This special exception was added by the Free Software Foundation in
+// version 2.2 of Bison.
+
+/**
+ ** \file location.hh
+ ** Define the yy::location class.
+ */
+
+#ifndef YY_YY_LOCATION_HH_INCLUDED
+# define YY_YY_LOCATION_HH_INCLUDED
+
+# include "position.hh"
+
+
+namespace yy {
+#line 46 "location.hh" // location.cc:291
+  /// A source region delimited by two positions, \a begin and \a end.
+  class location
+  {
+  public:
+
+    /// Construct a location from \a b to \a e.
+    location (const position& b, const position& e)
+      : begin (b)
+      , end (e)
+    {
+    }
+
+    /// Construct a 0-width location in \a p.
+    explicit location (const position& p = position ())
+      : begin (p)
+      , end (p)
+    {
+    }
+
+    /// Construct a 0-width location in \a f, \a l, \a c.
+    explicit location (std::string* f,
+                       unsigned int l = 1u,
+                       unsigned int c = 1u)
+      : begin (f, l, c)
+      , end (f, l, c)
+    {
+    }
+
+
+    /// Reset to a 0-width location at file \a f, line \a l, column \a c.
+    void initialize (std::string* f = YY_NULLPTR,
+                     unsigned int l = 1u,
+                     unsigned int c = 1u)
+    {
+      begin.initialize (f, l, c);
+      end = begin;
+    }
+
+    /** \name Line and Column related manipulators
+     ** \{ */
+  public:
+    /// Move \a begin up to \a end, leaving a 0-width location at the old end.
+    void step ()
+    {
+      begin = end;
+    }
+
+    /// Advance \a end by \a count columns; \a begin is left untouched.
+    void columns (int count = 1)
+    {
+      end += count;
+    }
+
+    /// Advance \a end by \a count lines (see position::lines); \a begin is left untouched.
+    void lines (int count = 1)
+    {
+      end.lines (count);
+    }
+    /** \} */
+
+
+  public:
+    /// Beginning of the located region.
+    position begin;
+    /// End of the located region.
+    position end;
+  };
+
+  /// Join two locations: the result keeps \a res.begin and takes its end from \a end.end.
+  inline location operator+ (location res, const location& end)
+  {
+    res.end = end.end;
+    return res;
+  }
+
+  /// Advance the end of \a res by \a width columns, in place.
+  inline location& operator+= (location& res, int width)
+  {
+    res.columns (width);
+    return res;
+  }
+
+  /// Return a copy of \a res whose end is advanced by \a width columns.
+  inline location operator+ (location res, int width)
+  {
+    return res += width;
+  }
+
+  /// Move the end of \a res back by \a width columns, in place.
+  inline location& operator-= (location& res, int width)
+  {
+    return res += -width;
+  }
+
+  /// Return a copy of \a begin whose end is moved back by \a width columns.
+  inline location operator- (const location& begin, int width)
+  {
+    return begin + -width;
+  }
+
+  /// Two locations are equal when both their begin and their end positions are equal.
+  inline bool
+  operator== (const location& loc1, const location& loc2)
+  {
+    return loc1.begin == loc2.begin && loc1.end == loc2.end;
+  }
+
+  /// Negation of operator== for locations.
+  inline bool
+  operator!= (const location& loc1, const location& loc2)
+  {
+    return !(loc1 == loc2);
+  }
+
+  /** \brief Print a location: its begin position, then a short suffix for its end.
+   ** \param ostr the destination output stream
+   ** \param loc a reference to the location to print
+   **
+   ** The end is appended after a dash only if it is in another file, on a later line, or at a later column.
+   */
+  template <typename YYChar>
+  inline std::basic_ostream<YYChar>&
+  operator<< (std::basic_ostream<YYChar>& ostr, const location& loc)
+  {
+    unsigned int end_col = 0 < loc.end.column ? loc.end.column - 1 : 0; // loc.end.column - 1, clamped at 0
+    ostr << loc.begin// << "(" << loc.end << ") "
+;
+    if (loc.end.filename
+        && (!loc.begin.filename
+            || *loc.begin.filename != *loc.end.filename))
+      ostr << '-' << loc.end.filename << ':' << loc.end.line << '.' << end_col;
+    else if (loc.begin.line < loc.end.line)
+      ostr << '-' << loc.end.line << '.' << end_col;
+    else if (loc.begin.column < end_col)
+      ostr << '-' << end_col;
+    return ostr;
+  }
+
+
+} // yy
+#line 187 "location.hh" // location.cc:291
+#endif // !YY_YY_LOCATION_HH_INCLUDED
--- a/src/query/plaintorich.cpp
+++ b/src/query/plaintorich.cpp
@ -15,7 +15,7 @@
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

-
+#include <limits.h>
 #include <string>
 #include <utility>
 #include <list>
@ -54,8 +54,8 @@ struct MatchEntry {
    pair<int, int> offs;
    // Index of the search group this comes from: this is to relate a 
    // match to the original user input.
-    unsigned int grpidx;
-    MatchEntry(int sta, int sto, unsigned int idx) 
+    size_t grpidx;
+    MatchEntry(int sta, int sto, size_t idx) 
 	: offs(sta, sto), grpidx(idx)
    {
    }
@ -105,7 +105,7 @@ class TextSplitPTR : public TextSplit {
 	// pos, bts, bte));

 	// If this word is a search term, remember its byte-offset span. 
-	map<string, unsigned int>::const_iterator it = m_terms.find(dumb);
+	map<string, size_t>::const_iterator it = m_terms.find(dumb);
 	if (it != m_terms.end()) {
 	    tboffs.push_back(MatchEntry(bts, bte, (*it).second));
 	}
@ -135,7 +135,7 @@ private:
    int m_wcount;

    // In: user query terms
-    map<string, unsigned int>    m_terms; 
+    map<string, size_t>    m_terms; 

    // m_gterms holds all the terms in m_groups, as a set for quick lookup
    set<string>    m_gterms;
@ -214,7 +214,7 @@ static bool do_proximity_test(int window, vector<vector<int>* >& plists,
 bool TextSplitPTR::matchGroup(unsigned int grpidx)
 {
    const vector<string>& terms = m_hdata.groups[grpidx];
-    int window = m_hdata.groups[grpidx].size() + m_hdata.slacks[grpidx];
+    int window = int(m_hdata.groups[grpidx].size() + m_hdata.slacks[grpidx]);

    LOGDEB1(("TextSplitPTR::matchGroup:d %d: %s\n", window,
 	    vecStringToString(terms).c_str()));
@ -270,7 +270,7 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
    for (vector<int>::iterator it = plists[0]->begin(); 
 	 it != plists[0]->end(); it++) {
 	int pos = *it;
-	int sta = int(10E9), sto = 0;
+	int sta = INT_MAX, sto = 0;
 	LOGDEB2(("MatchGroup: Testing at pos %d\n", pos));
 	if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
 	    LOGDEB1(("TextSplitPTR::matchGroup: MATCH termpos [%d,%d]\n", 
@ -417,10 +417,10 @@ bool PlainToRich::plaintorich(const string& in,
 	// If we still have terms positions, check (byte) position. If
 	// we are at or after a term match, mark.
 	if (tPosIt != tPosEnd) {
-	    int ibyteidx = chariter.getBpos();
+	    int ibyteidx = int(chariter.getBpos());
 	    if (ibyteidx == tPosIt->offs.first) {
 		if (!intag && ibyteidx >= (int)headend) {
-		    *olit += startMatch(tPosIt->grpidx);
+		    *olit += startMatch((unsigned int)(tPosIt->grpidx));
 		}
                inrcltag = 1;
 	    } else if (ibyteidx == tPosIt->offs.second) {
--- a/src/query/position.hh
+++ b/src/query/position.hh
@ -0,0 +1,180 @@
+// A Bison parser, made by GNU Bison 3.0.2.
+
+// Positions for Bison parsers in C++
+
+// Copyright (C) 2002-2013 Free Software Foundation, Inc.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+// As a special exception, you may create a larger work that contains
+// part or all of the Bison parser skeleton and distribute that work
+// under terms of your choice, so long as that work isn't itself a
+// parser generator using the skeleton or a modified version thereof
+// as a parser skeleton.  Alternatively, if you modify or redistribute
+// the parser skeleton itself, you may (at your option) remove this
+// special exception, which will cause the skeleton and the resulting
+// Bison output files to be licensed under the GNU General Public
+// License without this special exception.
+
+// This special exception was added by the Free Software Foundation in
+// version 2.2 of Bison.
+
+/**
+ ** \file position.hh
+ ** Define the yy::position class.
+ */
+
+#ifndef YY_YY_POSITION_HH_INCLUDED
+# define YY_YY_POSITION_HH_INCLUDED
+
+# include <algorithm> // std::max
+# include <iostream>
+# include <string>
+
+# ifndef YY_NULLPTR
+#  if defined __cplusplus && 201103L <= __cplusplus
+#   define YY_NULLPTR nullptr
+#  else
+#   define YY_NULLPTR 0
+#  endif
+# endif
+
+
+namespace yy {
+#line 56 "position.hh" // location.cc:291
+  /// A point in the input: an optional file name plus a line and a column number.
+  class position
+  {
+  public:
+    /// Construct a position; line and column default to 1, the file name to null.
+    explicit position (std::string* f = YY_NULLPTR,
+                       unsigned int l = 1u,
+                       unsigned int c = 1u)
+      : filename (f)
+      , line (l)
+      , column (c)
+    {
+    }
+
+
+    /// Overwrite file name, line and column in one call (same defaults as the constructor).
+    void initialize (std::string* fn = YY_NULLPTR,
+                     unsigned int l = 1u,
+                     unsigned int c = 1u)
+    {
+      filename = fn;
+      line = l;
+      column = c;
+    }
+
+    /** \name Line and Column related manipulators
+     ** \{ */
+    /// Advance by \a count lines and reset the column to 1; does nothing when \a count is 0.
+    void lines (int count = 1)
+    {
+      if (count)
+        {
+          column = 1u;
+          line = add_ (line, count, 1);
+        }
+    }
+
+    /// Advance by \a count columns; a negative \a count moves back, but not below column 1.
+    void columns (int count = 1)
+    {
+      column = add_ (column, count, 1);
+    }
+    /** \} */
+
+    /// File name to which this position refers (stored as a plain pointer; may be null).
+    std::string* filename;
+    /// Current line number.
+    unsigned int line;
+    /// Current column number.
+    unsigned int column;
+
+  private:
+    /// Compute max(min, lhs+rhs) (provided min <= lhs).
+    static unsigned int add_ (unsigned int lhs, int rhs, unsigned int min)
+    {
+      return (0 < rhs || -static_cast<unsigned int>(rhs) < lhs // true when lhs + rhs stays above 0
+              ? rhs + lhs
+              : min);
+    }
+  };
+
+  /// Advance \a res by \a width columns, in place.
+  inline position&
+  operator+= (position& res, int width)
+  {
+    res.columns (width);
+    return res;
+  }
+
+  /// Return a copy of \a res advanced by \a width columns.
+  inline position
+  operator+ (position res, int width)
+  {
+    return res += width;
+  }
+
+  /// Move \a res back by \a width columns, in place.
+  inline position&
+  operator-= (position& res, int width)
+  {
+    return res += -width;
+  }
+
+  /// Return a copy of \a res moved back by \a width columns.
+  inline position
+  operator- (position res, int width)
+  {
+    return res -= width;
+  }
+
+  /// Equal when line and column match and the file names are the same pointer or equal strings.
+  inline bool
+  operator== (const position& pos1, const position& pos2)
+  {
+    return (pos1.line == pos2.line
+            && pos1.column == pos2.column
+            && (pos1.filename == pos2.filename
+                || (pos1.filename && pos2.filename
+                    && *pos1.filename == *pos2.filename)));
+  }
+
+  /// Negation of operator== for positions.
+  inline bool
+  operator!= (const position& pos1, const position& pos2)
+  {
+    return !(pos1 == pos2);
+  }
+
+  /** \brief Print a position as line.column, prefixed by the file name and a colon when one is set.
+   ** \param ostr the destination output stream
+   ** \param pos a reference to the position to print
+   */
+  template <typename YYChar>
+  inline std::basic_ostream<YYChar>&
+  operator<< (std::basic_ostream<YYChar>& ostr, const position& pos)
+  {
+    if (pos.filename)
+      ostr << *pos.filename << ':';
+    return ostr << pos.line << '.' << pos.column;
+  }
+
+
+} // yy
+#line 180 "position.hh" // location.cc:291
+#endif // !YY_YY_POSITION_HH_INCLUDED
--- a/src/query/reslistpager.cpp
+++ b/src/query/reslistpager.cpp
@ -77,7 +77,7 @@ void ResListPager::resultPageNext()
    if (m_winfirst < 0) {
 	m_winfirst = 0;
    } else {
-	m_winfirst += m_respage.size();
+	m_winfirst += int(m_respage.size());
    }
    // Get the next page of results. Note that we look ahead by one to
    // determine if there is actually a next page
@ -102,7 +102,7 @@ void ResListPager::resultPageNext()
 	    // Next button. We'd need to remove the Next link from the page
 	    // too.
 	    // Restore the m_winfirst value, let the current result vector alone
-	    m_winfirst -= m_respage.size();
+	    m_winfirst -= int(m_respage.size());
 	} else {
 	    // No results at all (on first page)
 	    m_winfirst = -1;
@ -213,9 +213,9 @@ void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc,
    // Size information. We print both doc and file if they differ a lot
    off_t fsize = -1, dsize = -1;
    if (!doc.dbytes.empty())
-	dsize = atoll(doc.dbytes.c_str());
+	dsize = static_cast<off_t>(atoll(doc.dbytes.c_str()));
    if (!doc.fbytes.empty())
-	fsize = atoll(doc.fbytes.c_str());
+	fsize =  static_cast<off_t>(atoll(doc.fbytes.c_str()));
    string sizebuf;
    if (dsize > 0) {
 	sizebuf = displayableBytes(dsize);
--- a/src/query/reslistpager.h
+++ b/src/query/reslistpager.h
@ -64,7 +64,7 @@ public:
    int pageLastDocNum() {
 	if (m_winfirst < 0 || m_respage.size() == 0)
 	    return -1;
-	return m_winfirst + m_respage.size() - 1;
+	return m_winfirst + int(m_respage.size()) - 1;
    }
    virtual int pageSize() const {return m_pagesize;}
    void pageNext();
--- a/src/query/sortseq.h
+++ b/src/query/sortseq.h
@ -39,7 +39,7 @@ class DocSeqSorted : public DocSeqModifier {
    virtual bool canSort() {return true;}
    virtual bool setSortSpec(const DocSeqSortSpec &sortspec);
    virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0);
-    virtual int getResCnt() {return m_docsp.size();}
+    virtual int getResCnt() {return int(m_docsp.size());}
 private:
    DocSeqSortSpec          m_spec;
    std::vector<Rcl::Doc>   m_docs;
--- a/src/query/stack.hh
+++ b/src/query/stack.hh
@ -0,0 +1,158 @@
+// A Bison parser, made by GNU Bison 3.0.2.
+
+// Stack handling for Bison parsers in C++
+
+// Copyright (C) 2002-2013 Free Software Foundation, Inc.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+// As a special exception, you may create a larger work that contains
+// part or all of the Bison parser skeleton and distribute that work
+// under terms of your choice, so long as that work isn't itself a
+// parser generator using the skeleton or a modified version thereof
+// as a parser skeleton.  Alternatively, if you modify or redistribute
+// the parser skeleton itself, you may (at your option) remove this
+// special exception, which will cause the skeleton and the resulting
+// Bison output files to be licensed under the GNU General Public
+// License without this special exception.
+
+// This special exception was added by the Free Software Foundation in
+// version 2.2 of Bison.
+
+/**
+ ** \file stack.hh
+ ** Define the yy::stack class.
+ */
+
+#ifndef YY_YY_STACK_HH_INCLUDED
+# define YY_YY_STACK_HH_INCLUDED
+
+# include <vector>
+
+
+namespace yy {
+#line 46 "stack.hh" // stack.hh:133
+  template <class T, class S = std::vector<T> >
+  class stack // LIFO wrapper around a container S: index 0 is the most recently pushed element
+  {
+  public:
+    // Iterate top-first: reverse iterators hide the push_back order of the wrapped container.
+    typedef typename S::reverse_iterator iterator;
+    typedef typename S::const_reverse_iterator const_iterator;
+
+    stack ()
+      : seq_ ()
+    {
+    }
+
+    stack (unsigned int n) // forwards n to the constructor of the wrapped container
+      : seq_ (n)
+    {
+    }
+
+    inline
+    T&
+    operator[] (unsigned int i)
+    {
+      return seq_[seq_.size () - 1 - i]; // i counts down from the back of seq_; no bounds check
+    }
+
+    inline
+    const T&
+    operator[] (unsigned int i) const
+    {
+      return seq_[seq_.size () - 1 - i]; // i counts down from the back of seq_; no bounds check
+    }
+
+    /// Steal the contents of \a t.
+    ///
+    /// Close to move-semantics.
+    inline
+    void
+    push (T& t)
+    {
+      seq_.push_back (T()); // append a default-constructed slot first
+      operator[](0).move (t); // then fill it: T must provide a move(T&) member
+    }
+
+    inline
+    void
+    pop (unsigned int n = 1) // drop the n most recently pushed elements
+    {
+      for (; n; --n)
+        seq_.pop_back ();
+    }
+
+    void
+    clear ()
+    {
+      seq_.clear ();
+    }
+
+    inline
+    typename S::size_type
+    size () const
+    {
+      return seq_.size ();
+    }
+
+    inline
+    const_iterator
+    begin () const
+    {
+      return seq_.rbegin (); // most recently pushed element first
+    }
+
+    inline
+    const_iterator
+    end () const
+    {
+      return seq_.rend ();
+    }
+
+  private:
+    stack (const stack&); // private and given no body in this header: copying is not available to users
+    stack& operator= (const stack&); // same for assignment
+    /// The wrapped container.
+    S seq_;
+  };
+
+  /// View of the top of a stack: slice[i] is stack[range - i] (with the default S, slice[range] is the top).
+  template <class T, class S = stack<T> >
+  class slice
+  {
+  public:
+    slice (const S& stack, unsigned int range)
+      : stack_ (stack)
+      , range_ (range)
+    {
+    }
+
+    inline
+    const T&
+    operator [] (unsigned int i) const
+    {
+      return stack_[range_ - i]; // unsigned arithmetic: callers must keep i <= range_
+    }
+
+  private:
+    const S& stack_; // held by reference, not copied: the stack must outlive the slice
+    unsigned int range_;
+  };
+
+
+} // yy
+#line 157 "stack.hh" // stack.hh:133
+
+#endif // !YY_YY_STACK_HH_INCLUDED
--- a/src/query/wasaparse.cpp
+++ b/src/query/wasaparse.cpp
--- a/src/query/wasaparse.hpp
+++ b/src/query/wasaparse.hpp
@ -0,0 +1,476 @@
+// A Bison parser, made by GNU Bison 3.0.2.
+
+// Skeleton interface for Bison LALR(1) parsers in C++
+
+// Copyright (C) 2002-2013 Free Software Foundation, Inc.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+// As a special exception, you may create a larger work that contains
+// part or all of the Bison parser skeleton and distribute that work
+// under terms of your choice, so long as that work isn't itself a
+// parser generator using the skeleton or a modified version thereof
+// as a parser skeleton.  Alternatively, if you modify or redistribute
+// the parser skeleton itself, you may (at your option) remove this
+// special exception, which will cause the skeleton and the resulting
+// Bison output files to be licensed under the GNU General Public
+// License without this special exception.
+
+// This special exception was added by the Free Software Foundation in
+// version 2.2 of Bison.
+
+/**
+ ** \file y.tab.h
+ ** Define the yy::parser class.
+ */
+
+// C++ LALR(1) parser skeleton written by Akim Demaille.
+
+#ifndef YY_YY_Y_TAB_H_INCLUDED
+# define YY_YY_Y_TAB_H_INCLUDED
+
+
+# include <vector>
+# include <iostream>
+# include <stdexcept>
+# include <string>
+# include "stack.hh"
+# include "location.hh"
+
+
+#ifndef YY_ATTRIBUTE
+# if (defined __GNUC__                                               \
+      && (2 < __GNUC__ || (__GNUC__ == 2 && 96 <= __GNUC_MINOR__)))  \
+     || defined __SUNPRO_C && 0x5110 <= __SUNPRO_C
+#  define YY_ATTRIBUTE(Spec) __attribute__(Spec)
+# else
+#  define YY_ATTRIBUTE(Spec) /* empty */
+# endif
+#endif
+
+#ifndef YY_ATTRIBUTE_PURE
+# define YY_ATTRIBUTE_PURE   YY_ATTRIBUTE ((__pure__))
+#endif
+
+#ifndef YY_ATTRIBUTE_UNUSED
+# define YY_ATTRIBUTE_UNUSED YY_ATTRIBUTE ((__unused__))
+#endif
+
+#if !defined _Noreturn \
+     && (!defined __STDC_VERSION__ || __STDC_VERSION__ < 201112)
+# if defined _MSC_VER && 1200 <= _MSC_VER
+#  define _Noreturn __declspec (noreturn)
+# else
+#  define _Noreturn YY_ATTRIBUTE ((__noreturn__))
+# endif
+#endif
+
+/* Suppress unused-variable warnings by "using" E.  */
+#if ! defined lint || defined __GNUC__
+# define YYUSE(E) ((void) (E))
+#else
+# define YYUSE(E) /* empty */
+#endif
+
+#if defined __GNUC__ && 407 <= __GNUC__ * 100 + __GNUC_MINOR__
+/* Suppress an incorrect diagnostic about yylval being uninitialized.  */
+# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \
+    _Pragma ("GCC diagnostic push") \
+    _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"")\
+    _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
+# define YY_IGNORE_MAYBE_UNINITIALIZED_END \
+    _Pragma ("GCC diagnostic pop")
+#else
+# define YY_INITIAL_VALUE(Value) Value
+#endif
+#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+# define YY_IGNORE_MAYBE_UNINITIALIZED_END
+#endif
+#ifndef YY_INITIAL_VALUE
+# define YY_INITIAL_VALUE(Value) /* Nothing. */
+#endif
+
+/* Debug traces.  */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+
+
+namespace yy {
+#line 113 "y.tab.h" // lalr1.cc:372
+
+
+
+
+
+  /// A Bison parser.
+  class parser
+  {
+  public:
+#ifndef YYSTYPE
+    /// Symbol semantic values.
+    union semantic_type
+    {
+    #line 44 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:372
+
+    std::string *str;
+    Rcl::SearchDataClauseSimple *cl;
+    Rcl::SearchData *sd;
+
+#line 133 "y.tab.h" // lalr1.cc:372
+    };
+#else
+    typedef YYSTYPE semantic_type;
+#endif
+    /// Symbol locations.
+    typedef location location_type;
+
+    /// Syntax errors thrown from user actions.
+    struct syntax_error : std::runtime_error
+    {
+      syntax_error (const location_type& l, const std::string& m);
+      location_type location;
+    };
+
+    /// Tokens.
+    struct token
+    {
+      enum yytokentype
+      {
+        WORD = 258,
+        QUOTED = 259,
+        QUALIFIERS = 260,
+        AND = 261,
+        UCONCAT = 262,
+        OR = 263,
+        EQUALS = 264,
+        CONTAINS = 265,
+        SMALLEREQ = 266,
+        SMALLER = 267,
+        GREATEREQ = 268,
+        GREATER = 269
+      };
+    };
+
+    /// (External) token type, as returned by yylex.
+    typedef token::yytokentype token_type;
+
+    /// Internal symbol number.
+    typedef int symbol_number_type;
+
+    /// Internal symbol number for tokens (subsumed by symbol_number_type).
+    typedef unsigned char token_number_type;
+
+    /// A complete symbol.
+    ///
+    /// Expects its Base type to provide access to the symbol type
+    /// via type_get().
+    ///
+    /// Provide access to semantic value and location.
+    template <typename Base>
+    struct basic_symbol : Base
+    {
+      /// Alias to Base.
+      typedef Base super_type;
+
+      /// Default constructor.
+      basic_symbol ();
+
+      /// Copy constructor.
+      basic_symbol (const basic_symbol& other);
+
+      /// Constructor for valueless symbols.
+      basic_symbol (typename Base::kind_type t,
+                    const location_type& l);
+
+      /// Constructor for symbols with semantic value.
+      basic_symbol (typename Base::kind_type t,
+                    const semantic_type& v,
+                    const location_type& l);
+
+      ~basic_symbol ();
+
+      /// Destructive move, \a s is emptied into this.
+      void move (basic_symbol& s);
+
+      /// The semantic value.
+      semantic_type value;
+
+      /// The location.
+      location_type location;
+
+    private:
+      /// Assignment operator.
+      basic_symbol& operator= (const basic_symbol& other);
+    };
+
+    /// Type access provider for token (enum) based symbols.
+    struct by_type
+    {
+      /// Default constructor.
+      by_type ();
+
+      /// Copy constructor.
+      by_type (const by_type& other);
+
+      /// The symbol type as needed by the constructor.
+      typedef token_type kind_type;
+
+      /// Constructor from (external) token numbers.
+      by_type (kind_type t);
+
+      /// Steal the symbol type from \a that.
+      void move (by_type& that);
+
+      /// The (internal) type number (corresponding to \a type).
+      /// -1 when this symbol is empty.
+      symbol_number_type type_get () const;
+
+      /// The token.
+      token_type token () const;
+
+      enum { empty = 0 };
+
+      /// The symbol type.
+      /// NOTE(review): stored unsigned and \a empty is 0, so the former -1-when-empty note looks stale; confirm.
+      token_number_type type;
+    };
+
+    /// "External" symbols: returned by the scanner.
+    typedef basic_symbol<by_type> symbol_type;
+
+
+    /// Build a parser object.
+    parser (WasaParserDriver* d_yyarg);
+    virtual ~parser ();
+
+    /// Parse.
+    /// \returns  0 iff parsing succeeded.
+    virtual int parse ();
+
+#if YYDEBUG
+    /// The current debugging stream.
+    std::ostream& debug_stream () const YY_ATTRIBUTE_PURE;
+    /// Set the current debugging stream.
+    void set_debug_stream (std::ostream &);
+
+    /// Type for debugging levels.
+    typedef int debug_level_type;
+    /// The current debugging level.
+    debug_level_type debug_level () const YY_ATTRIBUTE_PURE;
+    /// Set the current debugging level.
+    void set_debug_level (debug_level_type l);
+#endif
+
+    /// Report a syntax error.
+    /// \param loc    where the syntax error is found.
+    /// \param msg    a description of the syntax error.
+    virtual void error (const location_type& loc, const std::string& msg);
+
+    /// Report a syntax error.
+    void error (const syntax_error& err);
+
+  private:
+    /// This class is not copyable.
+    parser (const parser&);
+    parser& operator= (const parser&);
+
+    /// State numbers.
+    typedef int state_type;
+
+    /// Generate an error message.
+    /// \param yystate   the state where the error occurred.
+    /// \param yytoken   the lookahead token type, or yyempty_.
+    virtual std::string yysyntax_error_ (state_type yystate,
+                                         symbol_number_type yytoken) const;
+
+    /// Compute post-reduction state.
+    /// \param yystate   the current state
+    /// \param yysym     the nonterminal to push on the stack
+    state_type yy_lr_goto_state_ (state_type yystate, int yysym);
+
+    /// Whether the given \c yypact_ value indicates a defaulted state.
+    /// \param yyvalue   the value to check
+    static bool yy_pact_value_is_default_ (int yyvalue);
+
+    /// Whether the given \c yytable_ value indicates a syntax error.
+    /// \param yyvalue   the value to check
+    static bool yy_table_value_is_error_ (int yyvalue);
+
+    static const signed char yypact_ninf_;
+    static const signed char yytable_ninf_;
+
+    /// Convert a scanner token number \a t to a symbol number.
+    static token_number_type yytranslate_ (int t);
+
+    // Tables.
+  // YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
+  // STATE-NUM.
+  static const signed char yypact_[];
+
+  // YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
+  // Performed when YYTABLE does not specify something else to do.  Zero
+  // means the default is an error.
+  static const unsigned char yydefact_[];
+
+  // YYPGOTO[NTERM-NUM].
+  static const signed char yypgoto_[];
+
+  // YYDEFGOTO[NTERM-NUM].
+  static const signed char yydefgoto_[];
+
+  // YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM.  If
+  // positive, shift that token.  If negative, reduce the rule whose
+  // number is the opposite.  If YYTABLE_NINF, syntax error.
+  static const signed char yytable_[];
+
+  static const signed char yycheck_[];
+
+  // YYSTOS[STATE-NUM] -- The (internal number of the) accessing
+  // symbol of state STATE-NUM.
+  static const unsigned char yystos_[];
+
+  // YYR1[YYN] -- Symbol number of symbol that rule YYN derives.
+  static const unsigned char yyr1_[];
+
+  // YYR2[YYN] -- Number of symbols on the right hand side of rule YYN.
+  static const unsigned char yyr2_[];
+
+
+    /// Convert the symbol name \a n to a form suitable for a diagnostic.
+    static std::string yytnamerr_ (const char *n);
+
+
+    /// For a symbol, its name in clear.
+    static const char* const yytname_[];
+#if YYDEBUG
+  // YYRLINE[YYN] -- Source line where rule number YYN was defined.
+  static const unsigned char yyrline_[];
+    /// Report on the debug stream that the rule \a r is going to be reduced.
+    virtual void yy_reduce_print_ (int r);
+    /// Print the state stack on the debug stream.
+    virtual void yystack_print_ ();
+
+    // Debugging.
+    int yydebug_;
+    std::ostream* yycdebug_;
+
+    /// \brief Display a symbol type, value and location.
+    /// \param yyo    The output stream.
+    /// \param yysym  The symbol.
+    template <typename Base>
+    void yy_print_ (std::ostream& yyo, const basic_symbol<Base>& yysym) const;
+#endif
+
+    /// \brief Reclaim the memory associated to a symbol.
+    /// \param yymsg     Why this token is reclaimed.
+    ///                  If null, print nothing.
+    /// \param yysym     The symbol.
+    template <typename Base>
+    void yy_destroy_ (const char* yymsg, basic_symbol<Base>& yysym) const;
+
+  private:
+    /// Type access provider for state based symbols.
+    struct by_state
+    {
+      /// Default constructor.
+      by_state ();
+
+      /// The symbol type as needed by the constructor.
+      typedef state_type kind_type;
+
+      /// Constructor.
+      by_state (kind_type s);
+
+      /// Copy constructor.
+      by_state (const by_state& other);
+
+      /// Steal the symbol type from \a that.
+      void move (by_state& that);
+
+      /// The (internal) type number (corresponding to \a state).
+      /// "empty" when empty.
+      symbol_number_type type_get () const;
+
+      enum { empty = 0 };
+
+      /// The state.
+      state_type state;
+    };
+
+    /// "Internal" symbol: element of the stack.
+    struct stack_symbol_type : basic_symbol<by_state>
+    {
+      /// Superclass.
+      typedef basic_symbol<by_state> super_type;
+      /// Construct an empty symbol.
+      stack_symbol_type ();
+      /// Steal the contents from \a sym to build this.
+      stack_symbol_type (state_type s, symbol_type& sym);
+      /// Assignment, needed by push_back.
+      stack_symbol_type& operator= (const stack_symbol_type& that);
+    };
+
+    /// Stack type.
+    typedef stack<stack_symbol_type> stack_type;
+
+    /// The stack.
+    stack_type yystack_;
+
+    /// Push a new state on the stack.
+    /// \param m    a debug message to display
+    ///             if null, no trace is output.
+    /// \param s    the symbol
+    /// \warning the contents of \a s.value is stolen.
+    void yypush_ (const char* m, stack_symbol_type& s);
+
+    /// Push the look ahead token \a sym on the stack, together with the state \a s.
+    /// \param m    a debug message to display
+    ///             if null, no trace is output.
+    /// \param s    the state
+    /// \param sym  the symbol (for its value and location).
+    /// \warning the contents of \a sym.value is stolen.
+    void yypush_ (const char* m, state_type s, symbol_type& sym);
+
+    /// Pop \a n symbols from the stack.
+    void yypop_ (unsigned int n = 1);
+
+    // Constants.
+    enum
+    {
+      yyeof_ = 0,
+      yylast_ = 59,     ///< Last index in yytable_.
+      yynnts_ = 7,  ///< Number of nonterminal symbols.
+      yyempty_ = -2,
+      yyfinal_ = 14, ///< Termination state number.
+      yyterror_ = 1,
+      yyerrcode_ = 256,
+      yyntokens_ = 18  ///< Number of tokens.
+    };
+
+
+    // User arguments.
+    WasaParserDriver* d;
+  };
+
+
+
+} // yy
+#line 472 "y.tab.h" // lalr1.cc:372
+
+
+
+
+#endif // !YY_YY_Y_TAB_H_INCLUDED
--- a/src/query/wasaparseaux.cpp
+++ b/src/query/wasaparseaux.cpp
@ -161,10 +161,10 @@ bool WasaParserDriver::addClause(SearchData *sd,
        size_t size = strtoll(cl->gettext().c_str(), &cp, 10);
        if (*cp != 0) {
            switch (*cp) {
-            case 'k': case 'K': size *= 1E3;break;
-            case 'm': case 'M': size *= 1E6;break;
-            case 'g': case 'G': size *= 1E9;break;
-            case 't': case 'T': size *= 1E12;break;
+            case 'k': case 'K': size *= 1000;break;
+            case 'm': case 'M': size *= 1000*1000;break;
+            case 'g': case 'G': size *= 1000*1000*1000;break;
+            case 't': case 'T': size *= size_t(1000)*1000*1000*1000;break;
            default: 
                m_reason = string("Bad multiplier suffix: ") + *cp;
                delete cl;
--- a/src/rcldb/daterange.cpp
+++ b/src/rcldb/daterange.cpp
@ -29,7 +29,7 @@
 #include <vector>
 using namespace std;

-#include "xapian.h"
+#include <xapian.h>

 #include "debuglog.h"
 #include "rclconfig.h"
--- a/src/rcldb/expansiondbs.cpp
+++ b/src/rcldb/expansiondbs.cpp
@ -97,6 +97,8 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,

 	    // Detect and skip CJK terms.
 	    Utf8Iter utfit(*it);
+            if (utfit.eof()) // Empty term?? Seems to happen.
+                continue;
 	    if (TextSplit::isCJK(*utfit)) {
 		// LOGDEB(("stemskipped: Skipping CJK\n"));
 		continue;
--- a/src/rcldb/rclabstract.cpp
+++ b/src/rcldb/rclabstract.cpp
@ -82,7 +82,7 @@ bool Query::Native::getMatchTerms(unsigned long xdocid, vector<string>& terms)
 {
    if (!xenquire) {
 	LOGERR(("Query::getMatchTerms: no query opened\n"));
-	return -1;
+	return false;
    }

    terms.clear();
@ -386,7 +386,7 @@ int Query::Native::makeAbstract(Xapian::docid docid,
    for (multimap<double, vector<string> >::reverse_iterator mit = byQ.rbegin(); 
 	 mit != byQ.rend(); mit++) {
 	unsigned int maxgrpoccs;
-	float q;
+	double q;
 	if (byQ.size() == 1) {
 	    maxgrpoccs = maxtotaloccs;
 	    q = 1.0;
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -18,7 +18,7 @@

 #include <stdio.h>
 #include <cstring>
-#include <unistd.h>
+#include "safeunistd.h"
 #include <math.h>
 #include <time.h>

@ -433,7 +433,7 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
    string dbdir = m_rcldb->m_basedir;
    doc.idxi = 0;
    if (!m_rcldb->m_extraDbs.empty()) {
-	unsigned int idxi = whatDbIdx(docid);
+	int idxi = int(whatDbIdx(docid));

 	// idxi is in [0, extraDbs.size()]. 0 is for the main index,
 	// idxi-1 indexes into the additional dbs array.
@ -549,14 +549,13 @@ bool Db::Native::getPagePositions(Xapian::docid docid, vector<int>& vpos)
    return true;
 }

-int Db::Native::getPageNumberForPosition(const vector<int>& pbreaks, 
-					 unsigned int pos)
+int Db::Native::getPageNumberForPosition(const vector<int>& pbreaks, int pos)
 {
-    if (pos < baseTextPosition) // Not in text body
+    if (pos < int(baseTextPosition)) // Not in text body
 	return -1;
    vector<int>::const_iterator it = 
 	upper_bound(pbreaks.begin(), pbreaks.end(), pos);
-    return it - pbreaks.begin() + 1;
+    return int(it - pbreaks.begin() + 1);
 }

 // Note: we're passed a Xapian::Document* because Xapian
@ -1420,10 +1419,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
 	time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() : 
 			     doc.dmtime.c_str());
        struct tm tmb;
-	localtime_r(&mtime, &tmb);
+		struct tm *tmbp = &tmb;
+	tmbp = localtime_r(&mtime, &tmb);
 	char buf[9];
 	snprintf(buf, 9, "%04d%02d%02d",
-		 tmb.tm_year+1900, tmb.tm_mon + 1, tmb.tm_mday);
+		 tmbp->tm_year+1900, tmbp->tm_mon + 1, tmbp->tm_mday);
 	// Date (YYYYMMDD)
 	newdocument.add_boolean_term(wrap_prefix(xapday_prefix) + string(buf)); 
 	// Month (YYYYMM)
--- a/src/rcldb/rcldb_p.h
+++ b/src/rcldb/rcldb_p.h
@ -120,7 +120,7 @@ class Db::Native {
 			const string& uniterm);

    bool getPagePositions(Xapian::docid docid, vector<int>& vpos);
-    int getPageNumberForPosition(const vector<int>& pbreaks, unsigned int pos);
+    int getPageNumberForPosition(const vector<int>& pbreaks, int pos);

    bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);

--- a/src/rcldb/rclterms.cpp
+++ b/src/rcldb/rclterms.cpp
@ -21,13 +21,15 @@
 #include "autoconfig.h"

 #include <string>
-using namespace std;

 #include "debuglog.h"
 #include "rcldb.h"
 #include "rcldb_p.h"
 #include "stemdb.h"
 #include "expansiondbs.h"
+#include "strmatcher.h"
+
+using namespace std;

 namespace Rcl {

@ -41,10 +43,10 @@ bool Db::filenameWildExp(const string& fnexp, vector<string>& names, int max)
    // get here currently anyway), and has no wildcards, we add * at
    // each end: match any substring
    if (pattern[0] == '"' && pattern[pattern.size()-1] == '"') {
-	pattern = pattern.substr(1, pattern.size() -2);
+        pattern = pattern.substr(1, pattern.size() -2);
    } else if (pattern.find_first_of(cstr_minwilds) == string::npos && 
-	       !unaciscapital(pattern)) {
-	pattern = "*" + pattern + "*";
+               !unaciscapital(pattern)) {
+        pattern = "*" + pattern + "*";
    } // else let it be

    LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str()));
@ -55,21 +57,21 @@ bool Db::filenameWildExp(const string& fnexp, vector<string>& names, int max)
    // stripping conditionally on indexstripchars.
    string pat1;
    if (unacmaybefold(pattern, pat1, "UTF-8", UNACOP_UNACFOLD)) {
-	pattern.swap(pat1);
+        pattern.swap(pat1);
    }

    TermMatchResult result;
    if (!idxTermMatch(ET_WILD, string(), pattern, result, max,
-		      unsplitFilenameFieldName))
-	return false;
+                      unsplitFilenameFieldName))
+        return false;
    for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
-	 it != result.entries.end(); it++) 
-	names.push_back(it->term);
+         it != result.entries.end(); it++) 
+        names.push_back(it->term);

    if (names.empty()) {
-	// Build an impossible query: we know its impossible because we
-	// control the prefixes!
-	names.push_back(wrap_prefix("XNONE") + "NoMatchingTerms");
+        // Build an impossible query: we know its impossible because we
+        // control the prefixes!
+        names.push_back(wrap_prefix("XNONE") + "NoMatchingTerms");
    }
    return true;
 }
@ -82,11 +84,11 @@ bool Db::maxYearSpan(int *minyear, int *maxyear)
    *maxyear = -1000000;
    TermMatchResult result;
    if (!idxTermMatch(ET_WILD, string(), "*", result, -1, "xapyear")) {
-	LOGINFO(("Rcl::Db:maxYearSpan: termMatch failed\n"));
-	return false;
+        LOGINFO(("Rcl::Db:maxYearSpan: termMatch failed\n"));
+        return false;
    }
    for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
-	 it != result.entries.end(); it++) {
+         it != result.entries.end(); it++) {
        if (!it->term.empty()) {
            int year = atoi(strip_prefix(it->term).c_str());
            if (year < *minyear)
@ -102,11 +104,11 @@ bool Db::getAllDbMimeTypes(std::vector<std::string>& exp)
 {
    Rcl::TermMatchResult res;
    if (!idxTermMatch(Rcl::Db::ET_WILD, "", "*", res, -1, "mtype")) {
-	return false;
+        return false;
    }
    for (vector<Rcl::TermMatchEntry>::const_iterator rit = res.entries.begin();
-	 rit != res.entries.end(); rit++) {
-	exp.push_back(Rcl::strip_prefix(rit->term));
+         rit != res.entries.end(); rit++) {
+        exp.push_back(Rcl::strip_prefix(rit->term));
    }
    return true;
 }
@ -114,19 +116,19 @@ bool Db::getAllDbMimeTypes(std::vector<std::string>& exp)
 class TermMatchCmpByWcf {
 public:
    int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
-	return r.wcf - l.wcf < 0;
+        return r.wcf - l.wcf < 0;
    }
 };
 class TermMatchCmpByTerm {
 public:
    int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
-	return l.term.compare(r.term) > 0;
+        return l.term.compare(r.term) > 0;
    }
 };
 class TermMatchTermEqual {
 public:
    int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
-	return !l.term.compare(r.term);
+        return !l.term.compare(r.term);
    }
 };

@ -136,10 +138,10 @@ public:
 static void addPrefix(vector<TermMatchEntry>& terms, const string& prefix)
 {
    if (prefix.empty())
-	return;
+        return;
    for (vector<TermMatchEntry>::iterator it = terms.begin(); 
         it != terms.end(); it++)
-	it->term.insert(0, prefix);
+        it->term.insert(0, prefix);
 }

 static const char *tmtptostr(int typ)
@ -164,22 +166,22 @@ static const char *tmtptostr(int typ)
 // using the main index terms (filtering, retrieving stats, expansion
 // in some cases).
 bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
-		   TermMatchResult& res, int max,  const string& field,
-		   vector<string>* multiwords)
+                   TermMatchResult& res, int max,  const string& field,
+                   vector<string>* multiwords)
 {
    int matchtyp = matchTypeTp(typ_sens);
    if (!m_ndb || !m_ndb->m_isopen)
-	return false;
+        return false;
    Xapian::Database xrdb = m_ndb->xrdb;

    bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0;
    bool case_sensitive = (typ_sens & ET_CASESENS) != 0;

    LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
-	    " max %d field [%s] stripped %d init res.size %u\n",
-	    tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(), 
-	    _term.c_str(), max, field.c_str(), o_index_stripchars, 
-	     res.entries.size()));
+            " max %d field [%s] stripped %d init res.size %u\n",
+            tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(), 
+            _term.c_str(), max, field.c_str(), o_index_stripchars, 
+             res.entries.size()));

    // If index is stripped, no case or diac expansion can be needed:
    // for the processing inside this routine, everything looks like
@ -187,11 +189,11 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
    // Also, convert input to lowercase and strip its accents.
    string term = _term;
    if (o_index_stripchars) {
-	diac_sensitive = case_sensitive = true;
-	if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
-	    LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
-	    return false;
-	}
+        diac_sensitive = case_sensitive = true;
+        if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
+            LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
+            return false;
+        }
    }

    // The case/diac expansion db
@ -199,125 +201,125 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
    XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);

    if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
-	STD_SHARED_PTR<StrMatcher> matcher;
-	if (matchtyp == ET_WILD) {
-	    matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(term));
-	} else {
-	    matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(term));
-	}
-	if (!diac_sensitive || !case_sensitive) {
-	    // Perform case/diac expansion on the exp as appropriate and
-	    // expand the result.
-	    vector<string> exp;
-	    if (diac_sensitive) {
-		// Expand for diacritics and case, filtering for same diacritics
-		SynTermTransUnac foldtrans(UNACOP_FOLD);
-		synac.synKeyExpand(matcher.get(), exp, &foldtrans);
-	    } else if (case_sensitive) {
-		// Expand for diacritics and case, filtering for same case
-		SynTermTransUnac unactrans(UNACOP_UNAC);
-		synac.synKeyExpand(matcher.get(), exp, &unactrans);
-	    } else {
-		// Expand for diacritics and case, no filtering
-		synac.synKeyExpand(matcher.get(), exp);
-	    }
-	    // Retrieve additional info and filter against the index itself
-	    for (vector<string>::const_iterator it = exp.begin(); 
-		 it != exp.end(); it++) {
-		idxTermMatch(ET_NONE, "", *it, res, max, field);
-	    }
-	    // And also expand the original expression against the
-	    // main index: for the common case where the expression
-	    // had no case/diac expansion (no entry in the exp db if
-	    // the original term is lowercase and without accents).
-	    idxTermMatch(typ_sens, lang, term, res, max, field);
-	} else {
-	    idxTermMatch(typ_sens, lang, term, res, max, field);
-	}
+        STD_SHARED_PTR<StrMatcher> matcher;
+        if (matchtyp == ET_WILD) {
+            matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(term));
+        } else {
+            matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(term));
+        }
+        if (!diac_sensitive || !case_sensitive) {
+            // Perform case/diac expansion on the exp as appropriate and
+            // expand the result.
+            vector<string> exp;
+            if (diac_sensitive) {
+                // Expand for diacritics and case, filtering for same diacritics
+                SynTermTransUnac foldtrans(UNACOP_FOLD);
+                synac.synKeyExpand(matcher.get(), exp, &foldtrans);
+            } else if (case_sensitive) {
+                // Expand for diacritics and case, filtering for same case
+                SynTermTransUnac unactrans(UNACOP_UNAC);
+                synac.synKeyExpand(matcher.get(), exp, &unactrans);
+            } else {
+                // Expand for diacritics and case, no filtering
+                synac.synKeyExpand(matcher.get(), exp);
+            }
+            // Retrieve additional info and filter against the index itself
+            for (vector<string>::const_iterator it = exp.begin(); 
+                 it != exp.end(); it++) {
+                idxTermMatch(ET_NONE, "", *it, res, max, field);
+            }
+            // And also expand the original expression against the
+            // main index: for the common case where the expression
+            // had no case/diac expansion (no entry in the exp db if
+            // the original term is lowercase and without accents).
+            idxTermMatch(typ_sens, lang, term, res, max, field);
+        } else {
+            idxTermMatch(typ_sens, lang, term, res, max, field);
+        }

    } else {
-	// Expansion is STEM or NONE (which may still need synonyms
-	// and case/diac exp)
+        // Expansion is STEM or NONE (which may still need synonyms
+        // and case/diac exp)

-	vector<string> lexp;
-	if (diac_sensitive && case_sensitive) {
-	    // No case/diac expansion
-	    lexp.push_back(term);
-	} else if (diac_sensitive) {
-	    // Expand for accents and case, filtering for same accents,
-	    SynTermTransUnac foldtrans(UNACOP_FOLD);
-	    synac.synExpand(term, lexp, &foldtrans);
-	} else if (case_sensitive) {
-	    // Expand for accents and case, filtering for same case
-	    SynTermTransUnac unactrans(UNACOP_UNAC);
-	    synac.synExpand(term, lexp, &unactrans);
-	} else {
-	    // We are neither accent- nor case- sensitive and may need stem
-	    // expansion or not. Expand for accents and case
-	    synac.synExpand(term, lexp);
-	}
+        vector<string> lexp;
+        if (diac_sensitive && case_sensitive) {
+            // No case/diac expansion
+            lexp.push_back(term);
+        } else if (diac_sensitive) {
+            // Expand for accents and case, filtering for same accents,
+            SynTermTransUnac foldtrans(UNACOP_FOLD);
+            synac.synExpand(term, lexp, &foldtrans);
+        } else if (case_sensitive) {
+            // Expand for accents and case, filtering for same case
+            SynTermTransUnac unactrans(UNACOP_UNAC);
+            synac.synExpand(term, lexp, &unactrans);
+        } else {
+            // We are neither accent- nor case- sensitive and may need stem
+            // expansion or not. Expand for accents and case
+            synac.synExpand(term, lexp);
+        }

-	if (matchtyp == ET_STEM || (typ_sens & ET_SYNEXP)) {
+        if (matchtyp == ET_STEM || (typ_sens & ET_SYNEXP)) {
            // Note: if any of the above conds is true, we are insensitive to
            // diacs and case (enforced in searchdatatox:termexpand
-	    // Need stem expansion. Lowercase the result of accent and case
-	    // expansion for input to stemdb.
-	    for (unsigned int i = 0; i < lexp.size(); i++) {
-		string lower;
-		unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
-		lexp[i] = lower;
-	    }
-	    sort(lexp.begin(), lexp.end());
-	    lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
+            // Need stem expansion. Lowercase the result of accent and case
+            // expansion for input to stemdb.
+            for (unsigned int i = 0; i < lexp.size(); i++) {
+                string lower;
+                unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
+                lexp[i] = lower;
+            }
+            sort(lexp.begin(), lexp.end());
+            lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());

-	    if (matchtyp == ET_STEM) {
-		StemDb sdb(xrdb);
-		vector<string> exp1;
-		for (vector<string>::const_iterator it = lexp.begin(); 
-		     it != lexp.end(); it++) {
-		    sdb.stemExpand(lang, *it, exp1);
-		}
-		exp1.swap(lexp);
-		sort(lexp.begin(), lexp.end());
-		lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
-		LOGDEB(("ExpTerm: stemexp: %s\n", 
-			stringsToString(lexp).c_str()));
-	    }
+            if (matchtyp == ET_STEM) {
+                StemDb sdb(xrdb);
+                vector<string> exp1;
+                for (vector<string>::const_iterator it = lexp.begin(); 
+                     it != lexp.end(); it++) {
+                    sdb.stemExpand(lang, *it, exp1);
+                }
+                exp1.swap(lexp);
+                sort(lexp.begin(), lexp.end());
+                lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
+                LOGDEB(("ExpTerm: stemexp: %s\n", 
+                        stringsToString(lexp).c_str()));
+            }

-	    // Expand the result for synonyms. Note that doing it here
-	    // means that multi-term synonyms will not work
-	    // (e.g. stakhanovist -> "hard at work". We would have to
-	    // separate the multi-word expansions for our caller to
-	    // add them as phrases to the query. Not impossible, but
-	    // let's keep it at single words for now.
-	    if (m_syngroups.ok() && (typ_sens & ET_SYNEXP)) {
-		LOGDEB(("ExpTerm: got syngroups\n"));
-		vector<string> exp1(lexp);
-		for (vector<string>::const_iterator it = lexp.begin(); 
-		     it != lexp.end(); it++) {
-		    vector<string> sg = m_syngroups.getgroup(*it);
-		    if (!sg.empty()) {
-			LOGDEB(("ExpTerm: syns: %s -> %s\n", 
-				it->c_str(), stringsToString(sg).c_str()));
-			for (vector<string>::const_iterator it1 = sg.begin();
-			     it1 != sg.end(); it1++) {
-			    if (it1->find_first_of(" ") != string::npos) {
-				if (multiwords) {
-				    multiwords->push_back(*it1);
-				}
-			    } else {
-				exp1.push_back(*it1);
-			    }
-			}
-		    }
-		}
-		lexp.swap(exp1);
-		sort(lexp.begin(), lexp.end());
-		lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
-	    }
+            // Expand the result for synonyms. Note that doing it here
+            // means that multi-term synonyms will not work
+            // (e.g. stakhanovist -> "hard at work". We would have to
+            // separate the multi-word expansions for our caller to
+            // add them as phrases to the query. Not impossible, but
+            // let's keep it at single words for now.
+            if (m_syngroups.ok() && (typ_sens & ET_SYNEXP)) {
+                LOGDEB(("ExpTerm: got syngroups\n"));
+                vector<string> exp1(lexp);
+                for (vector<string>::const_iterator it = lexp.begin(); 
+                     it != lexp.end(); it++) {
+                    vector<string> sg = m_syngroups.getgroup(*it);
+                    if (!sg.empty()) {
+                        LOGDEB(("ExpTerm: syns: %s -> %s\n", 
+                                it->c_str(), stringsToString(sg).c_str()));
+                        for (vector<string>::const_iterator it1 = sg.begin();
+                             it1 != sg.end(); it1++) {
+                            if (it1->find_first_of(" ") != string::npos) {
+                                if (multiwords) {
+                                    multiwords->push_back(*it1);
+                                }
+                            } else {
+                                exp1.push_back(*it1);
+                            }
+                        }
+                    }
+                }
+                lexp.swap(exp1);
+                sort(lexp.begin(), lexp.end());
+                lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
+            }

-	    // Expand the resulting list for case (all stemdb content
-	    // is lowercase)
+            // Expand the resulting list for case (all stemdb content
+            // is lowercase)
            vector<string> exp1;
            for (vector<string>::const_iterator it = lexp.begin(); 
                 it != lexp.end(); it++) {
@ -326,27 +328,27 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
            exp1.swap(lexp);
            sort(lexp.begin(), lexp.end());
            lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
-	}
+        }

-	// Filter the result and get the stats, possibly add prefixes.
-	LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
-	for (vector<string>::const_iterator it = lexp.begin();
-	     it != lexp.end(); it++) {
-	    idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
-	}
+        // Filter the result and get the stats, possibly add prefixes.
+        LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
+        for (vector<string>::const_iterator it = lexp.begin();
+             it != lexp.end(); it++) {
+            idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
+        }
    }

    TermMatchCmpByTerm tcmp;
    sort(res.entries.begin(), res.entries.end(), tcmp);
    TermMatchTermEqual teq;
    vector<TermMatchEntry>::iterator uit = 
-	unique(res.entries.begin(), res.entries.end(), teq);
+        unique(res.entries.begin(), res.entries.end(), teq);
    res.entries.resize(uit - res.entries.begin());
    TermMatchCmpByWcf wcmp;
    sort(res.entries.begin(), res.entries.end(), wcmp);
    if (max > 0) {
-	// Would need a small max and big stem expansion...
-	res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
+        // Would need a small max and big stem expansion...
+        res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
    }
    return true;
 }
@ -354,114 +356,116 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
 // Second phase of wildcard/regexp term expansion after case/diac
 // expansion: expand against main index terms
 bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
-		      TermMatchResult& res, int max,  const string& field)
+                      TermMatchResult& res, int max,  const string& field)
 {
    int typ = matchTypeTp(typ_sens);
    LOGDEB1(("Db::idxTermMatch: typ %s lang [%s] term [%s] "
-	     "max %d field [%s] init res.size %u\n",
-	     tmtptostr(typ), lang.c_str(), root.c_str(),
-	     max, field.c_str(), res.entries.size()));
+             "max %d field [%s] init res.size %u\n",
+             tmtptostr(typ), lang.c_str(), root.c_str(),
+             max, field.c_str(), res.entries.size()));

    if (typ == ET_STEM) {
-	LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
-	abort();
+        LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
+        abort();
    }

    Xapian::Database xdb = m_ndb->xrdb;

    string prefix;
    if (!field.empty()) {
-	const FieldTraits *ftp = 0;
-	if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
+        const FieldTraits *ftp = 0;
+        if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
            LOGDEB(("Db::termMatch: field is not indexed (no prefix): [%s]\n", 
                    field.c_str()));
        } else {
-	    prefix = wrap_prefix(ftp->pfx);
-	}
+            prefix = wrap_prefix(ftp->pfx);
+        }
    }
    res.prefix = prefix;

    STD_SHARED_PTR<StrMatcher> matcher;
    if (typ == ET_REGEXP) {
-	matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(root));
-	if (!matcher->ok()) {
-	    LOGERR(("termMatch: regcomp failed: %s\n", 
-		    matcher->getreason().c_str()))
-		return false;
-	}
+        matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(root));
+        if (!matcher->ok()) {
+            LOGERR(("termMatch: regcomp failed: %s\n", 
+                    matcher->getreason().c_str()))
+                return false;
+        }
    } else if (typ == ET_WILD) {
-	matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(root));
+        matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(root));
    }

    // Find the initial section before any special char
    string::size_type es = string::npos;
    if (matcher) {
-	es = matcher->baseprefixlen();
+        es = matcher->baseprefixlen();
    }

    // Initial section: the part of the prefix+expr before the
    // first wildcard character. We only scan the part of the
    // index where this matches
    string is;
-    switch (es) {
-    case string::npos: is = prefix + root; break;
-    case 0: is = prefix; break;
-    default: is = prefix + root.substr(0, es); break;
+    if (es == string::npos) {
+        is = prefix + root;
+    } else if (es == 0) {
+        is = prefix;
+    } else {
+        is = prefix + root.substr(0, es);
    }
    LOGDEB2(("termMatch: initsec: [%s]\n", is.c_str()));

    for (int tries = 0; tries < 2; tries++) { 
-	try {
-	    Xapian::TermIterator it = xdb.allterms_begin(); 
-	    if (!is.empty())
-		it.skip_to(is.c_str());
-	    for (int rcnt = 0; it != xdb.allterms_end(); it++) {
-		// If we're beyond the terms matching the initial
-		// section, end
-		if (!is.empty() && (*it).find(is) != 0)
-		    break;
+        try {
+            Xapian::TermIterator it = xdb.allterms_begin(); 
+            if (!is.empty())
+                it.skip_to(is.c_str());
+            for (int rcnt = 0; it != xdb.allterms_end(); it++) {
+                // If we're beyond the terms matching the initial
+                // section, end
+                if (!is.empty() && (*it).find(is) != 0)
+                    break;

-		// Else try to match the term. The matcher content
-		// is without prefix, so we remove this if any. We
-		// just checked that the index term did begin with
-		// the prefix.
-		string term;
-		if (!prefix.empty()) {
-		    term = (*it).substr(prefix.length());
-		} else {
-		    if (has_prefix(*it)) {
-			continue;
-		    }
-		    term = *it;
-		}
+                // Else try to match the term. The matcher content
+                // is without prefix, so we remove this if any. We
+                // just checked that the index term did begin with
+                // the prefix.
+                string term;
+                if (!prefix.empty()) {
+                    term = (*it).substr(prefix.length());
+                } else {
+                    if (has_prefix(*it)) {
+                        continue;
+                    }
+                    term = *it;
+                }

-		if (matcher && !matcher->match(term))
-		    continue;
+                if (matcher && !matcher->match(term))
+                    continue;

-		res.entries.push_back(
-		    TermMatchEntry(*it, xdb.get_collection_freq(*it),
-				   it.get_termfreq()));
+                res.entries.push_back(
+                    TermMatchEntry(*it, xdb.get_collection_freq(*it),
+                                   it.get_termfreq()));

-		// The problem with truncating here is that this is done
-		// alphabetically and we may not keep the most frequent 
-		// terms. OTOH, not doing it may stall the program if
-		// we are walking the whole term list. We compromise
-		// by cutting at 2*max
-		if (max > 0 && ++rcnt >= 2*max)
-		    break;
-	    }
-	    m_reason.erase();
-	    break;
-	} catch (const Xapian::DatabaseModifiedError &e) {
-	    m_reason = e.get_msg();
-	    xdb.reopen();
-	    continue;
-	} XCATCHERROR(m_reason);
-	break;
+                // The problem with truncating here is that this is done
+                // alphabetically and we may not keep the most frequent 
+                // terms. OTOH, not doing it may stall the program if
+                // we are walking the whole term list. We compromise
+                // by cutting at 2*max
+                if (max > 0 && ++rcnt >= 2*max)
+                    break;
+            }
+            m_reason.erase();
+            break;
+        } catch (const Xapian::DatabaseModifiedError &e) {
+            m_reason = e.get_msg();
+            xdb.reopen();
+            continue;
+        } XCATCHERROR(m_reason);
+        break;
    }
    if (!m_reason.empty()) {
-	LOGERR(("termMatch: %s\n", m_reason.c_str()));
-	return false;
+        LOGERR(("termMatch: %s\n", m_reason.c_str()));
+        return false;
    }

    return true;
@ -476,62 +480,66 @@ public:
+// Open a walk over all index terms. Returns a heap-allocated iterator to be
+// released with termWalkClose(), or 0 if the db is not open or Xapian fails.
 TermIter *Db::termWalkOpen()
 {
    if (!m_ndb || !m_ndb->m_isopen)
-	return 0;
+        return 0;
    TermIter *tit = new TermIter;
    if (tit) {
-	tit->db = m_ndb->xrdb;
+        tit->db = m_ndb->xrdb;
        XAPTRY(tit->it = tit->db.allterms_begin(), tit->db, m_reason);
-	if (!m_reason.empty()) {
-	    LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
-	    return 0;
-	}
+        if (!m_reason.empty()) {
+            LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
+            // The caller never sees the iterator: release it here, else it leaks.
+            termWalkClose(tit);
+            return 0;
+        }
    }
    return tit;
 }
 bool Db::termWalkNext(TermIter *tit, string &term)
 {
    XAPTRY(
-	if (tit && tit->it != tit->db.allterms_end()) {
-	    term = *(tit->it)++;
-	    return true;
-	}
+        if (tit && tit->it != tit->db.allterms_end()) {
+            term = *(tit->it)++;
+            return true;
+        }
        , tit->db, m_reason);

    if (!m_reason.empty()) {
-	LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
+        LOGERR(("Db::termWalkNext: xapian error: %s\n", m_reason.c_str()));
    }
    return false;
 }
 void Db::termWalkClose(TermIter *tit)
 {
    try {
-	delete tit;
+        delete tit;
    } catch (...) {}
 }

 bool Db::termExists(const string& word)
 {
    if (!m_ndb || !m_ndb->m_isopen)
-	return 0;
+        return false; // index not open: report the term as absent

    XAPTRY(if (!m_ndb->xrdb.term_exists(word)) return false,
           m_ndb->xrdb, m_reason);

    if (!m_reason.empty()) {
-	LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
-	return false;
+        LOGERR(("Db::termExists: xapian error: %s\n", m_reason.c_str()));
+        return false;
    }
    return true;
 }

 bool Db::stemDiffers(const string& lang, const string& word, 
-		     const string& base)
+                     const string& base)
 {
    Xapian::Stem stemmer(lang);
    if (!stemmer(word).compare(stemmer(base))) {
-	LOGDEB2(("Rcl::Db::stemDiffers: same for %s and %s\n", 
-		word.c_str(), base.c_str()));
-	return false;
+        LOGDEB2(("Rcl::Db::stemDiffers: same for %s and %s\n", 
+                word.c_str(), base.c_str()));
+        return false;
    }
    return true;
 }
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@ -242,7 +242,7 @@ void SearchData::simplify()
             j < i + clsubp->getSub()->m_query.size(); j++) {
            m_query[j]->setParent(this);
        }
-        i += clsubp->getSub()->m_query.size() - 1;
+        i += int(clsubp->getSub()->m_query.size()) - 1;

        // We don't want the clauses to be deleted when the parent is, as we
        // know own them.
--- a/src/rcldb/searchdata.h
+++ b/src/rcldb/searchdata.h
@ -270,7 +270,7 @@ public:
    {
 	return m_parentSearch ? m_parentSearch->getMaxExp() : 10000;
    }
-    int getMaxCl() 
+    size_t getMaxCl() 
    {
 	return m_parentSearch ? m_parentSearch->getMaxCl() : 100000;
    }
@ -376,7 +376,7 @@ protected:
    std::string  m_field; // Field specification if any
    HighlightData m_hldata;
    // Current count of Xapian clauses, to check against expansion limit
-    int  m_curcl;
+    size_t  m_curcl;
    bool processUserString(Rcl::Db &db, const string &iq,
 			   std::string &ermsg,
 			   void* pq, int slack = 0, bool useNear = false);
--- a/src/rcldb/searchdatatox.cpp
+++ b/src/rcldb/searchdatatox.cpp
@ -840,7 +840,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
 	    tpq.setTSQ(&splitter);
 	    splitter.text_to_words(*it);

-	    slack += tpq.lastpos() - tpq.terms().size() + 1;
+	    slack += tpq.lastpos() - int(tpq.terms().size()) + 1;

 	    LOGDEB0(("strToXapianQ: termcount: %d\n", tpq.terms().size()));
 	    switch (tpq.terms().size() + terminc) {
@ -963,7 +963,7 @@ bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)

    vector<Xapian::Query> orqueries;

-    if (m_text[0] == '/')
+    if (path_isabsolute(m_text))
 	orqueries.push_back(Xapian::Query(wrap_prefix(pathelt_prefix)));
    else
        m_text = path_tildexpand(m_text);
--- a/src/rcldb/stemdb.cpp
+++ b/src/rcldb/stemdb.cpp
@ -22,11 +22,12 @@

 #include "autoconfig.h"

-#include <unistd.h>
+#include "safeunistd.h"

 #include <algorithm>
 #include <map>
 #include <iostream>
+#include <string>
 using namespace std;

 #include <xapian.h>
--- a/src/sampleconf/mimeconf
+++ b/src/sampleconf/mimeconf
@ -133,7 +133,8 @@ image/vnd.djvu = exec rcldjvu
 image/svg+xml = exec rclsvg
 image/x-xcf = execm rclimg
 inode/symlink = internal
-inode/x-empty = exec rclnull
+application/x-zerosize = internal
+inode/x-empty = internal application/x-zerosize
 message/rfc822 = internal
 text/calendar = execm rclics;mimetype=text/plain
 text/html  = internal 
--- a/src/sampleconf/recoll.conf.in
+++ b/src/sampleconf/recoll.conf.in
@ -1,7 +1,7 @@
 # (C) 2004 J.F.Dockes. License: GPL
 #
 # Recoll default configuration file. This typically lives in
-# @prefix@/share/recoll/examples and provides default values. You can
+# $prefix/share/recoll/examples and provides default values. You can
 # override selected parameters by adding assigments to
 # ~/.recoll/recoll.conf (or $RECOLL_CONFDIR/recoll.conf)
 #
@ -199,12 +199,13 @@ maxfsoccuppc = 0
 idxflushmb = 10

 # Place to search for executable filters. If RECOLL_FILTERSDIR is set in
-# the environment, we use it instead
-filtersdir = @prefix@/share/recoll/filters
+# the environment, we use it instead. Defaults to $prefix/share/recoll/filters
+# filtersdir = /path/to/my/filters

 # Place to search for icons. The only reason to change this would be if you
-# want to change the icons displayed in the result list
-iconsdir = @prefix@/share/recoll/images
+# want to change the icons displayed in the result list.
+# Defaults to $prefix/share/recoll/images
+# iconsdir = /path/to/my/icons

 # Should we use the system's 'file -i' command as a final step in file type
 # identification ? This may be useful, but will usually cause the
--- a/src/unac/unac.c
+++ b/src/unac/unac.c
@ -16,21 +16,20 @@
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

-#ifdef HAVE_CONFIG_H
-#ifdef RECOLL_DATADIR
+#ifdef BUILDING_RECOLL
 #include "autoconfig.h"
 #else
 #include "config.h"
 #endif /* RECOLL */
-#endif /* HAVE_CONFIG_H */

-#ifdef RECOLL_DATADIR
+#ifdef BUILDING_RECOLL
 /* Yes, recoll unac is actually c++, lets face modernity, I will not be
   caught writing another binary search  */
 #include <vector>
 #include <map>
 #include <string>
 #include <algorithm>
+#include <iostream>
 #include UNORDERED_MAP_INCLUDE

 using std::string;
@ -53,7 +52,7 @@ static inline bool is_except_char(unsigned short c, string& trans)
    trans = it->second;
    return true;
 }
-#endif /* RECOLL_DATADIR */
+#endif /* BUILDING_RECOLL*/

 /*
 * If configure.in has not defined this symbol, assume const. It
@ -14171,9 +14170,9 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
 			       char** outp, size_t* out_lengthp, int what)
 {
  char* out;
-  int out_size;
-  int out_length;
-  unsigned int i;
+  size_t out_size;
+  size_t out_length;
+  size_t i;

  out_size = in_length > 0 ? in_length : 1024;

@ -14191,13 +14190,13 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
  for(i = 0; i < in_length; i += 2) {
    unsigned short c;
    unsigned short* p;
-    int l;
-    int k;
+    size_t l;
+    size_t k;
    c = (in[i] << 8) | (in[i + 1] & 0xff);
    /*
     * Lookup the tables for decomposition information
     */
-#ifdef RECOLL_DATADIR
+#ifdef BUILDING_RECOLL
    // Exception unac/fold values set by user. There should be 3 arrays for
    // unac/fold/unac+fold. For now there is only one array, which used to
    // be set for unac+fold, and is mostly or only used to prevent diacritics
@ -14220,11 +14219,11 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
 	    l = trans.size() / 2;
 	}
    } else {
-#endif /* RECOLL_DATADIR */
+#endif /* BUILDING_RECOLL */
 	unac_uf_char_utf16_(c, p, l, what)
-#ifdef RECOLL_DATADIR
+#ifdef BUILDING_RECOLL
    }
-#endif /* RECOLL_DATADIR */
+#endif /* BUILDING_RECOLL */

    /*
     * Explain what's done in great detail
@ -14237,7 +14236,7 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
      if(l == 0) {
 	DEBUG_APPEND("untouched\n");
      } else {
-	int i;
+	size_t i;
 	for(i = 0; i < l; i++)
 	  DEBUG_APPEND("0x%04x ", p[i]);
 	DEBUG_APPEND("\n");
@ -14437,10 +14436,11 @@ static int convert(const char* from, const char* to,
 	  const char* tmp = space;
 	  size_t tmp_length = 2;
 	  if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
-	    if(errno == E2BIG)
+              if(errno == E2BIG) {
 	      /* fall thru to the E2BIG case below */;
-	    else
-	      goto out;
+              } else {
+                  goto out;
+              }
 	  } else {
 	    /* The offending character was replaced by a SPACE, skip it. */
 	    in += 2;
@ -14456,7 +14456,7 @@ static int convert(const char* from, const char* to,
 	  /*
 	   * The output does not fit in the current out buffer, enlarge it.
 	   */
-	  int length = out - out_base;
+	  size_t length = out - out_base;
 	  out_size *= 2;
 	  {
 	      char *saved = out_base;
@ -14562,7 +14562,7 @@ const char* unac_version(void)
  return UNAC_VERSION;
 }

-#ifdef RECOLL_DATADIR
+#ifdef BUILDING_RECOLL
 void unac_set_except_translations(const char *spectrans)
 {
    except_trans.clear();
@ -14615,4 +14615,4 @@ void unac_set_except_translations(const char *spectrans)
 	free(out);
    }
 }
-#endif /* RECOLL_DATADIR */
+#endif /* BUILDING_RECOLL */
--- a/src/unac/unac.cpp
+++ b/src/unac/unac.cpp
@ -1 +0,0 @@
-unac.c
--- a/src/unac/unac.cpp
+++ b/src/unac/unac.cpp
@ -0,0 +1 @@
+#include "unac.c"
--- a/src/unac/unac.h
+++ b/src/unac/unac.h
@ -114,7 +114,7 @@ int fold_string(const char* charset,
 /* To be called before starting threads in mt programs */
 void unac_init_mt();

-#ifdef RECOLL_DATADIR
+#ifdef BUILDING_RECOLL
 #include <string>
 /** 
 * Set exceptions for unaccenting, for characters which should not be
@ -128,7 +128,7 @@ void unac_init_mt();
 *  can't be an exception character, deal with it...
 */
 void unac_set_except_translations(const char *spectrans);
-#endif /* RECOLL_DATADIR */
+#endif /* BUILDING_RECOLL */

 /*
 * Return unac version number.
--- a/src/utils/Makefile
+++ b/src/utils/Makefile
@ -109,8 +109,8 @@ trfileudi.o : fileudi.cpp fileudi.h
 EXECMD_OBJS= trexecmd.o 
 trexecmd : $(EXECMD_OBJS)
 	$(CXX) $(ALL_CXXFLAGS) -o trexecmd $(EXECMD_OBJS) $(LIBRECOLL)
-trexecmd.o : execmd.cpp execmd.h
-	$(CXX) -o trexecmd.o -c $(ALL_CXXFLAGS) -DTEST_EXECMD execmd.cpp
+trexecmd.o : trexecmd.cpp execmd.h
+	$(CXX) -o trexecmd.o -c $(ALL_CXXFLAGS) -I../xaposix trexecmd.cpp

 TRANSCODE_OBJS= trtranscode.o   
 transcode : $(TRANSCODE_OBJS)
--- a/src/utils/base64.cpp
+++ b/src/utils/base64.cpp
@ -217,7 +217,7 @@ void base64_encode(const string &in, string &out)

    out.clear();

-    int srclength = in.length();
+    string::size_type srclength = in.length();
    int sidx = 0;
    while (2 < srclength) {
 	input[0] = in[sidx++];
@ -244,7 +244,7 @@ void base64_encode(const string &in, string &out)
    if (0 != srclength) {
 	/* Get what's left. */
 	input[0] = input[1] = input[2] = '\0';
-	for (int i = 0; i < srclength; i++)
+	for (string::size_type i = 0; i < srclength; i++)
 	    input[i] = in[sidx++];
 	
 	output[0] = input[0] >> 2;
--- a/src/utils/circache.cpp
+++ b/src/utils/circache.cpp
@ -184,7 +184,7 @@ public:
    // Offset of last write (newest header)
    off_t m_nheadoffs;
    // Pad size for newest entry. 
-    int   m_npadsize;
+    off_t m_npadsize;
    // Keep history or only last entry
    bool  m_uniquentries; 
    ///////////////////// End header entries
@ -956,10 +956,10 @@ bool CirCache::erase(const string& udi)
 // entry. 
 class CCScanHookSpacer : public  CCScanHook {
 public:
-    UINT sizewanted;
-    UINT sizeseen;
+    off_t sizewanted;
+    off_t sizeseen;
    vector<pair<string, off_t> > squashed_udis;
-    CCScanHookSpacer(int sz)
+    CCScanHookSpacer(off_t sz)
        : sizewanted(sz), sizeseen(0) {assert(sz > 0);}

    virtual status takeone(off_t offs, const string& udi, 
@ -1009,14 +1009,14 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,

    // Data compression ?
    const char *datap = data.c_str();
-    unsigned int datalen = data.size();
+    size_t datalen = data.size();
    unsigned short flags = 0;
    TempBuf compbuf;
    if (!(iflags & NoCompHint)) {
-        ULONG len = compressBound(data.size());
+        uLong len = compressBound(static_cast<uLong>(data.size()));
        char *bf = compbuf.setsize(len);
        if (bf != 0 &&
-            compress((Bytef*)bf, &len, (Bytef*)data.c_str(), data.size()) 
+            compress((Bytef*)bf, &len, (Bytef*)data.c_str(), static_cast<uLong>(data.size()))
            == Z_OK) {
            if (float(len) < 0.9 * float(data.size())) {
                // bf is local but it's our static buffer address
@ -1034,16 +1034,16 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
    }

    // Characteristics for the new entry.
-    int nsize = CIRCACHE_HEADER_SIZE + dic.size() + datalen;
-    int nwriteoffs = m_d->m_oheadoffs;
-    int npadsize = 0;
+    off_t nsize = CIRCACHE_HEADER_SIZE + dic.size() + datalen;
+    off_t nwriteoffs = m_d->m_oheadoffs;
+    off_t npadsize = 0;
    bool extending = false;

    LOGDEB(("CirCache::put: nsz %d oheadoffs %d\n", nsize, m_d->m_oheadoffs));

    // Check if we can recover some pad space from the (physically) previous
    // entry.
-    int recovpadsize = m_d->m_oheadoffs == CIRCACHE_FIRSTBLOCK_SIZE ?
+    off_t recovpadsize = m_d->m_oheadoffs == CIRCACHE_FIRSTBLOCK_SIZE ?
        0 : m_d->m_npadsize;
    if (recovpadsize != 0) {
        // Need to read the latest entry's header, to rewrite it with a 
@ -1082,7 +1082,7 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
    } else {
        // Scan the file until we have enough space for the new entry,
        // and determine the pad size up to the 1st preserved entry
-        int scansize = nsize - recovpadsize;
+        off_t scansize = nsize - recovpadsize;
        LOGDEB(("CirCache::put: scanning for size %d from offs %u\n",
                scansize, (UINT)m_d->m_oheadoffs));
        CCScanHookSpacer spacer(scansize);
--- a/src/utils/conftree.cpp
+++ b/src/utils/conftree.cpp
@ -14,9 +14,7 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
+#include "autoconfig.h"

 #ifndef TEST_CONFTREE

@ -70,7 +68,7 @@ void ConfSimple::parseinput(istream &input)
 	}

        {
-            int ll = strlen(cline);
+            size_t ll = strlen(cline);
            while (ll > 0 && (cline[ll-1] == '\n' || cline[ll-1] == '\r')) {
                cline[ll-1] = 0;
                ll--;
@ -576,8 +574,8 @@ bool ConfSimple::hasNameAnywhere(const string& nm) const
 int ConfTree::get(const std::string &name, string &value, const string &sk)
    const
 {
-    if (sk.empty() || sk[0] != '/') {
-	//	LOGDEB((stderr, "ConfTree::get: looking in global space\n"));
+    if (sk.empty() || !path_isabsolute(sk) ) {
+        // LOGDEB((stderr, "ConfTree::get: looking in global space for sk [%s]\n", sk.c_str()));
 	return ConfSimple::get(name, value, sk);
    }

@ -590,15 +588,21 @@ int ConfTree::get(const std::string &name, string &value, const string &sk)

    // Look in subkey and up its parents until root ('')
    for (;;) {
-	//	LOGDEB((stderr,"ConfTree::get: looking for '%s' in '%s'\n",
-	//		name.c_str(), msk.c_str()));
+	// LOGDEB((stderr,"ConfTree::get: looking for '%s' in '%s'\n",
+        // name.c_str(), msk.c_str()));
 	if (ConfSimple::get(name, value, msk))
 	    return 1;
 	string::size_type pos = msk.rfind("/");
 	if (pos != string::npos) {
 	    msk.replace(pos, string::npos, string());
-	} else
+	} else {
+#ifdef _WIN32
+            if (msk.size() == 2 && isalpha(msk[0]) && msk[1] == ':')
+                msk.clear();
+            else
+#endif
 	    break;
+        }
    }
    return 0;
 }
--- a/src/utils/copyfile.cpp
+++ b/src/utils/copyfile.cpp
@ -15,14 +15,18 @@
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
 #ifndef TEST_COPYFILE
+#include "autoconfig.h"
+
 #include <stdio.h>
 #include <errno.h>
 #include "safefcntl.h"
 #include <sys/types.h>
 #include "safesysstat.h"
 #include "safeunistd.h"
+#ifndef _WIN32
 #include <sys/time.h>
 #include <utime.h>
+#endif

 #include <cstring>

@ -43,7 +47,7 @@ bool copyfile(const char *src, const char *dst, string &reason, int flags)

    LOGDEB(("copyfile: %s to %s\n", src, dst));

-    if ((sfd = ::open(src, O_RDONLY)) < 0) {
+    if ((sfd = ::open(src, O_RDONLY, 0)) < 0) {
        reason += string("open ") + src + ": " + strerror(errno);
        goto out;
    }
@ -149,6 +153,7 @@ bool renameormove(const char *src, const char *dst, string &reason)
        return false;
    }

+#ifndef _WIN32
    // Try to preserve modes, owner, times. This may fail for a number
    // of reasons
    if ((st1.st_mode & 0777) != (st.st_mode & 0777)) {
@ -167,7 +172,7 @@ bool renameormove(const char *src, const char *dst, string &reason)
    times[1].tv_sec = st.st_mtime;
    times[1].tv_usec = 0;
    utimes(dst, times);
-
+#endif
    // All ok, get rid of origin
    if (unlink(src) < 0) {
        reason += string("Can't unlink ") + src + "Error : " + strerror(errno);
--- a/src/utils/cpuconf.cpp
+++ b/src/utils/cpuconf.cpp
@ -62,7 +62,6 @@ bool getCpuConf(CpuConf& cpus)
 }
 #endif

-
 #else // TEST_CPUCONF

 #include <stdlib.h>
--- a/Show More
+++ b/Show More