updated filters page for current status

2010-05-04 09:11:56 +02:00 · 2010-05-04 09:11:56 +02:00 · f385ff4f1a
commit f385ff4f1a
parent 538264db95
15 changed files with 274 additions and 1828 deletions
--- a/website/filters/filters.html
+++ b/website/filters/filters.html
@ -56,7 +56,7 @@
 	(<span class="filename">$HOME/.recoll</span> or 
 	<span class="filename">$RECOLL_CONFDIR</span>).</p>
      
-      <p>Alternatively, you can replace your 1.[8,9,10] system files with
+      <p>Alternatively, you can replace your system files with
 	these updated and complete versions:
 	<a href="mimemap">mimemap</a>
 	<a href="mimeconf">mimeconf</a> 
@ -64,230 +64,19 @@

      <p>Notes:</p>
      <blockquote>
-	<p>All filters are up to date in Recoll 1.10.5</p>
-	  
-	<p>Recoll 1.10.0: only <span class="filename">rclsvg</span> for
-	  Scalable Vector Graphic files is missing.</p>
-
-	<p>Recoll 1.9: all filters are up to date in the release,
-	  except the <span class="filename">rclimg</span> image
-	  filter and <span class="filename">rcltex</span>TeX filter.</p>
-
-	<p>Recoll 1.8: The image, <b>kword</b>,
-	  <b>abiword</b> and <b>wordperfect</b> can be installed in
-	  addition.</p>
+	<p>All filters are up to date in Recoll 1.13.04, except rclics.</p>
+	<p>If you are using an older version, you should update to 1.13.04.</p>
      </blockquote>

-      <h2>Open XML Office formats</h2> 

-      <p>Filter: <a href="rclopxml">rclopxml</a>. </p>
-      <p>This needs <span class="command">xsltproc</span> to be
-        installed (if you run a decently recent Linux, this is
-        probably on your system already). </p>
-
-      <p>The filters are certainly not perfect, but extract a good
-	part of the text, which is probably better than nothing.</p>
-	
-	<p>There are quite a few added lines in the configuration
-	  files, just fetch the new ones:
-	<a href="mimemap">mimemap</a>
-	<a href="mimeconf">mimeconf</a> 
-	<a href="mimeview">mimeview</a> </p>
-
-
-      <h2>Scalable Vector Graphics filter</h2> 
-
-        <p>A new filter for <b>SVG</b> files:
-	  <a href="rclsvg">rclsvg</a>.
-	You'll have to add the following lines in the configuration
-	files:</p>
-
-      <p>In <span class="filename">mimemap</span>: </p>
-      <pre>.svg = image/svg+xml
-</pre>
-      <p>In <span class="filename">mimeconf</span>, [index] section: </p>
-      <pre>image/svg+xml = exec rclsvg</pre>
-      <p><span class="filename">mimeconf</span>, [icons] section:</p>
-      <pre>image/svg+xml = drawing</pre>
-      <p><span class="filename">mimeconf</span>, [categories] section, also add
-	<tt>image/svg+xml</tt> to the <tt>other</tt> list.</p>
-
-      <p>The filter is based on <span class="command">sed</span>, so
-      you don't need to install any external application.</p>
-
-      <p>In 
-	<span class="filename">mimeview</span>, or the <em>[view]</em>
-	section of 
-	<span class="filename">mimeconf</span> for older recoll versions: </p>
-      <pre>    image/svg+xml = inkview %f</pre>
-      <p>(Or substitute your favorite editor).</p>
-
-
-
-      <h2>TeX filter</h2> 
-
-        <p>A new filter for <b>TeX</b> files:
-	  <a href="rcltex">rcltex</a>.
-	You'll have to add the following lines in the configuration
-	files:</p>
-
-      <p>In <span class="filename">mimemap</span>: </p>
-      <pre>.tex = application/x-tex
-</pre>
-      <p>In <span class="filename">mimeconf</span>, [index] section: </p>
-      <pre>    application/x-tex = exec rcltex</pre>
-      <p>mimeconf, [icons] section:</p>
-      <pre>application/x-tex = wordprocessing</pre>
-      <p>mimeconf, [categories] section, also add
-	application/x-tex to the <tt>texts</tt> list.</p>
-
-      <p>This filter uses either <span class="command">untex</span>
-	or <a
-	  "href=http://www.cs.purdue.edu/homes/trinkle/detex/">detex</a>
-	if the command is available. . A copy of the
-	source code for untex is stored <a "href=../untex/untex-1.3.jf.tar.gz">
-	  here</a></p>
-
-      <p>In 
-	<span class="filename">mimeview</span>, or the <em>[view]</em>
-	section of 
-	<span class="filename">mimeconf</span> for older recoll versions: </p>
-      <pre>    application/x-tex = gnuclient -q %f</pre>
-      <p>(Or substitute your favorite editor).</p>
-
-
-      <h2>A filter for image tags</h2> 
-
-        <p>A new filter for extracting tags from image and picture files:
-	  <a href="rclimg">rclimg</a>, by Cedric Scott. It is based on
-	  the <b>Exiftool</b> Perl application and library.
-	  You'll have to add the following lines in the configuration
-	  files:</p>
-
-      <p>In <span class="filename">mimemap</span>: </p>
-      <pre>.jpeg = image/jpeg
-.gif = image/gif
-.tiff = image/tiff
-.tif  = image/tiff
-</pre>
-      <p>In <span class="filename">mimeconf</span>, [index] section: </p>
-      <pre>image/gif = exec rclimg
-image/jpeg = exec rclimg
-image/png = exec rclimg
-image/tiff = exec rclimg
-      </pre>
-      <p>And remove the <tt>image/jpeg = exec rcljpeg</tt> line.</p>
-
-      <p>Exiftool supports many other image formats, just enter any
-	additional ones like above.</p>
-
-      <h2>Wordperfect filter</h2> 
-
-        <p>A new filter for <b>Wordperfect</b> files:
-	  <a href="rclwpd">rclwpd</a>.
-	You'll have to add the following lines in the configuration
-	files:</p>
-
-      <p>In <span class="filename">mimemap</span>: </p>
-      <pre>.wpd = application/vnd.wordperfect
-</pre>
-      <p>In <span class="filename">mimeconf</span>, [index] section: </p>
-      <pre>    application/vnd.wordperfect = exec rclwpd</pre>
-      <p>mimeconf, [icons] section:</p>
-      <pre>application/vnd.wordperfect = wordprocessing</pre>
-      <p>mimeconf, [categories] section, also add
-	application/vnd.wordperfect to the <tt>texts</tt> list.</p>
-
-      <p>In 
-	<span class="filename">mimeview</span>, or the <em>[view]</em>
-	section of 
-	<span class="filename">mimeconf</span> for older recoll versions: </p>
-      <pre>    application/vnd.wordperfect = openoffice %f</pre>
-
-      <h2>Abiword filter</h2> 
-
-      <p>A new filter for <a href="http://www.abisource.com/">
-	  abiword</a> files: <a href="rclabw">
-	  rclabw</a>.
-	You'll have to add the following lines in the configuration
-	files:</p>
-
-      <p>In <span class="filename">mimemap</span>: </p>
-      <pre>    .abw = application/x-abiword</pre>
-
-      <p>In <span class="filename">mimeconf</span>: </p>
-      <pre>    application/x-abiword = exec rclabw</pre>
-
-      <p>In 
-	<span class="filename">mimeview</span>, or the <em>[view]</em>
-	section of 
-	<span class="filename">mimeconf</span> for older recoll versions: </p>
-      <pre>    application/x-abiword = abiword %f</pre>
-
-      <h2>Kword filter</h2> 
-
-      <p>A new filter for <a href="http://www.kde.org/whatiskde/koffice.php/">
-	  kword</a> files: <a href="rclkwd">
-	  rclkwd</a>.
-	You'll have to add the following lines in the configuration
-	files:</p>
-
-      <p>In <span class="filename">mimemap</span>: </p>
-      <pre>    .kwd = application/x-kword</pre>
-      <p>In <span class="filename">mimeconf</span>: </p>
-      <pre>    application/x-kword = exec rclkwd</pre>
-      <p>In 
-	<span class="filename">mimeview</span>, or the <em>[view]</em>
-	section of 
-	<span class="filename">mimeconf</span> for older recoll versions: </p>
-      <pre>    application/x-kword = kword %f</pre>
-
-
-      <h2>Openoffice filter</h2>
-      <p>The filter script for all releases up and including 1.7.5 had
-      a bug on Debian and Ubuntu systems. You can download the <a
-      href="rclsoff">corrected script</a>.</p>
-
-      <h2>Scribus filter</h2> 
-
-      <p>A new filter for <a href="http://www.scribus.net/">
-	  Scribus</a> files: <a href="rclscribus">
-	  rclscribus</a>. This is only for the newer
-	<em>.sla</em> files. I am willing to add support for the older
-	<em>.scd</em> format if someone sends me a sample... You'll
-	have to add the following lines in the configuration files:</p>
-
-      <p>In <span class="filename">mimemap</span>: </p>
-      <pre>      .sla = application/x-scribus</pre>
-      <p>In <span class="filename">mimeconf</span>: </p>
-      <pre>      application/x-scribus = exec rclscribus</pre>
-      <p>In 
-	<span class="filename">mimeview</span>, or the <em>[view]</em>
-	section of 
-	<span class="filename">mimeconf</span> for older recoll versions: </p>
-      <pre>       application/x-scribus = scribus %f</pre>
-
-      <p>Do *not* add entries for <em>.sla.gz</em>, the normal recoll
-      decompression process will handle them (hopefully...).</p>
-
-
-      <h2>Lyx filter</h2> 
-
-      <p>A new filter for <a href="http://www.lyx.or/">
-	  Lyx</a> files: <a href="rcllyx">rcllyx</a>. 
-	This probably has quite a few issues with character encoding,
-	but it's also probably better than handling lyx documents as
-	text files.</p>
-
-      <p>In <span class="filename">mimemap</span>: </p>
-      <pre>      .lyx = application/x-lyx</pre>
-      <p>In <span class="filename">mimeconf</span>: </p>
-      <pre>      application/x-lyx = exec rcllyx</pre>
-      <p>In 
-	<span class="filename">mimeview</span>, or the <em>[view]</em>
-	section of 
-	<span class="filename">mimeconf</span> for older recoll versions: </p>
-      <pre>       application/x-lyx = lyx %f</pre>
+      <h2>Updated icalendar filter</h2>
+      <p>The filter script for all releases up to and including 1.13.04 used
+      the icalendar Python module, which is not robust enough against some
+      syntax errors (found for example in Mozilla exports). The new version
+      uses an internal trivial parser, which will hopefully be both
+      sufficient for what we are doing and more robust.
+      You can download the <a href="rclics">new script</a>. This will not
+      work for versions prior to 1.13.</p>

    </div>
  </body>
--- a/website/filters/mimeconf
+++ b/website/filters/mimeconf
@ -1,4 +1,4 @@
-# @(#$Id: mimeconf,v 1.41 2008/09/01 20:39:40 dockes Exp $  (C) 2004 J.F.Dockes
+# @(#$Id: mimeconf,v 1.48 2008-11-27 13:35:24 dockes Exp $  (C) 2004 J.F.Dockes

 # Recoll : associations of mime types to processing filters.
 # There are different sections for decompression, 'interning' for indexing
@ -23,15 +23,24 @@ application/x-bzip2 =  uncompress rcluncomp bunzip2 %f %t
 ## ###################################
 # Filters for indexing and internal preview. 
 # The "internal" filters are hardwired in the c++ code.
-# The external "exec" filters are typically scripts. They output the
+# The external "exec" filters are typically scripts. By default, they output the
 # document in simple html format, have a look at the scripts.
+# A different format (e.g. text/plain) and a character set can be defined for
+# each filter; see the examples below (e.g. msword).
 [index]
-application/msword = exec rcldoc
+# Note: rcldoc did some work to splice hyphenated words at eol. Seems
+# actually not needed because antiword apparently does it too
+# application/msword = exec rcldoc
+application/msword = exec antiword -t -i 1 -m UTF-8;mimetype=text/plain;charset=utf-8
+# Alternatively you can use wvWare for msword. It's much slower than
+# antiword, but will handle documents which provoke the 'I'm afraid the
+# text stream of this file is too small to handle' antiword error
+# application/msword = exec wvWare --charset=utf-8 --nographics
 application/ogg = exec rclogg
 application/pdf = exec rclpdf
-application/postscript = exec rclps
-application/vnd.ms-excel = exec rclxls
-application/vnd.ms-powerpoint = exec rclppt
+application/postscript = exec pstotext;charset=iso-8859-1;mimetype=text/plain
+application/vnd.ms-excel = exec xls2csv -c "	" -d utf-8;charset=utf-8;mimetype=text/plain
+application/vnd.ms-powerpoint = exec catppt -d utf-8;charset=utf-8;mimetype=text/plain
 application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
 exec rclopxml
 application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
@ -54,29 +63,40 @@ application/vnd.sun.xml.math = exec rclsoff
 application/vnd.sun.xml.writer = exec rclsoff
 application/vnd.sun.xml.writer.global = exec rclsoff
 application/vnd.sun.xml.writer.template = exec rclsoff
-application/vnd.wordperfect = exec rclwpd
+application/vnd.wordperfect = exec wpd2html;mimetype=text/html
 application/x-abiword = exec rclabw
+application/x-awk = internal
 application/x-dvi = exec rcldvi
 application/x-flac = exec rclflac
 application/x-kword = exec rclkwd
 application/x-lyx = exec rcllyx
+application/x-perl = internal
 application/x-scribus = exec rclscribus
+application/x-shellscript = internal
 application/x-tex = exec rcltex
+application/x-chm = execm rclchm
+application/zip = execm rclzip
 audio/mpeg = exec rclid3
-image/gif = exec rclimg
-image/jpeg = exec rclimg
-image/png = exec rclimg
-image/tiff = exec rclimg
+image/gif = execm rclimg
+image/jpeg = execm rclimg
+image/png = execm rclimg
+image/tiff = execm rclimg
 image/vnd.djvu = exec rcldjvu
 image/svg+xml = exec rclsvg
 message/rfc822 = internal
+text/calendar = execm rclics;mimetype=text/plain;charset=utf-8
 text/html  = internal 
 text/plain = internal 
-text/rtf = exec rclrtf
+text/rtf = exec unrtf --nopict --html;charset=iso-8859-1;mimetype=text/html
+text/x-c = internal
+text/x-fictionbook = exec rclfb2
 text/x-gaim-log = exec rclgaim
 text/x-html-sidux-man = exec rclsiduxman
 text/x-mail = internal
 text/x-man = exec rclman
+text/x-purple-log = exec rclpurple
+text/x-python = exec rclpython
+text/x-shellscript = internal

 ## #############################################
 # Icons to be used in the result list if required by gui config
@ -117,6 +137,9 @@ application/x-kword = wordprocessing
 application/x-lyx = wordprocessing
 application/x-scribus = document
 application/x-tex = wordprocessing
+application/x-awk = source
+application/x-perl = source
+application/x-shellscript = source
 audio/mpeg = sownd
 image/gif = image
 image/jpeg = image
@ -128,9 +151,14 @@ message/rfc822 = message
 text/html = html
 text/plain = txt
 text/x-c = source
+text/x-c++ = source
+text/x-fictionbook = document
 text/x-html-sidux-man = sidux-book
 text/x-mail = message
 text/x-man = document
+application/x-chm = document
+text/x-purple-log = pidgin
+text/x-python = text-x-python

 [categories]

@ -145,17 +173,27 @@ text = \
      application/vnd.sun.xml.writer.template \
      application/vnd.wordperfect \
      application/x-abiword \
+      application/x-awk \
+      application/x-chm \
      application/x-dvi \
      application/x-kword \
      application/x-lyx \
+      application/x-perl \
      application/x-scribus \
+      application/x-shellscript \
      application/x-tex \
      image/vnd.djvu \
+      text/calendar \
      text/html \
      text/plain \
      text/rtf \
+      text/x-c \
+      text/x-c++ \
+      text/x-fictionbook \
      text/x-html-sidux-man \
-      text/x-man
+      text/x-man \
+      text/x-python \
+      text/x-shellscript

 spreadsheet = \
   application/vnd.ms-excel \
@ -182,34 +220,11 @@ media = \
 message = message/rfc822 \
 	  text/x-gaim-log \
 	  text/x-mail \
+	  text/x-purple-log \

 other = application/vnd.sun.xml.draw \
        application/vnd.sun.xml.draw.template \
        application/vnd.sun.xml.math \
        application/x-fsdirectory \
+	application/zip \
        image/svg+xml \
-
-
-[prefixes]
-
-# This allows extending the set of fields that recoll understand/searches. 
-# See the manual for exact usage.
-# Important: 
-#   - the field names MUST be all lowercase here. They can be anycased
-#     in the documents:
-#   - The extension field prefixes MUST begin with X and be all UPPERCASE.
-title = S
-caption = S
-subject = S
-
-author = A
-creator = A
-from = A
-
-keyword = K
-tag = K
-keywords = K
-tags = K
-
-# testing /example :
-recollspecialfield = XRCLSF
--- a/website/filters/mimemap
+++ b/website/filters/mimemap
@ -1,16 +1,23 @@
-# @(#$Id: mimemap,v 1.31 2008/08/25 16:12:16 dockes Exp $  (C) 2004 J.F.Dockes
+# @(#$Id: mimemap,v 1.32 2008-09-15 08:03:37 dockes Exp $  (C) 2004 J.F.Dockes
 # Recoll: associations of file name extensions to mime types

 .txt = text/plain
 .text = text/plain
 .d    = text/plain

-# Source files. Defining them as text/x-c will enable ext viewer. If
-# text/plain they will be somewhat indexed
+# Source files. 
+# Defining them with specific types allows using a specific ext viewer (in 
+# mimeview). You can in general use rcltext to wrap them in html for
+# indexing the contents (and rough preview). You could also just set them
+# as text/plain (index as text, use text viewer)
 .cpp = text/x-c
 .h   = text/x-c
 .c   = text/x-c
 .cc  = text/x-c
+.py  = text/x-python
+.awk = application/x-awk
+.pl = application/x-perl
+.sh = application/x-shellscript

 .rtf  = text/rtf

@ -18,6 +25,7 @@
 .htm = text/html
 .shtml = text/html
 .php = text/html
+.ics = text/calendar

 .pdf = application/pdf

@ -35,10 +43,12 @@
 .Z = application/x-gzip
 .bz2 = application/x-bzip2
 #.Z  = application/x-compress
+.zip = application/zip

 .doc = application/msword
 .ppt = application/vnd.ms-powerpoint
 .xls = application/vnd.ms-excel
+.chm = application/x-chm

 # OpenOffice / opendocument. We handle opendocument as old openoffice files
 # for now
@ -97,21 +107,26 @@
 .tiff = image/tiff
 .tif  = image/tiff

-# A list of stuff that we don't want to touch at all (for now). Having the
-# suffixes listed in there speeds up things quite a lot by avoiding
+.fb2 = text/x-fictionbook
+
+# A list of suffixes (name endings) that we don't want to touch at all.
+# Having these explicitly listed speeds things up a bit by avoiding
 # unneeded decompression or 'file' calls. File names still get indexed if
-# indexallfilenames is set
+# indexallfilenames is set (so this is different from skippedNames). It's a
+# bit inconsistent to have it listed among the suffix translations, but no
+# problem in practice.
 recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz .log.gz .md5 .map \
-       .m4 .tcl .js .sh .pl .awk \
       .o .lib .dll .a \
-       .dat .bak .rdf .log .db .ini .msf .pid \
+       .dat .bak .rdf .log .db .msf .pid \
       .gnm .gnumeric \
       .gif .bmp .xpm \
       ,v ~ #

-# Special handling of .txt files inside ~/.gaim directory
+# Special handling of .txt files inside ~/.gaim and ~/.purple directories
 [~/.gaim]
 .txt = text/x-gaim-log
+[~/.purple]
+.txt = text/x-purple-log

 # Special handling of sidux manual menu system
 [/usr/share/sidux-manual]
@ -130,3 +145,5 @@ recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz .log.gz .md5 .map \
 .6 = text/x-man
 .7 = text/x-man
 .8 = text/x-man
+.n = text/x-man
+.3pm = text/x-man
--- a/website/filters/mimeview
+++ b/website/filters/mimeview
@ -1,4 +1,4 @@
-# @(#$Id: mimeview,v 1.15 2008/09/01 20:39:40 dockes Exp $  (C) 2004 J.F.Dockes
+# @(#$Id: mimeview,v 1.16 2008-09-15 08:03:37 dockes Exp $  (C) 2004 J.F.Dockes

 ## ##########################################
 # External viewers, launched by the recoll GUI when you click on a result
@ -40,8 +40,9 @@ application/vnd.sun.xml.writer = openoffice %f
 application/vnd.sun.xml.writer.global = openoffice %f
 application/vnd.sun.xml.writer.template = openoffice %f
 application/vnd.wordperfect = openoffice %f
-application/x-fsdirectory = rox %f
+application/x-chm = okular %f
 application/x-dvi = xdvi %f
+application/x-fsdirectory = rox %f
 application/x-flac = xmms %f
 application/x-lyx = lyx %f
 application/x-scribus = scribus %f
@ -59,3 +60,4 @@ text/plain = gnuclient -q %f
 text/x-c = gnuclient -q %f
 text/x-html-sidux-man = konqueror %f
 #text/x-html-sidux-man = iceweasel %f
+text/x-python = idle %f
--- a/website/filters/rclabw
+++ b/website/filters/rclabw
@ -1,175 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclabw,v 1.2 2007/06/15 11:41:50 dockes Exp $  (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# Extract text from an abiword file
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclabw"
-filetype=abiword
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds iconv sed
-
-# check the input file existence
-if test ! -f "$infile"
-then
-  printf '%s: %s: no such file\n' "$progname" "$infile"
-  exit 1
-fi
-
-encoding=`sed -e  '/<?xml version=/s/"?>$//' \
-	      -e '/^<?xml version=/s/.*encoding="//p;D;q' \
-	      -e D \
-< $infile`
-if test X$encoding = X ; then encoding=UTF-8;fi
-
-# Note: there can be newlines inside the description field, we don't want
-# them... Have 2 use 2 different selectors for the single-line and
-# multiple-line cases because of the generic tag end (</m> for all meta
-# tags)
-descsedprog='
-/<m key="dc.description">\([^<]*\)<\/m>/ {
-s//\1/
-p
-q
-}
-/<m key="dc.description">/,/<\/m>/ {
-s!.*<m key="dc.description">!!
-s!</m>.*!!
-H
-}
-${
-g
-s/\n/ /g
-p
-}
-'
-
-description=`sed -n -e "$descsedprog" < "$infile"`
-#echo description: "$description"
-
-# Set program for the single line meta elements. Takes element name as
-# parameter 
-setmetasedprog() {
-metasedprog='/<m key="'$1'">/{
-s/.*<m key="'$1'">\([^<]*\).*/\1/
-'"s/\"/'/g"'
-p
-}'
-}
-
-setmetasedprog dc.subject
-subject=`sed -n -e "$metasedprog" "$infile"`
-#echo subject: "$subject"
-
-setmetasedprog dc.title
-title=`sed -n -e "$metasedprog" "$infile"`
-#echo titre: "$title"
-
-setmetasedprog abiword.keywords
-keywords=`sed -n -e "$metasedprog" "$infile"`
-#echo keywords: "$keywords"
-
-setmetasedprog dc.creator
-creator=`sed -n -e "$metasedprog" "$infile"`
-#echo creator: "$creator"
-
-# Note: next expr supposes that paragraphs are always all by themselves on
-# a single line in the xml (no multiple <p> per line, no embedded newlines
-# in text).
-contentsedprog='
-/<p[ >]/{
-s/<[^>]*>/ /g
-p
-}
-'
-content=`sed -n -e "$contentsedprog" "$infile"`
-#echo content: "$content"
-
-# output the result
-(echo '<html><head><title>' "$title" '</title>'
-echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
-echo '<meta name="description" content="' "$description $subject" '">'
-echo '<meta name="keywords" content="' "$keywords" '">'
-echo '<meta name="author" content="' "$creator" '">'
-echo '</head><body><pre>'
-echo "$content" 
-echo '</pre></body></html>') \
-| iconv -f $encoding -t UTF-8 -c -s 
-
-
-# exit normally
-exit 0
--- a/website/filters/rclics
+++ b/website/filters/rclics
@ -0,0 +1,180 @@
+#!/usr/bin/env python
+
+# Read an ICS file, break it into "documents" which are events, todos,
+# or journal entries, and interface with recoll execm
+#
+# For historical reasons, this can use either the icalendar or the
+# vobject Python modules, or an internal splitter. The default is now
+# to use the internal splitter, the other modules are more trouble
+# than they're worth (to us, at least until we want to get into date
+# computations etc.)
+
+import rclexecm
+import sys
+
+# Pick the calendar parsing backend. 'internal' (the default) needs no
+# import at all. Any other value means: try icalendar first (unless
+# forcevobject is set), then fall back to vobject, and give up with a
+# RECFILTERROR message if neither module can be imported.
+modules = ('internal', 'icalendar', 'vobject')
+usemodule = 'internal'
+forcevobject = 0
+if usemodule != 'internal':
+    # None means "no usable external module found yet"
+    usemodule = None
+    if not forcevobject:
+        try:
+            from icalendar import Calendar, Event
+            usemodule = 'icalendar'
+        except:
+            pass
+    if usemodule is None:
+        try:
+            import vobject
+            usemodule = 'vobject'
+        except:
+            print "RECFILTERROR HELPERNOTFOUND python:icalendar"
+            print "RECFILTERROR HELPERNOTFOUND python:vobject"
+            sys.exit(1)
+
+
+class IcalExtractor:
+    """File type handler splitting an iCalendar file into entries.
+
+    Provides the openfile/getipath/getnext methods which this script
+    hands to rclexecm. Each VEVENT, VTODO or VJOURNAL object becomes one
+    text entry, whose ipath is its position in the entry list.
+    """
+    def __init__(self, em):
+        self.file = ""
+        self.contents = []
+        # Position of the next entry handed out by getnext(). Initialized
+        # here so that the methods are usable before any openfile() call.
+        self.currentindex = 0
+        self.em = em
+        self.em.setmimetype("text/plain")
+
+    def extractone(self, index):
+        """Return the (ok, data, ipath, eof) tuple for entry number index."""
+        if index < 0 or index >= len(self.contents):
+            return(False, "", "", True)
+        docdata = self.contents[index]
+        #self.em.rclog(docdata)
+
+        # The eof hint is derived from the sequential read position
+        # (currentindex), not from the requested index.
+        iseof = rclexecm.RclExecM.noteof
+        if self.currentindex >= len(self.contents) -1:
+            iseof = rclexecm.RclExecM.eofnext
+        return (True, docdata, str(index), iseof)
+
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        """Open params["filename:"] and split it into entries.
+
+        Returns True on success, False (after logging the reason) if the
+        file can't be opened or parsed.
+        """
+        self.file = params["filename:"]
+        # Forget the entries of any previously opened file, so that a
+        # failure here never leaves stale data behind.
+        self.contents = []
+        self.currentindex = 0
+
+        try:
+            calstr = open(self.file, 'rb')
+        except Exception, e:
+            self.em.rclog("Openfile: open: %s" % str(e))
+            return False
+
+        # Whatever the outcome, don't leak the file descriptor.
+        try:
+            if usemodule == 'internal':
+                self.contents = ICalSimpleSplitter().splitcalendar(calstr)
+            elif usemodule == 'icalendar':
+                try:
+                    cal = Calendar.from_string(calstr.read())
+                except Exception, e:
+                    self.em.rclog("Openfile: read or parse error: %s" %
+                                  str(e))
+                    return False
+                self.contents = [item.as_string() for item in cal.walk()
+                                 if item.name in ('VEVENT', 'VTODO',
+                                                  'VJOURNAL')]
+            else:
+                try:
+                    cal = vobject.readOne(calstr)
+                except Exception, e:
+                    self.em.rclog("Openfile: cant parse object: %s" % str(e))
+                    return False
+                entries = []
+                for lstnm in ('vevent_list', 'vtodo_list', 'vjournal_list'):
+                    for ev in getattr(cal, lstnm, []):
+                        entries.append(ev.serialize())
+                self.contents = entries
+        finally:
+            calstr.close()
+
+        #self.em.rclog("openfile: Entry count: %d"%(len(self.contents)))
+        return True
+
+    def getipath(self, params):
+        """Return the entry designated by params["ipath:"] (an index)."""
+        try:
+            index = int(params["ipath:"])
+        except (KeyError, ValueError, TypeError):
+            # NOTE(review): every other exit path returns a 4-tuple; a bare
+            # False is kept here because the caller is not visible from
+            # this file - confirm what rclexecm expects.
+            return False
+        return self.extractone(index)
+        
+    def getnext(self, params):
+        """Return the next entry in sequence, or an EOF tuple when done."""
+        if self.currentindex >= len(self.contents):
+            self.em.rclog("getnext: EOF hit")
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        else:
+            ret= self.extractone(self.currentindex)
+            self.currentindex += 1
+            return ret
+
+# Trivial splitter: cut objects on BEGIN/END (only for 'interesting' objects)
+# ignore all other syntax
+class ICalSimpleSplitter:
+    # Only the object types listed in 'interesting' are extracted. An
+    # interesting object nested inside another one is not split out: it
+    # stays as plain text inside the enclosing entry.
+    interesting = ('VTODO', 'VEVENT', 'VJOURNAL')
+
+    def splitcalendar(self, fin):
+        # fin: any iterable yielding the calendar lines. Returns the list
+        # of extracted objects, each one as a newline-terminated text block.
+        blocks = []
+        openname = ''
+        pending = ''
+
+        for rawline in fin:
+            text = rawline.rstrip()
+            if not text:
+                continue
+
+            # Inside an object: every non-empty line belongs to it
+            if openname:
+                pending += text + "\n"
+
+            fields = text.split(":")
+            if len(fields) < 2:
+                continue
+            keyword = fields[0].upper()
+            value = fields[1].upper()
+
+            if not openname:
+                # Outside of any object: only an interesting BEGIN matters
+                if keyword == "BEGIN" and \
+                       value in ICalSimpleSplitter.interesting:
+                    openname = value
+                    pending += text + "\n"
+            elif keyword == "END" and value == openname:
+                # Matching END: the object is complete
+                blocks.append(pending)
+                openname = ''
+                pending = ''
+
+        # Keep an unterminated trailing object rather than losing its text
+        if pending:
+            blocks.append(pending)
+
+        return blocks
+ 
+
+##### Main program: either talk to the parent or execute test loop
+
+e = rclexecm.RclExecM()
+ical = IcalExtractor(e)
+
+if len(sys.argv) != 1:
+    # Got a file name parameter: testing without an execm parent.
+    # Print every entry of the file, then stop at the first failure
+    # (which is also how the normal end of the list shows up).
+    if not ical.openfile({'filename:':sys.argv[1]}):
+        print "Open error"
+        sys.exit(1)
+
+    ecnt = 0
+    while True:
+        ok, data, ipath, eof = ical.getnext("")
+        if not ok:
+            print "Got error, eof %d"%eof
+            break
+        ecnt += 1
+        print "=========== ENTRY %d =================" % ecnt
+        print data
+        print
+else:
+    # No argument: serve requests from the parent process
+    e.mainloop(ical)
+    
--- a/website/filters/rclimg
+++ b/website/filters/rclimg
@ -1,95 +0,0 @@
-#! /usr/bin/perl -w
-# @(#$Id: rclimg,v 1.2 2007/10/02 13:56:42 dockes Exp $  (C) 2007 Cedric Scott
-#######################################################
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-######################################################
-
-#
-# rclimg: extract image tags with exiftool and convert the data to html for
-# recoll indexing.
-#
-
-#
-# maps image file tags to xapian tags
-#
-$tagMap = {
-	'subject' => 'subject',
-	'title' => 'title',
-	'headline' => 'title',
-	'caption' => 'caption',
-	'caption-abstract' => 'caption',
-	'author' => 'author',
-	'creator' => 'creator',
-	'from' => 'from',
-	'keywords' => 'keywords',
-	'keyword' => 'keyword',
-	'tag' => 'tag',
-};
-
-# set to non-zero if tags which map to xapian tags are to output
-# in the body as well as the header
-#
-$headAndBody = 1;
-
-# xapianTag
-# returns a xapian tag to be used for this tag
-#
-sub xapianTag {
-	my $imgtag = shift;
-	while ( ( $tagre, $xapiantag) = each %{$tagMap} ) {
-		return $xapiantag  if $imgtag =~ /$tagre/i;
-	}
-	return undef;
-}
-
-#
-# start here
-#
-use Image::ExifTool qw(:Public);
-
-$imageFile = shift;
-$imageFile = '-' if $imageFile eq '';
-unless ( open(IMGF, $imageFile)  ) {
-	print STDERR "$0: can't open file $imageFile\n";
-	exit(1); # file doesn't exist or can't be read
-}
-$info = ImageInfo(\*IMGF);
-die unless $info;
-$fields = [];
-$other = [];
-$titleHtmlTag = "";
-foreach $tagname ( sort keys %{$info} ) {
-	$xapiantag = xapianTag($tagname);
-	if (defined $xapiantag ) {
-		push @{$fields}, [ $xapiantag, $info->{$tagname} ];
-		$titleHtmlTag = "<title>$info->{$tagname}</title>" if $xapiantag eq 'title';
-		push @{$other}, [ $tagname, $info->{$tagname} ] if $headAndBody;
-	} else {
-		push @{$other}, [ $tagname, $info->{$tagname} ];
-	}
-}
-print "<html>\n<head>\n$titleHtmlTag\n";
-print "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=UTF-8\">\n";
-foreach $tagpair ( @{$fields} ) {
-	($tagname, $value) = @{$tagpair};
-	print "<meta name=\"$tagname\" content=\"$value\">\n";
-}
-print "</head><body>\n";
-foreach $tagpair (@{$other} ) {
-	($tagname, $value) = @{$tagpair};
-	printf "%30s : %s<br>\n", $tagname, $value;
-}
-print "</body>\n</html>\n";
--- a/website/filters/rclkwd
+++ b/website/filters/rclkwd
@ -1,204 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclkwd,v 1.1 2007/06/08 14:01:30 dockes Exp $  (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# rclkword
-# Extract text from a kword file
-#
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclkwd"
-filetype=kword
-
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds awk unzip gunzip tar
-
-# check the input file existence
-if test ! -f "$infile"
-then
-  printf '%s: %s: no such file\n' "$progname" "$infile"
-  exit 1
-fi
-
-# We need a temporary directory
-if test z"$RECOLL_TMPDIR" != z; then
-   ttdir=$RECOLL_TMPDIR
-elif test z"$TMPDIR" != z ; then
-   ttdir=$TMPDIR
-else
-   ttdir=/tmp
-fi
-tmpdir=$ttdir/rclkwd_tmp$$
-mkdir $tmpdir || exit 1
-mkdir $tmpdir/rclkwdtmp || exit 1
-
-cleanup()
-{
-    # Note that we're using a constant part (rclkwdtmp), that hopefully
-    # guarantees that we can't do big mistakes here.
-    rm -rf $tmpdir/rclkwdtmp
-    rmdir $tmpdir
-}
-    
-trap cleanup EXIT HUP QUIT INT TERM
-
-# Old kwd files are gzip/tar archives. Newer ones are zip archives.
-# Nothing may be written to stdout here: it would end up in front of the
-# HTML document printed further down.
-if file "$infile" | grep -qi gzip ; then
-   # Uncompress and untar the input file into the temporary directory.
-   # '&&' keeps tar from extracting into the current directory if cd fails.
-   gunzip < "$infile" | (cd "$tmpdir/rclkwdtmp" && tar xf -)
-else
-   # Unzip the input file into the temporary directory
-   unzip -q -d "$tmpdir/rclkwdtmp" "$infile"
-fi
-# Give up if the directory is unusable rather than looking for
-# documentinfo.xml and maindoc.xml relative to the wrong place
-cd "$tmpdir/rclkwdtmp" || exit 1
-
-metafile=documentinfo.xml
-contentfile=maindoc.xml
-
-if test -f $metafile ; then
-
-  # Note: there can be newlines inside the description field, we don't want
-  # them...
-  abssedprog='/<abstract>/,/<\/abstract>/{
-s!.*<abstract>!!
-s!</abstract>.*!!
-p
-}
-'
-  abstract=`sed -n -e "$abssedprog" < $metafile | tr '\n' ' ' | \
-	sed -e '1s/<!\[CDATA\[//' -e 's/\]\]>//'`
-  subject=`sed -e "s/\"/'/" -e 's/.*<subject>\([^<]*\).*/\1/p;d' \
-	     < $metafile`
-  title=`sed -e "s/\"/'/" -e 's/.*<title>\([^<]*\).*/\1/p;d' \
-	     < $metafile | tr '\n' ' '`
-  keywords=`sed -e "s/\"/'/" -e 's/.*<keyword>\([^<]*\).*/\1/p;d' \
-	      < $metafile`
-fi
-
-# Note: next expr inserts a newline at each end of paragraph (for preview)
-content="`sed -e 's!</TEXT>!\\
-!g' -e 's/<[^>]*>/ /g' < $contentfile | sed -e '/^[ 	]*$/d'`"
-
-#echo abstract "$abstract"
-#echo subject "$subject"
-#echo title "$title"
-#echo keywords "$keywords"
-#echo content "$content"
-
-# output the result
-echo '<html><head>'
-echo '<title>' "$title" '</title>'
-echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
-echo '<meta name="abstract" content="' "$abstract $subject" '">'
-echo '<meta name="keywords" content="' "$keywords" '">'
-echo '</head><body><p>'
-
-# The strange 'BEGIN' setup is to prevent 'file' from thinking this file
-# is an awk program
-echo "$content" | sed -e "s/&apos;/'/g" -e 's/&quot;/"/g' |\
-awk 'BEGIN'\
-' {
-  cont = ""
-}
-{
-    $0 = cont $0
-    cont = ""
-
-    if ($0 ~ /[-]$/) {
-      # Note : soft-hyphen is iso8859 0xad
-      # Break at last whitespace
-      match($0, "[ \t][^ \t]+$")
-      line = substr($0, 0, RSTART)
-      cont = substr($0, RSTART, RLENGTH-1)
-      $0 = line
-    }
-
-    if($0 == "\f") {
-        print "</p>\n<hr>\n<p>"
-        next
-    } 
-
-    print $0 "<br>"
-}
-END {
-    printf("</p></body></html>\n");
-}' | iconv -f UTF-8 -t UTF-8 -c -s 
-
-cd /
-# exit normally
-exit 0
--- a/website/filters/rcllyx
+++ b/website/filters/rcllyx
@ -1,195 +0,0 @@
-#!/bin/sh
-# @(#$Id: rcllyx,v 1.4 2007/01/23 07:23:12 dockes Exp $  (C) 2004 J.F.Dockes
-# There may still be code from Estraier in here:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# rcllyx
-# Convert a lyx file to recoll HTML.
-#
-# We use lyx --export. It was suggested to use untex, but it doesn't give 
-# good results on raw lyx (of course, this is not TeX), and exporting to
-# LaTeX then using untex doesn't look nice when we can use the native lyx
-# text export.
-# The character encoding of the exported text is defined by the
-# \inputencoding directive in the lyx file header and, in quite an obscure
-# way, by the \language parameter. We use a heuristic to divine the output
-# text encoding and it is guaranteed not to work in all cases. Trials using
-# an intermediary dvi, pdf or ps file gave worse results. This needs
-# improvement. It doesn't even take into account the fact that the language
-# can change inside the doc (does this change the encoding or not ?). To be
-# frank, this is not entirely my fault, the lyx format is a joke.
-#
-# As there is unfortunately no way to define the output file name, we have
-# to use a temporary directory and link the input file in there.
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rcllyx"
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  printf 'Extract lyx text as basic HTML.\n'
-  printf 'Usage: %s [infile]\n' "$progname"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence
-if test ! -f "$infile"
-then
-  printf '%s: %s: no such file\n' "$progname" "$infile"
-  exit 1
-fi
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        echo $cmd not found 1>&2 
-	exit 1
-      fi
-    done
-}
-
-checkcmds lyx iconv
-
-# We need a temporary directory
-if test z"$RECOLL_TMPDIR" != z; then
-   ttdir=$RECOLL_TMPDIR
-elif test z"$TMPDIR" != z ; then
-   ttdir=$TMPDIR
-else
-   ttdir=/tmp
-fi
-
-tmpdir=$ttdir/rcllyx_tmp$$
-mkdir $tmpdir || exit 1
-mkdir $tmpdir/rcllyxtmp || exit 1
-
-cleanup()
-{
-    # Note that we're using a constant part (rcllyxtmp), that hopefully
-    # guarantees that we can't do big mistakes here.
-    rm -rf $tmpdir/rcllyxtmp
-    rmdir $tmpdir
-}
-    
-trap cleanup EXIT HUP QUIT INT TERM
-
-workdir=$tmpdir/rcllyxtmp
-case "$infile" in
- */*) ;;
- *) infile=`pwd`/$infile;;
-esac
-
-binfile=`basename $infile`
-ln -s "$infile" "$workdir/$binfile" || exit 1
-lyxfile=$workdir/$binfile
-textfile=$workdir/`basename $binfile .lyx`.txt
-
-#echo binfile: $binfile;echo lyxfile: $lyxfile ; ls -l $lyxfile; echo textfile: $textfile
-
-# Run lyx --export
-lyx --export text $lyxfile
-
-# Charset and language
-formatline=`egrep '^\\\lyxformat ' $lyxfile`
-if test -n "$formatline" ; then 
-   set $formatline
-   format=$2
-fi
-charsetline=`egrep '^\\\inputencoding ' $lyxfile`
-if test -n "$charsetline" ; then 
-   set $charsetline
-   charset=$2
-fi
-langline=`egrep '^\\\language ' $lyxfile`
-if test -n "$langline" ; then 
-   set $langline
-   lang=$2
-fi
-#echo format: [$format] charset: [$charset] lang [$lang]
-
-if test "$format" -ge 249 ; then
-  charset=utf-8
-else
-  # try to guess the charset from the language: this is in no way guaranteed
-  # to work, the logic has built-in inconsistencies even beyond the numerous
-  # external ones (what if the ukrainian writer prefers koi8-r ?). This is a
-  # joke. 
-  if test -z "$charset" -o "$charset" = default -o "$charset" = auto ; then
-    case "$lang" in
-    american|afrikaans|basque|catalan|danish|dutch|english|faeroese|finnish|french|galician|german|icelandic|irish|italian|norwegian|portuguese|spanish|swedish)
-      charset=iso-8859-1;;
-    czech|german|hungarian|polish|romanian|croatian|slovak|slovene)
-      charset=iso-8859-2;;
-    esperanto|galician|maltese|Turkish) 
-      charset=iso-8859-3;;
-    estonian|latvian|lithuanian) 
-      charset=iso-8859-4;;
-    bulgarian|byelorussian|macedonian|russian|serbian|ukrainian) 
-      charset=iso-8859-5;;
-    arabic) 
-      charset=iso-8859-6;;
-    greek) 
-      charset=iso-8859-7;;
-    hebrew) 
-      charset=iso-8859-8;;
-    #ISO-8859-9 - Latin 5 Same as 8859-1 except for Turkish instead of
-    #Icelandic. ? What is one to do :)
-    #ISO-8859-10 - Latin 6
-    lappish|nordic|eskimo|inuit|sami)
-      charset=iso-8859-10;;
-  albanian|german|english|basque|breton|catalan|danish|spanish|estonian|esthonian|faeroese|faroese|finnish|french|frisian|friesian|scottish|goidelic|irish|gaelic|galician|welsh|greenlandic|inuit|icelandic|italian|latin|dutch|norvegian|portuguese|romansch|romansh|friulian|ladin|swedish)
-      charset=iso-8859-15;;
-    *)
-      charset=iso-8859-1;;
-    esac
-  fi
-fi
-
-if test -n "$charset" ; then
-   inputcmd="iconv -f $charset -t UTF-8 -c -s"
-else
-   inputcmd=cat
-fi
-#echo inputcmd: [$inputcmd]
-
-cat <<EOF
-<html>
-<head>
-    <title>$title</title>
-    <meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
-</head>
-<body>
-<pre>
-EOF
-
-$inputcmd < $textfile
-
-cat <<EOF
-</pre>
-</body>
-</html>
-EOF
--- a/website/filters/rclopxml
+++ b/website/filters/rclopxml
@ -1,245 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclopxml,v 1.2 2008/09/01 17:31:47 dockes Exp $  (C) 2004 J.F.Dockes
-#================================================================
-# rcldocx
-# Extract text from an openxml msword file (will be extended for spreadsheets)
-# TODO: Also process docProps/core.xml for attributes, and word/endnotes.xml
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname=rclopxml
-filetype=openxml
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds xsltproc unzip
-
-# check the input file existence
-if test ! -f "$infile"
-then
-  printf '%s: %s: no such file\n' "$progname" "$infile"
-  exit 1
-fi
-
-# We need a temporary directory
-if test z"$RECOLL_TMPDIR" != z; then
-   ttdir=$RECOLL_TMPDIR
-elif test z"$TMPDIR" != z ; then
-   ttdir=$TMPDIR
-else
-   ttdir=/tmp
-fi
-tmpdir=$ttdir/rclopxml_tmp$$
-mkdir $tmpdir || exit 1
-mkdir $tmpdir/rclopxmltmp || exit 1
-
-cleanup()
-{
-    # Note that we're using a constant part (rclopxmltmp), that hopefully
-    # guarantees that we can't do big mistakes here.
-    rm -rf $tmpdir/rclopxmltmp
-    rmdir $tmpdir
-}
-    
-trap cleanup EXIT HUP QUIT INT TERM
-
-# Unzip the input file and change to the unzipped directory
-unzip -q -d $tmpdir/rclopxmltmp "$infile"
-cd $tmpdir/rclopxmltmp
-
-echo '<html>
-<head>'
-
-xsltproc - docProps/core.xml <<EOF
-<?xml version="1.0"?>
-<xsl:stylesheet 
- xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
- xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
- xmlns:dc="http://purl.org/dc/elements/1.1/"
- xmlns:dcterms="http://purl.org/dc/terms/"
- xmlns:dcmitype="http://purl.org/dc/dcmitype/"
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-<!--  <xsl:output method="text"/> -->
-  <xsl:output omit-xml-declaration="yes"/>
-
-  <xsl:template match="cp:coreProperties">
-    <xsl:text>&#10;</xsl:text>
-    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
-    <xsl:text>&#10;</xsl:text>
-    <xsl:apply-templates/>
-  </xsl:template>
-
-  <xsl:template match="dc:creator">
-    <meta>
-    <xsl:attribute name="name">
-      <!-- <xsl:value-of select="name()"/> pour sortir tous les meta avec 
-       le meme nom que dans le xml (si on devenait dc-natif) -->
-      <xsl:text>author</xsl:text> 
-    </xsl:attribute>
-    <xsl:attribute name="content">
-       <xsl:value-of select="."/>
-    </xsl:attribute>
-    </meta>
-    <xsl:text>&#10;</xsl:text>
-  </xsl:template>
-
-  <xsl:template match="dcterms:modified">
-    <meta>
-    <xsl:attribute name="name">
-      <xsl:text>date</xsl:text> 
-    </xsl:attribute>
-    <xsl:attribute name="content">
-       <xsl:value-of select="."/>
-    </xsl:attribute>
-    </meta>
-    <xsl:text>&#10;</xsl:text>
-  </xsl:template>
-
-  <xsl:template match="*">
-  </xsl:template>
-
-</xsl:stylesheet>
-EOF
-
-echo '</head>
-<body>'
-
-# Start from known values. moretemplates is only assigned for presentations
-# but is expanded in every stylesheet generated below, so it must not be
-# inherited from the environment.
-filenames=''
-moretemplates=''
-if test -f word/document.xml ; then
- filenames=word/document.xml 
- tagmatch="w:p"
- xmlns_decls='
- xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
- xmlns:ve="http://schemas.openxmlformats.org/markup-compatibility/2006"
- xmlns:o="urn:schemas-microsoft-com:office:office"
- xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
- xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
- xmlns:v="urn:schemas-microsoft-com:vml"
- xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
- xmlns:w10="urn:schemas-microsoft-com:office:word"
- xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
- xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml"
- '
-
-elif test -f xl/sharedStrings.xml ; then
- filenames=xl/sharedStrings.xml 
- tagmatch='x:t'
- xmlns_decls='
-   xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
-   xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
-  '
-
-elif test -f ppt/slides/slide1.xml ; then
- filenames=`echo ppt/slides/slide*.xml`
- tagmatch='a:t'
- xmlns_decls='
-  xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
-  xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" 
- xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" 
-  xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"
- '
-# I want to suppress text output for all except a:t, don't know how to do it
-# help ! At least get rid of these:
- moretemplates='
-  <xsl:template match="p:attrName">
-  </xsl:template>
-'
-else
-    # ??
-    exit 1
-fi
-
-
-for filename in $filenames;do
-xsltproc - $filename <<EOF
-<?xml version="1.0"?>
-<xsl:stylesheet $xmlns_decls >
-
- <xsl:output omit-xml-declaration="yes"/>
-
- <xsl:template match="/">
-  <div>
-  <xsl:apply-templates/> 
-  </div>
-</xsl:template>
-
- <xsl:template match="$tagmatch">
-  <p>
-  <xsl:value-of select="."/>
-  </p>
- </xsl:template>
-
- $moretemplates
-
-</xsl:stylesheet>
-EOF
-done
-
-# Close the <body> opened after the metadata section, then the document
-echo '</body>
-</html>'
--- a/website/filters/rclscribus
+++ b/website/filters/rclscribus
@ -1,151 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclscribus,v 1.1 2007/01/22 16:32:55 dockes Exp $  (C) 2004 J.F.Dockes
-# There may still be code from Estraier in here:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# rclscribus
-# Convert a scribus file to recoll HTML. This only handles the newer .sla
-# files until I can have a look at an older .scd.
-#
-# We just hack into the scribus XML, taking advantage that the tag of
-# interest is apparently always output on a single line.
-# The text seems to be found in attribute CH of tag ITEXT, it is utf-8
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclscribus"
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  printf 'Extract scribus text as basic HTML.\n'
-  printf 'Usage: %s [infile]\n' "$progname"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence
-if test ! -f "$infile"
-then
-  printf '%s: %s: no such file\n' "$progname" "$infile"
-  exit 1
-fi
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        echo $cmd not found 1>&2 
-	exit 1
-      fi
-    done
-}
-checkcmds grep awk sed
-
-# A small sed program to join lines where they are broken inside an
-# attribute value. The idea is that all scribus tags are apparently on one
-# line except when there are embedded new lines in an attribute like
-# 'comments'. The first version of the sed script joins each line which does
-# not end with > with the next. It doesn't guard against an embedded '>'. The
-# second joins each line not beginning with '<' with the previous. It is much
-# slower for some reason.
-sedjoinprog=':a
-/[^>] *$/N; s/\n/ /; ta'
-#sedjoinprog1=':a
-#$!N;/^ *[^<]/s/\n/ /;ta
-#P;D'
-
-# Extract description title author and keywords
-description=`sed -e "$sedjoinprog" < $infile | \
-awk '
-/<DOCUMENT / {
-    if (match($0, " COMMENTS=\"[^\"]+")) { 
-       s=substr($0, RSTART+11, RLENGTH-11)
-       printf("%s", s);
-       # Note: there is no way to know if this ends a frame, so no "<br>"
-    }
-}
-'`
-
-title=`sed -e "$sedjoinprog" < $infile | \
-awk '
-/<DOCUMENT / {
-    if (match($0, " TITLE=\"[^\"]+")) { 
-       s=substr($0, RSTART+8, RLENGTH-8)
-       printf("%s", s);
-       # Note: there is no way to know if this ends a frame, so no "<br>"
-    }
-}
-'`
-
-author=`sed -e "$sedjoinprog" < $infile | \
-awk '
-/<DOCUMENT / {
-    if (match($0, " AUTHOR=\"[^\"]+")) { 
-       s=substr($0, RSTART+9, RLENGTH-9)
-       printf("%s", s);
-       # Note: there is no way to know if this ends a frame, so no "<br>"
-    }
-}
-'`
-
-keywords=`sed -e "$sedjoinprog" < $infile | \
-awk '
-/<DOCUMENT / {
-    if (match($0, " KEYWORDS=\"[^\"]+")) { 
-       s=substr($0, RSTART+11, RLENGTH-11)
-       printf("%s", s);
-       # Note: there is no way to know if this ends a frame, so no "<br>"
-    }
-}
-'`
-
-#echo description: [$description];echo title: [$title];
-#echo author: [$author];echo keywords: [$keywords]
-
-cat <<EOF
-<html><head>
-<title>$title</title>
-<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
-<meta name="author" content="$author">
-<meta name="description" content="$description">
-<meta name="keywords" content="$keywords">
-</head>
-<body><p>
-EOF
-
-
-sed -e ':a' -e '/[^>] *$/N; s/\n/ /; ta' < $infile | \
-awk '
-/<ITEXT / {
-    if (match($0, " CH=\"[^\"]+")) { 
-       s=substr($0, RSTART+5, RLENGTH-5)
-       printf("%s", s);
-       # Note: there is no way to know if this ends a frame, so no "<br>"
-    }
-}
-END {
-    print "</p></body></html>"
-}
-' | \
-sed -e 's/&#x5;/<br>/g' -e 's/&#x1c;/<br>/g'
--- a/website/filters/rclsoff
+++ b/website/filters/rclsoff
@ -1,156 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclsoff,v 1.6.6.1 2007/01/21 16:41:49 dockes Exp $  (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# rclsoff
-# Extract text from an openoffice/soffice file
-#
-#================================================================
-
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclsoff"
-
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  printf 'Convert an openoffice file to unformatted HTML text.\n'
-  printf 'Usage: %s [infile]\n' "$progname"
-  exit 1
-fi
-
-infile="$1"
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        echo $cmd not found 1>&2 
-	exit 1
-      fi
-    done
-}
-checkcmds awk iconv unzip
-
-# check the input file existence
-if test ! -f "$infile"
-then
-  printf '%s: %s: no such file\n' "$progname" "$infile"
-  exit 1
-fi
-
-# We need a temporary directory
-if test z"$RECOLL_TMPDIR" != z; then
-   ttdir=$RECOLL_TMPDIR
-elif test z"$TMPDIR" != z ; then
-   ttdir=$TMPDIR
-else
-   ttdir=/tmp
-fi
-tmpdir=$ttdir/rclsoff_tmp$$
-mkdir $tmpdir || exit 1
-mkdir $tmpdir/rclsofftmp || exit 1
-
-cleanup()
-{
-    # Note that we're using a constant part (rclsofftmp), that hopefully
-    # guarantees that we can't do big mistakes here.
-    rm -rf $tmpdir/rclsofftmp
-    rmdir $tmpdir
-}
-    
-trap cleanup EXIT HUP QUIT INT TERM
-
-# Unzip the input file and change to the unzipped directory
-unzip -q -d $tmpdir/rclsofftmp "$infile"
-cd $tmpdir/rclsofftmp
-
-# Note: there can be newlines inside the description field, we don't want
-# them...
-descsedprog='/<dc:description>/,/<\/dc:description>/{
-s!.*<dc:description>!!
-s!</dc:description>.*!!
-p
-}
-'
-description=`sed -n -e "$descsedprog" < meta.xml | tr '\n' ' '`
-
-subject=`sed -e "s/\"/'/" -e 's/.*<dc:subject>\([^<]*\).*/\1/p;d' < meta.xml`
-
-title=`sed -e "s/\"/'/" -e 's/.*<dc:title>\([^<]*\).*/\1/p;d' < meta.xml`
-
-keywords=`sed -e "s/\"/'/" -e 's/.*<meta:keyword>\([^<]*\).*/\1/p;d' \
-	      < meta.xml`
-
-# Note: next expr inserts a newline at each end of paragraph (for preview)
-content="`sed -e 's!</text:p>!\\
-!g' -e 's/<[^>]*>/ /g' < content.xml`"
-
-#echo description "$description"
-#echo subject "$subject"
-#echo title "$title"
-#echo keywords "$keywords"
-#echo content "$content"
-
-# output the result
-echo '<html><head>'
-echo '<title>' "$title" '</title>'
-echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
-echo '<meta name="description" content="' "$description $subject" '">'
-echo '<meta name="keywords" content="' "$keywords" '">'
-echo '</head><body><p>'
-
-echo "$content" | sed -e "s/&apos;/'/g" -e 's/&quot;/"/g' |\
-awk '
-BEGIN {
-  cont = ""
-}
-{
-    $0 = cont $0
-    cont = ""
-
-    if ($0 ~ /[-]$/) {
-      # Note : soft-hyphen is iso8859 0xad
-      # Break at last whitespace
-      match($0, "[ \t][^ \t]+$")
-      line = substr($0, 0, RSTART)
-      cont = substr($0, RSTART, RLENGTH-1)
-      $0 = line
-    }
-
-    if($0 == "\f") {
-        print "</p>\n<hr>\n<p>"
-        next
-    } 
-
-    print $0 "<br>"
-}
-END {
-    printf("</p></body></html>\n");
-}' | iconv -f UTF-8 -t UTF-8 -c -s 
-
-cd /
-# exit normally
-exit 0
--- a/website/filters/rclsvg
+++ b/website/filters/rclsvg
@ -1,143 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclsvg,v 1.2 2008/02/03 16:05:57 dockes Exp $  (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# Extract text from a Scalable Vector Graphics file
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclsvg"
-filetype=svg
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds iconv sed
-
-# check the input file existence
-if test ! -f "$infile"
-then
-  printf '%s: %s: no such file\n' "$progname" "$infile"
-  exit 1
-fi
-
-encoding=`sed -ne '/<?xml/s/.*encoding="\([^"]*\).*/\1/p' < $infile`
-
-if test X$encoding = X ; then encoding=UTF-8;fi
-
-# We use several sed instances to make our life easier. Not good for
-# performance, and a sed guru might be able to do better.
-#
-# The first sed makes sure each tag starts on a new line
-# The second one selects the tags we're interested in.
-# The last strips the tags, leaving only text.
-#
-# The whole thing wholly ignores issues like '<' inside quoted strings.
-#
-# We could/should add code to explicitly separate title and other
-# metadata elements.
-
-# Insert new line before each tag
-sptagonline='s/</\
-</g'
-
-# Select tags
-spselecttags='/<title/,/<\/title>/p
-/<desc/,/<\/desc>/p
-/<metadata/,/<\/metadata>/p
-/<text/,/<\/text>/p'
-
-# Strip tags
-spstriptags='#n
-/</{
-    :c
-     />/!{
-	N
-	b c
-     }
-     />/s/<.*>//g
-}
-/^[ 	]*$/!p'
-
-content=`sed -e "$sptagonline" < $infile | sed -ne "$spselecttags" | \
-    sed -ne "$spstriptags"`
-
-(echo '<html><head>'
-echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
-echo '</head><body><pre>'
-echo "$content" 
-echo '</pre></body></html>') \
-| iconv -f $encoding -t UTF-8 -c -s 
-
-
-# exit normally
-exit 0
--- a/website/filters/rcltex
+++ b/website/filters/rcltex
@ -1,106 +0,0 @@
-#!/bin/sh
-# @(#$Id: rcltex,v 1.2 2007/11/09 15:56:14 dockes Exp $  (C) 2004 J.F.Dockes
-#================================================================
-# rcltex
-# Translate TeX files for recoll. Uses either untex or detex to translate to html
-#================================================================
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rcltex"
-filetype=TeX
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-if iscmd detex ; then
-    checkcmds iconv
-    CMD="detex -n -e ''"
-else
-    checkcmds untex iconv
-    CMD="untex -giso -a"
-fi
-
-# output the result
-echo '<html><head>'
-#echo '<title>' "$title" '</title>'
-echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
-echo '</head><body>'
-echo '<pre>'
-
-#untex -giso -a "$infile" | \
-
-$CMD "$infile" | \
-   iconv -c -f iso-8859-1 -t utf-8 | \
-   sed \
-       -e 's/</&lt;/g' -e 's/&/&amp;/g' 
-
-echo '</pre>'
-echo '</body></html>'
-
-# exit normally
-exit 0
--- a/website/filters/rclwpd
+++ b/website/filters/rclwpd
@ -1,87 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclwpd,v 1.1 2007/08/26 13:34:59 dockes Exp $  (C) 2004 J.F.Dockes
-# Some inspiration from estraier
-#================================================================
-# rclwpd
-# convert wordperfect documents to html, by  executing the wpd2html program:
-#    http://libwpd.sourceforge.net/download.html
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclwpd"
-filetype=wpd
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds wpd2html
-
-# output the result. wpd2html output doesn't seem to need any adjustment?
-
-wpd2html  "$infile" 2> /dev/null