added the old filters page which had been forgotten

2010-05-04 09:06:52 +02:00 · 2010-05-04 09:06:52 +02:00 · 538264db95
commit 538264db95
parent b33ff20b54
14 changed files with 2259 additions and 0 deletions
--- a/website/filters/filters.html
+++ b/website/filters/filters.html
@ -0,0 +1,294 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+
+<html>
+  <head>
+    <title>Recoll updated filters</title>
+
+    <meta name="generator" content="HTML Tidy, see www.w3.org">
+    <meta name="Author" content="Jean-Francois Dockes">
+    <meta name="Description" content=
+    "recoll is a simple full-text search system for unix and linux
+    based on the powerful and mature xapian engine">
+    <meta name="Keywords" content=
+    "full text search, desktop search, unix, linux">
+    <meta http-equiv="Content-language" content="en">
+    <meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
+    <meta name="robots" content="All,Index,Follow">
+
+    <link type="text/css" rel="stylesheet" href="../styles/style.css">
+  </head>
+
+  <body>
+    
+    <div class="rightlinks">
+      <ul>
+	<li><a href="../index.html">Home</a></li>
+	<li><a href="../download.html">Downloads</a></li>
+	<li><a href="../usermanual/index.html">User manual</a></li>
+	<li><a href="../usermanual/rcl.install.html">Installation</a></li>
+	<li><a href="../index.html#support">Support</a></li>
+      </ul>
+    </div>
+    
+    <div class="content">
+
+      <h1>Updated filters for Recoll</h1>
+      
+      <p>This page describes new and updated filters, which will be
+      part of the next release, but can be installed on the current
+      release if you need them.</p>
+
+      <p>For updated filters, you just need to copy the script to the
+      filters directory which may be typically either <span
+      class="filename">/usr/share/recoll/filters</span>, or <span
+      class="filename">/usr/local/share/recoll/filters</span>.</p>
+
+      <p>For new filters, you'll need to copy the script file as
+	above, possibly install the supporting application, and usually
+	edit the 
+	<span class="filename">mimemap</span>, 
+	<span class="filename">mimeview</span> and
+	<span class="filename">mimeconf</span> files, either in the
+	shared directory
+	(<span class="filename">
+	  /usr[/local]/share/recoll/examples</span>), or 
+	in your personal configuration directory 
+	(<span class="filename">$HOME/.recoll</span> or 
+	<span class="filename">$RECOLL_CONFDIR</span>).</p>
+      
+      <p>Alternatively, you can replace your 1.[8,9,10] system files with
+	these updated and complete versions:
+	<a href="mimemap">mimemap</a>
+	<a href="mimeconf">mimeconf</a> 
+	<a href="mimeview">mimeview</a> </p>
+
+      <p>Notes:</p>
+      <blockquote>
+	<p>All filters are up to date in Recoll 1.10.5</p>
+	  
+	<p>Recoll 1.10.0: only <span class="filename">rclsvg</span> for
+	  Scalable Vector Graphic files is missing.</p>
+
+	<p>Recoll 1.9: all filters are up to date in the release,
+	  except the <span class="filename">rclimg</span> image
+	  filter and <span class="filename">rcltex</span> TeX filter.</p>
+
+	<p>Recoll 1.8: The image, <b>kword</b>,
+	  <b>abiword</b> and <b>wordperfect</b> filters can be installed in
+	  addition.</p>
+      </blockquote>
+
+      <h2>Open XML Office formats</h2> 
+
+      <p>Filter: <a href="rclopxml">rclopxml</a>. </p>
+      <p>This needs <span class="command">xsltproc</span> to be
+        installed (if you run a decently recent Linux, this is
+        probably on your system already). </p>
+
+      <p>The filters are certainly not perfect, but extract a good
+	part of the text, which is probably better than nothing.</p>
+	
+	<p>There are quite a few added lines in the configuration
+	  files, just fetch the new ones:
+	<a href="mimemap">mimemap</a>
+	<a href="mimeconf">mimeconf</a> 
+	<a href="mimeview">mimeview</a> </p>
+
+
+      <h2>Scalable Vector Graphics filter</h2> 
+
+        <p>A new filter for <b>SVG</b> files:
+	  <a href="rclsvg">rclsvg</a>.
+	You'll have to add the following lines in the configuration
+	files:</p>
+
+      <p>In <span class="filename">mimemap</span>: </p>
+      <pre>.svg = image/svg+xml
+</pre>
+      <p>In <span class="filename">mimeconf</span>, [index] section: </p>
+      <pre>image/svg+xml = exec rclsvg</pre>
+      <p><span class="filename">mimeconf</span>, [icons] section:</p>
+      <pre>image/svg+xml = drawing</pre>
+      <p><span class="filename">mimeconf</span>, [categories] section, also add
+	<tt>image/svg+xml</tt> to the <tt>other</tt> list.</p>
+
+      <p>The filter is based on <span class="command">sed</span>, so
+      you don't need to install any external application.</p>
+
+      <p>In 
+	<span class="filename">mimeview</span>, or the <em>[view]</em>
+	section of 
+	<span class="filename">mimeconf</span> for older recoll versions: </p>
+      <pre>    image/svg+xml = inkview %f</pre>
+      <p>(Or substitute your favorite editor).</p>
+
+
+
+      <h2>TeX filter</h2> 
+
+        <p>A new filter for <b>TeX</b> files:
+	  <a href="rcltex">rcltex</a>.
+	You'll have to add the following lines in the configuration
+	files:</p>
+
+      <p>In <span class="filename">mimemap</span>: </p>
+      <pre>.tex = application/x-tex
+</pre>
+      <p>In <span class="filename">mimeconf</span>, [index] section: </p>
+      <pre>    application/x-tex = exec rcltex</pre>
+      <p>mimeconf, [icons] section:</p>
+      <pre>application/x-tex = wordprocessing</pre>
+      <p>mimeconf, [categories] section, also add
+	application/x-tex to the <tt>texts</tt> list.</p>
+
+      <p>This filter uses either <span class="command">untex</span>
+	or <a
+	  href="http://www.cs.purdue.edu/homes/trinkle/detex/">detex</a>
+	if the command is available. A copy of the
+	source code for untex is stored <a href="../untex/untex-1.3.jf.tar.gz">
+	  here</a>.</p>
+
+      <p>In 
+	<span class="filename">mimeview</span>, or the <em>[view]</em>
+	section of 
+	<span class="filename">mimeconf</span> for older recoll versions: </p>
+      <pre>    application/x-tex = gnuclient -q %f</pre>
+      <p>(Or substitute your favorite editor).</p>
+
+
+      <h2>A filter for image tags</h2> 
+
+        <p>A new filter for extracting tags from image and picture files:
+	  <a href="rclimg">rclimg</a>, by Cedric Scott. It is based on
+	  the <b>Exiftool</b> Perl application and library.
+	  You'll have to add the following lines in the configuration
+	  files:</p>
+
+      <p>In <span class="filename">mimemap</span>: </p>
+      <pre>.jpeg = image/jpeg
+.gif = image/gif
+.tiff = image/tiff
+.tif  = image/tiff
+</pre>
+      <p>In <span class="filename">mimeconf</span>, [index] section: </p>
+      <pre>image/gif = exec rclimg
+image/jpeg = exec rclimg
+image/png = exec rclimg
+image/tiff = exec rclimg
+      </pre>
+      <p>And remove the <tt>image/jpeg = exec rcljpeg</tt> line.</p>
+
+      <p>Exiftool supports many other image formats, just enter any
+	additional ones like above.</p>
+
+      <h2>Wordperfect filter</h2> 
+
+        <p>A new filter for <b>Wordperfect</b> files:
+	  <a href="rclwpd">rclwpd</a>.
+	You'll have to add the following lines in the configuration
+	files:</p>
+
+      <p>In <span class="filename">mimemap</span>: </p>
+      <pre>.wpd = application/vnd.wordperfect
+</pre>
+      <p>In <span class="filename">mimeconf</span>, [index] section: </p>
+      <pre>    application/vnd.wordperfect = exec rclwpd</pre>
+      <p>mimeconf, [icons] section:</p>
+      <pre>application/vnd.wordperfect = wordprocessing</pre>
+      <p>mimeconf, [categories] section, also add
+	application/vnd.wordperfect to the <tt>texts</tt> list.</p>
+
+      <p>In 
+	<span class="filename">mimeview</span>, or the <em>[view]</em>
+	section of 
+	<span class="filename">mimeconf</span> for older recoll versions: </p>
+      <pre>    application/vnd.wordperfect = openoffice %f</pre>
+
+      <h2>Abiword filter</h2> 
+
+      <p>A new filter for <a href="http://www.abisource.com/">
+	  abiword</a> files: <a href="rclabw">
+	  rclabw</a>.
+	You'll have to add the following lines in the configuration
+	files:</p>
+
+      <p>In <span class="filename">mimemap</span>: </p>
+      <pre>    .abw = application/x-abiword</pre>
+
+      <p>In <span class="filename">mimeconf</span>: </p>
+      <pre>    application/x-abiword = exec rclabw</pre>
+
+      <p>In 
+	<span class="filename">mimeview</span>, or the <em>[view]</em>
+	section of 
+	<span class="filename">mimeconf</span> for older recoll versions: </p>
+      <pre>    application/x-abiword = abiword %f</pre>
+
+      <h2>Kword filter</h2> 
+
+      <p>A new filter for <a href="http://www.kde.org/whatiskde/koffice.php/">
+	  kword</a> files: <a href="rclkwd">
+	  rclkwd</a>.
+	You'll have to add the following lines in the configuration
+	files:</p>
+
+      <p>In <span class="filename">mimemap</span>: </p>
+      <pre>    .kwd = application/x-kword</pre>
+      <p>In <span class="filename">mimeconf</span>: </p>
+      <pre>    application/x-kword = exec rclkwd</pre>
+      <p>In 
+	<span class="filename">mimeview</span>, or the <em>[view]</em>
+	section of 
+	<span class="filename">mimeconf</span> for older recoll versions: </p>
+      <pre>    application/x-kword = kword %f</pre>
+
+
+      <h2>Openoffice filter</h2>
+      <p>The filter script for all releases up to and including 1.7.5 had
+      a bug on Debian and Ubuntu systems. You can download the <a
+      href="rclsoff">corrected script</a>.</p>
+
+      <h2>Scribus filter</h2> 
+
+      <p>A new filter for <a href="http://www.scribus.net/">
+	  Scribus</a> files: <a href="rclscribus">
+	  rclscribus</a>. This is only for the newer
+	<em>.sla</em> files. I am willing to add support for the older
+	<em>.scd</em> format if someone sends me a sample... You'll
+	have to add the following lines in the configuration files:</p>
+
+      <p>In <span class="filename">mimemap</span>: </p>
+      <pre>      .sla = application/x-scribus</pre>
+      <p>In <span class="filename">mimeconf</span>: </p>
+      <pre>      application/x-scribus = exec rclscribus</pre>
+      <p>In 
+	<span class="filename">mimeview</span>, or the <em>[view]</em>
+	section of 
+	<span class="filename">mimeconf</span> for older recoll versions: </p>
+      <pre>       application/x-scribus = scribus %f</pre>
+
+      <p>Do *not* add entries for <em>.sla.gz</em>, the normal recoll
+      decompression process will handle them (hopefully...).</p>
+
+
+      <h2>Lyx filter</h2> 
+
+      <p>A new filter for <a href="http://www.lyx.org/">
+	  Lyx</a> files: <a href="rcllyx">rcllyx</a>. 
+	This probably has quite a few issues with character encoding,
+	but it's also probably better than handling lyx documents as
+	text files.</p>
+
+      <p>In <span class="filename">mimemap</span>: </p>
+      <pre>      .lyx = application/x-lyx</pre>
+      <p>In <span class="filename">mimeconf</span>: </p>
+      <pre>      application/x-lyx = exec rcllyx</pre>
+      <p>In 
+	<span class="filename">mimeview</span>, or the <em>[view]</em>
+	section of 
+	<span class="filename">mimeconf</span> for older recoll versions: </p>
+      <pre>       application/x-lyx = lyx %f</pre>
+
+    </div>
+  </body>
+</html>
--- a/website/filters/mimeconf
+++ b/website/filters/mimeconf
@ -0,0 +1,215 @@
+# @(#$Id: mimeconf,v 1.41 2008/09/01 20:39:40 dockes Exp $  (C) 2004 J.F.Dockes
+
+# Recoll : associations of mime types to processing filters.
+# There are different sections for decompression, 'interning' for indexing
+# and preview, and external viewers
+
+## #######################################
+# Decompression: these types need a first pass to create a temp file to
+# work with. We use a script because uncompress utilities usually work in
+# place, which is not suitable. 
+#
+# The %t parameter will be replaced by recoll with the name of a temporary
+# directory. This directory is guaranteed to be empty when the filter is called
+#
+# The %f parameter will be substituted with the input file. 
+#
+# The script (ie: rcluncomp) must output the uncompressed file name on
+# stdout. 
+application/x-gzip  =  uncompress rcluncomp gunzip %f %t
+application/x-compress = uncompress rcluncomp gunzip %f %t
+application/x-bzip2 =  uncompress rcluncomp bunzip2 %f %t
+
+## ###################################
+# Filters for indexing and internal preview. 
+# The "internal" filters are hardwired in the c++ code.
+# The external "exec" filters are typically scripts. They output the
+# document in simple html format, have a look at the scripts.
+[index]
+application/msword = exec rcldoc
+application/ogg = exec rclogg
+application/pdf = exec rclpdf
+application/postscript = exec rclps
+application/vnd.ms-excel = exec rclxls
+application/vnd.ms-powerpoint = exec rclppt
+application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
+ exec rclopxml
+application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
+ exec rclopxml
+application/vnd.openxmlformats-officedocument.presentationml.template = \
+ exec rclopxml
+application/vnd.openxmlformats-officedocument.presentationml.presentation = \
+ exec rclopxml
+application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
+ exec rclopxml
+application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
+ exec rclopxml
+application/vnd.sun.xml.calc = exec rclsoff
+application/vnd.sun.xml.calc.template = exec rclsoff
+application/vnd.sun.xml.draw = exec rclsoff
+application/vnd.sun.xml.draw.template = exec rclsoff
+application/vnd.sun.xml.impress = exec rclsoff
+application/vnd.sun.xml.impress.template = exec rclsoff
+application/vnd.sun.xml.math = exec rclsoff
+application/vnd.sun.xml.writer = exec rclsoff
+application/vnd.sun.xml.writer.global = exec rclsoff
+application/vnd.sun.xml.writer.template = exec rclsoff
+application/vnd.wordperfect = exec rclwpd
+application/x-abiword = exec rclabw
+application/x-dvi = exec rcldvi
+application/x-flac = exec rclflac
+application/x-kword = exec rclkwd
+application/x-lyx = exec rcllyx
+application/x-scribus = exec rclscribus
+application/x-tex = exec rcltex
+audio/mpeg = exec rclid3
+image/gif = exec rclimg
+image/jpeg = exec rclimg
+image/png = exec rclimg
+image/tiff = exec rclimg
+image/vnd.djvu = exec rcldjvu
+image/svg+xml = exec rclsvg
+message/rfc822 = internal
+text/html  = internal 
+text/plain = internal 
+text/rtf = exec rclrtf
+text/x-gaim-log = exec rclgaim
+text/x-html-sidux-man = exec rclsiduxman
+text/x-mail = internal
+text/x-man = exec rclman
+
+## #############################################
+# Icons to be used in the result list if required by gui config
+[icons]
+application/msword = wordprocessing
+application/ogg = sownd
+application/pdf = pdf
+application/postscript = postscript
+application/vnd.ms-excel = spreadsheet
+application/vnd.ms-powerpoint = presentation
+application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
+  wordprocessing
+application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
+  wordprocessing
+application/vnd.openxmlformats-officedocument.presentationml.template = \
+ presentation
+application/vnd.openxmlformats-officedocument.presentationml.presentation = \
+ presentation
+application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
+ spreadsheet
+application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
+ spreadsheet
+application/vnd.sun.xml.calc = spreadsheet
+application/vnd.sun.xml.calc.template = spreadsheet
+application/vnd.sun.xml.draw = drawing
+application/vnd.sun.xml.draw.template = drawing
+application/vnd.sun.xml.impress = presentation
+application/vnd.sun.xml.impress.template = presentation
+application/vnd.sun.xml.writer = wordprocessing
+application/vnd.sun.xml.writer.global = wordprocessing
+application/vnd.sun.xml.writer.template = wordprocessing
+application/vnd.wordperfect = wordprocessing
+application/x-abiword = wordprocessing
+application/x-dvi = document
+application/x-flac = sownd
+application/x-fsdirectory = folder
+application/x-kword = wordprocessing
+application/x-lyx = wordprocessing
+application/x-scribus = document
+application/x-tex = wordprocessing
+audio/mpeg = sownd
+image/gif = image
+image/jpeg = image
+image/png = image
+image/tiff = image
+image/vnd.djvu = document
+image/svg+xml = drawing
+message/rfc822 = message
+text/html = html
+text/plain = txt
+text/x-c = source
+text/x-html-sidux-man = sidux-book
+text/x-mail = message
+text/x-man = document
+
+[categories]
+
+text = \
+      application/msword \
+      application/pdf \
+      application/postscript \
+      application/vnd.openxmlformats-officedocument.wordprocessingml.document \
+      application/vnd.openxmlformats-officedocument.wordprocessingml.template \
+      application/vnd.sun.xml.writer \
+      application/vnd.sun.xml.writer.global \
+      application/vnd.sun.xml.writer.template \
+      application/vnd.wordperfect \
+      application/x-abiword \
+      application/x-dvi \
+      application/x-kword \
+      application/x-lyx \
+      application/x-scribus \
+      application/x-tex \
+      image/vnd.djvu \
+      text/html \
+      text/plain \
+      text/rtf \
+      text/x-html-sidux-man \
+      text/x-man
+
+spreadsheet = \
+   application/vnd.ms-excel \
+   application/vnd.openxmlformats-officedocument.spreadsheetml.sheet \
+   application/vnd.openxmlformats-officedocument.spreadsheetml.template \
+   application/vnd.sun.xml.calc \
+   application/vnd.sun.xml.calc.template
+
+presentation = application/vnd.ms-powerpoint \
+  application/vnd.openxmlformats-officedocument.presentationml.template \
+  application/vnd.openxmlformats-officedocument.presentationml.presentation \
+  application/vnd.sun.xml.impress \
+  application/vnd.sun.xml.impress.template
+
+media = \
+      audio/mpeg \
+      application/ogg \
+      application/x-flac \
+      image/jpeg \
+      image/png \
+      image/tiff \
+      image/gif \
+
+message = message/rfc822 \
+	  text/x-gaim-log \
+	  text/x-mail \
+
+other = application/vnd.sun.xml.draw \
+        application/vnd.sun.xml.draw.template \
+        application/vnd.sun.xml.math \
+        application/x-fsdirectory \
+        image/svg+xml \
+
+
+[prefixes]
+
+# This allows extending the set of fields that recoll understands/searches. 
+# See the manual for exact usage.
+# Important: 
+#   - the field names MUST be all lowercase here. They can be anycased
+#     in the documents:
+#   - The extension field prefixes MUST begin with X and be all UPPERCASE.
+title = S
+caption = S
+subject = S
+
+author = A
+creator = A
+from = A
+
+keyword = K
+tag = K
+keywords = K
+tags = K
+
+# testing /example :
+recollspecialfield = XRCLSF
--- a/website/filters/mimemap
+++ b/website/filters/mimemap
@ -0,0 +1,132 @@
+# @(#$Id: mimemap,v 1.31 2008/08/25 16:12:16 dockes Exp $  (C) 2004 J.F.Dockes
+# Recoll: associations of file name extensions to mime types
+
+.txt = text/plain
+.text = text/plain
+.d    = text/plain
+
+# Source files. Defining them as text/x-c will enable the external viewer.
+# If they are defined as text/plain they will be somewhat indexed
+.cpp = text/x-c
+.h   = text/x-c
+.c   = text/x-c
+.cc  = text/x-c
+
+.rtf  = text/rtf
+
+.html = text/html
+.htm = text/html
+.shtml = text/html
+.php = text/html
+
+.pdf = application/pdf
+
+.ps = application/postscript
+.eps = application/postscript
+.ai = application/postscript
+
+.tex = application/x-tex
+.dvi = application/x-dvi
+
+.djvu = image/vnd.djvu
+.svg = image/svg+xml
+
+.gz = application/x-gzip
+.Z = application/x-gzip
+.bz2 = application/x-bzip2
+#.Z  = application/x-compress
+
+.doc = application/msword
+.ppt = application/vnd.ms-powerpoint
+.xls = application/vnd.ms-excel
+
+# OpenOffice / opendocument. We handle opendocument as old openoffice files
+# for now
+.sxc = application/vnd.sun.xml.calc
+.ods = application/vnd.sun.xml.calc
+.stc = application/vnd.sun.xml.calc.template
+.sxd = application/vnd.sun.xml.draw
+.std = application/vnd.sun.xml.draw.template
+.sxi = application/vnd.sun.xml.impress
+.odp = application/vnd.sun.xml.impress
+.sti = application/vnd.sun.xml.impress.template
+.sxm = application/vnd.sun.xml.math
+.sxw = application/vnd.sun.xml.writer
+.odt = application/vnd.sun.xml.writer
+.sxg = application/vnd.sun.xml.writer.global
+.stw = application/vnd.sun.xml.writer.template
+
+# ms openxml 
+.docm = application/vnd.ms-word.document.macroEnabled.12
+.docx = application/vnd.openxmlformats-officedocument.wordprocessingml.document
+.dotm = application/vnd.ms-word.template.macroEnabled.12
+.dotx = application/vnd.openxmlformats-officedocument.wordprocessingml.template
+.potm = application/vnd.ms-powerpoint.template.macroEnabled.12
+.potx = application/vnd.openxmlformats-officedocument.presentationml.template
+.ppam = application/vnd.ms-powerpoint.addin.macroEnabled.12
+.ppsm = application/vnd.ms-powerpoint.slideshow.macroEnabled.12
+.ppsx = application/vnd.openxmlformats-officedocument.presentationml.slideshow
+.pptm = application/vnd.ms-powerpoint.presentation.macroEnabled.12
+.pptx = application/vnd.openxmlformats-officedocument.presentationml.presentation
+.xlam = application/vnd.ms-excel.addin.macroEnabled.12
+.xlsb = application/vnd.ms-excel.sheet.binary.macroEnabled.12
+.xlsm = application/vnd.ms-excel.sheet.macroEnabled.12
+.xlsx = application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
+.xltm = application/vnd.ms-excel.template.macroEnabled.12
+.xltx = application/vnd.openxmlformats-officedocument.spreadsheetml.template
+ 
+.abw = application/x-abiword
+.lyx = application/x-lyx
+.sla = application/x-scribus
+.scd = application/x-scribus
+
+.kwd = application/x-kword
+
+.wpd = application/vnd.wordperfect
+
+.rtf = text/rtf
+
+.mp3 = audio/mpeg
+.flac = application/x-flac
+.ogg = application/ogg
+
+.png = image/png
+.jpg = image/jpeg
+.jpeg = image/jpeg
+.gif = image/gif
+.tiff = image/tiff
+.tif  = image/tiff
+
+# A list of stuff that we don't want to touch at all (for now). Having the
+# suffixes listed in there speeds up things quite a lot by avoiding
+# unneeded decompression or 'file' calls. File names still get indexed if
+# indexallfilenames is set
+recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz .log.gz .md5 .map \
+       .m4 .tcl .js .sh .pl .awk \
+       .o .lib .dll .a \
+       .dat .bak .rdf .log .db .ini .msf .pid \
+       .gnm .gnumeric \
+       .gif .bmp .xpm \
+       ,v ~ #
+
+# Special handling of .txt files inside ~/.gaim directory
+[~/.gaim]
+.txt = text/x-gaim-log
+
+# Special handling of sidux manual menu system
+[/usr/share/sidux-manual]
+.htm = text/x-html-sidux-man
+.html = text/x-html-sidux-man
+
+# Manual files. You may want to adjust the location for your system
+# We can't use the default text/troff type because this doesn't say
+# what macro set to use (groff -man)
+[/usr/share/man]
+.1 = text/x-man
+.2 = text/x-man
+.3 = text/x-man
+.4 = text/x-man
+.5 = text/x-man
+.6 = text/x-man
+.7 = text/x-man
+.8 = text/x-man
--- a/website/filters/mimeview
+++ b/website/filters/mimeview
@ -0,0 +1,61 @@
+# @(#$Id: mimeview,v 1.15 2008/09/01 20:39:40 dockes Exp $  (C) 2004 J.F.Dockes
+
+## ##########################################
+# External viewers, launched by the recoll GUI when you click on a result
+# 'edit' link
+
+[view]
+# Pseudo entry used if the 'use desktop' preference is set in the GUI
+application/x-all = xdg-open %f
+
+application/x-kword = kword %f
+application/x-abiword = abiword %f
+
+application/msword = openoffice %f
+application/ogg = xmms %f
+application/pdf  = xpdf %f
+application/postscript = gv %f
+application/vnd.ms-excel = openoffice %f
+application/vnd.ms-powerpoint = openoffice %f
+application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
+ openoffice %f
+application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
+ openoffice %f
+application/vnd.openxmlformats-officedocument.presentationml.template = \
+ openoffice %f
+application/vnd.openxmlformats-officedocument.presentationml.presentation = \
+ openoffice %f
+application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
+ openoffice %f
+application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
+ openoffice %f
+application/vnd.sun.xml.calc = openoffice %f
+application/vnd.sun.xml.calc.template = openoffice %f
+application/vnd.sun.xml.draw = openoffice %f
+application/vnd.sun.xml.draw.template = openoffice %f
+application/vnd.sun.xml.impress = openoffice %f
+application/vnd.sun.xml.impress.template = openoffice %f
+application/vnd.sun.xml.math = openoffice %f
+application/vnd.sun.xml.writer = openoffice %f
+application/vnd.sun.xml.writer.global = openoffice %f
+application/vnd.sun.xml.writer.template = openoffice %f
+application/vnd.wordperfect = openoffice %f
+application/x-fsdirectory = rox %f
+application/x-dvi = xdvi %f
+application/x-flac = xmms %f
+application/x-lyx = lyx %f
+application/x-scribus = scribus %f
+application/x-tex = gnuclient -q %f
+audio/mpeg = xmms %f
+image/jpeg = xv %f
+image/png = xv %f
+image/tiff = xv %f
+image/gif  = xv %f
+image/svg+xml = inkview %f
+image/vnd.djvu = djview %f
+# Or firefox -remote "openFile(%u)"
+text/html = firefox %u
+text/plain = gnuclient -q %f
+text/x-c = gnuclient -q %f
+text/x-html-sidux-man = konqueror %f
+#text/x-html-sidux-man = iceweasel %f
--- a/website/filters/rclabw
+++ b/website/filters/rclabw
@ -0,0 +1,175 @@
+#!/bin/sh
+# @(#$Id: rclabw,v 1.2 2007/06/15 11:41:50 dockes Exp $  (C) 2004 J.F.Dockes
+# Parts taken from Estraier:
+#================================================================
+# Estraier: a personal full-text search system
+# Copyright (C) 2003-2004 Mikio Hirabayashi
+#================================================================
+#================================================================
+# Extract text from an abiword file
+#================================================================
+
+# set variables
+LANG=C ; export LANG
+LC_ALL=C ; export LC_ALL
+progname="rclabw"
+filetype=abiword
+
+#RECFILTCOMMONCODE
+##############################################################################
+# !! Leave the previous line unmodified!! Code imported from the
+# recfiltcommon file
+
+# Utility code common to all shell filters. This could be sourced at run
+# time, but it's slightly more efficient to include the code in the
+# filters at build time (with a sed script).
+
+# Describe error in a way that can be interpreted by our caller
+senderror()
+{
+    # Report on stdout: the RECFILTERROR marker followed by the arguments
+    # ($* is unquoted, so the arguments are word-split again by the shell)
+    echo RECFILTERROR $*
+    # Also alert on stderr just in case
+    echo ":2:$progname::: $*" 1>&2
+    # This ends the whole script, not only the function
+    exit 1
+}
+
+# Check whether a command is available.
+# $1: command name, or a path if it contains a '/'
+# Returns 0 if the 'test -x' check succeeds, 1 otherwise.
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    # The name contains a slash: test it directly as a path
+    */*)
+	if test -x $cmd ; then return 0; else return 1; fi ;;
+    # Plain name: try it in each directory listed in $PATH
+    *)
+      # Split $PATH on ':' into the positional parameters, then restore IFS
+      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
+      for d in $*;do test -x $d/$cmd && return 0;done
+      return 1 ;;
+    esac
+}
+
+# Verify that every command named in the arguments passes iscmd.
+# For the first one that does not, senderror is called with HELPERNOTFOUND
+# and the command name (senderror exits the script).
+checkcmds()
+{
+    for cmd in $*;do
+      if iscmd $cmd 
+      then 
+        # Command found: nothing to do, the assignment is only a placeholder
+        a=1
+      else 
+        senderror HELPERNOTFOUND $cmd
+      fi
+    done
+}
+
+# show help message
+# Exactly one argument is expected: any other count, or --help, prints the
+# usage text and exits with status 1
+if test $# -ne 1 -o "$1" = "--help" 
+then
+  echo "Convert a $filetype file to HTML text for Recoll indexing."
+  echo "Usage: $progname [infile]"
+  exit 1
+fi
+
+# The single argument is the path of the file to convert
+infile="$1"
+
+# check the input file existence (may be '-' for stdin)
+if test "X$infile" != X- -a ! -f "$infile"
+then
+  senderror INPUTNOSUCHFILE "$infile"
+fi
+
+# protect access to our temp files and directories
+# (umask 77: files created from here on get no group/other permissions)
+umask 77
+
+##############################################################################
+# !! Leave the following line unmodified !
+#ENDRECFILTCOMMONCODE
+
+checkcmds iconv sed
+
+# check the input file existence
+if test ! -f "$infile"
+then
+  printf '%s: %s: no such file\n' "$progname" "$infile"
+  exit 1
+fi
+
+encoding=`sed -e  '/<?xml version=/s/"?>$//' \
+	      -e '/^<?xml version=/s/.*encoding="//p;D;q' \
+	      -e D \
+< $infile`
+if test X$encoding = X ; then encoding=UTF-8;fi
+
+# Note: there can be newlines inside the description field, we don't want
+# them... We have to use two different selectors for the single-line and
+# multiple-line cases because of the generic tag end (</m> for all meta
+# tags)
+descsedprog='
+/<m key="dc.description">\([^<]*\)<\/m>/ {
+s//\1/
+p
+q
+}
+/<m key="dc.description">/,/<\/m>/ {
+s!.*<m key="dc.description">!!
+s!</m>.*!!
+H
+}
+${
+g
+s/\n/ /g
+p
+}
+'
+
+description=`sed -n -e "$descsedprog" < "$infile"`
+#echo description: "$description"
+
+# Set program for the single line meta elements. Takes element name as
+# parameter 
+# Builds the program in the global variable metasedprog. $1 is spliced in
+# between the single-quoted pieces, so it becomes part of the sed regular
+# expression (a '.' in the name matches any character).
+# The resulting program acts on lines containing the opening tag: it keeps
+# the element text only, replaces double quotes by single quotes, and
+# prints the line.
+setmetasedprog() {
+metasedprog='/<m key="'$1'">/{
+s/.*<m key="'$1'">\([^<]*\).*/\1/
+'"s/\"/'/g"'
+p
+}'
+}
+
+# Extract each single-line metadata element in turn: the program is
+# rebuilt for the element name, then run (with -n) on the input file
+setmetasedprog dc.subject
+subject=`sed -n -e "$metasedprog" "$infile"`
+#echo subject: "$subject"
+
+setmetasedprog dc.title
+title=`sed -n -e "$metasedprog" "$infile"`
+#echo titre: "$title"
+
+setmetasedprog abiword.keywords
+keywords=`sed -n -e "$metasedprog" "$infile"`
+#echo keywords: "$keywords"
+
+setmetasedprog dc.creator
+creator=`sed -n -e "$metasedprog" "$infile"`
+#echo creator: "$creator"
+
+# Note: next expr supposes that paragraphs are always all by themselves on
+# a single line in the xml (no multiple <p> per line, no embedded newlines
+# in text).
+contentsedprog='
+/<p[ >]/{
+s/<[^>]*>/ /g
+p
+}
+'
+content=`sed -n -e "$contentsedprog" "$infile"`
+#echo content: "$content"
+
+# output the result
+(echo '<html><head><title>' "$title" '</title>'
+echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
+echo '<meta name="description" content="' "$description $subject" '">'
+echo '<meta name="keywords" content="' "$keywords" '">'
+echo '<meta name="author" content="' "$creator" '">'
+echo '</head><body><pre>'
+echo "$content" 
+echo '</pre></body></html>') \
+| iconv -f $encoding -t UTF-8 -c -s 
+
+
+# exit normally
+exit 0
--- a/website/filters/rclimg
+++ b/website/filters/rclimg
@ -0,0 +1,95 @@
+#! /usr/bin/perl -w
+# @(#$Id: rclimg,v 1.2 2007/10/02 13:56:42 dockes Exp $  (C) 2007 Cedric Scott
+#######################################################
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################################
+
+#
+# rclimg: extract image tags with exiftool and convert the data to html for
+# recoll indexing.
+#
+
+#
+# maps image file tags to xapian tags
+#
+# The keys are matched against the image tag names as case-insensitive
+# regular expressions (see xapianTag below), so a key also matches any tag
+# name which merely contains it. The values are the names used for the
+# html meta elements in the output.
+$tagMap = {
+	'subject' => 'subject',
+	'title' => 'title',
+	'headline' => 'title',
+	'caption' => 'caption',
+	'caption-abstract' => 'caption',
+	'author' => 'author',
+	'creator' => 'creator',
+	'from' => 'from',
+	'keywords' => 'keywords',
+	'keyword' => 'keyword',
+	'tag' => 'tag',
+};
+
+# set to non-zero if tags which map to xapian tags are to be output
+# in the body as well as the header
+#
+$headAndBody = 1;
+
+# xapianTag
+# returns a xapian tag to be used for this tag
+#
+sub xapianTag {
+	# Argument: an image tag name. Returns the mapped name from $tagMap for
+	# the first key (in sorted order) which matches it, or undef if none does.
+	my $imgtag = shift;
+	# Walk a sorted key list instead of using each(): returning from the
+	# middle of an each() loop leaves the hash iterator where it stopped,
+	# so the next call would only see the remaining entries and could miss
+	# a match. Sorting also makes the result independent of the hash order
+	# when several keys match the same tag name.
+	foreach my $tagre ( sort keys %{$tagMap} ) {
+		return $tagMap->{$tagre}  if $imgtag =~ /$tagre/i;
+	}
+	return undef;
+}
+
+#
+# start here
+#
+use Image::ExifTool qw(:Public);
+
+# Escape the characters which are special in html text and attribute values
+sub htmlEscape {
+	my $text = shift;
+	$text = '' unless defined $text;
+	$text =~ s/&/&amp;/g;
+	$text =~ s/</&lt;/g;
+	$text =~ s/>/&gt;/g;
+	$text =~ s/"/&quot;/g;
+	return $text;
+}
+
+$imageFile = shift;
+# No argument, or an empty one: read the image from standard input
+$imageFile = '-' if !defined($imageFile) || $imageFile eq '';
+# The 3-argument form of open is used for regular files: with 2 arguments,
+# characters like '<', '>' or '|' in the file name would be interpreted as
+# an open mode. '-' still means standard input, as it did before.
+$opened = ($imageFile eq '-') ? open(IMGF, '<&', \*STDIN)
+                              : open(IMGF, '<', $imageFile);
+unless ( $opened ) {
+	print STDERR "$0: can't open file $imageFile\n";
+	exit(1); # file doesn't exist or can't be read
+}
+$info = ImageInfo(\*IMGF);
+die unless $info;
+# $fields: [name, value] pairs for the html header meta elements
+# $other: [tag name, value] pairs listed in the html body
+$fields = [];
+$other = [];
+$titleHtmlTag = "";
+foreach $tagname ( sort keys %{$info} ) {
+	$xapiantag = xapianTag($tagname);
+	# Values are escaped once here, as they all end up in html output
+	$value = htmlEscape($info->{$tagname});
+	if (defined $xapiantag ) {
+		push @{$fields}, [ $xapiantag, $value ];
+		$titleHtmlTag = "<title>$value</title>" if $xapiantag eq 'title';
+		push @{$other}, [ $tagname, $value ] if $headAndBody;
+	} else {
+		push @{$other}, [ $tagname, $value ];
+	}
+}
+print "<html>\n<head>\n$titleHtmlTag\n";
+print "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=UTF-8\">\n";
+foreach $tagpair ( @{$fields} ) {
+	($tagname, $value) = @{$tagpair};
+	print "<meta name=\"$tagname\" content=\"$value\">\n";
+}
+print "</head><body>\n";
+foreach $tagpair (@{$other} ) {
+	($tagname, $value) = @{$tagpair};
+	printf "%30s : %s<br>\n", $tagname, $value;
+}
+print "</body>\n</html>\n";
--- a/website/filters/rclkwd
+++ b/website/filters/rclkwd
@ -0,0 +1,204 @@
+#!/bin/sh
+# @(#$Id: rclkwd,v 1.1 2007/06/08 14:01:30 dockes Exp $  (C) 2004 J.F.Dockes
+# Parts taken from Estraier:
+#================================================================
+# Estraier: a personal full-text search system
+# Copyright (C) 2003-2004 Mikio Hirabayashi
+#================================================================
+#================================================================
+# rclkwd
+# Extract text from a kword file
+#
+#================================================================
+
+# set variables
+LANG=C ; export LANG
+LC_ALL=C ; export LC_ALL
+progname="rclkwd"
+filetype=kword
+
+
+
+#RECFILTCOMMONCODE
+##############################################################################
+# !! Leave the previous line unmodified!! Code imported from the
+# recfiltcommon file
+
+# Utility code common to all shell filters. This could be sourced at run
+# time, but it's slightly more efficient to include the code in the
+# filters at build time (with a sed script).
+
+# Describe error in a way that can be interpreted by our caller
+senderror()
+{
+    echo RECFILTERROR $*
+    # Also alert on stderr just in case
+    echo ":2:$progname::: $*" 1>&2
+    exit 1
+}
+
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+	if test -x $cmd ; then return 0; else return 1; fi ;;
+    *)
+      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
+      for d in $*;do test -x $d/$cmd && return 0;done
+      return 1 ;;
+    esac
+}
+
+checkcmds()
+{
+    for cmd in $*;do
+      if iscmd $cmd 
+      then 
+        a=1
+      else 
+        senderror HELPERNOTFOUND $cmd
+      fi
+    done
+}
+
+# show help message
+if test $# -ne 1 -o "$1" = "--help" 
+then
+  echo "Convert a $filetype file to HTML text for Recoll indexing."
+  echo "Usage: $progname [infile]"
+  exit 1
+fi
+
+infile="$1"
+
+# check the input file existence (may be '-' for stdin)
+if test "X$infile" != X- -a ! -f "$infile"
+then
+  senderror INPUTNOSUCHFILE "$infile"
+fi
+
+# protect access to our temp files and directories
+umask 77
+
+##############################################################################
+# !! Leave the following line unmodified !
+#ENDRECFILTCOMMONCODE
+
+checkcmds awk unzip gunzip tar
+
+# check the input file existence
+if test ! -f "$infile"
+then
+  printf '%s: %s: no such file\n' "$progname" "$infile"
+  exit 1
+fi
+
+# We need a temporary directory
+if test z"$RECOLL_TMPDIR" != z; then
+   ttdir=$RECOLL_TMPDIR
+elif test z"$TMPDIR" != z ; then
+   ttdir=$TMPDIR
+else
+   ttdir=/tmp
+fi
+tmpdir=$ttdir/rclkwd_tmp$$
+mkdir $tmpdir || exit 1
+mkdir $tmpdir/rclkwdtmp || exit 1
+
+cleanup()
+{
+    # Note that we're using a constant part (rclkwdtmp), that hopefully
+    # guarantees that we can't do big mistakes here.
+    rm -rf $tmpdir/rclkwdtmp
+    rmdir $tmpdir
+}
+    
+trap cleanup EXIT HUP QUIT INT TERM
+
+# Old kwd files are gzip/tar archives. Newer ones are zip archives.
+# Nothing may be written to stdout here: our standard output is the HTML
+# document which is produced further down.
+if file "$infile" | grep -qi gzip ; then
+   # Uncompress the input file and untar it inside the temporary directory.
+   # tar must not run if the cd failed, it would extract into the current
+   # directory
+   gunzip < "$infile" | (cd $tmpdir/rclkwdtmp && tar xf -)
+else
+   # Unzip the input file inside the temporary directory
+   unzip -q -d $tmpdir/rclkwdtmp "$infile"
+fi
+# The rest of the script uses file names relative to the unpacked archive
+cd $tmpdir/rclkwdtmp || exit 1
+
+metafile=documentinfo.xml
+contentfile=maindoc.xml
+
+# Start from empty values so that nothing is inherited from the environment
+# when the archive has no metadata file
+abstract=
+subject=
+title=
+keywords=
+
+if test -f $metafile ; then
+
+  # Note: there can be newlines inside the description field, we don't want
+  # them...
+  abssedprog='/<abstract>/,/<\/abstract>/{
+s!.*<abstract>!!
+s!</abstract>.*!!
+p
+}
+'
+  # The values are output inside double-quoted HTML attributes, so every
+  # double quote is turned into a single quote (hence the g flag: all the
+  # occurrences, not only the first one on each line).
+  abstract=`sed -n -e "$abssedprog" < $metafile | tr '\n' ' ' | \
+	sed -e '1s/<!\[CDATA\[//' -e 's/\]\]>//' -e "s/\"/'/g"`
+  subject=`sed -e "s/\"/'/g" -e 's/.*<subject>\([^<]*\).*/\1/p;d' \
+	     < $metafile`
+  title=`sed -e "s/\"/'/g" -e 's/.*<title>\([^<]*\).*/\1/p;d' \
+	     < $metafile | tr '\n' ' '`
+  keywords=`sed -e "s/\"/'/g" -e 's/.*<keyword>\([^<]*\).*/\1/p;d' \
+	      < $metafile`
+fi
+
+# Note: next expr inserts a newline at each end of paragraph (for preview)
+content="`sed -e 's!</TEXT>!\\
+!g' -e 's/<[^>]*>/ /g' < $contentfile | sed -e '/^[ 	]*$/d'`"
+
+#echo abstract "$abstract"
+#echo subject "$subject"
+#echo title "$title"
+#echo keywords "$keywords"
+#echo content "$content"
+
+# output the result
+echo '<html><head>'
+echo '<title>' "$title" '</title>'
+echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
+echo '<meta name="abstract" content="' "$abstract $subject" '">'
+echo '<meta name="keywords" content="' "$keywords" '">'
+echo '</head><body><p>'
+
+# The strange 'BEGIN' setup is to prevent 'file' from thinking this file
+# is an awk program
+# The awk script outputs one <br> terminated line per input line, joining
+# the words which were hyphenated at the end of a line with their
+# continuation on the next one.
+echo "$content" | sed -e "s/&apos;/'/g" -e 's/&quot;/"/g' |\
+awk 'BEGIN'\
+' {
+  cont = ""
+}
+{
+    $0 = cont $0
+    cont = ""
+
+    if ($0 ~ /[-]$/) {
+      # Note : soft-hyphen is iso8859 0xad
+      if (match($0, "[ \t][^ \t]+$")) {
+        # Break at last whitespace: the hyphenated word, minus the hyphen,
+        # is carried over to the beginning of the next line
+        line = substr($0, 1, RSTART - 1)
+        cont = substr($0, RSTART, RLENGTH-1)
+      } else {
+        # No whitespace at all: the whole line is the hyphenated word.
+        # Carry it over entirely instead of losing it.
+        line = ""
+        cont = substr($0, 1, length($0) - 1)
+      }
+      $0 = line
+    }
+
+    if($0 == "\f") {
+        print "</p>\n<hr>\n<p>"
+        next
+    } 
+
+    print $0 "<br>"
+}
+END {
+    # The last line ended with a hyphen and there is nothing to join it
+    # with: output the pending word as it was
+    if (cont != "") print cont "-<br>"
+    printf("</p></body></html>\n");
+}' | iconv -f UTF-8 -t UTF-8 -c -s 
+
+cd /
+# exit normally
+exit 0
--- a/website/filters/rcllyx
+++ b/website/filters/rcllyx
@ -0,0 +1,195 @@
+#!/bin/sh
+# @(#$Id: rcllyx,v 1.4 2007/01/23 07:23:12 dockes Exp $  (C) 2004 J.F.Dockes
+# There may still be code from Estraier in here:
+#================================================================
+# Estraier: a personal full-text search system
+# Copyright (C) 2003-2004 Mikio Hirabayashi
+#================================================================
+#================================================================
+# rcllyx
+# Convert a lyx file to recoll HTML.
+#
+# We use lyx --export. It was suggested to use untex, but it doesn't give 
+# good results on raw lyx (of course, this is not TeX), and exporting to
+# LaTex then using untex doesn't look nice when we can use the native  lyx
+# text export.
+# The character encoding of the exported text is defined by the
+# \inputencoding directive in the lyx file header and, in quite an obscure
+# way, by the \language parameter. We use a heuristic to divine the output
+# text encoding and it is guaranteed not to work in all cases. Trials using
+# an intermediary dvi, pdf or ps file gave worse results. This needs
+# improvement. It doesn't even take into account the fact that the language
+# can change inside the doc (does this change the encoding or not ?). To be
+# frank, this is not entirely my fault, the lyx format is a joke.
+#
+# As there is unfortunately no way to define the output file name, we have
+# to use a temporary directory and link the input file in there.
+
+# set variables
+LANG=C ; export LANG
+LC_ALL=C ; export LC_ALL
+progname="rcllyx"
+
+# show help message
+if test $# -ne 1 -o "$1" = "--help" 
+then
+  printf 'Extract lyx text as basic HTML.\n'
+  printf 'Usage: %s [infile]\n' "$progname"
+  exit 1
+fi
+
+infile="$1"
+
+# check the input file existence
+if test ! -f "$infile"
+then
+  printf '%s: %s: no such file\n' "$progname" "$infile"
+  exit 1
+fi
+
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+	if test -x $cmd ; then return 0; else return 1; fi ;;
+    *)
+      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
+      for d in $*;do test -x $d/$cmd && return 0;done
+      return 1 ;;
+    esac
+}
+checkcmds()
+{
+    for cmd in $*;do
+      if iscmd $cmd 
+      then 
+        a=1
+      else 
+        echo $cmd not found 1>&2 
+	exit 1
+      fi
+    done
+}
+
+checkcmds lyx iconv
+
+# We need a temporary directory
+if test z"$RECOLL_TMPDIR" != z; then
+   ttdir=$RECOLL_TMPDIR
+elif test z"$TMPDIR" != z ; then
+   ttdir=$TMPDIR
+else
+   ttdir=/tmp
+fi
+
+tmpdir=$ttdir/rcllyx_tmp$$
+mkdir $tmpdir || exit 1
+mkdir $tmpdir/rcllyxtmp || exit 1
+
+cleanup()
+{
+    # Note that we're using a constant part (rcllyxtmp), that hopefully
+    # guarantees that we can't do big mistakes here.
+    rm -rf $tmpdir/rcllyxtmp
+    rmdir $tmpdir
+}
+    
+trap cleanup EXIT HUP QUIT INT TERM
+
+workdir=$tmpdir/rcllyxtmp
+case "$infile" in
+ */*) ;;
+ *) infile=`pwd`/$infile;;
+esac
+
+# Link the input file inside the work directory: lyx writes the exported
+# text next to its input file.
+binfile=`basename "$infile"`
+ln -s "$infile" "$workdir/$binfile" || exit 1
+lyxfile=$workdir/$binfile
+textfile=$workdir/`basename "$binfile" .lyx`.txt
+
+#echo binfile: $binfile;echo lyxfile: $lyxfile ; ls -l $lyxfile; echo textfile: $textfile
+
+# Run lyx --export
+lyx --export text "$lyxfile"
+
+# Charset and language
+# Each value is the second word of the corresponding header line. Start
+# from empty values so that nothing is inherited from the environment.
+format=
+charset=
+lang=
+formatline=`grep -E '^\\\lyxformat ' "$lyxfile"`
+if test -n "$formatline" ; then 
+   set -- $formatline
+   format=$2
+fi
+charsetline=`grep -E '^\\\inputencoding ' "$lyxfile"`
+if test -n "$charsetline" ; then 
+   set -- $charsetline
+   charset=$2
+fi
+langline=`grep -E '^\\\language ' "$lyxfile"`
+if test -n "$langline" ; then 
+   set -- $langline
+   lang=$2
+fi
+# The format is compared as an integer further down. Use 0 when it is
+# missing or not a plain number (like 2.15), so that the comparison does
+# not fail with an error and the charset guessing code is used.
+case "$format" in
+  ''|*[!0-9]*) format=0;;
+esac
+#echo format: [$format] charset: [$charset] lang [$lang]
+
+if test "$format" -ge 249 ; then
+  charset=utf-8
+else
+  # try to guess the charset from the language: this is in no way guaranteed
+  # to work, the logic has built-in inconsistencies even beyond the numerous
+  # external ones (what if the ukrainian writer prefers koi8-r ?). This is a
+  # joke. 
+  if test -z "$charset" -o "$charset" = default -o "$charset" = auto ; then
+    case "$lang" in
+    american|afrikaans|basque|catalan|danish|dutch|english|faeroese|finnish|french|galician|german|icelandic|irish|italian|norwegian|portuguese|spanish|swedish)
+      charset=iso-8859-1;;
+    czech|german|hungarian|polish|romanian|croatian|slovak|slovene)
+      charset=iso-8859-2;;
+    esperanto|galician|maltese|Turkish) 
+      charset=iso-8859-3;;
+    estonian|latvian|lithuanian) 
+      charset=iso-8859-4;;
+    bulgarian|byelorussian|macedonian|russian|serbian|ukrainian) 
+      charset=iso-8859-5;;
+    arabic) 
+      charset=iso-8859-6;;
+    greek) 
+      charset=iso-8859-7;;
+    hebrew) 
+      charset=iso-8859-8;;
+    #ISO-8859-9 - Latin 5 Same as 8859-1 except for Turkish instead of
+    #Icelandic. ? What is one to do :)
+    #ISO-8859-10 - Latin 6
+    lappish|nordic|eskimo|inuit|sami)
+      charset=iso-8859-10;;
+  albanian|german|english|basque|breton|catalan|danish|spanish|estonian|esthonian|faeroese|faroese|finnish|french|frisian|friesian|scottish|goidelic|irish|gaelic|galician|welsh|greenlandic|inuit|icelandic|italian|latin|dutch|norvegian|portuguese|romansch|romansh|friulian|ladin|swedish)
+      charset=iso-8859-15;;
+    *)
+      charset=iso-8859-1;;
+    esac
+  fi
+fi
+
+# Command used to read the exported text and convert it to UTF-8
+if test -n "$charset" ; then
+   inputcmd="iconv -f $charset -t UTF-8 -c -s"
+else
+   inputcmd=cat
+fi
+#echo inputcmd: [$inputcmd]
+
+# No text file means that the lyx export failed: report an error instead
+# of outputting an empty document with a success status
+if test ! -f "$textfile" ; then
+  echo "$progname: lyx export failed for $infile" 1>&2
+  exit 1
+fi
+
+# NOTE(review): title is not assigned anywhere in this script, so the
+# title element is empty unless the variable comes from the environment
+cat <<EOF
+<html>
+<head>
+    <title>$title</title>
+    <meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
+</head>
+<body>
+<pre>
+EOF
+
+# inputcmd is left unquoted on purpose: it holds a command and its arguments.
+# The text goes inside <pre>, so the characters which are special in HTML
+# are escaped. & is done first, and the & of the replacements needs a
+# backslash because a bare one stands for the matched text in sed.
+$inputcmd < "$textfile" | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g'
+
+cat <<EOF
+</pre>
+</body>
+</html>
+EOF
--- a/website/filters/rclopxml
+++ b/website/filters/rclopxml
@ -0,0 +1,245 @@
+#!/bin/sh
+# @(#$Id: rclopxml,v 1.2 2008/09/01 17:31:47 dockes Exp $  (C) 2004 J.F.Dockes
+#================================================================
+# rclopxml
+# Extract text from an openxml file (word processor document, spreadsheet
+# or presentation). The attributes come from docProps/core.xml.
+# TODO: Also process word/endnotes.xml
+#================================================================
+
+# set variables
+LANG=C ; export LANG
+LC_ALL=C ; export LC_ALL
+progname=rclopxml
+filetype=openxml
+
+#RECFILTCOMMONCODE
+##############################################################################
+# !! Leave the previous line unmodified!! Code imported from the
+# recfiltcommon file
+
+# Utility code common to all shell filters. This could be sourced at run
+# time, but it's slightly more efficient to include the code in the
+# filters at build time (with a sed script).
+
+# Describe error in a way that can be interpreted by our caller
+senderror()
+{
+    echo RECFILTERROR $*
+    # Also alert on stderr just in case
+    echo ":2:$progname::: $*" 1>&2
+    exit 1
+}
+
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+	if test -x $cmd ; then return 0; else return 1; fi ;;
+    *)
+      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
+      for d in $*;do test -x $d/$cmd && return 0;done
+      return 1 ;;
+    esac
+}
+
+checkcmds()
+{
+    for cmd in $*;do
+      if iscmd $cmd 
+      then 
+        a=1
+      else 
+        senderror HELPERNOTFOUND $cmd
+      fi
+    done
+}
+
+# show help message
+if test $# -ne 1 -o "$1" = "--help" 
+then
+  echo "Convert a $filetype file to HTML text for Recoll indexing."
+  echo "Usage: $progname [infile]"
+  exit 1
+fi
+
+infile="$1"
+
+# check the input file existence (may be '-' for stdin)
+if test "X$infile" != X- -a ! -f "$infile"
+then
+  senderror INPUTNOSUCHFILE "$infile"
+fi
+
+# protect access to our temp files and directories
+umask 77
+
+##############################################################################
+# !! Leave the following line unmodified !
+#ENDRECFILTCOMMONCODE
+
+checkcmds xsltproc unzip
+
+# check the input file existence
+if test ! -f "$infile"
+then
+  printf '%s: %s: no such file\n' "$progname" "$infile"
+  exit 1
+fi
+
+# We need a temporary directory
+if test z"$RECOLL_TMPDIR" != z; then
+   ttdir=$RECOLL_TMPDIR
+elif test z"$TMPDIR" != z ; then
+   ttdir=$TMPDIR
+else
+   ttdir=/tmp
+fi
+tmpdir=$ttdir/rclopxml_tmp$$
+mkdir $tmpdir || exit 1
+mkdir $tmpdir/rclopxmltmp || exit 1
+
+cleanup()
+{
+    # Note that we're using a constant part (rclopxmltmp), that hopefully
+    # guarantees that we can't do big mistakes here.
+    rm -rf $tmpdir/rclopxmltmp
+    rmdir $tmpdir
+}
+    
+trap cleanup EXIT HUP QUIT INT TERM
+
+# Unzip the input file and change to the unzipped directory
+unzip -q -d $tmpdir/rclopxmltmp "$infile"
+cd $tmpdir/rclopxmltmp
+
+echo '<html>
+<head>'
+
+xsltproc - docProps/core.xml <<EOF
+<?xml version="1.0"?>
+<xsl:stylesheet 
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
+ xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:dcterms="http://purl.org/dc/terms/"
+ xmlns:dcmitype="http://purl.org/dc/dcmitype/"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+<!--  <xsl:output method="text"/> -->
+  <xsl:output omit-xml-declaration="yes"/>
+
+  <xsl:template match="cp:coreProperties">
+    <xsl:text>&#10;</xsl:text>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+    <xsl:text>&#10;</xsl:text>
+    <xsl:apply-templates/>
+  </xsl:template>
+
+  <xsl:template match="dc:creator">
+    <meta>
+    <xsl:attribute name="name">
+      <!-- <xsl:value-of select="name()"/> pour sortir tous les meta avec 
+       le meme nom que dans le xml (si on devenait dc-natif) -->
+      <xsl:text>author</xsl:text> 
+    </xsl:attribute>
+    <xsl:attribute name="content">
+       <xsl:value-of select="."/>
+    </xsl:attribute>
+    </meta>
+    <xsl:text>&#10;</xsl:text>
+  </xsl:template>
+
+  <xsl:template match="dcterms:modified">
+    <meta>
+    <xsl:attribute name="name">
+      <xsl:text>date</xsl:text> 
+    </xsl:attribute>
+    <xsl:attribute name="content">
+       <xsl:value-of select="."/>
+    </xsl:attribute>
+    </meta>
+    <xsl:text>&#10;</xsl:text>
+  </xsl:template>
+
+  <xsl:template match="*">
+  </xsl:template>
+
+</xsl:stylesheet>
+EOF
+
+echo '</head>
+<body>'
+
+filename=''
+if test -f word/document.xml ; then
+ filenames=word/document.xml 
+ tagmatch="w:p"
+ xmlns_decls='
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
+ xmlns:ve="http://schemas.openxmlformats.org/markup-compatibility/2006"
+ xmlns:o="urn:schemas-microsoft-com:office:office"
+ xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
+ xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
+ xmlns:v="urn:schemas-microsoft-com:vml"
+ xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
+ xmlns:w10="urn:schemas-microsoft-com:office:word"
+ xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
+ xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml"
+ '
+
+elif test -f xl/sharedStrings.xml ; then
+ filenames=xl/sharedStrings.xml 
+ tagmatch='x:t'
+ xmlns_decls='
+   xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
+   xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
+  '
+
+elif test -f ppt/slides/slide1.xml ; then
+ filenames=`echo ppt/slides/slide*.xml`
+ tagmatch='a:t'
+ xmlns_decls='
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
+  xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" 
+ xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" 
+  xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"
+ '
+# I want to suppress text output for all except a:t, don't know how to do it
+# help ! At least get rid of these:
+ moretemplates='
+  <xsl:template match="p:attrName">
+  </xsl:template>
+'
+else
+    # ??
+    exit 1
+fi
+
+
+# Output the text of each selected part of the document: one <div> for each
+# file and one <p> for each element matching $tagmatch. The stylesheet is
+# a here document, the shell variables are expanded inside it.
+for filename in $filenames;do
+xsltproc - $filename <<EOF
+<?xml version="1.0"?>
+<xsl:stylesheet $xmlns_decls >
+
+ <xsl:output omit-xml-declaration="yes"/>
+
+ <xsl:template match="/">
+  <div>
+  <xsl:apply-templates/> 
+  </div>
+</xsl:template>
+
+ <xsl:template match="$tagmatch">
+  <p>
+  <xsl:value-of select="."/>
+  </p>
+ </xsl:template>
+
+ <!-- Replace the built-in rule which copies all text nodes to the output:
+      only the text selected by the template above is wanted -->
+ <xsl:template match="text()"/>
+
+ $moretemplates
+
+</xsl:stylesheet>
+EOF
+done
+
+# Close the body element which was opened before the loop
+echo '</body>
+</html>'
--- a/website/filters/rclscribus
+++ b/website/filters/rclscribus
@ -0,0 +1,151 @@
+#!/bin/sh
+# @(#$Id: rclscribus,v 1.1 2007/01/22 16:32:55 dockes Exp $  (C) 2004 J.F.Dockes
+# There may still be code from Estraier in here:
+#================================================================
+# Estraier: a personal full-text search system
+# Copyright (C) 2003-2004 Mikio Hirabayashi
+#================================================================
+#================================================================
+# rclscribus
+# Convert a scribus file to recoll HTML. This only handles the newer .sla
+# files until I can have a look at an older .scd.
+#
+# We just hack into the scribus XML, taking advantage that the tag of
+# interest is apparently always output on a single line.
+# The text seems to be found in attribute CH of tag ITEXT, it is utf-8
+
+# set variables
+LANG=C ; export LANG
+LC_ALL=C ; export LC_ALL
+progname="rclscribus"
+
+# show help message
+if test $# -ne 1 -o "$1" = "--help" 
+then
+  printf 'Extract scribus text as basic HTML.\n'
+  printf 'Usage: %s [infile]\n' "$progname"
+  exit 1
+fi
+
+infile="$1"
+
+# check the input file existence
+if test ! -f "$infile"
+then
+  printf '%s: %s: no such file\n' "$progname" "$infile"
+  exit 1
+fi
+
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+	if test -x $cmd ; then return 0; else return 1; fi ;;
+    *)
+      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
+      for d in $*;do test -x $d/$cmd && return 0;done
+      return 1 ;;
+    esac
+}
+checkcmds()
+{
+    for cmd in $*;do
+      if iscmd $cmd 
+      then 
+        a=1
+      else 
+        echo $cmd not found 1>&2 
+	exit 1
+      fi
+    done
+}
+checkcmds grep awk sed
+
+# A small sed program to join lines where they are broken inside an
+# attribute value. The idea is that all scribus tags are apparently on one
+# line except when there are embedded newlines in an attribute like
+# 'comments'. The first version of the sed script joins a line which does
+# not end with '>' with the next one. It doesn't guard against an embedded
+# '>'. The second one joins a line not beginning with '<' with the previous
+# one. It is much slower for some reason.
+sedjoinprog=':a
+/[^>] *$/N; s/\n/ /; ta'
+#sedjoinprog1=':a
+#$!N;/^ *[^<]/s/\n/ /;ta
+#P;D'
+
+# Extract description title author and keywords
+description=`sed -e "$sedjoinprog" < $infile | \
+awk '
+/<DOCUMENT / {
+    if (match($0, " COMMENTS=\"[^\"]+")) { 
+       s=substr($0, RSTART+11, RLENGTH-11)
+       printf("%s", s);
+       # Note: there is no way to know if this ends a frame, so no "<br>"
+    }
+}
+'`
+
+title=`sed -e "$sedjoinprog" < $infile | \
+awk '
+/<DOCUMENT / {
+    if (match($0, " TITLE=\"[^\"]+")) { 
+       s=substr($0, RSTART+8, RLENGTH-8)
+       printf("%s", s);
+       # Note: there is no way to know if this ends a frame, so no "<br>"
+    }
+}
+'`
+
+author=`sed -e "$sedjoinprog" < $infile | \
+awk '
+/<DOCUMENT / {
+    if (match($0, " AUTHOR=\"[^\"]+")) { 
+       s=substr($0, RSTART+9, RLENGTH-9)
+       printf("%s", s);
+       # Note: there is no way to know if this ends a frame, so no "<br>"
+    }
+}
+'`
+
+keywords=`sed -e "$sedjoinprog" < $infile | \
+awk '
+/<DOCUMENT / {
+    if (match($0, " KEYWORDS=\"[^\"]+")) { 
+       s=substr($0, RSTART+11, RLENGTH-11)
+       printf("%s", s);
+       # Note: there is no way to know if this ends a frame, so no "<br>"
+    }
+}
+'`
+
+#echo description: [$description];echo title: [$title];
+#echo author: [$author];echo keywords: [$keywords]
+
+cat <<EOF
+<html><head>
+<title>$title</title>
+<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
+<meta name="author" content="$author">
+<meta name="description" content="$description">
+<meta name="keywords" content="$keywords">
+</head>
+<body><p>
+EOF
+
+
+# Output the document text: the value of the CH attribute of each ITEXT
+# tag, after joining the lines which are broken inside an attribute value.
+# The last sed turns two character references into line breaks.
+sed -e ':a' -e '/[^>] *$/N; s/\n/ /; ta' < "$infile" | \
+awk '
+/<ITEXT / {
+    if (match($0, " CH=\"[^\"]+")) { 
+       s=substr($0, RSTART+5, RLENGTH-5)
+       printf("%s", s);
+       # Note: there is no way to know if this ends a frame, so no "<br>"
+    }
+}
+END {
+    print "</p></body></html>"
+}
+' | \
+sed -e 's/&#x5;/<br>/g' -e 's/&#x1c;/<br>/g'
--- a/website/filters/rclsoff
+++ b/website/filters/rclsoff
@ -0,0 +1,156 @@
+#!/bin/sh
+# @(#$Id: rclsoff,v 1.6.6.1 2007/01/21 16:41:49 dockes Exp $  (C) 2004 J.F.Dockes
+# Parts taken from Estraier:
+#================================================================
+# Estraier: a personal full-text search system
+# Copyright (C) 2003-2004 Mikio Hirabayashi
+#================================================================
+#================================================================
+# rclsoff
+# Extract text from an openoffice/soffice file
+#
+#================================================================
+
+
+# set variables
+LANG=C ; export LANG
+LC_ALL=C ; export LC_ALL
+progname="rclsoff"
+
+
+# show help message
+if test $# -ne 1 -o "$1" = "--help" 
+then
+  printf 'Convert an openoffice file to unformatted HTML text.\n'
+  printf 'Usage: %s [infile]\n' "$progname"
+  exit 1
+fi
+
+infile="$1"
+
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+	if test -x $cmd ; then return 0; else return 1; fi ;;
+    *)
+      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
+      for d in $*;do test -x $d/$cmd && return 0;done
+      return 1 ;;
+    esac
+}
+checkcmds()
+{
+    for cmd in $*;do
+      if iscmd $cmd 
+      then 
+        a=1
+      else 
+        echo $cmd not found 1>&2 
+	exit 1
+      fi
+    done
+}
+checkcmds awk iconv unzip
+
+# check the input file existence
+if test ! -f "$infile"
+then
+  printf '%s: %s: no such file\n' "$progname" "$infile"
+  exit 1
+fi
+
+# We need a temporary directory
+if test z"$RECOLL_TMPDIR" != z; then
+   ttdir=$RECOLL_TMPDIR
+elif test z"$TMPDIR" != z ; then
+   ttdir=$TMPDIR
+else
+   ttdir=/tmp
+fi
+tmpdir=$ttdir/rclsoff_tmp$$
+mkdir $tmpdir || exit 1
+mkdir $tmpdir/rclsofftmp || exit 1
+
+cleanup()
+{
+    # Note that we're using a constant part (rclsofftmp), that hopefully
+    # guarantees that we can't do big mistakes here.
+    rm -rf $tmpdir/rclsofftmp
+    rmdir $tmpdir
+}
+    
+trap cleanup EXIT HUP QUIT INT TERM
+
+# Unzip the input file and change to the unzipped directory
+unzip -q -d $tmpdir/rclsofftmp "$infile"
+cd $tmpdir/rclsofftmp
+
+# Note: there can be newlines inside the description field, we don't want
+# them...
+descsedprog='/<dc:description>/,/<\/dc:description>/{
+s!.*<dc:description>!!
+s!</dc:description>.*!!
+p
+}
+'
+# The values are output inside double-quoted HTML attributes, so every
+# double quote is turned into a single quote (hence the g flag: all the
+# occurrences, not only the first one on each line).
+description=`sed -n -e "$descsedprog" < meta.xml | sed -e "s/\"/'/g" | \
+	tr '\n' ' '`
+
+subject=`sed -e "s/\"/'/g" -e 's/.*<dc:subject>\([^<]*\).*/\1/p;d' < meta.xml`
+
+title=`sed -e "s/\"/'/g" -e 's/.*<dc:title>\([^<]*\).*/\1/p;d' < meta.xml`
+
+keywords=`sed -e "s/\"/'/g" -e 's/.*<meta:keyword>\([^<]*\).*/\1/p;d' \
+	      < meta.xml`
+
+# Note: next expr inserts a newline at each end of paragraph (for preview)
+content="`sed -e 's!</text:p>!\\
+!g' -e 's/<[^>]*>/ /g' < content.xml`"
+
+#echo description "$description"
+#echo subject "$subject"
+#echo title "$title"
+#echo keywords "$keywords"
+#echo content "$content"
+
+# output the result
+echo '<html><head>'
+echo '<title>' "$title" '</title>'
+echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
+echo '<meta name="description" content="' "$description $subject" '">'
+echo '<meta name="keywords" content="' "$keywords" '">'
+echo '</head><body><p>'
+
+# Output the body: one <br> terminated line per input line, joining the
+# words which were hyphenated at the end of a line with their continuation
+# on the next one.
+echo "$content" | sed -e "s/&apos;/'/g" -e 's/&quot;/"/g' |\
+awk '
+BEGIN {
+  cont = ""
+}
+{
+    $0 = cont $0
+    cont = ""
+
+    if ($0 ~ /[-]$/) {
+      # Note : soft-hyphen is iso8859 0xad
+      if (match($0, "[ \t][^ \t]+$")) {
+        # Break at last whitespace: the hyphenated word, minus the hyphen,
+        # is carried over to the beginning of the next line
+        line = substr($0, 1, RSTART - 1)
+        cont = substr($0, RSTART, RLENGTH-1)
+      } else {
+        # No whitespace at all: the whole line is the hyphenated word.
+        # Carry it over entirely instead of losing it.
+        line = ""
+        cont = substr($0, 1, length($0) - 1)
+      }
+      $0 = line
+    }
+
+    if($0 == "\f") {
+        print "</p>\n<hr>\n<p>"
+        next
+    } 
+
+    print $0 "<br>"
+}
+END {
+    # The last line ended with a hyphen and there is nothing to join it
+    # with: output the pending word as it was
+    if (cont != "") print cont "-<br>"
+    printf("</p></body></html>\n");
+}' | iconv -f UTF-8 -t UTF-8 -c -s 
+
+cd /
+# exit normally
+exit 0
--- a/website/filters/rclsvg
+++ b/website/filters/rclsvg
@ -0,0 +1,143 @@
+#!/bin/sh
+# @(#$Id: rclsvg,v 1.2 2008/02/03 16:05:57 dockes Exp $  (C) 2004 J.F.Dockes
+# Parts taken from Estraier:
+#================================================================
+# Estraier: a personal full-text search system
+# Copyright (C) 2003-2004 Mikio Hirabayashi
+#================================================================
+#================================================================
+# Extract text from a Scalable Vector Graphics file
+#================================================================
+
+# set variables
+LANG=C ; export LANG
+LC_ALL=C ; export LC_ALL
+progname="rclsvg"
+filetype=svg
+
+#RECFILTCOMMONCODE
+##############################################################################
+# !! Leave the previous line unmodified!! Code imported from the
+# recfiltcommon file
+
+# Utility code common to all shell filters. This could be sourced at run
+# time, but it's slightly more efficient to include the code in the
+# filters at build time (with a sed script).
+
+# Describe error in a way that can be interpreted by our caller
+senderror()
+{
+    echo RECFILTERROR $*
+    # Also alert on stderr just in case
+    echo ":2:$progname::: $*" 1>&2
+    exit 1
+}
+
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+	if test -x $cmd ; then return 0; else return 1; fi ;;
+    *)
+      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
+      for d in $*;do test -x $d/$cmd && return 0;done
+      return 1 ;;
+    esac
+}
+
+checkcmds()
+{
+    for cmd in $*;do
+      if iscmd $cmd 
+      then 
+        a=1
+      else 
+        senderror HELPERNOTFOUND $cmd
+      fi
+    done
+}
+
+# show help message
+if test $# -ne 1 -o "$1" = "--help" 
+then
+  echo "Convert a $filetype file to HTML text for Recoll indexing."
+  echo "Usage: $progname [infile]"
+  exit 1
+fi
+
+infile="$1"
+
+# check the input file existence (may be '-' for stdin)
+if test "X$infile" != X- -a ! -f "$infile"
+then
+  senderror INPUTNOSUCHFILE "$infile"
+fi
+
+# protect access to our temp files and directories
+umask 77
+
+##############################################################################
+# !! Leave the following line unmodified !
+#ENDRECFILTCOMMONCODE
+
+checkcmds iconv sed
+
+# check the input file existence
+if test ! -f "$infile"
+then
+  printf '%s: %s: no such file\n' "$progname" "$infile"
+  exit 1
+fi
+
+# Character encoding from the xml declaration. Only the first match is
+# kept: the value is used as a single iconv argument further down.
+encoding=`sed -ne '/<?xml/s/.*encoding="\([^"]*\).*/\1/p' < "$infile" | \
+    sed -e 1q`
+
+# No encoding found: use the XML default
+if test "X$encoding" = X ; then encoding=UTF-8;fi
+
+# We use several sed instances to make our life easier. Not good for
+# performance, and a sed guru might be able to do better.
+#
+# The first sed makes sure each tag starts on a new line
+# The second one selects the tags we're interested in.
+# The last strips the tags, leaving only text.
+#
+# The whole thing wholly ignores issues like '<' inside quoted strings.
+#
+# We could/should add code to explicitly separate title and other
+# metadata elements.
+
+# Insert new line before each tag
+sptagonline='s/</\
+</g'
+
+# Select tags
+spselecttags='/<title/,/<\/title>/p
+/<desc/,/<\/desc>/p
+/<metadata/,/<\/metadata>/p
+/<text/,/<\/text>/p'
+
+# Strip tags
+spstriptags='#n
+/</{
+    :c
+     />/!{
+	N
+	b c
+     }
+     />/s/<.*>//g
+}
+/^[ 	]*$/!p'
+
+# Run the three sed programs on the input file, then output the remaining
+# text inside a minimal HTML document. The whole document is converted
+# from the input file encoding to UTF-8.
+content=`sed -e "$sptagonline" < "$infile" | sed -ne "$spselecttags" | \
+    sed -ne "$spstriptags"`
+
+(echo '<html><head>'
+echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
+echo '</head><body><pre>'
+echo "$content" 
+echo '</pre></body></html>') \
+| iconv -f "$encoding" -t UTF-8 -c -s 
+
+
+# exit normally
+exit 0
--- a/website/filters/rcltex
+++ b/website/filters/rcltex
@ -0,0 +1,106 @@
+#!/bin/sh
+# @(#$Id: rcltex,v 1.2 2007/11/09 15:56:14 dockes Exp $  (C) 2004 J.F.Dockes
+#================================================================
+# rcltex
+# Translate TeX files for recoll. Uses either untex or detex to translate to html
+#================================================================
+# set variables
+LANG=C ; export LANG
+LC_ALL=C ; export LC_ALL
+progname="rcltex"
+filetype=TeX
+
+
+#RECFILTCOMMONCODE
+##############################################################################
+# !! Leave the previous line unmodified!! Code imported from the
+# recfiltcommon file
+
+# Utility code common to all shell filters. This could be sourced at run
+# time, but it's slightly more efficient to include the code in the
+# filters at build time (with a sed script).
+
+# Describe error in a way that can be interpreted by our caller
+senderror()
+{
+    echo RECFILTERROR $*
+    # Also alert on stderr just in case
+    echo ":2:$progname::: $*" 1>&2
+    exit 1
+}
+
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+	if test -x $cmd ; then return 0; else return 1; fi ;;
+    *)
+      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
+      for d in $*;do test -x $d/$cmd && return 0;done
+      return 1 ;;
+    esac
+}
+
+checkcmds()
+{
+    for cmd in $*;do
+      if iscmd $cmd 
+      then 
+        a=1
+      else 
+        senderror HELPERNOTFOUND $cmd
+      fi
+    done
+}
+
+# show help message
+if test $# -ne 1 -o "$1" = "--help" 
+then
+  echo "Convert a $filetype file to HTML text for Recoll indexing."
+  echo "Usage: $progname [infile]"
+  exit 1
+fi
+
+infile="$1"
+
+# check the input file existence (may be '-' for stdin)
+if test "X$infile" != X- -a ! -f "$infile"
+then
+  senderror INPUTNOSUCHFILE "$infile"
+fi
+
+# protect access to our temp files and directories
+umask 77
+
+##############################################################################
+# !! Leave the following line unmodified !
+#ENDRECFILTCOMMONCODE
+
+if iscmd detex ; then
+    checkcmds iconv
+    CMD="detex -n -e ''"
+else
+    checkcmds untex iconv
+    CMD="untex -giso -a"
+fi
+
+# output the result
+echo '<html><head>'
+#echo '<title>' "$title" '</title>'
+echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
+echo '</head><body>'
+echo '<pre>'
+
+#untex -giso -a "$infile" | \
+
+# CMD is left unquoted on purpose: it holds a command and its arguments.
+# The text goes inside <pre>, so the characters which are special in HTML
+# are escaped. & has to be processed first, or it would also change the
+# entities just produced for <. In a sed replacement a bare & stands for
+# the matched text, so the literal one needs a backslash.
+$CMD "$infile" | \
+   iconv -c -f iso-8859-1 -t utf-8 | \
+   sed \
+       -e 's/&/\&amp;/g' -e 's/</\&lt;/g'
+
+echo '</pre>'
+echo '</body></html>'
+
+# exit normally
+exit 0
--- a/website/filters/rclwpd
+++ b/website/filters/rclwpd
@ -0,0 +1,87 @@
+#!/bin/sh
+# @(#$Id: rclwpd,v 1.1 2007/08/26 13:34:59 dockes Exp $  (C) 2004 J.F.Dockes
+# Some inspiration from estraier
+#================================================================
+# rclwpd
+# Convert WordPerfect documents to HTML by executing the wpd2html program:
+#    http://libwpd.sourceforge.net/download.html
+#================================================================
+
+# Force the C locale and identify this filter: $progname and $filetype are
+# used in the help and error messages of the common code.
+LANG=C
+LC_ALL=C
+export LANG LC_ALL
+progname=rclwpd
+filetype="wpd"
+
+
+#RECFILTCOMMONCODE
+##############################################################################
+# !! Leave the previous line unmodified!! Code imported from the
+# recfiltcommon file
+
+# Utility code common to all shell filters. This could be sourced at run
+# time, but it's slightly more efficient to include the code in the
+# filters at build time (with a sed script).
+
+# Describe error in a way that can be interpreted by our caller
+senderror()
+{
+    echo RECFILTERROR $*
+    # Also alert on stderr just in case
+    echo ":2:$progname::: $*" 1>&2
+    exit 1
+}
+
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+	if test -x $cmd ; then return 0; else return 1; fi ;;
+    *)
+      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
+      for d in $*;do test -x $d/$cmd && return 0;done
+      return 1 ;;
+    esac
+}
+
+# Verify that every helper program named in the arguments is available.
+# Arguments: $@ - command names or paths, as accepted by iscmd
+# On the first helper that iscmd does not find, calls
+# "senderror HELPERNOTFOUND <name>", which exits the script.
+checkcmds()
+{
+    # "$@" and "$cmd" are quoted so that each argument is checked and
+    # reported as given, without word splitting or glob expansion.
+    for cmd in "$@"; do
+        iscmd "$cmd" || senderror HELPERNOTFOUND "$cmd"
+    done
+}
+
+# Show the help message unless called with exactly one argument, which must
+# not be --help.
+# The two conditions are separate test commands joined by ||: the -o and -a
+# operators are obsolescent, and test gives unspecified results with more
+# than four arguments (e.g. when "$1" looks like a test operator).
+if test $# -ne 1 || test "X$1" = "X--help"
+then
+  echo "Convert a $filetype file to HTML text for Recoll indexing."
+  echo "Usage: $progname [infile]"
+  exit 1
+fi
+
+infile="$1"
+
+# check the input file existence (may be '-' for stdin)
+if test "X$infile" != X- && test ! -f "$infile"
+then
+  senderror INPUTNOSUCHFILE "$infile"
+fi
+
+# protect access to our temp files and directories
+umask 77
+
+##############################################################################
+# !! Leave the following line unmodified !
+#ENDRECFILTCOMMONCODE
+
+# Make sure the converter is installed; checkcmds exits through senderror
+# when it is missing.
+checkcmds wpd2html
+
+# Emit the converter's output as the result. It apparently needs no
+# adjustment (not verified); the converter's stderr is discarded.
+wpd2html "$infile" 2>/dev/null