diff --git a/src/filters/rclppt b/src/filters/rclppt new file mode 100755 index 00000000..d9c5140e --- /dev/null +++ b/src/filters/rclppt @@ -0,0 +1,93 @@ +#!/bin/sh +# @(#$Id: rclppt,v 1.1 2006-09-05 09:52:23 dockes Exp $ (C) 2004 J.F.Dockes +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +#================================================================ +# rclppt +# Handle powerpoint files for recoll. +# Uses catppt from the catdoc utilities +# (http://ftp.45.free.net/~vitus/software/catdoc/) +# In my experience, this sometimes fail to extract text, printing "Default +# Design" ou "format par defaut" instead and only. 
+# +#================================================================ +# set variables +LANG=C ; export LANG +LC_ALL=C ; export LC_ALL +progname="rclsoff" + +# show help message +if test $# -ne 1 -o "$1" = "--help" +then + printf 'Process a powerpoint file for recoll indexation.\n' + printf 'Usage: %s [infile]\n' "$progname" + exit 1 +fi + +infile="$1" + +iscmd() +{ + cmd=$1 + case $cmd in + */*) + if test -x $cmd ; then return 0; else return 1; fi ;; + *) + oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs + for d in $*;do test -x $d/$cmd && return 0;done + return 1 ;; + esac +} +checkcmds() +{ + cmdsok=0 + for cmd in $*;do + if iscmd $cmd + then + cmdsok=1 + else + cmdsok=0 + fi + done +} + +# check the input file existence +if test ! -f "$infile" +then + printf '%s: %s: no such file\n' "$progname" "$infile" + exit 1 +fi +checkcmds catppt +if test X$cmdsok = X0 ; then + printf "Catppt not found" + exit 1 +fi + +# output the result +echo '' +#echo '' "$title" '' +echo '' +echo '' +echo '
'
+
+catppt -d utf-8 "$infile" | \
+   sed -e 's/'
+echo ''
+
+# exit normally
+exit 0
diff --git a/src/filters/rclxls b/src/filters/rclxls
new file mode 100755
index 00000000..fad14606
--- /dev/null
+++ b/src/filters/rclxls
@@ -0,0 +1,94 @@
+#!/bin/sh
+# @(#$Id: rclxls,v 1.1 2006-09-05 09:52:23 dockes Exp $  (C) 2004 J.F.Dockes
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+#================================================================
+# rclxls
+# Handle excel files for recoll. 
+# Uses xls2csv from the catdoc utilities
+# (http://ftp.45.free.net/~vitus/software/catdoc/)
+# Note: xls2csv is supposed to detect the source charset from the Excel
+# file, but this does not always work. If you see unexpected Russian chars
+# (the Russian author's default charset) in the output, you may want to add
+# e.g. a -s 8859-1 option to the xls2csv command line.
+#================================================================
+# set variables: export the C locale; progname is used in the messages below
+LANG=C ; export LANG
+LC_ALL=C ; export LC_ALL
+progname="rclxls"
+
+# show help message unless called with exactly one argument other than --help
+if test $# -ne 1 || test "$1" = "--help"
+then
+  printf 'Process an excel file for recoll indexation.\n'
+  printf 'Usage: %s [infile]\n' "$progname"
+  exit 1
+fi
+
+infile="$1"
+
+# iscmd cmd: return 0 if cmd passes test -x, else non-zero. A name with a
+# slash is tested as given; any other name is tried in each $PATH directory.
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*) test -x "$cmd"; return ;;
+    esac
+    # Split PATH on ':' only; "$@" below keeps directories with blanks whole.
+    oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
+    for d in "$@"; do test -x "$d/$cmd" && return 0; done; return 1
+}
+# Set cmdsok to 1 only if every command named in the arguments is found
+# by iscmd; set it to 0 otherwise, and also when no argument is given.
+checkcmds()
+{
+    cmdsok=0
+    for cmd in "$@"; do
+      # Stop at the first miss: a later hit must not mask it.
+      iscmd "$cmd" || { cmdsok=0; return 0; }
+      cmdsok=1
+    done
+    return 0
+}
+
+# check that the input file exists and that checkcmds can find xls2csv
+if test ! -f "$infile"
+then
+  printf '%s: %s: no such file\n' "$progname" "$infile"
+  exit 1
+fi
+checkcmds xls2csv
+if test "X$cmdsok" = X0 ; then
+   printf 'xls2csv not found\n'
+   exit 1
+fi
+
+# output the result
+echo ''
+#echo '' "$title" ''
+echo ''
+echo ''
+echo '
'
+
+xls2csv -c'	' -b"
" -d utf-8 "$infile" | \ + sed -e 's/' +echo '' + +# exit normally +exit 0 diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf index 1fe0cec8..dd39b0a4 100644 --- a/src/sampleconf/mimeconf +++ b/src/sampleconf/mimeconf @@ -1,4 +1,4 @@ -# @(#$Id: mimeconf,v 1.16 2006-04-01 07:34:21 dockes Exp $ (C) 2004 J.F.Dockes +# @(#$Id: mimeconf,v 1.17 2006-09-05 09:52:23 dockes Exp $ (C) 2004 J.F.Dockes # Recoll : associations of mime types to processing filters. # There are different sections for decompression, 'interning' for indexing @@ -28,6 +28,8 @@ application/x-bzip2 = uncompress rcluncomp bunzip2 %f %t application/msword = exec rcldoc application/pdf = exec rclpdf application/postscript = exec rclps +application/vnd.ms-powerpoint = exec rclppt +application/vnd.ms-excel = exec rclxls application/vnd.sun.xml.calc = exec rclsoff application/vnd.sun.xml.calc.template = exec rclsoff application/vnd.sun.xml.draw = exec rclsoff @@ -52,6 +54,8 @@ text/x-mail = internal # External viewers, launched when you on a result entry 'edit' link [view] application/msword = openoffice %f +application/vnd.ms-powerpoint = openoffice %f +application/vnd.ms-excel = openoffice %f application/pdf = xpdf %f application/postscript = gv %f application/vnd.sun.xml.calc = openoffice %f diff --git a/src/sampleconf/mimemap b/src/sampleconf/mimemap index dcb38214..f00181ca 100644 --- a/src/sampleconf/mimemap +++ b/src/sampleconf/mimemap @@ -1,4 +1,4 @@ -# @(#$Id: mimemap,v 1.15 2006-04-01 07:34:21 dockes Exp $ (C) 2004 J.F.Dockes +# @(#$Id: mimemap,v 1.16 2006-09-05 09:52:23 dockes Exp $ (C) 2004 J.F.Dockes # Recoll: associations of file name extensions to mime types .txt = text/plain @@ -35,6 +35,8 @@ #.Z = application/x-compress .doc = application/msword +.ppt = application/vnd.ms-powerpoint +.xls = application/vnd.ms-excel # OpenOffice / opendocument. We handle opendocument as old openoffice files # for now