diff --git a/src/filters/rclppt b/src/filters/rclppt new file mode 100755 index 00000000..d9c5140e --- /dev/null +++ b/src/filters/rclppt @@ -0,0 +1,93 @@ +#!/bin/sh +# @(#$Id: rclppt,v 1.1 2006-09-05 09:52:23 dockes Exp $ (C) 2004 J.F.Dockes +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +#================================================================ +# rclppt +# Handle powerpoint files for recoll. +# Uses catppt from the catdoc utilities +# (http://ftp.45.free.net/~vitus/software/catdoc/) +# In my experience, this sometimes fails to extract text, printing only "Default +# Design" or "format par defaut" instead. 
+#
+#================================================================
+# Run in the C locale, and name ourselves for usage/error messages.
+LANG=C ; export LANG
+LC_ALL=C ; export LC_ALL
+progname="rclppt"
+
+# Show the help message unless called with exactly one non-help argument.
+if test $# -ne 1 || test "$1" = "--help"
+then
+    printf 'Process a powerpoint file for recoll indexation.\n'
+    printf 'Usage: %s [infile]\n' "$progname"
+    exit 1
+fi
+
+infile="$1"
+# iscmd CMD: succeed if CMD (a path, or a name searched in $PATH) is executable.
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+        if test -x "$cmd" ; then return 0; else return 1; fi ;;
+    *)
+        oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
+        for d in "$@"; do test -x "${d:-.}/$cmd" && return 0; done
+        return 1 ;;
+    esac
+}
+checkcmds()
+{
+    # Set cmdsok to 1 only if commands were named and all were found;
+    # one missing command forces 0 even when later ones exist.
+    cmdsok=0
+    test $# -gt 0 || return 0
+    cmdsok=1
+    for cmd in "$@"; do
+        iscmd "$cmd" || cmdsok=0
+    done
+    return 0
+}
+
+# check the input file existence
+if test ! -f "$infile"
+then
+    printf '%s: %s: no such file\n' "$progname" "$infile"
+    exit 1
+fi
+checkcmds catppt
+if test "$cmdsok" = 0 ; then
+    printf 'Catppt not found\n'
+    exit 1
+fi
+
+# output the result
+echo '
' +#echo '' + +catppt -d utf-8 "$infile" | \ + sed -e 's/</g' -e 's/&/&/g' + +echo '' +echo '' + +# exit normally +exit 0 diff --git a/src/filters/rclxls b/src/filters/rclxls new file mode 100755 index 00000000..fad14606 --- /dev/null +++ b/src/filters/rclxls @@ -0,0 +1,94 @@ +#!/bin/sh +# @(#$Id: rclxls,v 1.1 2006-09-05 09:52:23 dockes Exp $ (C) 2004 J.F.Dockes +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +#================================================================ +# rclxls +# Handle excel files for recoll. +# Uses xls2csv from the catdoc utilities +# (http://ftp.45.free.net/~vitus/software/catdoc/) +# Note: xls2csv is supposed to detect the source charset from the excel +# file but this does not always work. If you see unexpected russian chars +# (the russian author's default charset) in the output, you may want to add +# e.g. a -s 8859-1 option to the xls2csv command line. 
+#================================================================
+# Run in the C locale, and name ourselves for usage/error messages.
+LANG=C ; export LANG
+LC_ALL=C ; export LC_ALL
+progname="rclxls"
+
+# Show the help message unless called with exactly one non-help argument.
+if test $# -ne 1 || test "$1" = "--help"
+then
+    printf 'Process an excel file for recoll indexation.\n'
+    printf 'Usage: %s [infile]\n' "$progname"
+    exit 1
+fi
+
+infile="$1"
+# iscmd CMD: succeed if CMD (a path, or a name searched in $PATH) is executable.
+iscmd()
+{
+    cmd=$1
+    case $cmd in
+    */*)
+        if test -x "$cmd" ; then return 0; else return 1; fi ;;
+    *)
+        oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
+        for d in "$@"; do test -x "${d:-.}/$cmd" && return 0; done
+        return 1 ;;
+    esac
+}
+checkcmds()
+{
+    # Set cmdsok to 1 only if commands were named and all were found;
+    # one missing command forces 0 even when later ones exist.
+    cmdsok=0
+    test $# -gt 0 || return 0
+    cmdsok=1
+    for cmd in "$@"; do
+        iscmd "$cmd" || cmdsok=0
+    done
+    return 0
+}
+
+# check the input file existence
+if test ! -f "$infile"
+then
+    printf '%s: %s: no such file\n' "$progname" "$infile"
+    exit 1
+fi
+checkcmds xls2csv
+if test "$cmdsok" = 0 ; then
+    printf 'xls2csv not found\n'
+    exit 1
+fi
+
+# output the result
+echo ''
+#echo '
' + +xls2csv -c' ' -b"' +echo '' + +# exit normally +exit 0 diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf index 1fe0cec8..dd39b0a4 100644 --- a/src/sampleconf/mimeconf +++ b/src/sampleconf/mimeconf @@ -1,4 +1,4 @@ -# @(#$Id: mimeconf,v 1.16 2006-04-01 07:34:21 dockes Exp $ (C) 2004 J.F.Dockes +# @(#$Id: mimeconf,v 1.17 2006-09-05 09:52:23 dockes Exp $ (C) 2004 J.F.Dockes # Recoll : associations of mime types to processing filters. # There are different sections for decompression, 'interning' for indexing @@ -28,6 +28,8 @@ application/x-bzip2 = uncompress rcluncomp bunzip2 %f %t application/msword = exec rcldoc application/pdf = exec rclpdf application/postscript = exec rclps +application/vnd.ms-powerpoint = exec rclppt +application/vnd.ms-excel = exec rclxls application/vnd.sun.xml.calc = exec rclsoff application/vnd.sun.xml.calc.template = exec rclsoff application/vnd.sun.xml.draw = exec rclsoff @@ -52,6 +54,8 @@ text/x-mail = internal # External viewers, launched when you on a result entry 'edit' link [view] application/msword = openoffice %f +application/vnd.ms-powerpoint = openoffice %f +application/vnd.ms-excel = openoffice %f application/pdf = xpdf %f application/postscript = gv %f application/vnd.sun.xml.calc = openoffice %f diff --git a/src/sampleconf/mimemap b/src/sampleconf/mimemap index dcb38214..f00181ca 100644 --- a/src/sampleconf/mimemap +++ b/src/sampleconf/mimemap @@ -1,4 +1,4 @@ -# @(#$Id: mimemap,v 1.15 2006-04-01 07:34:21 dockes Exp $ (C) 2004 J.F.Dockes +# @(#$Id: mimemap,v 1.16 2006-09-05 09:52:23 dockes Exp $ (C) 2004 J.F.Dockes # Recoll: associations of file name extensions to mime types .txt = text/plain @@ -35,6 +35,8 @@ #.Z = application/x-compress .doc = application/msword +.ppt = application/vnd.ms-powerpoint +.xls = application/vnd.ms-excel # OpenOffice / opendocument. We handle opendocument as old openoffice files # for now
" -d utf-8 "$infile" | \ + sed -e 's/</g' -e 's/&/&/g' + +echo '