diff --git a/src/Makefile.am b/src/Makefile.am index 9bf33658..94e13efd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -670,6 +670,7 @@ filters/rcllyx \ filters/rclman \ filters/rclmidi.py \ filters/rclpdf.py \ +filters/rclps \ filters/rclokulnote.py \ filters/rclopxml.py \ filters/rclppt.py \ diff --git a/src/filters/rcldvi b/src/filters/rcldvi index da449a78..5b484193 100755 --- a/src/filters/rcldvi +++ b/src/filters/rcldvi @@ -18,10 +18,15 @@ # # Extract text from a dvi file by either executing dvitops and -# pstotext or using catdvi. dvitops has given better results during -# tests, and is chosen first if available, but the dvitops/pstotext -# combination is much slower than catdvi set variables. In any case, -# the program is not too good with special characters (e.g. ligatures) +# pstotext or using catdvi. +# +# pstotext does not work any more with gs 9.22-25, so catdvi is always +# needed now. +# Initially dvitops had given better results during tests, and was +# chosen first if available, but the dvitops/pstotext combination +# is much slower than catdvi set variables. In any case, the +# program is not too good with special characters (e.g. ligatures) + LANG=C ; export LANG LC_ALL=C ; export LC_ALL progname="rcldvi" @@ -100,21 +105,27 @@ decoderdvips() } decodercatdvi() { - catdvi $1 + catdvi --output-encoding=UTF-8 "$1" } decoder="" -iscmd dvips -isdvips=$? -iscmd pstotext -ispstotext=$? -if test $isdvips -eq 0 -a $ispstotext -eq 0; then - decoder=decoderdvips -elif iscmd catdvi ; then - decoder=decodercatdvi -fi +# pstotext does not work any more +# iscmd dvips +# isdvips=$? +# iscmd pstotext +# ispstotext=$? 
+# if test $isdvips -eq 0 -a $ispstotext -eq 0; then +# decoder=decoderdvips +# elif iscmd catdvi ; then +# decoder=decodercatdvi +# fi + +if iscmd catdvi ; then + decoder=decodercatdvi +fi + if test X$decoder = X ; then - senderror HELPERNOTFOUND dvips or catdvi + senderror HELPERNOTFOUND catdvi fi # The strange 'BEGIN' setup is to prevent 'file' from thinking this file diff --git a/src/filters/rclps b/src/filters/rclps new file mode 100755 index 00000000..aab29d65 --- /dev/null +++ b/src/filters/rclps @@ -0,0 +1,101 @@ +#!/bin/sh +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# +# Extract text from a postscript file +# +# We used to use pstotext but it does not work any more with gs 9.22-25 +# So we now go through pdf, which seems the only way to obtain non-ascii output +# + +LANG=C ; export LANG +LC_ALL=C ; export LC_ALL +progname="rclps" +filetype=postscript + + +#RECFILTCOMMONCODE +############################################################################## +# !! Leave the previous line unmodified!! Code imported from the +# recfiltcommon file + +# Utility code common to all shell filters. This could be sourced at run +# time, but it's slightly more efficient to include the code in the +# filters at build time (with a sed script). 
+ +# Describe error in a way that can be interpreted by our caller +senderror() +{ + echo RECFILTERROR $* + # Also alert on stderr just in case + echo ":2:$progname::: $*" 1>&2 + exit 1 +} + +iscmd() +{ + cmd=$1 + case $cmd in + */*) + if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;; + *) + oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs + for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done + return 1 ;; + esac +} + +checkcmds() +{ + for cmd in $*;do + if iscmd $cmd + then + a=1 + else + senderror HELPERNOTFOUND $cmd + fi + done +} + +# show help message +if test $# -ne 1 -o "$1" = "--help" +then + echo "Convert a $filetype file to HTML text for Recoll indexing." + echo "Usage: $progname [infile]" + exit 1 +fi + +infile="$1" + +# check the input file existence (may be '-' for stdin) +if test "X$infile" != X- -a ! -f "$infile" +then + senderror INPUTNOSUCHFILE "$infile" +fi + +# protect access to our temp files and directories +umask 77 + +############################################################################## +# !! Leave the following line unmodified ! +#ENDRECFILTCOMMONCODE + +checkcmds ps2pdf pdftotext + +ps2pdf "$infile" - | pdftotext -htmlmeta -enc UTF-8 -eol unix -q - - | \ + egrep -v '^