#!/bin/sh
# @(#$Id: rclps,v 1.1 2005-02-02 17:57:08 dockes Exp $  (C) 2004 J.F.Dockes
#
# Provenance: added as src/filters/rclps (mode 100755) by commit
# 69f56860e84c1ccd31abf4b1adcc9cc5e2b96064, author dockes,
# Wed, 2 Feb 2005 17:57:08 +0000.
#
# Parts taken from Estraier:
#================================================================
# Estraier: a personal full-text search system
# Copyright (C) 2003-2004 Mikio Hirabayashi
#================================================================
#================================================================
# rclps
# Extract text from a postscript file by executing pstotext or ps2ascii,
# and wrap it in a minimal HTML document written to standard output.
#
# The default is to use pstotext which can deal with accents.
#
# OTOH, ps2ascii is much faster, comes with ghostscript, and sometimes works
# better (ie: on some openoffice output files).
#
# Usage:  rclps infile
# Output: UTF-8 HTML on stdout; diagnostics on stderr.
# Exit:   0 on success, 1 on usage error, missing input file or
#         missing decoder.
#================================================================

# set variables
# Force the C locale so that awk and the decoder work on raw bytes.
LANG=C ; export LANG
LC_ALL=C ; export LC_ALL
progname="rclps"
decoder=pstotext
#decoder=ps2ascii

# Print the help text. It goes to stderr because stdout is reserved for
# the HTML result.
usage() {
  printf 'Convert a postscript file to unformatted HTML text.\n' >&2
  printf 'Usage: %s [infile]\n' "$progname" >&2
  exit 1
}

# show help message
# Two separate tests joined by || : the second one is only evaluated when
# exactly one argument is present, so "$1" is never read while unset.
if [ $# -ne 1 ] || [ "$1" = "--help" ]
then
  usage
fi

infile="$1"

# check the input file existence
if [ ! -f "$infile" ]
then
  printf '%s: %s: no such file\n' "$progname" "$infile" >&2
  exit 1
fi

# A relative path beginning with "-" would be taken for an option by the
# decoder; anchoring it to the current directory avoids that.
case "$infile" in
  -*) infile="./$infile" ;;
esac

# check that the decoder is installed, rather than emitting an empty
# HTML document and reporting success
if ! command -v "$decoder" >/dev/null 2>&1
then
  printf '%s: %s: command not found\n' "$progname" "$decoder" >&2
  exit 1
fi

# output the result
# The awk stage:
#   - emits the HTML prologue and epilogue,
#   - escapes the characters that are special in HTML on every text line,
#   - joins a line ending with a hyphen to the following line,
#   - turns a lone form feed (page break) into a paragraph break and rule.
# The decoder output is treated as iso-8859-1 and converted to the UTF-8
# announced in the document header.
"$decoder" "$infile" |
awk '
BEGIN {
  printf("<html><head><title></title>\n")
  printf("<meta http-equiv=\"Content-Type\" content=\"text/html;charset=UTF-8\">\n")
  printf("</head>\n<body><p>")
}
$0 == "\f" {
  printf("</p>\n<hr>\n<p>")
  next
}
{
  # The ampersand must be escaped first so that the entities generated
  # for the angle brackets are not escaped a second time.
  gsub(/&/, "\\&amp;", $0)
  gsub(/</, "\\&lt;", $0)
  gsub(/>/, "\\&gt;", $0)
  if ($0 ~ /-$/) {
    # Hyphenated line end: drop the hyphen and do not emit a newline, so
    # the word continues with the next input line.
    sub(/-$/, "", $0)
    printf("%s", $0)
  } else {
    print $0
  }
}
END {
  printf("</p></body></html>\n")
}' | iconv -f iso-8859-1 -t UTF-8 -c -s

# exit normally
# The conversion pipeline is deliberately best-effort (iconv -c -s), so its
# status is not propagated.
exit 0