powerpoint: decide to use unoconv based on the number of lines in catppt output
This commit is contained in:
parent
a9358d2f03
commit
134153e412
@@ -30,8 +30,6 @@ LC_ALL=C ; export LC_ALL
|
|||||||
progname="rclppt"
|
progname="rclppt"
|
||||||
filetype=powerpoint
|
filetype=powerpoint
|
||||||
|
|
||||||
RCLPPT_CATPPT=${RCLPPT_CATPPT:=yes}
|
|
||||||
|
|
||||||
#RECFILTCOMMONCODE
|
#RECFILTCOMMONCODE
|
||||||
##############################################################################
|
##############################################################################
|
||||||
# !! Leave the previous line unmodified!! Code imported from the
|
# !! Leave the previous line unmodified!! Code imported from the
|
||||||
@@ -98,54 +96,68 @@ umask 77
|
|||||||
# !! Leave the following line unmodified !
|
# !! Leave the following line unmodified !
|
||||||
#ENDRECFILTCOMMONCODE
|
#ENDRECFILTCOMMONCODE
|
||||||
|
|
||||||
if test X$RCLPPT_CATPPT = Xyes ; then
|
havecappt=no
|
||||||
checkcmds catppt
|
iscmd cappt && havecappt=yes
|
||||||
|
haveunoconv=no
|
||||||
|
iscmd unoconv && haveunoconv=yes
|
||||||
|
iscmd pdftotext || haveunoconv=no
|
||||||
|
|
||||||
# output the result
|
if test X$havecatppt = Xno -a X$haveunoconv = Xno ; then
|
||||||
echo '<html><head>'
|
# checkcmds will exit with the appropriate salutations
|
||||||
#echo '<title>' "$title" '</title>'
|
checkcmds catppt unoconv pdftotext
|
||||||
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
fi
|
||||||
echo '</head><body>'
|
|
||||||
echo '<pre>'
|
# This needs a temp dir because we first output pdf (outputting html
|
||||||
|
# would produce one file per page), and pdftotext can't read from
|
||||||
catppt -d utf-8 "$infile" | \
|
# stdin
|
||||||
sed -e 's/</&lt;/g' -e 's/&/&amp;/g'
|
if test z"$RECOLL_TMPDIR" != z; then
|
||||||
|
ttdir=$RECOLL_TMPDIR
|
||||||
echo '</pre>'
|
elif test z"$TMPDIR" != z ; then
|
||||||
echo '</body></html>'
|
ttdir=$TMPDIR
|
||||||
|
else
|
||||||
# exit normally
|
ttdir=/tmp
|
||||||
exit 0
|
fi
|
||||||
|
|
||||||
else
|
tmpdir=$ttdir/rclppt_tmp$$
|
||||||
|
mkdir $tmpdir || exit 1
|
||||||
# Using unoconv
|
mkdir $tmpdir/rclppttmp || exit 1
|
||||||
checkcmds unoconv pdftotext
|
unopdf=$tmpdir/rclppttmp/output.pdf
|
||||||
|
cattxt=$tmpdir/rclppttmp/output.txt
|
||||||
# This needs a temp dir because we first output pdf (outputting html
|
cleanup()
|
||||||
# would produce one file per page), and pdftotext can't read from
|
{
|
||||||
# stdin
|
# Note that we're using a constant part (rclppttmp), that hopefully
|
||||||
if test z"$RECOLL_TMPDIR" != z; then
|
# guarantees that we can't make big mistakes here.
|
||||||
ttdir=$RECOLL_TMPDIR
|
rm -rf $tmpdir/rclppttmp
|
||||||
elif test z"$TMPDIR" != z ; then
|
rmdir $tmpdir
|
||||||
ttdir=$TMPDIR
|
}
|
||||||
else
|
trap cleanup EXIT HUP QUIT INT TERM
|
||||||
ttdir=/tmp
|
|
||||||
fi
|
# Try catppt. If the output looks too small and unoconv is available, use unoconv
|
||||||
|
# instead. unoconv is very slow but it handles newer files that catppt will
|
||||||
tmpdir=$ttdir/rclppt_tmp$$
|
# not convert.
|
||||||
mkdir $tmpdir || exit 1
|
#
|
||||||
mkdir $tmpdir/rclppttmp || exit 1
|
# I'm not sure of the right test for detecting catppt failure. On the
|
||||||
unopdf=$tmpdir/rclppttmp/output.pdf
|
# sample I have, it outputs Azure\n1_Azure\n\n. I don't know if Azure
|
||||||
cleanup()
|
# is a good marker of failure. Anyway, it seems unlikely that a real
|
||||||
{
|
# ppt would have fewer than 5 lines
|
||||||
# Note that we're using a constant part (rclppttmp), that hopefully
|
|
||||||
# guarantees that we can't make big mistakes here.
|
catppt -d utf-8 "$infile" > $cattxt
|
||||||
rm -rf $tmpdir/rclppttmp
|
lines=`wc -l < $cattxt`
|
||||||
rmdir $tmpdir
|
|
||||||
}
|
if test $lines -lt 5 -a X$haveunoconv = Xyes; then
|
||||||
|
unoconv -f pdf -o $unopdf "$infile"
|
||||||
trap cleanup EXIT HUP QUIT INT TERM
|
`dirname $0`/rclpdf $unopdf
|
||||||
unoconv -f pdf -o $unopdf "$infile"
|
else
|
||||||
`dirname $0`/rclpdf $unopdf
|
# output the catppt result
|
||||||
|
echo '<html><head>'
|
||||||
|
#echo '<title>' "$title" '</title>'
|
||||||
|
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
||||||
|
echo '</head><body>'
|
||||||
|
echo '<pre>'
|
||||||
|
|
||||||
|
catppt -d utf-8 "$infile" | \
|
||||||
|
sed -e 's/</&lt;/g' -e 's/&/&amp;/g' < $cattxt
|
||||||
|
|
||||||
|
echo '</pre>'
|
||||||
|
echo '</body></html>'
|
||||||
fi
|
fi
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user