diff --git a/src/filters/rclppt b/src/filters/rclppt index fb0fa4da..f933b406 100755 --- a/src/filters/rclppt +++ b/src/filters/rclppt @@ -30,8 +30,6 @@ LC_ALL=C ; export LC_ALL progname="rclppt" filetype=powerpoint -RCLPPT_CATPPT=${RCLPPT_CATPPT:=yes} - #RECFILTCOMMONCODE ############################################################################## # !! Leave the previous line unmodified!! Code imported from the @@ -98,54 +96,68 @@ umask 77 # !! Leave the following line unmodified ! #ENDRECFILTCOMMONCODE -if test X$RCLPPT_CATPPT = Xyes ; then - checkcmds catppt +havecatppt=no +iscmd catppt && havecatppt=yes +haveunoconv=no +iscmd unoconv && haveunoconv=yes +iscmd pdftotext || haveunoconv=no - # output the result - echo '' - #echo '' "$title" '' - echo '' - echo '' - echo '
'
-
-  catppt -d utf-8 "$infile" | \
-      sed -e 's/'
-  echo ''
-
-  # exit normally
-  exit 0
-
-else
-
-  # Using unoconv
-  checkcmds unoconv pdftotext
-
-  # This needs a temp dir because we first output pdf (outputting html
-  # would produce one file per page), and pdftotext can't read from
-  # stdin
-  if test z"$RECOLL_TMPDIR" != z; then
-      ttdir=$RECOLL_TMPDIR
-  elif test z"$TMPDIR" != z ; then
-      ttdir=$TMPDIR
-  else
-      ttdir=/tmp
-  fi
-
-  tmpdir=$ttdir/rclppt_tmp$$
-  mkdir $tmpdir || exit 1
-  mkdir $tmpdir/rclppttmp || exit 1
-  unopdf=$tmpdir/rclppttmp/output.pdf
-  cleanup()
-  {
-      # Note that we're using a constant part (rclkwdtmp), that hopefully
-      # guarantees that we can't do big mistakes here.
-      rm -rf $tmpdir/rclppttmp
-      rmdir $tmpdir
-  }
-    
-  trap cleanup EXIT HUP QUIT INT TERM
-  unoconv -f pdf -o $unopdf "$infile"
-  `dirname $0`/rclpdf $unopdf
+if test X$havecatppt = Xno && test X$haveunoconv = Xno ; then
+    # No usable converter: let checkcmds exit with its usual message
+    checkcmds catppt unoconv pdftotext
+fi
+
+# A temp dir is needed: the unoconv path first produces a pdf (html
+# output would be one file per page) and pdftotext can't read from
+# stdin. The parent directory is RECOLL_TMPDIR if set and non-empty,
+# else TMPDIR if set and non-empty, else /tmp.
+ttdir=/tmp
+if test -n "$RECOLL_TMPDIR" ; then
+    ttdir=$RECOLL_TMPDIR
+elif test -n "$TMPDIR" ; then
+    ttdir=$TMPDIR
+fi
+
+tmpdir=$ttdir/rclppt_tmp$$
+mkdir "$tmpdir" || exit 1
+mkdir "$tmpdir/rclppttmp" || exit 1
+unopdf=$tmpdir/rclppttmp/output.pdf
+cattxt=$tmpdir/rclppttmp/output.txt
+cleanup()
+{
+    # The path given to rm -rf ends in a constant part (rclppttmp), which
+    # hopefully guarantees that we can't do big mistakes here.
+    rm -rf "$tmpdir/rclppttmp"
+    rmdir "$tmpdir"
+}
+trap cleanup EXIT HUP QUIT INT TERM
+
+# Try catppt first and keep its output in $cattxt. If that output looks
+# too small and unoconv is available, use unoconv instead: it is very
+# slow, but it handles newer files that catppt will not convert.
+#
+# I'm not sure of the right test for detecting catppt failure. On the
+# sample I have, it outputs Azure\n1_Azure\n\n. I don't know if Azure
+# is a good marker of failure. Anyway, it seems unlikely that a real
+# ppt would have fewer than 5 lines
+
+catppt -d utf-8 "$infile" > "$cattxt"
+lines=$(wc -l < "$cattxt")
+
+if test $lines -lt 5 && test X$haveunoconv = Xyes; then
+    unoconv -f pdf -o "$unopdf" "$infile"
+    "$(dirname "$0")/rclpdf" "$unopdf"
+else
+    # output the catppt result saved above (no second catppt run)
+    echo ''
+    #echo '' "$title" ''
+    echo ''
+    echo ''
+    echo '
'
+
+    cat "$cattxt" | \
+        sed -e 's/'
+    echo ''
 fi