From 17542969a567834b01b33d6f798480b411f48dd3 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Mon, 23 Jan 2012 20:25:55 +0100 Subject: [PATCH] new gnumeric and okular notes filters --- src/filters/rclgnm | 193 +++++++++++++++++++++++++++++++++++ src/filters/rclokulnote | 130 +++++++++++++++++++++++ src/sampleconf/mimeconf | 8 +- src/sampleconf/mimemap | 9 +- src/sampleconf/mimeview | 1 + tests/gnumeric/gnumeric.sh | 16 +++ tests/gnumeric/gnumeric.txt | 5 + website/features.html | 35 +++++-- website/filters/filters.html | 36 ++++--- website/index.html.en | 3 +- 10 files changed, 412 insertions(+), 24 deletions(-) create mode 100755 src/filters/rclgnm create mode 100755 src/filters/rclokulnote create mode 100755 tests/gnumeric/gnumeric.sh create mode 100644 tests/gnumeric/gnumeric.txt diff --git a/src/filters/rclgnm b/src/filters/rclgnm new file mode 100755 index 00000000..92ed8184 --- /dev/null +++ b/src/filters/rclgnm @@ -0,0 +1,193 @@ +#!/bin/sh +# @(#$Id: rclsoff,v 1.12 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes +# Parts taken from Estraier: +#================================================================ +# Estraier: a personal full-text search system +# Copyright (C) 2003-2004 Mikio Hirabayashi +#================================================================ +#================================================================ +# Extract text from a gnumeric spreadsheet +#================================================================ + +# set variables +LANG=C ; export LANG +LC_ALL=C ; export LC_ALL +progname="rclgnumeric" +filetype=gnumeric + + +#RECFILTCOMMONCODE +############################################################################## +# !! Leave the previous line unmodified!! Code imported from the +# recfiltcommon file + +# Utility code common to all shell filters. This could be sourced at run +# time, but it's slightly more efficient to include the code in the +# filters at build time (with a sed script). 
+ +# Describe error in a way that can be interpreted by our caller +senderror() +{ + echo RECFILTERROR $* + # Also alert on stderr just in case + echo ":2:$progname::: $*" 1>&2 + exit 1 +} + +iscmd() +{ + cmd=$1 + case $cmd in + */*) + if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;; + *) + oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs + for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done + return 1 ;; + esac +} + +checkcmds() +{ + for cmd in $*;do + if iscmd $cmd + then + a=1 + else + senderror HELPERNOTFOUND $cmd + fi + done +} + +# show help message +if test $# -ne 1 -o "$1" = "--help" +then + echo "Convert a $filetype file to HTML text for Recoll indexing." + echo "Usage: $progname [infile]" + exit 1 +fi + +infile="$1" + +# check the input file existence (may be '-' for stdin) +if test "X$infile" != X- -a ! -f "$infile" +then + senderror INPUTNOSUCHFILE "$infile" +fi + +# protect access to our temp files and directories +umask 77 + +############################################################################## +# !! Leave the following line unmodified ! +#ENDRECFILTCOMMONCODE + +checkcmds xsltproc gunzip + +# We need a temporary file +if test z"$RECOLL_TMPDIR" != z; then + ttdir=$RECOLL_TMPDIR +elif test z"$TMPDIR" != z ; then + ttdir=$TMPDIR +else + ttdir=/tmp +fi +tmpfile=$ttdir/rclgnm.XXXXXX + +tmpfile=`mktemp "$tmpfile"` +if [ $? -ne 0 ]; then + senderror "$0: Can't create temp file, exiting..." +fi + +cleanup() +{ + # Note that we're using a constant part (rclsofftmp), that hopefully + # guarantees that we can't do big mistakes here. + rm -f $tmpfile +} + +trap cleanup EXIT HUP QUIT INT TERM + +gunzip < $1 > $tmpfile || senderror "Cant uncompress input" +xsltproc --novalid --nonet - $tmpfile < + + + + + + + + + + + + + + + + + + + + + date + + + + + + + abstract + + + + + + + keywords + + + + + + + keywords + + + + + + <xsl:value-of select="."/> + + + + + author + + + + + + + +

+
+ + +
+
+ +
+EOF + diff --git a/src/filters/rclokulnote b/src/filters/rclokulnote new file mode 100755 index 00000000..d677ba5c --- /dev/null +++ b/src/filters/rclokulnote @@ -0,0 +1,130 @@ +#!/bin/sh +# @(#$Id: rclsoff,v 1.12 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes +# Parts taken from Estraier: +#================================================================ +# Estraier: a personal full-text search system +# Copyright (C) 2003-2004 Mikio Hirabayashi +#================================================================ +#================================================================ +# Extract text from a gnumeric spreadsheet +#================================================================ + +# set variables +LANG=C ; export LANG +LC_ALL=C ; export LC_ALL +progname="rclgnumeric" +filetype=gnumeric + + +#RECFILTCOMMONCODE +############################################################################## +# !! Leave the previous line unmodified!! Code imported from the +# recfiltcommon file + +# Utility code common to all shell filters. This could be sourced at run +# time, but it's slightly more efficient to include the code in the +# filters at build time (with a sed script). + +# Describe error in a way that can be interpreted by our caller +senderror() +{ + echo RECFILTERROR $* + # Also alert on stderr just in case + echo ":2:$progname::: $*" 1>&2 + exit 1 +} + +iscmd() +{ + cmd=$1 + case $cmd in + */*) + if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;; + *) + oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs + for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done + return 1 ;; + esac +} + +checkcmds() +{ + for cmd in $*;do + if iscmd $cmd + then + a=1 + else + senderror HELPERNOTFOUND $cmd + fi + done +} + +# show help message +if test $# -ne 1 -o "$1" = "--help" +then + echo "Convert a $filetype file to HTML text for Recoll indexing." 
+ echo "Usage: $progname [infile]" + exit 1 +fi + +infile="$1" + +# check the input file existence (may be '-' for stdin) +if test "X$infile" != X- -a ! -f "$infile" +then + senderror INPUTNOSUCHFILE "$infile" +fi + +# protect access to our temp files and directories +umask 77 + +############################################################################## +# !! Leave the following line unmodified ! +#ENDRECFILTCOMMONCODE + +checkcmds xsltproc + +xsltproc --novalid --nonet - $infile < + + + + + + + + + + + + Okular notes about: <xsl:value-of select="/documentInfo/@url" /> + + + + + + + + + + + + + +

+ + +
+ + +

=

+ + +
+ + + +
+EOF + diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf index 1412af5d..f47b2b4b 100644 --- a/src/sampleconf/mimeconf +++ b/src/sampleconf/mimeconf @@ -74,9 +74,11 @@ application/x-chm = execm rclchm application/x-dvi = exec rcldvi application/x-flac = execm rclaudio application/x-gnuinfo = execm rclinfo +application/x-gnumeric = exec rclgnm application/x-kword = exec rclkwd application/x-lyx = exec rcllyx application/x-mimehtml = internal message/rfc822 +application/x-okular-notes = exec rclokulnote application/x-perl = internal text/plain application/x-rar = execm rclrar;charset=default application/x-scribus = exec rclscribus @@ -153,8 +155,10 @@ application/x-abiword = wordprocessing application/x-dvi = document application/x-flac = sownd application/x-fsdirectory = folder +application/x-gnumeric = spreadsheet application/x-kword = wordprocessing application/x-lyx = wordprocessing +application/x-okular-notes = document application/x-scribus = document application/x-gnuinfo = document application/x-tex = wordprocessing @@ -207,6 +211,7 @@ text = \ application/x-dvi \ application/x-kword \ application/x-lyx \ + application/x-okular-notes \ application/x-perl \ application/x-scribus \ application/x-gnuinfo \ @@ -233,7 +238,8 @@ spreadsheet = \ application/vnd.openxmlformats-officedocument.spreadsheetml.sheet \ application/vnd.openxmlformats-officedocument.spreadsheetml.template \ application/vnd.sun.xml.calc \ - application/vnd.sun.xml.calc.template + application/vnd.sun.xml.calc.template \ + application/x-gnumeric presentation = application/vnd.ms-powerpoint \ application/vnd.openxmlformats-officedocument.presentationml.template \ diff --git a/src/sampleconf/mimemap b/src/sampleconf/mimemap index 04948692..f7c2d383 100644 --- a/src/sampleconf/mimemap +++ b/src/sampleconf/mimemap @@ -96,6 +96,7 @@ .scd = application/x-scribus .info = application/x-gnuinfo .kwd = application/x-kword +.gnumeric = application/x-gnumeric .wpd = application/vnd.wordperfect 
@@ -133,7 +134,7 @@ recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz .log.gz .md5 .map \ .o .lib .dll .a .sys .exe .com \ .dat .bak .rdf .log .db .msf .pid \ - .gnm .gnumeric \ + .gnm \ ,v ~ # # Special handling of .txt files inside ~/.gaim and ~/.purple directories @@ -185,3 +186,9 @@ recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz .log.gz .md5 .map \ .8 = text/x-man .9 = text/x-man .n = text/x-man + +# Special handling for okular notes +[~/.kde4/share/apps/okular/docdata] +.xml = application/x-okular-notes +[~/.kde/share/apps/okular/docdata] +.xml = application/x-okular-notes diff --git a/src/sampleconf/mimeview b/src/sampleconf/mimeview index cdbb348f..9bf561f3 100644 --- a/src/sampleconf/mimeview +++ b/src/sampleconf/mimeview @@ -54,6 +54,7 @@ application/vnd.wordperfect = libreoffice %f application/x-chm = kchmviewer %f application/x-fsdirectory = dolphin %f application/x-gnuinfo = xterm -e "info -f %f" +application/x-gnumeric = gnumeric %f application/x-flac = rhythmbox %f audio/mpeg = rhythmbox %f diff --git a/tests/gnumeric/gnumeric.sh b/tests/gnumeric/gnumeric.sh new file mode 100755 index 00000000..e803d00a --- /dev/null +++ b/tests/gnumeric/gnumeric.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +topdir=`dirname $0`/.. +. 
$topdir/shared.sh + +initvariables $0 + +( + recollq author=gnumericAuthor + recollq gnumerictext + recollq gnumericcommentaire +) 2> $mystderr | egrep -v '^Recoll query: ' > $mystdout + +diff -w ${myname}.txt $mystdout > $mydiffs 2>&1 + +checkresult diff --git a/tests/gnumeric/gnumeric.txt b/tests/gnumeric/gnumeric.txt new file mode 100644 index 00000000..9ae0c5d6 --- /dev/null +++ b/tests/gnumeric/gnumeric.txt @@ -0,0 +1,5 @@ +0 results +1 results +application/x-gnumeric [file:///home/dockes/projets/fulltext/testrecoll/gnumeric/trygnumeric.gnumeric] [The gnumericTitle] 2111 bytes +1 results +application/x-gnumeric [file:///home/dockes/projets/fulltext/testrecoll/gnumeric/trygnumeric.gnumeric] [The gnumericTitle] 2111 bytes diff --git a/website/features.html b/website/features.html index 4171e8a8..e6e36adf 100644 --- a/website/features.html +++ b/website/features.html @@ -142,6 +142,9 @@
  • OpenOffice files.
  • SVG files.
  • +
  • Gnumeric files.
  • +
  • Okular annotations files.
  • +
    Other formats
    @@ -200,16 +203,30 @@ "http://www.gnu.org/software/ghostscript/ghostscript.html"> ghostscript and pstotext. - Actually the pstotext 1.9 found at the latter link has a - problem with file names using special shell characters, and - you should either use the version packaged for your system - which is probably patched, or apply the Debian patch which - is stored here for - convenience. See - http://packages.debian.org/squeeze/pstotext and - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=356988 for - references/explanations. + convenience. See http://packages.debian.org/squeeze/pstotext + and http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=356988 + for references/explanations. +
    + To make things a bit easier, I also + store an + already patched version. I added an + install target to the Makefile... This installs to + /usr/local, use make install PREFIX=/usr to + change. So all you need is: +
    +              tar xvzf pstotext-1.9-patched.tar.gz
    +              cd pstotext-1.9-patched
    +              make
    +              make install
    +            
    +
    + +
  • RTF files with unrtf. Please diff --git a/website/filters/filters.html b/website/filters/filters.html index 7b801069..5d34d189 100644 --- a/website/filters/filters.html +++ b/website/filters/filters.html @@ -64,37 +64,49 @@ mimeconf mimeview

    - + +

    Okular annotations

    +

    rclokulnote. Okular lets you create + annotations for PDF documents and stores them in XML format + somewhere under ~/.kde. This filter does not do a nice job of + formatting the data, but will at least let you find it...

    + +

    Gnumeric

    +

    rclgnm. Needs xsltproc and gunzip.

    Rar archive support

    -

    rclrar. This needs the Python rarfile module. -

    +

    rclrar. This is up to date in Recoll + 1.16.2 but may be added to Recoll 1.15. It needs the Python + rarfile module.

    Mimehtml support

    This is based on the internal mail filter, you just need to - download and install the config files. Will only work with - 1.15.

    + download and install the configuration files (mimemap and + mimeconf). Will only work with 1.15 and later.

    Konqueror webarchive (.war) filter

    rclwar

    Updated zip archive filter

    The filter is corrected to handle utf-8 paths in zip archives: - rclzip

    - + rclzip. Up to date in Recoll 1.16, but + may be useful with Recoll 1.15

    Updated audio tag filter

    The mutagen-based rclaudio filter delivered with recoll 1.14.2 used a very recent mutagen interface which will only work with mutagen versions after 1.17 (probably. at least works with 1.19, doesn't with 1.15). - You can download the corrected script here. + You can download the corrected script + here. Not useful with Recoll 1.15 or 1.16.

    diff --git a/website/index.html.en b/website/index.html.en index 40d46743..67ace5b5 100644 --- a/website/index.html.en +++ b/website/index.html.en @@ -63,7 +63,8 @@ the search tips might prove useful ! Also the - Faqs and Howtos on bitbucket.org.

    + Faqs and Howtos on bitbucket.org, and some contributed + customisation/beautification tricks .

    News: