new gnumeric and okular notes filters
This commit is contained in:
parent
3c65886366
commit
17542969a5
193
src/filters/rclgnm
Executable file
193
src/filters/rclgnm
Executable file
@ -0,0 +1,193 @@
|
||||
#!/bin/sh
|
||||
# @(#$Id: rclsoff,v 1.12 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# Parts taken from Estraier:
|
||||
#================================================================
|
||||
# Estraier: a personal full-text search system
|
||||
# Copyright (C) 2003-2004 Mikio Hirabayashi
|
||||
#================================================================
|
||||
#================================================================
|
||||
# Extract text from a gnumeric spreadsheet
|
||||
#================================================================
|
||||
|
||||
# set variables
|
||||
LANG=C ; export LANG
|
||||
LC_ALL=C ; export LC_ALL
|
||||
# Name shown in the usage text and in error messages; it matches the name
# under which this filter is installed and invoked (rclgnm).
progname="rclgnm"
|
||||
filetype=gnumeric
|
||||
|
||||
|
||||
#RECFILTCOMMONCODE
|
||||
##############################################################################
|
||||
# !! Leave the previous line unmodified!! Code imported from the
|
||||
# recfiltcommon file
|
||||
|
||||
# Utility code common to all shell filters. This could be sourced at run
|
||||
# time, but it's slightly more efficient to include the code in the
|
||||
# filters at build time (with a sed script).
|
||||
|
||||
# Describe error in a way that can be interpreted by our caller
|
||||
# senderror args...: report a fatal filter error and terminate the script.
# Writes "RECFILTERROR <args>" to stdout and ":2:$progname::: <args>" to
# stderr, then exits with status 1. The arguments are expanded unquoted
# ($*), so they are re-split into words and joined with single spaces.
senderror()
|
||||
{
|
||||
# Error line on stdout, introduced by the RECFILTERROR keyword
echo RECFILTERROR $*
|
||||
# Also alert on stderr just in case
|
||||
echo ":2:$progname::: $*" 1>&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
# iscmd name: return 0 if name designates an executable file that is not a
# directory, 1 otherwise. A name containing a slash is tested as given;
# any other name is looked up in each directory listed in $PATH.
iscmd()
|
||||
{
|
||||
cmd=$1
|
||||
case $cmd in
|
||||
# Name contains a slash: test that path directly, no PATH search
*/*)
|
||||
if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
|
||||
# Plain command name: search the PATH directories
*)
|
||||
# Split $PATH on ":" into the positional parameters, then restore IFS
oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
|
||||
# Succeed on the first directory holding an executable, non-directory $cmd
for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
|
||||
return 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
# checkcmds name...: check that every named helper command is available
# according to iscmd. The first one that is not found is reported with
# "senderror HELPERNOTFOUND <name>", which terminates the script.
checkcmds()
|
||||
{
|
||||
for cmd in $*;do
|
||||
if iscmd $cmd
|
||||
then
|
||||
# Placeholder assignment: only there so that the "then" branch is not empty
a=1
|
||||
else
|
||||
senderror HELPERNOTFOUND $cmd
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
# show help message
|
||||
# Exactly one argument is required: any other argument count, or a single
# "--help" argument, prints the usage text and exits with status 1.
if test $# -ne 1 -o "$1" = "--help"
|
||||
then
|
||||
echo "Convert a $filetype file to HTML text for Recoll indexing."
|
||||
echo "Usage: $progname [infile]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Keep the single argument as the input file name
infile="$1"
|
||||
|
||||
# check the input file existence (may be '-' for stdin)
|
||||
# Any name other than "-" must be an existing regular file (-f); the X
# prefix keeps the comparison a plain string test whatever the name is.
if test "X$infile" != X- -a ! -f "$infile"
|
||||
then
|
||||
senderror INPUTNOSUCHFILE "$infile"
|
||||
fi
|
||||
|
||||
# protect access to our temp files and directories
|
||||
umask 77
|
||||
|
||||
##############################################################################
|
||||
# !! Leave the following line unmodified !
|
||||
#ENDRECFILTCOMMONCODE
|
||||
|
||||
checkcmds xsltproc gunzip
|
||||
|
||||
# We need a temporary file
|
||||
# Directory for the scratch file: $RECOLL_TMPDIR when set and non-empty,
# otherwise $TMPDIR when set and non-empty, otherwise /tmp.
ttdir=${RECOLL_TMPDIR:-${TMPDIR:-/tmp}}

# Have mktemp create the file from the rclgnm.XXXXXX template. The exit
# status of the assignment is that of mktemp, so a failure is reported
# through senderror (which terminates the script).
tmpfile=`mktemp "$ttdir/rclgnm.XXXXXX"` ||
    senderror "$0: Can't create temp file, exiting..."
|
||||
|
||||
# cleanup: remove the temporary file that holds the uncompressed document.
# Takes no arguments; reads the global $tmpfile set from mktemp's output.
cleanup()
{
    # $tmpfile comes from mktemp with a template ending in rclgnm.XXXXXX,
    # so only our own scratch file can be deleted here. The expansion is
    # quoted so that a temp directory name containing whitespace or glob
    # characters is still handled as a single path.
    rm -f "$tmpfile"
}
|
||||
|
||||
trap cleanup EXIT HUP QUIT INT TERM
|
||||
|
||||
# Uncompress the input into the temp file, then run the stylesheet read
# from stdin (the here-document that follows) over the uncompressed data.
# The input is taken from $infile (the saved copy of the single argument)
# and every file name is quoted, so paths containing whitespace or glob
# characters reach gunzip and xsltproc intact.
gunzip < "$infile" > "$tmpfile" || senderror "Cant uncompress input"
xsltproc --novalid --nonet - "$tmpfile" <<EOF
|
||||
<?xml version="1.0"?>
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
|
||||
xmlns:ooo="http://openoffice.org/2004/office"
|
||||
xmlns:gnm="http://www.gnumeric.org/v10.dtd"
|
||||
|
||||
exclude-result-prefixes="office xlink meta ooo dc gnm"
|
||||
>
|
||||
|
||||
<xsl:output method="html" encoding="UTF-8"/>
|
||||
|
||||
<xsl:template match="/">
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
||||
<xsl:apply-templates select="//office:document-meta/office:meta"/>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<xsl:apply-templates select="//gnm:Cells"/>
|
||||
<xsl:apply-templates select="//gnm:Objects"/>
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="//dc:date">
|
||||
<meta>
|
||||
<xsl:attribute name="name">date</xsl:attribute>
|
||||
<xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
|
||||
</meta>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="//dc:description">
|
||||
<meta>
|
||||
<xsl:attribute name="name">abstract</xsl:attribute>
|
||||
<xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
|
||||
</meta>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="//meta:keyword">
|
||||
<meta>
|
||||
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||
<xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
|
||||
</meta>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="//dc:subject">
|
||||
<meta>
|
||||
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||
<xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
|
||||
</meta>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="//dc:title">
|
||||
<title> <xsl:value-of select="."/> </title>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="//meta:initial-creator">
|
||||
<meta>
|
||||
<xsl:attribute name="name">author</xsl:attribute>
|
||||
<xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
|
||||
</meta>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="office:meta/*"/>
|
||||
|
||||
<xsl:template match="gnm:Cell">
|
||||
<p><xsl:value-of select="."/></p>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="gnm:CellComment">
|
||||
<blockquote><xsl:value-of select="@Text"/></blockquote>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
||||
EOF
|
||||
|
||||
130
src/filters/rclokulnote
Executable file
130
src/filters/rclokulnote
Executable file
@ -0,0 +1,130 @@
|
||||
#!/bin/sh
|
||||
# @(#$Id: rclsoff,v 1.12 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# Parts taken from Estraier:
|
||||
#================================================================
|
||||
# Estraier: a personal full-text search system
|
||||
# Copyright (C) 2003-2004 Mikio Hirabayashi
|
||||
#================================================================
|
||||
#================================================================
|
||||
# Extract text from an Okular annotations (notes) file
|
||||
#================================================================
|
||||
|
||||
# set variables
|
||||
LANG=C ; export LANG
|
||||
LC_ALL=C ; export LC_ALL
|
||||
# Identity used in the usage text and in error messages: this is the
# Okular notes filter, not the gnumeric one it was derived from.
progname="rclokulnote"
filetype="okular notes"
|
||||
|
||||
|
||||
#RECFILTCOMMONCODE
|
||||
##############################################################################
|
||||
# !! Leave the previous line unmodified!! Code imported from the
|
||||
# recfiltcommon file
|
||||
|
||||
# Utility code common to all shell filters. This could be sourced at run
|
||||
# time, but it's slightly more efficient to include the code in the
|
||||
# filters at build time (with a sed script).
|
||||
|
||||
# Describe error in a way that can be interpreted by our caller
|
||||
# senderror args...: report a fatal filter error and terminate the script.
# Writes "RECFILTERROR <args>" to stdout and ":2:$progname::: <args>" to
# stderr, then exits with status 1. The arguments are expanded unquoted
# ($*), so they are re-split into words and joined with single spaces.
senderror()
|
||||
{
|
||||
# Error line on stdout, introduced by the RECFILTERROR keyword
echo RECFILTERROR $*
|
||||
# Also alert on stderr just in case
|
||||
echo ":2:$progname::: $*" 1>&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
# iscmd name: return 0 if name designates an executable file that is not a
# directory, 1 otherwise. A name containing a slash is tested as given;
# any other name is looked up in each directory listed in $PATH.
iscmd()
|
||||
{
|
||||
cmd=$1
|
||||
case $cmd in
|
||||
# Name contains a slash: test that path directly, no PATH search
*/*)
|
||||
if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
|
||||
# Plain command name: search the PATH directories
*)
|
||||
# Split $PATH on ":" into the positional parameters, then restore IFS
oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
|
||||
# Succeed on the first directory holding an executable, non-directory $cmd
for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
|
||||
return 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
# checkcmds name...: check that every named helper command is available
# according to iscmd. The first one that is not found is reported with
# "senderror HELPERNOTFOUND <name>", which terminates the script.
checkcmds()
|
||||
{
|
||||
for cmd in $*;do
|
||||
if iscmd $cmd
|
||||
then
|
||||
# Placeholder assignment: only there so that the "then" branch is not empty
a=1
|
||||
else
|
||||
senderror HELPERNOTFOUND $cmd
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
# show help message
|
||||
# Exactly one argument is required: any other argument count, or a single
# "--help" argument, prints the usage text and exits with status 1.
if test $# -ne 1 -o "$1" = "--help"
|
||||
then
|
||||
echo "Convert a $filetype file to HTML text for Recoll indexing."
|
||||
echo "Usage: $progname [infile]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Keep the single argument as the input file name
infile="$1"
|
||||
|
||||
# check the input file existence (may be '-' for stdin)
|
||||
# Any name other than "-" must be an existing regular file (-f); the X
# prefix keeps the comparison a plain string test whatever the name is.
if test "X$infile" != X- -a ! -f "$infile"
|
||||
then
|
||||
senderror INPUTNOSUCHFILE "$infile"
|
||||
fi
|
||||
|
||||
# protect access to our temp files and directories
|
||||
umask 77
|
||||
|
||||
##############################################################################
|
||||
# !! Leave the following line unmodified !
|
||||
#ENDRECFILTCOMMONCODE
|
||||
|
||||
checkcmds xsltproc
|
||||
|
||||
# Apply the stylesheet read from stdin (the here-document that follows) to
# the notes file. The file name is quoted so that a path containing
# whitespace or glob characters reaches xsltproc as a single argument.
xsltproc --novalid --nonet - "$infile" <<EOF
|
||||
<?xml version="1.0"?>
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||
|
||||
<xsl:output method="html" encoding="UTF-8"/>
|
||||
<xsl:strip-space elements="*" />
|
||||
|
||||
|
||||
<xsl:template match="/">
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<title>
|
||||
Okular notes about: <xsl:value-of select="/documentInfo/@url" />
|
||||
</title>
|
||||
</head>
|
||||
<body>
|
||||
<xsl:apply-templates />
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="node()">
|
||||
<xsl:apply-templates select="@* | node() "/>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="text()">
|
||||
<p><xsl:value-of select="."/></p>
|
||||
<xsl:text >
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="@contents|@author">
|
||||
<p><xsl:value-of select="local-name()"/>=<xsl:value-of select="." /></p>
|
||||
<xsl:text >
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="@*"/>
|
||||
|
||||
</xsl:stylesheet>
|
||||
EOF
|
||||
|
||||
@ -74,9 +74,11 @@ application/x-chm = execm rclchm
|
||||
application/x-dvi = exec rcldvi
|
||||
application/x-flac = execm rclaudio
|
||||
application/x-gnuinfo = execm rclinfo
|
||||
application/x-gnumeric = exec rclgnm
|
||||
application/x-kword = exec rclkwd
|
||||
application/x-lyx = exec rcllyx
|
||||
application/x-mimehtml = internal message/rfc822
|
||||
application/x-okular-notes = exec rclokulnote
|
||||
application/x-perl = internal text/plain
|
||||
application/x-rar = execm rclrar;charset=default
|
||||
application/x-scribus = exec rclscribus
|
||||
@ -153,8 +155,10 @@ application/x-abiword = wordprocessing
|
||||
application/x-dvi = document
|
||||
application/x-flac = sownd
|
||||
application/x-fsdirectory = folder
|
||||
application/x-gnumeric = spreadsheet
|
||||
application/x-kword = wordprocessing
|
||||
application/x-lyx = wordprocessing
|
||||
application/x-okular-notes = document
|
||||
application/x-scribus = document
|
||||
application/x-gnuinfo = document
|
||||
application/x-tex = wordprocessing
|
||||
@ -207,6 +211,7 @@ text = \
|
||||
application/x-dvi \
|
||||
application/x-kword \
|
||||
application/x-lyx \
|
||||
application/x-okular-notes \
|
||||
application/x-perl \
|
||||
application/x-scribus \
|
||||
application/x-gnuinfo \
|
||||
@ -233,7 +238,8 @@ spreadsheet = \
|
||||
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet \
|
||||
application/vnd.openxmlformats-officedocument.spreadsheetml.template \
|
||||
application/vnd.sun.xml.calc \
|
||||
application/vnd.sun.xml.calc.template
|
||||
application/vnd.sun.xml.calc.template \
|
||||
application/x-gnumeric
|
||||
|
||||
presentation = application/vnd.ms-powerpoint \
|
||||
application/vnd.openxmlformats-officedocument.presentationml.template \
|
||||
|
||||
@ -96,6 +96,7 @@
|
||||
.scd = application/x-scribus
|
||||
.info = application/x-gnuinfo
|
||||
.kwd = application/x-kword
|
||||
.gnumeric = application/x-gnumeric
|
||||
|
||||
.wpd = application/vnd.wordperfect
|
||||
|
||||
@ -133,7 +134,7 @@
|
||||
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz .log.gz .md5 .map \
|
||||
.o .lib .dll .a .sys .exe .com \
|
||||
.dat .bak .rdf .log .db .msf .pid \
|
||||
.gnm .gnumeric \
|
||||
.gnm \
|
||||
,v ~ #
|
||||
|
||||
# Special handling of .txt files inside ~/.gaim and ~/.purple directories
|
||||
@ -185,3 +186,9 @@ recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz .log.gz .md5 .map \
|
||||
.8 = text/x-man
|
||||
.9 = text/x-man
|
||||
.n = text/x-man
|
||||
|
||||
# Special handling for okular notes
|
||||
[~/.kde4/share/apps/okular/docdata]
|
||||
.xml = application/x-okular-notes
|
||||
[~/.kde/share/apps/okular/docdata]
|
||||
.xml = application/x-okular-notes
|
||||
|
||||
@ -54,6 +54,7 @@ application/vnd.wordperfect = libreoffice %f
|
||||
application/x-chm = kchmviewer %f
|
||||
application/x-fsdirectory = dolphin %f
|
||||
application/x-gnuinfo = xterm -e "info -f %f"
|
||||
application/x-gnumeric = gnumeric %f
|
||||
|
||||
application/x-flac = rhythmbox %f
|
||||
audio/mpeg = rhythmbox %f
|
||||
|
||||
16
tests/gnumeric/gnumeric.sh
Executable file
16
tests/gnumeric/gnumeric.sh
Executable file
@ -0,0 +1,16 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Regression test for the gnumeric filter: run three queries and compare
# their output with the expected results stored in ${myname}.txt.

# The shared test helpers live one level above this script's directory
topdir=`dirname $0`/..
|
||||
# NOTE(review): shared.sh is not visible here; it presumably defines
# initvariables, checkresult and the my* variables used below - confirm.
. $topdir/shared.sh
|
||||
|
||||
initvariables $0
|
||||
|
||||
# Run the queries in a subshell so that their combined output can be
# redirected and filtered as a whole.
(
|
||||
recollq author=gnumericAuthor
|
||||
recollq gnumerictext
|
||||
recollq gnumericcommentaire
|
||||
# stderr goes to $mystderr; lines starting with "Recoll query: " are
# dropped from stdout before it is saved to $mystdout
) 2> $mystderr | egrep -v '^Recoll query: ' > $mystdout
|
||||
|
||||
# Compare with the expected output, ignoring whitespace differences (-w)
diff -w ${myname}.txt $mystdout > $mydiffs 2>&1
|
||||
|
||||
checkresult
|
||||
5
tests/gnumeric/gnumeric.txt
Normal file
5
tests/gnumeric/gnumeric.txt
Normal file
@ -0,0 +1,5 @@
|
||||
0 results
|
||||
1 results
|
||||
application/x-gnumeric [file:///home/dockes/projets/fulltext/testrecoll/gnumeric/trygnumeric.gnumeric] [The gnumericTitle] 2111 bytes
|
||||
1 results
|
||||
application/x-gnumeric [file:///home/dockes/projets/fulltext/testrecoll/gnumeric/trygnumeric.gnumeric] [The gnumericTitle] 2111 bytes
|
||||
@ -142,6 +142,9 @@
|
||||
<li><span class="literal">OpenOffice</span> files.</li>
|
||||
|
||||
<li><span class="literal">SVG</span> files.</li>
|
||||
<li><span class="literal">Gnumeric</span> files.</li>
|
||||
<li><span class="literal">Okular</span> annotations files.</li>
|
||||
|
||||
</ul>
|
||||
|
||||
<h5>Other formats</h5>
|
||||
@ -200,16 +203,30 @@
|
||||
"http://www.gnu.org/software/ghostscript/ghostscript.html">
|
||||
ghostscript</a> and <a href=
|
||||
"http://www.cs.wisc.edu/~ghost/doc/pstotext.htm">pstotext</a>.
|
||||
Actually the pstotext 1.9 found at the latter link has a
|
||||
problem with file names using special shell characters, and
|
||||
you should either use the version packaged for your system
|
||||
which is probably patched, or apply the Debian patch which
|
||||
is stored <a href=
|
||||
Pstotext 1.9 has a serious issue with special characters in
|
||||
file names, and you should either use the version packaged for
|
||||
your system which is probably patched, or apply the Debian
|
||||
patch which is stored <a href=
|
||||
"files/pstotext-1.9_4-debian.patch">here</a> for
|
||||
convenience. See
|
||||
http://packages.debian.org/squeeze/pstotext and
|
||||
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=356988 for
|
||||
references/explanations.</li>
|
||||
convenience. See http://packages.debian.org/squeeze/pstotext
|
||||
and http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=356988
|
||||
for references/explanations.
|
||||
<blockquote>
|
||||
To make things a bit easier, I also
|
||||
store <a href="files/pstotext-1.9-patched.tar.gz">an
|
||||
already patched version</a>. I added an
|
||||
install target to the Makefile... This installs to
|
||||
/usr/local, use <i>make install PREFIX=/usr</i> to
|
||||
change. So all you need is:
|
||||
<pre>
|
||||
tar xvzf pstotext-1.9-patched.tar.gz
|
||||
cd pstotext-1.9-patched
|
||||
make
|
||||
make install
|
||||
</pre>
|
||||
</blockquote>
|
||||
</li>
|
||||
|
||||
|
||||
<li><span class="literal">RTF</span> files with <a href=
|
||||
"http://www.gnu.org/software/unrtf/unrtf.html">unrtf</a>. Please
|
||||
|
||||
@ -64,37 +64,49 @@
|
||||
<a href="mimeconf">mimeconf</a>
|
||||
<a href="mimeview">mimeview</a> </p>
|
||||
|
||||
<!--
|
||||
<p>Notes:</p>
|
||||
<blockquote>
|
||||
<p>All filters are up to date in Recoll 1.1.04, except rclics.</p>
|
||||
<p>If you are using an older version, you should update to 1.13.04.</p>
|
||||
<p>All filters are up to date in Recoll 1.16.2, except
|
||||
rclchm, and the new ones for gnumeric and Okular annotations.</p>
|
||||
<p>Recoll 1.15 may benefit from some of the newer
|
||||
filters linked below.</p>
|
||||
<p>If you are running an older recoll version, you really
|
||||
should upgrade.</p>
|
||||
</blockquote>
|
||||
-->
|
||||
|
||||
<h2>Okular annotations</h2>
|
||||
<p><a href="rclokulnote">rclokulnote</a>. Okular lets you create
|
||||
annotations for PDF documents and stores them in xml format
|
||||
somewhere under ~/.kde. This filter does not do a nice job of
|
||||
formatting the data, but will at least let you find it...</p>
|
||||
|
||||
<h2>Gnumeric</h2>
|
||||
<p><a href="rclgnm">rclgnm</a>. Needs xsltproc and gunzip.</p>
|
||||
|
||||
<h2>Rar archive support</h2>
|
||||
<p><a href="rclrar">rclrar</a>. This needs the Python rarfile module.
|
||||
</p>
|
||||
<p><a href="rclrar">rclrar</a>. This is up to date in Recoll
|
||||
1.16.2 but may be added to Recoll 1.15. It needs the Python
|
||||
rarfile module. </p>
|
||||
|
||||
<h2>Mimehtml support</h2>
|
||||
<p>This is based on the internal mail filter, you just need to
|
||||
download and install the config files. Will only work with
|
||||
1.15.</p>
|
||||
download and install the configuration files (mimemap and
|
||||
mimeconf). Will only work with 1.15 and later.</p>
|
||||
|
||||
<h2>Konqueror webarchive (.war) filter</h2>
|
||||
<p><a href="rclwar">rclwar</a></p>
|
||||
|
||||
<h2>Updated zip archive filter</h2>
|
||||
<p>The filter is corrected to handle utf-8 paths in zip archives:
|
||||
<a href="rclzip">rclzip</a></p>
|
||||
|
||||
<a href="rclzip">rclzip</a>. Up to date in Recoll 1.16, but
|
||||
may be useful with Recoll 1.15</p>
|
||||
|
||||
<h2>Updated audio tag filter</h2>
|
||||
<p>The mutagen-based rclaudio filter delivered with recoll 1.14.2
|
||||
used a very recent mutagen interface which will only work with
|
||||
mutagen versions after 1.17 (probably. at least works with 1.19,
|
||||
doesn't with 1.15).
|
||||
You can download the <a href="rclaudio">corrected script here</a>.
|
||||
You can download the <a href="rclaudio">corrected script
|
||||
here</a>. Not useful with Recoll 1.15 or 1.16.
|
||||
</p>
|
||||
|
||||
</div>
|
||||
|
||||
@ -63,7 +63,8 @@
|
||||
the <a href="usermanual/rcl.search.tips.html">search
|
||||
tips</a> might prove useful ! Also the
|
||||
<a href="http://bitbucket.org/medoc/recoll/wiki/FaqsAndHowTos">
|
||||
Faqs and Howtos</a> on bitbucket.org.</p>
|
||||
Faqs and Howtos</a> on bitbucket.org, and some contributed
|
||||
customisation/beautification tricks.</p>
|
||||
|
||||
|
||||
<h2>News: </h2>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user