From 47f76c6c42ffcd2ab828d911d5efee4afe6d56f0 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Mon, 8 Feb 2010 11:25:08 +0100 Subject: [PATCH] Add support for indexing fictionbook documents --- src/filters/rclfb2 | 140 ++++++++++++++++++++++++++++++++++++++++ src/sampleconf/mimeconf | 3 + src/sampleconf/mimemap | 2 + 3 files changed, 145 insertions(+) create mode 100755 src/filters/rclfb2 diff --git a/src/filters/rclfb2 b/src/filters/rclfb2 new file mode 100755 index 00000000..d6dcdea2 --- /dev/null +++ b/src/filters/rclfb2 @@ -0,0 +1,140 @@ +#!/bin/sh +# @(#$Id: rclopxml,v 1.3 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes +#================================================================ +# rclfb2 +# Extract text from an fb2 ebook (xml) +#================================================================ + +# set variables +LANG=C ; export LANG +LC_ALL=C ; export LC_ALL +progname=rclfb2 +filetype=fb2 + + +#RECFILTCOMMONCODE +############################################################################## +# !! Leave the previous line unmodified!! Code imported from the +# recfiltcommon file + +# Utility code common to all shell filters. This could be sourced at run +# time, but it's slightly more efficient to include the code in the +# filters at build time (with a sed script). + +# Describe error in a way that can be interpreted by our caller +senderror() +{ + echo RECFILTERROR $* + # Also alert on stderr just in case + echo ":2:$progname::: $*" 1>&2 + exit 1 +} + +iscmd() +{ + cmd=$1 + case $cmd in + */*) + if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;; + *) + oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs + for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done + return 1 ;; + esac +} + +checkcmds() +{ + for cmd in $*;do + if iscmd $cmd + then + a=1 + else + senderror HELPERNOTFOUND $cmd + fi + done +} + +# show help message +if test $# -ne 1 -o "$1" = "--help" +then + echo "Convert a $filetype file to HTML text for Recoll indexing." + echo "Usage: $progname [infile]" + exit 1 +fi + +infile="$1" + +# check the input file existence (may be '-' for stdin) +if test "X$infile" != X- -a ! -f "$infile" +then + senderror INPUTNOSUCHFILE "$infile" +fi + +# protect access to our temp files and directories +umask 77 + +############################################################################## +# !! Leave the following line unmodified ! +#ENDRECFILTCOMMONCODE + +checkcmds xsltproc + +xsltproc - $infile < + + + + + + + + + + + + + + + + + + + + + + + + + <xsl:value-of select="."/> + + + + + author + + + + + + + + + + + + + + + + +

+
+
+ +
+EOF diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf index 8618472b..9e7af312 100644 --- a/src/sampleconf/mimeconf +++ b/src/sampleconf/mimeconf @@ -89,6 +89,7 @@ text/html = internal text/plain = internal text/rtf = exec unrtf --nopict --html;charset=iso-8859-1;mimetype=text/html text/x-c = internal +text/x-fictionbook = exec rclfb2 text/x-gaim-log = exec rclgaim text/x-html-sidux-man = exec rclsiduxman text/x-mail = internal @@ -151,6 +152,7 @@ text/html = html text/plain = txt text/x-c = source text/x-c++ = source +text/x-fictionbook = document text/x-html-sidux-man = sidux-book text/x-mail = message text/x-man = document @@ -187,6 +189,7 @@ text = \ text/rtf \ text/x-c \ text/x-c++ \ + text/x-fictionbook \ text/x-html-sidux-man \ text/x-man \ text/x-python \ diff --git a/src/sampleconf/mimemap b/src/sampleconf/mimemap index de4c3769..0a1b1234 100644 --- a/src/sampleconf/mimemap +++ b/src/sampleconf/mimemap @@ -107,6 +107,8 @@ .tiff = image/tiff .tif = image/tiff +.fb2 = text/x-fictionbook + # A list of suffixes (name endings) that we don't want to touch at all. # Having these explicitely listed speeds things up a bit by avoiding # unneeded decompression or 'file' calls. File names still get indexed if