diff --git a/src/Makefile.am b/src/Makefile.am
index 407a1dbd..8fe5de6a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -559,6 +559,7 @@ rclpychm:
rclpychm-install:
(cd python/pychm || exit 1; \
set -x; \
+ mkdir -p ${librcldir}; \
mv dist/pychm*.egg ${librcldir};\
)
rclpychm-clean:
@@ -636,9 +637,10 @@ filters/rclepub \
filters/rclepub1 \
filters/rclexec1.py \
filters/rclexecm.py \
-filters/rclfb2 \
+filters/rclfb2.py \
filters/rclgaim \
-filters/rclgnm \
+filters/rclgenxslt.py \
+filters/rclgnm.py \
filters/rclics \
filters/rclimg \
filters/rclimg.py \
@@ -651,7 +653,7 @@ filters/rcllyx \
filters/rclman \
filters/rclmidi.py \
filters/rclpdf.py \
-filters/rclokulnote \
+filters/rclokulnote.py \
filters/rclopxml.py \
filters/rclppt.py \
filters/rclpurple \
@@ -660,7 +662,6 @@ filters/rclrar \
filters/rclrtf.py \
filters/rclscribus \
filters/rclshowinfo \
-filters/rclsiduxman \
filters/rclsoff.py \
filters/rclsoff-flat.py \
filters/rclsvg.py \
diff --git a/src/desktop/hotrecoll.py b/src/desktop/hotrecoll.py
index 28cb7c35..9eb5e0d9 100755
--- a/src/desktop/hotrecoll.py
+++ b/src/desktop/hotrecoll.py
@@ -1,4 +1,7 @@
-#!/usr/bin/python
+#!/usr/bin/python2
+#
+# wnck does not have a python3 binding as far as I can see (or at
+# least it's not packaged by, e.g., Debian), so python2 only for now.
#
# This script should be linked to a keyboard shortcut. Under gnome,
# you can do this from the main preferences menu, or directly execute
diff --git a/src/filters/rclabw.py b/src/filters/rclabw.py
index ca1d77df..9d9e5201 100755
--- a/src/filters/rclabw.py
+++ b/src/filters/rclabw.py
@@ -20,7 +20,7 @@ from __future__ import print_function
import sys
import rclexecm
-import rclxslt
+import rclgenxslt
stylesheet_all = '''
'''
-
-class ABWExtractor:
- def __init__(self, em):
- self.em = em
- self.currentindex = 0
-
- def extractone(self, params):
- if "filename:" not in params:
- self.em.rclog("extractone: no mime or file name")
- return (False, "", "", rclexecm.RclExecM.eofnow)
- fn = params["filename:"]
- try:
- data = open(fn, 'rb').read()
- docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
- except Exception as err:
- self.em.rclog("%s: bad data: %s" % (fn, err))
- return (False, "", "", rclexecm.RclExecM.eofnow)
-
- return (True, docdata, "", rclexecm.RclExecM.eofnext)
-
- ###### File type handler api, used by rclexecm ---------->
- def openfile(self, params):
- self.currentindex = 0
- return True
-
- def getipath(self, params):
- return self.extractone(params)
-
- def getnext(self, params):
- if self.currentindex >= 1:
- return (False, "", "", rclexecm.RclExecM.eofnow)
- else:
- ret= self.extractone(params)
- self.currentindex += 1
- return ret
-
-
if __name__ == '__main__':
proto = rclexecm.RclExecM()
- extract = ABWExtractor(proto)
+ extract = rclgenxslt.XSLTExtractor(proto, stylesheet_all)
rclexecm.main(proto, extract)
diff --git a/src/filters/rcldvi b/src/filters/rcldvi
index 8d2b41f1..98a51bef 100755
--- a/src/filters/rcldvi
+++ b/src/filters/rcldvi
@@ -17,11 +17,11 @@
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
-# Extract text from a dvi file by either executing dvitops and rclps
-# or using catdvi. dvitops has given better results during tests, and is
-# chosen first if available, but the dvitops/rclps combination is much
-# slower than catdvi
-# set variables
+# Extract text from a dvi file by executing either dvips and pstotext or
+# catdvi. dvips has given better results during tests and is chosen first
+# if available, but dvips/pstotext is much slower than catdvi. In any case,
+# the program is not too good with special characters (e.g. ligatures).
+# set variables
LANG=C ; export LANG
LC_ALL=C ; export LC_ALL
progname="rcldvi"
@@ -94,26 +94,25 @@ umask 77
# !! Leave the following line unmodified !
#ENDRECFILTCOMMONCODE
-# Find rclps. Note: this only works because we are always executed with a
-# full path
-rclps=`dirname $0`/rclps
-
+decoderdvips()
+{   # dvips -> pstotext; pstotext output is treated as cp1252, recoded to utf-8
+    dvips -f "$1" 2> /dev/null | pstotext | iconv -f cp1252 -t utf-8 -c -s
+}
+decodercatdvi()
+{   # plain catdvi; the argument is quoted so paths with spaces survive
+    catdvi "$1"
+}
decoder=""
-if iscmd dvips -a iscmd pstotext ; then
- decoder=dvips
+if iscmd dvips && iscmd pstotext ; then  # iscmd only tests its first argument
+    decoder=decoderdvips
elif iscmd catdvi ; then
- decoder=catdvi
+    decoder=decodercatdvi
fi
if test X$decoder = X ; then
senderror HELPERNOTFOUND dvips or catdvi
fi
-if test X$decoder = Xdvips ; then
- $decoder -f < "$infile" 2> /dev/null | $rclps -
- exit $?
-fi
-
# The strange 'BEGIN' setup is to prevent 'file' from thinking this file
# is an awk program
$decoder "$infile" |
diff --git a/src/filters/rclfb2 b/src/filters/rclfb2
deleted file mode 100755
index 0d5ac772..00000000
--- a/src/filters/rclfb2
+++ /dev/null
@@ -1,139 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclopxml,v 1.3 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
-#================================================================
-# Extract text from an fb2 ebook (xml)
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname=rclfb2
-filetype=fb2
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
- echo RECFILTERROR $*
- # Also alert on stderr just in case
- echo ":2:$progname::: $*" 1>&2
- exit 1
-}
-
-iscmd()
-{
- cmd=$1
- case $cmd in
- */*)
- if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
- *)
- oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
- for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
- return 1 ;;
- esac
-}
-
-checkcmds()
-{
- for cmd in $*;do
- if iscmd $cmd
- then
- a=1
- else
- senderror HELPERNOTFOUND $cmd
- fi
- done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help"
-then
- echo "Convert a $filetype file to HTML text for Recoll indexing."
- echo "Usage: $progname [infile]"
- exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
- senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds xsltproc
-
-xsltproc --nonet --novalid - "$infile" <
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- author
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-EOF
diff --git a/src/filters/rclfb2.py b/src/filters/rclfb2.py
new file mode 100755
index 00000000..ad36f7e4
--- /dev/null
+++ b/src/filters/rclfb2.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+# Copyright (C) 2014 J.F.Dockes
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+######################################
+
+from __future__ import print_function
+
+import sys
+import rclexecm
+import rclxslt
+import rclgenxslt
+
+stylesheet_all = '''
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ author
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+'''
+
+if __name__ == '__main__':  # executed as a script, not imported
+    execm = rclexecm.RclExecM()
+    handler = rclgenxslt.XSLTExtractor(execm, stylesheet_all)
+    rclexecm.main(execm, handler)
diff --git a/src/filters/rclgenxslt.py b/src/filters/rclgenxslt.py
new file mode 100755
index 00000000..2135a443
--- /dev/null
+++ b/src/filters/rclgenxslt.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+# Copyright (C) 2018 J.F.Dockes
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+######################################
+from __future__ import print_function
+
+import sys
+import rclexecm
+import rclxslt
+import gzip
+
+class XSLTExtractor:
+    """Single-document rclexecm handler: runs a file through an XSLT sheet."""
+    def __init__(self, em, stylesheet, gzip=False):
+        self.em = em
+        self.currentindex = 0
+        self.stylesheet = stylesheet
+        self.dogz = gzip  # flag; shadows the gzip module in __init__ only
+
+    def extractone(self, params):
+        """Transform params["filename:"]; return an rclexecm 4-tuple."""
+        if "filename:" not in params:
+            self.em.rclog("extractone: no mime or file name")
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        fn = params["filename:"]
+        opener = gzip.open if self.dogz else open
+        try:
+            with opener(fn, 'rb') as f:  # closed even if read() fails
+                data = f.read()
+            docdata = rclxslt.apply_sheet_data(self.stylesheet, data)
+        except Exception as err:
+            self.em.rclog("%s: bad data: %s" % (fn, err))
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        return (True, docdata, "", rclexecm.RclExecM.eofnext)
+
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        """Reset the document counter; always returns True."""
+        self.currentindex = 0
+        return True
+
+    def getipath(self, params):
+        """Extract the document named by params, as extractone() does."""
+        return self.extractone(params)
+
+    def getnext(self, params):
+        """Return the document on the first call, an eofnow tuple after."""
+        if self.currentindex >= 1:
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        ret = self.extractone(params)
+        self.currentindex += 1
+        return ret
diff --git a/src/filters/rclgnm b/src/filters/rclgnm
deleted file mode 100755
index 0d30327b..00000000
--- a/src/filters/rclgnm
+++ /dev/null
@@ -1,191 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclsoff,v 1.12 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# Extract text from a gnumeric spreadsheet
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclgnumeric"
-filetype=gnumeric
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
- echo RECFILTERROR $*
- # Also alert on stderr just in case
- echo ":2:$progname::: $*" 1>&2
- exit 1
-}
-
-iscmd()
-{
- cmd=$1
- case $cmd in
- */*)
- if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
- *)
- oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
- for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
- return 1 ;;
- esac
-}
-
-checkcmds()
-{
- for cmd in $*;do
- if iscmd $cmd
- then
- a=1
- else
- senderror HELPERNOTFOUND $cmd
- fi
- done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help"
-then
- echo "Convert a $filetype file to HTML text for Recoll indexing."
- echo "Usage: $progname [infile]"
- exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
- senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds xsltproc gunzip
-
-# We need a temporary file
-if test z"$RECOLL_TMPDIR" != z; then
- ttdir=$RECOLL_TMPDIR
-elif test z"$TMPDIR" != z ; then
- ttdir=$TMPDIR
-else
- ttdir=/tmp
-fi
-tmpfile=$ttdir/rclgnm.XXXXXX
-
-tmpfile=`mktemp "$tmpfile"`
-if [ $? -ne 0 ]; then
- senderror "$0: Can't create temp file, exiting..."
-fi
-
-cleanup()
-{
- rm -f $tmpfile
-}
-
-trap cleanup EXIT HUP QUIT INT TERM
-
-gunzip < $1 > $tmpfile || senderror "Cant uncompress input"
-xsltproc --novalid --nonet - $tmpfile <
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- date
-
-
-
-
-
-
- abstract
-
-
-
-
-
-
- keywords
-
-
-
-
-
-
- keywords
-
-
-
-
-
-
-
-
-
-
- author
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-EOF
-
diff --git a/src/filters/rclgnm.py b/src/filters/rclgnm.py
new file mode 100755
index 00000000..67ff4f41
--- /dev/null
+++ b/src/filters/rclgnm.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+# Copyright (C) 2014 J.F.Dockes
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+######################################
+
+from __future__ import print_function
+
+import sys
+import rclexecm
+import rclgenxslt
+
+
+stylesheet_all = '''
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ date
+
+
+
+
+
+
+ abstract
+
+
+
+
+
+
+ keywords
+
+
+
+
+
+
+ keywords
+
+
+
+
+
+
+
+
+
+
+ author
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+'''
+
+
+if __name__ == '__main__':  # executed as a script, not imported
+    execm = rclexecm.RclExecM()
+    handler = rclgenxslt.XSLTExtractor(execm, stylesheet_all, gzip=True)
+    rclexecm.main(execm, handler)
+
diff --git a/src/filters/rclokulnote b/src/filters/rclokulnote
deleted file mode 100755
index 70f6432e..00000000
--- a/src/filters/rclokulnote
+++ /dev/null
@@ -1,130 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclsoff,v 1.12 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# Extract text from a gnumeric spreadsheet
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclgnumeric"
-filetype=gnumeric
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
- echo RECFILTERROR $*
- # Also alert on stderr just in case
- echo ":2:$progname::: $*" 1>&2
- exit 1
-}
-
-iscmd()
-{
- cmd=$1
- case $cmd in
- */*)
- if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
- *)
- oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
- for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
- return 1 ;;
- esac
-}
-
-checkcmds()
-{
- for cmd in $*;do
- if iscmd $cmd
- then
- a=1
- else
- senderror HELPERNOTFOUND $cmd
- fi
- done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help"
-then
- echo "Convert a $filetype file to HTML text for Recoll indexing."
- echo "Usage: $progname [infile]"
- exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
- senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds xsltproc
-
-xsltproc --novalid --nonet - "$infile" <
-
-
-
-
-
-
-
-
-
-
-
- Okular notes about:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-EOF
-
diff --git a/src/filters/rclokulnote.py b/src/filters/rclokulnote.py
new file mode 100755
index 00000000..cf6c6c4e
--- /dev/null
+++ b/src/filters/rclokulnote.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+# Copyright (C) 2014 J.F.Dockes
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+######################################
+from __future__ import print_function
+
+import sys
+import rclexecm
+import rclgenxslt
+
+stylesheet_all = '''
+
+
+
+
+
+
+
+
+
+
+ Okular notes about:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+'''
+
+if __name__ == '__main__':  # executed as a script, not imported
+    execm = rclexecm.RclExecM()
+    handler = rclgenxslt.XSLTExtractor(execm, stylesheet_all)
+    rclexecm.main(execm, handler)
+
diff --git a/src/filters/rclsiduxman b/src/filters/rclsiduxman
deleted file mode 100755
index b9bbe723..00000000
--- a/src/filters/rclsiduxman
+++ /dev/null
@@ -1,92 +0,0 @@
-#!/bin/sh
-# @(#$Id: rclsiduxman,v 1.1 2008-06-09 09:12:05 dockes Exp $ (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# Strip the menu part from sidux manual pages to improve search precision
-#================================================================
-
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclsiduxman"
-filetype="sidux manual htm"
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
- echo RECFILTERROR $*
- # Also alert on stderr just in case
- echo ":2:$progname::: $*" 1>&2
- exit 1
-}
-
-iscmd()
-{
- cmd=$1
- case $cmd in
- */*)
- if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
- *)
- oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
- for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
- return 1 ;;
- esac
-}
-
-checkcmds()
-{
- for cmd in $*;do
- if iscmd $cmd
- then
- a=1
- else
- senderror HELPERNOTFOUND $cmd
- fi
- done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help"
-then
- echo "Convert a $filetype file to HTML text for Recoll indexing."
- echo "Usage: $progname [infile]"
- exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
- senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds sed
-# Delete everything from