diff --git a/src/Makefile.am b/src/Makefile.am index 407a1dbd..8fe5de6a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -559,6 +559,7 @@ rclpychm: rclpychm-install: (cd python/pychm || exit 1; \ set -x; \ + mkdir -p ${librcldir}; \ mv dist/pychm*.egg ${librcldir};\ ) rclpychm-clean: @@ -636,9 +637,10 @@ filters/rclepub \ filters/rclepub1 \ filters/rclexec1.py \ filters/rclexecm.py \ -filters/rclfb2 \ +filters/rclfb2.py \ filters/rclgaim \ -filters/rclgnm \ +filters/rclgenxslt.py \ +filters/rclgnm.py \ filters/rclics \ filters/rclimg \ filters/rclimg.py \ @@ -651,7 +653,7 @@ filters/rcllyx \ filters/rclman \ filters/rclmidi.py \ filters/rclpdf.py \ -filters/rclokulnote \ +filters/rclokulnote.py \ filters/rclopxml.py \ filters/rclppt.py \ filters/rclpurple \ @@ -660,7 +662,6 @@ filters/rclrar \ filters/rclrtf.py \ filters/rclscribus \ filters/rclshowinfo \ -filters/rclsiduxman \ filters/rclsoff.py \ filters/rclsoff-flat.py \ filters/rclsvg.py \ diff --git a/src/desktop/hotrecoll.py b/src/desktop/hotrecoll.py index 28cb7c35..9eb5e0d9 100755 --- a/src/desktop/hotrecoll.py +++ b/src/desktop/hotrecoll.py @@ -1,4 +1,7 @@ -#!/usr/bin/python +#!/usr/bin/python2 +# +# wnck does not have a python3 binding as far as I can see (or at +# least it's not packaged by, e.g. Debian. So python2 only for now. # # This script should be linked to a keyboard shortcut. 
Under gnome, # you can do this from the main preferences menu, or directly execute diff --git a/src/filters/rclabw.py b/src/filters/rclabw.py index ca1d77df..9d9e5201 100755 --- a/src/filters/rclabw.py +++ b/src/filters/rclabw.py @@ -20,7 +20,7 @@ from __future__ import print_function import sys import rclexecm -import rclxslt +import rclgenxslt stylesheet_all = ''' ''' - -class ABWExtractor: - def __init__(self, em): - self.em = em - self.currentindex = 0 - - def extractone(self, params): - if "filename:" not in params: - self.em.rclog("extractone: no mime or file name") - return (False, "", "", rclexecm.RclExecM.eofnow) - fn = params["filename:"] - try: - data = open(fn, 'rb').read() - docdata = rclxslt.apply_sheet_data(stylesheet_all, data) - except Exception as err: - self.em.rclog("%s: bad data: %s" % (fn, err)) - return (False, "", "", rclexecm.RclExecM.eofnow) - - return (True, docdata, "", rclexecm.RclExecM.eofnext) - - ###### File type handler api, used by rclexecm ----------> - def openfile(self, params): - self.currentindex = 0 - return True - - def getipath(self, params): - return self.extractone(params) - - def getnext(self, params): - if self.currentindex >= 1: - return (False, "", "", rclexecm.RclExecM.eofnow) - else: - ret= self.extractone(params) - self.currentindex += 1 - return ret - - if __name__ == '__main__': proto = rclexecm.RclExecM() - extract = ABWExtractor(proto) + extract = rclgenxslt.XSLTExtractor(proto, stylesheet_all) rclexecm.main(proto, extract) diff --git a/src/filters/rcldvi b/src/filters/rcldvi index 8d2b41f1..98a51bef 100755 --- a/src/filters/rcldvi +++ b/src/filters/rcldvi @@ -17,11 +17,11 @@ # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # -# Extract text from a dvi file by either executing dvitops and rclps -# or using catdvi. 
# Text extraction helpers. Each one takes the path of the dvi file as $1
# and writes the extracted text to stdout. The argument is quoted so that
# paths containing whitespace or glob characters reach the helper intact.

# dvips renders the dvi file to PostScript (its stderr is discarded),
# pstotext extracts the text, and iconv converts it from cp1252 to UTF-8,
# dropping unconvertible characters (-c) without warnings (-s).
decoderdvips()
{
    dvips -f "$1" 2> /dev/null | pstotext | iconv -f cp1252 -t utf-8 -c -s
}

# Direct text extraction with catdvi.
decodercatdvi()
{
    catdvi "$1"
}

# Choose the decoder: the dvips/pstotext pair is preferred when both
# commands exist, else catdvi is used.
# iscmd only looks at its first argument, so the two checks must be two
# separate calls: 'iscmd dvips -a iscmd pstotext' never tested pstotext.
decoder=""
if iscmd dvips && iscmd pstotext ; then
    decoder=decoderdvips
elif iscmd catdvi ; then
    decoder=decodercatdvi
fi

# No usable helper: report it in the form the caller understands.
if test "X$decoder" = X ; then
    senderror HELPERNOTFOUND dvips or catdvi
fi
Leave the previous line unmodified!! Code imported from the -# recfiltcommon file - -# Utility code common to all shell filters. This could be sourced at run -# time, but it's slightly more efficient to include the code in the -# filters at build time (with a sed script). - -# Describe error in a way that can be interpreted by our caller -senderror() -{ - echo RECFILTERROR $* - # Also alert on stderr just in case - echo ":2:$progname::: $*" 1>&2 - exit 1 -} - -iscmd() -{ - cmd=$1 - case $cmd in - */*) - if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;; - *) - oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs - for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done - return 1 ;; - esac -} - -checkcmds() -{ - for cmd in $*;do - if iscmd $cmd - then - a=1 - else - senderror HELPERNOTFOUND $cmd - fi - done -} - -# show help message -if test $# -ne 1 -o "$1" = "--help" -then - echo "Convert a $filetype file to HTML text for Recoll indexing." - echo "Usage: $progname [infile]" - exit 1 -fi - -infile="$1" - -# check the input file existence (may be '-' for stdin) -if test "X$infile" != X- -a ! -f "$infile" -then - senderror INPUTNOSUCHFILE "$infile" -fi - -# protect access to our temp files and directories -umask 77 - -############################################################################## -# !! Leave the following line unmodified ! -#ENDRECFILTCOMMONCODE - -checkcmds xsltproc - -xsltproc --nonet --novalid - "$infile" < - - - - - - - - - - - - - - - - - - - - - - - - - <xsl:value-of select="."/> - - - - - author - - - - - - - - - - - - - - - - -

-
-
- -
-EOF diff --git a/src/filters/rclfb2.py b/src/filters/rclfb2.py new file mode 100755 index 00000000..ad36f7e4 --- /dev/null +++ b/src/filters/rclfb2.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# Copyright (C) 2014 J.F.Dockes +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +###################################### + +from __future__ import print_function + +import sys +import rclexecm +import rclxslt +import rclgenxslt + +stylesheet_all = ''' + + + + + + + + + + + + + + + + + + + + + + + + + <xsl:value-of select="."/> + + + + + author + + + + + + + + + + + + + + + + +

+
+
+ +
+''' + +if __name__ == '__main__': + proto = rclexecm.RclExecM() + extract = rclgenxslt.XSLTExtractor(proto, stylesheet_all) + rclexecm.main(proto, extract) diff --git a/src/filters/rclgenxslt.py b/src/filters/rclgenxslt.py new file mode 100755 index 00000000..2135a443 --- /dev/null +++ b/src/filters/rclgenxslt.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# Copyright (C) 2018 J.F.Dockes +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
class XSLTExtractor:
    """Single-document file handler which applies an XSLT stylesheet.

    The whole input file is read (optionally through gzip), transformed
    with rclxslt.apply_sheet_data() and returned as the one and only
    document of the file. The openfile/getipath/getnext methods implement
    the file type handler api used by rclexecm.
    """

    def __init__(self, em, stylesheet, gzip=False):
        """Remember the execution context and the transformation to apply.

        em         -- object used for logging through its rclog() method
                      (the filters pass an rclexecm.RclExecM instance).
        stylesheet -- stylesheet handed to rclxslt.apply_sheet_data().
        gzip       -- when true, the input file is read through gzip.open().
        """
        self.em = em
        self.currentindex = 0
        self.stylesheet = stylesheet
        # The 'gzip' parameter hides the gzip module inside this method
        # only; the other methods see the module.
        self.dogz = gzip

    def _readdata(self, fn):
        """Return the whole content of fn as bytes, gunzipped if requested.

        The file is closed before returning, whether or not the read
        succeeds; errors are left to propagate to the caller.
        """
        opener = gzip.open if self.dogz else open
        with opener(fn, 'rb') as f:
            return f.read()

    def extractone(self, params):
        """Transform the file named by params["filename:"].

        Returns a 4-tuple (ok, data, "", eof indicator):
        - (True, transformed data, "", RclExecM.eofnext) on success,
        - (False, "", "", RclExecM.eofnow) if the file name is missing, or
          if reading or transforming the file raised an exception (the
          error is logged through em.rclog()).
        """
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]
        try:
            data = self._readdata(fn)
            docdata = rclxslt.apply_sheet_data(self.stylesheet, data)
        except Exception as err:
            # Any failure (unreadable file, bad gzip data, stylesheet
            # error) is reported as a bad document instead of killing the
            # filter process.
            self.em.rclog("%s: bad data: %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        return (True, docdata, "", rclexecm.RclExecM.eofnext)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Start processing a new file: reset the document counter."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the document for the file; same result as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the single document on the first call after openfile().

        Later calls return the (False, "", "", RclExecM.eofnow) tuple.
        """
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(params)
        self.currentindex += 1
        return ret
-progname="rclgnumeric" -filetype=gnumeric - - -#RECFILTCOMMONCODE -############################################################################## -# !! Leave the previous line unmodified!! Code imported from the -# recfiltcommon file - -# Utility code common to all shell filters. This could be sourced at run -# time, but it's slightly more efficient to include the code in the -# filters at build time (with a sed script). - -# Describe error in a way that can be interpreted by our caller -senderror() -{ - echo RECFILTERROR $* - # Also alert on stderr just in case - echo ":2:$progname::: $*" 1>&2 - exit 1 -} - -iscmd() -{ - cmd=$1 - case $cmd in - */*) - if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;; - *) - oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs - for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done - return 1 ;; - esac -} - -checkcmds() -{ - for cmd in $*;do - if iscmd $cmd - then - a=1 - else - senderror HELPERNOTFOUND $cmd - fi - done -} - -# show help message -if test $# -ne 1 -o "$1" = "--help" -then - echo "Convert a $filetype file to HTML text for Recoll indexing." - echo "Usage: $progname [infile]" - exit 1 -fi - -infile="$1" - -# check the input file existence (may be '-' for stdin) -if test "X$infile" != X- -a ! -f "$infile" -then - senderror INPUTNOSUCHFILE "$infile" -fi - -# protect access to our temp files and directories -umask 77 - -############################################################################## -# !! Leave the following line unmodified ! -#ENDRECFILTCOMMONCODE - -checkcmds xsltproc gunzip - -# We need a temporary file -if test z"$RECOLL_TMPDIR" != z; then - ttdir=$RECOLL_TMPDIR -elif test z"$TMPDIR" != z ; then - ttdir=$TMPDIR -else - ttdir=/tmp -fi -tmpfile=$ttdir/rclgnm.XXXXXX - -tmpfile=`mktemp "$tmpfile"` -if [ $? -ne 0 ]; then - senderror "$0: Can't create temp file, exiting..." 
-fi - -cleanup() -{ - rm -f $tmpfile -} - -trap cleanup EXIT HUP QUIT INT TERM - -gunzip < $1 > $tmpfile || senderror "Cant uncompress input" -xsltproc --novalid --nonet - $tmpfile < - - - - - - - - - - - - - - - - - - - - - date - - - - - - - abstract - - - - - - - keywords - - - - - - - keywords - - - - - - <xsl:value-of select="."/> - - - - - author - - - - - - - -

-
- - -
-
- -
-EOF - diff --git a/src/filters/rclgnm.py b/src/filters/rclgnm.py new file mode 100755 index 00000000..67ff4f41 --- /dev/null +++ b/src/filters/rclgnm.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +# Copyright (C) 2014 J.F.Dockes +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +###################################### + +from __future__ import print_function + +import sys +import rclexecm +import rclgenxslt + + +stylesheet_all = ''' + + + + + + + + + + + + + + + + + + + + + date + + + + + + + abstract + + + + + + + keywords + + + + + + + keywords + + + + + + <xsl:value-of select="."/> + + + + + author + + + + + + + +

+
+ + +
+
+ +
+''' + + +if __name__ == '__main__': + proto = rclexecm.RclExecM() + extract = rclgenxslt.XSLTExtractor(proto, stylesheet_all, gzip=True) + rclexecm.main(proto, extract) + diff --git a/src/filters/rclokulnote b/src/filters/rclokulnote deleted file mode 100755 index 70f6432e..00000000 --- a/src/filters/rclokulnote +++ /dev/null @@ -1,130 +0,0 @@ -#!/bin/sh -# @(#$Id: rclsoff,v 1.12 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes -# Parts taken from Estraier: -#================================================================ -# Estraier: a personal full-text search system -# Copyright (C) 2003-2004 Mikio Hirabayashi -#================================================================ -#================================================================ -# Extract text from a gnumeric spreadsheet -#================================================================ - -# set variables -LANG=C ; export LANG -LC_ALL=C ; export LC_ALL -progname="rclgnumeric" -filetype=gnumeric - - -#RECFILTCOMMONCODE -############################################################################## -# !! Leave the previous line unmodified!! Code imported from the -# recfiltcommon file - -# Utility code common to all shell filters. This could be sourced at run -# time, but it's slightly more efficient to include the code in the -# filters at build time (with a sed script). - -# Describe error in a way that can be interpreted by our caller -senderror() -{ - echo RECFILTERROR $* - # Also alert on stderr just in case - echo ":2:$progname::: $*" 1>&2 - exit 1 -} - -iscmd() -{ - cmd=$1 - case $cmd in - */*) - if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;; - *) - oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs - for d in $*;do test -x $d/$cmd -a ! 
-d $d/$cmd && return 0;done - return 1 ;; - esac -} - -checkcmds() -{ - for cmd in $*;do - if iscmd $cmd - then - a=1 - else - senderror HELPERNOTFOUND $cmd - fi - done -} - -# show help message -if test $# -ne 1 -o "$1" = "--help" -then - echo "Convert a $filetype file to HTML text for Recoll indexing." - echo "Usage: $progname [infile]" - exit 1 -fi - -infile="$1" - -# check the input file existence (may be '-' for stdin) -if test "X$infile" != X- -a ! -f "$infile" -then - senderror INPUTNOSUCHFILE "$infile" -fi - -# protect access to our temp files and directories -umask 77 - -############################################################################## -# !! Leave the following line unmodified ! -#ENDRECFILTCOMMONCODE - -checkcmds xsltproc - -xsltproc --novalid --nonet - "$infile" < - - - - - - - - - - - - Okular notes about: <xsl:value-of select="/documentInfo/@url" /> - - - - - - - - - - - - - -

- - -
- - -

- - -
- - - -
-EOF - diff --git a/src/filters/rclokulnote.py b/src/filters/rclokulnote.py new file mode 100755 index 00000000..cf6c6c4e --- /dev/null +++ b/src/filters/rclokulnote.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# Copyright (C) 2014 J.F.Dockes +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +###################################### +from __future__ import print_function + +import sys +import rclexecm +import rclgenxslt + +stylesheet_all = ''' + + + + + + + + + + + Okular notes about: <xsl:value-of select="/documentInfo/@url" /> + + + + + + + + + + + + + +

+ + +
+ + +

+ + +
+ + + +
+''' + +if __name__ == '__main__': + proto = rclexecm.RclExecM() + extract = rclgenxslt.XSLTExtractor(proto, stylesheet_all) + rclexecm.main(proto, extract) + diff --git a/src/filters/rclsiduxman b/src/filters/rclsiduxman deleted file mode 100755 index b9bbe723..00000000 --- a/src/filters/rclsiduxman +++ /dev/null @@ -1,92 +0,0 @@ -#!/bin/sh -# @(#$Id: rclsiduxman,v 1.1 2008-06-09 09:12:05 dockes Exp $ (C) 2004 J.F.Dockes -# Parts taken from Estraier: -#================================================================ -# Estraier: a personal full-text search system -# Copyright (C) 2003-2004 Mikio Hirabayashi -#================================================================ -#================================================================ -# Strip the menu part from sidux manual pages to improve search precision -#================================================================ - -# set variables -LANG=C ; export LANG -LC_ALL=C ; export LC_ALL -progname="rclsiduxman" -filetype="sidux manual htm" - - -#RECFILTCOMMONCODE -############################################################################## -# !! Leave the previous line unmodified!! Code imported from the -# recfiltcommon file - -# Utility code common to all shell filters. This could be sourced at run -# time, but it's slightly more efficient to include the code in the -# filters at build time (with a sed script). - -# Describe error in a way that can be interpreted by our caller -senderror() -{ - echo RECFILTERROR $* - # Also alert on stderr just in case - echo ":2:$progname::: $*" 1>&2 - exit 1 -} - -iscmd() -{ - cmd=$1 - case $cmd in - */*) - if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;; - *) - oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs - for d in $*;do test -x $d/$cmd -a ! 
-d $d/$cmd && return 0;done - return 1 ;; - esac -} - -checkcmds() -{ - for cmd in $*;do - if iscmd $cmd - then - a=1 - else - senderror HELPERNOTFOUND $cmd - fi - done -} - -# show help message -if test $# -ne 1 -o "$1" = "--help" -then - echo "Convert a $filetype file to HTML text for Recoll indexing." - echo "Usage: $progname [infile]" - exit 1 -fi - -infile="$1" - -# check the input file existence (may be '-' for stdin) -if test "X$infile" != X- -a ! -f "$infile" -then - senderror INPUTNOSUCHFILE "$infile" -fi - -# protect access to our temp files and directories -umask 77 - -############################################################################## -# !! Leave the following line unmodified ! -#ENDRECFILTCOMMONCODE - -checkcmds sed -# Delete everything from