From 61e471a0e287bc904b349c446d6c2f275244d876 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Mon, 4 Jun 2018 15:49:21 +0200 Subject: [PATCH] use rclbasehandler in more filters --- src/Makefile.am | 1 + src/filters/rclbasehandler.py | 2 +- src/filters/rclexec1.py | 23 +++------------- src/filters/rclkar | 52 ++++------------------------------- 4 files changed, 12 insertions(+), 66 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 8fe5de6a..b024bec2 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -626,6 +626,7 @@ filters/rcl7z \ filters/rclabw.py \ filters/rclaptosidman \ filters/rclaudio \ +filters/rclbasehandler.py \ filters/rclbibtex.sh \ filters/rclcheckneedretry.sh \ filters/rclchm \ diff --git a/src/filters/rclbasehandler.py b/src/filters/rclbasehandler.py index fa504eed..859dc45d 100644 --- a/src/filters/rclbasehandler.py +++ b/src/filters/rclbasehandler.py @@ -16,7 +16,7 @@ # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Base for extractor classes. With some common generic implementations -# for the boilerplate functions, meant for single-document file handlers. +# for the boilerplate functions. from __future__ import print_function diff --git a/src/filters/rclexec1.py b/src/filters/rclexec1.py index 295fb714..2fdebb72 100644 --- a/src/filters/rclexec1.py +++ b/src/filters/rclexec1.py @@ -30,18 +30,18 @@ from __future__ import print_function import subprocess import rclexecm +from rclbasehandler import RclBaseHandler # This class has the code to execute the subprocess and call a # data-specific post-processor. Command and processor are supplied by # the object which we receive as a parameter, which in turn is defined # in the actual executable filter (e.g. rcldoc.py) -class Executor: +class Executor(RclBaseHandler): opt_ignxval = 1 def __init__(self, em, flt): - self.em = em + super(Executor, self).__init__(em) self.flt = flt - self.currentindex = 0 def runCmd(self, cmd, filename, postproc, opt): ''' Substitute parameters and execute command, process output @@ -109,19 +109,4 @@ class Executor: return (ok, data, "", rclexecm.RclExecM.eofnext) else: return (ok, "", "", rclexecm.RclExecM.eofnow) - - ###### File type handler api, used by rclexecm ----------> - def openfile(self, params): - self.currentindex = 0 - return True - - def getipath(self, params): - return self.extractone(params) - - def getnext(self, params): - if self.currentindex >= 1: - return (False, "", "", rclexecm.RclExecM.eofnow) - else: - ret= self.extractone(params) - self.currentindex += 1 - return ret + diff --git a/src/filters/rclkar b/src/filters/rclkar index c54ec7bc..e1981985 100755 --- a/src/filters/rclkar +++ b/src/filters/rclkar @@ -10,6 +10,7 @@ import os.path import string import re import codecs +from rclbasehandler import RclBaseHandler try: import rcllatinclass @@ -51,7 +52,7 @@ if PY3: else: nullchar = chr(0) -class KarTextExtractor: +class KarTextExtractor(RclBaseHandler): # Afaik, the only charset encodings with null bytes are variations on # utf-16 and utf-32 and iso relatives. A hopefully comprehensive # list follows, compiled from iconv and python values. This is used for @@ -66,8 +67,7 @@ class KarTextExtractor: 'utf_16_le', 'utf_32', 'utf_32_be', 'utf_32_le')) def __init__(self, em): - self.em = em - self.currentindex = 0 + super(KarTextExtractor, self).__init__(em) self.encoding = "" self.defaultencoding = "" self.hadnulls = False @@ -182,16 +182,7 @@ class KarTextExtractor: return (encoding, confidence) - def extractone(self, params): - '''Process one file''' - docdata = "" - ok = False - - if "filename:" not in params: - self.em.rclog("extractone: no mime or file name") - return (ok, docdata, "", rclexecm.RclExecM.eofnow) - filename = params["filename:"] - + def html_text(self, filename): # Character encoding from file name ? self.encoding = self.encodingfromfilename(filename) if self.encoding: @@ -200,18 +191,8 @@ class KarTextExtractor: except: self.encoding = "" - # Mimetype not used for now - if "mimetype:" not in params: - mimetype = 'audio/x-midi' - else: - mimetype = params["mimetype:"] - # Read in and midi-decode the file - try: - stream = midi.read_midifile(filename) - except Exception as err: - self.em.rclog("extractone: read_midifile failed: [%s]" % err) - return (ok, docdata, "", rclexecm.RclExecM.eofnow) + stream = midi.read_midifile(filename) title = None author = None @@ -262,7 +243,6 @@ class KarTextExtractor: lyrics += self.nulltrunc(edata) lyricsN += edata - # Try to guess the encoding. First do it with the data # possibly containing nulls. If we get one of the accepted # nullbyte encodings, go with this, else repeat with the @@ -305,28 +285,8 @@ class KarTextExtractor: lyrics = self.reencode(lyrics) language = self.reencode(language) - self.em.setmimetype("text/html") - docdata = htmltemplate % (title, author, language, lyrics) + return htmltemplate % (title, author, language, lyrics) - ok = True - return (ok, docdata, "", rclexecm.RclExecM.eofnext) - - ###### File type handler api, used by rclexecm. Some stuff makes little - # sense because we only have one doc per file. - def openfile(self, params): - self.currentindex = 0 - return True - - def getipath(self, params): - return self.extractone(params) - - def getnext(self, params): - if self.currentindex >= 1: - return (False, "", "", rclexecm.RclExecM.eofnow) - else: - ret= self.extractone(params) - self.currentindex += 1 - return ret proto = rclexecm.RclExecM() extract = KarTextExtractor(proto)