use rclbasehandler in more filters

This commit is contained in:
Jean-Francois Dockes 2018-06-04 15:49:21 +02:00
parent 0d24cc35da
commit 61e471a0e2
4 changed files with 12 additions and 66 deletions

View File

@ -626,6 +626,7 @@ filters/rcl7z \
filters/rclabw.py \
filters/rclaptosidman \
filters/rclaudio \
filters/rclbasehandler.py \
filters/rclbibtex.sh \
filters/rclcheckneedretry.sh \
filters/rclchm \

View File

@ -16,7 +16,7 @@
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Base for extractor classes. With some common generic implementations
# for the boilerplate functions, meant for single-document file handlers.
# for the boilerplate functions.
from __future__ import print_function

View File

@ -30,18 +30,18 @@ from __future__ import print_function
import subprocess
import rclexecm
from rclbasehandler import RclBaseHandler
# This class has the code to execute the subprocess and call a
# data-specific post-processor. Command and processor are supplied by
# the object which we receive as a parameter, which in turn is defined
# in the actual executable filter (e.g. rcldoc.py)
class Executor:
class Executor(RclBaseHandler):
opt_ignxval = 1
def __init__(self, em, flt):
self.em = em
super(Executor, self).__init__(em)
self.flt = flt
self.currentindex = 0
def runCmd(self, cmd, filename, postproc, opt):
''' Substitute parameters and execute command, process output
@ -109,19 +109,4 @@ class Executor:
return (ok, data, "", rclexecm.RclExecM.eofnext)
else:
return (ok, "", "", rclexecm.RclExecM.eofnow)
###### File type handler api, used by rclexecm ---------->
def openfile(self, params):
self.currentindex = 0
return True
def getipath(self, params):
return self.extractone(params)
def getnext(self, params):
if self.currentindex >= 1:
return (False, "", "", rclexecm.RclExecM.eofnow)
else:
ret= self.extractone(params)
self.currentindex += 1
return ret

View File

@ -10,6 +10,7 @@ import os.path
import string
import re
import codecs
from rclbasehandler import RclBaseHandler
try:
import rcllatinclass
@ -51,7 +52,7 @@ if PY3:
else:
nullchar = chr(0)
class KarTextExtractor:
class KarTextExtractor(RclBaseHandler):
# Afaik, the only charset encodings with null bytes are variations on
# utf-16 and utf-32 and iso relatives. A hopefully comprehensive
# list follows, compiled from iconv and python values. This is used for
@ -66,8 +67,7 @@ class KarTextExtractor:
'utf_16_le', 'utf_32', 'utf_32_be', 'utf_32_le'))
def __init__(self, em):
self.em = em
self.currentindex = 0
super(KarTextExtractor, self).__init__(em)
self.encoding = ""
self.defaultencoding = ""
self.hadnulls = False
@ -182,16 +182,7 @@ class KarTextExtractor:
return (encoding, confidence)
def extractone(self, params):
'''Process one file'''
docdata = ""
ok = False
if "filename:" not in params:
self.em.rclog("extractone: no mime or file name")
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
filename = params["filename:"]
def html_text(self, filename):
# Character encoding from file name ?
self.encoding = self.encodingfromfilename(filename)
if self.encoding:
@ -200,18 +191,8 @@ class KarTextExtractor:
except:
self.encoding = ""
# Mimetype not used for now
if "mimetype:" not in params:
mimetype = 'audio/x-midi'
else:
mimetype = params["mimetype:"]
# Read in and midi-decode the file
try:
stream = midi.read_midifile(filename)
except Exception as err:
self.em.rclog("extractone: read_midifile failed: [%s]" % err)
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
stream = midi.read_midifile(filename)
title = None
author = None
@ -262,7 +243,6 @@ class KarTextExtractor:
lyrics += self.nulltrunc(edata)
lyricsN += edata
# Try to guess the encoding. First do it with the data
# possibly containing nulls. If we get one of the accepted
# nullbyte encodings, go with this, else repeat with the
@ -305,28 +285,8 @@ class KarTextExtractor:
lyrics = self.reencode(lyrics)
language = self.reencode(language)
self.em.setmimetype("text/html")
docdata = htmltemplate % (title, author, language, lyrics)
return htmltemplate % (title, author, language, lyrics)
ok = True
return (ok, docdata, "", rclexecm.RclExecM.eofnext)
###### File type handler api, used by rclexecm. Some stuff makes little
# sense because we only have one doc per file.
def openfile(self, params):
self.currentindex = 0
return True
def getipath(self, params):
return self.extractone(params)
def getnext(self, params):
if self.currentindex >= 1:
return (False, "", "", rclexecm.RclExecM.eofnow)
else:
ret= self.extractone(params)
self.currentindex += 1
return ret
proto = rclexecm.RclExecM()
extract = KarTextExtractor(proto)