use rclbasehandler in more filters
This commit is contained in:
parent
0d24cc35da
commit
61e471a0e2
@ -626,6 +626,7 @@ filters/rcl7z \
|
|||||||
filters/rclabw.py \
|
filters/rclabw.py \
|
||||||
filters/rclaptosidman \
|
filters/rclaptosidman \
|
||||||
filters/rclaudio \
|
filters/rclaudio \
|
||||||
|
filters/rclbasehandler.py \
|
||||||
filters/rclbibtex.sh \
|
filters/rclbibtex.sh \
|
||||||
filters/rclcheckneedretry.sh \
|
filters/rclcheckneedretry.sh \
|
||||||
filters/rclchm \
|
filters/rclchm \
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
# Base for extractor classes. With some common generic implementations
|
# Base for extractor classes. With some common generic implementations
|
||||||
# for the boilerplate functions, meant for single-document file handlers.
|
# for the boilerplate functions.
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
|
|||||||
@ -30,18 +30,18 @@ from __future__ import print_function
|
|||||||
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import rclexecm
|
import rclexecm
|
||||||
|
from rclbasehandler import RclBaseHandler
|
||||||
|
|
||||||
# This class has the code to execute the subprocess and call a
|
# This class has the code to execute the subprocess and call a
|
||||||
# data-specific post-processor. Command and processor are supplied by
|
# data-specific post-processor. Command and processor are supplied by
|
||||||
# the object which we receive as a parameter, which in turn is defined
|
# the object which we receive as a parameter, which in turn is defined
|
||||||
# in the actual executable filter (e.g. rcldoc.py)
|
# in the actual executable filter (e.g. rcldoc.py)
|
||||||
class Executor:
|
class Executor(RclBaseHandler):
|
||||||
opt_ignxval = 1
|
opt_ignxval = 1
|
||||||
|
|
||||||
def __init__(self, em, flt):
|
def __init__(self, em, flt):
|
||||||
self.em = em
|
super(Executor, self).__init__(em)
|
||||||
self.flt = flt
|
self.flt = flt
|
||||||
self.currentindex = 0
|
|
||||||
|
|
||||||
def runCmd(self, cmd, filename, postproc, opt):
|
def runCmd(self, cmd, filename, postproc, opt):
|
||||||
''' Substitute parameters and execute command, process output
|
''' Substitute parameters and execute command, process output
|
||||||
@ -109,19 +109,4 @@ class Executor:
|
|||||||
return (ok, data, "", rclexecm.RclExecM.eofnext)
|
return (ok, data, "", rclexecm.RclExecM.eofnext)
|
||||||
else:
|
else:
|
||||||
return (ok, "", "", rclexecm.RclExecM.eofnow)
|
return (ok, "", "", rclexecm.RclExecM.eofnow)
|
||||||
|
|
||||||
###### File type handler api, used by rclexecm ---------->
|
|
||||||
def openfile(self, params):
|
|
||||||
self.currentindex = 0
|
|
||||||
return True
|
|
||||||
|
|
||||||
def getipath(self, params):
|
|
||||||
return self.extractone(params)
|
|
||||||
|
|
||||||
def getnext(self, params):
|
|
||||||
if self.currentindex >= 1:
|
|
||||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
|
||||||
else:
|
|
||||||
ret= self.extractone(params)
|
|
||||||
self.currentindex += 1
|
|
||||||
return ret
|
|
||||||
|
|||||||
@ -10,6 +10,7 @@ import os.path
|
|||||||
import string
|
import string
|
||||||
import re
|
import re
|
||||||
import codecs
|
import codecs
|
||||||
|
from rclbasehandler import RclBaseHandler
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import rcllatinclass
|
import rcllatinclass
|
||||||
@ -51,7 +52,7 @@ if PY3:
|
|||||||
else:
|
else:
|
||||||
nullchar = chr(0)
|
nullchar = chr(0)
|
||||||
|
|
||||||
class KarTextExtractor:
|
class KarTextExtractor(RclBaseHandler):
|
||||||
# Afaik, the only charset encodings with null bytes are variations on
|
# Afaik, the only charset encodings with null bytes are variations on
|
||||||
# utf-16 and utf-32 and iso relatives. A hopefully comprehensive
|
# utf-16 and utf-32 and iso relatives. A hopefully comprehensive
|
||||||
# list follows, compiled from iconv and python values. This is used for
|
# list follows, compiled from iconv and python values. This is used for
|
||||||
@ -66,8 +67,7 @@ class KarTextExtractor:
|
|||||||
'utf_16_le', 'utf_32', 'utf_32_be', 'utf_32_le'))
|
'utf_16_le', 'utf_32', 'utf_32_be', 'utf_32_le'))
|
||||||
|
|
||||||
def __init__(self, em):
|
def __init__(self, em):
|
||||||
self.em = em
|
super(KarTextExtractor, self).__init__(em)
|
||||||
self.currentindex = 0
|
|
||||||
self.encoding = ""
|
self.encoding = ""
|
||||||
self.defaultencoding = ""
|
self.defaultencoding = ""
|
||||||
self.hadnulls = False
|
self.hadnulls = False
|
||||||
@ -182,16 +182,7 @@ class KarTextExtractor:
|
|||||||
return (encoding, confidence)
|
return (encoding, confidence)
|
||||||
|
|
||||||
|
|
||||||
def extractone(self, params):
|
def html_text(self, filename):
|
||||||
'''Process one file'''
|
|
||||||
docdata = ""
|
|
||||||
ok = False
|
|
||||||
|
|
||||||
if "filename:" not in params:
|
|
||||||
self.em.rclog("extractone: no mime or file name")
|
|
||||||
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
|
||||||
filename = params["filename:"]
|
|
||||||
|
|
||||||
# Character encoding from file name ?
|
# Character encoding from file name ?
|
||||||
self.encoding = self.encodingfromfilename(filename)
|
self.encoding = self.encodingfromfilename(filename)
|
||||||
if self.encoding:
|
if self.encoding:
|
||||||
@ -200,18 +191,8 @@ class KarTextExtractor:
|
|||||||
except:
|
except:
|
||||||
self.encoding = ""
|
self.encoding = ""
|
||||||
|
|
||||||
# Mimetype not used for now
|
|
||||||
if "mimetype:" not in params:
|
|
||||||
mimetype = 'audio/x-midi'
|
|
||||||
else:
|
|
||||||
mimetype = params["mimetype:"]
|
|
||||||
|
|
||||||
# Read in and midi-decode the file
|
# Read in and midi-decode the file
|
||||||
try:
|
stream = midi.read_midifile(filename)
|
||||||
stream = midi.read_midifile(filename)
|
|
||||||
except Exception as err:
|
|
||||||
self.em.rclog("extractone: read_midifile failed: [%s]" % err)
|
|
||||||
return (ok, docdata, "", rclexecm.RclExecM.eofnow)
|
|
||||||
|
|
||||||
title = None
|
title = None
|
||||||
author = None
|
author = None
|
||||||
@ -262,7 +243,6 @@ class KarTextExtractor:
|
|||||||
lyrics += self.nulltrunc(edata)
|
lyrics += self.nulltrunc(edata)
|
||||||
lyricsN += edata
|
lyricsN += edata
|
||||||
|
|
||||||
|
|
||||||
# Try to guess the encoding. First do it with the data
|
# Try to guess the encoding. First do it with the data
|
||||||
# possibly containing nulls. If we get one of the accepted
|
# possibly containing nulls. If we get one of the accepted
|
||||||
# nullbyte encodings, go with this, else repeat with the
|
# nullbyte encodings, go with this, else repeat with the
|
||||||
@ -305,28 +285,8 @@ class KarTextExtractor:
|
|||||||
lyrics = self.reencode(lyrics)
|
lyrics = self.reencode(lyrics)
|
||||||
language = self.reencode(language)
|
language = self.reencode(language)
|
||||||
|
|
||||||
self.em.setmimetype("text/html")
|
return htmltemplate % (title, author, language, lyrics)
|
||||||
docdata = htmltemplate % (title, author, language, lyrics)
|
|
||||||
|
|
||||||
ok = True
|
|
||||||
return (ok, docdata, "", rclexecm.RclExecM.eofnext)
|
|
||||||
|
|
||||||
###### File type handler api, used by rclexecm. Some stuff makes little
|
|
||||||
# sense because we only have one doc per file.
|
|
||||||
def openfile(self, params):
|
|
||||||
self.currentindex = 0
|
|
||||||
return True
|
|
||||||
|
|
||||||
def getipath(self, params):
|
|
||||||
return self.extractone(params)
|
|
||||||
|
|
||||||
def getnext(self, params):
|
|
||||||
if self.currentindex >= 1:
|
|
||||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
|
||||||
else:
|
|
||||||
ret= self.extractone(params)
|
|
||||||
self.currentindex += 1
|
|
||||||
return ret
|
|
||||||
|
|
||||||
proto = rclexecm.RclExecM()
|
proto = rclexecm.RclExecM()
|
||||||
extract = KarTextExtractor(proto)
|
extract = KarTextExtractor(proto)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user