Support soft karaoke midi files (.kar)

This commit is contained in:
Jean-Francois Dockes 2011-01-27 16:43:48 +01:00
parent 2de2f986d3
commit 131c0189f4
3 changed files with 134 additions and 0 deletions

129
src/filters/rclkar Executable file
View File

@ -0,0 +1,129 @@
#!/usr/bin/env python
# Read a .kar midi karaoke file and translate to recoll indexable format
import rclexecm
import sys
import os.path
# The midi module is required to parse the input. If it cannot be loaded,
# print the RECFILTERROR HELPERNOTFOUND line on stdout and exit with status 1.
try:
    import midi
except Exception:
    # "except Exception" rather than a bare "except": any import-time failure
    # is still reported as a missing helper, but KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    # The parenthesised single-string form prints the same text whether
    # print is a statement or a function.
    print("RECFILTERROR HELPERNOTFOUND python:midi")
    sys.exit(1)
# Prototype for the HTML document returned for each file. It is filled in
# with the % operator and a 4-tuple whose members map, in order, to the four
# %s placeholders below: title, author, language, body text.
htmltemplate = '''
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>%s</title>
<meta name="author" content="%s">
<meta name="language" content="%s">
</head>
<body>
%s
</body>
</html>
'''
class KarTextExtractor:
    """Turn the text events of a soft karaoke (.kar) MIDI file into HTML.

    An instance is handed to rclexecm.main() together with the protocol
    object. Each input file yields exactly one document.
    """

    def __init__(self, em):
        """Store the protocol object and reset the per-file document index.

        em -- object on which rclog(), htmlescape() and setmimetype() are
              called by the methods below.
        """
        self.em = em
        self.currentindex = 0

    def extractone(self, params):
        """Build the HTML document for the file named by params["filename:"].

        Text conventions applied to the data of each text or lyrics event:
          - a leading '/' or '\\' starts a new line, the marker is dropped;
          - (text events only) a leading '[' or ']' is dropped;
          - (text events only) '@I' data is appended to the text followed by
            a newline, '@L' sets the language, the first '@T' sets the title
            and the second '@T' sets the author; other '@' tags and a lone
            '@' are ignored;
          - anything else is appended to the text unchanged.

        Returns a 4-tuple (ok, docdata, "", eofflag). When the file name is
        missing or the file cannot be read, ok is False, docdata is empty and
        the flag is RclExecM.eofnow. Otherwise ok is True, docdata is the
        filled-in htmltemplate and the flag is RclExecM.eofnext.
        """
        # params may also carry "mimetype:", but it is not needed here: the
        # output mime type is always set to text/html below.
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        filename = params["filename:"]

        try:
            stream = midi.read_midifile(filename)
        except Exception as err:
            self.em.rclog("extractone: extract failed: [%s]" % err)
            return (False, "", "", rclexecm.RclExecM.eofnow)

        title = None
        author = None
        language = None
        # Text pieces are collected here and joined once at the end.
        fragments = []
        for event in stream.iterevents():
            if isinstance(event, midi.TextMetaEvent):
                is_text_event = True
            # NOTE(review): "LryricsEvent" is spelled exactly as in the
            # original code; presumably this matches the class name exported
            # by the midi module in use -- verify before "correcting" it.
            elif isinstance(event, midi.LryricsEvent):
                is_text_event = False
            else:
                continue

            data = event.data
            if not data:
                continue

            marker = data[0]
            if marker == '/' or marker == '\\':
                fragments.append("\n" + data[1:])
            elif is_text_event and (marker == '[' or marker == ']'):
                fragments.append(data[1:])
            elif is_text_event and marker == '@':
                if len(data) == 1:
                    continue
                tag = data[1]
                value = data[2:]
                if tag == 'I':
                    fragments.append(value + '\n')
                elif tag == 'L':
                    language = value
                elif tag == 'T':
                    # First @T is the title, second one the author; any
                    # further @T events are ignored.
                    if title is None:
                        title = value
                    elif author is None:
                        author = value
            else:
                fragments.append(data)

        lyrics = "".join(fragments)
        if lyrics != "":
            try:
                # NOTE(review): if event data are byte strings holding
                # non-ASCII characters, encode() raises on Python 2 and the
                # whole text is dropped below -- confirm this is acceptable.
                lyrics = self.em.htmlescape(lyrics.encode("utf-8"))
                lyrics = lyrics.replace("\n", "<br>")
            except Exception as err:
                # Log through rclog like the other errors instead of printing
                # to stdout, which this script also uses for its RECFILTERROR
                # message.
                self.em.rclog("extractone: lyrics encoding failed: [%s]" % err)
                lyrics = ""

        # The metadata ends up inside <title> and attribute values, so it is
        # escaped with the same helper as the lyrics.
        title = self.em.htmlescape(title or "")
        author = self.em.htmlescape(author or "")
        language = self.em.htmlescape(language or "")

        self.em.setmimetype("text/html")
        docdata = htmltemplate % (title, author, language, lyrics)
        return (True, docdata, "", rclexecm.RclExecM.eofnext)

    ###### File type handler api, used by rclexecm. Some stuff makes little
    # sense because we only have one doc per file.
    def openfile(self, params):
        """Reset the document index for a new file; always returns True."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the file's single document (same result as extractone)."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the file's document on the first call after openfile().

        Later calls return (False, "", "", RclExecM.eofnow).
        """
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(params)
        self.currentindex += 1
        return ret
# Create the rclexecm protocol object, bind a karaoke extractor to it and
# hand both to rclexecm.main().
proto = rclexecm.RclExecM()
rclexecm.main(proto, KarTextExtractor(proto))

View File

@ -79,6 +79,7 @@ text/x-tex = exec rcltex
application/x-chm = execm rclchm
application/zip = execm rclzip
audio/mpeg = execm rclaudio
audio/x-karaoke = execm rclkar
image/gif = execm rclimg
image/jpeg = execm rclimg
image/png = execm rclimg
@ -149,6 +150,7 @@ application/x-awk = source
application/x-perl = source
application/x-shellscript = source
audio/mpeg = sownd
audio/x-karaoke = sownd
image/gif = image
image/jpeg = image
image/png = image
@ -225,6 +227,7 @@ presentation = application/vnd.ms-powerpoint \
media = \
audio/mpeg \
audio/x-karaoke \
application/ogg \
application/x-flac \
image/jpeg \

View File

@ -99,6 +99,8 @@
.mp3 = audio/mpeg
.flac = application/x-flac
.ogg = application/ogg
# Note: file -i says audio/x-midi, but soft karaoke files are special.
.kar = audio/x-karaoke
.png = image/png
.jpg = image/jpeg