diff --git a/src/filters/rclkar b/src/filters/rclkar
new file mode 100755
index 00000000..5293abbf
--- /dev/null
+++ b/src/filters/rclkar
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+
+# Read a .kar MIDI karaoke file and translate it to a Recoll-indexable
+# HTML document: title, author and language metadata plus the lyrics.
+
+import rclexecm
+import sys
+import os.path
+
+try:
+    import midi
+except ImportError:
+    # Report the missing helper module, then give up.
+    print("RECFILTERROR HELPERNOTFOUND python:midi")
+    sys.exit(1)
+
+# Prototype for the html document we're returning. The placeholders are
+# filled, in order, with: title, author, language, lyrics.
+# NOTE(review): the markup around the placeholders was reconstructed;
+# confirm the tag and meta names against what the indexer expects.
+htmltemplate = '''
+<html>
+  <head>
+    <title>%s</title>
+    <meta name="author" content="%s">
+    <meta name="language" content="%s">
+  </head>
+  <body>
+    %s
+  </body>
+</html>
+'''
+
+# NOTE(review): this script originally referenced midi.LryricsEvent, which
+# looks like a misspelling of LyricsEvent. Accept whichever name the
+# installed midi module defines so that a missing attribute cannot abort
+# the extraction; confirm the right name against the midi module in use.
+lyricsevent = getattr(midi, "LyricsEvent", None)
+if lyricsevent is None:
+    lyricsevent = getattr(midi, "LryricsEvent", None)
+
+class KarTextExtractor:
+    """Extract lyrics and metadata from a .kar file.
+
+    openfile(), getipath() and getnext() are the file type handler api
+    used by rclexecm; each .kar file yields a single document.
+    """
+
+    def __init__(self, em):
+        # em: rclexecm.RclExecM object, used here for logging, html
+        # escaping and setting the output mime type.
+        self.em = em
+        # Number of documents already returned for the current file.
+        self.currentindex = 0
+
+    def _gettext(self, stream):
+        """Return (title, author, language, lyrics) from the midi events.
+
+        Fields which are not found in the stream are returned as "".
+        """
+        title = None
+        author = None
+        language = None
+        parts = []
+        for event in stream.iterevents():
+            if isinstance(event, midi.TextMetaEvent):
+                data = event.data
+                if not data:
+                    continue
+                if data[0] in ('/', '\\'):
+                    # Marker for the start of a new lyrics line.
+                    parts.append("\n" + data[1:])
+                elif data[0] in ('[', ']'):
+                    parts.append(data[1:])
+                elif data[0] == '@':
+                    # Tagged text: @I is added to the lyrics, @L is the
+                    # language, the first @T is used as the title and the
+                    # second as the author. Other tags are ignored.
+                    tag = data[1:2]
+                    value = data[2:]
+                    if tag == 'I':
+                        parts.append(value + '\n')
+                    elif tag == 'L':
+                        language = value
+                    elif tag == 'T':
+                        if title is None:
+                            title = value
+                        elif author is None:
+                            author = value
+                else:
+                    parts.append(data)
+            elif lyricsevent is not None and isinstance(event, lyricsevent):
+                data = event.data
+                if not data:
+                    continue
+                if data[0] in ('/', '\\'):
+                    parts.append("\n" + data[1:])
+                else:
+                    parts.append(data)
+        return (title or "", author or "", language or "", "".join(parts))
+
+    def extractone(self, params):
+        """Build the html document for the file named by params["filename:"].
+
+        Returns a 4-tuple whose third element is always "": on success
+        (True, html text, "", RclExecM.eofnext), on failure
+        (False, "", "", RclExecM.eofnow).
+        """
+        # params["mimetype:"] is not used for now.
+        if "filename:" not in params:
+            self.em.rclog("extractone: no file name")
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        filename = params["filename:"]
+
+        try:
+            stream = midi.read_midifile(filename)
+        except Exception as err:
+            self.em.rclog("extractone: extract failed: [%s]" % err)
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+
+        title, author, language, lyrics = self._gettext(stream)
+
+        if lyrics != "":
+            try:
+                lyrics = self.em.htmlescape(lyrics.encode("utf-8"))
+                # NOTE(review): replacement text reconstructed as an html
+                # line break; confirm.
+                lyrics = lyrics.replace("\n", "<br>\n")
+            except Exception as err:
+                # Use the log rather than print for diagnostics, like the
+                # other error paths of this method.
+                self.em.rclog("extractone: encode failed: [%s]" % err)
+                lyrics = ""
+
+        # The metadata goes into the markup too, so escape it like the lyrics.
+        # NOTE(review): assumes htmlescape also makes text safe for use inside
+        # a double-quoted attribute value -- confirm in rclexecm.
+        title = self.em.htmlescape(title)
+        author = self.em.htmlescape(author)
+        language = self.em.htmlescape(language)
+
+        self.em.setmimetype("text/html")
+        docdata = htmltemplate % (title, author, language, lyrics)
+        return (True, docdata, "", rclexecm.RclExecM.eofnext)
+
+    ###### File type handler api, used by rclexecm. Some stuff makes little
+    # sense because we only have one doc per file.
+    def openfile(self, params):
+        # Reset the document counter for the new file.
+        self.currentindex = 0
+        return True
+
+    def getipath(self, params):
+        # There is a single document per file, so params is passed straight
+        # to extractone().
+        return self.extractone(params)
+
+    def getnext(self, params):
+        # Return the document on the first call, end of file afterwards.
+        if self.currentindex >= 1:
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        ret = self.extractone(params)
+        self.currentindex += 1
+        return ret
+
+proto = rclexecm.RclExecM()
+extract = KarTextExtractor(proto)
+rclexecm.main(proto, extract)
diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf
index 74871c62..386efd8e 100644
--- a/src/sampleconf/mimeconf
+++ b/src/sampleconf/mimeconf
@@ -79,6 +79,7 @@ text/x-tex = exec rcltex
application/x-chm = execm rclchm
application/zip = execm rclzip
audio/mpeg = execm rclaudio
+audio/x-karaoke = execm rclkar
image/gif = execm rclimg
image/jpeg = execm rclimg
image/png = execm rclimg
@@ -149,6 +150,7 @@ application/x-awk = source
application/x-perl = source
application/x-shellscript = source
audio/mpeg = sownd
+audio/x-karaoke = sownd
image/gif = image
image/jpeg = image
image/png = image
@@ -225,6 +227,7 @@ presentation = application/vnd.ms-powerpoint \
media = \
audio/mpeg \
+ audio/x-karaoke \
application/ogg \
application/x-flac \
image/jpeg \
diff --git a/src/sampleconf/mimemap b/src/sampleconf/mimemap
index 865d6dac..d2b2fc75 100644
--- a/src/sampleconf/mimemap
+++ b/src/sampleconf/mimemap
@@ -99,6 +99,8 @@
.mp3 = audio/mpeg
.flac = application/x-flac
.ogg = application/ogg
+# Note: "file -i" reports audio/x-midi, but soft karaoke (.kar) files get their own type.
+.kar = audio/x-karaoke
.png = image/png
.jpg = image/jpeg