From b043a2b9b26e5cd93c301f9950b0476fb718185e Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes
Date: Sun, 12 Sep 2010 19:31:38 +0200
Subject: [PATCH] use new mutagen-based python filter for all audio files

---
Notes (text in this section is not part of the commit message):
 * Line structure, HTML markup and HTML entities inside rclaudio were lost
   in extraction and have been restored. The exact tag layout of
   htmltemplate is a reconstruction: check it against upstream.
 * The blob ids on the "index" lines and the address on the From: line
   were not regenerated.
 * application/pdf stays on rclpdf: rclaudio rejects that MIME type.

 src/filters/rclaudio    | 114 ++++++++++++++++++++++++++++++++++++++++
 src/sampleconf/mimeconf |   4 ++--
 2 files changed, 116 insertions(+), 2 deletions(-)
 create mode 100755 src/filters/rclaudio

diff --git a/src/filters/rclaudio b/src/filters/rclaudio
new file mode 100755
index 00000000..6b569c9e
--- /dev/null
+++ b/src/filters/rclaudio
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+
+# Audio tag filter for Recoll, using mutagen
+
+import sys
+import os
+import rclexecm
+
+try:
+    from mutagen.mp3 import EasyMP3 as MP3
+    from mutagen.flac import FLAC
+    from mutagen.oggvorbis import OggVorbis
+except ImportError:
+    # Emit the helper-missing marker and stop; other errors are not hidden.
+    print("RECFILTERROR HELPERNOTFOUND python:mutagen")
+    sys.exit(1)
+
+# prototype for the html document we're returning
+# %s slots, in order: album, artist, title, then the body text
+htmltemplate = '''
+<html>
+  <head>
+    <meta name="album" content="%s">
+    <meta name="author" content="%s">
+    <meta name="title" content="%s">
+    <meta http-equiv="content-type" content="text/html; charset=UTF-8">
+  </head>
+  <body>
+  %s
+  </body>
+</html>
+'''
+
+def htmlescape(txt):
+    """Escape txt for HTML text and double-quoted attribute values."""
+    # '&' must be replaced first, or the entities added below would be
+    # escaped a second time.
+    txt = txt.replace("&", "&amp;")
+    txt = txt.replace("<", "&lt;")
+    txt = txt.replace('"', "&quot;")
+    return txt
+
+# mp3: album, title, artist, genre, date, tracknumber
+# flac: album, title, artist, genre, xxx, tracknumber
+# oggvorbis:album, title, artist, genre, date, tracknumber
+class AudioTagExtractor:
+    """Handler object driven by rclexecm; yields one document per file."""
+
+    def __init__(self, em):
+        self.em = em
+        self.currentindex = 0
+
+    def _tag(self, tags, name):
+        """Return the first value of tag name, escaped, or an empty string."""
+        try:
+            return htmlescape(tags[name][0].encode("utf-8"))
+        except Exception:
+            # Best effort: a missing tag must not fail the whole document.
+            return ""
+
+    def extractone(self, params):
+        """Build the HTML document for params["filename:"].
+
+        Returns the (ok, docdata, empty string, eof flag) tuple for rclexecm.
+        """
+        #self.em.rclog("extractone %s %s" % (params["filename:"], params["mimetype:"]))
+        docdata = ""
+        ok = False
+        if "mimetype:" not in params or "filename:" not in params:
+            self.em.rclog("extractone: no mime or file name")
+            return (ok, docdata, "", rclexecm.RclExecM.eofnow)
+        filename = params["filename:"]
+        mimetype = params["mimetype:"]
+        try:
+            if mimetype == "audio/mpeg":
+                tags = MP3(filename)
+            elif mimetype == "application/ogg":
+                tags = OggVorbis(filename)
+            elif mimetype == "application/x-flac":
+                tags = FLAC(filename)
+            else:
+                raise Exception("Bad mime type %s" % mimetype)
+        except Exception as err:
+            self.em.rclog("extractone: extract failed: [%s]" % err)
+            return (ok, docdata, "", rclexecm.RclExecM.eofnow)
+
+        album = self._tag(tags, "album")
+        artist = self._tag(tags, "artist")
+        title = self._tag(tags, "title")
+        alldata = htmlescape(tags.pprint().encode("utf-8"))
+        alldata = alldata.replace("\n", "<br>")
+        docdata = htmltemplate % (album, artist, title, alldata)
+        ok = True
+        return (ok, docdata, "", rclexecm.RclExecM.eofnext)
+
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        self.currentindex = 0
+        return True
+
+    def getipath(self, params):
+        return self.extractone(params)
+
+    def getnext(self, params):
+        # Each file holds a single document: serve it once, then signal eof.
+        if self.currentindex >= 1:
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        else:
+            ret= self.extractone(params)
+            self.currentindex += 1
+            return ret
+
+e = rclexecm.RclExecM()
+e.mainloop(AudioTagExtractor(e))
diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf
index 9e7af312..052dce34 100644
--- a/src/sampleconf/mimeconf
+++ b/src/sampleconf/mimeconf
@@ -36,8 +36,8 @@ application/msword = exec antiword -t -i 1 -m UTF-8;mimetype=text/plain;charset=
 # antiword, but will handle documents which provoke the 'I'm afraid the
 # text stream of this file is too small to handle' antiword error
 # application/msword = exec wvWare --charset=utf-8 --nographics
-application/ogg = exec rclogg
+application/ogg = execm rclaudio
 application/pdf = exec rclpdf
 application/postscript = exec pstotext;charset=iso-8859-1;mimetype=text/plain
 application/vnd.ms-excel = exec xls2csv -c " " -d utf-8;charset=utf-8;mimetype=text/plain
 application/vnd.ms-powerpoint = exec catppt -d utf-8;charset=utf-8;mimetype=text/plain
@@ -76,7 +76,7 @@ application/x-shellscript = internal
 application/x-tex = exec rcltex
 application/x-chm = execm rclchm
 application/zip = execm rclzip
-audio/mpeg = exec rclid3
+audio/mpeg = execm rclaudio
 image/gif = execm rclimg
 image/jpeg = execm rclimg
 image/png = execm rclimg