use new mutagen-based python filter for all audio files

This commit is contained in:
Jean-Francois Dockes 2010-09-12 19:31:38 +02:00
parent 716b75dc45
commit b043a2b9b2
2 changed files with 108 additions and 3 deletions

105
src/filters/rclaudio Executable file
View File

@ -0,0 +1,105 @@
#!/usr/bin/env python
# Audio tag filter for Recoll, using mutagen
import sys
import os
import rclexecm
try:
from mutagen.mp3 import EasyMP3 as MP3
from mutagen.flac import FLAC
from mutagen.oggvorbis import OggVorbis
except:
print "RECFILTERROR HELPERNOTFOUND python:mutagen"
sys.exit(1);
# Prototype for the HTML document returned for each audio file.
# The four %s placeholders are filled, in order, with: album, artist
# (emitted as the "author" meta field), title, and the body text. All
# values must already be HTML-escaped, as they are inserted verbatim.
htmltemplate = '''
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<meta name="album" content="%s">
<meta name="author" content="%s">
<meta name="title" content="%s">
</head>
<body>
%s
</body>
</html>
'''
def htmlescape(txt):
    """Escape txt for inclusion in HTML text or a double-quoted attribute.

    The ampersand is replaced first: doing it after the other
    substitutions would re-escape the '&' of the entities just produced
    (turning "<" into "&amp;lt;").
    """
    txt = txt.replace("&", "&amp;")
    txt = txt.replace("<", "&lt;")
    txt = txt.replace(">", "&gt;")
    # "&quot;" is the HTML entity for the double quote ("&dquot;" does
    # not exist and would be displayed literally).
    txt = txt.replace('"', "&quot;")
    return txt
# mp3: album, title, artist, genre, date, tracknumber
# flac: album, title, artist, genre, xxx, tracknumber
# oggvorbis:album, title, artist, genre, date, tracknumber
class AudioTagExtractor:
    """Recoll execm handler turning the tags of an audio file into HTML.

    Supported mime types are audio/mpeg, application/ogg and
    application/x-flac. Each file yields exactly one document.

    Extraction results are 4-tuples (ok, docdata, "", eofstate) where
    eofstate is one of the rclexecm.RclExecM eof* constants. The third
    element is always the empty string here (presumably the ipath;
    confirm against rclexecm).
    """

    def __init__(self, em):
        # em: the rclexecm.RclExecM instance driving us; used for logging.
        self.em = em
        # Number of documents already returned for the current file.
        self.currentindex = 0

    def _tagvalue(self, tags, name):
        """Return the first value of tag name, UTF-8 encoded and
        HTML-escaped, or "" if it is missing or cannot be converted."""
        try:
            return htmlescape(tags[name][0].encode("utf-8"))
        except Exception:
            # Best effort: a missing or odd tag must not fail the file.
            return ""

    def extractone(self, params):
        """Build the HTML document for the file described by params.

        params must hold the "filename:" and "mimetype:" entries.
        Failures (missing entries, unsupported mime type, unreadable
        file) are logged and reported as (False, "", "", eofnow).
        """
        if "mimetype:" not in params or "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        filename = params["filename:"]
        mimetype = params["mimetype:"]
        try:
            if mimetype == "audio/mpeg":
                tags = MP3(filename)
            elif mimetype == "application/ogg":
                tags = OggVorbis(filename)
            elif mimetype == "application/x-flac":
                tags = FLAC(filename)
            else:
                raise Exception("Bad mime type %s" % mimetype)
        except Exception:
            # sys.exc_info() instead of "except ..., err" / "as err" so
            # that the syntax is accepted by old and new interpreters.
            err = sys.exc_info()[1]
            self.em.rclog("extractone: extract failed: [%s]" % err)
            return (False, "", "", rclexecm.RclExecM.eofnow)

        album = self._tagvalue(tags, "album")
        artist = self._tagvalue(tags, "artist")
        title = self._tagvalue(tags, "title")

        # The body is the full tag dump, one tag per line. Line breaks
        # are inserted after escaping so that the <br> markup survives.
        alldata = htmlescape(tags.pprint().encode("utf-8"))
        alldata = alldata.replace("\n", "<br>")
        docdata = htmltemplate % (album, artist, title, alldata)
        return (True, docdata, "", rclexecm.RclExecM.eofnext)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Start processing a new file: reset the document counter."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the document for params; the file holds a single one."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the file's only document on the first call after
        openfile(), then an end-of-file result on later calls."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(params)
        self.currentindex += 1
        return ret
# Script entry point: create the rclexecm protocol object and hand it an
# AudioTagExtractor as file handler. The extractor also keeps a reference
# to the protocol object, which it uses for logging.
e = rclexecm.RclExecM()
e.mainloop(AudioTagExtractor(e))

View File

@ -36,8 +36,8 @@ application/msword = exec antiword -t -i 1 -m UTF-8;mimetype=text/plain;charset=
# antiword, but will handle documents which provoke the 'I'm afraid the
# text stream of this file is too small to handle' antiword error
# application/msword = exec wvWare --charset=utf-8 --nographics
application/ogg = exec rclogg
application/pdf = exec rclpdf
application/ogg = execm rclaudio
application/pdf = exec rclpdf
application/postscript = exec pstotext;charset=iso-8859-1;mimetype=text/plain
application/vnd.ms-excel = exec xls2csv -c " " -d utf-8;charset=utf-8;mimetype=text/plain
application/vnd.ms-powerpoint = exec catppt -d utf-8;charset=utf-8;mimetype=text/plain
@ -76,7 +76,7 @@ application/x-shellscript = internal
application/x-tex = exec rcltex
application/x-chm = execm rclchm
application/zip = execm rclzip
audio/mpeg = exec rclid3
audio/mpeg = execm rclaudio
image/gif = execm rclimg
image/jpeg = execm rclimg
image/png = execm rclimg