improved mp3/flac filter. use pstotext directly

This commit is contained in:
dockes 2008-10-09 09:21:38 +00:00
parent bfc6512d24
commit 1b97b96dea
3 changed files with 47 additions and 19 deletions

View File

@ -1,5 +1,5 @@
#!/bin/sh
# @(#$Id: rclflac,v 1.1 2007-10-02 14:00:47 dockes Exp $ (C) 2007 J.F.Dockes
# @(#$Id: rclflac,v 1.2 2008-10-09 09:21:38 dockes Exp $ (C) 2007 J.F.Dockes
#================================================================
# rclflac
# Handle flac audio files for recoll.
@ -78,28 +78,57 @@ checkcmds metaflac
# output the result
# Start the HTML document and declare its content as UTF-8 text/html.
echo '<html><head>'
#echo '<title>' "$title" '</title>'
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
echo '</head><body>'
echo '<pre>'
# sed "s" commands (with the p flag) for lines containing "comment[...]: ":
#  nocaptionexp deletes the "comment[...]: " prefix and everything up to
#               the last "=" on the line, leaving only the tag value;
#  kpcaptionexp deletes only the "comment[...]: " prefix, leaving the
#               tag name, the "=" and the value.
nocaptionexp='s/ *comment\[.*\]: .*=//p'
kpcaptionexp='s/ *comment\[.*\]: //p'
# Keep the tag names when RECOLL_FILTER_FORPREVIEW is "yes"; otherwise
# strip them and output the bare values.
if test X$RECOLL_FILTER_FORPREVIEW = Xyes ; then
captionexp=$kpcaptionexp
else
captionexp=$nocaptionexp
fi
echo '</head><body><pre>'
# List the metadata of "$infile" with metaflac; sed -n then only prints
# the "comment[" lines that a substitution with the p flag rewrote.
# NOTE(review): in a sed replacement an unescaped "&" stands for the
# matched text, so 's/</&lt;/g' appears to produce "<lt;" instead of
# "&lt;" (it would need '\&lt;', applied after the "&" escaping) - verify.
metaflac --list "$infile" | \
sed -n \
-e '/comment\[/s/</&lt;/g' \
-e '/comment\[/s/&/&amp;/g' \
-e '/comment\[/'"$captionexp"
-e '/comment\[/s/ *comment\[.*\]: //p'
echo '</pre>'
echo '</body></html>'
echo '</pre></body></html>'
else
# rclflac_tags_to_html: turn Vorbis comment lines into the tail of the
# HTML document.
#   stdin:  one "NAME=VALUE" tag per line (the "comment[N]: " prefix
#           already removed).
#   stdout: a <meta name=... content=...> line for each title, artist and
#           album tag, then "</head><body><pre>", every tag value on its
#           own line, and the closing "</pre></body></html>".
# The fields after the first "=" are re-joined with single spaces, so an
# "=" inside a value comes out as a space.
rclflac_tags_to_html() {
    awk -F= '
    {
        value = ""
        for (i = 2; i <= NF; i++) {
            value = value " " $i
        }
        sub(/^ +/, "", value)

        # HTML-escape every occurrence. "&" must be handled first so the
        # entities produced below are not escaped a second time. In the
        # replacement text a bare "&" means "the matched text", hence
        # the "\\&" needed to emit a literal ampersand.
        gsub(/&/, "\\&amp;", value)
        gsub(/"/, "\\&quot;", value)
        gsub(/</, "\\&lt;", value)
        gsub(/>/, "\\&gt;", value)

        # Vorbis comment field names are case-insensitive (TITLE= is as
        # valid as title=), so compare the lowercased name.
        tag = tolower($1)
        if (NF > 1 && (tag == "title" || tag == "artist" || tag == "album")) {
            printf "<meta name=\"%s\" content=\"%s\">\n", tag, value
        }

        # Every value, whatever its tag, also goes into the body text.
        body = body value "\n"
    }
    END {
        print "</head><body><pre>"
        print body
        print "</pre></body></html>"
    }
    '
}

# Indexing mode: keep only the "comment[N]: NAME=VALUE" lines of the
# metaflac listing, drop the "comment[N]: " prefix, and convert the tags.
metaflac --list "$infile" | \
    sed -n -e '/comment\[/s/ *comment\[.*\]: //p' | \
    rclflac_tags_to_html
fi
# exit normally
exit 0

View File

@ -1,5 +1,5 @@
#!/bin/sh
# @(#$Id: rclid3,v 1.3 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
# @(#$Id: rclid3,v 1.4 2008-10-09 09:21:38 dockes Exp $ (C) 2004 J.F.Dockes
#================================================================
# rclid3
# Handle audio files for recoll. This uses id3info to extract tags
@ -84,8 +84,6 @@ echo '<html><head>'
#echo '<title>' "$title" '</title>'
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
nocaptionexp='s/===.*://'
if test X$RECOLL_FILTER_FORPREVIEW = Xyes ; then
echo '</head><body><pre>'
id3info "$infile" | \
@ -104,6 +102,7 @@ id3info "$infile" | awk -F: \
for (i = 2; i <=NF; i++) {
value = value " " $i
}
sub("^ +", "", value)
sub("&", "&amp;", value)
sub("\"", "&quot;", value)
sub("<", "&lt;", value)

View File

@ -1,4 +1,4 @@
# @(#$Id: mimeconf,v 1.45 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
# @(#$Id: mimeconf,v 1.46 2008-10-09 09:21:38 dockes Exp $ (C) 2004 J.F.Dockes
# Recoll : associations of mime types to processing filters.
# There are different sections for decompression, 'interning' for indexing
@ -32,7 +32,7 @@ application/x-bzip2 = uncompress rcluncomp bunzip2 %f %t
application/msword = exec antiword -t -i 1 -m UTF-8;mimetype=text/plain;charset=utf-8
application/ogg = exec rclogg
application/pdf = exec rclpdf
application/postscript = exec rclps
application/postscript = exec pstotext;charset=iso-8859-1;mimetype=text/plain
application/vnd.ms-excel = exec xls2csv -c " " -d utf-8;charset=utf-8;mimetype=text/plain
application/vnd.ms-powerpoint = exec catppt -d utf-8;charset=utf-8;mimetype=text/plain
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \