improved rclid3 and rclogg
This commit is contained in:
parent
954de37067
commit
49388c8748
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
# @(#$Id: rclabw,v 1.2 2007-06-15 11:41:50 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: rclabw,v 1.3 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# Parts taken from Estraier:
|
||||
#================================================================
|
||||
# Estraier: a personal full-text search system
|
||||
@ -83,13 +83,6 @@ umask 77
|
||||
|
||||
checkcmds iconv sed
|
||||
|
||||
# check the input file existence
|
||||
if test ! -f "$infile"
|
||||
then
|
||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
encoding=`sed -e '/<?xml version=/s/"?>$//' \
|
||||
-e '/^<?xml version=/s/.*encoding="//p;D;q' \
|
||||
-e D \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
# @(#$Id: rcldjvu,v 1.5 2007-06-08 13:51:08 dockes Exp $ (C) 2005 J.F.Dockes
|
||||
# @(#$Id: rcldjvu,v 1.6 2008-10-08 08:27:34 dockes Exp $ (C) 2005 J.F.Dockes
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -103,13 +103,6 @@ umask 77
|
||||
|
||||
checkcmds djvutxt djvused awk
|
||||
|
||||
# check the input file existence
|
||||
if test ! -f "$infile"
|
||||
then
|
||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Title: we try to extract it from the annotations. djvused outputs string
|
||||
# in C/awk \-escaped notation. Awk can only process this in string
|
||||
# constants, so we have a first awk pass to create an awk program to parse
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
# @(#$Id: rclid3,v 1.2 2007-11-09 11:54:59 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: rclid3,v 1.3 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
#================================================================
|
||||
# rclid3
|
||||
# Handle audio files for recoll. This uses id3info to extract tags
|
||||
@ -83,25 +83,52 @@ checkcmds id3info
|
||||
echo '<html><head>'
|
||||
#echo '<title>' "$title" '</title>'
|
||||
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
||||
echo '</head><body>'
|
||||
echo '<pre>'
|
||||
|
||||
nocaptionexp='s/===.*://'
|
||||
kpcaptionexp='s/===[^(]*//'
|
||||
|
||||
if test X$RECOLL_FILTER_FORPREVIEW = Xyes ; then
|
||||
captionexp=$kpcaptionexp
|
||||
else
|
||||
captionexp=$nocaptionexp
|
||||
fi
|
||||
|
||||
echo '</head><body><pre>'
|
||||
id3info "$infile" | \
|
||||
sed -e '/Tag information for/d' \
|
||||
-e "$captionexp" \
|
||||
-e 's/</&lt;/g' -e 's/&/&amp;/g' \
|
||||
-e 's/===[^(]*//' \
|
||||
-e 's/</&lt;/g' -e 's/&/&amp;/g' -e 's/"/&quot;/g' -e 's/>/&gt;/g'
|
||||
echo '</pre></body></html>'
|
||||
|
||||
echo '</pre>'
|
||||
echo '</body></html>'
|
||||
else
|
||||
# Indexing. There are many other fields which we could process this way.
|
||||
# Would also need that some fields are set up in the "fields" config
|
||||
# file to be really useful
|
||||
id3info "$infile" | awk -F: \
|
||||
'{
|
||||
value=""
|
||||
for (i = 2; i <=NF; i++) {
|
||||
value = value " " $i
|
||||
}
|
||||
sub("&", "&amp;", value)
|
||||
sub("\"", "&quot;", value)
|
||||
sub("<", "&lt;", value)
|
||||
sub(">", "&gt;", value)
|
||||
}
|
||||
/TIT2/{
|
||||
printf "<meta name=\"title\" content=\"%s\">\n", value
|
||||
body=body value "\n"
|
||||
next
|
||||
}
|
||||
/TPE1/{
|
||||
printf "<meta name=\"artist\" content=\"%s\">\n", value
|
||||
body=body value "\n"
|
||||
next
|
||||
}
|
||||
/TALB/{
|
||||
printf "<meta name=\"album\" content=\"%s\">\n", value
|
||||
body=body value "\n"
|
||||
next
|
||||
}
|
||||
{
|
||||
body=body value "\n"
|
||||
}
|
||||
END{print "</head><body><pre>";print body;print "</pre></body></html>"}
|
||||
'
|
||||
fi
|
||||
|
||||
# exit normally
|
||||
exit 0
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
# @(#$Id: rclkwd,v 1.1 2007-06-08 14:01:30 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: rclkwd,v 1.2 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# Parts taken from Estraier:
|
||||
#================================================================
|
||||
# Estraier: a personal full-text search system
|
||||
@ -87,13 +87,6 @@ umask 77
|
||||
|
||||
checkcmds awk unzip gunzip tar
|
||||
|
||||
# check the input file existence
|
||||
if test ! -f "$infile"
|
||||
then
|
||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# We need a temporary directory
|
||||
if test z"$RECOLL_TMPDIR" != z; then
|
||||
ttdir=$RECOLL_TMPDIR
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
# @(#$Id: rclogg,v 1.1 2007-10-02 14:00:47 dockes Exp $ (C) 2007 J.F.Dockes
|
||||
# @(#$Id: rclogg,v 1.2 2008-10-08 08:27:34 dockes Exp $ (C) 2007 J.F.Dockes
|
||||
#================================================================
|
||||
# rclogg
|
||||
# Handle ogg audio files for recoll.
|
||||
@ -74,33 +74,37 @@ fi
|
||||
# !! Leave the following line unmodified !
|
||||
#ENDRECFILTCOMMONCODE
|
||||
|
||||
checkcmds ogginfo
|
||||
checkcmds ogginfo sed awk
|
||||
|
||||
# output the result
|
||||
echo '<html><head>'
|
||||
#echo '<title>' "$title" '</title>'
|
||||
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
||||
echo '</head><body>'
|
||||
echo '<pre>'
|
||||
|
||||
nocaptionexp='/=/s/.*=//p'
|
||||
kpcaptionexp='/=/p'
|
||||
if test X$RECOLL_FILTER_FORPREVIEW = Xyes
|
||||
then
|
||||
|
||||
echo '</head><body><pre>'
|
||||
ogginfo "$infile" | grep '=' | sed -e 's/&/&amp;/g' -e 's/"/&quot;/g' \
|
||||
-e 's/</&lt;/g' -e 's/>/&gt;/g'
|
||||
echo '</pre></body></html>'
|
||||
|
||||
if test X$RECOLL_FILTER_FORPREVIEW = Xyes ; then
|
||||
captionexp=$kpcaptionexp
|
||||
else
|
||||
captionexp=$nocaptionexp
|
||||
ogginfo "$infile" | grep '=' | awk -F= \
|
||||
'{sub("^[ ]+", "", $1)
|
||||
sub("&", "&amp;", $2)
|
||||
sub("\"", "&quot;", $2)
|
||||
sub("<", "&lt;", $2)
|
||||
sub(">", "&gt;", $2)
|
||||
printf "<meta name=\"%s\" content=\"%s\">\n", $1, $2
|
||||
body=body $2 "\n"
|
||||
}
|
||||
END{print "</head><body><pre>";print body;print "</pre></body></html>"}'
|
||||
fi
|
||||
|
||||
# Note: ogginfo output is unparseable: the COMMENT field can be
|
||||
# multiline, but there is no way to detect the end of the 'User
|
||||
# comments' area
|
||||
ogginfo "$infile" | \
|
||||
sed -n \
|
||||
-e "$captionexp"
|
||||
|
||||
echo '</pre>'
|
||||
echo '</body></html>'
|
||||
|
||||
# exit normally
|
||||
exit 0
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
# @(#$Id: rclopxml,v 1.2 2008-09-01 17:31:47 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: rclopxml,v 1.3 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
#================================================================
|
||||
# rcldocx
|
||||
# Extract text from an openxml msword file (will be extended for spreadsheets)
|
||||
@ -80,13 +80,6 @@ umask 77
|
||||
|
||||
checkcmds xsltproc unzip
|
||||
|
||||
# check the input file existence
|
||||
if test ! -f "$infile"
|
||||
then
|
||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# We need a temporary directory
|
||||
if test z"$RECOLL_TMPDIR" != z; then
|
||||
ttdir=$RECOLL_TMPDIR
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
# @(#$Id: rclppt,v 1.3 2007-06-08 13:51:09 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: rclppt,v 1.4 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
@ -100,10 +100,6 @@ umask 77
|
||||
#ENDRECFILTCOMMONCODE
|
||||
|
||||
checkcmds catppt
|
||||
if test X$cmdsok = X0 ; then
|
||||
printf "Catppt not found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# output the result
|
||||
echo '<html><head>'
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
# @(#$Id: rclsoff,v 1.11 2007-06-15 11:41:50 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: rclsoff,v 1.12 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# Parts taken from Estraier:
|
||||
#================================================================
|
||||
# Estraier: a personal full-text search system
|
||||
@ -89,13 +89,6 @@ umask 77
|
||||
|
||||
checkcmds awk iconv unzip
|
||||
|
||||
# check the input file existence
|
||||
if test ! -f "$infile"
|
||||
then
|
||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# We need a temporary directory
|
||||
if test z"$RECOLL_TMPDIR" != z; then
|
||||
ttdir=$RECOLL_TMPDIR
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
# @(#$Id: rclsvg,v 1.2 2008-02-03 16:05:57 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: rclsvg,v 1.3 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# Parts taken from Estraier:
|
||||
#================================================================
|
||||
# Estraier: a personal full-text search system
|
||||
@ -83,13 +83,6 @@ umask 77
|
||||
|
||||
checkcmds iconv sed
|
||||
|
||||
# check the input file existence
|
||||
if test ! -f "$infile"
|
||||
then
|
||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
encoding=`sed -ne '/<?xml/s/.*encoding="\([^"]*\).*/\1/p' < $infile`
|
||||
|
||||
if test X$encoding = X ; then encoding=UTF-8;fi
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
# @(#$Id: rclxls,v 1.4 2007-06-08 13:51:09 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: rclxls,v 1.5 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
@ -100,17 +100,7 @@ umask 77
|
||||
# !! Leave the following line unmodified !
|
||||
#ENDRECFILTCOMMONCODE
|
||||
|
||||
# check the input file existence
|
||||
if test ! -f "$infile"
|
||||
then
|
||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
||||
exit 1
|
||||
fi
|
||||
checkcmds xls2csv
|
||||
if test X$cmdsok = X0 ; then
|
||||
printf "xls2csv not found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# output the result
|
||||
echo '<html><head>'
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# @(#$Id: fields,v 1.4 2008-10-07 06:44:23 dockes Exp $ (C) 2007 J.F.Dockes
|
||||
# @(#$Id: fields,v 1.5 2008-10-08 08:27:34 dockes Exp $ (C) 2007 J.F.Dockes
|
||||
# Field names configuration. This defines how one may search ie for
|
||||
# author:Hemingway
|
||||
# Important:
|
||||
@ -53,7 +53,7 @@ stored = author
|
||||
|
||||
[aliases]
|
||||
abstract = summary dc:summary description xesam:description
|
||||
author = creator dc:creator xesam:author xesam:creator
|
||||
author = creator dc:creator xesam:author xesam:creator from
|
||||
caption = title title dc:title subject
|
||||
# catg = dc:type contentCategory
|
||||
dbytes = size xesam:size
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# @(#$Id: mimeconf,v 1.44 2008-09-28 14:20:50 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: mimeconf,v 1.45 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
|
||||
# Recoll : associations of mime types to processing filters.
|
||||
# There are different sections for decompression, 'interning' for indexing
|
||||
@ -26,12 +26,15 @@ application/x-bzip2 = uncompress rcluncomp bunzip2 %f %t
|
||||
# The external "exec" filters are typically scripts. They output the
|
||||
# document in simple html format, have a look at the scripts.
|
||||
[index]
|
||||
application/msword = exec rcldoc
|
||||
# Note: rcldoc did some work to splice hyphenated words at eol. Seems
|
||||
# actually not needed because antiword apparently does it too
|
||||
# application/msword = exec rcldoc
|
||||
application/msword = exec antiword -t -i 1 -m UTF-8;mimetype=text/plain;charset=utf-8
|
||||
application/ogg = exec rclogg
|
||||
application/pdf = exec rclpdf
|
||||
application/postscript = exec rclps
|
||||
application/vnd.ms-excel = exec rclxls
|
||||
application/vnd.ms-powerpoint = exec rclppt
|
||||
application/vnd.ms-excel = exec xls2csv -c " " -d utf-8;charset=utf-8;mimetype=text/plain
|
||||
application/vnd.ms-powerpoint = exec catppt -d utf-8;charset=utf-8;mimetype=text/plain
|
||||
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
|
||||
exec rclopxml
|
||||
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
|
||||
@ -54,7 +57,7 @@ application/vnd.sun.xml.math = exec rclsoff
|
||||
application/vnd.sun.xml.writer = exec rclsoff
|
||||
application/vnd.sun.xml.writer.global = exec rclsoff
|
||||
application/vnd.sun.xml.writer.template = exec rclsoff
|
||||
application/vnd.wordperfect = exec rclwpd
|
||||
application/vnd.wordperfect = exec wpd2html;mimetype=text/html
|
||||
application/x-abiword = exec rclabw
|
||||
application/x-dvi = exec rcldvi
|
||||
application/x-flac = exec rclflac
|
||||
@ -72,7 +75,7 @@ image/svg+xml = exec rclsvg
|
||||
message/rfc822 = internal
|
||||
text/html = internal
|
||||
text/plain = internal
|
||||
text/rtf = exec rclrtf
|
||||
text/rtf = exec unrtf --nopict --html;charset=iso-8859-1;mimetype=text/html
|
||||
text/x-c = exec rcltext
|
||||
text/x-gaim-log = exec rclgaim
|
||||
text/x-html-sidux-man = exec rclsiduxman
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user