improved rclid3 and rclogg
This commit is contained in:
parent
954de37067
commit
49388c8748
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rclabw,v 1.2 2007-06-15 11:41:50 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: rclabw,v 1.3 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
# Parts taken from Estraier:
|
# Parts taken from Estraier:
|
||||||
#================================================================
|
#================================================================
|
||||||
# Estraier: a personal full-text search system
|
# Estraier: a personal full-text search system
|
||||||
@ -83,13 +83,6 @@ umask 77
|
|||||||
|
|
||||||
checkcmds iconv sed
|
checkcmds iconv sed
|
||||||
|
|
||||||
# check the input file existence
|
|
||||||
if test ! -f "$infile"
|
|
||||||
then
|
|
||||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
encoding=`sed -e '/<?xml version=/s/"?>$//' \
|
encoding=`sed -e '/<?xml version=/s/"?>$//' \
|
||||||
-e '/^<?xml version=/s/.*encoding="//p;D;q' \
|
-e '/^<?xml version=/s/.*encoding="//p;D;q' \
|
||||||
-e D \
|
-e D \
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rcldjvu,v 1.5 2007-06-08 13:51:08 dockes Exp $ (C) 2005 J.F.Dockes
|
# @(#$Id: rcldjvu,v 1.6 2008-10-08 08:27:34 dockes Exp $ (C) 2005 J.F.Dockes
|
||||||
|
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
@ -103,13 +103,6 @@ umask 77
|
|||||||
|
|
||||||
checkcmds djvutxt djvused awk
|
checkcmds djvutxt djvused awk
|
||||||
|
|
||||||
# check the input file existence
|
|
||||||
if test ! -f "$infile"
|
|
||||||
then
|
|
||||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Title: we try to extract it from the annotations. djvused outputs string
|
# Title: we try to extract it from the annotations. djvused outputs string
|
||||||
# in C/awk \-escaped notation. Awk can only process this in string
|
# in C/awk \-escaped notation. Awk can only process this in string
|
||||||
# constants, so we have a first awk pass to create an awk program to parse
|
# constants, so we have a first awk pass to create an awk program to parse
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rclid3,v 1.2 2007-11-09 11:54:59 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: rclid3,v 1.3 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
#================================================================
|
#================================================================
|
||||||
# rclid3
|
# rclid3
|
||||||
# Handle audio files for recoll. This uses id3info to extract tags
|
# Handle audio files for recoll. This uses id3info to extract tags
|
||||||
@ -83,25 +83,52 @@ checkcmds id3info
|
|||||||
echo '<html><head>'
|
echo '<html><head>'
|
||||||
#echo '<title>' "$title" '</title>'
|
#echo '<title>' "$title" '</title>'
|
||||||
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
||||||
echo '</head><body>'
|
|
||||||
echo '<pre>'
|
|
||||||
|
|
||||||
nocaptionexp='s/===.*://'
|
nocaptionexp='s/===.*://'
|
||||||
kpcaptionexp='s/===[^(]*//'
|
|
||||||
|
|
||||||
if test X$RECOLL_FILTER_FORPREVIEW = Xyes ; then
|
if test X$RECOLL_FILTER_FORPREVIEW = Xyes ; then
|
||||||
captionexp=$kpcaptionexp
|
echo '</head><body><pre>'
|
||||||
else
|
|
||||||
captionexp=$nocaptionexp
|
|
||||||
fi
|
|
||||||
|
|
||||||
id3info "$infile" | \
|
id3info "$infile" | \
|
||||||
sed -e '/Tag information for/d' \
|
sed -e '/Tag information for/d' \
|
||||||
-e "$captionexp" \
|
-e 's/===[^(]*//' \
|
||||||
-e 's/</&lt;/g' -e 's/&/&amp;/g' \
|
-e 's/</&lt;/g' -e 's/&/&amp;/g' -e 's/"/&quot;/g' -e 's/>/&gt;/g'
|
||||||
|
echo '</pre></body></html>'
|
||||||
|
|
||||||
echo '</pre>'
|
else
|
||||||
echo '</body></html>'
|
# Indexing. There are many other fields which we could process this way.
|
||||||
|
# Would also need that some fields are set up in the "fields" config
|
||||||
|
# file to be really useful
|
||||||
|
id3info "$infile" | awk -F: \
|
||||||
|
'{
|
||||||
|
value=""
|
||||||
|
for (i = 2; i <=NF; i++) {
|
||||||
|
value = value " " $i
|
||||||
|
}
|
||||||
|
sub("&", "&amp;", value)
|
||||||
|
sub("\"", "&quot;", value)
|
||||||
|
sub("<", "&lt;", value)
|
||||||
|
sub(">", "&gt;", value)
|
||||||
|
}
|
||||||
|
/TIT2/{
|
||||||
|
printf "<meta name=\"title\" content=\"%s\">\n", value
|
||||||
|
body=body value "\n"
|
||||||
|
next
|
||||||
|
}
|
||||||
|
/TPE1/{
|
||||||
|
printf "<meta name=\"artist\" content=\"%s\">\n", value
|
||||||
|
body=body value "\n"
|
||||||
|
next
|
||||||
|
}
|
||||||
|
/TALB/{
|
||||||
|
printf "<meta name=\"album\" content=\"%s\">\n", value
|
||||||
|
body=body value "\n"
|
||||||
|
next
|
||||||
|
}
|
||||||
|
{
|
||||||
|
body=body value "\n"
|
||||||
|
}
|
||||||
|
END{print "</head><body><pre>";print body;print "</pre></body></html>"}
|
||||||
|
'
|
||||||
|
fi
|
||||||
|
|
||||||
# exit normally
|
|
||||||
exit 0
|
exit 0
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rclkwd,v 1.1 2007-06-08 14:01:30 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: rclkwd,v 1.2 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
# Parts taken from Estraier:
|
# Parts taken from Estraier:
|
||||||
#================================================================
|
#================================================================
|
||||||
# Estraier: a personal full-text search system
|
# Estraier: a personal full-text search system
|
||||||
@ -87,13 +87,6 @@ umask 77
|
|||||||
|
|
||||||
checkcmds awk unzip gunzip tar
|
checkcmds awk unzip gunzip tar
|
||||||
|
|
||||||
# check the input file existence
|
|
||||||
if test ! -f "$infile"
|
|
||||||
then
|
|
||||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# We need a temporary directory
|
# We need a temporary directory
|
||||||
if test z"$RECOLL_TMPDIR" != z; then
|
if test z"$RECOLL_TMPDIR" != z; then
|
||||||
ttdir=$RECOLL_TMPDIR
|
ttdir=$RECOLL_TMPDIR
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rclogg,v 1.1 2007-10-02 14:00:47 dockes Exp $ (C) 2007 J.F.Dockes
|
# @(#$Id: rclogg,v 1.2 2008-10-08 08:27:34 dockes Exp $ (C) 2007 J.F.Dockes
|
||||||
#================================================================
|
#================================================================
|
||||||
# rclogg
|
# rclogg
|
||||||
# Handle ogg audio files for recoll.
|
# Handle ogg audio files for recoll.
|
||||||
@ -74,33 +74,37 @@ fi
|
|||||||
# !! Leave the following line unmodified !
|
# !! Leave the following line unmodified !
|
||||||
#ENDRECFILTCOMMONCODE
|
#ENDRECFILTCOMMONCODE
|
||||||
|
|
||||||
checkcmds ogginfo
|
checkcmds ogginfo sed awk
|
||||||
|
|
||||||
# output the result
|
# output the result
|
||||||
echo '<html><head>'
|
echo '<html><head>'
|
||||||
#echo '<title>' "$title" '</title>'
|
|
||||||
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
||||||
echo '</head><body>'
|
|
||||||
echo '<pre>'
|
|
||||||
|
|
||||||
nocaptionexp='/=/s/.*=//p'
|
if test X$RECOLL_FILTER_FORPREVIEW = Xyes
|
||||||
kpcaptionexp='/=/p'
|
then
|
||||||
|
|
||||||
|
echo '</head><body><pre>'
|
||||||
|
ogginfo "$infile" | grep '=' | sed -e 's/&/&amp;/g' -e 's/"/&quot;/g' \
|
||||||
|
-e 's/</&lt;/g' -e 's/>/&gt;/g'
|
||||||
|
echo '</pre></body></html>'
|
||||||
|
|
||||||
if test X$RECOLL_FILTER_FORPREVIEW = Xyes ; then
|
|
||||||
captionexp=$kpcaptionexp
|
|
||||||
else
|
else
|
||||||
captionexp=$nocaptionexp
|
ogginfo "$infile" | grep '=' | awk -F= \
|
||||||
|
'{sub("^[ ]+", "", $1)
|
||||||
|
sub("&", "&amp;", $2)
|
||||||
|
sub("\"", "&quot;", $2)
|
||||||
|
sub("<", "&lt;", $2)
|
||||||
|
sub(">", "&gt;", $2)
|
||||||
|
printf "<meta name=\"%s\" content=\"%s\">\n", $1, $2
|
||||||
|
body=body $2 "\n"
|
||||||
|
}
|
||||||
|
END{print "</head><body><pre>";print body;print "</pre></body></html>"}'
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Note: ogginfo output is unparseable: the COMMENT field can be
|
# Note: ogginfo output is unparseable: the COMMENT field can be
|
||||||
# multiline, but there is no way to detect the end of the 'User
|
# multiline, but there is no way to detect the end of the 'User
|
||||||
# comments' area
|
# comments' area
|
||||||
ogginfo "$infile" | \
|
|
||||||
sed -n \
|
|
||||||
-e "$captionexp"
|
|
||||||
|
|
||||||
echo '</pre>'
|
|
||||||
echo '</body></html>'
|
|
||||||
|
|
||||||
# exit normally
|
# exit normally
|
||||||
exit 0
|
exit 0
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rclopxml,v 1.2 2008-09-01 17:31:47 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: rclopxml,v 1.3 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
#================================================================
|
#================================================================
|
||||||
# rcldocx
|
# rcldocx
|
||||||
# Extract text from an openxml msword file (will be extended for spreadsheets)
|
# Extract text from an openxml msword file (will be extended for spreadsheets)
|
||||||
@ -80,13 +80,6 @@ umask 77
|
|||||||
|
|
||||||
checkcmds xsltproc unzip
|
checkcmds xsltproc unzip
|
||||||
|
|
||||||
# check the input file existence
|
|
||||||
if test ! -f "$infile"
|
|
||||||
then
|
|
||||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# We need a temporary directory
|
# We need a temporary directory
|
||||||
if test z"$RECOLL_TMPDIR" != z; then
|
if test z"$RECOLL_TMPDIR" != z; then
|
||||||
ttdir=$RECOLL_TMPDIR
|
ttdir=$RECOLL_TMPDIR
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rclppt,v 1.3 2007-06-08 13:51:09 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: rclppt,v 1.4 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
# the Free Software Foundation; either version 2 of the License, or
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -100,10 +100,6 @@ umask 77
|
|||||||
#ENDRECFILTCOMMONCODE
|
#ENDRECFILTCOMMONCODE
|
||||||
|
|
||||||
checkcmds catppt
|
checkcmds catppt
|
||||||
if test X$cmdsok = X0 ; then
|
|
||||||
printf "Catppt not found"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# output the result
|
# output the result
|
||||||
echo '<html><head>'
|
echo '<html><head>'
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rclsoff,v 1.11 2007-06-15 11:41:50 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: rclsoff,v 1.12 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
# Parts taken from Estraier:
|
# Parts taken from Estraier:
|
||||||
#================================================================
|
#================================================================
|
||||||
# Estraier: a personal full-text search system
|
# Estraier: a personal full-text search system
|
||||||
@ -89,13 +89,6 @@ umask 77
|
|||||||
|
|
||||||
checkcmds awk iconv unzip
|
checkcmds awk iconv unzip
|
||||||
|
|
||||||
# check the input file existence
|
|
||||||
if test ! -f "$infile"
|
|
||||||
then
|
|
||||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# We need a temporary directory
|
# We need a temporary directory
|
||||||
if test z"$RECOLL_TMPDIR" != z; then
|
if test z"$RECOLL_TMPDIR" != z; then
|
||||||
ttdir=$RECOLL_TMPDIR
|
ttdir=$RECOLL_TMPDIR
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rclsvg,v 1.2 2008-02-03 16:05:57 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: rclsvg,v 1.3 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
# Parts taken from Estraier:
|
# Parts taken from Estraier:
|
||||||
#================================================================
|
#================================================================
|
||||||
# Estraier: a personal full-text search system
|
# Estraier: a personal full-text search system
|
||||||
@ -83,13 +83,6 @@ umask 77
|
|||||||
|
|
||||||
checkcmds iconv sed
|
checkcmds iconv sed
|
||||||
|
|
||||||
# check the input file existence
|
|
||||||
if test ! -f "$infile"
|
|
||||||
then
|
|
||||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
encoding=`sed -ne '/<?xml/s/.*encoding="\([^"]*\).*/\1/p' < $infile`
|
encoding=`sed -ne '/<?xml/s/.*encoding="\([^"]*\).*/\1/p' < $infile`
|
||||||
|
|
||||||
if test X$encoding = X ; then encoding=UTF-8;fi
|
if test X$encoding = X ; then encoding=UTF-8;fi
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rclxls,v 1.4 2007-06-08 13:51:09 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: rclxls,v 1.5 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
# the Free Software Foundation; either version 2 of the License, or
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -100,17 +100,7 @@ umask 77
|
|||||||
# !! Leave the following line unmodified !
|
# !! Leave the following line unmodified !
|
||||||
#ENDRECFILTCOMMONCODE
|
#ENDRECFILTCOMMONCODE
|
||||||
|
|
||||||
# check the input file existence
|
|
||||||
if test ! -f "$infile"
|
|
||||||
then
|
|
||||||
printf '%s: %s: no such file\n' "$progname" "$infile"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
checkcmds xls2csv
|
checkcmds xls2csv
|
||||||
if test X$cmdsok = X0 ; then
|
|
||||||
printf "xls2csv not found"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# output the result
|
# output the result
|
||||||
echo '<html><head>'
|
echo '<html><head>'
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# @(#$Id: fields,v 1.4 2008-10-07 06:44:23 dockes Exp $ (C) 2007 J.F.Dockes
|
# @(#$Id: fields,v 1.5 2008-10-08 08:27:34 dockes Exp $ (C) 2007 J.F.Dockes
|
||||||
# Field names configuration. This defines how one may search ie for
|
# Field names configuration. This defines how one may search ie for
|
||||||
# author:Hemingway
|
# author:Hemingway
|
||||||
# Important:
|
# Important:
|
||||||
@ -53,7 +53,7 @@ stored = author
|
|||||||
|
|
||||||
[aliases]
|
[aliases]
|
||||||
abstract = summary dc:summary description xesam:description
|
abstract = summary dc:summary description xesam:description
|
||||||
author = creator dc:creator xesam:author xesam:creator
|
author = creator dc:creator xesam:author xesam:creator from
|
||||||
caption = title title dc:title subject
|
caption = title title dc:title subject
|
||||||
# catg = dc:type contentCategory
|
# catg = dc:type contentCategory
|
||||||
dbytes = size xesam:size
|
dbytes = size xesam:size
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# @(#$Id: mimeconf,v 1.44 2008-09-28 14:20:50 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: mimeconf,v 1.45 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
|
|
||||||
# Recoll : associations of mime types to processing filters.
|
# Recoll : associations of mime types to processing filters.
|
||||||
# There are different sections for decompression, 'interning' for indexing
|
# There are different sections for decompression, 'interning' for indexing
|
||||||
@ -26,12 +26,15 @@ application/x-bzip2 = uncompress rcluncomp bunzip2 %f %t
|
|||||||
# The external "exec" filters are typically scripts. They output the
|
# The external "exec" filters are typically scripts. They output the
|
||||||
# document in simple html format, have a look at the scripts.
|
# document in simple html format, have a look at the scripts.
|
||||||
[index]
|
[index]
|
||||||
application/msword = exec rcldoc
|
# Note: rcldoc did some work to splice hyphenated words at eol. Seems
|
||||||
|
# actually not needed because antiword apparently does it too
|
||||||
|
# application/msword = exec rcldoc
|
||||||
|
application/msword = exec antiword -t -i 1 -m UTF-8;mimetype=text/plain;charset=utf-8
|
||||||
application/ogg = exec rclogg
|
application/ogg = exec rclogg
|
||||||
application/pdf = exec rclpdf
|
application/pdf = exec rclpdf
|
||||||
application/postscript = exec rclps
|
application/postscript = exec rclps
|
||||||
application/vnd.ms-excel = exec rclxls
|
application/vnd.ms-excel = exec xls2csv -c " " -d utf-8;charset=utf-8;mimetype=text/plain
|
||||||
application/vnd.ms-powerpoint = exec rclppt
|
application/vnd.ms-powerpoint = exec catppt -d utf-8;charset=utf-8;mimetype=text/plain
|
||||||
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
|
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
|
||||||
exec rclopxml
|
exec rclopxml
|
||||||
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
|
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
|
||||||
@ -54,7 +57,7 @@ application/vnd.sun.xml.math = exec rclsoff
|
|||||||
application/vnd.sun.xml.writer = exec rclsoff
|
application/vnd.sun.xml.writer = exec rclsoff
|
||||||
application/vnd.sun.xml.writer.global = exec rclsoff
|
application/vnd.sun.xml.writer.global = exec rclsoff
|
||||||
application/vnd.sun.xml.writer.template = exec rclsoff
|
application/vnd.sun.xml.writer.template = exec rclsoff
|
||||||
application/vnd.wordperfect = exec rclwpd
|
application/vnd.wordperfect = exec wpd2html;mimetype=text/html
|
||||||
application/x-abiword = exec rclabw
|
application/x-abiword = exec rclabw
|
||||||
application/x-dvi = exec rcldvi
|
application/x-dvi = exec rcldvi
|
||||||
application/x-flac = exec rclflac
|
application/x-flac = exec rclflac
|
||||||
@ -72,7 +75,7 @@ image/svg+xml = exec rclsvg
|
|||||||
message/rfc822 = internal
|
message/rfc822 = internal
|
||||||
text/html = internal
|
text/html = internal
|
||||||
text/plain = internal
|
text/plain = internal
|
||||||
text/rtf = exec rclrtf
|
text/rtf = exec unrtf --nopict --html;charset=iso-8859-1;mimetype=text/html
|
||||||
text/x-c = exec rcltext
|
text/x-c = exec rcltext
|
||||||
text/x-gaim-log = exec rclgaim
|
text/x-gaim-log = exec rclgaim
|
||||||
text/x-html-sidux-man = exec rclsiduxman
|
text/x-html-sidux-man = exec rclsiduxman
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user