converted to xslt
This commit is contained in:
parent
6fd41e77a5
commit
17393bad47
@ -82,88 +82,98 @@ umask 77
|
|||||||
# !! Leave the following line unmodified !
|
# !! Leave the following line unmodified !
|
||||||
#ENDRECFILTCOMMONCODE
|
#ENDRECFILTCOMMONCODE
|
||||||
|
|
||||||
checkcmds iconv sed
|
checkcmds xsltproc
|
||||||
|
|
||||||
encoding=`sed -e '/<?xml version=/s/"?>$//' \
|
# Run the stylesheet given on stdin against the input document. The path is
# quoted so that file names containing blanks or glob characters are passed
# to xsltproc as a single argument.
xsltproc - "$infile" <<EOF
|
||||||
-e '/^<?xml version=/s/.*encoding="//p;D;q' \
|
<?xml version="1.0"?>
|
||||||
-e D \
|
<xsl:stylesheet version="1.0"
|
||||||
< $infile`
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
if test X$encoding = X ; then encoding=UTF-8;fi
|
xmlns:ab="http://www.abisource.com/awml.dtd"
|
||||||
|
exclude-result-prefixes="ab"
|
||||||
|
>
|
||||||
|
|
||||||
# Note: there can be newlines inside the description field, we don't want
|
<xsl:output method="html" encoding="UTF-8"/>
|
||||||
# them... Have 2 use 2 different selectors for the single-line and
|
|
||||||
# multiple-line cases because of the generic tag end (</m> for all meta
|
|
||||||
# tags)
|
|
||||||
descsedprog='
|
|
||||||
/<m key="dc.description">\([^<]*\)<\/m>/ {
|
|
||||||
s//\1/
|
|
||||||
p
|
|
||||||
q
|
|
||||||
}
|
|
||||||
/<m key="dc.description">/,/<\/m>/ {
|
|
||||||
s!.*<m key="dc.description">!!
|
|
||||||
s!</m>.*!!
|
|
||||||
H
|
|
||||||
}
|
|
||||||
${
|
|
||||||
g
|
|
||||||
s/\n/ /g
|
|
||||||
p
|
|
||||||
}
|
|
||||||
'
|
|
||||||
|
|
||||||
description=`sed -n -e "$descsedprog" < "$infile"`
|
<xsl:template match="/">
|
||||||
#echo description: "$description"
|
<html>
|
||||||
|
<head>
|
||||||
|
<xsl:apply-templates select="ab:abiword/ab:metadata"/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
# Set program for the single line meta elements. Takes element name as
|
<!-- This is for the older abiword format with no namespaces -->
|
||||||
# parameter
|
<xsl:for-each select="abiword/section">
|
||||||
setmetasedprog() {
|
<xsl:apply-templates select="p"/>
|
||||||
metasedprog='/<m key="'$1'">/{
|
</xsl:for-each>
|
||||||
s/.*<m key="'$1'">\([^<]*\).*/\1/
|
|
||||||
'"s/\"/'/g"'
|
|
||||||
p
|
|
||||||
}'
|
|
||||||
}
|
|
||||||
|
|
||||||
setmetasedprog dc.subject
|
<!-- Newer namespaced format -->
|
||||||
subject=`sed -n -e "$metasedprog" "$infile"`
|
<xsl:for-each select="ab:abiword/ab:section">
|
||||||
#echo subject: "$subject"
|
<xsl:for-each select="ab:p">
|
||||||
|
<p><xsl:value-of select="."/></p><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:for-each>
|
||||||
|
</xsl:for-each>
|
||||||
|
|
||||||
setmetasedprog dc.title
|
</body>
|
||||||
title=`sed -n -e "$metasedprog" "$infile"`
|
</html>
|
||||||
#echo titre: "$title"
|
</xsl:template>
|
||||||
|
|
||||||
setmetasedprog abiword.keywords
|
<xsl:template match="p">
|
||||||
keywords=`sed -n -e "$metasedprog" "$infile"`
|
<p><xsl:value-of select="."/></p><xsl:text>
|
||||||
#echo keywords: "$keywords"
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
setmetasedprog dc.creator
|
<xsl:template match="ab:metadata">
|
||||||
creator=`sed -n -e "$metasedprog" "$infile"`
|
<xsl:for-each select="ab:m">
|
||||||
#echo creator: "$creator"
|
<xsl:choose>
|
||||||
|
<xsl:when test="@key = 'dc.creator'">
|
||||||
# Note: next expr supposes that paragraphs are always all by themselves on
|
<meta>
|
||||||
# a single line in the xml (no multiple <p> per line, no embedded newlines
|
<xsl:attribute name="name">author</xsl:attribute>
|
||||||
# in text).
|
<xsl:attribute name="content">
|
||||||
contentsedprog='
|
<xsl:value-of select="."/>
|
||||||
/<p[ >]/{
|
</xsl:attribute>
|
||||||
s/<[^>]*>/ /g
|
</meta><xsl:text>
|
||||||
p
|
</xsl:text>
|
||||||
}
|
</xsl:when>
|
||||||
'
|
<xsl:when test="@key = 'abiword.keywords'">
|
||||||
content=`sed -n -e "$contentsedprog" "$infile"`
|
<meta>
|
||||||
#echo content: "$content"
|
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
# output the result
|
<xsl:value-of select="."/>
|
||||||
(echo '<html><head><title>' "$title" '</title>'
|
</xsl:attribute>
|
||||||
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
</meta><xsl:text>
|
||||||
echo '<meta name="description" content="' "$description $subject" '">'
|
</xsl:text>
|
||||||
echo '<meta name="keywords" content="' "$keywords" '">'
|
</xsl:when>
|
||||||
echo '<meta name="author" content="' "$creator" '">'
|
<xsl:when test="@key = 'dc.subject'">
|
||||||
echo '</head><body><pre>'
|
<meta>
|
||||||
echo "$content"
|
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||||
echo '</pre></body></html>') \
|
<xsl:attribute name="content">
|
||||||
| iconv -f $encoding -t UTF-8 -c -s
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:when>
|
||||||
|
<xsl:when test="@key = 'dc.description'">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">abstract</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:when>
|
||||||
|
<xsl:when test="@key = 'dc.title'">
|
||||||
|
<title><xsl:value-of select="."/></title><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:when>
|
||||||
|
<xsl:otherwise>
|
||||||
|
</xsl:otherwise>
|
||||||
|
</xsl:choose>
|
||||||
|
</xsl:for-each>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
</xsl:stylesheet>
|
||||||
|
EOF
|
||||||
|
|
||||||
# exit normally
|
# exit normally
|
||||||
exit 0
|
exit 0
|
||||||
|
|||||||
@ -86,7 +86,7 @@ umask 77
|
|||||||
# !! Leave the following line unmodified !
|
# !! Leave the following line unmodified !
|
||||||
#ENDRECFILTCOMMONCODE
|
#ENDRECFILTCOMMONCODE
|
||||||
|
|
||||||
checkcmds awk unzip gunzip tar
|
checkcmds unzip gunzip tar xsltproc
|
||||||
|
|
||||||
# We need a temporary directory
|
# We need a temporary directory
|
||||||
if test z"$RECOLL_TMPDIR" != z; then
|
if test z"$RECOLL_TMPDIR" != z; then
|
||||||
@ -115,7 +115,6 @@ if file $infile | grep -qi gzip ; then
|
|||||||
# Unzip the input file and change to the unzipped directory
|
# Unzip the input file and change to the unzipped directory
|
||||||
gunzip < "$infile" | (cd $tmpdir/rclkwdtmp;tar xf -)
|
gunzip < "$infile" | (cd $tmpdir/rclkwdtmp;tar xf -)
|
||||||
else
|
else
|
||||||
echo new kwd
|
|
||||||
# Unzip the input file and change to the unzipped directory
|
# Unzip the input file and change to the unzipped directory
|
||||||
unzip -q -d $tmpdir/rclkwdtmp "$infile"
|
unzip -q -d $tmpdir/rclkwdtmp "$infile"
|
||||||
fi
|
fi
|
||||||
@ -124,74 +123,98 @@ cd $tmpdir/rclkwdtmp
|
|||||||
metafile=documentinfo.xml
|
metafile=documentinfo.xml
|
||||||
contentfile=maindoc.xml
|
contentfile=maindoc.xml
|
||||||
|
|
||||||
if test -f $metafile ; then
|
echo '<html><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">'
|
||||||
|
|
||||||
# Note: there can be newlines inside the description field, we don't want
|
if test -f $metafile ; then
|
||||||
# them...
|
xsltproc --novalid - $metafile <<EOF
|
||||||
abssedprog='/<abstract>/,/<\/abstract>/{
|
<?xml version="1.0"?>
|
||||||
s!.*<abstract>!!
|
<xsl:stylesheet version="1.0"
|
||||||
s!</abstract>.*!!
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
p
|
xmlns:kw="http://www.koffice.org/DTD/document-info"
|
||||||
}
|
exclude-result-prefixes="kw"
|
||||||
'
|
>
|
||||||
abstract=`sed -n -e "$abssedprog" < $metafile | tr '\n' ' ' | \
|
|
||||||
sed -e '1s/<!\[CDATA\[//' -e 's/\]\]>//'`
|
<xsl:output method="html" encoding="UTF-8"/>
|
||||||
subject=`sed -e "s/\"/'/" -e 's/.*<subject>\([^<]*\).*/\1/p;d' \
|
|
||||||
< $metafile`
|
<xsl:template match="/">
|
||||||
title=`sed -e "s/\"/'/" -e 's/.*<title>\([^<]*\).*/\1/p;d' \
|
<xsl:apply-templates select="kw:document-info|document-info"/>
|
||||||
< $metafile | tr '\n' ' '`
|
</xsl:template>
|
||||||
keywords=`sed -e "s/\"/'/" -e 's/.*<keyword>\([^<]*\).*/\1/p;d' \
|
<!-- Dispatch the document-info children to the metadata templates. The "about/..." alternatives cover non-namespaced files that nest these fields under "about" (assumed to mirror the namespaced layout). The original direct-child alternatives are kept. -->
<xsl:template match="/kw:document-info|/document-info">
|
||||||
< $metafile`
|
<xsl:apply-templates select="kw:author|author"/>
|
||||||
|
<xsl:apply-templates select="kw:about/kw:abstract|about/abstract|abstract"/>
|
||||||
|
<xsl:apply-templates select="kw:about/kw:title|about/title|title"/>
|
||||||
|
<xsl:apply-templates select="kw:about/kw:keyword|about/keyword|keyword"/>
|
||||||
|
<xsl:apply-templates select="kw:about/kw:subject|about/subject|subject"/>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="kw:author|author">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">author</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="kw:full-name|full-name"/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<!-- Emit the document abstract as an HTML meta element named "abstract" (same name as the other filters' stylesheets use). -->
<xsl:template match="kw:abstract|abstract">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">abstract</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="kw:keyword|kw:subject|keyword|subject">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="kw:title|title">
|
||||||
|
<title><xsl:value-of select="."/></title><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
</xsl:stylesheet>
|
||||||
|
|
||||||
|
EOF
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Note: next expr inserts a newline at each end of paragraph (for preview)
|
echo '</head><body>'
|
||||||
content="`sed -e 's!</TEXT>!\\
|
xsltproc --novalid - $contentfile <<EOF
|
||||||
!g' -e 's/<[^>]*>/ /g' < $contentfile | sed -e '/^[ ]*$/d'`"
|
<?xml version="1.0"?>
|
||||||
|
<xsl:stylesheet version="1.0"
|
||||||
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
|
xmlns:kw="http://www.koffice.org/DTD/kword"
|
||||||
|
exclude-result-prefixes="kw"
|
||||||
|
>
|
||||||
|
|
||||||
#echo abstract "$abstract"
|
<xsl:output method="html" encoding="UTF-8"/>
|
||||||
#echo subject "$subject"
|
|
||||||
#echo title "$title"
|
|
||||||
#echo keywords "$keywords"
|
|
||||||
#echo content "$content"
|
|
||||||
|
|
||||||
# output the result
|
<xsl:template match="/">
|
||||||
echo '<html><head>'
|
<xsl:apply-templates select="//kw:TEXT|//TEXT"/>
|
||||||
echo '<title>' "$title" '</title>'
|
</xsl:template>
|
||||||
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
|
||||||
echo '<meta name="abstract" content="' "$abstract $subject" '">'
|
|
||||||
echo '<meta name="keywords" content="' "$keywords" '">'
|
|
||||||
echo '</head><body><p>'
|
|
||||||
|
|
||||||
# The strange 'BEGIN' setup is to prevent 'file' from thinking this file
|
<xsl:template match="kw:TEXT|TEXT">
|
||||||
# is an awk program
|
<xsl:if test="normalize-space(.) != ''">
|
||||||
echo "$content" | sed -e "s/'/'/g" -e 's/"/"/g' |\
|
<p><xsl:value-of select="."/></p><xsl:text>
|
||||||
awk 'BEGIN'\
|
</xsl:text>
|
||||||
' {
|
</xsl:if>
|
||||||
cont = ""
|
</xsl:template>
|
||||||
}
|
|
||||||
{
|
|
||||||
$0 = cont $0
|
|
||||||
cont = ""
|
|
||||||
|
|
||||||
if ($0 ~ /[-]$/) {
|
</xsl:stylesheet>
|
||||||
# Note : soft-hyphen is iso8859 0xad
|
|
||||||
# Break at last whitespace
|
|
||||||
match($0, "[ \t][^ \t]+$")
|
|
||||||
line = substr($0, 0, RSTART)
|
|
||||||
cont = substr($0, RSTART, RLENGTH-1)
|
|
||||||
$0 = line
|
|
||||||
}
|
|
||||||
|
|
||||||
if($0 == "\f") {
|
EOF
|
||||||
print "</p>\n<hr>\n<p>"
|
|
||||||
next
|
|
||||||
}
|
|
||||||
|
|
||||||
print $0 "<br>"
|
echo '</body></html>'
|
||||||
}
|
|
||||||
END {
|
|
||||||
printf("</p></body></html>\n");
|
|
||||||
}' | iconv -f UTF-8 -t UTF-8 -c -s
|
|
||||||
|
|
||||||
cd /
|
cd /
|
||||||
# exit normally
|
# exit normally
|
||||||
|
|||||||
@ -13,6 +13,9 @@
|
|||||||
# We just hack into the scribus XML, taking advantage that the tag of
|
# We just hack into the scribus XML, taking advantage that the tag of
|
||||||
# interest is apparently always output on a single line.
|
# interest is apparently always output on a single line.
|
||||||
# The text seems to be found in attribute CH of tag ITEXT, it is utf-8
|
# The text seems to be found in attribute CH of tag ITEXT, it is utf-8
|
||||||
|
#
|
||||||
|
# Tried to convert this to xsltproc but it seems that quite a few
|
||||||
|
# Scribus documents are not actually proper XML
|
||||||
|
|
||||||
# set variables
|
# set variables
|
||||||
LANG=C ; export LANG
|
LANG=C ; export LANG
|
||||||
@ -22,8 +25,6 @@ filetype=Scribus
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#RECFILTCOMMONCODE
|
#RECFILTCOMMONCODE
|
||||||
##############################################################################
|
##############################################################################
|
||||||
# !! Leave the previous line unmodified!! Code imported from the
|
# !! Leave the previous line unmodified!! Code imported from the
|
||||||
|
|||||||
@ -18,10 +18,6 @@ progname="rclsoff"
|
|||||||
filetype=openoffice
|
filetype=openoffice
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#RECFILTCOMMONCODE
|
#RECFILTCOMMONCODE
|
||||||
##############################################################################
|
##############################################################################
|
||||||
# !! Leave the previous line unmodified!! Code imported from the
|
# !! Leave the previous line unmodified!! Code imported from the
|
||||||
@ -88,7 +84,7 @@ umask 77
|
|||||||
# !! Leave the following line unmodified !
|
# !! Leave the following line unmodified !
|
||||||
#ENDRECFILTCOMMONCODE
|
#ENDRECFILTCOMMONCODE
|
||||||
|
|
||||||
checkcmds awk iconv unzip
|
checkcmds xsltproc
|
||||||
|
|
||||||
# We need a temporary directory
|
# We need a temporary directory
|
||||||
if test z"$RECOLL_TMPDIR" != z; then
|
if test z"$RECOLL_TMPDIR" != z; then
|
||||||
@ -116,92 +112,98 @@ trap cleanup EXIT HUP QUIT INT TERM
|
|||||||
unzip -q -d $tmpdir/rclsofftmp "$infile"
|
unzip -q -d $tmpdir/rclsofftmp "$infile"
|
||||||
cd $tmpdir/rclsofftmp
|
cd $tmpdir/rclsofftmp
|
||||||
|
|
||||||
# Note: there can be newlines inside the description field, we don't want
|
echo '<html><head>
|
||||||
# them...
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">'
|
||||||
descsedprog='/<dc:description>/,/<\/dc:description>/{
|
|
||||||
s!.*<dc:description>!!
|
|
||||||
s!</dc:description>.*!!
|
|
||||||
H
|
|
||||||
${
|
|
||||||
g
|
|
||||||
s/\n/ /g
|
|
||||||
p
|
|
||||||
}
|
|
||||||
}
|
|
||||||
'
|
|
||||||
description=`sed -n -e "$descsedprog" meta.xml`
|
|
||||||
#echo description "$description"
|
|
||||||
|
|
||||||
# Takes tag name as parameter and creates sed program to extract single
|
xsltproc - meta.xml <<EOF
|
||||||
# line meta tags values.
|
<?xml version="1.0"?>
|
||||||
setmetasedprog() {
|
<xsl:stylesheet version="1.0"
|
||||||
metasedprog="s/\"/'/g"'
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
/.*<'"$1"'>\([^<]*\).*/s//\1/p
|
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
|
||||||
'
|
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||||
}
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
|
||||||
|
xmlns:ooo="http://openoffice.org/2004/office"
|
||||||
|
exclude-result-prefixes="office xlink meta ooo dc"
|
||||||
|
>
|
||||||
|
|
||||||
setmetasedprog dc:subject
|
<xsl:output method="html" encoding="UTF-8"/>
|
||||||
subject=`sed -n -e "$metasedprog" meta.xml`
|
|
||||||
#echo subject: $subject
|
|
||||||
|
|
||||||
setmetasedprog dc:title
|
<xsl:template match="/office:document-meta">
|
||||||
title=`sed -n -e "$metasedprog" meta.xml`
|
<xsl:apply-templates select="office:meta/dc:description"/>
|
||||||
#echo title: $title
|
<xsl:apply-templates select="office:meta/dc:subject"/>
|
||||||
|
<xsl:apply-templates select="office:meta/dc:title"/>
|
||||||
|
<xsl:apply-templates select="office:meta/meta:keyword"/>
|
||||||
|
<xsl:apply-templates select="office:meta/dc:creator"/>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
setmetasedprog meta:keyword
|
<xsl:template match="dc:title">
|
||||||
keywords=`sed -n -e "$metasedprog" meta.xml`
|
<title> <xsl:value-of select="."/> </title><xsl:text>
|
||||||
#echo keywords: $keywords
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
setmetasedprog dc:creator
|
<xsl:template match="dc:description">
|
||||||
creator=`sed -n -e "$metasedprog" meta.xml`
|
<meta>
|
||||||
#echo creator: $creator
|
<xsl:attribute name="name">abstract</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
# Note: next expr inserts a newline at each end of paragraph (for preview)
|
<xsl:template match="dc:subject">
|
||||||
content="`sed -e 's!</text:p>!\\
|
<meta>
|
||||||
!g' -e 's/<[^>]*>/ /g' < content.xml`"
|
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
#echo content "$content"
|
<xsl:template match="dc:creator">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">author</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
# output the result
|
<xsl:template match="meta:keyword">
|
||||||
echo '<html><head>'
|
<meta>
|
||||||
echo '<title>' "$title" '</title>'
|
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||||
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
<xsl:attribute name="content">
|
||||||
echo '<meta name="description" content="' "$description $subject" '">'
|
<xsl:value-of select="."/>
|
||||||
echo '<meta name="keywords" content="' "$keywords" '">'
|
</xsl:attribute>
|
||||||
echo '<meta name="author" content="' "$creator" '">'
|
</meta><xsl:text>
|
||||||
echo '</head><body><p>'
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
# The strange 'BEGIN' setup is to prevent 'file' from thinking this file
|
</xsl:stylesheet>
|
||||||
# is an awk program
|
EOF
|
||||||
echo "$content" | sed -e "s/'/'/g" -e 's/"/"/g' |\
|
|
||||||
awk 'BEGIN'\
|
|
||||||
' {
|
|
||||||
cont = ""
|
|
||||||
}
|
|
||||||
{
|
|
||||||
$0 = cont $0
|
|
||||||
cont = ""
|
|
||||||
|
|
||||||
if ($0 ~ /[-]$/) {
|
echo '</head><body>'
|
||||||
# Note : soft-hyphen is iso8859 0xad
|
|
||||||
# Break at last whitespace
|
|
||||||
match($0, "[ \t][^ \t]+$")
|
|
||||||
line = substr($0, 0, RSTART)
|
|
||||||
cont = substr($0, RSTART, RLENGTH-1)
|
|
||||||
$0 = line
|
|
||||||
}
|
|
||||||
|
|
||||||
if($0 == "\f") {
|
xsltproc - content.xml <<EOF
|
||||||
print "</p>\n<hr>\n<p>"
|
<?xml version="1.0"?>
|
||||||
next
|
<xsl:stylesheet version="1.0"
|
||||||
}
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
|
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
|
||||||
|
exclude-result-prefixes="text"
|
||||||
|
>
|
||||||
|
|
||||||
print $0 "<br>"
|
<xsl:output method="html" encoding="UTF-8"/>
|
||||||
}
|
|
||||||
END {
|
|
||||||
printf("</p></body></html>\n");
|
|
||||||
}' | iconv -f UTF-8 -t UTF-8 -c -s
|
|
||||||
|
|
||||||
|
<xsl:template match="text:p">
|
||||||
|
<p><xsl:value-of select="."/></p><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
</xsl:stylesheet>
|
||||||
|
EOF
|
||||||
|
echo '</body></html>'
|
||||||
cd /
|
cd /
|
||||||
# exit normally
|
|
||||||
exit 0
|
exit 0
|
||||||
|
|||||||
@ -1,10 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rclsvg,v 1.3 2008-10-08 08:27:34 dockes Exp $ (C) 2004 J.F.Dockes
|
|
||||||
# Parts taken from Estraier:
|
|
||||||
#================================================================
|
|
||||||
# Estraier: a personal full-text search system
|
|
||||||
# Copyright (C) 2003-2004 Mikio Hirabayashi
|
|
||||||
#================================================================
|
|
||||||
#================================================================
|
#================================================================
|
||||||
# Extract text from a Scalable Vector Graphics file
|
# Extract text from a Scalable Vector Graphics file
|
||||||
#================================================================
|
#================================================================
|
||||||
@ -82,56 +77,51 @@ umask 77
|
|||||||
# !! Leave the following line unmodified !
|
# !! Leave the following line unmodified !
|
||||||
#ENDRECFILTCOMMONCODE
|
#ENDRECFILTCOMMONCODE
|
||||||
|
|
||||||
checkcmds iconv sed
|
checkcmds xsltproc
|
||||||
|
|
||||||
encoding=`sed -ne '/<?xml/s/.*encoding="\([^"]*\).*/\1/p' < $infile`
|
# Run the stylesheet given on stdin against the SVG input. The path is quoted
# so that file names containing blanks or glob characters are passed to
# xsltproc as a single argument.
xsltproc - "$infile" <<EOF
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<xsl:stylesheet version="1.0"
|
||||||
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
|
xmlns:svg="http://www.w3.org/2000/svg"
|
||||||
|
exclude-result-prefixes="svg"
|
||||||
|
>
|
||||||
|
|
||||||
if test X$encoding = X ; then encoding=UTF-8;fi
|
<xsl:output method="html" encoding="UTF-8"/>
|
||||||
|
|
||||||
# We use several sed instances to make our life easier. Not good for
|
<xsl:template match="/">
|
||||||
# performance, and a sed guru might be able to do better.
|
<html>
|
||||||
#
|
<head>
|
||||||
# The first sed makes sure each tag starts on a new line
|
<xsl:apply-templates select="svg:svg/svg:title"/>
|
||||||
# The second one selects the tags we're interested in.
|
<xsl:apply-templates select="svg:svg/svg:desc"/>
|
||||||
# The last strips the tags, leaving only text.
|
</head>
|
||||||
#
|
<body>
|
||||||
# The whole thing wholly ignore issues like '<' inside quoted strings.
|
<xsl:apply-templates select="//svg:text"/>
|
||||||
#
|
</body>
|
||||||
# We could/should add code to explicitly separate title and other
|
</html>
|
||||||
# metadata elements.
|
</xsl:template>
|
||||||
|
|
||||||
# Insert new line before each tag
|
<xsl:template match="svg:desc">
|
||||||
sptagonline='s/</\
|
<meta>
|
||||||
</g'
|
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
# Select tags
|
<xsl:template match="svg:title">
|
||||||
spselecttags='/<title/,/<\/title>/p
|
<title><xsl:value-of select="."/></title><xsl:text>
|
||||||
/<desc/,/<\/desc>/p
|
</xsl:text>
|
||||||
/<metadata/,/<\/metadata>/p
|
</xsl:template>
|
||||||
/<text/,/<\/text>/p'
|
|
||||||
|
<xsl:template match="svg:text">
|
||||||
|
<p><xsl:value-of select="."/></p><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
# Strip tags
|
</xsl:stylesheet>
|
||||||
spstriptags='#n
|
EOF
|
||||||
/</{
|
|
||||||
:c
|
|
||||||
/>/!{
|
|
||||||
N
|
|
||||||
b c
|
|
||||||
}
|
|
||||||
/>/s/<.*>//g
|
|
||||||
}
|
|
||||||
/^[ ]*$/!p'
|
|
||||||
|
|
||||||
content=`sed -e "$sptagonline" < $infile | sed -ne "$spselecttags" | \
|
|
||||||
sed -ne "$spstriptags"`
|
|
||||||
|
|
||||||
(echo '<html><head>'
|
|
||||||
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
|
|
||||||
echo '</head><body><pre>'
|
|
||||||
echo "$content"
|
|
||||||
echo '</pre></body></html>') \
|
|
||||||
| iconv -f $encoding -t UTF-8 -c -s
|
|
||||||
|
|
||||||
|
|
||||||
# exit normally
|
|
||||||
exit 0
|
exit 0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user