internal xslt: support multiple archive members containing metadata or body text. Apply to indexing openxml foot/endnotes

This commit is contained in:
Jean-Francois Dockes 2020-04-22 14:04:20 +02:00
parent ad03540394
commit 19149020e3
6 changed files with 288 additions and 159 deletions

View File

@ -6,7 +6,7 @@
PPA_KEYID=D38B9201
RCLVERS=1.27.0pre1
RCLVERS=1.27.0pre3
SCOPEVERS=1.20.2.4
GSSPVERS=1.0.0
PPAVERS=2

View File

@ -1,9 +1,10 @@
recoll (1.27.0pre2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
recoll (1.27.0pre3-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Support for language-sensitive analysis of Korean text
* Index docx endnotes and footnotes
* Other small improvements.
-- Jean-Francois Dockes <jf@dockes.org> Thu, 21 Apr 2020 15:32:00 +0100
-- Jean-Francois Dockes <jf@dockes.org> Tue, 21 Apr 2020 15:32:00 +0100
recoll (1.26.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low

View File

@ -1 +1 @@
1.27.0pre2
1.27.0pre3

View File

@ -47,13 +47,13 @@ public:
virtual ~FileScanXML() {
if (ctxt) {
xmlFreeParserCtxt(ctxt);
// This should not be necessary (done by free), but see
// http://xmlsoft.org/xmlmem.html#Compacting The
// malloc_trim() and mallopt() doc seems to be a bit
// misleading, there is probably a frag size under which
// free() does not try to malloc_trim() at all
// This should not be necessary (done by free), but see
// http://xmlsoft.org/xmlmem.html#Compacting The
// malloc_trim() and mallopt() doc seems to be a bit
// misleading, there is probably a frag size under which
// free() does not try to malloc_trim() at all
#ifdef HAVE_MALLOC_TRIM
malloc_trim(0);
malloc_trim(0);
#endif /* HAVE_MALLOC_TRIM */
}
}
@ -112,11 +112,11 @@ public:
Internal(MimeHandlerXslt *_p)
: p(_p) {}
~Internal() {
if (metaOrAllSS) {
xsltFreeStylesheet(metaOrAllSS);
for (auto& entry : metaOrAllSS) {
xsltFreeStylesheet(entry.second);
}
if (bodySS) {
xsltFreeStylesheet(bodySS);
for (auto& entry : bodySS) {
xsltFreeStylesheet(entry.second);
}
}
@ -128,10 +128,16 @@ public:
MimeHandlerXslt *p;
bool ok{false};
string metamember;
xsltStylesheet *metaOrAllSS{nullptr};
string bodymember;
xsltStylesheet *bodySS{nullptr};
// Pairs of zip archive member names and style sheet names for the
// metadata, and map of style sheets referenced by their names.
// Exception: there can be a single entry which does meta and
// body, in which case bodyMembers/bodySS are empty.
vector<pair<string,string>> metaMembers;
map <string, xsltStylesheet*> metaOrAllSS;
// Same for body data
vector<pair<string,string>> bodyMembers;
map<string, xsltStylesheet*> bodySS;
string result;
string filtersdir;
};
@ -152,20 +158,43 @@ MimeHandlerXslt::MimeHandlerXslt(RclConfig *cnf, const std::string& id,
xmlLoadExtDtdDefaultValue = 0;
// params can be "xslt stylesheetall" or
// "xslt metamember metastylesheet bodymember bodystylesheet"
// "xslt meta/body memberpath stylesheetnm [... ... ...] ..."
if (params.size() == 2) {
m->metaOrAllSS = m->prepare_stylesheet(params[1]);
if (m->metaOrAllSS) {
auto ss = m->prepare_stylesheet(params[1]);
if (ss) {
m->ok = true;
m->metaOrAllSS[""] = ss;
}
} else if (params.size() == 5) {
m->metamember = params[1];
m->metaOrAllSS = m->prepare_stylesheet(params[2]);
m->bodymember = params[3];
m->bodySS = m->prepare_stylesheet(params[4]);
if (m->metaOrAllSS && m->bodySS) {
m->ok = true;
} else if (params.size() > 3 && params.size() % 3 == 1) {
auto it = params.begin();
it++;
while (it != params.end()) {
// meta/body membername ssname
const string& tp = *it++;
const string& znm = *it++;
const string& ssnm = *it++;
vector<pair<string,string>> *mbrv;
map<string,xsltStylesheet*> *ssmp;
if (tp == "meta") {
mbrv = &m->metaMembers;
ssmp = &m->metaOrAllSS;
} else if (tp == "body") {
mbrv = &m->bodyMembers;
ssmp = &m->bodySS;
} else {
LOGERR("MimeHandlerXslt: bad member type " << tp << endl);
return;
}
if (ssmp->find(ssnm) == ssmp->end()) {
auto ss = m->prepare_stylesheet(ssnm);
if (nullptr == ss) {
return;
}
ssmp->insert({ssnm, ss});
}
mbrv->push_back({znm, ssnm});
}
m->ok = true;
} else {
LOGERR("MimeHandlerXslt: constructor with wrong param vector: " <<
stringsToString(params) << endl);
@ -209,7 +238,7 @@ bool MimeHandlerXslt::Internal::apply_stylesheet(
res = string_scan(data.c_str(), data.size(), &XMLdoc, &reason, md5p);
} else {
res = string_scan(data.c_str(), data.size(), member, &XMLdoc,
&reason);
&reason);
}
}
if (!res) {
@ -231,7 +260,7 @@ bool MimeHandlerXslt::Internal::apply_stylesheet(
}
xmlChar *outstr;
int outlen;
xsltSaveResultToString(&outstr, &outlen, transformed, metaOrAllSS);
xsltSaveResultToString(&outstr, &outlen, transformed, ssp);
result = string((const char*)outstr, outlen);
xmlFree(outstr);
xmlFreeDoc(transformed);
@ -242,14 +271,15 @@ bool MimeHandlerXslt::Internal::apply_stylesheet(
bool MimeHandlerXslt::Internal::process_doc_or_string(
bool forpreview, const string& fn, const string& data)
{
if (nullptr == metaOrAllSS && nullptr == bodySS) {
LOGERR("MimeHandlerXslt::set_document_file_impl: both ss empty??\n");
return false;
}
p->m_metaData[cstr_dj_keycharset] = cstr_utf8;
if (nullptr == bodySS) {
if (bodySS.empty()) {
auto ssp = metaOrAllSS.find("");
if (ssp == metaOrAllSS.end()) {
LOGERR("MimeHandlerXslt::process: no style sheet !\n");
return false;
}
string md5;
if (apply_stylesheet(fn, string(), data, metaOrAllSS, result,
if (apply_stylesheet(fn, string(), data, ssp->second, result,
forpreview ? nullptr : &md5)) {
if (!forpreview) {
p->m_metaData[cstr_dj_keymd5] = md5;
@ -260,16 +290,34 @@ bool MimeHandlerXslt::Internal::process_doc_or_string(
} else {
result = "<html>\n<head>\n<meta http-equiv=\"Content-Type\""
"content=\"text/html; charset=UTF-8\">";
string part;
if (!apply_stylesheet(fn,metamember, data, metaOrAllSS, part, nullptr)) {
return false;
for (auto& member : metaMembers) {
auto it = metaOrAllSS.find(member.second);
if (it == metaOrAllSS.end()) {
LOGERR("MimeHandlerXslt::process: no style sheet found for " <<
member.first << ":" << member.second << "!\n");
return false;
}
string part;
if (!apply_stylesheet(fn, member.first, data, it->second, part, nullptr)) {
return false;
}
result += part;
}
result += part;
result += "</head>\n<body>\n";
if (!apply_stylesheet(fn, bodymember, data, bodySS, part, nullptr)) {
return false;
for (auto& member : bodyMembers) {
auto it = bodySS.find(member.second);
if (it == bodySS.end()) {
LOGERR("MimeHandlerXslt::process: no style sheet found for " <<
member.first << ":" << member.second << "!\n");
return false;
}
string part;
if (!apply_stylesheet(fn, member.first, data, it->second, part, nullptr)) {
return false;
}
result += part;
}
result += part;
result += "</body></html>";
}
return true;
@ -307,7 +355,7 @@ bool MimeHandlerXslt::next_document()
return false;
}
if (m_havedoc == false)
return false;
return false;
m_havedoc = false;
m_metaData[cstr_dj_keymt] = cstr_texthtml;
m_metaData[cstr_dj_keycontent].swap(m->result);

View File

@ -53,9 +53,6 @@ application/x-zstd = uncompress rcluncomp "unzstd --rm -q" %f %t
# A different format (e.g. text/plain), and a character set can be defined for
# each filter, see the examples below (e.g. msword)
[index]
application/epub+zip = execm rclepub
# Returned by xdg-mime for .js. Future-proofing
application/javascript = internal text/plain
# MSWORD: the rcldoc script handles a number of marginal cases that raw
# antiword won't:
@ -67,52 +64,99 @@ application/msword = execm rcldoc.py
#application/msword = exec antiword -t -i 1 -m UTF-8;mimetype=text/plain
# You can also use wvware directly but it's much slower.
# application/msword = exec wvWare --charset=utf-8 --nographics
application/x-hwp = execm rclhwp.py
application/vnd.ms-excel = execm rclxls.py
application/vnd.ms-outlook = execm rclpst.py
application/vnd.ms-powerpoint = execm rclppt.py
# Also handle the mime type returned by "file -i" for a suffix-less word
# file. This could probably just as well be an excel file, but we have to
# choose one.
application/vnd.ms-office = execm rcldoc.py
application/ogg = execm rclaudio
application/pdf = execm rclpdf.py
application/postscript = exec rclps
application/sql = internal text/plain
application/vnd.ms-excel = execm rclxls.py
application/vnd.ms-outlook = execm rclpst.py
application/vnd.ms-powerpoint = execm rclppt.py
application/vnd.oasis.opendocument.text = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.text-template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.spreadsheet = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.graphics = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation-flat-xml = internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.text-flat-xml = internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.spreadsheet-flat-xml = internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.text = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.text-template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.spreadsheet = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.graphics = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation-flat-xml = \
internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.text-flat-xml = \
internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.spreadsheet-flat-xml = \
internal xsltproc opendoc-flat.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body word/document.xml openxml-word-body.xsl \
body word/footnotes.xml openxml-word-body.xsl \
body word/endnotes.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body word/document.xml openxml-word-body.xsl \
body word/footnotes.xml openxml-word-body.xsl \
body word/endnotes.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.presentationml.template = \
execm rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.presentation = \
execm rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.sun.xml.calc = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.calc.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.math = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.global = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.sun.xml.calc = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.calc.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.draw = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.draw.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.impress = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.impress.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.math = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.writer = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.global = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
#application/x-mobipocket-ebook = execm rclmobi
#application/x-tar = execm rcltar
application/epub+zip = execm rclepub
application/javascript = internal text/plain
application/ogg = execm rclaudio
application/pdf = execm rclpdf.py
application/postscript = exec rclps
application/sql = internal text/plain
application/vnd.wordperfect = exec wpd2html;mimetype=text/html
application/x-7z-compressed = execm rcl7z
application/x-abiword = internal xsltproc abiword.xsl
application/x-awk = internal text/plain
application/x-chm = execm rclchm
@ -122,74 +166,70 @@ application/x-flac = execm rclaudio
application/x-gnote = execm rclxml.py
application/x-gnuinfo = execm rclinfo
application/x-gnumeric = internal xsltproc gnumeric.xsl
application/x-hwp = execm rclhwp.py
application/x-kword = exec rclkwd
application/x-lyx = exec rcllyx
application/x-mimehtml = internal message/rfc822
#application/x-mobipocket-ebook = execm rclmobi
application/x-okular-notes = internal xsltproc okular-note.xsl
application/x-perl = internal text/plain
# Returned by xdg-mime for .php. Future-proofing
application/x-php = internal text/plain
application/x-rar = execm rclrar;charset=default
application/x-ruby = internal text/plain
application/x-scribus = exec rclscribus
application/x-shellscript = internal text/plain
#application/x-tar = execm rcltar
application/x-tex = exec rcltex
application/x-webarchive = execm rclwar
application/x-zerosize = internal
application/zip = execm rclzip;charset=default
application/x-7z-compressed = execm rcl7z
audio/ape = execm rclaudio
audio/mpeg = execm rclaudio
audio/mp4 = execm rclaudio
video/mp4 = execm rclaudio
video/x-msvideo = execm rclimg
audio/aac = execm rclaudio
audio/ape = execm rclaudio
audio/mp4 = execm rclaudio
audio/mpeg = execm rclaudio
audio/x-karaoke = execm rclkar
audio/x-wavpack = execm rclaudio
audio/x-musepack = execm rclaudio
audio/x-wavpack = execm rclaudio
image/gif = execm rclimg
image/jp2 = execm rclimg
image/jpeg = execm rclimg
image/png = execm rclimg
image/svg+xml = internal xsltproc svg.xsl
image/tiff = execm rclimg
image/vnd.djvu = execm rcldjvu.py
image/svg+xml = internal xsltproc svg.xsl
image/x-xcf = execm rclimg
image/x-nikon-nef = execm rclimg
image/x-xcf = execm rclimg
inode/symlink = internal
application/x-zerosize = internal
inode/x-empty = internal application/x-zerosize
message/rfc822 = internal
text/calendar = execm rclics;mimetype=text/plain
text/css = internal text/plain
text/html = internal
text/plain = internal
text/rtf = exec unrtf --nopict --html;mimetype=text/html
text/x-c = internal
text/x-c++ = internal
text/x-c+ = internal
text/x-csharp = internal text/plain
text/css = internal text/plain
application/javascript = internal text/plain
text/x-bibtex = exec rclbibtex.sh ; mimetype = text/plain
text/x-c = internal
text/x-c+ = internal
text/x-c++ = internal
text/x-chm-html = internal text/html
text/x-csharp = internal text/plain
text/x-csv = internal text/plain
text/x-fictionbook = internal xsltproc fb2.xsl
text/x-gaim-log = exec rclgaim
text/x-html-aptosid-man = exec rclaptosidman
text/x-lua = internal
text/x-chm-html = internal text/html
text/x-ini = internal text/plain
text/x-java = internal text/plain
text/x-lua = internal
text/x-mail = internal
text/x-man = exec rclman
text/x-perl = internal text/plain
text/x-purple-log = exec rclpurple
text/x-purple-html-log = internal text/html
text/x-purple-log = exec rclpurple
text/x-python = exec rclpython
text/x-ruby = internal
text/x-shellscript = internal text/plain
text/x-srt = internal text/plain
text/x-tex = exec rcltex
video/mp4 = execm rclaudio
video/x-msvideo = execm rclimg
# Generic XML is best indexed as text, else it generates too many errors

View File

@ -46,109 +46,149 @@ application/x-lzma = uncompress python rcluncomp.py 7z %f %t
# A different format (e.g. text/plain), and a character set can be defined for
# each filter, see the examples below (e.g. msword)
[index]
application/msword = execm python rcldoc.py
application/vnd.ms-excel = execm python rclxls.py
application/vnd.ms-outlook = execm python rclpst.py
application/vnd.ms-powerpoint = execm python rclppt.py
application/pdf = execm python rclpdf.py
application/x-hwp = execm python rclhwp.py
application/vnd.oasis.opendocument.text = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.text-template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.spreadsheet = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.graphics = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.calc = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.calc.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.math = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.global = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html
application/x-abiword = internal xsltproc abiword.xsl
text/x-fictionbook = internal xsltproc fb2.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.presentationml.template = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.presentation = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
application/epub+zip = execm python rclepub
# Returned by xdg-mime for .js. Future-proofing
application/javascript = internal text/plain
# Also handle the mime type returned by "file -i" for a suffix-less word
# file. This could probably just as well be an excel file, but we have to
# choose one.
application/vnd.ms-office = execm python rcldoc.py
application/vnd.ms-outlook = execm python rclpst.py
application/vnd.oasis.opendocument.text = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.text-template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.spreadsheet = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.graphics = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation-flat-xml = \
internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.text-flat-xml = \
internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.spreadsheet-flat-xml = \
internal xsltproc opendoc-flat.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body word/document.xml openxml-word-body.xsl \
body word/footnotes.xml openxml-word-body.xsl \
body word/endnotes.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body word/document.xml openxml-word-body.xsl \
body word/footnotes.xml openxml-word-body.xsl \
body word/endnotes.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.presentationml.template = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.presentation = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.sun.xml.calc = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.calc.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.draw = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.draw.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.impress = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.impress.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.math = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.writer = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.global = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
#application/postscript = exec rclps
#application/x-gnuinfo = execm python rclinfo
#application/x-tar = execm python rcltar
application/epub+zip = execm python rclepub
application/javascript = internal text/plain
application/ogg = execm python rclaudio
application/pdf = execm python rclpdf.py
application/sql = internal text/plain
application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html
application/x-7z-compressed = execm python rcl7z
application/x-abiword = internal xsltproc abiword.xsl
application/x-awk = internal text/plain
application/x-chm = execm python rclchm
application/x-dia-diagram = execm python rcldia;mimetype=text/plain
application/x-flac = execm python rclaudio
application/x-gnote = execm python rclxml.py
#application/x-gnuinfo = execm python rclinfo
application/x-hwp = execm python rclhwp.py
application/x-mimehtml = internal message/rfc822
application/x-perl = internal text/plain
application/x-php = internal text/plain
application/x-rar = execm python rclrar;charset=default
application/x-shellscript = internal text/plain
#application/x-tar = execm python rcltar
application/x-webarchive = execm python rclwar
application/x-7z-compressed = execm python rcl7z
application/x-zerosize = internal
application/zip = execm python rclzip;charset=default
audio/mpeg = execm python rclaudio
audio/mp4 = execm python rclaudio
audio/aac = execm python rclaudio
audio/mp4 = execm python rclaudio
audio/mpeg = execm python rclaudio
audio/x-karaoke = execm python rclkar
image/gif = execm rclimg.exe
image/jp2 = execm rclimg.exe
image/jpeg = execm rclimg.exe
image/png = execm rclimg.exe
image/tiff = execm rclimg.exe
image/svg+xml = internal xsltproc svg.xsl
#image/x-xcf = execm rclimg.exe
image/tiff = execm rclimg.exe
inode/symlink = internal
application/x-zerosize = internal
inode/x-empty = internal application/x-zerosize
message/rfc822 = internal
text/calendar = execm python rclics;mimetype=text/plain
text/css = internal text/plain
text/html = internal
text/plain = internal
text/rtf = exec unrtf --nopict --html;mimetype=text/html
#text/rtf = execm python rclrtf.py
text/rtf = exec unrtf --nopict --html;mimetype=text/html
text/x-c = internal
text/x-c++ = internal
text/x-c+ = internal
text/x-csharp = internal text/plain
text/css = internal text/plain
application/javascript = internal text/plain
text/x-csv = internal text/plain
text/x-c++ = internal
text/x-chm-html = internal text/html
text/x-csharp = internal text/plain
text/x-csv = internal text/plain
text/x-fictionbook = internal xsltproc fb2.xsl
text/x-ini = internal text/plain
text/x-mail = internal
text/x-perl = internal text/plain
text/x-python = exec python rclpython
text/x-shellscript = internal text/plain
text/x-srt = internal text/plain
image/x-xcf = execm rclimg.exe
# Generic XML is best indexed as text, else it generates too many errors
# All parameter and tag names, attribute values etc, are indexed as