From 19149020e377d4899093992da49eb5c1f7941a8a Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Wed, 22 Apr 2020 14:04:20 +0200 Subject: [PATCH] internal xslt: support multiple archive members containing metadata or body text. Apply to indexing openxml foot/endnotes --- packaging/debian/buildppa.sh | 2 +- packaging/debian/debian/changelog | 5 +- src/VERSION | 2 +- src/internfile/mh_xslt.cpp | 128 ++++++++++++++++-------- src/sampleconf/mimeconf | 154 ++++++++++++++++++----------- src/windows/mimeconf | 156 +++++++++++++++++++----------- 6 files changed, 288 insertions(+), 159 deletions(-) diff --git a/packaging/debian/buildppa.sh b/packaging/debian/buildppa.sh index a2b19515..c9fb5d9b 100644 --- a/packaging/debian/buildppa.sh +++ b/packaging/debian/buildppa.sh @@ -6,7 +6,7 @@ PPA_KEYID=D38B9201 -RCLVERS=1.27.0pre1 +RCLVERS=1.27.0pre3 SCOPEVERS=1.20.2.4 GSSPVERS=1.0.0 PPAVERS=2 diff --git a/packaging/debian/debian/changelog b/packaging/debian/debian/changelog index 3ed9efc3..99c2f5ed 100644 --- a/packaging/debian/debian/changelog +++ b/packaging/debian/debian/changelog @@ -1,9 +1,10 @@ -recoll (1.27.0pre2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low +recoll (1.27.0pre3-1~ppaPPAVERS~SERIES1) SERIES; urgency=low * Support for language-sensitive analysis of Korean text + * Index docx endnotes and footnotes * Other small improvements. - -- Jean-Francois Dockes Thu, 21 Apr 2020 15:32:00 +0100 + -- Jean-Francois Dockes Tue, 21 Apr 2020 15:32:00 +0100 recoll (1.26.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low diff --git a/src/VERSION b/src/VERSION index ff2c550d..3533845d 100644 --- a/src/VERSION +++ b/src/VERSION @@ -1 +1 @@ -1.27.0pre2 +1.27.0pre3 diff --git a/src/internfile/mh_xslt.cpp b/src/internfile/mh_xslt.cpp index 5afdf19d..f350e77f 100644 --- a/src/internfile/mh_xslt.cpp +++ b/src/internfile/mh_xslt.cpp @@ -47,13 +47,13 @@ public: virtual ~FileScanXML() { if (ctxt) { xmlFreeParserCtxt(ctxt); - // This should not be necessary (done by free), but see - // http://xmlsoft.org/xmlmem.html#Compacting The - // malloc_trim() and mallopt() doc seems to be a bit - // misleading, there is probably a frag size under which - // free() does not try to malloc_trim() at all + // This should not be necessary (done by free), but see + // http://xmlsoft.org/xmlmem.html#Compacting The + // malloc_trim() and mallopt() doc seems to be a bit + // misleading, there is probably a frag size under which + // free() does not try to malloc_trim() at all #ifdef HAVE_MALLOC_TRIM - malloc_trim(0); + malloc_trim(0); #endif /* HAVE_MALLOC_TRIM */ } } @@ -112,11 +112,11 @@ public: Internal(MimeHandlerXslt *_p) : p(_p) {} ~Internal() { - if (metaOrAllSS) { - xsltFreeStylesheet(metaOrAllSS); + for (auto& entry : metaOrAllSS) { + xsltFreeStylesheet(entry.second); } - if (bodySS) { - xsltFreeStylesheet(bodySS); + for (auto& entry : bodySS) { + xsltFreeStylesheet(entry.second); } } @@ -128,10 +128,16 @@ public: MimeHandlerXslt *p; bool ok{false}; - string metamember; - xsltStylesheet *metaOrAllSS{nullptr}; - string bodymember; - xsltStylesheet *bodySS{nullptr}; + + // Pairs of zip archive member names and style sheet names for the + // metadata, and map of style sheets refd by their names. + // Exception: there can be a single entry which does meta and + // body, in which case bodymembers/bodySS are empty. + vector> metaMembers; + map metaOrAllSS; + // Same for body data + vector> bodyMembers; + map bodySS; string result; string filtersdir; }; @@ -152,20 +158,43 @@ MimeHandlerXslt::MimeHandlerXslt(RclConfig *cnf, const std::string& id, xmlLoadExtDtdDefaultValue = 0; // params can be "xslt stylesheetall" or - // "xslt metamember metastylesheet bodymember bodystylesheet" + // "xslt meta/body memberpath stylesheetnm [... ... ...] ... if (params.size() == 2) { - m->metaOrAllSS = m->prepare_stylesheet(params[1]); - if (m->metaOrAllSS) { + auto ss = m->prepare_stylesheet(params[1]); + if (ss) { m->ok = true; + m->metaOrAllSS[""] = ss; } - } else if (params.size() == 5) { - m->metamember = params[1]; - m->metaOrAllSS = m->prepare_stylesheet(params[2]); - m->bodymember = params[3]; - m->bodySS = m->prepare_stylesheet(params[4]); - if (m->metaOrAllSS && m->bodySS) { - m->ok = true; + } else if (params.size() > 3 && params.size() % 3 == 1) { + auto it = params.begin(); + it++; + while (it != params.end()) { + // meta/body membername ssname + const string& tp = *it++; + const string& znm = *it++; + const string& ssnm = *it++; + vector> *mbrv; + map *ssmp; + if (tp == "meta") { + mbrv = &m->metaMembers; + ssmp = &m->metaOrAllSS; + } else if (tp == "body") { + mbrv = &m->bodyMembers; + ssmp = &m->bodySS; + } else { + LOGERR("MimeHandlerXslt: bad member type " << tp << endl); + return; + } + if (ssmp->find(ssnm) == ssmp->end()) { + auto ss = m->prepare_stylesheet(ssnm); + if (nullptr == ss) { + return; + } + ssmp->insert({ssnm, ss}); + } + mbrv->push_back({znm, ssnm}); } + m->ok = true; } else { LOGERR("MimeHandlerXslt: constructor with wrong param vector: " << stringsToString(params) << endl); @@ -209,7 +238,7 @@ bool MimeHandlerXslt::Internal::apply_stylesheet( res = string_scan(data.c_str(), data.size(), &XMLdoc, &reason, md5p); } else { res = string_scan(data.c_str(), data.size(), member, &XMLdoc, - &reason); + &reason); } } if (!res) { @@ -231,7 +260,7 @@ bool MimeHandlerXslt::Internal::apply_stylesheet( } xmlChar *outstr; int outlen; - xsltSaveResultToString(&outstr, &outlen, transformed, metaOrAllSS); + xsltSaveResultToString(&outstr, &outlen, transformed, ssp); result = string((const char*)outstr, outlen); xmlFree(outstr); xmlFreeDoc(transformed); @@ -242,14 +271,15 @@ bool MimeHandlerXslt::Internal::apply_stylesheet( bool MimeHandlerXslt::Internal::process_doc_or_string( bool forpreview, const string& fn, const string& data) { - if (nullptr == metaOrAllSS && nullptr == bodySS) { - LOGERR("MimeHandlerXslt::set_document_file_impl: both ss empty??\n"); - return false; - } p->m_metaData[cstr_dj_keycharset] = cstr_utf8; - if (nullptr == bodySS) { + if (bodySS.empty()) { + auto ssp = metaOrAllSS.find(""); + if (ssp == metaOrAllSS.end()) { + LOGERR("MimeHandlerXslt::process: no style sheet !\n"); + return false; + } string md5; - if (apply_stylesheet(fn, string(), data, metaOrAllSS, result, + if (apply_stylesheet(fn, string(), data, ssp->second, result, forpreview ? nullptr : &md5)) { if (!forpreview) { p->m_metaData[cstr_dj_keymd5] = md5; @@ -260,16 +290,34 @@ bool MimeHandlerXslt::Internal::process_doc_or_string( } else { result = "\n\n"; - string part; - if (!apply_stylesheet(fn,metamember, data, metaOrAllSS, part, nullptr)) { - return false; + for (auto& member : metaMembers) { + auto it = metaOrAllSS.find(member.second); + if (it == metaOrAllSS.end()) { + LOGERR("MimeHandlerXslt::process: no style sheet found for " << + member.first << ":" << member.second << "!\n"); + return false; + } + string part; + if (!apply_stylesheet(fn, member.first, data, it->second, part, nullptr)) { + return false; + } + result += part; } - result += part; result += "\n\n"; - if (!apply_stylesheet(fn, bodymember, data, bodySS, part, nullptr)) { - return false; + + for (auto& member : bodyMembers) { + auto it = bodySS.find(member.second); + if (it == bodySS.end()) { + LOGERR("MimeHandlerXslt::process: no style sheet found for " << + member.first << ":" << member.second << "!\n"); + return false; + } + string part; + if (!apply_stylesheet(fn, member.first, data, it->second, part, nullptr)) { + return false; + } + result += part; } - result += part; result += ""; } return true; @@ -307,7 +355,7 @@ bool MimeHandlerXslt::next_document() return false; } if (m_havedoc == false) - return false; + return false; m_havedoc = false; m_metaData[cstr_dj_keymt] = cstr_texthtml; m_metaData[cstr_dj_keycontent].swap(m->result); diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf index b72f96b7..72899b82 100644 --- a/src/sampleconf/mimeconf +++ b/src/sampleconf/mimeconf @@ -53,9 +53,6 @@ application/x-zstd = uncompress rcluncomp "unzstd --rm -q" %f %t # A different format (ie text/plain), and a character set can be defined for # each filter, see the exemples below (ie: msword) [index] -application/epub+zip = execm rclepub -# Returned by xdg-mime for .js. Future-proofing -application/javascript = internal text/plain # MSWORD: the rcldoc script handles a number of marginal case that raw # antiword won't: @@ -67,52 +64,99 @@ application/msword = execm rcldoc.py #application/msword = exec antiword -t -i 1 -m UTF-8;mimetype=text/plain # You can also use wvware directly but it's much slower. # application/msword = exec wvWare --charset=utf-8 --nographics - -application/x-hwp = execm rclhwp.py - +application/vnd.ms-excel = execm rclxls.py +application/vnd.ms-outlook = execm rclpst.py +application/vnd.ms-powerpoint = execm rclppt.py # Also Handle the mime type returned by "file -i" for a suffix-less word # file. This could probably just as well be an excel file, but we have to # chose one. application/vnd.ms-office = execm rcldoc.py -application/ogg = execm rclaudio -application/pdf = execm rclpdf.py -application/postscript = exec rclps -application/sql = internal text/plain -application/vnd.ms-excel = execm rclxls.py -application/vnd.ms-outlook = execm rclpst.py -application/vnd.ms-powerpoint = execm rclppt.py -application/vnd.oasis.opendocument.text = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.text-template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.presentation = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.spreadsheet = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.graphics = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.presentation-flat-xml = internal xsltproc opendoc-flat.xsl -application/vnd.oasis.opendocument.text-flat-xml = internal xsltproc opendoc-flat.xsl -application/vnd.oasis.opendocument.spreadsheet-flat-xml = internal xsltproc opendoc-flat.xsl +application/vnd.oasis.opendocument.text = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.oasis.opendocument.text-template = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.oasis.opendocument.presentation = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.oasis.opendocument.spreadsheet = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.oasis.opendocument.graphics = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.oasis.opendocument.presentation-flat-xml = \ + internal xsltproc opendoc-flat.xsl +application/vnd.oasis.opendocument.text-flat-xml = \ + internal xsltproc opendoc-flat.xsl +application/vnd.oasis.opendocument.spreadsheet-flat-xml = \ + internal xsltproc opendoc-flat.xsl + application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ - internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl + internal xsltproc meta docProps/core.xml openxml-meta.xsl \ + body word/document.xml openxml-word-body.xsl \ + body word/footnotes.xml openxml-word-body.xsl \ + body word/endnotes.xml openxml-word-body.xsl application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ - internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl + internal xsltproc meta docProps/core.xml openxml-meta.xsl \ + body word/document.xml openxml-word-body.xsl \ + body word/footnotes.xml openxml-word-body.xsl \ + body word/endnotes.xml openxml-word-body.xsl application/vnd.openxmlformats-officedocument.presentationml.template = \ execm rclopxml.py application/vnd.openxmlformats-officedocument.presentationml.presentation = \ execm rclopxml.py application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ - internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl + internal xsltproc meta docProps/core.xml openxml-meta.xsl \ + body xl/sharedStrings.xml openxml-xls-body.xsl application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ - internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl -application/vnd.sun.xml.calc = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.calc.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.draw = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.draw.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.impress = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.impress.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.math = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.writer = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.writer.global = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.writer.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl + internal xsltproc meta docProps/core.xml openxml-meta.xsl \ + body xl/sharedStrings.xml openxml-xls-body.xsl + +application/vnd.sun.xml.calc = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.calc.template = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.draw = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.draw.template = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.impress = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.impress.template = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.math = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.writer = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.writer.global = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.writer.template = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl + +#application/x-mobipocket-ebook = execm rclmobi +#application/x-tar = execm rcltar + +application/epub+zip = execm rclepub +application/javascript = internal text/plain +application/ogg = execm rclaudio +application/pdf = execm rclpdf.py +application/postscript = exec rclps +application/sql = internal text/plain application/vnd.wordperfect = exec wpd2html;mimetype=text/html +application/x-7z-compressed = execm rcl7z application/x-abiword = internal xsltproc abiword.xsl application/x-awk = internal text/plain application/x-chm = execm rclchm @@ -122,74 +166,70 @@ application/x-flac = execm rclaudio application/x-gnote = execm rclxml.py application/x-gnuinfo = execm rclinfo application/x-gnumeric = internal xsltproc gnumeric.xsl +application/x-hwp = execm rclhwp.py application/x-kword = exec rclkwd application/x-lyx = exec rcllyx application/x-mimehtml = internal message/rfc822 -#application/x-mobipocket-ebook = execm rclmobi application/x-okular-notes = internal xsltproc okular-note.xsl application/x-perl = internal text/plain -# Returned by xdg-mime for .php. Future-proofing application/x-php = internal text/plain application/x-rar = execm rclrar;charset=default application/x-ruby = internal text/plain application/x-scribus = exec rclscribus application/x-shellscript = internal text/plain -#application/x-tar = execm rcltar application/x-tex = exec rcltex application/x-webarchive = execm rclwar +application/x-zerosize = internal application/zip = execm rclzip;charset=default -application/x-7z-compressed = execm rcl7z -audio/ape = execm rclaudio -audio/mpeg = execm rclaudio -audio/mp4 = execm rclaudio -video/mp4 = execm rclaudio -video/x-msvideo = execm rclimg audio/aac = execm rclaudio +audio/ape = execm rclaudio +audio/mp4 = execm rclaudio +audio/mpeg = execm rclaudio audio/x-karaoke = execm rclkar -audio/x-wavpack = execm rclaudio audio/x-musepack = execm rclaudio +audio/x-wavpack = execm rclaudio image/gif = execm rclimg image/jp2 = execm rclimg image/jpeg = execm rclimg image/png = execm rclimg +image/svg+xml = internal xsltproc svg.xsl image/tiff = execm rclimg image/vnd.djvu = execm rcldjvu.py -image/svg+xml = internal xsltproc svg.xsl -image/x-xcf = execm rclimg image/x-nikon-nef = execm rclimg +image/x-xcf = execm rclimg inode/symlink = internal -application/x-zerosize = internal inode/x-empty = internal application/x-zerosize message/rfc822 = internal text/calendar = execm rclics;mimetype=text/plain +text/css = internal text/plain text/html = internal text/plain = internal text/rtf = exec unrtf --nopict --html;mimetype=text/html -text/x-c = internal -text/x-c++ = internal -text/x-c+ = internal -text/x-csharp = internal text/plain -text/css = internal text/plain -application/javascript = internal text/plain text/x-bibtex = exec rclbibtex.sh ; mimetype = text/plain +text/x-c = internal +text/x-c+ = internal +text/x-c++ = internal +text/x-chm-html = internal text/html +text/x-csharp = internal text/plain text/x-csv = internal text/plain text/x-fictionbook = internal xsltproc fb2.xsl text/x-gaim-log = exec rclgaim text/x-html-aptosid-man = exec rclaptosidman -text/x-lua = internal -text/x-chm-html = internal text/html text/x-ini = internal text/plain text/x-java = internal text/plain +text/x-lua = internal text/x-mail = internal text/x-man = exec rclman text/x-perl = internal text/plain -text/x-purple-log = exec rclpurple text/x-purple-html-log = internal text/html +text/x-purple-log = exec rclpurple text/x-python = exec rclpython text/x-ruby = internal text/x-shellscript = internal text/plain text/x-srt = internal text/plain text/x-tex = exec rcltex +video/mp4 = execm rclaudio +video/x-msvideo = execm rclimg # Generic XML is best indexed as text, else it generates too many errors diff --git a/src/windows/mimeconf b/src/windows/mimeconf index 2be6d7f6..49574b42 100644 --- a/src/windows/mimeconf +++ b/src/windows/mimeconf @@ -46,109 +46,149 @@ application/x-lzma = uncompress python rcluncomp.py 7z %f %t # A different format (ie text/plain), and a character set can be defined for # each filter, see the exemples below (ie: msword) [index] + application/msword = execm python rcldoc.py application/vnd.ms-excel = execm python rclxls.py +application/vnd.ms-outlook = execm python rclpst.py application/vnd.ms-powerpoint = execm python rclppt.py - -application/pdf = execm python rclpdf.py - -application/x-hwp = execm python rclhwp.py - -application/vnd.oasis.opendocument.text = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.text-template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.presentation = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.spreadsheet = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.graphics = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.calc = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.calc.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.draw = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.draw.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.impress = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.impress.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.math = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.writer = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.writer.global = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl -application/vnd.sun.xml.writer.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl - -application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html -application/x-abiword = internal xsltproc abiword.xsl -text/x-fictionbook = internal xsltproc fb2.xsl - -application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ - internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl -application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ - internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl -application/vnd.openxmlformats-officedocument.presentationml.template = \ - execm python rclopxml.py -application/vnd.openxmlformats-officedocument.presentationml.presentation = \ - execm python rclopxml.py -application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ - internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl -application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ - internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl - -application/epub+zip = execm python rclepub -# Returned by xdg-mime for .js. Future-proofing -application/javascript = internal text/plain - # Also Handle the mime type returned by "file -i" for a suffix-less word # file. This could probably just as well be an excel file, but we have to # chose one. application/vnd.ms-office = execm python rcldoc.py -application/vnd.ms-outlook = execm python rclpst.py +application/vnd.oasis.opendocument.text = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.oasis.opendocument.text-template = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.oasis.opendocument.presentation = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.oasis.opendocument.spreadsheet = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.oasis.opendocument.graphics = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.oasis.opendocument.presentation-flat-xml = \ + internal xsltproc opendoc-flat.xsl +application/vnd.oasis.opendocument.text-flat-xml = \ + internal xsltproc opendoc-flat.xsl +application/vnd.oasis.opendocument.spreadsheet-flat-xml = \ + internal xsltproc opendoc-flat.xsl +application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ + internal xsltproc meta docProps/core.xml openxml-meta.xsl \ + body word/document.xml openxml-word-body.xsl \ + body word/footnotes.xml openxml-word-body.xsl \ + body word/endnotes.xml openxml-word-body.xsl +application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ + internal xsltproc meta docProps/core.xml openxml-meta.xsl \ + body word/document.xml openxml-word-body.xsl \ + body word/footnotes.xml openxml-word-body.xsl \ + body word/endnotes.xml openxml-word-body.xsl +application/vnd.openxmlformats-officedocument.presentationml.template = \ + execm python rclopxml.py +application/vnd.openxmlformats-officedocument.presentationml.presentation = \ + execm python rclopxml.py +application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ + internal xsltproc meta docProps/core.xml openxml-meta.xsl \ + body xl/sharedStrings.xml openxml-xls-body.xsl +application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ + internal xsltproc meta docProps/core.xml openxml-meta.xsl \ + body xl/sharedStrings.xml openxml-xls-body.xsl + +application/vnd.sun.xml.calc = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.calc.template = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.draw = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.draw.template = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.impress = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.impress.template = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.math = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.writer = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.writer.global = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl +application/vnd.sun.xml.writer.template = \ + internal xsltproc meta meta.xml opendoc-meta.xsl \ + body content.xml opendoc-body.xsl + +#application/postscript = exec rclps +#application/x-gnuinfo = execm python rclinfo +#application/x-tar = execm python rcltar + +application/epub+zip = execm python rclepub +application/javascript = internal text/plain application/ogg = execm python rclaudio - +application/pdf = execm python rclpdf.py +application/sql = internal text/plain +application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html +application/x-7z-compressed = execm python rcl7z +application/x-abiword = internal xsltproc abiword.xsl application/x-awk = internal text/plain application/x-chm = execm python rclchm application/x-dia-diagram = execm python rcldia;mimetype=text/plain application/x-flac = execm python rclaudio application/x-gnote = execm python rclxml.py -#application/x-gnuinfo = execm python rclinfo +application/x-hwp = execm python rclhwp.py application/x-mimehtml = internal message/rfc822 application/x-perl = internal text/plain application/x-php = internal text/plain application/x-rar = execm python rclrar;charset=default application/x-shellscript = internal text/plain -#application/x-tar = execm python rcltar application/x-webarchive = execm python rclwar -application/x-7z-compressed = execm python rcl7z +application/x-zerosize = internal application/zip = execm python rclzip;charset=default -audio/mpeg = execm python rclaudio -audio/mp4 = execm python rclaudio audio/aac = execm python rclaudio +audio/mp4 = execm python rclaudio +audio/mpeg = execm python rclaudio audio/x-karaoke = execm python rclkar image/gif = execm rclimg.exe image/jp2 = execm rclimg.exe image/jpeg = execm rclimg.exe image/png = execm rclimg.exe -image/tiff = execm rclimg.exe image/svg+xml = internal xsltproc svg.xsl -#image/x-xcf = execm rclimg.exe +image/tiff = execm rclimg.exe inode/symlink = internal -application/x-zerosize = internal inode/x-empty = internal application/x-zerosize message/rfc822 = internal text/calendar = execm python rclics;mimetype=text/plain +text/css = internal text/plain text/html = internal text/plain = internal -text/rtf = exec unrtf --nopict --html;mimetype=text/html #text/rtf = execm python rclrtf.py +text/rtf = exec unrtf --nopict --html;mimetype=text/html text/x-c = internal -text/x-c++ = internal text/x-c+ = internal -text/x-csharp = internal text/plain -text/css = internal text/plain -application/javascript = internal text/plain -text/x-csv = internal text/plain +text/x-c++ = internal text/x-chm-html = internal text/html +text/x-csharp = internal text/plain +text/x-csv = internal text/plain +text/x-fictionbook = internal xsltproc fb2.xsl text/x-ini = internal text/plain text/x-mail = internal text/x-perl = internal text/plain text/x-python = exec python rclpython text/x-shellscript = internal text/plain text/x-srt = internal text/plain +image/x-xcf = execm rclimg.exe # Generic XML is best indexed as text, else it generates too many errors # All parameter and tag names, attribute values etc, are indexed as