internal xslt: support multiple archive members containing metadata or body text. Apply to indexing openxml foot/endnotes

This commit is contained in:
Jean-Francois Dockes 2020-04-22 14:04:20 +02:00
parent ad03540394
commit 19149020e3
6 changed files with 288 additions and 159 deletions

View File

@ -6,7 +6,7 @@
PPA_KEYID=D38B9201 PPA_KEYID=D38B9201
RCLVERS=1.27.0pre1 RCLVERS=1.27.0pre3
SCOPEVERS=1.20.2.4 SCOPEVERS=1.20.2.4
GSSPVERS=1.0.0 GSSPVERS=1.0.0
PPAVERS=2 PPAVERS=2

View File

@ -1,9 +1,10 @@
recoll (1.27.0pre2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low recoll (1.27.0pre3-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Support for language-sensitive analysis of Korean text * Support for language-sensitive analysis of Korean text
* Index docx endnotes and footnotes
* Other small improvements. * Other small improvements.
-- Jean-Francois Dockes <jf@dockes.org> Thu, 21 Apr 2020 15:32:00 +0100 -- Jean-Francois Dockes <jf@dockes.org> Tue, 21 Apr 2020 15:32:00 +0100
recoll (1.26.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low recoll (1.26.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low

View File

@ -1 +1 @@
1.27.0pre2 1.27.0pre3

View File

@ -47,13 +47,13 @@ public:
virtual ~FileScanXML() { virtual ~FileScanXML() {
if (ctxt) { if (ctxt) {
xmlFreeParserCtxt(ctxt); xmlFreeParserCtxt(ctxt);
// This should not be necessary (done by free), but see // This should not be necessary (done by free), but see
// http://xmlsoft.org/xmlmem.html#Compacting The // http://xmlsoft.org/xmlmem.html#Compacting The
// malloc_trim() and mallopt() doc seems to be a bit // malloc_trim() and mallopt() doc seems to be a bit
// misleading, there is probably a frag size under which // misleading, there is probably a frag size under which
// free() does not try to malloc_trim() at all // free() does not try to malloc_trim() at all
#ifdef HAVE_MALLOC_TRIM #ifdef HAVE_MALLOC_TRIM
malloc_trim(0); malloc_trim(0);
#endif /* HAVE_MALLOC_TRIM */ #endif /* HAVE_MALLOC_TRIM */
} }
} }
@ -112,11 +112,11 @@ public:
Internal(MimeHandlerXslt *_p) Internal(MimeHandlerXslt *_p)
: p(_p) {} : p(_p) {}
~Internal() { ~Internal() {
if (metaOrAllSS) { for (auto& entry : metaOrAllSS) {
xsltFreeStylesheet(metaOrAllSS); xsltFreeStylesheet(entry.second);
} }
if (bodySS) { for (auto& entry : bodySS) {
xsltFreeStylesheet(bodySS); xsltFreeStylesheet(entry.second);
} }
} }
@ -128,10 +128,16 @@ public:
MimeHandlerXslt *p; MimeHandlerXslt *p;
bool ok{false}; bool ok{false};
string metamember;
xsltStylesheet *metaOrAllSS{nullptr}; // Pairs of zip archive member names and style sheet names for the
string bodymember; // metadata, and map of style sheets refd by their names.
xsltStylesheet *bodySS{nullptr}; // Exception: there can be a single entry which does meta and
// body, in which case bodymembers/bodySS are empty.
vector<pair<string,string>> metaMembers;
map <string, xsltStylesheet*> metaOrAllSS;
// Same for body data
vector<pair<string,string>> bodyMembers;
map<string, xsltStylesheet*> bodySS;
string result; string result;
string filtersdir; string filtersdir;
}; };
@ -152,20 +158,43 @@ MimeHandlerXslt::MimeHandlerXslt(RclConfig *cnf, const std::string& id,
xmlLoadExtDtdDefaultValue = 0; xmlLoadExtDtdDefaultValue = 0;
// params can be "xslt stylesheetall" or // params can be "xslt stylesheetall" or
// "xslt metamember metastylesheet bodymember bodystylesheet" // "xslt meta/body memberpath stylesheetnm [... ... ...] ...
if (params.size() == 2) { if (params.size() == 2) {
m->metaOrAllSS = m->prepare_stylesheet(params[1]); auto ss = m->prepare_stylesheet(params[1]);
if (m->metaOrAllSS) { if (ss) {
m->ok = true; m->ok = true;
m->metaOrAllSS[""] = ss;
} }
} else if (params.size() == 5) { } else if (params.size() > 3 && params.size() % 3 == 1) {
m->metamember = params[1]; auto it = params.begin();
m->metaOrAllSS = m->prepare_stylesheet(params[2]); it++;
m->bodymember = params[3]; while (it != params.end()) {
m->bodySS = m->prepare_stylesheet(params[4]); // meta/body membername ssname
if (m->metaOrAllSS && m->bodySS) { const string& tp = *it++;
m->ok = true; const string& znm = *it++;
const string& ssnm = *it++;
vector<pair<string,string>> *mbrv;
map<string,xsltStylesheet*> *ssmp;
if (tp == "meta") {
mbrv = &m->metaMembers;
ssmp = &m->metaOrAllSS;
} else if (tp == "body") {
mbrv = &m->bodyMembers;
ssmp = &m->bodySS;
} else {
LOGERR("MimeHandlerXslt: bad member type " << tp << endl);
return;
}
if (ssmp->find(ssnm) == ssmp->end()) {
auto ss = m->prepare_stylesheet(ssnm);
if (nullptr == ss) {
return;
}
ssmp->insert({ssnm, ss});
}
mbrv->push_back({znm, ssnm});
} }
m->ok = true;
} else { } else {
LOGERR("MimeHandlerXslt: constructor with wrong param vector: " << LOGERR("MimeHandlerXslt: constructor with wrong param vector: " <<
stringsToString(params) << endl); stringsToString(params) << endl);
@ -209,7 +238,7 @@ bool MimeHandlerXslt::Internal::apply_stylesheet(
res = string_scan(data.c_str(), data.size(), &XMLdoc, &reason, md5p); res = string_scan(data.c_str(), data.size(), &XMLdoc, &reason, md5p);
} else { } else {
res = string_scan(data.c_str(), data.size(), member, &XMLdoc, res = string_scan(data.c_str(), data.size(), member, &XMLdoc,
&reason); &reason);
} }
} }
if (!res) { if (!res) {
@ -231,7 +260,7 @@ bool MimeHandlerXslt::Internal::apply_stylesheet(
} }
xmlChar *outstr; xmlChar *outstr;
int outlen; int outlen;
xsltSaveResultToString(&outstr, &outlen, transformed, metaOrAllSS); xsltSaveResultToString(&outstr, &outlen, transformed, ssp);
result = string((const char*)outstr, outlen); result = string((const char*)outstr, outlen);
xmlFree(outstr); xmlFree(outstr);
xmlFreeDoc(transformed); xmlFreeDoc(transformed);
@ -242,14 +271,15 @@ bool MimeHandlerXslt::Internal::apply_stylesheet(
bool MimeHandlerXslt::Internal::process_doc_or_string( bool MimeHandlerXslt::Internal::process_doc_or_string(
bool forpreview, const string& fn, const string& data) bool forpreview, const string& fn, const string& data)
{ {
if (nullptr == metaOrAllSS && nullptr == bodySS) {
LOGERR("MimeHandlerXslt::set_document_file_impl: both ss empty??\n");
return false;
}
p->m_metaData[cstr_dj_keycharset] = cstr_utf8; p->m_metaData[cstr_dj_keycharset] = cstr_utf8;
if (nullptr == bodySS) { if (bodySS.empty()) {
auto ssp = metaOrAllSS.find("");
if (ssp == metaOrAllSS.end()) {
LOGERR("MimeHandlerXslt::process: no style sheet !\n");
return false;
}
string md5; string md5;
if (apply_stylesheet(fn, string(), data, metaOrAllSS, result, if (apply_stylesheet(fn, string(), data, ssp->second, result,
forpreview ? nullptr : &md5)) { forpreview ? nullptr : &md5)) {
if (!forpreview) { if (!forpreview) {
p->m_metaData[cstr_dj_keymd5] = md5; p->m_metaData[cstr_dj_keymd5] = md5;
@ -260,16 +290,34 @@ bool MimeHandlerXslt::Internal::process_doc_or_string(
} else { } else {
result = "<html>\n<head>\n<meta http-equiv=\"Content-Type\"" result = "<html>\n<head>\n<meta http-equiv=\"Content-Type\""
"content=\"text/html; charset=UTF-8\">"; "content=\"text/html; charset=UTF-8\">";
string part; for (auto& member : metaMembers) {
if (!apply_stylesheet(fn,metamember, data, metaOrAllSS, part, nullptr)) { auto it = metaOrAllSS.find(member.second);
return false; if (it == metaOrAllSS.end()) {
LOGERR("MimeHandlerXslt::process: no style sheet found for " <<
member.first << ":" << member.second << "!\n");
return false;
}
string part;
if (!apply_stylesheet(fn, member.first, data, it->second, part, nullptr)) {
return false;
}
result += part;
} }
result += part;
result += "</head>\n<body>\n"; result += "</head>\n<body>\n";
if (!apply_stylesheet(fn, bodymember, data, bodySS, part, nullptr)) {
return false; for (auto& member : bodyMembers) {
auto it = bodySS.find(member.second);
if (it == bodySS.end()) {
LOGERR("MimeHandlerXslt::process: no style sheet found for " <<
member.first << ":" << member.second << "!\n");
return false;
}
string part;
if (!apply_stylesheet(fn, member.first, data, it->second, part, nullptr)) {
return false;
}
result += part;
} }
result += part;
result += "</body></html>"; result += "</body></html>";
} }
return true; return true;
@ -307,7 +355,7 @@ bool MimeHandlerXslt::next_document()
return false; return false;
} }
if (m_havedoc == false) if (m_havedoc == false)
return false; return false;
m_havedoc = false; m_havedoc = false;
m_metaData[cstr_dj_keymt] = cstr_texthtml; m_metaData[cstr_dj_keymt] = cstr_texthtml;
m_metaData[cstr_dj_keycontent].swap(m->result); m_metaData[cstr_dj_keycontent].swap(m->result);

View File

@ -53,9 +53,6 @@ application/x-zstd = uncompress rcluncomp "unzstd --rm -q" %f %t
# A different format (e.g. text/plain), and a character set can be defined for # A different format (e.g. text/plain), and a character set can be defined for
# each filter, see the examples below (e.g. msword) # each filter, see the examples below (e.g. msword)
[index] [index]
application/epub+zip = execm rclepub
# Returned by xdg-mime for .js. Future-proofing
application/javascript = internal text/plain
# MSWORD: the rcldoc script handles a number of marginal cases that raw # MSWORD: the rcldoc script handles a number of marginal cases that raw
# antiword won't: # antiword won't:
@ -67,52 +64,99 @@ application/msword = execm rcldoc.py
#application/msword = exec antiword -t -i 1 -m UTF-8;mimetype=text/plain #application/msword = exec antiword -t -i 1 -m UTF-8;mimetype=text/plain
# You can also use wvware directly but it's much slower. # You can also use wvware directly but it's much slower.
# application/msword = exec wvWare --charset=utf-8 --nographics # application/msword = exec wvWare --charset=utf-8 --nographics
application/vnd.ms-excel = execm rclxls.py
application/x-hwp = execm rclhwp.py application/vnd.ms-outlook = execm rclpst.py
application/vnd.ms-powerpoint = execm rclppt.py
# Also handle the mime type returned by "file -i" for a suffix-less word # Also handle the mime type returned by "file -i" for a suffix-less word
# file. This could probably just as well be an excel file, but we have to # file. This could probably just as well be an excel file, but we have to
# choose one. # choose one.
application/vnd.ms-office = execm rcldoc.py application/vnd.ms-office = execm rcldoc.py
application/ogg = execm rclaudio application/vnd.oasis.opendocument.text = \
application/pdf = execm rclpdf.py internal xsltproc meta meta.xml opendoc-meta.xsl \
application/postscript = exec rclps body content.xml opendoc-body.xsl
application/sql = internal text/plain application/vnd.oasis.opendocument.text-template = \
application/vnd.ms-excel = execm rclxls.py internal xsltproc meta meta.xml opendoc-meta.xsl \
application/vnd.ms-outlook = execm rclpst.py body content.xml opendoc-body.xsl
application/vnd.ms-powerpoint = execm rclppt.py application/vnd.oasis.opendocument.presentation = \
application/vnd.oasis.opendocument.text = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl internal xsltproc meta meta.xml opendoc-meta.xsl \
application/vnd.oasis.opendocument.text-template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl application/vnd.oasis.opendocument.spreadsheet = \
application/vnd.oasis.opendocument.spreadsheet = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl internal xsltproc meta meta.xml opendoc-meta.xsl \
application/vnd.oasis.opendocument.graphics = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation-flat-xml = internal xsltproc opendoc-flat.xsl application/vnd.oasis.opendocument.graphics = \
application/vnd.oasis.opendocument.text-flat-xml = internal xsltproc opendoc-flat.xsl internal xsltproc meta meta.xml opendoc-meta.xsl \
application/vnd.oasis.opendocument.spreadsheet-flat-xml = internal xsltproc opendoc-flat.xsl body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation-flat-xml = \
internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.text-flat-xml = \
internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.spreadsheet-flat-xml = \
internal xsltproc opendoc-flat.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body word/document.xml openxml-word-body.xsl \
body word/footnotes.xml openxml-word-body.xsl \
body word/endnotes.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body word/document.xml openxml-word-body.xsl \
body word/footnotes.xml openxml-word-body.xsl \
body word/endnotes.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.presentationml.template = \ application/vnd.openxmlformats-officedocument.presentationml.template = \
execm rclopxml.py execm rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.presentation = \ application/vnd.openxmlformats-officedocument.presentationml.presentation = \
execm rclopxml.py execm rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl internal xsltproc meta docProps/core.xml openxml-meta.xsl \
application/vnd.sun.xml.calc = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl body xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.sun.xml.calc.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl application/vnd.sun.xml.calc = \
application/vnd.sun.xml.draw.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl internal xsltproc meta meta.xml opendoc-meta.xsl \
application/vnd.sun.xml.impress = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl body content.xml opendoc-body.xsl
application/vnd.sun.xml.impress.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl application/vnd.sun.xml.calc.template = \
application/vnd.sun.xml.math = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl internal xsltproc meta meta.xml opendoc-meta.xsl \
application/vnd.sun.xml.writer = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl body content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.global = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl application/vnd.sun.xml.draw = \
application/vnd.sun.xml.writer.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.draw.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.impress = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.impress.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.math = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.writer = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.global = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
#application/x-mobipocket-ebook = execm rclmobi
#application/x-tar = execm rcltar
application/epub+zip = execm rclepub
application/javascript = internal text/plain
application/ogg = execm rclaudio
application/pdf = execm rclpdf.py
application/postscript = exec rclps
application/sql = internal text/plain
application/vnd.wordperfect = exec wpd2html;mimetype=text/html application/vnd.wordperfect = exec wpd2html;mimetype=text/html
application/x-7z-compressed = execm rcl7z
application/x-abiword = internal xsltproc abiword.xsl application/x-abiword = internal xsltproc abiword.xsl
application/x-awk = internal text/plain application/x-awk = internal text/plain
application/x-chm = execm rclchm application/x-chm = execm rclchm
@ -122,74 +166,70 @@ application/x-flac = execm rclaudio
application/x-gnote = execm rclxml.py application/x-gnote = execm rclxml.py
application/x-gnuinfo = execm rclinfo application/x-gnuinfo = execm rclinfo
application/x-gnumeric = internal xsltproc gnumeric.xsl application/x-gnumeric = internal xsltproc gnumeric.xsl
application/x-hwp = execm rclhwp.py
application/x-kword = exec rclkwd application/x-kword = exec rclkwd
application/x-lyx = exec rcllyx application/x-lyx = exec rcllyx
application/x-mimehtml = internal message/rfc822 application/x-mimehtml = internal message/rfc822
#application/x-mobipocket-ebook = execm rclmobi
application/x-okular-notes = internal xsltproc okular-note.xsl application/x-okular-notes = internal xsltproc okular-note.xsl
application/x-perl = internal text/plain application/x-perl = internal text/plain
# Returned by xdg-mime for .php. Future-proofing
application/x-php = internal text/plain application/x-php = internal text/plain
application/x-rar = execm rclrar;charset=default application/x-rar = execm rclrar;charset=default
application/x-ruby = internal text/plain application/x-ruby = internal text/plain
application/x-scribus = exec rclscribus application/x-scribus = exec rclscribus
application/x-shellscript = internal text/plain application/x-shellscript = internal text/plain
#application/x-tar = execm rcltar
application/x-tex = exec rcltex application/x-tex = exec rcltex
application/x-webarchive = execm rclwar application/x-webarchive = execm rclwar
application/x-zerosize = internal
application/zip = execm rclzip;charset=default application/zip = execm rclzip;charset=default
application/x-7z-compressed = execm rcl7z
audio/ape = execm rclaudio
audio/mpeg = execm rclaudio
audio/mp4 = execm rclaudio
video/mp4 = execm rclaudio
video/x-msvideo = execm rclimg
audio/aac = execm rclaudio audio/aac = execm rclaudio
audio/ape = execm rclaudio
audio/mp4 = execm rclaudio
audio/mpeg = execm rclaudio
audio/x-karaoke = execm rclkar audio/x-karaoke = execm rclkar
audio/x-wavpack = execm rclaudio
audio/x-musepack = execm rclaudio audio/x-musepack = execm rclaudio
audio/x-wavpack = execm rclaudio
image/gif = execm rclimg image/gif = execm rclimg
image/jp2 = execm rclimg image/jp2 = execm rclimg
image/jpeg = execm rclimg image/jpeg = execm rclimg
image/png = execm rclimg image/png = execm rclimg
image/svg+xml = internal xsltproc svg.xsl
image/tiff = execm rclimg image/tiff = execm rclimg
image/vnd.djvu = execm rcldjvu.py image/vnd.djvu = execm rcldjvu.py
image/svg+xml = internal xsltproc svg.xsl
image/x-xcf = execm rclimg
image/x-nikon-nef = execm rclimg image/x-nikon-nef = execm rclimg
image/x-xcf = execm rclimg
inode/symlink = internal inode/symlink = internal
application/x-zerosize = internal
inode/x-empty = internal application/x-zerosize inode/x-empty = internal application/x-zerosize
message/rfc822 = internal message/rfc822 = internal
text/calendar = execm rclics;mimetype=text/plain text/calendar = execm rclics;mimetype=text/plain
text/css = internal text/plain
text/html = internal text/html = internal
text/plain = internal text/plain = internal
text/rtf = exec unrtf --nopict --html;mimetype=text/html text/rtf = exec unrtf --nopict --html;mimetype=text/html
text/x-c = internal
text/x-c++ = internal
text/x-c+ = internal
text/x-csharp = internal text/plain
text/css = internal text/plain
application/javascript = internal text/plain
text/x-bibtex = exec rclbibtex.sh ; mimetype = text/plain text/x-bibtex = exec rclbibtex.sh ; mimetype = text/plain
text/x-c = internal
text/x-c+ = internal
text/x-c++ = internal
text/x-chm-html = internal text/html
text/x-csharp = internal text/plain
text/x-csv = internal text/plain text/x-csv = internal text/plain
text/x-fictionbook = internal xsltproc fb2.xsl text/x-fictionbook = internal xsltproc fb2.xsl
text/x-gaim-log = exec rclgaim text/x-gaim-log = exec rclgaim
text/x-html-aptosid-man = exec rclaptosidman text/x-html-aptosid-man = exec rclaptosidman
text/x-lua = internal
text/x-chm-html = internal text/html
text/x-ini = internal text/plain text/x-ini = internal text/plain
text/x-java = internal text/plain text/x-java = internal text/plain
text/x-lua = internal
text/x-mail = internal text/x-mail = internal
text/x-man = exec rclman text/x-man = exec rclman
text/x-perl = internal text/plain text/x-perl = internal text/plain
text/x-purple-log = exec rclpurple
text/x-purple-html-log = internal text/html text/x-purple-html-log = internal text/html
text/x-purple-log = exec rclpurple
text/x-python = exec rclpython text/x-python = exec rclpython
text/x-ruby = internal text/x-ruby = internal
text/x-shellscript = internal text/plain text/x-shellscript = internal text/plain
text/x-srt = internal text/plain text/x-srt = internal text/plain
text/x-tex = exec rcltex text/x-tex = exec rcltex
video/mp4 = execm rclaudio
video/x-msvideo = execm rclimg
# Generic XML is best indexed as text, else it generates too many errors # Generic XML is best indexed as text, else it generates too many errors

View File

@ -46,109 +46,149 @@ application/x-lzma = uncompress python rcluncomp.py 7z %f %t
# A different format (e.g. text/plain), and a character set can be defined for # A different format (e.g. text/plain), and a character set can be defined for
# each filter, see the examples below (e.g. msword) # each filter, see the examples below (e.g. msword)
[index] [index]
application/msword = execm python rcldoc.py application/msword = execm python rcldoc.py
application/vnd.ms-excel = execm python rclxls.py application/vnd.ms-excel = execm python rclxls.py
application/vnd.ms-outlook = execm python rclpst.py
application/vnd.ms-powerpoint = execm python rclppt.py application/vnd.ms-powerpoint = execm python rclppt.py
application/pdf = execm python rclpdf.py
application/x-hwp = execm python rclhwp.py
application/vnd.oasis.opendocument.text = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.text-template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.spreadsheet = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.graphics = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.calc = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.calc.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.math = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.global = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html
application/x-abiword = internal xsltproc abiword.xsl
text/x-fictionbook = internal xsltproc fb2.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.presentationml.template = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.presentation = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
application/epub+zip = execm python rclepub
# Returned by xdg-mime for .js. Future-proofing
application/javascript = internal text/plain
# Also handle the mime type returned by "file -i" for a suffix-less word # Also handle the mime type returned by "file -i" for a suffix-less word
# file. This could probably just as well be an excel file, but we have to # file. This could probably just as well be an excel file, but we have to
# choose one. # choose one.
application/vnd.ms-office = execm python rcldoc.py application/vnd.ms-office = execm python rcldoc.py
application/vnd.ms-outlook = execm python rclpst.py application/vnd.oasis.opendocument.text = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.text-template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.spreadsheet = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.graphics = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation-flat-xml = \
internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.text-flat-xml = \
internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.spreadsheet-flat-xml = \
internal xsltproc opendoc-flat.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body word/document.xml openxml-word-body.xsl \
body word/footnotes.xml openxml-word-body.xsl \
body word/endnotes.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body word/document.xml openxml-word-body.xsl \
body word/footnotes.xml openxml-word-body.xsl \
body word/endnotes.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.presentationml.template = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.presentation = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
internal xsltproc meta docProps/core.xml openxml-meta.xsl \
body xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.sun.xml.calc = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.calc.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.draw = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.draw.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.impress = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.impress.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.math = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.writer = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.global = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.template = \
internal xsltproc meta meta.xml opendoc-meta.xsl \
body content.xml opendoc-body.xsl
#application/postscript = exec rclps
#application/x-gnuinfo = execm python rclinfo
#application/x-tar = execm python rcltar
application/epub+zip = execm python rclepub
application/javascript = internal text/plain
application/ogg = execm python rclaudio application/ogg = execm python rclaudio
application/pdf = execm python rclpdf.py
application/sql = internal text/plain
application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html
application/x-7z-compressed = execm python rcl7z
application/x-abiword = internal xsltproc abiword.xsl
application/x-awk = internal text/plain application/x-awk = internal text/plain
application/x-chm = execm python rclchm application/x-chm = execm python rclchm
application/x-dia-diagram = execm python rcldia;mimetype=text/plain application/x-dia-diagram = execm python rcldia;mimetype=text/plain
application/x-flac = execm python rclaudio application/x-flac = execm python rclaudio
application/x-gnote = execm python rclxml.py application/x-gnote = execm python rclxml.py
#application/x-gnuinfo = execm python rclinfo application/x-hwp = execm python rclhwp.py
application/x-mimehtml = internal message/rfc822 application/x-mimehtml = internal message/rfc822
application/x-perl = internal text/plain application/x-perl = internal text/plain
application/x-php = internal text/plain application/x-php = internal text/plain
application/x-rar = execm python rclrar;charset=default application/x-rar = execm python rclrar;charset=default
application/x-shellscript = internal text/plain application/x-shellscript = internal text/plain
#application/x-tar = execm python rcltar
application/x-webarchive = execm python rclwar application/x-webarchive = execm python rclwar
application/x-7z-compressed = execm python rcl7z application/x-zerosize = internal
application/zip = execm python rclzip;charset=default application/zip = execm python rclzip;charset=default
audio/mpeg = execm python rclaudio
audio/mp4 = execm python rclaudio
audio/aac = execm python rclaudio audio/aac = execm python rclaudio
audio/mp4 = execm python rclaudio
audio/mpeg = execm python rclaudio
audio/x-karaoke = execm python rclkar audio/x-karaoke = execm python rclkar
image/gif = execm rclimg.exe image/gif = execm rclimg.exe
image/jp2 = execm rclimg.exe image/jp2 = execm rclimg.exe
image/jpeg = execm rclimg.exe image/jpeg = execm rclimg.exe
image/png = execm rclimg.exe image/png = execm rclimg.exe
image/tiff = execm rclimg.exe
image/svg+xml = internal xsltproc svg.xsl image/svg+xml = internal xsltproc svg.xsl
#image/x-xcf = execm rclimg.exe image/tiff = execm rclimg.exe
inode/symlink = internal inode/symlink = internal
application/x-zerosize = internal
inode/x-empty = internal application/x-zerosize inode/x-empty = internal application/x-zerosize
message/rfc822 = internal message/rfc822 = internal
text/calendar = execm python rclics;mimetype=text/plain text/calendar = execm python rclics;mimetype=text/plain
text/css = internal text/plain
text/html = internal text/html = internal
text/plain = internal text/plain = internal
text/rtf = exec unrtf --nopict --html;mimetype=text/html
#text/rtf = execm python rclrtf.py #text/rtf = execm python rclrtf.py
text/rtf = exec unrtf --nopict --html;mimetype=text/html
text/x-c = internal text/x-c = internal
text/x-c++ = internal
text/x-c+ = internal text/x-c+ = internal
text/x-csharp = internal text/plain text/x-c++ = internal
text/css = internal text/plain
application/javascript = internal text/plain
text/x-csv = internal text/plain
text/x-chm-html = internal text/html text/x-chm-html = internal text/html
text/x-csharp = internal text/plain
text/x-csv = internal text/plain
text/x-fictionbook = internal xsltproc fb2.xsl
text/x-ini = internal text/plain text/x-ini = internal text/plain
text/x-mail = internal text/x-mail = internal
text/x-perl = internal text/plain text/x-perl = internal text/plain
text/x-python = exec python rclpython text/x-python = exec python rclpython
text/x-shellscript = internal text/plain text/x-shellscript = internal text/plain
text/x-srt = internal text/plain text/x-srt = internal text/plain
image/x-xcf = execm rclimg.exe
# Generic XML is best indexed as text, else it generates too many errors # Generic XML is best indexed as text, else it generates too many errors
# All parameter and tag names, attribute values etc, are indexed as # All parameter and tag names, attribute values etc, are indexed as