diff --git a/src/Makefile.am b/src/Makefile.am
index 2333c5d2..bd2e44af 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -653,7 +653,9 @@ filters/fb2.xsl \
filters/gnumeric.xsl \
filters/msodump.zip \
filters/okular-note.xsl \
+filters/opendoc-body.xsl \
filters/opendoc-flat.xsl \
+filters/opendoc-meta.xsl \
filters/ppt-dump.py \
filters/rcl7z \
filters/rclabw.py \
diff --git a/src/filters/opendoc-body.xsl b/src/filters/opendoc-body.xsl
new file mode 100644
index 00000000..b4b6e049
--- /dev/null
+++ b/src/filters/opendoc-body.xsl
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/filters/opendoc-meta.xsl b/src/filters/opendoc-meta.xsl
new file mode 100644
index 00000000..ab49b867
--- /dev/null
+++ b/src/filters/opendoc-meta.xsl
@@ -0,0 +1,67 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ abstract
+
+
+
+
+
+
+
+
+
+ keywords
+
+
+
+
+
+
+
+
+
+ author
+
+
+
+
+
+
+
+
+
+ keywords
+
+
+
+
+
+
+
+
diff --git a/src/internfile/mh_xslt.cpp b/src/internfile/mh_xslt.cpp
index 3d30fa3c..8afbc83a 100644
--- a/src/internfile/mh_xslt.cpp
+++ b/src/internfile/mh_xslt.cpp
@@ -94,18 +94,31 @@ private:
class MimeHandlerXslt::Internal {
public:
+    // _p is the owning handler; it is used to store the MD5 into its m_metaData.
+    explicit Internal(MimeHandlerXslt *_p) : p(_p) {}
~Internal() {
if (metaOrAllSS) {
xsltFreeStylesheet(metaOrAllSS);
}
- if (dataSS) {
- xsltFreeStylesheet(dataSS);
+        if (bodySS) {
+            xsltFreeStylesheet(bodySS);
}
}
+    // Parse the style sheet file named ssnm, looked up inside filtersdir.
+    xsltStylesheet *prepare_stylesheet(const string& ssnm);
+    bool process_doc_or_string(bool forpv, const string& fn, const string& data);
+    bool apply_stylesheet(
+        const string& fn, const string& member, const string& data,
+        xsltStylesheet *ssp, string& result, string *md5p);
+
+    MimeHandlerXslt *p;
bool ok{false};
+    string metamember; // archive member fed to metaOrAllSS (5-parameter form)
xsltStylesheet *metaOrAllSS{nullptr};
- xsltStylesheet *dataSS{nullptr};
+    string bodymember; // archive member fed to bodySS (5-parameter form)
+    xsltStylesheet *bodySS{nullptr};
string result;
+    string filtersdir; // directory in which prepare_stylesheet() looks up sheets
};
MimeHandlerXslt::~MimeHandlerXslt()
@@ -115,99 +128,163 @@ MimeHandlerXslt::~MimeHandlerXslt()
MimeHandlerXslt::MimeHandlerXslt(RclConfig *cnf, const std::string& id,
const std::vector<std::string>& params)
- : RecollFilter(cnf, id), m(new Internal)
+    : RecollFilter(cnf, id), m(new Internal(this))
{
LOGDEB("MimeHandlerXslt: params: " << stringsToString(params) << endl);
- string filtersdir = path_cat(cnf->getDatadir(), "filters");
+    m->filtersdir = path_cat(cnf->getDatadir(), "filters"); // read by prepare_stylesheet()
xmlSubstituteEntitiesDefault(0);
xmlLoadExtDtdDefaultValue = 0;
// params can be "xslt stylesheetall" or
- // "xslt metamember stylesheetmeta datamember stylesheetdata"
+    // "xslt metamember metastylesheet bodymember bodystylesheet"
if (params.size() == 2) {
- string ssfn = path_cat(filtersdir, params[1]);
- FileScanXML XMLstyle(ssfn);
- string reason;
- if (!file_scan(ssfn, &XMLstyle, &reason)) {
- LOGERR("MimeHandlerXslt: file_scan failed for style sheet " <<
- ssfn << " : " << reason << endl);
- return;
- }
- xmlDoc *stl = XMLstyle.getDoc();
- if (stl == nullptr) {
- LOGERR("MimeHandlerXslt: getDoc failed for style sheet " <<
- ssfn << endl);
- return;
- }
- m->metaOrAllSS = xsltParseStylesheetDoc(stl);
+        m->metaOrAllSS = m->prepare_stylesheet(params[1]); // one sheet for the whole document
if (m->metaOrAllSS) {
m->ok = true;
}
- } else if (params.size() == 4) {
+    } else if (params.size() == 5) {
+        m->metamember = params[1];
+        m->metaOrAllSS = m->prepare_stylesheet(params[2]);
+        m->bodymember = params[3];
+        m->bodySS = m->prepare_stylesheet(params[4]);
+        if (m->metaOrAllSS && m->bodySS) { // usable only if both sheets parsed
+            m->ok = true;
+        }
} else {
LOGERR("MimeHandlerXslt: constructor with wrong param vector: " <<
stringsToString(params) << endl);
}
}
-bool MimeHandlerXslt::set_document_file_impl(const std::string& mt,
- const std::string &file_path)
+// Parse the style sheet file ssnm found in filtersdir. Returns nullptr on failure.
+xsltStylesheet *MimeHandlerXslt::Internal::prepare_stylesheet(const string& ssnm)
{
- LOGDEB0("MimeHandlerXslt::set_document_file_: fn: " << file_path << endl);
- if (!m || !m->ok) {
+    string reason, ssfn = path_cat(filtersdir, ssnm);
+    FileScanXML XMLstyle(ssfn);
+    if (!file_scan(ssfn, &XMLstyle, &reason)) {
+        LOGERR("MimeHandlerXslt: file_scan failed for style sheet " << ssfn << " : " << reason << endl);
+        return nullptr;
+    }
+    xmlDoc *stl = XMLstyle.getDoc();
+    if (stl == nullptr) {
+        LOGERR("MimeHandlerXslt: getDoc failed for style sheet " << ssfn << endl);
+        return nullptr;
+    }
+    xsltStylesheet *ssp = xsltParseStylesheetDoc(stl);
+    if (ssp == nullptr) xmlFreeDoc(stl); // only a successful parse adopts stl
+    return ssp;
+}
+
+// Parse fn (or data if fn is empty; member selects a zip member), apply ssp into result.
+bool MimeHandlerXslt::Internal::apply_stylesheet(
+    const string& fn, const string& member, const string& data,
+    xsltStylesheet *ssp, string& result, string *md5p)
+{
+    FileScanXML XMLdoc(fn);
+    string reason;
+    bool res;
+    if (!fn.empty()) {
+        if (member.empty()) {
+            res = file_scan(fn, &XMLdoc, 0, -1, &reason, md5p);
+        } else {
+            res = file_scan(fn, member, &XMLdoc, &reason); // md5p is not used for members
+        }
+    } else {
+        if (member.empty()) {
+            res = string_scan(data.c_str(), data.size(), &XMLdoc, &reason, md5p);
+        } else {
+            res = string_scan(data.c_str(), data.size(), member, &XMLdoc, &reason);
+        }
+    }
+    if (!res) {
+        LOGERR("MimeHandlerXslt::set_document_: file_scan failed for "<<
+               fn << " " << member << " : " << reason << endl);
return false;
}
- if (nullptr == m->dataSS) {
- if (nullptr == m->metaOrAllSS) {
- LOGERR("MimeHandlerXslt::set_document_file_impl: both ss empty??\n");
- return false;
- }
- FileScanXML XMLdoc(file_path);
- string md5, reason;
- if (!file_scan(file_path, &XMLdoc, 0, -1, &reason,
- m_forPreview ? nullptr : &md5)) {
- LOGERR("MimeHandlerXslt::set_document_file_impl: file_scan failed "
- "for " << file_path << " : " << reason << endl);
- return false;
- }
- if (!m_forPreview) {
- m_metaData[cstr_dj_keymd5] = md5;
- }
- xmlDocPtr doc = XMLdoc.getDoc();
- if (nullptr == doc) {
- LOGERR("MimeHandlerXslt::set_doc_file_impl: no parsed doc\n");
- return false;
- }
- xmlDocPtr transformed = xsltApplyStylesheet(m->metaOrAllSS, doc, NULL);
- if (nullptr == transformed) {
- LOGERR("MimeHandlerXslt::set_doc_file_: xslt transform failed\n");
- xmlFreeDoc(doc);
- return false;
- }
- xmlChar *outstr;
- int outlen;
- xsltSaveResultToString(&outstr, &outlen, transformed, m->metaOrAllSS);
- m->result = string((const char*)outstr, outlen);
- xmlFree(outstr);
- xmlFreeDoc(transformed);
- xmlFreeDoc(doc);
- } else {
- LOGERR("Not ready for multipart yet\n");
- abort();
+    // The parsed document returned by getDoc() is released with xmlFreeDoc() below.
+    xmlDocPtr doc = XMLdoc.getDoc();
+    if (nullptr == doc) {
+        LOGERR("MimeHandlerXslt::set_document_: no parsed doc\n");
+        return false;
}
-
- m_havedoc = true;
+    xmlDocPtr transformed = xsltApplyStylesheet(ssp, doc, NULL);
+    if (nullptr == transformed) {
+        LOGERR("MimeHandlerXslt::set_document_: xslt transform failed\n");
+        xmlFreeDoc(doc);
+        return false;
+    }
+    xmlChar *outstr;
+    int outlen;
+    xsltSaveResultToString(&outstr, &outlen, transformed, ssp); // the sheet actually applied
+    result = string((const char*)outstr, outlen);
+    xmlFree(outstr);
+    xmlFreeDoc(transformed);
+    xmlFreeDoc(doc);
return true;
}
-bool MimeHandlerXslt::set_document_string_impl(const string& mt,
- const string& msgtxt)
+// Build 'result' from file fn, or from the in-memory 'data' when fn is empty.
+bool MimeHandlerXslt::Internal::process_doc_or_string(
+    bool forpreview, const string& fn, const string& data)
{
+    if (!metaOrAllSS && !bodySS) {
+        LOGERR("MimeHandlerXslt::set_document_file_impl: both ss empty??\n");
+        return false;
+    }
+    if (!bodySS) { // single style sheet: the input is transformed as a whole
+        string md5;
+        string *md5p = forpreview ? nullptr : &md5;
+        if (!apply_stylesheet(fn, string(), data, metaOrAllSS, result, md5p)) {
+            return false;
+        }
+        if (md5p) {
+            p->m_metaData[cstr_dj_keymd5] = md5;
+        }
+        return true;
+    }
+    // Two style sheets: output for metamember first, then output for bodymember.
+    // NOTE(review): the literals below look like they lost markup in this copy - confirm.
+    result = "\n\n";
+    string part;
+    if (!apply_stylesheet(fn, metamember, data, metaOrAllSS, part, nullptr)) {
+        return false;
+    }
+    result.append(part).append("\n\n");
+    if (!apply_stylesheet(fn, bodymember, data, bodySS, part, nullptr)) {
+        return false;
+    }
+    result.append(part).append("");
+    return true;
+}
+
+// Transform file fn via process_doc_or_string(); m_havedoc is set only on success.
+bool MimeHandlerXslt::set_document_file_impl(const std::string& mt, const std::string &fn)
+{
+    LOGDEB0("MimeHandlerXslt::set_document_file_: fn: " << fn << endl);
if (!m || !m->ok) {
return false;
}
- return true;
+    if (!m->process_doc_or_string(m_forPreview, fn, string())) {
+        return false;
+    }
+    m_havedoc = true;
+    return true;
+}
+
+// Transform the in-memory document txt; m_havedoc is set only on success.
+bool MimeHandlerXslt::set_document_string_impl(const string& mt, const string& txt)
+{
+    LOGDEB0("MimeHandlerXslt::set_document_string_\n");
+    if (!m || !m->ok) {
+        return false;
+    }
+    if (!m->process_doc_or_string(m_forPreview, string(), txt)) {
+        return false;
+    }
+    m_havedoc = true;
+    return true;
}
bool MimeHandlerXslt::next_document()
diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf
index 1ab2f270..7a0e8850 100644
--- a/src/sampleconf/mimeconf
+++ b/src/sampleconf/mimeconf
@@ -79,11 +79,11 @@ application/postscript = exec rclps
application/sql = internal text/plain
application/vnd.ms-excel = execm rclxls.py
application/vnd.ms-powerpoint = execm rclppt.py
-application/vnd.oasis.opendocument.text = execm rclsoff.py
-application/vnd.oasis.opendocument.text-template = execm rclsoff.py
-application/vnd.oasis.opendocument.presentation = execm rclsoff.py
-application/vnd.oasis.opendocument.spreadsheet = execm rclsoff.py
-application/vnd.oasis.opendocument.graphics = execm rclsoff.py
+application/vnd.oasis.opendocument.text = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.oasis.opendocument.text-template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.oasis.opendocument.presentation = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.oasis.opendocument.spreadsheet = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.oasis.opendocument.graphics = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation-flat-xml = internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.text-flat-xml = internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.spreadsheet-flat-xml = internal xsltproc opendoc-flat.xsl
@@ -99,16 +99,16 @@ application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
execm rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
execm rclopxml.py
-application/vnd.sun.xml.calc = execm rclsoff.py
-application/vnd.sun.xml.calc.template = execm rclsoff.py
-application/vnd.sun.xml.draw = execm rclsoff.py
-application/vnd.sun.xml.draw.template = execm rclsoff.py
-application/vnd.sun.xml.impress = execm rclsoff.py
-application/vnd.sun.xml.impress.template = execm rclsoff.py
-application/vnd.sun.xml.math = execm rclsoff.py
-application/vnd.sun.xml.writer = execm rclsoff.py
-application/vnd.sun.xml.writer.global = execm rclsoff.py
-application/vnd.sun.xml.writer.template = execm rclsoff.py
+application/vnd.sun.xml.calc = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.calc.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.draw = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.draw.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.impress = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.impress.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.math = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.writer = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.writer.global = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
+application/vnd.sun.xml.writer.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.wordperfect = exec wpd2html;mimetype=text/html
application/x-abiword = internal xsltproc abiword.xsl
application/x-awk = internal text/plain
diff --git a/src/utils/readfile.cpp b/src/utils/readfile.cpp
index 0fa93806..ae2edf16 100644
--- a/src/utils/readfile.cpp
+++ b/src/utils/readfile.cpp
@@ -381,10 +381,15 @@ protected:
// Source taking data from a ZIP archive member
class FileScanSourceZip : public FileScanSource {
public:
- FileScanSourceZip(FileScanDo *next, const string& fn, const string& member,
- string *reason)
+ FileScanSourceZip(FileScanDo *next, const string& fn,
+ const string& member, string *reason)
: FileScanSource(next), m_fn(fn), m_member(member),
- m_reason(reason) { }
+ m_reason(reason) {}
+
+ FileScanSourceZip(const char *data, size_t cnt, FileScanDo *next,
+ const string& member, string *reason)
+ : FileScanSource(next), m_data(data), m_cnt(cnt), m_member(member),
+ m_reason(reason) {}
virtual bool scan() {
bool ret = false;
@@ -392,13 +397,21 @@ public:
mz_zip_zero_struct(&zip);
void *opaque = this;
- if (!mz_zip_reader_init_file(&zip, m_fn.c_str(), 0)) {
+ bool ret1;
+ if (m_fn.empty()) {
+ ret1 = mz_zip_reader_init_mem(&zip, m_data, m_cnt, 0);
+ } else {
+ ret1 = mz_zip_reader_init_file(&zip, m_fn.c_str(), 0);
+ }
+ if (!ret1) {
if (m_reason) {
- *m_reason += "mz_zip_reader_init_file() failed: ";
- *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
+ *m_reason += "mz_zip_reader_init_xx() failed: ";
+ *m_reason +=
+ string(mz_zip_get_error_string(zip.m_last_error));
}
return false;
}
+
mz_uint32 file_index;
if (mz_zip_reader_locate_file_v2(&zip, m_member.c_str(), NULL, 0,
&file_index) < 0) {
@@ -453,6 +466,8 @@ public:
}
protected:
+    const char *m_data{nullptr}; // in-memory archive, read by scan() when m_fn is empty
+    size_t m_cnt{0};             // byte count of m_data
string m_fn;
string m_member;
string *m_reason;
@@ -469,6 +484,17 @@ bool file_scan(const std::string& filename, const std::string& membername,
}
}
+// Scan zip member membername of the archive in data[0..cnt); raw buffer if name is empty.
+bool string_scan(const char *data, size_t cnt, const std::string& membername,
+                 FileScanDo* doer, std::string *reason)
+{
+    if (membername.empty()) {
+        return string_scan(data, cnt, doer, reason, nullptr);
+    }
+    FileScanSourceZip source(data, cnt, doer, membername, reason);
+    return source.scan();
+}
+
#endif // READFILE_ENABLE_ZIP
bool file_scan(const string& fn, FileScanDo* doer, int64_t startoffs,
@@ -515,3 +541,52 @@ bool file_scan(const string& fn, FileScanDo* doer, string *reason)
{
return file_scan(fn, doer, 0, -1, reason, nullptr);
}
+
+
+// Source which hands one in-memory buffer to the downstream FileScanDo (out()).
+class FileScanSourceBuffer : public FileScanSource {
+public:
+    FileScanSourceBuffer(FileScanDo *next, const char *data, size_t cnt,
+                         string *reason)
+        : FileScanSource(next), m_data(data), m_cnt(cnt), m_reason(reason) {}
+    // Calls init() with the size, then passes the whole buffer in one data() call.
+    virtual bool scan() {
+        if (!out()) {
+            return true; // nothing downstream: nothing to do
+        }
+        if (!out()->init(m_cnt, m_reason)) {
+            return false;
+        }
+        return out()->data(m_data, m_cnt, m_reason);
+    }
+
+protected:
+    const char *m_data{nullptr};
+    size_t m_cnt{0};
+    string *m_reason{nullptr};
+};
+
+// Feed the memory buffer data[0..cnt) to doer through a FileScanSourceBuffer.
+// If md5p is not null it receives the MD5HexPrint() form of the data's MD5.
+bool string_scan(const char *data, size_t cnt, FileScanDo* doer,
+                 std::string *reason, std::string *md5p)
+{
+    FileScanSourceBuffer source(doer, data, cnt, reason);
+
+    // The MD5 stage is only inserted (insertAtSink) when the caller asked
+    // for a digest.
+    string digest;
+    FileScanMd5 md5filter(digest);
+    if (md5p) {
+        md5filter.insertAtSink(doer, &source);
+    }
+
+    const bool ret = source.scan();
+
+    if (md5p) {
+        md5filter.finish();
+        MD5HexPrint(digest, *md5p);
+    }
+    return ret;
+}
+
diff --git a/src/utils/readfile.h b/src/utils/readfile.h
index 05dc51d5..64323965 100644
--- a/src/utils/readfile.h
+++ b/src/utils/readfile.h
@@ -65,6 +65,10 @@ public:
bool file_scan(const std::string& fn, FileScanDo* doer, int64_t startoffs,
int64_t cnttoread, std::string *reason, std::string *md5p);
+/** Same as file_scan(), but the input is the cnt bytes at data instead of a file. */
+bool string_scan(const char *data, size_t cnt, FileScanDo* doer,
+                 std::string *reason, std::string *md5p);
+
/** Same as above, not offset/cnt/md5 */
bool file_scan(const std::string& filename, FileScanDo* doer,
std::string *reason);
@@ -74,6 +78,8 @@ bool file_scan(const std::string& filename, FileScanDo* doer,
/* Process a zip archive member */
bool file_scan(const std::string& filename, const std::string& membername,
FileScanDo* doer, std::string *reason);
+bool string_scan(const char* data, size_t cnt, const std::string& membername,
+                 FileScanDo* doer, std::string *reason); // archive is data[0..cnt) in memory
#endif
/**