diff --git a/src/filters/rclopxml.py b/src/filters/rclopxml.py new file mode 100755 index 00000000..a1546796 --- /dev/null +++ b/src/filters/rclopxml.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python +# Copyright (C) 2015 J.F.Dockes +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +###################################### +from __future__ import print_function + +import sys +import rclexecm +import rclxslt +import fnmatch +from zipfile import ZipFile + +meta_stylesheet = ''' + + + + + + + + + + + + + + + + + author + + + + + + + + + + + + date + + + + + + + + + + + + +''' + +word_tagmatch = 'w:p' +word_xmlns_decls = '''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0" +xmlns:ve="http://schemas.openxmlformats.org/markup-compatibility/2006" +xmlns:o="urn:schemas-microsoft-com:office:office" +xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" +xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" +xmlns:v="urn:schemas-microsoft-com:vml" +xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" +xmlns:w10="urn:schemas-microsoft-com:office:word" +xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" +xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" +''' +word_moretemplates = '' + + +xl_tagmatch = 'x:t' 
+xl_xmlns_decls='''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0" +xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main" + ''' +xl_moretemplates = '' + +pp_tagmatch = 'a:t' +pp_xmlns_decls = '''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0" +xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" +xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" +xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" +''' +# I want to suppress text output for all except a:t, don't know how to do it +# help ! At least get rid of these: +pp_moretemplates = ''' + +''' + +content_stylesheet = ''' + + + + + +
+ +
+
+ + +

+ +

+
+ +@MORETEMPLATES@ + +
class OXExtractor:
    """Recoll filter handler that extracts text from Office Open XML files.

    The input file is opened as a zip archive.  The core properties and
    whichever of the Word, Excel or PowerPoint parts are present are run
    through XSLT style sheets (via rclxslt.apply_sheet_data) and the
    results are concatenated into a single document.

    The openfile/getipath/getnext methods form the file type handler API
    used by rclexecm.
    """

    def __init__(self, em):
        # em: protocol object handed over by the caller; this class only
        # uses its rclog() method, for error logging.
        self.em = em
        self.currentindex = 0

    def computestylesheet(self, nm):
        """Return the content style sheet specialised for one document type.

        nm is the prefix ('word', 'xl' or 'pp') of the module-level
        <nm>_xmlns_decls, <nm>_tagmatch and <nm>_moretemplates values,
        which replace the matching placeholders in content_stylesheet.
        Raises KeyError if one of these module-level names does not exist.
        """
        stylesheet = content_stylesheet
        for marker, suffix in (('@XMLNS_DECLS@', '_xmlns_decls'),
                               ('@TAGMATCH@', '_tagmatch'),
                               ('@MORETEMPLATES@', '_moretemplates')):
            stylesheet = stylesheet.replace(marker, globals()[nm + suffix])
        return stylesheet

    @staticmethod
    def _slidesortkey(name):
        """Sort key giving slide members their numeric order.

        name is an archive member matching 'ppt/slides/slide*.xml'.
        Members whose wildcard part is a number sort by that number
        (slide2 before slide11); any other member sorts after them, by name.
        """
        stem = name[len('ppt/slides/slide'):-len('.xml')]
        try:
            return (0, int(stem), name)
        except ValueError:
            return (1, 0, name)

    def _convertarchive(self, archive):
        """Build the output document from an already opened ZipFile.

        Every part is optional: a missing member or a failed transformation
        only leaves that part out of the result.
        """
        # NOTE(review): the three literals placed before the metadata,
        # between metadata and content, and after the content are empty in
        # the source as seen here; presumably they once carried wrapper
        # markup that was lost -- confirm against the upstream file.
        docdata = ''

        try:
            metadata = archive.read("docProps/core.xml")
            if metadata:
                docdata += rclxslt.apply_sheet_data(meta_stylesheet, metadata)
        except Exception:
            # To be checked. The original author was under the impression
            # that this also happens when nothing matches, so it is not
            # logged.
            pass

        docdata += ''

        # Word and Excel keep their text in a single, fixed member.
        for member, doctype in (('word/document.xml', 'word'),
                                ('xl/sharedStrings.xml', 'xl')):
            try:
                content = archive.read(member)
                stl = self.computestylesheet(doctype)
                docdata += rclxslt.apply_sheet_data(stl, content)
            except Exception:
                # Member not present (other document type) or not
                # convertible: skip it.
                pass

        # PowerPoint has one member per slide; process them in slide order.
        try:
            stl = self.computestylesheet('pp')
            slides = sorted((name for name in archive.namelist()
                             if fnmatch.fnmatch(name,
                                                'ppt/slides/slide*.xml')),
                            key=self._slidesortkey)
        except Exception:
            slides = []
        for member in slides:
            # One try per slide, so that a bad slide does not discard the
            # slides that follow it.
            try:
                docdata += rclxslt.apply_sheet_data(stl, archive.read(member))
            except Exception:
                pass

        docdata += ''
        return docdata

    def extractone(self, params):
        """Extract the text of the file named by params["filename:"].

        Returns a 4-tuple.  On success it is
        (True, document, "", rclexecm.RclExecM.eofnow).  If the file name
        is missing or the file cannot be opened as a zip archive, the error
        is logged and (False, "", "", rclexecm.RclExecM.eofnow) is returned.
        """
        # "in" instead of dict.has_key(), which Python 3 does not have.
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            archive = ZipFile(fn)
        except Exception as err:
            self.em.rclog("unzip failed: " + str(err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        # Always release the archive's file handle, even on error.
        try:
            docdata = self._convertarchive(archive)
        finally:
            archive.close()

        return (True, docdata, "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Start processing a new file: reset the document counter."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the document for the file; there is a single one."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the file's only document on the first call after
        openfile(), and an end-of-file failure tuple on later calls."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(params)
        self.currentindex += 1
        return ret


if __name__ == '__main__':
    proto = rclexecm.RclExecM()
    extract = OXExtractor(proto)
    rclexecm.main(proto, extract)
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +###################################### import sys -import os -import subprocess import rclexecm import rclxslt from zipfile import ZipFile @@ -170,10 +184,6 @@ class OOExtractor: return ret if __name__ == '__main__': - # Check for unzip - if not rclexecm.which("unzip"): - print("RECFILTERROR HELPERNOTFOUND unzip") - sys.exit(1) proto = rclexecm.RclExecM() extract = OOExtractor(proto) rclexecm.main(proto, extract) diff --git a/src/filters/rclxslt.py b/src/filters/rclxslt.py index 574cd582..4b0e2e8c 100644 --- a/src/filters/rclxslt.py +++ b/src/filters/rclxslt.py @@ -1,4 +1,21 @@ -#!/usr/bin/env python +# Copyright (C) 2014 J.F.Dockes +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +###################################### + +# Helper module for xslt-based filters import sys