python scripts for ppt and xls
--HG-- branch : WINDOWSPORT
This commit is contained in:
parent
86ef362461
commit
002eb67185
@ -34,12 +34,14 @@ import rclexecm
|
||||
# the object which we receive as a parameter, which in turn is defined
|
||||
# in the actual executable filter (e.g. rcldoc.py)
|
||||
class Executor:
|
||||
opt_ignxval = 1
|
||||
|
||||
def __init__(self, em, flt):
|
||||
self.em = em
|
||||
self.flt = flt
|
||||
self.currentindex = 0
|
||||
|
||||
def runCmd(self, cmd, filename, postproc):
|
||||
def runCmd(self, cmd, filename, postproc, opt):
|
||||
''' Substitute parameters and execute command, process output
|
||||
with the specific postprocessor and return the complete text.
|
||||
We expect cmd as a list of command name + arguments'''
|
||||
@ -60,7 +62,7 @@ class Executor:
|
||||
postproc.takeLine(line.strip())
|
||||
|
||||
proc.wait()
|
||||
if proc.returncode:
|
||||
if (opt & self.opt_ignxval) == 0 and proc.returncode:
|
||||
self.em.rclog("extractone: [%s] returncode %d" % \
|
||||
(filename, proc.returncode))
|
||||
return False, postproc.wrapData()
|
||||
@ -78,9 +80,12 @@ class Executor:
|
||||
|
||||
fn = params["filename:"]
|
||||
while True:
|
||||
cmd, postproc = self.flt.getCmd(fn)
|
||||
cmdseq = self.flt.getCmd(fn)
|
||||
cmd = cmdseq[0]
|
||||
postproc = cmdseq[1]
|
||||
opt = cmdseq[2] if len(cmdseq) == 3 else 0
|
||||
if cmd:
|
||||
ok, data = self.runCmd(cmd, fn, postproc)
|
||||
ok, data = self.runCmd(cmd, fn, postproc, opt)
|
||||
if ok:
|
||||
break
|
||||
else:
|
||||
|
||||
58
src/filters/rclppt.py
Executable file
58
src/filters/rclppt.py
Executable file
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import rclexecm
|
||||
import rclexec1
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Processing the output from ppt-dump.py
|
||||
class PPTProcessData:
    """Collect the text lines printed by the dump command and wrap them in HTML.

    The result is a minimal UTF-8 HTML document whose body is a single
    <pre> element holding the HTML-escaped text.

    em -- object providing htmlescape(); used to escape every line.
    """

    _HTML_HEADER = '''<html><head>''' + \
                   '''<meta http-equiv="Content-Type" ''' + \
                   '''content="text/html;charset=UTF-8">''' + \
                   '''</head><body><pre>'''

    def __init__(self, em):
        self.em = em
        self.out = ""
        self.gotdata = 0

    def _ensureHeader(self):
        """Emit the HTML header exactly once, before any other output."""
        if not self.gotdata:
            self.out += self._HTML_HEADER
            self.gotdata = True

    def takeLine(self, line):
        """Append one line of command output, HTML-escaped.

        The executor hands over lines with their line terminator already
        stripped, so a newline is added back here: without it the last
        word of a line would be glued to the first word of the next one.
        """
        self._ensureHeader()
        self.out += self.em.htmlescape(line) + "\n"

    def wrapData(self):
        """Return the complete HTML document.

        The header is emitted here too if no line was ever received, so
        that the returned document is well-formed even when empty.
        """
        self._ensureHeader()
        return self.out + '''</pre></body></html>'''
|
||||
|
||||
class PPTFilter:
    """Hand out the ppt-dump.py command line, at most once between resets."""

    def __init__(self, em):
        self.em = em
        self.ntry = 0

    def reset(self):
        """Clear the attempt marker so that getCmd() offers the command again."""
        self.ntry = 0

    def getCmd(self, fn):
        """Return the command to run, or ([], None) when there is none.

        On the first call after a reset, and if ppt-dump.py is found, the
        result is a 3-tuple: argument list, postprocessor, executor options.
        """
        nothing = ([], None)
        if self.ntry:
            return nothing
        self.ntry = 1
        dumper = rclexecm.which("ppt-dump.py")
        if not dumper:
            return nothing
        # ppt-dump.py often exits 1 with valid data. Ignore exit value
        args = [dumper, "--no-struct-output", "--dump-text"]
        return (args, PPTProcessData(self.em), rclexec1.Executor.opt_ignxval)
|
||||
|
||||
if __name__ == '__main__':
    # Nothing can be extracted without the helper script: report it on
    # stdout and exit with an error status.
    helper = rclexecm.which("ppt-dump.py")
    if not helper:
        print("RECFILTERROR HELPERNOTFOUND ppt-dump.py")
        sys.exit(1)
    # Wire the protocol object, the command supplier and the executor
    # together, then hand control to the rclexecm main loop.
    proto = rclexecm.RclExecM()
    flt = PPTFilter(proto)
    extract = rclexec1.Executor(proto, flt)
    rclexecm.main(proto, extract)
|
||||
@ -35,16 +35,20 @@ class RTFProcessData:
|
||||
class RTFFilter:
|
||||
def __init__(self, em):
|
||||
self.em = em
|
||||
self.ntry = 0
|
||||
|
||||
def reset(self):
|
||||
pass
|
||||
self.ntry = 0
|
||||
|
||||
def getCmd(self, fn):
|
||||
if self.ntry:
|
||||
return ([], None)
|
||||
self.ntry = 1
|
||||
cmd = rclexecm.which("unrtf")
|
||||
if cmd:
|
||||
return ([cmd, "--nopict", "--html"], RTFProcessData(self.em))
|
||||
else:
|
||||
return ([],None)
|
||||
return ([], None)
|
||||
|
||||
if __name__ == '__main__':
|
||||
if not rclexecm.which("unrtf"):
|
||||
|
||||
64
src/filters/rclxls.py
Executable file
64
src/filters/rclxls.py
Executable file
@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import rclexecm
|
||||
import rclexec1
|
||||
import xlsxmltocsv
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
import xml.sax
|
||||
|
||||
# Processing the output from xls-dump.py
|
||||
class XLSProcessData:
    """Accumulate the XML printed by the dump command and render it as HTML.

    The XML is buffered line by line, then parsed in wrapData() with
    xlsxmltocsv.XlsXmlHandler; the handler's text output is HTML-escaped
    and placed inside a <pre> element of a minimal UTF-8 HTML document.

    em -- object providing htmlescape(); used to escape the converted text.
    """

    def __init__(self, em):
        self.em = em
        self.out = ""
        self.gotdata = 0
        self.xmldata = ""

    def takeLine(self, line):
        """Buffer one line of XML output for later parsing."""
        if not self.gotdata:
            self.out += '''<html><head>''' + \
                        '''<meta http-equiv="Content-Type" ''' + \
                        '''content="text/html;charset=UTF-8">''' + \
                        '''</head><body><pre>'''
            self.gotdata = True
        # The executor hands over lines with their terminator stripped.
        # Put a newline back so that tokens from consecutive lines (e.g. a
        # tag split over two lines) stay separated by whitespace.
        self.xmldata += line + "\n"

    def wrapData(self):
        """Parse the buffered XML and return the complete HTML document.

        Parse errors raised by xml.sax.parseString() are not caught here.
        self.out is left untouched, so calling this again yields the same
        document instead of a duplicated body.
        """
        handler = xlsxmltocsv.XlsXmlHandler()
        # parseString() delivers its result through the handler only.
        xml.sax.parseString(self.xmldata, handler)
        body = self.em.htmlescape(handler.output)
        return self.out + body + '''</pre></body></html>'''
|
||||
|
||||
class XLSFilter:
    """Hand out the xls-dump.py command line, at most once between resets."""

    def __init__(self, em):
        self.em = em
        self.ntry = 0

    def reset(self):
        """Clear the attempt marker so that getCmd() offers the command again."""
        self.ntry = 0

    def getCmd(self, fn):
        """Return the command to run, or ([], None) when there is none.

        On the first call after a reset, and if xls-dump.py is found, the
        result is a 3-tuple: argument list, postprocessor, executor options.
        """
        nothing = ([], None)
        if self.ntry:
            return nothing
        self.ntry = 1
        dumper = rclexecm.which("xls-dump.py")
        if not dumper:
            return nothing
        # xls-dump.py often exits 1 with valid data. Ignore exit value
        args = [dumper, "--dump-mode=canonical-xml", "--utf-8", "--catch"]
        return (args, XLSProcessData(self.em), rclexec1.Executor.opt_ignxval)
|
||||
|
||||
if __name__ == '__main__':
    # This filter runs xls-dump.py (see XLSFilter.getCmd), so that is the
    # helper whose presence must be checked and whose name must be
    # reported when it is missing.
    if not rclexecm.which("xls-dump.py"):
        print("RECFILTERROR HELPERNOTFOUND xls-dump.py")
        sys.exit(1)
    # Wire the protocol object, the command supplier and the executor
    # together, then hand control to the rclexecm main loop.
    proto = rclexecm.RclExecM()
    flt = XLSFilter(proto)
    extract = rclexec1.Executor(proto, flt)
    rclexecm.main(proto, extract)
|
||||
@ -15,10 +15,13 @@ else:
|
||||
dquote = '"'
|
||||
|
||||
class XlsXmlHandler(xml.sax.handler.ContentHandler):
|
||||
def __init__(self):
|
||||
self.output = ""
|
||||
|
||||
def startElement(self, name, attrs):
|
||||
if name == "worksheet":
|
||||
if "name" in attrs:
|
||||
print("%s" % attrs["name"].encode("UTF-8"))
|
||||
self.output += "%s\n" % attrs["name"].encode("UTF-8")
|
||||
elif name == "row":
|
||||
self.cells = dict()
|
||||
elif name == "label-cell" or name == "number-cell":
|
||||
@ -30,7 +33,7 @@ class XlsXmlHandler(xml.sax.handler.ContentHandler):
|
||||
self.cells[int(attrs["col"])] = value
|
||||
else:
|
||||
#??
|
||||
sys.stdout.write("%s%s"%(value.encode("UTF-8"),sepstring))
|
||||
self.output += "%s%s" % (value.encode("UTF-8"), sepstring)
|
||||
elif name == "formula-cell":
|
||||
if "formula-result" in attrs and "col" in attrs:
|
||||
self.cells[int(attrs["col"])] = \
|
||||
@ -40,17 +43,21 @@ class XlsXmlHandler(xml.sax.handler.ContentHandler):
|
||||
if name == "row":
|
||||
curidx = 0
|
||||
for idx, value in self.cells.iteritems():
|
||||
sys.stdout.write(sepstring * (idx - curidx))
|
||||
sys.stdout.write('%s%s%s' % (dquote, value, dquote))
|
||||
self.output += sepstring * (idx - curidx)
|
||||
self.output += "%s%s%s" % (dquote, value, dquote)
|
||||
curidx = idx
|
||||
sys.stdout.write("\n")
|
||||
self.output += "\n"
|
||||
elif name == "worksheet":
|
||||
print("")
|
||||
self.output += "\n"
|
||||
|
||||
try:
|
||||
xml.sax.parse(sys.stdin, XlsXmlHandler())
|
||||
except BaseException as err:
|
||||
error("xml-parse: %s\n" % (str(sys.exc_info()[:2]),))
|
||||
sys.exit(1)
|
||||
|
||||
sys.exit(0)
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
handler = XlsXmlHandler()
|
||||
xml.sax.parse(sys.stdin, handler)
|
||||
print(handler.output)
|
||||
except BaseException as err:
|
||||
error("xml-parse: %s\n" % (str(sys.exc_info()[:2]),))
|
||||
sys.exit(1)
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user