Hanword: use the HTML converter; the text one drops data from tables
This commit is contained in:
parent
d2b695d705
commit
03cbc203e1
@ -25,6 +25,7 @@
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
import subprocess
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
from rclbasehandler import RclBaseHandler
|
from rclbasehandler import RclBaseHandler
|
||||||
@ -68,16 +69,12 @@ class HWP5Dump(RclBaseHandler):
|
|||||||
# hwp wants str filenames. This is unfortunate
|
# hwp wants str filenames. This is unfortunate
|
||||||
fn = fn.decode('utf-8')
|
fn = fn.decode('utf-8')
|
||||||
|
|
||||||
html = b'<html><head>\n' + \
|
|
||||||
b'<meta http-equiv="content-type" \
|
|
||||||
content="text/html; charset=utf-8">\n'
|
|
||||||
|
|
||||||
hwpfile = fs_Hwp5File(fn)
|
hwpfile = fs_Hwp5File(fn)
|
||||||
try:
|
try:
|
||||||
tt = hwpfile.summaryinfo.title.strip()
|
tt = hwpfile.summaryinfo.title.strip()
|
||||||
if tt:
|
if tt:
|
||||||
tt = self.em.htmlescape(tt.encode('utf-8'))
|
tt = self.em.htmlescape(tt.encode('utf-8'))
|
||||||
html += b'<title>' + tt + b'</title>\n'
|
self.em.setfield('caption', tt)
|
||||||
|
|
||||||
for k,v in metafields(hwpfile.summaryinfo):
|
for k,v in metafields(hwpfile.summaryinfo):
|
||||||
v = "{0}".format(v)
|
v = "{0}".format(v)
|
||||||
@ -85,28 +82,17 @@ class HWP5Dump(RclBaseHandler):
|
|||||||
if v:
|
if v:
|
||||||
v = self.em.htmlescape(v.encode('utf-8'))
|
v = self.em.htmlescape(v.encode('utf-8'))
|
||||||
k = k.encode('utf-8')
|
k = k.encode('utf-8')
|
||||||
html += b'<meta name="' + k + b'" content="' + \
|
self.em.setfield(k, v)
|
||||||
v + b'">\n'
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.em.rclog("Exception: %s" % e)
|
self.em.rclog("Exception: %s" % e)
|
||||||
finally:
|
finally:
|
||||||
hwpfile.close()
|
hwpfile.close()
|
||||||
|
|
||||||
html += b'</head><body><pre>\n'
|
# The first version of this file used conversion to text using
|
||||||
|
# the hwp5 module (no subproc). But this apparently mishandled
|
||||||
hwpfile = xml_Hwp5File(fn)
|
# tables. Switched to executing hwp5html instead. See 1st git
|
||||||
text_transform = TextTransform()
|
# version for the old approach.
|
||||||
transform = text_transform.transform_hwp5_to_text
|
html = subprocess.check_output(["hwp5html", "--html", fn])
|
||||||
dest = BytesIO()
|
|
||||||
try:
|
|
||||||
transform(hwpfile, dest)
|
|
||||||
except Exception as e:
|
|
||||||
self.em.rclog("Exception: %s" % e)
|
|
||||||
finally:
|
|
||||||
hwpfile.close()
|
|
||||||
dest.seek(0)
|
|
||||||
html += self.em.htmlescape(dest.read())
|
|
||||||
html += b'</pre></body></html>'
|
|
||||||
return html
|
return html
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|||||||
@ -43,7 +43,7 @@ application/x-mobipocket-ebook = ebook-viewer %f
|
|||||||
|
|
||||||
application/x-kword = kword %f
|
application/x-kword = kword %f
|
||||||
application/x-abiword = abiword %f
|
application/x-abiword = abiword %f
|
||||||
application/x-hwp = evince-hwp
|
application/x-hwp = libreoffice
|
||||||
|
|
||||||
# Note: the Linux Mint evince clones, atril and xread, have the same options
|
# Note: the Linux Mint evince clones, atril and xread, have the same options
|
||||||
application/pdf = evince --page-index=%p --find=%s %f
|
application/pdf = evince --page-index=%p --find=%s %f
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user