rclchm: add concatenating mode
This commit is contained in:
parent
5f9095b472
commit
544e687afe
@ -2,11 +2,20 @@
|
|||||||
"""Extract Html files from a Microsoft Compiled Html Help file (.chm)
|
"""Extract Html files from a Microsoft Compiled Html Help file (.chm)
|
||||||
Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
|
Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
|
||||||
|
|
||||||
|
# Do we return individual chapters as html pages or concatenate everything?
|
||||||
|
rclchm_catenate = 0
|
||||||
|
# Use special html type to allow for mimeconf/mimeview Open magic,
|
||||||
|
# or go the regular html way with text/html
|
||||||
|
#rclchm_html_mtype = "text/x-chm-html"
|
||||||
|
rclchm_html_mtype = "text/html"
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import posixpath
|
import posixpath
|
||||||
import urlparse
|
import urlparse
|
||||||
import urllib
|
import urllib
|
||||||
|
if rclchm_catenate:
|
||||||
|
import subprocess
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
|
|
||||||
@ -171,7 +180,11 @@ class rclCHM:
|
|||||||
self.tp = ChmTopicsParser(em)
|
self.tp = ChmTopicsParser(em)
|
||||||
self.currentindex = 0
|
self.currentindex = 0
|
||||||
self.em = em
|
self.em = em
|
||||||
|
if rclchm_catenate:
|
||||||
|
self.em.setmimetype("text/plain")
|
||||||
|
else:
|
||||||
|
self.em.setmimetype(rclchm_html_mtype)
|
||||||
|
|
||||||
def extractone(self, path):
|
def extractone(self, path):
|
||||||
"""Extract one path-named internal file from the chm file"""
|
"""Extract one path-named internal file from the chm file"""
|
||||||
|
|
||||||
@ -188,10 +201,27 @@ class rclCHM:
|
|||||||
res, doc = self.chm.RetrieveObject(ui)
|
res, doc = self.chm.RetrieveObject(ui)
|
||||||
#self.em.rclog("extract: RetrieveObject: %d [%s]" % (res, doc))
|
#self.em.rclog("extract: RetrieveObject: %d [%s]" % (res, doc))
|
||||||
if res > 0:
|
if res > 0:
|
||||||
self.em.setmimetype("text/html")
|
self.em.setmimetype(rclchm_html_mtype)
|
||||||
return (True, doc, path, iseof)
|
return (True, doc, path, iseof)
|
||||||
return (False, "", path, iseof)
|
return (False, "", path, iseof)
|
||||||
|
|
||||||
|
def dumpall(self):
    """Extract all the topics and return them as one concatenated text.

    Every path listed in self.tp.contents is extracted with extractone().
    Paths for which the extraction fails are skipped. Each extracted
    document is piped to an external "lynx -dump" process, and the text
    dumps are concatenated in contents order.

    Returns the concatenated text, which is empty if no document could be
    extracted.
    """
    chunks = []
    for pth in self.tp.contents:
        # extractone() returns (ok, data, path, eof); only the first two
        # values are needed here.
        ret, doc = self.extractone(pth)[:2]
        if not ret:
            continue
        # Feed doc to lynx
        process = subprocess.Popen(["lynx", "-stdin", "-dump", "-nolist",
                                    "-display_charset=utf8",
                                    "-force_html"],
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE)
        # stderr is not piped, so the second element returned by
        # communicate() is always None: keep the stdout data only.
        chunks.append(process.communicate(doc)[0])
    # Join once at the end instead of growing a string inside the loop.
    return "".join(chunks)
|
||||||
|
|
||||||
def openfile(self, params):
|
def openfile(self, params):
|
||||||
"""Open the chm file and build the contents list by extracting and
|
"""Open the chm file and build the contents list by extracting and
|
||||||
parsing the Topics object"""
|
parsing the Topics object"""
|
||||||
@ -239,6 +269,13 @@ class rclCHM:
|
|||||||
return self.extractone(params["ipath:"])
|
return self.extractone(params["ipath:"])
|
||||||
|
|
||||||
def getnext(self, params):
|
def getnext(self, params):
|
||||||
|
if rclchm_catenate:
|
||||||
|
alltxt = self.dumpall()
|
||||||
|
if alltxt:
|
||||||
|
return (True, alltxt, "", rclexecm.RclExecM.eofnext)
|
||||||
|
else:
|
||||||
|
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
|
|
||||||
if self.currentindex >= len(self.tp.contents):
|
if self.currentindex >= len(self.tp.contents):
|
||||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
else:
|
else:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user