rclchm: add concatenating mode

2012-04-03 17:29:01 +02:00 · 2012-04-03 17:29:01 +02:00 · 544e687afe
commit 544e687afe
parent 5f9095b472
1 changed files with 40 additions and 3 deletions
--- a/src/filters/rclchm
+++ b/src/filters/rclchm
@ -2,11 +2,20 @@
 """Extract Html files from a Microsoft Compiled Html Help file (.chm)
 Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""

+# Do we return individual chapters as html pages or concatenate everything?
+rclchm_catenate = 0
+# Use a special html mime type to allow for mimeconf/mimeview Open magic,
+# or go the regular html way with text/html
+#rclchm_html_mtype = "text/x-chm-html"
+rclchm_html_mtype = "text/html"
+
 import sys
 import os
 import posixpath
 import urlparse
 import urllib
+if rclchm_catenate:
+    import subprocess

 import rclexecm

@ -171,7 +180,11 @@ class rclCHM:
        self.tp = ChmTopicsParser(em)
        self.currentindex = 0
        self.em = em
-        
+        if rclchm_catenate:
+            self.em.setmimetype("text/plain")
+        else:
+            self.em.setmimetype(rclchm_html_mtype)
+
    def extractone(self, path):
        """Extract one path-named internal file from the chm file"""

@ -188,10 +201,27 @@ class rclCHM:
        res, doc = self.chm.RetrieveObject(ui)
        #self.em.rclog("extract: RetrieveObject: %d [%s]" % (res, doc))
        if res > 0:
-            self.em.setmimetype("text/html")
+            self.em.setmimetype(rclchm_html_mtype)
            return (True, doc, path, iseof)
        return (False, "", path, iseof)
-    
+
+    def dumpall(self):
+        alltxt=""
+        for pth in self.tp.contents:
+            ret,doc,path,iseof = self.extractone(pth)
+            if not ret:
+                continue
+            # Feed doc to lynx
+            process = subprocess.Popen(["lynx", "-stdin", "-dump", "-nolist",
+                                        "-display_charset=utf8",
+                                        "-force_html"], 
+                                       stdin=subprocess.PIPE,
+                                       stdout=subprocess.PIPE
+                                       )
+            txt,err = process.communicate(doc)
+            alltxt += txt
+        return alltxt
+
    def openfile(self, params):
        """Open the chm file and build the contents list by extracting and
        parsing the Topics object"""
@ -239,6 +269,13 @@ class rclCHM:
        return self.extractone(params["ipath:"])
        
    def getnext(self, params):
+        if rclchm_catenate:
+            alltxt = self.dumpall()
+            if alltxt:
+                return (True, alltxt, "", rclexecm.RclExecM.eofnext)
+            else:
+                return (False, "", "", rclexecm.RclExecM.eofnow)
+
        if self.currentindex >= len(self.tp.contents):
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else: