Merge branch 'RECOLL_1_23_MAINT'

2018-01-05 17:56:44 +01:00 · 2018-01-05 17:56:44 +01:00 · b99372d379
commit b99372d379
parent 63907cf573 413c710f34
2 changed files with 39 additions and 10 deletions
--- a/src/filters/rclchm
+++ b/src/filters/rclchm
@ -7,11 +7,6 @@ from __future__ import print_function
 # Note: this is not converted to python3, libchm does not have a
 # python3 wrapper at this point (2015-11)
 # Do we return individual chapters as html pages or concatenate everything?
 rclchm_catenate = 0
 # Use special html type to allow for mimeconf/mimeview Open magic,
 # Or go the regular html way with text/html
 #rclchm_html_mtype = "text/x-chm-html"
 rclchm_html_mtype = "text/html"
 import sys
@ -20,9 +15,9 @@ import re
 import posixpath
 import urlparse
 import urllib
-if rclchm_catenate:
+import subprocess
    import subprocess
 import rclconfig
 import rclexecm
 try:
@ -196,7 +191,10 @@ class rclCHM:
        self.contents = []
        self.chm = chm.CHMFile()
        self.em = em
-        if rclchm_catenate:
+        cf = rclconfig.RclConfig()
        self.catenate = cf.getConfParam("chmcatenate")
        self.catenate = int(self.catenate) if self.catenate else False
        if self.catenate:
            self.em.setmimetype("text/plain")
        else:
            self.em.setmimetype(rclchm_html_mtype)
@ -314,7 +312,8 @@ class rclCHM:
        return self.extractone(params["ipath:"])
    def getnext(self, params):
-        if rclchm_catenate:
+        if self.catenate:
            self.em.setmimetype("text/plain")
            alltxt = self.dumpall()
            if alltxt:
                return (True, alltxt, "", rclexecm.RclExecM.eofnext)
--- a/src/filters/rclepub
+++ b/src/filters/rclepub
@ -1,5 +1,5 @@
 #!/usr/bin/env python2
-"""Extract Html content from an EPUB file (.chm)"""
+"""Extract Html content from an EPUB file (.epub)"""
 from __future__ import print_function
 rclepub_html_mtype = "text/html"
@ -7,8 +7,10 @@ rclepub_html_mtype = "text/html"
 import sys
 import os
 import re
 import subprocess
 import rclexecm
 import rclconfig
 try:
    import epub
@ -25,6 +27,9 @@ class rclEPUB:
        self.currentindex = 0
        self.em = em
        self.em.setmimetype(rclepub_html_mtype)
        cf = rclconfig.RclConfig()
        self.catenate = cf.getConfParam("epubcatenate")
        self.catenate = int(self.catenate) if self.catenate else False
    def _selfdoc(self):
        meta = self.book.opf.metadata
@ -72,6 +77,25 @@ class rclEPUB:
            self.em.rclog("extractone: failed: [%s]" % err)
            return (False, "", id, iseof)
    def dumpall(self):
        """Return the whole document as a single plain-text string.

        Sets the output MIME type to text/plain, then runs extractone()
        on every entry of self.contents and converts each extracted
        document to text by piping it through "lynx -dump". Entries for
        which extractone() reports failure are skipped.

        Returns the lynx outputs concatenated in self.contents order, or
        an empty string if nothing could be extracted.
        """
        self.em.setmimetype('text/plain')
        # Collect the per-entry text and join once at the end instead of
        # growing a string with += on every iteration.
        chunks = []
        for entry in self.contents:
            ok, doc, _, _ = self.extractone(entry)
            if not ok:
                continue
            # Feed doc to lynx
            process = subprocess.Popen(["lynx", "-stdin", "-dump", "-nolist",
                                        "-display_charset=utf8",
                                        "-force_html"],
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE)
            # stderr is not redirected, so only the stdout half of the
            # communicate() result carries data.
            chunks.append(process.communicate(doc)[0])
        return "".join(chunks)
    def openfile(self, params):
        """Open the EPUB file, create a contents array"""
        self.currentindex = -1
@ -91,6 +115,12 @@ class rclEPUB:
        return self.extractone(params["ipath:"])
    def getnext(self, params):
        if self.catenate:
            alltxt = self.dumpall()
            if alltxt:
                return (True, alltxt, "", rclexecm.RclExecM.eofnext)
            else:
                return (False, "", "", rclexecm.RclExecM.eofnow)
        if self.currentindex == -1:
            self.currentindex = 0