Merge branch 'RECOLL_1_23_MAINT'
This commit is contained in:
commit
b99372d379
@ -7,11 +7,6 @@ from __future__ import print_function
|
||||
# Note: this is not converted to python3, libchm does not have a
|
||||
# python3 wrapper at this point (2015-11)
|
||||
|
||||
# Do we return individual chapters as html pages or concatenate everything?
|
||||
rclchm_catenate = 0
|
||||
# Use special html type to allow for mimeconf/mimeview Open magic,
|
||||
# Or go the regular html way with text/html
|
||||
#rclchm_html_mtype = "text/x-chm-html"
|
||||
rclchm_html_mtype = "text/html"
|
||||
|
||||
import sys
|
||||
@ -20,9 +15,9 @@ import re
|
||||
import posixpath
|
||||
import urlparse
|
||||
import urllib
|
||||
if rclchm_catenate:
|
||||
import subprocess
|
||||
import subprocess
|
||||
|
||||
import rclconfig
|
||||
import rclexecm
|
||||
|
||||
try:
|
||||
@ -196,7 +191,10 @@ class rclCHM:
|
||||
self.contents = []
|
||||
self.chm = chm.CHMFile()
|
||||
self.em = em
|
||||
if rclchm_catenate:
|
||||
cf = rclconfig.RclConfig()
|
||||
self.catenate = cf.getConfParam("chmcatenate")
|
||||
self.catenate = int(self.catenate) if self.catenate else False
|
||||
if self.catenate:
|
||||
self.em.setmimetype("text/plain")
|
||||
else:
|
||||
self.em.setmimetype(rclchm_html_mtype)
|
||||
@ -314,7 +312,8 @@ class rclCHM:
|
||||
return self.extractone(params["ipath:"])
|
||||
|
||||
def getnext(self, params):
|
||||
if rclchm_catenate:
|
||||
if self.catenate:
|
||||
self.em.setmimetype("text/plain")
|
||||
alltxt = self.dumpall()
|
||||
if alltxt:
|
||||
return (True, alltxt, "", rclexecm.RclExecM.eofnext)
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python2
|
||||
"""Extract Html content from an EPUB file (.chm)"""
|
||||
"""Extract Html content from an EPUB file (.epub)"""
|
||||
from __future__ import print_function
|
||||
|
||||
rclepub_html_mtype = "text/html"
|
||||
@ -7,8 +7,10 @@ rclepub_html_mtype = "text/html"
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
import rclexecm
|
||||
import rclconfig
|
||||
|
||||
try:
|
||||
import epub
|
||||
@ -25,6 +27,9 @@ class rclEPUB:
|
||||
self.currentindex = 0
|
||||
self.em = em
|
||||
self.em.setmimetype(rclepub_html_mtype)
|
||||
cf = rclconfig.RclConfig()
|
||||
self.catenate = cf.getConfParam("epubcatenate")
|
||||
self.catenate = int(self.catenate) if self.catenate else False
|
||||
|
||||
def _selfdoc(self):
|
||||
meta = self.book.opf.metadata
|
||||
@ -72,6 +77,25 @@ class rclEPUB:
|
||||
self.em.rclog("extractone: failed: [%s]" % err)
|
||||
return (False, "", id, iseof)
|
||||
|
||||
def dumpall(self):
    """Return the plain text of all chapters, concatenated.

    Sets the output MIME type to text/plain, then passes every entry of
    self.contents to extractone() and converts the returned HTML to
    text by piping it through "lynx -dump". Entries for which
    extractone() reports failure are skipped.

    Returns the joined lynx output, which is empty when nothing could
    be extracted.
    """
    self.em.setmimetype('text/plain')
    chunks = []

    for ipath in self.contents:
        # extractone() returns (ok, doc, path, iseof); only the first
        # two are needed here.
        ok, doc, _, _ = self.extractone(ipath)
        if not ok:
            continue
        # Feed doc to lynx
        process = subprocess.Popen(["lynx", "-stdin", "-dump", "-nolist",
                                    "-display_charset=utf8",
                                    "-force_html"],
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE)
        # stderr is not piped, so only the stdout data is of interest.
        txt = process.communicate(doc)[0]
        if process.returncode != 0:
            # Keep whatever lynx produced, but leave a trace of the failure.
            self.em.rclog("dumpall: lynx exited with status %d for [%s]" %
                          (process.returncode, ipath))
        chunks.append(txt)

    # Join once at the end instead of growing a string chapter by chapter.
    # The pipe yields byte strings, hence the bytes separator.
    return b"".join(chunks)
|
||||
|
||||
def openfile(self, params):
|
||||
"""Open the EPUB file, create a contents array"""
|
||||
self.currentindex = -1
|
||||
@ -91,6 +115,12 @@ class rclEPUB:
|
||||
return self.extractone(params["ipath:"])
|
||||
|
||||
def getnext(self, params):
|
||||
if self.catenate:
|
||||
alltxt = self.dumpall()
|
||||
if alltxt:
|
||||
return (True, alltxt, "", rclexecm.RclExecM.eofnext)
|
||||
else:
|
||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||
|
||||
if self.currentindex == -1:
|
||||
self.currentindex = 0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user