Merge branch 'RECOLL_1_23_MAINT'
This commit is contained in:
commit
b99372d379
@ -7,11 +7,6 @@ from __future__ import print_function
|
|||||||
# Note: this is not converted to python3, libchm does not have a
|
# Note: this is not converted to python3, libchm does not have a
|
||||||
# python3 wrapper at this point (2015-11)
|
# python3 wrapper at this point (2015-11)
|
||||||
|
|
||||||
# Do we return individual chapters as html pages or concatenate everything?
|
|
||||||
rclchm_catenate = 0
|
|
||||||
# Use special html type to allow for mimeconf/mimeview Open magic,
|
|
||||||
# Or go the regular html way with text/html
|
|
||||||
#rclchm_html_mtype = "text/x-chm-html"
|
|
||||||
rclchm_html_mtype = "text/html"
|
rclchm_html_mtype = "text/html"
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
@ -20,9 +15,9 @@ import re
|
|||||||
import posixpath
|
import posixpath
|
||||||
import urlparse
|
import urlparse
|
||||||
import urllib
|
import urllib
|
||||||
if rclchm_catenate:
|
import subprocess
|
||||||
import subprocess
|
|
||||||
|
|
||||||
|
import rclconfig
|
||||||
import rclexecm
|
import rclexecm
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -196,7 +191,10 @@ class rclCHM:
|
|||||||
self.contents = []
|
self.contents = []
|
||||||
self.chm = chm.CHMFile()
|
self.chm = chm.CHMFile()
|
||||||
self.em = em
|
self.em = em
|
||||||
if rclchm_catenate:
|
cf = rclconfig.RclConfig()
|
||||||
|
self.catenate = cf.getConfParam("chmcatenate")
|
||||||
|
self.catenate = int(self.catenate) if self.catenate else False
|
||||||
|
if self.catenate:
|
||||||
self.em.setmimetype("text/plain")
|
self.em.setmimetype("text/plain")
|
||||||
else:
|
else:
|
||||||
self.em.setmimetype(rclchm_html_mtype)
|
self.em.setmimetype(rclchm_html_mtype)
|
||||||
@ -314,7 +312,8 @@ class rclCHM:
|
|||||||
return self.extractone(params["ipath:"])
|
return self.extractone(params["ipath:"])
|
||||||
|
|
||||||
def getnext(self, params):
|
def getnext(self, params):
|
||||||
if rclchm_catenate:
|
if self.catenate:
|
||||||
|
self.em.setmimetype("text/plain")
|
||||||
alltxt = self.dumpall()
|
alltxt = self.dumpall()
|
||||||
if alltxt:
|
if alltxt:
|
||||||
return (True, alltxt, "", rclexecm.RclExecM.eofnext)
|
return (True, alltxt, "", rclexecm.RclExecM.eofnext)
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
"""Extract Html content from an EPUB file (.chm)"""
|
"""Extract Html content from an EPUB file (.epub)"""
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
rclepub_html_mtype = "text/html"
|
rclepub_html_mtype = "text/html"
|
||||||
@ -7,8 +7,10 @@ rclepub_html_mtype = "text/html"
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import subprocess
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
|
import rclconfig
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import epub
|
import epub
|
||||||
@ -25,6 +27,9 @@ class rclEPUB:
|
|||||||
self.currentindex = 0
|
self.currentindex = 0
|
||||||
self.em = em
|
self.em = em
|
||||||
self.em.setmimetype(rclepub_html_mtype)
|
self.em.setmimetype(rclepub_html_mtype)
|
||||||
|
cf = rclconfig.RclConfig()
|
||||||
|
self.catenate = cf.getConfParam("epubcatenate")
|
||||||
|
self.catenate = int(self.catenate) if self.catenate else False
|
||||||
|
|
||||||
def _selfdoc(self):
|
def _selfdoc(self):
|
||||||
meta = self.book.opf.metadata
|
meta = self.book.opf.metadata
|
||||||
@ -72,6 +77,25 @@ class rclEPUB:
|
|||||||
self.em.rclog("extractone: failed: [%s]" % err)
|
self.em.rclog("extractone: failed: [%s]" % err)
|
||||||
return (False, "", id, iseof)
|
return (False, "", id, iseof)
|
||||||
|
|
||||||
|
def dumpall(self):
|
||||||
|
self.em.setmimetype('text/plain')
|
||||||
|
alltxt=""
|
||||||
|
|
||||||
|
for idx in range(len(self.contents)):
|
||||||
|
ret,doc,path,iseof = self.extractone(self.contents[idx])
|
||||||
|
if not ret:
|
||||||
|
continue
|
||||||
|
# Feed doc to lynx
|
||||||
|
process = subprocess.Popen(["lynx", "-stdin", "-dump", "-nolist",
|
||||||
|
"-display_charset=utf8",
|
||||||
|
"-force_html"],
|
||||||
|
stdin=subprocess.PIPE,
|
||||||
|
stdout=subprocess.PIPE
|
||||||
|
)
|
||||||
|
txt,err = process.communicate(doc)
|
||||||
|
alltxt += txt
|
||||||
|
return alltxt
|
||||||
|
|
||||||
def openfile(self, params):
|
def openfile(self, params):
|
||||||
"""Open the EPUB file, create a contents array"""
|
"""Open the EPUB file, create a contents array"""
|
||||||
self.currentindex = -1
|
self.currentindex = -1
|
||||||
@ -91,6 +115,12 @@ class rclEPUB:
|
|||||||
return self.extractone(params["ipath:"])
|
return self.extractone(params["ipath:"])
|
||||||
|
|
||||||
def getnext(self, params):
|
def getnext(self, params):
|
||||||
|
if self.catenate:
|
||||||
|
alltxt = self.dumpall()
|
||||||
|
if alltxt:
|
||||||
|
return (True, alltxt, "", rclexecm.RclExecM.eofnext)
|
||||||
|
else:
|
||||||
|
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
|
|
||||||
if self.currentindex == -1:
|
if self.currentindex == -1:
|
||||||
self.currentindex = 0
|
self.currentindex = 0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user