add epub viewer and set rclaptg meta tag for chm and info

This commit is contained in:
Jean-Francois Dockes 2012-10-11 14:03:30 +02:00
parent ec9041e2c7
commit 7c18d74541
4 changed files with 83 additions and 1 deletions

View File

@ -11,6 +11,7 @@ rclchm_html_mtype = "text/html"
import sys
import os
import re
import posixpath
import urlparse
import urllib
@ -201,6 +202,8 @@ class rclCHM:
res, doc = self.chm.RetrieveObject(ui)
#self.em.rclog("extract: RetrieveObject: %d [%s]" % (res, doc))
if res > 0:
doc = re.sub('''</[hH][eE][aA][dD]''',
'''<meta name="rclaptg" content="chm"></head>''', doc)
self.em.setmimetype(rclchm_html_mtype)
return (True, doc, path, iseof)
return (False, "", path, iseof)

74
src/filters/rclepub Executable file
View File

@ -0,0 +1,74 @@
#!/usr/bin/env python
"""Extract Html content from an EPUB file (.epub)"""

# MIME type announced for every document extracted by this filter.
rclepub_html_mtype = "text/html"

import sys
import os
import re

import rclexecm

# The epub module is an external helper. If it cannot be imported, print
# the RECFILTERROR HELPERNOTFOUND line on stdout and exit with status 1.
# Only ImportError is caught so that SystemExit/KeyboardInterrupt and
# unrelated failures are not misreported as a missing helper.
try:
    import epub
except ImportError:
    print("RECFILTERROR HELPERNOTFOUND python:epub")
    sys.exit(1)
class rclEPUB:
    """RclExecM slave worker for extracting all text from an EPUB
    file. We first extract the list of internal nodes, and then return them
    one by one. The ipath is the internal href"""

    # Closing head tag in any letter case. The '>' is part of the match so
    # that the substitution does not leave a doubled '>' behind, and so
    # that '</header>' is left alone.
    _HEAD_CLOSE_RE = re.compile(r'(</[hH][eE][aA][dD]\s*>)')
    _RCLAPTG_META = '''<meta name="rclaptg" content="epub">'''

    def __init__(self, em):
        """Bind to the protocol object *em* and announce the output MIME type.

        *em* must provide setmimetype() and rclog(); the script passes an
        rclexecm.RclExecM instance.
        """
        self.currentindex = 0
        # Set up here so that calls made before openfile() fail cleanly
        # instead of raising AttributeError.
        self.contents = []
        self.book = None
        self.em = em
        self.em.setmimetype(rclepub_html_mtype)

    @staticmethod
    def _add_rclaptg(doc):
        """Return *doc* with the rclaptg meta tag inserted before each
        closing head tag. The original closing tag text is preserved."""
        return rclEPUB._HEAD_CLOSE_RE.sub(rclEPUB._RCLAPTG_META + r'\1', doc)

    def extractone(self, path):
        """Extract one path-named internal file from the EPUB file.

        Returns a tuple (ok, data, ipath, eofflag). On failure the error
        is logged through em.rclog() and data is the empty string.
        """
        #self.em.rclog("extractone: [%s]"%(path))
        iseof = rclexecm.RclExecM.noteof
        # getnext() increments currentindex only after this call, so this
        # flags the last entry of the contents list (or anything past it).
        if self.currentindex >= len(self.contents) - 1:
            iseof = rclexecm.RclExecM.eofnext

        try:
            doc = self.book.read(path)
            doc = self._add_rclaptg(doc)
            return (True, doc, path, iseof)
        except Exception as err:
            self.em.rclog("extractone: failed: [%s]" % err)
            return (False, "", path, iseof)

    def openfile(self, params):
        """Open the EPUB file named by params["filename:"].

        Resets the iteration state and collects the href of every
        manifest item whose media type is application/xhtml+xml.
        Returns True on success, False (after logging) if opening fails.
        """
        self.currentindex = 0
        self.contents = []
        try:
            self.book = epub.open(params["filename:"])
        except Exception as err:
            self.em.rclog("openfile: failed: [%s]" % err)
            return False

        # Only the manifest items are needed, not their ids.
        for item in self.book.opf.manifest.values():
            if item.media_type == u'application/xhtml+xml':
                self.contents.append(item.href)
        return True

    def getipath(self, params):
        """Extract the internal document named by params["ipath:"]."""
        return self.extractone(params["ipath:"])

    def getnext(self, params):
        """Extract the next document from the contents list.

        Returns (False, "", "", eofnow) once the list is exhausted.
        """
        if self.currentindex >= len(self.contents):
            return (False, "", "", rclexecm.RclExecM.eofnow)

        ret = self.extractone(self.contents[self.currentindex])
        self.currentindex += 1
        return ret
# Script entry point: create the RclExecM protocol object, bind an EPUB
# extractor to it, and hand both to rclexecm.main().
proto = rclexecm.RclExecM()
extract = rclEPUB(proto)
rclexecm.main(proto, extract)

View File

@ -20,8 +20,10 @@ htmltemplate = '''
<html>
<head>
<title>%s</title>
<meta name="rclaptg" content="gnuinfo">
</head>
<body><pre>
<body>
<pre style="white-space: pre-wrap">
%s
</pre></body>
</html>

3
src/filters/rclshowchm Executable file
View File

@ -0,0 +1,3 @@
#!/bin/sh
# Open a CHM document in kchmviewer.
# Presumably $1 is the CHM file path and $2 the internal URL to display
# (passed to --url) -- confirm against the caller.
# Both are quoted so that values containing spaces or glob characters
# reach kchmviewer as single, unmodified arguments.
kchmviewer --url "$2" "$1"