rclchm: decode internal urls

This commit is contained in:
Jean-Francois Dockes 2012-03-27 18:51:27 +02:00
parent a259b1c256
commit 8074523a56

View File

@ -6,6 +6,7 @@ import sys
import os import os
import posixpath import posixpath
import urlparse import urlparse
import urllib
import rclexecm import rclexecm
@ -38,12 +39,13 @@ class ChmTopicsParser(HTMLParser):
all values for parameters named "Local" (with some filtering/massaging), all values for parameters named "Local" (with some filtering/massaging),
until proven wrong until proven wrong
""" """
def __init__(self): def __init__(self, em):
HTMLParser.__init__(self) HTMLParser.__init__(self)
self.contents = [] self.contents = []
self.em = em
def handle_starttag(self, tag, attrs): def handle_starttag(self, tag, attrs):
#print >> sys.stderr, "Encountered the beginning of a %s tag" % tag #self.em.rclog("Beginning of a %s tag" % tag)
# If this is a param tag with name Local, we're interested in # If this is a param tag with name Local, we're interested in
# the value which lists a file ref. Discard those with # # the value which lists a file ref. Discard those with #
# in them (references inside files) # in them (references inside files)
@ -61,19 +63,24 @@ class ChmTopicsParser(HTMLParser):
if nm == 'value': if nm == 'value':
value = val value = val
#self.em.rclog("Name [%s] value [%s]" %(name, value))
if name != 'Local' or value == '': if name != 'Local' or value == '':
return return
# The value may be URL-encoded, so decode it. If it contains no % escapes,
# unquote() returns it unchanged.
value = urllib.unquote(value)
localpath = "" localpath = ""
ll = value.split(":") ll = value.split(":")
if len(ll) == 1: if len(ll) == 1:
localpath = value localpath = value
elif len(ll) == 4 and ll[-1] and ll[-3]: elif len(ll) == 4 and ll[-1] and ll[-3]:
#print >>sys.stderr, "File: %s" % ll[-3] #self.em.rclog("File: %s" % ll[-3])
if ll[-3] == self.fname: if ll[-3] == self.fname:
localpath = ll[-1] localpath = ll[-1]
else: else:
#print >> sys.stderr, "SKIPPING %s" % ll[-3] #self.em.rclog("SKIPPING %s" % ll[-3])
pass pass
if len(localpath) != 0 and localpath.find("#") == -1: if len(localpath) != 0 and localpath.find("#") == -1:
@ -161,7 +168,7 @@ class rclCHM:
def __init__(self, em): def __init__(self, em):
self.chm = chm.CHMFile() self.chm = chm.CHMFile()
self.tp = ChmTopicsParser() self.tp = ChmTopicsParser(em)
self.currentindex = 0 self.currentindex = 0
self.em = em self.em = em
@ -202,6 +209,7 @@ class rclCHM:
self.topics = self.chm.GetTopicsTree() self.topics = self.chm.GetTopicsTree()
if self.topics: if self.topics:
# Parse Topics file and extract list of internal nodes # Parse Topics file and extract list of internal nodes
#self.em.rclog("Got topics");
self.tp.setname(os.path.basename(filename)) self.tp.setname(os.path.basename(filename))
self.tp.feed(self.topics) self.tp.feed(self.topics)
self.tp.close() self.tp.close()