ensure chm file can be renamed
This commit is contained in:
parent
d4edbbaedb
commit
7fcb7c9bf7
@ -32,10 +32,33 @@ except:
|
||||
print "RECFILTERROR HELPERNOTFOUND python:HTMLParser"
|
||||
sys.exit(1);
|
||||
|
||||
# Small helper routines
|
||||
def getfile(chmfile, path):
|
||||
"""Extract internal file text from chm object, given path"""
|
||||
res, ui = chmfile.ResolveObject(path)
|
||||
if res != chmlib.CHM_RESOLVE_SUCCESS:
|
||||
#print "ResolveObject failed", path
|
||||
return ""
|
||||
res, doc = chmfile.RetrieveObject(ui)
|
||||
if not res:
|
||||
print "RetrieveObject failed", path
|
||||
return ""
|
||||
return doc
|
||||
|
||||
def peekfile(chmfile, path):
    """Tell whether path resolves to an object inside the chm object.

    Only the resolution status is examined; the object is not retrieved.
    Returns True when resolution succeeds, False otherwise.
    """
    status, _unit_info = chmfile.ResolveObject(path)
    return status == chmlib.CHM_RESOLVE_SUCCESS
|
||||
|
||||
|
||||
# CHM Topics tree handler
|
||||
|
||||
class ChmTopicsParser(HTMLParser):
|
||||
"""Parse the chm's Topic file which is basically
|
||||
a listing of internal nodes (html files mostly). Build a list of
|
||||
all nodes (self.contents), which will then be used to walk and index
|
||||
all nodes (parent.contents), which will then be used to walk and index
|
||||
the chm.
|
||||
|
||||
Most nodes in the Topic file look like the following:
|
||||
@ -49,10 +72,10 @@ class ChmTopicsParser(HTMLParser):
|
||||
all values for parameters named "Local" (with some filtering/massaging),
|
||||
until proven wrong
|
||||
"""
|
||||
def __init__(self, em):
|
||||
def __init__(self, rclchm):
|
||||
HTMLParser.__init__(self)
|
||||
self.contents = []
|
||||
self.em = em
|
||||
self.em = rclchm.em
|
||||
self.rclchm = rclchm
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
#self.em.rclog("Beginning of a %s tag" % tag)
|
||||
@ -86,47 +109,31 @@ class ChmTopicsParser(HTMLParser):
|
||||
if len(ll) == 1:
|
||||
localpath = value
|
||||
elif len(ll) == 4 and ll[-1] and ll[-3]:
|
||||
#self.em.rclog("File: %s" % ll[-3])
|
||||
if ll[-3] == self.fname:
|
||||
localpath = ll[-1]
|
||||
else:
|
||||
#self.em.rclog("File: [%s] sfn [%s]" % ((ll[-3]), self.rclchm.sfn))
|
||||
# We used to test against the simple file name, but this does
|
||||
# not work if the file is renamed. Just check that the internal
|
||||
# path resolves. Old: if ll[-3] == self.rclchm.sfn:
|
||||
localpath = ll[-1]
|
||||
if not peekfile(self.rclchm.chm, localpath):
|
||||
#self.em.rclog("SKIPPING %s" % ll[-3])
|
||||
pass
|
||||
localpath = ""
|
||||
|
||||
if len(localpath) != 0 and localpath.find("#") == -1:
|
||||
if localpath[0] != '/':
|
||||
localpath = "/" + localpath
|
||||
self.contents.append(localpath)
|
||||
|
||||
def reset(self):
|
||||
self.contents = []
|
||||
self.fname = ""
|
||||
HTMLParser.reset(self)
|
||||
|
||||
def setname(self, name):
|
||||
self.fname = name
|
||||
|
||||
def getfile(chmfile, path):
|
||||
"""Extract internal file text from chm object, given path"""
|
||||
res, ui = chmfile.ResolveObject(path)
|
||||
if res != chmlib.CHM_RESOLVE_SUCCESS:
|
||||
#print "ResolveObject failed", path
|
||||
return ""
|
||||
res, doc = chmfile.RetrieveObject(ui)
|
||||
if not res:
|
||||
print "RetrieveObject failed", path
|
||||
return ""
|
||||
return doc
|
||||
self.rclchm.contents.append(localpath)
|
||||
|
||||
|
||||
# Used when there is no Topics node. Walk the links tree
|
||||
class ChmWalker(HTMLParser):
|
||||
"""Links tree walker. This recursivelyfollows all internal links
|
||||
found in the from the top node given as input, and augments the contents
|
||||
list."""
|
||||
"""Links tree walker. This recursively follows all internal links
|
||||
found in the tree from the top node given as input, and augments
|
||||
the contents list."""
|
||||
|
||||
def __init__(self, chm, path, contents):
|
||||
def __init__(self, rclchm, path, contents):
|
||||
HTMLParser.__init__(self)
|
||||
self.chm = chm
|
||||
self.rclchm = rclchm
|
||||
self.chm = rclchm.chm
|
||||
self.contents = contents
|
||||
self.path = posixpath.normpath(path)
|
||||
self.dir = posixpath.dirname(self.path)
|
||||
@ -146,9 +153,13 @@ class ChmWalker(HTMLParser):
|
||||
if (not res.scheme or res.scheme.lower == "ms-its"):
|
||||
path = res.path
|
||||
lpath = path.split(':')
|
||||
if len(lpath) == 3 and lpath[1] == cefilename:
|
||||
# MS-ITS::somefile.chm:/some/path/file.htm ?
|
||||
if len(lpath) == 3:
|
||||
# MS-ITS::somefile.chm:/some/path/file.htm ? As far as I
|
||||
# know this never happens because there was a runtime error
|
||||
# in this path
|
||||
path = lpath[2]
|
||||
if not peekfile(self.chm, path):
|
||||
path = ""
|
||||
elif len(lpath) == 1:
|
||||
path = lpath[0]
|
||||
else:
|
||||
@ -166,7 +177,7 @@ class ChmWalker(HTMLParser):
|
||||
text = getfile(self.chm, npath)
|
||||
if text:
|
||||
try:
|
||||
newwalker = ChmWalker(self.chm, npath, self.contents)
|
||||
newwalker = ChmWalker(self.rclchm, npath, self.contents)
|
||||
newwalker.feed(text)
|
||||
except:
|
||||
pass
|
||||
@ -177,8 +188,8 @@ class rclCHM:
|
||||
one by one. The ipath is the node path"""
|
||||
|
||||
def __init__(self, em):
|
||||
self.contents = []
|
||||
self.chm = chm.CHMFile()
|
||||
self.tp = ChmTopicsParser(em)
|
||||
self.currentindex = 0
|
||||
self.em = em
|
||||
if rclchm_catenate:
|
||||
@ -189,9 +200,9 @@ class rclCHM:
|
||||
def extractone(self, path):
|
||||
"""Extract one path-named internal file from the chm file"""
|
||||
|
||||
#self.em.rclog("extractone: [%s]"%(path))
|
||||
#self.em.rclog("extractone: [%s]" % (path,))
|
||||
iseof = rclexecm.RclExecM.noteof
|
||||
if self.currentindex >= len(self.tp.contents) -1:
|
||||
if self.currentindex >= len(self.contents) -1:
|
||||
iseof = rclexecm.RclExecM.eofnext
|
||||
|
||||
res, ui = self.chm.ResolveObject(path)
|
||||
@ -210,7 +221,7 @@ class rclCHM:
|
||||
|
||||
def dumpall(self):
|
||||
alltxt=""
|
||||
for pth in self.tp.contents:
|
||||
for pth in self.contents:
|
||||
ret,doc,path,iseof = self.extractone(pth)
|
||||
if not ret:
|
||||
continue
|
||||
@ -230,22 +241,25 @@ class rclCHM:
|
||||
parsing the Topics object"""
|
||||
|
||||
self.currentindex = 0
|
||||
self.tp.reset()
|
||||
self.contents = []
|
||||
|
||||
filename = params["filename:"]
|
||||
if not self.chm.LoadCHM(filename):
|
||||
self.em.rclog("LoadCHM failed")
|
||||
return False
|
||||
if not self.chm.GetArchiveInfo():
|
||||
self.em.rclog("GetArchiveInfo failed")
|
||||
return False
|
||||
|
||||
self.sfn = os.path.basename(filename)
|
||||
|
||||
#self.em.rclog("home [%s] topics [%s] title [%s]" %
|
||||
# (self.chm.home, self.chm.topics, self.chm.title))
|
||||
|
||||
self.topics = self.chm.GetTopicsTree()
|
||||
if self.topics:
|
||||
# Parse Topics file and extract list of internal nodes
|
||||
#self.em.rclog("Got topics");
|
||||
self.tp.setname(os.path.basename(filename))
|
||||
self.tp.feed(self.topics)
|
||||
self.tp.close()
|
||||
tp = ChmTopicsParser(self)
|
||||
tp.feed(self.topics)
|
||||
tp.close()
|
||||
else:
|
||||
# No topics. If there is a home, let's try to walk the tree
|
||||
#self.em.rclog("GetTopicsTree failed")
|
||||
@ -259,13 +273,13 @@ class rclCHM:
|
||||
if not text:
|
||||
self.em.rclog("No topics and no home content")
|
||||
return False
|
||||
walker = ChmWalker(self.chm, self.chm.home, self.tp.contents)
|
||||
walker = ChmWalker(self, self.chm.home, self.contents)
|
||||
walker.feed(text)
|
||||
walker.close()
|
||||
|
||||
#self.em.rclog("Contents size %d" % len(self.tp.contents))
|
||||
uniq = set(self.tp.contents)
|
||||
self.tp.contents = list(uniq)
|
||||
#self.em.rclog("Contents size %d" % len(self.contents))
|
||||
uniq = set(self.contents)
|
||||
self.contents = list(uniq)
|
||||
return True
|
||||
|
||||
def getipath(self, params):
|
||||
@ -279,10 +293,10 @@ class rclCHM:
|
||||
else:
|
||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||
|
||||
if self.currentindex >= len(self.tp.contents):
|
||||
if self.currentindex >= len(self.contents):
|
||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||
else:
|
||||
ret= self.extractone(self.tp.contents[self.currentindex])
|
||||
ret= self.extractone(self.contents[self.currentindex])
|
||||
self.currentindex += 1
|
||||
return ret
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user