ensure chm file can be renamed
This commit is contained in:
parent
d4edbbaedb
commit
7fcb7c9bf7
@ -32,10 +32,33 @@ except:
|
|||||||
print "RECFILTERROR HELPERNOTFOUND python:HTMLParser"
|
print "RECFILTERROR HELPERNOTFOUND python:HTMLParser"
|
||||||
sys.exit(1);
|
sys.exit(1);
|
||||||
|
|
||||||
|
# Small helper routines
|
||||||
|
def getfile(chmfile, path):
|
||||||
|
"""Extract internal file text from chm object, given path"""
|
||||||
|
res, ui = chmfile.ResolveObject(path)
|
||||||
|
if res != chmlib.CHM_RESOLVE_SUCCESS:
|
||||||
|
#print "ResolveObject failed", path
|
||||||
|
return ""
|
||||||
|
res, doc = chmfile.RetrieveObject(ui)
|
||||||
|
if not res:
|
||||||
|
print "RetrieveObject failed", path
|
||||||
|
return ""
|
||||||
|
return doc
|
||||||
|
|
||||||
|
def peekfile(chmfile, path):
    """Tell whether path resolves to an object inside the chm.

    Only the resolution status is examined; the object content is not
    retrieved. Returns True on successful resolution, False otherwise.
    """
    status, _unit_info = chmfile.ResolveObject(path)
    return status == chmlib.CHM_RESOLVE_SUCCESS
|
||||||
|
|
||||||
|
|
||||||
|
# CHM Topics tree handler
|
||||||
|
|
||||||
class ChmTopicsParser(HTMLParser):
|
class ChmTopicsParser(HTMLParser):
|
||||||
"""Parse the chm's Topic file which is basically
|
"""Parse the chm's Topic file which is basically
|
||||||
a listing of internal nodes (html files mostly). Build a list of
|
a listing of internal nodes (html files mostly). Build a list of
|
||||||
all nodes (self.contents), which will then be used to walk and index
|
all nodes (parent.contents), which will then be used to walk and index
|
||||||
the chm.
|
the chm.
|
||||||
|
|
||||||
Most nodes in the Topic file look like the following:
|
Most nodes in the Topic file look like the following:
|
||||||
@ -49,10 +72,10 @@ class ChmTopicsParser(HTMLParser):
|
|||||||
all values for parameters named "Local" (with some filtering/massaging),
|
all values for parameters named "Local" (with some filtering/massaging),
|
||||||
until proven wrong
|
until proven wrong
|
||||||
"""
|
"""
|
||||||
def __init__(self, em):
|
def __init__(self, rclchm):
|
||||||
HTMLParser.__init__(self)
|
HTMLParser.__init__(self)
|
||||||
self.contents = []
|
self.em = rclchm.em
|
||||||
self.em = em
|
self.rclchm = rclchm
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
#self.em.rclog("Beginning of a %s tag" % tag)
|
#self.em.rclog("Beginning of a %s tag" % tag)
|
||||||
@ -86,47 +109,31 @@ class ChmTopicsParser(HTMLParser):
|
|||||||
if len(ll) == 1:
|
if len(ll) == 1:
|
||||||
localpath = value
|
localpath = value
|
||||||
elif len(ll) == 4 and ll[-1] and ll[-3]:
|
elif len(ll) == 4 and ll[-1] and ll[-3]:
|
||||||
#self.em.rclog("File: %s" % ll[-3])
|
#self.em.rclog("File: [%s] sfn [%s]" % ((ll[-3]), self.rclchm.sfn))
|
||||||
if ll[-3] == self.fname:
|
# We used to test against the simple file name, but this does
|
||||||
localpath = ll[-1]
|
# not work if the file is renamed. Just check that the internal
|
||||||
else:
|
# path resolves. Old: if ll[-3] == self.rclchm.sfn:
|
||||||
|
localpath = ll[-1]
|
||||||
|
if not peekfile(self.rclchm.chm, localpath):
|
||||||
#self.em.rclog("SKIPPING %s" % ll[-3])
|
#self.em.rclog("SKIPPING %s" % ll[-3])
|
||||||
pass
|
localpath = ""
|
||||||
|
|
||||||
if len(localpath) != 0 and localpath.find("#") == -1:
|
if len(localpath) != 0 and localpath.find("#") == -1:
|
||||||
if localpath[0] != '/':
|
if localpath[0] != '/':
|
||||||
localpath = "/" + localpath
|
localpath = "/" + localpath
|
||||||
self.contents.append(localpath)
|
self.rclchm.contents.append(localpath)
|
||||||
|
|
||||||
def reset(self):
|
|
||||||
self.contents = []
|
|
||||||
self.fname = ""
|
|
||||||
HTMLParser.reset(self)
|
|
||||||
|
|
||||||
def setname(self, name):
|
|
||||||
self.fname = name
|
|
||||||
|
|
||||||
def getfile(chmfile, path):
|
|
||||||
"""Extract internal file text from chm object, given path"""
|
|
||||||
res, ui = chmfile.ResolveObject(path)
|
|
||||||
if res != chmlib.CHM_RESOLVE_SUCCESS:
|
|
||||||
#print "ResolveObject failed", path
|
|
||||||
return ""
|
|
||||||
res, doc = chmfile.RetrieveObject(ui)
|
|
||||||
if not res:
|
|
||||||
print "RetrieveObject failed", path
|
|
||||||
return ""
|
|
||||||
return doc
|
|
||||||
|
|
||||||
|
|
||||||
|
# Used when there is no Topics node. Walk the links tree
|
||||||
class ChmWalker(HTMLParser):
|
class ChmWalker(HTMLParser):
|
||||||
"""Links tree walker. This recursivelyfollows all internal links
|
"""Links tree walker. This recursively follows all internal links
|
||||||
found in the from the top node given as input, and augments the contents
|
found in the tree from the top node given as input, and augments
|
||||||
list."""
|
the contents list."""
|
||||||
|
|
||||||
def __init__(self, chm, path, contents):
|
def __init__(self, rclchm, path, contents):
|
||||||
HTMLParser.__init__(self)
|
HTMLParser.__init__(self)
|
||||||
self.chm = chm
|
self.rclchm = rclchm
|
||||||
|
self.chm = rclchm.chm
|
||||||
self.contents = contents
|
self.contents = contents
|
||||||
self.path = posixpath.normpath(path)
|
self.path = posixpath.normpath(path)
|
||||||
self.dir = posixpath.dirname(self.path)
|
self.dir = posixpath.dirname(self.path)
|
||||||
@ -146,9 +153,13 @@ class ChmWalker(HTMLParser):
|
|||||||
if (not res.scheme or res.scheme.lower == "ms-its"):
|
if (not res.scheme or res.scheme.lower == "ms-its"):
|
||||||
path = res.path
|
path = res.path
|
||||||
lpath = path.split(':')
|
lpath = path.split(':')
|
||||||
if len(lpath) == 3 and lpath[1] == cefilename:
|
if len(lpath) == 3:
|
||||||
# MS-ITS::somefile.chm:/some/path/file.htm ?
|
# MS-ITS::somefile.chm:/some/path/file.htm ? As far as I
|
||||||
|
# know this never happens because there was a runtime error
|
||||||
|
# in this path
|
||||||
path = lpath[2]
|
path = lpath[2]
|
||||||
|
if not peekfile(self.chm, path):
|
||||||
|
path = ""
|
||||||
elif len(lpath) == 1:
|
elif len(lpath) == 1:
|
||||||
path = lpath[0]
|
path = lpath[0]
|
||||||
else:
|
else:
|
||||||
@ -166,7 +177,7 @@ class ChmWalker(HTMLParser):
|
|||||||
text = getfile(self.chm, npath)
|
text = getfile(self.chm, npath)
|
||||||
if text:
|
if text:
|
||||||
try:
|
try:
|
||||||
newwalker = ChmWalker(self.chm, npath, self.contents)
|
newwalker = ChmWalker(self.rclchm, npath, self.contents)
|
||||||
newwalker.feed(text)
|
newwalker.feed(text)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
@ -177,8 +188,8 @@ class rclCHM:
|
|||||||
one by one. The ipath is the node path"""
|
one by one. The ipath is the node path"""
|
||||||
|
|
||||||
def __init__(self, em):
|
def __init__(self, em):
|
||||||
|
self.contents = []
|
||||||
self.chm = chm.CHMFile()
|
self.chm = chm.CHMFile()
|
||||||
self.tp = ChmTopicsParser(em)
|
|
||||||
self.currentindex = 0
|
self.currentindex = 0
|
||||||
self.em = em
|
self.em = em
|
||||||
if rclchm_catenate:
|
if rclchm_catenate:
|
||||||
@ -189,9 +200,9 @@ class rclCHM:
|
|||||||
def extractone(self, path):
|
def extractone(self, path):
|
||||||
"""Extract one path-named internal file from the chm file"""
|
"""Extract one path-named internal file from the chm file"""
|
||||||
|
|
||||||
#self.em.rclog("extractone: [%s]"%(path))
|
#self.em.rclog("extractone: [%s]" % (path,))
|
||||||
iseof = rclexecm.RclExecM.noteof
|
iseof = rclexecm.RclExecM.noteof
|
||||||
if self.currentindex >= len(self.tp.contents) -1:
|
if self.currentindex >= len(self.contents) -1:
|
||||||
iseof = rclexecm.RclExecM.eofnext
|
iseof = rclexecm.RclExecM.eofnext
|
||||||
|
|
||||||
res, ui = self.chm.ResolveObject(path)
|
res, ui = self.chm.ResolveObject(path)
|
||||||
@ -210,7 +221,7 @@ class rclCHM:
|
|||||||
|
|
||||||
def dumpall(self):
|
def dumpall(self):
|
||||||
alltxt=""
|
alltxt=""
|
||||||
for pth in self.tp.contents:
|
for pth in self.contents:
|
||||||
ret,doc,path,iseof = self.extractone(pth)
|
ret,doc,path,iseof = self.extractone(pth)
|
||||||
if not ret:
|
if not ret:
|
||||||
continue
|
continue
|
||||||
@ -230,22 +241,25 @@ class rclCHM:
|
|||||||
parsing the Topics object"""
|
parsing the Topics object"""
|
||||||
|
|
||||||
self.currentindex = 0
|
self.currentindex = 0
|
||||||
self.tp.reset()
|
self.contents = []
|
||||||
|
|
||||||
filename = params["filename:"]
|
filename = params["filename:"]
|
||||||
if not self.chm.LoadCHM(filename):
|
if not self.chm.LoadCHM(filename):
|
||||||
self.em.rclog("LoadCHM failed")
|
self.em.rclog("LoadCHM failed")
|
||||||
return False
|
return False
|
||||||
if not self.chm.GetArchiveInfo():
|
|
||||||
self.em.rclog("GetArchiveInfo failed")
|
self.sfn = os.path.basename(filename)
|
||||||
return False
|
|
||||||
|
#self.em.rclog("home [%s] topics [%s] title [%s]" %
|
||||||
|
# (self.chm.home, self.chm.topics, self.chm.title))
|
||||||
|
|
||||||
self.topics = self.chm.GetTopicsTree()
|
self.topics = self.chm.GetTopicsTree()
|
||||||
if self.topics:
|
if self.topics:
|
||||||
# Parse Topics file and extract list of internal nodes
|
# Parse Topics file and extract list of internal nodes
|
||||||
#self.em.rclog("Got topics");
|
#self.em.rclog("Got topics");
|
||||||
self.tp.setname(os.path.basename(filename))
|
tp = ChmTopicsParser(self)
|
||||||
self.tp.feed(self.topics)
|
tp.feed(self.topics)
|
||||||
self.tp.close()
|
tp.close()
|
||||||
else:
|
else:
|
||||||
# No topics. If there is a home, let's try to walk the tree
|
# No topics. If there is a home, let's try to walk the tree
|
||||||
#self.em.rclog("GetTopicsTree failed")
|
#self.em.rclog("GetTopicsTree failed")
|
||||||
@ -259,13 +273,13 @@ class rclCHM:
|
|||||||
if not text:
|
if not text:
|
||||||
self.em.rclog("No topics and no home content")
|
self.em.rclog("No topics and no home content")
|
||||||
return False
|
return False
|
||||||
walker = ChmWalker(self.chm, self.chm.home, self.tp.contents)
|
walker = ChmWalker(self, self.chm.home, self.contents)
|
||||||
walker.feed(text)
|
walker.feed(text)
|
||||||
walker.close()
|
walker.close()
|
||||||
|
|
||||||
#self.em.rclog("Contents size %d" % len(self.tp.contents))
|
#self.em.rclog("Contents size %d" % len(self.contents))
|
||||||
uniq = set(self.tp.contents)
|
uniq = set(self.contents)
|
||||||
self.tp.contents = list(uniq)
|
self.contents = list(uniq)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def getipath(self, params):
|
def getipath(self, params):
|
||||||
@ -279,10 +293,10 @@ class rclCHM:
|
|||||||
else:
|
else:
|
||||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
|
|
||||||
if self.currentindex >= len(self.tp.contents):
|
if self.currentindex >= len(self.contents):
|
||||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
else:
|
else:
|
||||||
ret= self.extractone(self.tp.contents[self.currentindex])
|
ret= self.extractone(self.contents[self.currentindex])
|
||||||
self.currentindex += 1
|
self.currentindex += 1
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user