index: support webarchive (.war) and mimehtml (.mhtml) formats
This commit is contained in:
parent
22788b8900
commit
d0cb158d26
54
src/filters/rclwar
Executable file
54
src/filters/rclwar
Executable file
@ -0,0 +1,54 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# WAR web archive filter for recoll. WAR files are gzipped tar files.
|
||||||
|
|
||||||
|
import rclexecm
|
||||||
|
import tarfile
|
||||||
|
|
||||||
|
class WarExtractor:
    """Recoll execm handler exposing the members of a WAR web archive.

    The archive is opened with the tarfile module. Each member is returned
    as one document, using the member name as its ipath.
    """

    def __init__(self, em):
        """Remember the protocol handler.

        em -- protocol handler object; this class only calls its rclog()
              method, to log errors.
        """
        self.em = em
        # Archive currently being processed; set by openfile().
        self.tar = None

    def _closetar(self):
        """Close and forget the currently opened archive, if any."""
        tar = self.tar
        self.tar = None
        if tar is not None:
            try:
                tar.close()
            except Exception as err:
                self.em.rclog("closing archive failed: [%s]" % err)

    def extractone(self, tarinfo):
        """Read the data of one archive member.

        tarinfo -- tarfile.TarInfo describing the member to read.

        Returns a tuple (ok, data, ipath, eof): ipath is the member name
        and eof is always rclexecm.RclExecM.noteof. On failure the problem
        is logged, ok is False and data is the empty string.
        """
        docdata = ""
        ok = False
        try:
            member = self.tar.extractfile(tarinfo)
            if member is None:
                # extractfile() returns None for members which carry no
                # data (directories, devices...): report it explicitly
                # rather than through an AttributeError on None.
                self.em.rclog("extractone: failed: [no data for %s]" %
                              tarinfo.name)
            else:
                docdata = member.read()
                ok = True
        except Exception as err:
            self.em.rclog("extractone: failed: [%s]" % err)
            ok = False
        return (ok, docdata, tarinfo.name, rclexecm.RclExecM.noteof)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Open the archive named by params["filename:"].

        Any previously opened archive is closed first so that file handles
        do not accumulate when the handler is reused for several files.

        Returns True on success. On failure the error is logged and False
        is returned; no archive is left open in that case.
        """
        self.currentindex = 0
        self._closetar()
        try:
            self.tar = tarfile.open(params["filename:"])
            return True
        except Exception as err:
            self.em.rclog(str(err))
            return False

    def getipath(self, params):
        """Return the archive member whose name is params["ipath:"].

        Returns the same (ok, data, ipath, eof) tuple as extractone(). If
        the member cannot be looked up, the error is logged and
        (False, "", ipath, noteof) is returned.
        """
        ipath = params["ipath:"]
        try:
            tarinfo = self.tar.getmember(ipath)
        except Exception as err:
            self.em.rclog(str(err))
            return (False, "", ipath, rclexecm.RclExecM.noteof)
        return self.extractone(tarinfo)

    def getnext(self, params):
        """Return the next archive member in sequence.

        Returns the extractone() tuple for the next member, or
        (False, "", "", eofnow) once the archive is exhausted or if no
        archive is currently open.
        """
        if self.tar is None:
            # Nothing to iterate on (openfile() failed or was never called).
            return (False, "", "", rclexecm.RclExecM.eofnow)
        tarinfo = self.tar.next()
        if tarinfo is None:
            #self.em.rclog("getnext: EOF hit")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        return self.extractone(tarinfo)
|
||||||
|
|
||||||
|
# Entry point: build the execm protocol handler, attach a WAR extractor
# to it, then hand both over to the rclexecm main loop.
proto = rclexecm.RclExecM()
extract = WarExtractor(proto)
rclexecm.main(proto, extract)
|
||||||
@ -71,12 +71,13 @@ application/x-flac = execm rclaudio
|
|||||||
application/x-gnuinfo = execm rclinfo
|
application/x-gnuinfo = execm rclinfo
|
||||||
application/x-kword = exec rclkwd
|
application/x-kword = exec rclkwd
|
||||||
application/x-lyx = exec rcllyx
|
application/x-lyx = exec rcllyx
|
||||||
|
application/x-mimehtml = internal message/rfc822
|
||||||
application/x-perl = internal text/plain
|
application/x-perl = internal text/plain
|
||||||
application/x-scribus = exec rclscribus
|
application/x-scribus = exec rclscribus
|
||||||
application/x-shellscript = internal text/plain
|
application/x-shellscript = internal text/plain
|
||||||
application/x-tex = exec rcltex
|
application/x-tex = exec rcltex
|
||||||
text/x-tex = exec rcltex
|
|
||||||
application/x-chm = execm rclchm
|
application/x-chm = execm rclchm
|
||||||
|
application/x-webarchive = execm rclwar
|
||||||
application/zip = execm rclzip;charset=default
|
application/zip = execm rclzip;charset=default
|
||||||
audio/mpeg = execm rclaudio
|
audio/mpeg = execm rclaudio
|
||||||
audio/x-karaoke = execm rclkar
|
audio/x-karaoke = execm rclkar
|
||||||
@ -104,6 +105,7 @@ text/x-purple-log = exec rclpurple
|
|||||||
text/x-purple-html-log = internal text/html
|
text/x-purple-html-log = internal text/html
|
||||||
text/x-python = exec rclpython
|
text/x-python = exec rclpython
|
||||||
text/x-shellscript = internal text/plain
|
text/x-shellscript = internal text/plain
|
||||||
|
text/x-tex = exec rcltex
|
||||||
|
|
||||||
## #############################################
|
## #############################################
|
||||||
# Icons to be used in the result list if required by gui config
|
# Icons to be used in the result list if required by gui config
|
||||||
|
|||||||
@ -112,6 +112,10 @@
|
|||||||
|
|
||||||
.fb2 = text/x-fictionbook
|
.fb2 = text/x-fictionbook
|
||||||
|
|
||||||
|
.war = application/x-webarchive
|
||||||
|
.mht = application/x-mimehtml
|
||||||
|
.mhtml = application/x-mimehtml
|
||||||
|
|
||||||
# A list of suffixes (name endings) that we don't want to touch at all.
|
# A list of suffixes (name endings) that we don't want to touch at all.
|
||||||
# Having these explicitly listed speeds things up a bit by avoiding
|
# Having these explicitly listed speeds things up a bit by avoiding
|
||||||
# unneeded decompression or 'file' calls. File names still get indexed if
|
# unneeded decompression or 'file' calls. File names still get indexed if
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user