added contributed dia filter
This commit is contained in:
parent
544e687afe
commit
ae01899962
124
src/filters/rcldia
Executable file
124
src/filters/rcldia
Executable file
@ -0,0 +1,124 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# dia (http://live.gnome.org/Dia) file filter for recoll
|
||||||
|
# stefan.friedel@iwr.uni-heidelberg.de 2012
|
||||||
|
#
|
||||||
|
# add the following to ~/.recoll/mimeconf into the [index] section:
|
||||||
|
# application/x-dia-diagram = execm rcldia;mimetype=text/html;charset=utf-8
|
||||||
|
# and into the [icons] section:
|
||||||
|
# application/x-dia-diagram = drawing
|
||||||
|
# and finally under [categories]:
|
||||||
|
# other = ...\
|
||||||
|
# application/x-dia-diagram
|
||||||
|
#
|
||||||
|
# in ~/.recoll/mimemap:
|
||||||
|
# .dia = application/x-dia-diagram
|
||||||
|
|
||||||
|
# Small fixes from jfd: dia files are sometimes not compressed.
|
||||||
|
# And a note: this file actually has no reason to return HTML as there is
|
||||||
|
# no metadata. We could just as well and more simply return text/plain
|
||||||
|
import rclexecm
|
||||||
|
import re
|
||||||
|
from gzip import GzipFile
|
||||||
|
import xml.parsers.expat
|
||||||
|
|
||||||
|
# Regexps used to clean up the character data of <dia:string> elements.
# All three are applied by the parser's character data handler:
#   rhs    - strips one leading '#' or a leading run of whitespace
#   rhe    - strips one trailing '#' or a trailing run of whitespace
#   rempty - recognizes text which is empty apart from '#' and whitespace
# rhs and rhe keep the remaining text in group 1, so callers use
# sub(r'\1', text).
#
# Note: these used to be written with the character class [#|\s+], which
# matches exactly one character and also treats '|' and '+' as strippable,
# so that e.g. "C++" lost its last '+'. An explicit alternation is used
# instead, and the group in rhe is non-greedy so that the whole trailing
# whitespace run is removed, not just its last character.
rhs = re.compile(r'^(?:#|\s+)(.*)')
rhe = re.compile(r'(.*?)(?:#|\s+)$')
rempty = re.compile(r'^#?\s*#?$')

# Skeleton of the HTML document returned for each file; {0} is replaced by
# the extracted (already HTML-escaped) text.
htmltemplate = '''
<html><head>
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
</head>
<body>
{0}
</body>
</html>
'''
|
||||||
|
|
||||||
|
# xml parser for dia xml file
|
||||||
|
class Parser:
    """Expat-based collector for the text found in <dia:string> elements.

    The cleaned-up, HTML-escaped pieces of text are accumulated in the
    ``string`` list, one entry per character data callback.

    rclem is the object providing the htmlescape() method used to escape
    the extracted text.
    """

    def __init__(self, rclem):
        self._parser = xml.parsers.expat.ParserCreate(encoding='UTF-8')
        # Without buffering, expat is free to deliver the text of a single
        # element in several pieces (e.g. around entities or at read buffer
        # boundaries). As every piece is cleaned up and stored on its own,
        # a word could end up split in two. Have expat coalesce the text.
        self._parser.buffer_text = True
        self._parser.StartElementHandler = self.startelement
        self._parser.EndElementHandler = self.endelement
        self._parser.CharacterDataHandler = self.chardata
        self.string = []
        self.handlethis = False
        self.rclem = rclem

    def startelement(self, name, attrs):
        """Start tag handler: only text inside <dia:string> is collected."""
        self.handlethis = (name == 'dia:string')

    def chardata(self, data):
        """Character data handler: store the text of the current element.

        Nothing is stored unless we are inside a <dia:string> element, or
        if the text is empty apart from '#' and whitespace.
        """
        if not self.handlethis:
            return
        if rempty.search(data):
            return
        # Strip the hashes/spaces at both ends, escape the HTML special
        # characters, then turn the remaining non-ASCII characters
        # (e.g. umlauts) into numeric character references.
        text = rhe.sub(r'\1', rhs.sub(r'\1', data))
        self.string.append(
            self.rclem.htmlescape(text).encode('ascii', 'xmlcharrefreplace'))

    def endelement(self, name):
        """End tag handler: stop collecting text."""
        self.handlethis = False

    def feed(self, fh):
        """Parse the whole of the open file object fh.

        The expat parser is discarded afterwards, so feed() can only be
        called once per Parser instance.
        """
        self._parser.ParseFile(fh)
        del self._parser
|
||||||
|
|
||||||
|
class DiaExtractor:
    """File type handler for dia files, driven by the rclexecm main loop.

    em is the rclexecm protocol handler object. It is used to set the
    output MIME type and is handed to the XML parser, which uses its
    htmlescape() method.
    """

    def __init__(self, em):
        self.em = em

    def extractdia(self):
        """Extract the text of the currently open file.

        Returns an (ok, docdata, ipath, iseof) tuple. ok is False and
        docdata is empty if the extraction failed for any reason. ipath is
        always empty, and iseof is always RclExecM.eofnext.
        """
        docdata = ""
        ipath = ""
        try:
            docdata = self.ExtractDiaText()
            ok = True
        except Exception:
            # Any failure is reported to the caller through the ok flag.
            ok = False
        iseof = rclexecm.RclExecM.eofnext
        self.em.setmimetype("text/html")
        return (ok, docdata, ipath, iseof)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Open the file named by params["filename:"] and keep it in self.dia.

        The file is first tried as gzip data, then as a plain file.
        Returns True if the file could be opened, else False.
        """
        gz = None
        try:
            gz = GzipFile(params["filename:"], 'r')
            # Dia files are sometimes not compressed. Quite weirdly,
            # GzipFile does not complain until we try to read. Have to do it
            # here to be able to retry an uncompressed open.
            gz.readline()
            gz.seek(0)
        except Exception:
            # File not compressed ? Release the gzip handle before retrying.
            if gz is not None:
                gz.close()
        else:
            self.dia = gz
            return True

        try:
            # Binary mode: the data is handed to expat, which does its own
            # decoding.
            self.dia = open(params["filename:"], 'rb')
        except Exception:
            return False
        return True

    def getipath(self, params):
        """Return the extraction result; params is not used."""
        return self.extractdia()

    def getnext(self, params):
        """Return the extraction result; params is not used."""
        return self.extractdia()

    ###### read data
    def ExtractDiaText(self):
        """Parse self.dia and return the collected text as an HTML document.

        Errors raised while parsing are propagated to the caller.
        """
        diap = Parser(self.em)
        diap.feed(self.dia)
        return htmltemplate.format('\n'.join(diap.string))
|
||||||
|
|
||||||
|
# Main program: instantiate the rclexecm protocol handler, attach a dia
# extractor to it, and hand both over to the rclexecm main loop.
proto = rclexecm.RclExecM()
extract = DiaExtractor(proto)
rclexecm.main(proto, extract)
|
||||||
@ -71,6 +71,7 @@ application/vnd.wordperfect = exec wpd2html;mimetype=text/html
|
|||||||
application/x-abiword = exec rclabw
|
application/x-abiword = exec rclabw
|
||||||
application/x-awk = internal text/plain
|
application/x-awk = internal text/plain
|
||||||
application/x-chm = execm rclchm
|
application/x-chm = execm rclchm
|
||||||
|
application/x-dia-diagram = execm rcldia;mimetype=text/html;charset=utf-8
|
||||||
application/x-dvi = exec rcldvi
|
application/x-dvi = exec rcldvi
|
||||||
application/x-flac = execm rclaudio
|
application/x-flac = execm rclaudio
|
||||||
application/x-gnuinfo = execm rclinfo
|
application/x-gnuinfo = execm rclinfo
|
||||||
@ -107,6 +108,7 @@ text/x-fictionbook = exec rclfb2
|
|||||||
text/x-gaim-log = exec rclgaim
|
text/x-gaim-log = exec rclgaim
|
||||||
text/x-html-sidux-man = exec rclsiduxman
|
text/x-html-sidux-man = exec rclsiduxman
|
||||||
text/x-html-aptosid-man = exec rclaptosidman
|
text/x-html-aptosid-man = exec rclaptosidman
|
||||||
|
text/x-chm-html = internal text/html
|
||||||
text/x-ini = internal text/plain
|
text/x-ini = internal text/plain
|
||||||
text/x-mail = internal
|
text/x-mail = internal
|
||||||
text/x-man = exec rclman
|
text/x-man = exec rclman
|
||||||
@ -153,6 +155,7 @@ application/vnd.sun.xml.writer.global = wordprocessing
|
|||||||
application/vnd.sun.xml.writer.template = wordprocessing
|
application/vnd.sun.xml.writer.template = wordprocessing
|
||||||
application/vnd.wordperfect = wordprocessing
|
application/vnd.wordperfect = wordprocessing
|
||||||
application/x-abiword = wordprocessing
|
application/x-abiword = wordprocessing
|
||||||
|
application/x-dia-diagram = drawing
|
||||||
application/x-dvi = document
|
application/x-dvi = document
|
||||||
application/x-flac = sownd
|
application/x-flac = sownd
|
||||||
application/x-fsdirectory = folder
|
application/x-fsdirectory = folder
|
||||||
@ -283,6 +286,7 @@ message = message/rfc822 \
|
|||||||
other = application/vnd.sun.xml.draw \
|
other = application/vnd.sun.xml.draw \
|
||||||
application/vnd.sun.xml.draw.template \
|
application/vnd.sun.xml.draw.template \
|
||||||
application/vnd.sun.xml.math \
|
application/vnd.sun.xml.math \
|
||||||
|
application/x-dia-diagram \
|
||||||
application/x-fsdirectory \
|
application/x-fsdirectory \
|
||||||
application/x-mimehtml \
|
application/x-mimehtml \
|
||||||
application/x-rar \
|
application/x-rar \
|
||||||
|
|||||||
@ -41,6 +41,7 @@
|
|||||||
|
|
||||||
.djvu = image/vnd.djvu
|
.djvu = image/vnd.djvu
|
||||||
.svg = image/svg+xml
|
.svg = image/svg+xml
|
||||||
|
.dia = application/x-dia-diagram
|
||||||
|
|
||||||
.gz = application/x-gzip
|
.gz = application/x-gzip
|
||||||
.Z = application/x-gzip
|
.Z = application/x-gzip
|
||||||
|
|||||||
@ -82,6 +82,7 @@ text/x-c+ = emacsclient %f
|
|||||||
text/x-c++ = emacsclient %f
|
text/x-c++ = emacsclient %f
|
||||||
text/x-html-sidux-man = konqueror %f
|
text/x-html-sidux-man = konqueror %f
|
||||||
text/x-html-aptosid-man = iceweasel %f
|
text/x-html-aptosid-man = iceweasel %f
|
||||||
|
text/x-chm-html = openchm %f %i
|
||||||
text/x-ini = emacsclient %f
|
text/x-ini = emacsclient %f
|
||||||
text/x-man = xterm -u8 -e "groff -T ascii -man %f | more"
|
text/x-man = xterm -u8 -e "groff -T ascii -man %f | more"
|
||||||
text/x-python = idle %f
|
text/x-python = idle %f
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user