diff --git a/src/filters/rcldia b/src/filters/rcldia
new file mode 100755
index 00000000..185a4dc3
--- /dev/null
+++ b/src/filters/rcldia
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# dia (http://live.gnome.org/Dia) file filter for recoll
+# stefan.friedel@iwr.uni-heidelberg.de 2012
+#
+# add the following to ~/.recoll/mimeconf into the [index] section:
+# application/x-dia-diagram = execm rcldia;mimetype=text/html;charset=utf-8
+# and into the [icons] section:
+# application/x-dia-diagram = drawing
+# and finally under [categories]:
+# other = ...\
+#       application/x-dia-diagram
+#
+# in ~/.recoll/mimemap:
+# .dia = application/x-dia-diagram
+
+# Small fixes from jfd: dia files are sometimes not compressed.
+# And a note: this file actually has no reason to return HTML as there is
+# no metadata. We could just as well and more simply return text/plain
+import rclexecm
+import re
+from gzip import GzipFile
+import xml.parsers.expat
+
+# Regexps used to clean up the character data of <dia:string> elements:
+# rhs / rhe drop exactly one leading / trailing '#' or whitespace character,
+# rempty matches data made only of '#' delimiters and whitespace.
+rhs = re.compile(r'^[#\s](.*)')
+rhe = re.compile(r'(.*)[#\s]$')
+rempty = re.compile(r'^#?\s*#?$')
+
+# Skeleton of the HTML document returned to recoll; {0} receives the
+# extracted strings, one per line.
+htmltemplate = '''
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
+</head>
+<body>
+{0}
+</body>
+</html>
+'''
+
+# xml parser for dia xml file
+class Parser:
+    """Collect the text of all <dia:string> elements of a dia XML document.
+
+    After feed() returns, self.string holds one HTML-escaped, ASCII-encoded
+    entry per non-empty chunk of string data.
+    """
+
+    def __init__(self, rclem):
+        # rclem: object providing htmlescape(); the only method used here
+        self._parser = xml.parsers.expat.ParserCreate(encoding='UTF-8')
+        # expat normally splits character data at every line ending; let it
+        # buffer so that chardata() sees each run of text in one piece and
+        # only strips the real leading/trailing delimiters.
+        self._parser.buffer_text = True
+        self._parser.StartElementHandler = self.startelement
+        self._parser.EndElementHandler = self.endelement
+        self._parser.CharacterDataHandler = self.chardata
+        self.string = []
+        self.handlethis = False
+        self.rclem = rclem
+
+    def startelement(self, name, attrs):
+        # Only character data inside <dia:string> is collected
+        self.handlethis = (name == 'dia:string')
+
+    def chardata(self, data):
+        # Ignore text outside <dia:string> and data that is empty apart
+        # from delimiters/whitespace
+        if not self.handlethis or rempty.search(data):
+            return
+        text = rhe.sub(r'\1', rhs.sub(r'\1', data))
+        # Escape for HTML first, then turn non-ASCII characters (umlauts...)
+        # into numeric character references
+        self.string.append(
+            self.rclem.htmlescape(text).encode('ascii', 'xmlcharrefreplace'))
+
+    def endelement(self, name):
+        self.handlethis = False
+
+    def feed(self, fh):
+        """Parse the open file object fh; the parser is dropped afterwards."""
+        self._parser.ParseFile(fh)
+        del self._parser
+
+class DiaExtractor:
+    """File handler driven by rclexecm: extracts the text of a dia file."""
+
+    def __init__(self, em):
+        self.em = em
+
+    def extractdia(self):
+        """Return (ok, docdata, ipath, iseof) for the currently open file.
+
+        ipath is always empty and iseof is always RclExecM.eofnext; ok is
+        False (with empty docdata) when the extraction raised.
+        """
+        docdata = ""
+        ipath = ""
+        try:
+            docdata = self.ExtractDiaText()
+            ok = True
+        except Exception:
+            ok = False
+        iseof = rclexecm.RclExecM.eofnext
+        self.em.setmimetype("text/html")
+        return (ok, docdata, ipath, iseof)
+
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        """Open params["filename:"], gzip-compressed or not.
+
+        Returns True on success, False when the file cannot be opened.
+        """
+        gz = None
+        try:
+            gz = GzipFile(params["filename:"], 'rb')
+            # Dia files are sometimes not compressed. Quite weirdly,
+            # GzipFile does not complain until we try to read. Have to do it
+            # here to be able to retry an uncompressed open.
+            gz.readline()
+            gz.seek(0)
+            self.dia = gz
+            return True
+        except Exception:
+            # File not compressed ? Release the gzip handle before retrying
+            if gz is not None:
+                gz.close()
+        try:
+            self.dia = open(params["filename:"], 'rb')
+        except Exception:
+            return False
+        return True
+
+    def getipath(self, params):
+        return self.extractdia()
+
+    def getnext(self, params):
+        return self.extractdia()
+
+    ###### read data
+    def ExtractDiaText(self):
+        """Parse the open file and return its strings as an HTML document."""
+        diap = Parser(self.em)
+        try:
+            diap.feed(self.dia)
+        finally:
+            # One extraction per opened file: release the handle right away
+            self.dia.close()
+        return htmltemplate.format('\n'.join(diap.string))
+
+# Main program: create protocol handler and extractor and run them
+proto = rclexecm.RclExecM()
+extract = DiaExtractor(proto)
+rclexecm.main(proto, extract)
diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf
index 210039e4..37774f47 100644
--- a/src/sampleconf/mimeconf
+++ b/src/sampleconf/mimeconf
@@ -71,6 +71,7 @@ application/vnd.wordperfect = exec wpd2html;mimetype=text/html
 application/x-abiword = exec rclabw
 application/x-awk = internal text/plain
 application/x-chm = execm rclchm
+application/x-dia-diagram = execm rcldia;mimetype=text/html;charset=utf-8
 application/x-dvi = exec rcldvi
 application/x-flac = execm rclaudio
 application/x-gnuinfo = execm rclinfo
@@ -107,6 +108,7 @@ text/x-fictionbook = exec rclfb2
 text/x-gaim-log = exec rclgaim
 text/x-html-sidux-man = exec rclsiduxman
 text/x-html-aptosid-man = exec rclaptosidman
+text/x-chm-html = internal text/html
 text/x-ini = internal text/plain
 text/x-mail = internal
 text/x-man = exec rclman
@@ -153,6 +155,7 @@ application/vnd.sun.xml.writer.global = wordprocessing
 application/vnd.sun.xml.writer.template = wordprocessing
 application/vnd.wordperfect = wordprocessing
 application/x-abiword = wordprocessing
+application/x-dia-diagram = drawing
 application/x-dvi = document
 application/x-flac = sownd
 application/x-fsdirectory = folder
@@ -283,6 +286,7 @@ message = message/rfc822 \
 other = application/vnd.sun.xml.draw \
       application/vnd.sun.xml.draw.template \
       application/vnd.sun.xml.math \
+      application/x-dia-diagram \
       application/x-fsdirectory \
       application/x-mimehtml \
       application/x-rar \
diff --git a/src/sampleconf/mimemap b/src/sampleconf/mimemap
index 7b532828..3d184aec 100644
--- a/src/sampleconf/mimemap
+++ b/src/sampleconf/mimemap
@@ -41,6 +41,7 @@
 
 .djvu = image/vnd.djvu
 .svg = image/svg+xml
+.dia = application/x-dia-diagram
 
 .gz = application/x-gzip
 .Z = application/x-gzip
diff --git a/src/sampleconf/mimeview b/src/sampleconf/mimeview
index fdff86b3..14bf4983 100644
--- a/src/sampleconf/mimeview
+++ b/src/sampleconf/mimeview
@@ -82,6 +82,7 @@ text/x-c+ = emacsclient %f
 text/x-c++ = emacsclient %f
 text/x-html-sidux-man = konqueror %f
 text/x-html-aptosid-man = iceweasel %f
+text/x-chm-html = openchm %f %i
 text/x-ini = emacsclient %f
 text/x-man = xterm -u8 -e "groff -T ascii -man %f | more"
 text/x-python = idle %f