From 7b9718a5aaa9e3c9e8de21a1b930ab8c9674b5f2 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Mon, 14 Mar 2011 08:25:20 +0100 Subject: [PATCH] cleanup --- website/filters/filters.html | 4 + website/filters/mimeconf | 233 ----------------------------------- website/filters/mimemap | 152 ----------------------- website/filters/mimeview | 68 ---------- website/filters/rclics | 180 --------------------------- 5 files changed, 4 insertions(+), 633 deletions(-) delete mode 100644 website/filters/mimeconf delete mode 100644 website/filters/mimemap delete mode 100644 website/filters/mimeview delete mode 100755 website/filters/rclics diff --git a/website/filters/filters.html b/website/filters/filters.html index 3dd06c34..bb2ed7ac 100644 --- a/website/filters/filters.html +++ b/website/filters/filters.html @@ -72,6 +72,10 @@ --> +

Updated zip archive filter

+

The filter is corrected to handle utf-8 paths in zip archives: + rclzip

+

Updated audio tag filter

The mutagen-based rclaudio filter delivered with recoll 1.14.2 used a very recent mutagen interface which will only work with diff --git a/website/filters/mimeconf b/website/filters/mimeconf deleted file mode 100644 index 8b06ae12..00000000 --- a/website/filters/mimeconf +++ /dev/null @@ -1,233 +0,0 @@ -# @(#$Id: mimeconf,v 1.48 2008-11-27 13:35:24 dockes Exp $ (C) 2004 J.F.Dockes - -# Recoll : associations of mime types to processing filters. -# There are different sections for decompression, 'interning' for indexing -# and preview, and external viewers - -## ####################################### -# Decompression: these types need a first pass to create a temp file to -# work with. We use a script because uncompress utilities usually work in -# place, which is not suitable. -# -# The %t parameter will be substituted to the name of a temporary directory -# by recoll. This directory is guaranteed empty when calling the filter -# -# The %f parameter will be substituted with the input file. -# -# The script (ie: rcluncomp) must output the uncompressed file name on -# stdout. -application/x-gzip = uncompress rcluncomp gunzip %f %t -application/x-compress = uncompress rcluncomp gunzip %f %t -application/x-bzip2 = uncompress rcluncomp bunzip2 %f %t - -## ################################### -# Filters for indexing and internal preview. -# The "internal" filters are hardwired in the c++ code. -# The external "exec" filters are typically scripts. By default, they output the -# document in simple html format, have a look at the scripts. -# A different format (ie text/plain), and a character set can be defined for -# each filter, see the exemples below (ie: msword) -[index] -# Note: rcldoc did some work to splice hyphenated words at eol. Seems -# actually not needed because antiword apparently does it too -# application/msword = exec rcldoc -application/msword = exec antiword -t -i 1 -m UTF-8;mimetype=text/plain;charset=utf-8 -# Alternatively you can use wvWare for msword. It's much slower than -# antiword, but will handle documents which provoke the 'I'm afraid the -# text stream of this file is too small to handle' antiword error -# application/msword = exec wvWare --charset=utf-8 --nographics -application/ogg = execm rclaudio -application/pdf = exec rclpdf -application/postscript = exec pstotext;charset=iso-8859-1;mimetype=text/plain -application/vnd.ms-excel = exec xls2csv -c " " -d utf-8;charset=utf-8;mimetype=text/plain -application/vnd.ms-powerpoint = exec catppt -d utf-8;charset=utf-8;mimetype=text/plain -application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ - exec rclopxml -application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ - exec rclopxml -application/vnd.openxmlformats-officedocument.presentationml.template = \ - exec rclopxml -application/vnd.openxmlformats-officedocument.presentationml.presentation = \ - exec rclopxml -application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ - exec rclopxml -application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ - exec rclopxml -application/vnd.sun.xml.calc = exec rclsoff -application/vnd.sun.xml.calc.template = exec rclsoff -application/vnd.sun.xml.draw = exec rclsoff -application/vnd.sun.xml.draw.template = exec rclsoff -application/vnd.sun.xml.impress = exec rclsoff -application/vnd.sun.xml.impress.template = exec rclsoff -application/vnd.sun.xml.math = exec rclsoff -application/vnd.sun.xml.writer = exec rclsoff -application/vnd.sun.xml.writer.global = exec rclsoff -application/vnd.sun.xml.writer.template = exec rclsoff -application/vnd.wordperfect = exec wpd2html;mimetype=text/html -application/x-abiword = exec rclabw -application/x-awk = internal -application/x-dvi = exec rcldvi -application/x-flac = execm rclaudio -application/x-kword = exec rclkwd -application/x-lyx = exec rcllyx -application/x-perl = internal -application/x-scribus = exec rclscribus -application/x-shellscript = internal -application/x-tex = exec rcltex -application/x-chm = execm rclchm -application/zip = execm rclzip -audio/mpeg = execm rclaudio -image/gif = execm rclimg -image/jpeg = execm rclimg -image/png = execm rclimg -image/tiff = execm rclimg -image/vnd.djvu = exec rcldjvu -image/svg+xml = exec rclsvg -message/rfc822 = internal -text/calendar = execm rclics;mimetype=text/plain;charset=utf-8 -text/html = internal -text/plain = internal -text/rtf = exec unrtf --nopict --html;charset=iso-8859-1;mimetype=text/html -text/x-c = internal -text/x-fictionbook = exec rclfb2 -text/x-gaim-log = exec rclgaim -text/x-html-sidux-man = exec rclsiduxman -text/x-html-aptosid-man = exec rclaptosidman -text/x-mail = internal -text/x-man = exec rclman -text/x-purple-log = exec rclpurple -text/x-python = exec rclpython -text/x-shellscript = internal - -## ############################################# -# Icons to be used in the result list if required by gui config -[icons] -application/msword = wordprocessing -application/ogg = sownd -application/pdf = pdf -application/postscript = postscript -application/vnd.ms-excel = spreadsheet -application/vnd.ms-powerpoint = presentation -application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ - wordprocessing -application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ - wordprocessing -application/vnd.openxmlformats-officedocument.presentationml.template = \ - presentation -application/vnd.openxmlformats-officedocument.presentationml.presentation = \ - presentation -application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ - spreadsheet -application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ - spreadsheet -application/vnd.sun.xml.calc = spreadsheet -application/vnd.sun.xml.calc.template = spreadsheet -application/vnd.sun.xml.draw = drawing -application/vnd.sun.xml.draw.template = drawing -application/vnd.sun.xml.impress = presentation -application/vnd.sun.xml.impress.template = presentation -application/vnd.sun.xml.writer = wordprocessing -application/vnd.sun.xml.writer.global = wordprocessing -application/vnd.sun.xml.writer.template = wordprocessing -application/vnd.wordperfect = wordprocessing -application/x-abiword = wordprocessing -application/x-dvi = document -application/x-flac = sownd -application/x-fsdirectory = folder -application/x-kword = wordprocessing -application/x-lyx = wordprocessing -application/x-scribus = document -application/x-tex = wordprocessing -application/x-awk = source -application/x-perl = source -application/x-shellscript = source -audio/mpeg = sownd -image/gif = image -image/jpeg = image -image/png = image -image/tiff = image -image/vnd.djvu = document -image/svg+xml = drawing -message/rfc822 = message -text/html = html -text/plain = txt -text/x-c = source -text/x-c++ = source -text/x-fictionbook = document -text/x-html-sidux-man = sidux-book -text/x-html-aptosid-man = aptosid-manual -text/x-mail = message -text/x-man = document -application/x-chm = document -text/x-purple-log = pidgin -text/x-python = text-x-python - -[categories] - -text = \ - application/msword \ - application/pdf \ - application/postscript \ - application/vnd.openxmlformats-officedocument.wordprocessingml.document \ - application/vnd.openxmlformats-officedocument.wordprocessingml.template \ - application/vnd.sun.xml.writer \ - application/vnd.sun.xml.writer.global \ - application/vnd.sun.xml.writer.template \ - application/vnd.wordperfect \ - application/x-abiword \ - application/x-awk \ - application/x-chm \ - application/x-dvi \ - application/x-kword \ - application/x-lyx \ - application/x-perl \ - application/x-scribus \ - application/x-shellscript \ - application/x-tex \ - image/vnd.djvu \ - text/calendar \ - text/html \ - text/plain \ - text/rtf \ - text/x-c \ - text/x-c++ \ - text/x-fictionbook \ - text/x-html-sidux-man \ - text/x-html-aptosid-man \ - text/x-man \ - text/x-python \ - text/x-shellscript - -spreadsheet = \ - application/vnd.ms-excel \ - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet \ - application/vnd.openxmlformats-officedocument.spreadsheetml.template \ - application/vnd.sun.xml.calc \ - application/vnd.sun.xml.calc.template - -presentation = application/vnd.ms-powerpoint \ - application/vnd.openxmlformats-officedocument.presentationml.template \ - application/vnd.openxmlformats-officedocument.presentationml.presentation \ - application/vnd.sun.xml.impress \ - application/vnd.sun.xml.impress.template - -media = \ - audio/mpeg \ - application/ogg \ - application/x-flac \ - image/jpeg \ - image/png \ - image/tiff \ - image/gif \ - -message = message/rfc822 \ - text/x-gaim-log \ - text/x-mail \ - text/x-purple-log \ - -other = application/vnd.sun.xml.draw \ - application/vnd.sun.xml.draw.template \ - application/vnd.sun.xml.math \ - application/x-fsdirectory \ - application/zip \ - image/svg+xml \ diff --git a/website/filters/mimemap b/website/filters/mimemap deleted file mode 100644 index ed3cb633..00000000 --- a/website/filters/mimemap +++ /dev/null @@ -1,152 +0,0 @@ -# @(#$Id: mimemap,v 1.32 2008-09-15 08:03:37 dockes Exp $ (C) 2004 J.F.Dockes -# Recoll: associations of file name extensions to mime types - -.txt = text/plain -.text = text/plain -.d = text/plain - -# Source files. -# Defining them with specific types allows using a specific ext viewer (in -# mimeview). You can in general use rcltext to wrap them in html for -# indexing the contents (and rough preview). You could also just set them -# as text/plain (index as text, use text viewer) -.cpp = text/x-c -.h = text/x-c -.c = text/x-c -.cc = text/x-c -.py = text/x-python -.awk = application/x-awk -.pl = application/x-perl -.sh = application/x-shellscript - -.rtf = text/rtf - -.html = text/html -.htm = text/html -.shtml = text/html -.php = text/html -.ics = text/calendar - -.pdf = application/pdf - -.ps = application/postscript -.eps = application/postscript -.ai = application/postscript - -.tex = application/x-tex -.dvi = application/x-dvi - -.djvu = image/vnd.djvu -.svg = image/svg+xml - -.gz = application/x-gzip -.Z = application/x-gzip -.bz2 = application/x-bzip2 -#.Z = application/x-compress -.zip = application/zip - -.doc = application/msword -.ppt = application/vnd.ms-powerpoint -.xls = application/vnd.ms-excel -.chm = application/x-chm - -# OpenOffice / opendocument. We handle opendocument as old openoffice files -# for now -.sxc = application/vnd.sun.xml.calc -.ods = application/vnd.sun.xml.calc -.stc = application/vnd.sun.xml.calc.template -.sxd = application/vnd.sun.xml.draw -.std = application/vnd.sun.xml.draw.template -.sxi = application/vnd.sun.xml.impress -.odp = application/vnd.sun.xml.impress -.sti = application/vnd.sun.xml.impress.template -.sxm = application/vnd.sun.xml.math -.sxw = application/vnd.sun.xml.writer -.odt = application/vnd.sun.xml.writer -.sxg = application/vnd.sun.xml.writer.global -.stw = application/vnd.sun.xml.writer.template - -# ms openxml -.docm = application/vnd.ms-word.document.macroEnabled.12 -.docx = application/vnd.openxmlformats-officedocument.wordprocessingml.document -.dotm = application/vnd.ms-word.template.macroEnabled.12 -.dotx = application/vnd.openxmlformats-officedocument.wordprocessingml.template -.potm = application/vnd.ms-powerpoint.template.macroEnabled.12 -.potx = application/vnd.openxmlformats-officedocument.presentationml.template -.ppam = application/vnd.ms-powerpoint.addin.macroEnabled.12 -.ppsm = application/vnd.ms-powerpoint.slideshow.macroEnabled.12 -.ppsx = application/vnd.openxmlformats-officedocument.presentationml.slideshow -.pptm = application/vnd.ms-powerpoint.presentation.macroEnabled.12 -.pptx = application/vnd.openxmlformats-officedocument.presentationml.presentation -.xlam = application/vnd.ms-excel.addin.macroEnabled.12 -.xlsb = application/vnd.ms-excel.sheet.binary.macroEnabled.12 -.xlsm = application/vnd.ms-excel.sheet.macroEnabled.12 -.xlsx = application/vnd.openxmlformats-officedocument.spreadsheetml.sheet -.xltm = application/vnd.ms-excel.template.macroEnabled.12 -.xltx = application/vnd.openxmlformats-officedocument.spreadsheetml.template - -.abw = application/x-abiword -.lyx = application/x-lyx -.sla = application/x-scribus -.scd = application/x-scribus - -.kwd = application/x-kword - -.wpd = application/vnd.wordperfect - -.rtf = text/rtf - -.mp3 = audio/mpeg -.flac = application/x-flac -.ogg = application/ogg - -.png = image/png -.jpg = image/jpeg -.jpeg = image/jpeg -.gif = image/gif -.tiff = image/tiff -.tif = image/tiff - -.fb2 = text/x-fictionbook - -# A list of suffixes (name endings) that we don't want to touch at all. -# Having these explicitely listed speeds things up a bit by avoiding -# unneeded decompression or 'file' calls. File names still get indexed if -# indexallfilenames is set (so this is different from skippedNames). It's a -# bit unconsistent to have it listed among the suffix translations, but no -# problem in practice. -recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz .log.gz .md5 .map \ - .o .lib .dll .a \ - .dat .bak .rdf .log .db .msf .pid \ - .gnm .gnumeric \ - .gif .bmp .xpm \ - ,v ~ # - -# Special handling of .txt files inside ~/.gaim and ~/.purple directories -[~/.gaim] -.txt = text/x-gaim-log -[~/.purple] -.txt = text/x-purple-log - -# Special handling of sidux/aptosid manual menu system -[/usr/share/sidux-manual] -.htm = text/x-html-sidux-man -.html = text/x-html-sidux-man -[/usr/share/aptosid-manual] -.htm = text/x-html-aptosid-man -.html = text/x-html-aptosid-man - -# Manual files. You may want to adjust the location for your system -# We can't use the default text/troff type because this doesn't say -# what macro set to use (groff -man) -[/usr/share/man] -.1 = text/x-man -.2 = text/x-man -.3 = text/x-man -.4 = text/x-man -.5 = text/x-man -.6 = text/x-man -.7 = text/x-man -.8 = text/x-man -.n = text/x-man -.3pm = text/x-man diff --git a/website/filters/mimeview b/website/filters/mimeview deleted file mode 100644 index 873ce204..00000000 --- a/website/filters/mimeview +++ /dev/null @@ -1,68 +0,0 @@ -# @(#$Id: mimeview,v 1.16 2008-09-15 08:03:37 dockes Exp $ (C) 2004 J.F.Dockes - -## ########################################## -# External viewers, launched by the recoll GUI when you click on a result -# 'edit' link - -# Mime types which we should not uncompress if they are found gzipped or -# bzipped because the native viewer knows how to handle. These would be -# exceptions and the list is normally empty -#nouncompforviewmts = - -[view] -# Pseudo entry used if the 'use desktop' preference is set in the GUI -application/x-all = xdg-open %f - -application/x-kword = kword %f -application/x-abiword = abiword %f - -application/msword = openoffice %f -application/ogg = xmms %f -application/pdf = xpdf %f -application/postscript = gv %f -application/vnd.ms-excel = openoffice %f -application/vnd.ms-powerpoint = openoffice %f -application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ - openoffice %f -application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ - openoffice %f -application/vnd.openxmlformats-officedocument.presentationml.template = \ - openoffice %f -application/vnd.openxmlformats-officedocument.presentationml.presentation = \ - openoffice %f -application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ - openoffice %f -application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ - openoffice %f -application/vnd.sun.xml.calc = openoffice %f -application/vnd.sun.xml.calc.template = openoffice %f -application/vnd.sun.xml.draw = openoffice %f -application/vnd.sun.xml.draw.template = openoffice %f -application/vnd.sun.xml.impress = openoffice %f -application/vnd.sun.xml.impress.template = openoffice %f -application/vnd.sun.xml.math = openoffice %f -application/vnd.sun.xml.writer = openoffice %f -application/vnd.sun.xml.writer.global = openoffice %f -application/vnd.sun.xml.writer.template = openoffice %f -application/vnd.wordperfect = openoffice %f -application/x-chm = okular %f -application/x-dvi = xdvi %f -application/x-fsdirectory = rox %f -application/x-flac = xmms %f -application/x-lyx = lyx %f -application/x-scribus = scribus %f -application/x-tex = gnuclient -q %f -audio/mpeg = xmms %f -image/jpeg = xv %f -image/png = xv %f -image/tiff = xv %f -image/gif = xv %f -image/svg+xml = inkview %f -image/vnd.djvu = djview %f -# Or firefox -remote "openFile(%u)" -text/html = firefox %u -text/plain = gnuclient -q %f -text/x-c = gnuclient -q %f -text/x-html-sidux-man = konqueror %f -text/x-html-aptosid-man = konqueror %f -text/x-python = idle %f diff --git a/website/filters/rclics b/website/filters/rclics deleted file mode 100755 index f9f0d6c9..00000000 --- a/website/filters/rclics +++ /dev/null @@ -1,180 +0,0 @@ -#!/usr/bin/env python - -# Read an ICS file, break it into "documents" which are events, todos, -# or journal entries, and interface with recoll execm -# -# For historical reasons, this can use either the icalendar or the -# vobject Python modules, or an internal splitter. The default is now -# to use the internal splitter, the other modules are more trouble -# than they're worth (to us and until we will want to get into date -# computations etc.) - -import rclexecm -import sys - -# Decide how we'll process the file. -modules = ('internal', 'icalendar', 'vobject') -usemodule = 'internal' -forcevobject = 0 -if usemodule != 'internal': - try: - if forcevobject: - raise Exception - from icalendar import Calendar, Event - usemodule = 'icalendar' - except: - try: - import vobject - usemodule = 'vobject' - except: - print "RECFILTERROR HELPERNOTFOUND python:icalendar" - print "RECFILTERROR HELPERNOTFOUND python:vobject" - sys.exit(1); - - -class IcalExtractor: - def __init__(self, em): - self.file = "" - self.contents = [] - self.em = em - self.em.setmimetype("text/plain") - - def extractone(self, index): - if index >= len(self.contents): - return(False, "", "", True) - docdata = self.contents[index] - #self.em.rclog(docdata) - - iseof = rclexecm.RclExecM.noteof - if self.currentindex >= len(self.contents) -1: - iseof = rclexecm.RclExecM.eofnext - return (True, docdata, str(index), iseof) - - ###### File type handler api, used by rclexecm ----------> - def openfile(self, params): - self.file = params["filename:"] - - try: - calstr = open(self.file, 'rb') - except Exception, e: - self.em.rclog("Openfile: open: %s" % str(e)) - return False - - self.currentindex = 0 - - if usemodule == 'internal': - self.contents = ICalSimpleSplitter().splitcalendar(calstr) - elif usemodule == 'icalendar': - try: - cal = Calendar.from_string(calstr.read()) - except Exception, e: - self.em.rclog("Openfile: read or parse error: %s" % str(e)) - return False - self.contents = cal.walk() - self.contents = [item.as_string() for item in self.contents - if (item.name == 'VEVENT' or item.name == 'VTODO' - or item.name == 'VJOURNAL')] - else: - try: - cal = vobject.readOne(calstr) - except Exception, e: - self.em.rclog("Openfile: cant parse object: %s" % str(e)) - return False - for lstnm in ('vevent_list', 'vtodo_list', 'vjournal_list'): - lst = getattr(cal, lstnm, []) - for ev in lst: - self.contents.append(ev.serialize()) - - #self.em.rclog("openfile: Entry count: %d"%(len(self.contents))) - return True - - def getipath(self, params): - try: - index = int(params["ipath:"]) - except: - return False - return self.extractone(index) - - def getnext(self, params): - if self.currentindex >= len(self.contents): - self.em.rclog("getnext: EOF hit") - return (False, "", "", rclexecm.RclExecM.eofnow) - else: - ret= self.extractone(self.currentindex) - self.currentindex += 1 - return ret - -# Trivial splitter: cut objects on BEGIN/END (only for 'interesting' objects) -# ignore all other syntax -class ICalSimpleSplitter: - # Note that if an 'interesting' element is nested inside another one, - # it will not be extracted (stay as text in external event). This is - # not an issue and I don't think it can happen with the current list - interesting = ('VTODO', 'VEVENT', 'VJOURNAL') - - def splitcalendar(self, fin): - curblkname = '' - curblk = '' - - lo = [] - for line in fin: - line = line.rstrip() - if line == '': - continue - - if curblkname: - curblk = curblk + line + "\n" - - l = line.split(":") - if len(l) < 2: - continue - - # If not currently inside a block and we see an - # 'interesting' BEGIN, start block - if curblkname == '' and l[0].upper() == "BEGIN" : - name = l[1].upper() - if name in ICalSimpleSplitter.interesting: - curblkname = name - curblk = curblk + line + "\n" - - # If currently accumulating block lines, check for end - if curblkname and l[0].upper() == "END" and \ - l[1].upper() == curblkname: - lo.append(curblk) - curblkname = '' - curblk = '' - - if curblk: - lo.append(curblk) - curblkname = '' - curblk = '' - - return lo - - -##### Main program: either talk to the parent or execute test loop - -e = rclexecm.RclExecM() -ical = IcalExtractor(e) - -if len(sys.argv) == 1: - e.mainloop(ical) -else: - # Got a file name parameter: testing without an execm parent - # Loop on all entries - if not ical.openfile({'filename:':sys.argv[1]}): - print "Open error" - sys.exit(1) - - ecnt = 0 - while 1: - ok, data, ipath, eof = ical.getnext("") - if ok: - ecnt = ecnt + 1 - print "=========== ENTRY %d =================" % ecnt - print data - print - else: - print "Got error, eof %d"%eof - break -