This commit is contained in:
Jean-Francois Dockes 2011-03-14 08:25:20 +01:00
parent 205fdde5a9
commit 7b9718a5aa
5 changed files with 4 additions and 633 deletions

View File

@ -72,6 +72,10 @@
</blockquote>
-->
<h2>Updated zip archive filter</h2>
<p>The filter is corrected to handle utf-8 paths in zip archives:
<a href="rclzip">rclzip</a></p>
<h2>Updated audio tag filter</h2>
<p>The mutagen-based rclaudio filter delivered with recoll 1.14.2
used a very recent mutagen interface which will only work with

View File

@ -1,233 +0,0 @@
# @(#$Id: mimeconf,v 1.48 2008-11-27 13:35:24 dockes Exp $ (C) 2004 J.F.Dockes
# Recoll : associations of mime types to processing filters.
# There are different sections for decompression, 'interning' for indexing
# and preview, and external viewers
## #######################################
# Decompression: these types need a first pass to create a temp file to
# work with. We use a script because uncompress utilities usually work in
# place, which is not suitable.
#
# The %t parameter will be replaced by recoll with the name of a temporary
# directory. This directory is guaranteed to be empty when the filter is called.
#
# The %f parameter will be substituted with the input file.
#
# The script (ie: rcluncomp) must output the uncompressed file name on
# stdout.
application/x-gzip = uncompress rcluncomp gunzip %f %t
application/x-compress = uncompress rcluncomp gunzip %f %t
application/x-bzip2 = uncompress rcluncomp bunzip2 %f %t
## ###################################
# Filters for indexing and internal preview.
# The "internal" filters are hardwired in the c++ code.
# The external "exec" filters are typically scripts. By default, they output the
# document in simple html format, have a look at the scripts.
# A different format (e.g. text/plain) and a character set can be defined for
# each filter; see the examples below (e.g. msword).
[index]
# Note: rcldoc did some work to splice hyphenated words at eol. Seems
# actually not needed because antiword apparently does it too
# application/msword = exec rcldoc
application/msword = exec antiword -t -i 1 -m UTF-8;mimetype=text/plain;charset=utf-8
# Alternatively you can use wvWare for msword. It's much slower than
# antiword, but will handle documents which provoke the 'I'm afraid the
# text stream of this file is too small to handle' antiword error
# application/msword = exec wvWare --charset=utf-8 --nographics
application/ogg = execm rclaudio
application/pdf = exec rclpdf
application/postscript = exec pstotext;charset=iso-8859-1;mimetype=text/plain
application/vnd.ms-excel = exec xls2csv -c " " -d utf-8;charset=utf-8;mimetype=text/plain
application/vnd.ms-powerpoint = exec catppt -d utf-8;charset=utf-8;mimetype=text/plain
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
exec rclopxml
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
exec rclopxml
application/vnd.openxmlformats-officedocument.presentationml.template = \
exec rclopxml
application/vnd.openxmlformats-officedocument.presentationml.presentation = \
exec rclopxml
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
exec rclopxml
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
exec rclopxml
application/vnd.sun.xml.calc = exec rclsoff
application/vnd.sun.xml.calc.template = exec rclsoff
application/vnd.sun.xml.draw = exec rclsoff
application/vnd.sun.xml.draw.template = exec rclsoff
application/vnd.sun.xml.impress = exec rclsoff
application/vnd.sun.xml.impress.template = exec rclsoff
application/vnd.sun.xml.math = exec rclsoff
application/vnd.sun.xml.writer = exec rclsoff
application/vnd.sun.xml.writer.global = exec rclsoff
application/vnd.sun.xml.writer.template = exec rclsoff
application/vnd.wordperfect = exec wpd2html;mimetype=text/html
application/x-abiword = exec rclabw
application/x-awk = internal
application/x-dvi = exec rcldvi
application/x-flac = execm rclaudio
application/x-kword = exec rclkwd
application/x-lyx = exec rcllyx
application/x-perl = internal
application/x-scribus = exec rclscribus
application/x-shellscript = internal
application/x-tex = exec rcltex
application/x-chm = execm rclchm
application/zip = execm rclzip
audio/mpeg = execm rclaudio
image/gif = execm rclimg
image/jpeg = execm rclimg
image/png = execm rclimg
image/tiff = execm rclimg
image/vnd.djvu = exec rcldjvu
image/svg+xml = exec rclsvg
message/rfc822 = internal
text/calendar = execm rclics;mimetype=text/plain;charset=utf-8
text/html = internal
text/plain = internal
text/rtf = exec unrtf --nopict --html;charset=iso-8859-1;mimetype=text/html
text/x-c = internal
text/x-fictionbook = exec rclfb2
text/x-gaim-log = exec rclgaim
text/x-html-sidux-man = exec rclsiduxman
text/x-html-aptosid-man = exec rclaptosidman
text/x-mail = internal
text/x-man = exec rclman
text/x-purple-log = exec rclpurple
text/x-python = exec rclpython
text/x-shellscript = internal
## #############################################
# Icons to be used in the result list if required by gui config
[icons]
application/msword = wordprocessing
application/ogg = sownd
application/pdf = pdf
application/postscript = postscript
application/vnd.ms-excel = spreadsheet
application/vnd.ms-powerpoint = presentation
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
wordprocessing
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
wordprocessing
application/vnd.openxmlformats-officedocument.presentationml.template = \
presentation
application/vnd.openxmlformats-officedocument.presentationml.presentation = \
presentation
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
spreadsheet
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
spreadsheet
application/vnd.sun.xml.calc = spreadsheet
application/vnd.sun.xml.calc.template = spreadsheet
application/vnd.sun.xml.draw = drawing
application/vnd.sun.xml.draw.template = drawing
application/vnd.sun.xml.impress = presentation
application/vnd.sun.xml.impress.template = presentation
application/vnd.sun.xml.writer = wordprocessing
application/vnd.sun.xml.writer.global = wordprocessing
application/vnd.sun.xml.writer.template = wordprocessing
application/vnd.wordperfect = wordprocessing
application/x-abiword = wordprocessing
application/x-dvi = document
application/x-flac = sownd
application/x-fsdirectory = folder
application/x-kword = wordprocessing
application/x-lyx = wordprocessing
application/x-scribus = document
application/x-tex = wordprocessing
application/x-awk = source
application/x-perl = source
application/x-shellscript = source
audio/mpeg = sownd
image/gif = image
image/jpeg = image
image/png = image
image/tiff = image
image/vnd.djvu = document
image/svg+xml = drawing
message/rfc822 = message
text/html = html
text/plain = txt
text/x-c = source
text/x-c++ = source
text/x-fictionbook = document
text/x-html-sidux-man = sidux-book
text/x-html-aptosid-man = aptosid-manual
text/x-mail = message
text/x-man = document
application/x-chm = document
text/x-purple-log = pidgin
text/x-python = text-x-python
[categories]
text = \
application/msword \
application/pdf \
application/postscript \
application/vnd.openxmlformats-officedocument.wordprocessingml.document \
application/vnd.openxmlformats-officedocument.wordprocessingml.template \
application/vnd.sun.xml.writer \
application/vnd.sun.xml.writer.global \
application/vnd.sun.xml.writer.template \
application/vnd.wordperfect \
application/x-abiword \
application/x-awk \
application/x-chm \
application/x-dvi \
application/x-kword \
application/x-lyx \
application/x-perl \
application/x-scribus \
application/x-shellscript \
application/x-tex \
image/vnd.djvu \
text/calendar \
text/html \
text/plain \
text/rtf \
text/x-c \
text/x-c++ \
text/x-fictionbook \
text/x-html-sidux-man \
text/x-html-aptosid-man \
text/x-man \
text/x-python \
text/x-shellscript
spreadsheet = \
application/vnd.ms-excel \
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet \
application/vnd.openxmlformats-officedocument.spreadsheetml.template \
application/vnd.sun.xml.calc \
application/vnd.sun.xml.calc.template
presentation = application/vnd.ms-powerpoint \
application/vnd.openxmlformats-officedocument.presentationml.template \
application/vnd.openxmlformats-officedocument.presentationml.presentation \
application/vnd.sun.xml.impress \
application/vnd.sun.xml.impress.template
media = \
audio/mpeg \
application/ogg \
application/x-flac \
image/jpeg \
image/png \
image/tiff \
image/gif \
message = message/rfc822 \
text/x-gaim-log \
text/x-mail \
text/x-purple-log \
other = application/vnd.sun.xml.draw \
application/vnd.sun.xml.draw.template \
application/vnd.sun.xml.math \
application/x-fsdirectory \
application/zip \
image/svg+xml \

View File

@ -1,152 +0,0 @@
# @(#$Id: mimemap,v 1.32 2008-09-15 08:03:37 dockes Exp $ (C) 2004 J.F.Dockes
# Recoll: associations of file name extensions to mime types
.txt = text/plain
.text = text/plain
.d = text/plain
# Source files.
# Defining them with specific types allows using a specific ext viewer (in
# mimeview). You can in general use rcltext to wrap them in html for
# indexing the contents (and rough preview). You could also just set them
# as text/plain (index as text, use text viewer)
.cpp = text/x-c
.h = text/x-c
.c = text/x-c
.cc = text/x-c
.py = text/x-python
.awk = application/x-awk
.pl = application/x-perl
.sh = application/x-shellscript
.rtf = text/rtf
.html = text/html
.htm = text/html
.shtml = text/html
.php = text/html
.ics = text/calendar
.pdf = application/pdf
.ps = application/postscript
.eps = application/postscript
.ai = application/postscript
.tex = application/x-tex
.dvi = application/x-dvi
.djvu = image/vnd.djvu
.svg = image/svg+xml
.gz = application/x-gzip
.Z = application/x-gzip
.bz2 = application/x-bzip2
#.Z = application/x-compress
.zip = application/zip
.doc = application/msword
.ppt = application/vnd.ms-powerpoint
.xls = application/vnd.ms-excel
.chm = application/x-chm
# OpenOffice / opendocument. We handle opendocument as old openoffice files
# for now
.sxc = application/vnd.sun.xml.calc
.ods = application/vnd.sun.xml.calc
.stc = application/vnd.sun.xml.calc.template
.sxd = application/vnd.sun.xml.draw
.std = application/vnd.sun.xml.draw.template
.sxi = application/vnd.sun.xml.impress
.odp = application/vnd.sun.xml.impress
.sti = application/vnd.sun.xml.impress.template
.sxm = application/vnd.sun.xml.math
.sxw = application/vnd.sun.xml.writer
.odt = application/vnd.sun.xml.writer
.sxg = application/vnd.sun.xml.writer.global
.stw = application/vnd.sun.xml.writer.template
# ms openxml
.docm = application/vnd.ms-word.document.macroEnabled.12
.docx = application/vnd.openxmlformats-officedocument.wordprocessingml.document
.dotm = application/vnd.ms-word.template.macroEnabled.12
.dotx = application/vnd.openxmlformats-officedocument.wordprocessingml.template
.potm = application/vnd.ms-powerpoint.template.macroEnabled.12
.potx = application/vnd.openxmlformats-officedocument.presentationml.template
.ppam = application/vnd.ms-powerpoint.addin.macroEnabled.12
.ppsm = application/vnd.ms-powerpoint.slideshow.macroEnabled.12
.ppsx = application/vnd.openxmlformats-officedocument.presentationml.slideshow
.pptm = application/vnd.ms-powerpoint.presentation.macroEnabled.12
.pptx = application/vnd.openxmlformats-officedocument.presentationml.presentation
.xlam = application/vnd.ms-excel.addin.macroEnabled.12
.xlsb = application/vnd.ms-excel.sheet.binary.macroEnabled.12
.xlsm = application/vnd.ms-excel.sheet.macroEnabled.12
.xlsx = application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
.xltm = application/vnd.ms-excel.template.macroEnabled.12
.xltx = application/vnd.openxmlformats-officedocument.spreadsheetml.template
.abw = application/x-abiword
.lyx = application/x-lyx
.sla = application/x-scribus
.scd = application/x-scribus
.kwd = application/x-kword
.wpd = application/vnd.wordperfect
.rtf = text/rtf
.mp3 = audio/mpeg
.flac = application/x-flac
.ogg = application/ogg
.png = image/png
.jpg = image/jpeg
.jpeg = image/jpeg
.gif = image/gif
.tiff = image/tiff
.tif = image/tiff
.fb2 = text/x-fictionbook
# A list of suffixes (name endings) that we don't want to touch at all.
# Having these explicitly listed speeds things up a bit by avoiding
# unneeded decompression or 'file' calls. File names still get indexed if
# indexallfilenames is set (so this is different from skippedNames). It's a
# bit inconsistent to have it listed among the suffix translations, but no
# problem in practice.
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz .log.gz .md5 .map \
.o .lib .dll .a \
.dat .bak .rdf .log .db .msf .pid \
.gnm .gnumeric \
.gif .bmp .xpm \
,v ~ #
# Special handling of .txt files inside ~/.gaim and ~/.purple directories
[~/.gaim]
.txt = text/x-gaim-log
[~/.purple]
.txt = text/x-purple-log
# Special handling of sidux/aptosid manual menu system
[/usr/share/sidux-manual]
.htm = text/x-html-sidux-man
.html = text/x-html-sidux-man
[/usr/share/aptosid-manual]
.htm = text/x-html-aptosid-man
.html = text/x-html-aptosid-man
# Manual files. You may want to adjust the location for your system
# We can't use the default text/troff type because this doesn't say
# what macro set to use (groff -man)
[/usr/share/man]
.1 = text/x-man
.2 = text/x-man
.3 = text/x-man
.4 = text/x-man
.5 = text/x-man
.6 = text/x-man
.7 = text/x-man
.8 = text/x-man
.n = text/x-man
.3pm = text/x-man

View File

@ -1,68 +0,0 @@
# @(#$Id: mimeview,v 1.16 2008-09-15 08:03:37 dockes Exp $ (C) 2004 J.F.Dockes
## ##########################################
# External viewers, launched by the recoll GUI when you click on a result
# 'edit' link
# Mime types which we should not uncompress if they are found gzipped or
# bzipped, because the native viewer knows how to handle them. These would be
# exceptions, and the list is normally empty.
#nouncompforviewmts =
[view]
# Pseudo entry used if the 'use desktop' preference is set in the GUI
application/x-all = xdg-open %f
application/x-kword = kword %f
application/x-abiword = abiword %f
application/msword = openoffice %f
application/ogg = xmms %f
application/pdf = xpdf %f
application/postscript = gv %f
application/vnd.ms-excel = openoffice %f
application/vnd.ms-powerpoint = openoffice %f
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
openoffice %f
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
openoffice %f
application/vnd.openxmlformats-officedocument.presentationml.template = \
openoffice %f
application/vnd.openxmlformats-officedocument.presentationml.presentation = \
openoffice %f
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
openoffice %f
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
openoffice %f
application/vnd.sun.xml.calc = openoffice %f
application/vnd.sun.xml.calc.template = openoffice %f
application/vnd.sun.xml.draw = openoffice %f
application/vnd.sun.xml.draw.template = openoffice %f
application/vnd.sun.xml.impress = openoffice %f
application/vnd.sun.xml.impress.template = openoffice %f
application/vnd.sun.xml.math = openoffice %f
application/vnd.sun.xml.writer = openoffice %f
application/vnd.sun.xml.writer.global = openoffice %f
application/vnd.sun.xml.writer.template = openoffice %f
application/vnd.wordperfect = openoffice %f
application/x-chm = okular %f
application/x-dvi = xdvi %f
application/x-fsdirectory = rox %f
application/x-flac = xmms %f
application/x-lyx = lyx %f
application/x-scribus = scribus %f
application/x-tex = gnuclient -q %f
audio/mpeg = xmms %f
image/jpeg = xv %f
image/png = xv %f
image/tiff = xv %f
image/gif = xv %f
image/svg+xml = inkview %f
image/vnd.djvu = djview %f
# Or firefox -remote "openFile(%u)"
text/html = firefox %u
text/plain = gnuclient -q %f
text/x-c = gnuclient -q %f
text/x-html-sidux-man = konqueror %f
text/x-html-aptosid-man = konqueror %f
text/x-python = idle %f

View File

@ -1,180 +0,0 @@
#!/usr/bin/env python
# Read an ICS file, break it into "documents" which are events, todos,
# or journal entries, and interface with recoll execm
#
# For historical reasons, this can use either the icalendar or the
# vobject Python modules, or an internal splitter. The default is now
# to use the internal splitter, the other modules are more trouble
# than they're worth (to us and until we will want to get into date
# computations etc.)
import rclexecm
import sys
# Decide how we'll process the file.
# 'modules' lists the supported back ends; 'usemodule' is the one in effect.
# With the default value ('internal') no optional module is imported at all.
modules = ('internal', 'icalendar', 'vobject')
usemodule = 'internal'
# Set to a true value to skip icalendar and go straight to vobject (only
# meaningful when usemodule is not 'internal').
forcevobject = 0
if usemodule != 'internal':
    _have_icalendar = False
    if not forcevobject:
        try:
            from icalendar import Calendar, Event
            usemodule = 'icalendar'
            _have_icalendar = True
        except Exception:
            # Any import-time failure means: fall back to vobject.
            # 'except Exception' (instead of a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            pass
    if not _have_icalendar:
        try:
            import vobject
            usemodule = 'vobject'
        except Exception:
            # Neither helper is usable: report both on stdout and give up.
            print("RECFILTERROR HELPERNOTFOUND python:icalendar")
            print("RECFILTERROR HELPERNOTFOUND python:vobject")
            sys.exit(1)
class IcalExtractor:
    """Split an ICS file into entries and serve them to rclexecm.

    The file is broken into "documents" (events, todos or journal entries)
    which are kept as a list of strings in self.contents. Entries are
    addressed by their list index, which is also used (as a string) for the
    ipath value returned to the caller.

    All data-returning methods return a 4-tuple (ok, data, ipath, eof).
    """

    def __init__(self, em):
        """Store the rclexecm object and declare our output as text/plain."""
        self.file = ""
        self.contents = []
        # Index of the next entry that getnext() will return.
        self.currentindex = 0
        self.em = em
        self.em.setmimetype("text/plain")

    def extractone(self, index):
        """Return the (ok, data, ipath, eof) tuple for entry number index.

        An out of range index yields (False, "", "", True).
        """
        if index >= len(self.contents):
            return(False, "", "", True)
        docdata = self.contents[index]
        #self.em.rclog(docdata)
        iseof = rclexecm.RclExecM.noteof
        # NOTE(review): the end-of-data hint is computed from
        # self.currentindex, not from 'index'. Both are equal when called
        # from getnext(), but may differ when called from getipath() --
        # confirm that this is intended.
        if self.currentindex >= len(self.contents) -1:
            iseof = rclexecm.RclExecM.eofnext
        return (True, docdata, str(index), iseof)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Open and split the file named by params["filename:"].

        Returns True on success. On open or parse error, logs a message
        through the rclexecm object and returns False.
        """
        self.file = params["filename:"]
        # Forget everything about the previous file. Without this, the
        # vobject branch below (which only appends) would pile the new
        # entries on top of the old ones, and a failed open would leave
        # stale entries available to getnext().
        self.contents = []
        self.currentindex = 0
        try:
            calstr = open(self.file, 'rb')
        except Exception as e:
            self.em.rclog("Openfile: open: %s" % str(e))
            return False
        # All reading happens inside this try so that the file is always
        # closed, whichever branch returns.
        try:
            if usemodule == 'internal':
                self.contents = ICalSimpleSplitter().splitcalendar(calstr)
            elif usemodule == 'icalendar':
                try:
                    cal = Calendar.from_string(calstr.read())
                except Exception as e:
                    self.em.rclog("Openfile: read or parse error: %s" % str(e))
                    return False
                # Keep only the component types we index
                self.contents = [item.as_string() for item in cal.walk()
                                 if (item.name == 'VEVENT'
                                     or item.name == 'VTODO'
                                     or item.name == 'VJOURNAL')]
            else:
                try:
                    cal = vobject.readOne(calstr)
                except Exception as e:
                    self.em.rclog("Openfile: cant parse object: %s" % str(e))
                    return False
                for lstnm in ('vevent_list', 'vtodo_list', 'vjournal_list'):
                    lst = getattr(cal, lstnm, [])
                    for ev in lst:
                        self.contents.append(ev.serialize())
        finally:
            calstr.close()
        #self.em.rclog("openfile: Entry count: %d"%(len(self.contents)))
        return True

    def getipath(self, params):
        """Return the entry designated by params["ipath:"] (a list index).

        A missing or non-numeric ipath yields the same failure tuple as an
        out of range index, so that the result can always be unpacked into
        four values.
        """
        try:
            index = int(params["ipath:"])
        except (KeyError, ValueError, TypeError):
            return (False, "", "", True)
        return self.extractone(index)

    def getnext(self, params):
        """Return the entry at the current position and move past it.

        params is not used. Once all entries have been returned, the result
        is (False, "", "", eofnow).
        """
        if self.currentindex >= len(self.contents):
            self.em.rclog("getnext: EOF hit")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret= self.extractone(self.currentindex)
            self.currentindex += 1
            return ret
# Trivial splitter: cut objects on BEGIN/END (only for 'interesting' objects)
# ignore all other syntax
class ICalSimpleSplitter:
    """Cut an iCalendar text stream into one string per interesting object."""

    # Note that if an 'interesting' element is nested inside another one,
    # it will not be extracted (stay as text in external event). This is
    # not an issue and I don't think it can happen with the current list
    interesting = ('VTODO', 'VEVENT', 'VJOURNAL')

    def splitcalendar(self, fin):
        """Return the list of interesting blocks found in fin.

        fin is any iterable of text lines (e.g. an open file). Each returned
        element holds the lines of one block, from its BEGIN line to the
        matching END line inclusive, right-stripped and terminated by "\\n".
        Empty lines are dropped. A block which is still open at end of
        input is returned as is.
        """
        curblkname = ''
        # Lines of the block being accumulated. They are joined only once,
        # when the block is complete, instead of growing a string line by
        # line.
        curlines = []
        lo = []
        for line in fin:
            line = line.rstrip()
            if line == '':
                continue
            if curblkname:
                curlines.append(line)
            l = line.split(":")
            if len(l) < 2:
                continue
            keyword = l[0].upper()
            # If not currently inside a block and we see an
            # 'interesting' BEGIN, start block
            if curblkname == '' and keyword == "BEGIN":
                name = l[1].upper()
                if name in self.interesting:
                    curblkname = name
                    curlines.append(line)
            # If currently accumulating block lines, check for end
            if curblkname and keyword == "END" and \
               l[1].upper() == curblkname:
                lo.append("\n".join(curlines) + "\n")
                curblkname = ''
                curlines = []
        # Unterminated block at end of input: keep what we have
        if curlines:
            lo.append("\n".join(curlines) + "\n")
        return lo
##### Main program: either talk to the parent or execute test loop
e = rclexecm.RclExecM()
ical = IcalExtractor(e)
if len(sys.argv) != 1:
    # Got a file name parameter: testing without an execm parent
    if not ical.openfile({'filename:': sys.argv[1]}):
        print("Open error")
        sys.exit(1)
    # Loop on all entries, printing each one, until getnext() reports
    # a failure (which includes the normal end of data)
    ecnt = 0
    while True:
        ok, data, ipath, eof = ical.getnext("")
        if not ok:
            print("Got error, eof %d" % eof)
            break
        ecnt += 1
        print("=========== ENTRY %d =================" % ecnt)
        print(data)
        # Empty separator line. An explicit "" is used so that the output
        # is the same whether print is a statement or a function.
        print("")
else:
    # No argument: serve requests from the parent process
    e.mainloop(ical)