diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml index 1e98d818..19f4df6e 100644 --- a/src/doc/user/usermanual.sgml +++ b/src/doc/user/usermanual.sgml @@ -2670,10 +2670,7 @@ while query.next >= 0 and query.next < nres: url="http://www.jedrea.com/chmlib/">chmlib). - ics: iCalendar files need Python and the - icalendar - module. - + ics: iCalendar files need Python. zip: Zip archives need Python (and the standard zipfile module). diff --git a/src/filters/rclics b/src/filters/rclics index e0fee210..f9f0d6c9 100755 --- a/src/filters/rclics +++ b/src/filters/rclics @@ -1,25 +1,48 @@ #!/usr/bin/env python +# Read an ICS file, break it into "documents" which are events, todos, +# or journal entries, and interface with recoll execm +# +# For historical reasons, this can use either the icalendar or the +# vobject Python modules, or an internal splitter. The default is now +# to use the internal splitter: the other modules are more trouble +# than they're worth (for us, at least until we want to get into date +# computations etc.) + import rclexecm import sys -try: - from icalendar import Calendar, Event -except: - print "RECFILTERROR HELPERNOTFOUND python:icalendar" - sys.exit(1); +# Decide how we'll process the file. 
+modules = ('internal', 'icalendar', 'vobject') +usemodule = 'internal' +forcevobject = 0 +if usemodule != 'internal': + try: + if forcevobject: + raise Exception + from icalendar import Calendar, Event + usemodule = 'icalendar' + except: + try: + import vobject + usemodule = 'vobject' + except: + print "RECFILTERROR HELPERNOTFOUND python:icalendar" + print "RECFILTERROR HELPERNOTFOUND python:vobject" + sys.exit(1); + class IcalExtractor: def __init__(self, em): self.file = "" self.contents = [] self.em = em - em.setmimetype("text/plain") + self.em.setmimetype("text/plain") def extractone(self, index): if index >= len(self.contents): return(False, "", "", True) - docdata = self.contents[index].as_string() + docdata = self.contents[index] #self.em.rclog(docdata) iseof = rclexecm.RclExecM.noteof @@ -30,13 +53,39 @@ class IcalExtractor: ###### File type handler api, used by rclexecm ----------> def openfile(self, params): self.file = params["filename:"] + try: - self.cal = Calendar.from_string(open(self.file,'rb').read()) - except: + calstr = open(self.file, 'rb') + except Exception, e: + self.em.rclog("Openfile: open: %s" % str(e)) return False - # Skip the top level object - self.currentindex = 1 - self.contents = self.cal.walk() + + self.currentindex = 0 + + if usemodule == 'internal': + self.contents = ICalSimpleSplitter().splitcalendar(calstr) + elif usemodule == 'icalendar': + try: + cal = Calendar.from_string(calstr.read()) + except Exception, e: + self.em.rclog("Openfile: read or parse error: %s" % str(e)) + return False + self.contents = cal.walk() + self.contents = [item.as_string() for item in self.contents + if (item.name == 'VEVENT' or item.name == 'VTODO' + or item.name == 'VJOURNAL')] + else: + try: + cal = vobject.readOne(calstr) + except Exception, e: + self.em.rclog("Openfile: cant parse object: %s" % str(e)) + return False + for lstnm in ('vevent_list', 'vtodo_list', 'vjournal_list'): + lst = getattr(cal, lstnm, []) + for ev in lst: + 
self.contents.append(ev.serialize()) + + #self.em.rclog("openfile: Entry count: %d"%(len(self.contents))) return True def getipath(self, params): @@ -48,12 +97,84 @@ class IcalExtractor: def getnext(self, params): if self.currentindex >= len(self.contents): - #em.rclog("getnext: EOF hit") + self.em.rclog("getnext: EOF hit") return (False, "", "", rclexecm.RclExecM.eofnow) else: ret= self.extractone(self.currentindex) self.currentindex += 1 return ret +# Trivial splitter: cut objects on BEGIN/END (only for 'interesting' objects), +# ignoring all other syntax. +class ICalSimpleSplitter: + # Note that if an 'interesting' element is nested inside another one, + # it will not be extracted (it stays as text in the enclosing element). This is + # not an issue, and I don't think it can happen with the current list. + interesting = ('VTODO', 'VEVENT', 'VJOURNAL') + + def splitcalendar(self, fin): + curblkname = '' + curblk = '' + + lo = [] + for line in fin: + line = line.rstrip() + if line == '': + continue + + if curblkname: + curblk = curblk + line + "\n" + + l = line.split(":") + if len(l) < 2: + continue + + # If not currently inside a block and we see an + # 'interesting' BEGIN, start block + if curblkname == '' and l[0].upper() == "BEGIN" : + name = l[1].upper() + if name in ICalSimpleSplitter.interesting: + curblkname = name + curblk = curblk + line + "\n" + + # If currently accumulating block lines, check for end + if curblkname and l[0].upper() == "END" and \ + l[1].upper() == curblkname: + lo.append(curblk) + curblkname = '' + curblk = '' + + if curblk: + lo.append(curblk) + curblkname = '' + curblk = '' + + return lo + + +##### Main program: either talk to the parent or execute test loop + e = rclexecm.RclExecM() -e.mainloop(IcalExtractor(e)) +ical = IcalExtractor(e) + +if len(sys.argv) == 1: + e.mainloop(ical) +else: + # Got a file name parameter: testing without an execm parent + # Loop on all entries + if not ical.openfile({'filename:':sys.argv[1]}): + print "Open error" + 
sys.exit(1) + + ecnt = 0 + while 1: + ok, data, ipath, eof = ical.getnext("") + if ok: + ecnt = ecnt + 1 + print "=========== ENTRY %d =================" % ecnt + print data + print + else: + print "Got error, eof %d"%eof + break +