#!/bin/sh
# @(#$Id: rclgaim,v 1.1 2005-11-18 17:03:12 dockes Exp $  (C) 2004 J.F.Dockes
# Parts taken from Estraier:
#================================================================
# Estraier: a personal full-text search system
# Copyright (C) 2003-2004 Mikio Hirabayashi
#================================================================
#================================================================
# rclgaim  (src/filters/rclgaim)
# Extract text and other information from gaim logs
#
# Usage: rclgaim infile
#
# Reads one gaim log file and writes HTML text for Recoll on stdout.
# Diagnostics go to stderr so they never end up in the indexed output.
#
# Environment:
#   RECOLL_FILTER_FORPREVIEW  when set to "yes", message lines are output
#                             whole (time stamp and author included);
#                             otherwise only the message text is output,
#                             plus each new author name once.
#
# Exit status: 0 on success, 1 on usage error, missing command, missing
# input file, or a first line that does not have the expected 9 fields.
#================================================================


# set variables
LANG=C ; export LANG
LC_ALL=C ; export LC_ALL
progname="rclgaim"


# show help message
if [ $# -ne 1 ] || [ "$1" = "--help" ]; then
  printf 'Convert a gaim log to unformatted HTML text for Recoll.\n' >&2
  printf 'Usage: %s infile\n' "$progname" >&2
  exit 1
fi

infile="$1"

# iscmd NAME
# Return 0 if NAME (a bare command name looked up in PATH, or a path
# containing a slash) can be executed, non-zero otherwise.
iscmd()
{
  command -v "$1" >/dev/null 2>&1
}

# checkcmds NAME...
# Exit the script with status 1 and a message on stderr as soon as one of
# the named commands cannot be found.
checkcmds()
{
  for cmd in "$@"; do
    if ! iscmd "$cmd"; then
      printf '%s not found\n' "$cmd" >&2
      exit 1
    fi
  done
}

# awk is the only external command this filter runs.
checkcmds awk

# check the input file existence
if [ ! -f "$infile" ]; then
  printf '%s: %s: no such file\n' "$progname" "$infile" >&2
  exit 1
fi

# NOTE(review): the HTML tags inside the awk string literals below were
# reconstructed (html/head/title/meta date/body/br); the copy of this script
# they were rebuilt from had its markup stripped. Confirm against the
# upstream file before relying on the exact tags.
awk '
# Escape the characters that are special in HTML text and attribute values.
# The ampersand must be handled first so later entities are not re-escaped.
function esc(s) {
    gsub(/&/, "\\&amp;", s)
    gsub(/</, "\\&lt;", s)
    gsub(/>/, "\\&gt;", s)
    gsub(/"/, "\\&quot;", s)
    return s
}

# First line: parse from, to, output html header.
# The line must have exactly 9 fields: field 3 is the peer, fields 5 and 6
# the date and time, field 8 the local account (field 9 is unused here).
# Presumably: Conversation with TO at DATE TIME on FROM (PROTO) - TODO confirm
NR == 1 {
    if (NF != 9) {
        # Report on stderr; stdout is the document handed to the indexer.
        printf("Bad format: (NF %d) %s\n", NF, $0) | "cat 1>&2"
        exit 1
    }
    to = $3
    from = $8
    date = $5 " " $6
    print "<html><head>"
    print "<title> " esc($0) "</title>"
    # NOTE(review): this line printed a tag that could not be recovered
    # (presumably a content-type meta) - TODO restore from upstream.
    print ""
    # Yes there is no such thing as a "date" meta tag. This probably should
    # be http-equiv=last-modified or such
    printf("<meta name=\"date\" content=\"%s\">\n", esc(date))
    print "</head><body>"
    header_done = 1

    # Remember who the main persons are.
    authors[from] = "yes"
    authors[to] = "yes"
    next
}

# Message first line. We strip from/to and time when indexing
/^\([0-2][0-9]:[0-5][0-9]:[0-5][0-9]\)/ {
    if (ENVIRON["RECOLL_FILTER_FORPREVIEW"] == "yes") {
        # Preview: output everything
        print esc($0) " " "<br>"
    } else {
        # Index: output only text, except each new author once.
        # Field 1 is the time stamp, field 2 the author followed by a colon.
        from = $2
        sub(/:$/, "", from)
        if (!(from in authors)) {
            authors[from] = "yes"
            printf("%s : ", esc(from))
        }
        # The message text is every remaining field of this line (NF, the
        # field count - not NR, the line number).
        for (idx = 3; idx <= NF; idx++) {
            printf("%s ", esc($idx))
        }
        printf("<br>\n")
    }
    next
}

# Continuation line: print it
{
    printf("%s<br>\n", esc($0))
}

# Close the document only if the header was written: END also runs after
# the "exit 1" above, and must not emit closing tags in that case.
END {
    if (header_done) {
        printf("</body></html>\n")
    }
}
' < "$infile"

# Propagate the awk status so a rejected log is reported as a failure.
exit $?