diff --git a/src/filters/rclpurple b/src/filters/rclpurple index 39a8a784..b991e78b 100755 --- a/src/filters/rclpurple +++ b/src/filters/rclpurple @@ -82,41 +82,79 @@ umask 77 # !! Leave the following line unmodified ! #ENDRECFILTCOMMONCODE -checkcmds awk iconv +checkcmds awk awk ' # First line: parse from, to , output html header NR == 1 { - if (NF != 13) { + if (NF != 14 && NF != 13 && NF != 9) { printf("Bad format: (NF %d) %s\n", NF, $0) exit 1 } to = $3 - from = $12 - proto = $13 - date = $5 " " $6 " " $7 " " $8 " " $9 " " $10 - #printf("from [%s] to [%s] proto [%s] date [%s]\n", from, to, proto, date) + if (NF == 14 || NF == 13) { + mon_i["Jan"] = "01" + mon_i["Feb"] = "02" + mon_i["Mar"] = "03" + mon_i["Apr"] = "04" + mon_i["May"] = "05" + mon_i["Jun"] = "06" + mon_i["Jul"] = "07" + mon_i["Aug"] = "08" + mon_i["Sep"] = "09" + mon_i["Oct"] = "10" + mon_i["Nov"] = "11" + mon_i["Dec"] = "12" + date = $8 "-" mon_i[$7] "-" $6 "T" $9 + if (NF == 14) { + from = $13 + } + if (NF == 13) { + from = $12 + } + } + + if (NF == 9) { + from = $8 + date = $5 + } + + #printf("from [%s] to [%s] date [%s]\n", from, to, date) + print "
" print ""
+
+ if (ENVIRON["RECOLL_FILTER_FORPREVIEW"] == "yes") {
+ printf("%s\n", $0)
+ }
+
+ # Remember who the main participants are, so that they are output only
+ # once while indexing the conversation body: indexing the same names
+ # repeatedly would give those terms excessive weight.
authors[from] = "yes"
authors[to] = "yes"
next
}
-# Message first line. We strip from/to and time when indexing
+
/^\([0-2][0-9]:[0-5][0-9]:[0-5][0-9]\)/ {
+ # Conversation element 1st line. We strip from/to (except 1st
+ # occurrence) and time when indexing. Time is not interesting and
+ # repeated from/to indexing would give excessive weight
if (ENVIRON["RECOLL_FILTER_FORPREVIEW"] == "yes") {
- # Preview: output everything
- print $0 " " "<br>"
+ # Preview: output everything
+ print $0
} else {
- # Index: output only text, except each new author once
- #printf("INDEX: NF %d [%s] [%s] [%s] ", NF, $1, $2, $3);
+ # Index: output only text, except each new author once. Unfortunately,
+ # it is too late to add them to the "author" field.
from = $2
sub(":$", "", from);
if (authors[from] == "") {
@@ -126,16 +164,16 @@ NR == 1 {
- for (idx = 3; idx <= NR; idx++) {
+ for (idx = 3; idx <= NF; idx++) { # NF = fields on this line (NR is the line number)
printf("%s ", $idx)
}
- printf("<br>\n")
+ printf("\n")
}
next
}
-# Continuation line: print it
+# Conversation element continuation line: print it
{
- printf("%s<br>\n", $0)
+ printf("%s\n", $0)
}
END {
- printf("\n")
+ printf("