PST: account for badly formed headers

This commit is contained in:
Jean-Francois Dockes 2021-10-21 20:42:27 +02:00
parent 1d158f329a
commit 58d98b5626

View File

@ -28,12 +28,14 @@ import os
import pathlib import pathlib
import email.parser import email.parser
import email.policy import email.policy
import email.message
import mailbox import mailbox
import subprocess import subprocess
import rclexecm import rclexecm
import rclconfig import rclconfig
import conftree import conftree
import base64 import base64
import traceback
_mswindows = (sys.platform == "win32" or sys.platform == "msys") _mswindows = (sys.platform == "win32" or sys.platform == "msys")
if _mswindows: if _mswindows:
@ -95,14 +97,26 @@ class EmailBuilder(object):
newmsg = email.message.EmailMessage(policy=email.policy.default) newmsg = email.message.EmailMessage(policy=email.policy.default)
headerstr = self.headers.decode("UTF-8", errors='replace') headerstr = self.headers.decode("UTF-8", errors='replace')
# print("%s" % headerstr) # print("%s" % headerstr)
headers = self.parser.parsestr(headerstr, headersonly=True) try:
headers = self.parser.parsestr(headerstr, headersonly=True)
except:
# Parsing the headers sometimes fails, for example with 'day is out of range for month'.
# Try to go on without headers.
headers = email.message.EmailMessage()
#self.log("EmailBuilder: content-type %s" % headers['content-type']) #self.log("EmailBuilder: content-type %s" % headers['content-type'])
for nm in ('from', 'subject', 'date'): for nm in ('from', 'subject', 'date'):
if nm in headers: if nm in headers:
newmsg.add_header(nm, headers[nm]) try:
newmsg.add_header(nm, headers[nm])
except:
pass
for h in ('to', 'cc'): for h in ('to', 'cc'):
tolist = headers.get_all(h) try:
tolist = headers.get_all(h)
except:
tolist = []
if not tolist: if not tolist:
continue continue
alldests = "" alldests = ""
@ -113,7 +127,10 @@ class EmailBuilder(object):
alldests += sd + ", " alldests += sd + ", "
if alldests: if alldests:
alldests = alldests.rstrip(", ") alldests = alldests.rstrip(", ")
newmsg.add_header(h, alldests) try:
newmsg.add_header(h, alldests)
except:
pass
# Decoding the body: the .pst contains the text value decoded from qp # Decoding the body: the .pst contains the text value decoded from qp
# or base64 (at least that's what libpff sends). Unfortunately, it # or base64 (at least that's what libpff sends). Unfortunately, it
@ -382,6 +399,7 @@ class PstExtractor(object):
return(False, "", "", rclexecm.RclExecM.eofnow) return(False, "", "", rclexecm.RclExecM.eofnow)
except Exception as ex: except Exception as ex:
self.em.rclog("getnext: exception: %s" % ex) self.em.rclog("getnext: exception: %s" % ex)
traceback.print_exc()
return(False, "", "", rclexecm.RclExecM.eofnow) return(False, "", "", rclexecm.RclExecM.eofnow)
return (True, doc, ipath, rclexecm.RclExecM.noteof) return (True, doc, ipath, rclexecm.RclExecM.noteof)