PST : account for badly formed headers

This commit is contained in:
Jean-Francois Dockes 2021-10-21 20:42:27 +02:00
parent 1d158f329a
commit 58d98b5626

View File

@ -28,12 +28,14 @@ import os
import pathlib
import email.parser
import email.policy
import email.message
import mailbox
import subprocess
import rclexecm
import rclconfig
import conftree
import base64
import traceback
_mswindows = (sys.platform == "win32" or sys.platform == "msys")
if _mswindows:
@ -95,14 +97,26 @@ class EmailBuilder(object):
newmsg = email.message.EmailMessage(policy=email.policy.default)
headerstr = self.headers.decode("UTF-8", errors='replace')
# print("%s" % headerstr)
headers = self.parser.parsestr(headerstr, headersonly=True)
try:
headers = self.parser.parsestr(headerstr, headersonly=True)
except:
# This sometimes fails, for example with 'day is out of range for month'. Try to go on
# without headers
headers = email.message.EmailMessage()
#self.log("EmailBuilder: content-type %s" % headers['content-type'])
for nm in ('from', 'subject', 'date'):
if nm in headers:
newmsg.add_header(nm, headers[nm])
try:
newmsg.add_header(nm, headers[nm])
except:
pass
for h in ('to', 'cc'):
tolist = headers.get_all(h)
try:
tolist = headers.get_all(h)
except:
tolist = []
if not tolist:
continue
alldests = ""
@ -113,7 +127,10 @@ class EmailBuilder(object):
alldests += sd + ", "
if alldests:
alldests = alldests.rstrip(", ")
newmsg.add_header(h, alldests)
try:
newmsg.add_header(h, alldests)
except:
pass
# Decoding the body: the .pst contains the text value decoded from qp
# or base64 (at least that's what libpff sends). Unfortunately, it
@ -382,6 +399,7 @@ class PstExtractor(object):
return(False, "", "", rclexecm.RclExecM.eofnow)
except Exception as ex:
self.em.rclog("getnext: exception: %s" % ex)
traceback.print_exc()
return(False, "", "", rclexecm.RclExecM.eofnow)
return (True, doc, ipath, rclexecm.RclExecM.noteof)