From 694d0f155d4845a5e42f94b8b4e3159a2fdad9df Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sat, 10 Oct 2020 12:48:18 +0200 Subject: [PATCH] pdf annot: guard against possible exception while formatting results --- src/filters/rclpdf.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/filters/rclpdf.py b/src/filters/rclpdf.py index 8415735a..692f1778 100755 --- a/src/filters/rclpdf.py +++ b/src/filters/rclpdf.py @@ -427,15 +427,19 @@ class PDFExtractor: for annot_mapping in annot_mappings: atype = annot_mapping.annot.get_annot_type().value_name if atype != 'POPPLER_ANNOT_LINK': - atext = f.format( - pnum, - annot_mapping.annot.get_modified(), - annot_mapping.annot.get_annot_type().value_nick, - annot_mapping.annot.get_contents()) + "\n" - if pnum in abypage: - abypage[pnum] += atext - else: - abypage[pnum] = atext + # Catch because we sometimes get None values + try: + atext = f.format( + pnum, + annot_mapping.annot.get_modified(), + annot_mapping.annot.get_annot_type().value_nick, + annot_mapping.annot.get_contents()) + "\n" + if pnum in abypage: + abypage[pnum] += atext + else: + abypage[pnum] = atext + except: + pass #self.em.rclog("Annotations: %s" % abypage) pagevec = html.split(b"\f") html = b""