diff --git a/src/filters/rclpdf.py b/src/filters/rclpdf.py index cb95c11e..5471a40b 100755 --- a/src/filters/rclpdf.py +++ b/src/filters/rclpdf.py @@ -469,7 +469,40 @@ class PDFExtractor: if annotsfield: self.em.setfield("pdfannot", annotsfield) return html - + + def _patch_meta(self, html): + '''This fixes https://gitlab.freedesktop.org/poppler/poppler/-/issues/136''' + + if not _mswindows: + pdfinfo = rclexecm.which("pdfinfo") + if not pdfinfo: + pdfinfo = rclexecm.which("poppler/pdfinfo") + + if not pdfinfo: + return html + + info = subprocess.check_output([pdfinfo, '-isodates', self.filename]) + + meta = {} + for line in info.split(b'\n'): + try: + key, value = line.strip().split(b':', 1) + meta[key.strip()] = value.strip().replace(b'"', b'\"') + except ValueError: + pass + + if b'CreationDate' in meta: + meta[b'date'] = meta[b'CreationDate'] + + title = meta.get('Title') + head = [b'