From f0abc1df684e8eb56202adcf84826b06725e74a6 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Wed, 4 Nov 2020 14:33:55 +0100 Subject: [PATCH] pdf: discard pdftk stdout message "Error occurred during initialization of VM", it breaks pdf indexing when it occurs --- src/filters/rclpdf.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/filters/rclpdf.py b/src/filters/rclpdf.py index dddc38dd..14f73938 100755 --- a/src/filters/rclpdf.py +++ b/src/filters/rclpdf.py @@ -209,8 +209,16 @@ class PDFExtractor: return True try: vacuumdir(tmpdir) - subprocess.check_call([self.pdftk, self.filename, "unpack_files", - "output", tmpdir]) + # Note: the java version of pdftk sometimes/often fails + # here with writing to stdout: "Error occurred during + # initialization of VM". Maybe insufficient resources when + # execd from Python ? In any case, the important thing is + # to discard the output, until we fix the error or + # preferably find a way to do it with poppler... + with open(os.devnull, 'w') as FNULL: + subprocess.check_call( + [self.pdftk, self.filename, "unpack_files", "output", + tmpdir], stdout=FNULL) self.attachlist = sorted(os.listdir(tmpdir)) return True except Exception as e: