pdf: discard the pdftk stdout message "Error occurred during initialization of VM"; it breaks PDF indexing when it occurs

This commit is contained in:
Jean-Francois Dockes 2020-11-04 14:33:55 +01:00
parent f50a4e54b1
commit f0abc1df68

View File

@ -209,8 +209,16 @@ class PDFExtractor:
return True
try:
vacuumdir(tmpdir)
subprocess.check_call([self.pdftk, self.filename, "unpack_files",
"output", tmpdir])
# Note: the Java version of pdftk sometimes/often fails
# here, writing "Error occurred during initialization of VM"
# to stdout. Maybe insufficient resources when exec'd from
# Python? In any case, the important thing is to discard
# the output, until we fix the error or, preferably, find
# a way to do it with poppler...
with open(os.devnull, 'w') as FNULL:
subprocess.check_call(
[self.pdftk, self.filename, "unpack_files", "output",
tmpdir], stdout=FNULL)
self.attachlist = sorted(os.listdir(tmpdir))
return True
except Exception as e: