From f66b5d1ef985dcff2a21df5f89f4e22a99b8f1a9 Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes
Date: Fri, 11 Oct 2019 12:05:26 +0200
Subject: [PATCH] pdf: fix test on pdfocr config value

---
 src/filters/rclexecm.py | 15 +++++++++++++++
 src/filters/rclpdf.py   | 11 ++++++++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/src/filters/rclexecm.py b/src/filters/rclexecm.py
index e889eab6..cb83a325 100644
--- a/src/filters/rclexecm.py
+++ b/src/filters/rclexecm.py
@@ -48,6 +48,21 @@ def subprocfile(fn):
     else:
         return fn
 
+def configparamtrue(value):
+    if not value:
+        return False
+    try:
+        ivalue = int(value)
+        if ivalue:
+            return True
+        else:
+            return False
+    except:
+        pass
+    if value[0] in 'tT':
+        return True
+    return False
+
 my_config = rclconfig.RclConfig()
 
 ############################################
diff --git a/src/filters/rclpdf.py b/src/filters/rclpdf.py
index 917ae325..86a0980d 100755
--- a/src/filters/rclpdf.py
+++ b/src/filters/rclpdf.py
@@ -138,6 +138,7 @@ class PDFExtractor:
         self.attextractdone = False
         self.attachlist = []
         cf_attach = self.config.getConfParam("pdfattach")
+        cf_attach = rclexecm.configparamtrue(cf_attach)
         if cf_attach:
             self.pdftk = rclexecm.which("pdftk")
             if self.pdftk:
@@ -494,18 +495,22 @@ class PDFExtractor:
 
         if isempty and self.ocrpossible:
             self.config.setKeyDir(os.path.dirname(self.filename))
-            cf_doocr = self.config.getConfParam("pdfocr")
-            if cf_doocr or os.path.isfile(os.path.join(self.confdir, "ocrpdf")):
+            s = self.config.getConfParam("pdfocr")
+            cf_doocr = rclexecm.configparamtrue(s)
+            file_doocr = os.path.isfile(os.path.join(self.confdir, "ocrpdf"))
+            if cf_doocr or file_doocr:
                 html = self.ocrpdf()
 
         if self.extrameta:
             try:
                 html = self._setextrameta(html)
             except Exception as err:
-                self.em.rclog("Metadata extraction failed: %s %s" % (err, traceback.format_exc()))
+                self.em.rclog("Metadata extraction failed: %s %s" %
+                              (err, traceback.format_exc()))
 
         return (True, html, "", eof)
 
+
     def maybemaketmpdir(self):
         global tmpdir
         if tmpdir: