pdf: fix test on pdfocr config value

This commit is contained in:
Jean-Francois Dockes 2019-10-11 12:05:26 +02:00
parent 9f1d482564
commit f66b5d1ef9
2 changed files with 23 additions and 3 deletions

View File

@ -48,6 +48,21 @@ def subprocfile(fn):
else: else:
return fn return fn
def configparamtrue(value):
    """Interpret a configuration parameter value as a boolean.

    The value is considered true when it converts to a non-zero integer
    (e.g. "1", 2) or, failing the integer conversion, when its first
    character is 't' or 'T' (e.g. "true", "True"). Anything else,
    including None and the empty string, is considered false.

    Both text and bytes values are accepted.

    :param value: the raw configuration value (str, bytes, int or None).
    :return: True or False.
    """
    if not value:
        return False
    try:
        return int(value) != 0
    except (TypeError, ValueError):
        # Not a number: fall through to the textual test. Only conversion
        # errors are caught, so that KeyboardInterrupt and SystemExit are
        # not swallowed as they would be by a bare except.
        pass
    # Slice instead of indexing: indexing a bytes object yields an int on
    # Python 3, which cannot be tested for membership in a str.
    first = value[:1]
    if isinstance(first, bytes):
        return first in (b't', b'T')
    return first in ('t', 'T')
my_config = rclconfig.RclConfig() my_config = rclconfig.RclConfig()
############################################ ############################################

View File

@ -138,6 +138,7 @@ class PDFExtractor:
self.attextractdone = False self.attextractdone = False
self.attachlist = [] self.attachlist = []
cf_attach = self.config.getConfParam("pdfattach") cf_attach = self.config.getConfParam("pdfattach")
cf_attach = rclexecm.configparamtrue(cf_attach)
if cf_attach: if cf_attach:
self.pdftk = rclexecm.which("pdftk") self.pdftk = rclexecm.which("pdftk")
if self.pdftk: if self.pdftk:
@ -494,18 +495,22 @@ class PDFExtractor:
if isempty and self.ocrpossible: if isempty and self.ocrpossible:
self.config.setKeyDir(os.path.dirname(self.filename)) self.config.setKeyDir(os.path.dirname(self.filename))
cf_doocr = self.config.getConfParam("pdfocr") s = self.config.getConfParam("pdfocr")
if cf_doocr or os.path.isfile(os.path.join(self.confdir, "ocrpdf")): cf_doocr = rclexecm.configparamtrue(s)
file_doocr = os.path.isfile(os.path.join(self.confdir, "ocrpdf"))
if cf_doocr or file_doocr:
html = self.ocrpdf() html = self.ocrpdf()
if self.extrameta: if self.extrameta:
try: try:
html = self._setextrameta(html) html = self._setextrameta(html)
except Exception as err: except Exception as err:
self.em.rclog("Metadata extraction failed: %s %s" % (err, traceback.format_exc())) self.em.rclog("Metadata extraction failed: %s %s" %
(err, traceback.format_exc()))
return (True, html, "", eof) return (True, html, "", eof)
def maybemaketmpdir(self): def maybemaketmpdir(self):
global tmpdir global tmpdir
if tmpdir: if tmpdir: