Merge branch fixocrtmp

This commit is contained in:
Jean-Francois Dockes 2021-12-03 10:53:51 +01:00
commit d942b23c85
4 changed files with 47 additions and 15 deletions

View File

@ -26,6 +26,8 @@
import os import os
import sys import sys
import atexit
import signal
import importlib.util import importlib.util
import rclconfig import rclconfig
@ -33,7 +35,27 @@ import rclocrcache
import rclexecm import rclexecm
def _deb(s): def _deb(s):
rclexecm.logmsg(s) rclexecm.logmsg("rclocr: %s" % s)
# OCR backend module whose cleanocr() hook must run when the process exits.
# Stays None until a backend has been selected; it is assigned right before
# the backend's runocr() is called.
ocrcleanupmodule = None


def finalcleanup():
    """Run the selected OCR backend's cleanup hook at interpreter exit.

    Does nothing when no backend module has been recorded in
    ``ocrcleanupmodule``.
    """
    backend = ocrcleanupmodule
    if not backend:
        return
    backend.cleanocr()


# Same effect as decorating finalcleanup with @atexit.register.
atexit.register(finalcleanup)
def signal_handler(sig, frame):
    """Convert a termination signal into a regular interpreter exit.

    sys.exit() raises SystemExit, so the interpreter shuts down through its
    normal path (which is what lets registered atexit handlers run) instead
    of being killed by the signal's default action.

    Args:
        sig: number of the signal that was received (unused).
        frame: stack frame that was interrupted (unused).

    Raises:
        SystemExit: always, with exit status 1.
    """
    sys.exit(1)


# Not all of these signals necessarily exist on all systems: skip the names
# the signal module does not define, and tolerate the ones it refuses to
# install a handler for.
for _signame in ("SIGHUP", "SIGINT", "SIGQUIT", "SIGTERM"):
    _signum = getattr(signal, _signame, None)
    if _signum is None:
        continue
    try:
        signal.signal(_signum, signal_handler)
    except (ValueError, OSError):
        # Best effort: signal.signal() raises ValueError for a signal that
        # is not supported here or when not called from the main thread.
        # Only these errors are ignored, so KeyboardInterrupt/SystemExit
        # are no longer swallowed as they were by a bare "except:".
        pass
def Usage(): def Usage():
_deb("Usage: rclocr.py <imagefilename>") _deb("Usage: rclocr.py <imagefilename>")
@ -72,7 +94,7 @@ if incache:
try: try:
breakwrite(sys.stdout.buffer, data) breakwrite(sys.stdout.buffer, data)
except Exception as e: except Exception as e:
_deb("RCLOCR error writing: %s" % e) _deb("error writing: %s" % e)
sys.exit(1) sys.exit(1)
sys.exit(0) sys.exit(0)
@ -112,6 +134,7 @@ if not ok:
# The OCR module will retrieve its specific parameters from the # The OCR module will retrieve its specific parameters from the
# configuration # configuration
ocrcleanupmodule = ocr
status, data = ocr.runocr(config, path) status, data = ocr.runocr(config, path)
if not status: if not status:

View File

@ -42,6 +42,9 @@ abbyocrdir = ""
def _deb(s): def _deb(s):
rclexecm.logmsg(s) rclexecm.logmsg(s)
def cleanocr():
    """Cleanup hook for this OCR backend; intentionally a no-op.

    NOTE(review): presumably present so that this module offers the same
    cleanocr() entry point that rclocr.py invokes on the selected backend
    at exit -- confirm against the caller.
    """
    return None
# Return true if abbyy appears to be available # Return true if abbyy appears to be available
def ocrpossible(config, path): def ocrpossible(config, path):
global abbyyocrcmd global abbyyocrcmd

View File

@ -21,7 +21,6 @@
import os import os
import sys import sys
import atexit
import tempfile import tempfile
import subprocess import subprocess
import glob import glob
@ -41,8 +40,7 @@ pdftoppmcmd = None
def _deb(s): def _deb(s):
rclexecm.logmsg(s) rclexecm.logmsg("rclocrtesseract: %s" % s)
def vacuumdir(dir): def vacuumdir(dir):
if dir: if dir:
@ -61,18 +59,16 @@ def _maybemaketmpdir():
_deb("openfile: vacuumdir %s failed" % tmpdir) _deb("openfile: vacuumdir %s failed" % tmpdir)
return False return False
else: else:
tmpdir = tempfile.mkdtemp(prefix='rclmpdf') tmpdir = tempfile.mkdtemp(prefix='rclocrtmp')
def finalcleanup(): def cleanocr():
global tmpdir
if tmpdir: if tmpdir:
vacuumdir(tmpdir) vacuumdir(tmpdir)
os.rmdir(tmpdir) os.rmdir(tmpdir)
tmpdir = None
atexit.register(finalcleanup)
# Return true if tesseract and the appropriate conversion program for # Return true if tesseract and the appropriate conversion program for
# the file type (e.g. pdftoppm for pdf) appear to be available # the file type (e.g. pdftoppm for pdf) appear to be available
def ocrpossible(config, path): def ocrpossible(config, path):

View File

@ -33,6 +33,7 @@ import glob
import traceback import traceback
import atexit import atexit
import signal import signal
import time
import rclexecm import rclexecm
import rclconfig import rclconfig
@ -66,11 +67,18 @@ _htmlprefix =b'''<html><head>
_htmlsuffix = b'''</pre></body></html>''' _htmlsuffix = b'''</pre></body></html>'''
def finalcleanup(): def finalcleanup():
global tmpdir
if tmpdir: if tmpdir:
vacuumdir(tmpdir) vacuumdir(tmpdir)
os.rmdir(tmpdir) os.rmdir(tmpdir)
tmpdir = None
ocrproc = None
def signal_handler(signal, frame): def signal_handler(signal, frame):
global ocrproc
if ocrproc:
ocrproc.wait()
ocrproc = None
sys.exit(1) sys.exit(1)
atexit.register(finalcleanup) atexit.register(finalcleanup)
@ -491,9 +499,11 @@ class PDFExtractor:
s = self.config.getConfParam("pdfocr") s = self.config.getConfParam("pdfocr")
if rclexecm.configparamtrue(s): if rclexecm.configparamtrue(s):
try: try:
cmd = [sys.executable, os.path.join(_execdir, "rclocr.py"), cmd = [sys.executable, os.path.join(_execdir, "rclocr.py"), self.filename]
self.filename] global ocrproc
data = subprocess.check_output(cmd) ocrproc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
data, stderr = ocrproc.communicate()
ocrproc = None
html = _htmlprefix + rclexecm.htmlescape(data) + _htmlsuffix html = _htmlprefix + rclexecm.htmlescape(data) + _htmlsuffix
except Exception as e: except Exception as e:
self.em.rclog("%s failed: %s" % (cmd, e)) self.em.rclog("%s failed: %s" % (cmd, e))