Merge branch fixocrtmp

This commit is contained in:
Jean-Francois Dockes 2021-12-03 10:53:51 +01:00
commit d942b23c85
4 changed files with 47 additions and 15 deletions

View File

@ -26,6 +26,8 @@
import os
import sys
import atexit
import signal
import importlib.util
import rclconfig
@ -33,7 +35,27 @@ import rclocrcache
import rclexecm
def _deb(s):
    """Log the debug message *s* through rclexecm.logmsg.

    The message is prefixed with "rclocr: " so that its origin can be
    identified in the log. It is emitted exactly once.
    """
    rclexecm.logmsg("rclocr: %s" % s)
# Module whose cleanocr() must be called at exit. None until one is selected.
ocrcleanupmodule = None


def finalcleanup():
    """Call cleanocr() on the selected OCR module, if one has been set.

    Registered with atexit just below, so it runs on interpreter exit.
    """
    module = ocrcleanupmodule
    if not module:
        return
    module.cleanocr()


atexit.register(finalcleanup)
def signal_handler(sig, frame):
    """Turn a termination signal into a regular interpreter exit.

    Exiting through sys.exit() raises SystemExit, so the functions
    registered with atexit get a chance to run, which would not happen if
    the process were killed by the signal's default action.

    sig: number of the signal received (unused).
    frame: stack frame which was interrupted (unused).
    """
    sys.exit(1)


def _install_signal_handlers():
    """Route the usual termination signals to signal_handler, best effort."""
    # Not all signals necessarily exist on all systems: look each one up by
    # name and skip those which this platform does not define.
    for name in ("SIGHUP", "SIGINT", "SIGQUIT", "SIGTERM"):
        signum = getattr(signal, name, None)
        if signum is None:
            continue
        try:
            signal.signal(signum, signal_handler)
        except (ValueError, OSError):
            # ValueError: not called from the main thread, or the signal
            # number is not valid here. OSError: the system refused it.
            # Installing the handlers is best effort, so keep going.
            pass


_install_signal_handlers()
def Usage():
_deb("Usage: rclocr.py <imagefilename>")
@ -72,7 +94,7 @@ if incache:
try:
breakwrite(sys.stdout.buffer, data)
except Exception as e:
_deb("RCLOCR error writing: %s" % e)
_deb("error writing: %s" % e)
sys.exit(1)
sys.exit(0)
@ -112,6 +134,7 @@ if not ok:
# The OCR module will retrieve its specific parameters from the
# configuration
ocrcleanupmodule = ocr
status, data = ocr.runocr(config, path)
if not status:

View File

@ -42,6 +42,9 @@ abbyocrdir = ""
def _deb(s):
    """Send the debug message *s* to the log through rclexecm.logmsg."""
    log = rclexecm.logmsg
    log(s)
def cleanocr():
    """Cleanup entry point: this module has nothing to clean up."""
    return None
# Return true if abbyy appears to be available
def ocrpossible(config, path):
global abbyyocrcmd

View File

@ -21,7 +21,6 @@
import os
import sys
import atexit
import tempfile
import subprocess
import glob
@ -41,8 +40,7 @@ pdftoppmcmd = None
def _deb(s):
    """Log the debug message *s* through rclexecm.logmsg.

    The message is prefixed with "rclocrtesseract: " so that its origin can
    be identified in the log. It is emitted exactly once.
    """
    rclexecm.logmsg("rclocrtesseract: %s" % s)
def vacuumdir(dir):
if dir:
@ -61,18 +59,16 @@ def _maybemaketmpdir():
_deb("openfile: vacuumdir %s failed" % tmpdir)
return False
else:
tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
tmpdir = tempfile.mkdtemp(prefix='rclocrtmp')
def finalcleanup():
def cleanocr():
global tmpdir
if tmpdir:
vacuumdir(tmpdir)
os.rmdir(tmpdir)
atexit.register(finalcleanup)
tmpdir = None
# Return true if tesseract and the appropriate conversion program for
# the file type (e.g. pdftoppm for pdf) appear to be available
def ocrpossible(config, path):

View File

@ -33,6 +33,7 @@ import glob
import traceback
import atexit
import signal
import time
import rclexecm
import rclconfig
@ -66,11 +67,18 @@ _htmlprefix =b'''<html><head>
_htmlsuffix = b'''</pre></body></html>'''
def finalcleanup():
    """Empty and remove the temporary directory, if one is currently set.

    Does nothing when the global tmpdir is unset. Otherwise the directory
    is emptied with vacuumdir(), removed, and tmpdir is reset to None.
    """
    global tmpdir
    if not tmpdir:
        return
    workdir = tmpdir
    vacuumdir(workdir)
    # os.rmdir() only removes empty directories, hence the vacuumdir() above.
    os.rmdir(workdir)
    tmpdir = None
# Object with a wait() method while a child process is being run, else None.
ocrproc = None


def signal_handler(signal, frame):
    """Wait for the recorded child process, if any, then exit with status 1.

    signal: number of the signal received (unused; note that inside this
        function the name hides the signal module).
    frame: stack frame which was interrupted (unused).
    """
    global ocrproc
    child = ocrproc
    if child:
        # Let the child terminate before this process goes away.
        child.wait()
        ocrproc = None
    sys.exit(1)
# Run finalcleanup() when the interpreter exits.
atexit.register(finalcleanup)
@ -491,9 +499,11 @@ class PDFExtractor:
s = self.config.getConfParam("pdfocr")
if rclexecm.configparamtrue(s):
try:
cmd = [sys.executable, os.path.join(_execdir, "rclocr.py"),
self.filename]
data = subprocess.check_output(cmd)
cmd = [sys.executable, os.path.join(_execdir, "rclocr.py"), self.filename]
global ocrproc
ocrproc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
data, stderr = ocrproc.communicate()
ocrproc = None
html = _htmlprefix + rclexecm.htmlescape(data) + _htmlsuffix
except Exception as e:
self.em.rclog("%s failed: %s" % (cmd, e))