Merge branch fixocrtmp
This commit is contained in:
commit
d942b23c85
@ -26,6 +26,8 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import atexit
|
||||||
|
import signal
|
||||||
import importlib.util
|
import importlib.util
|
||||||
|
|
||||||
import rclconfig
|
import rclconfig
|
||||||
@ -33,7 +35,27 @@ import rclocrcache
|
|||||||
import rclexecm
|
import rclexecm
|
||||||
|
|
||||||
def _deb(s):
|
def _deb(s):
|
||||||
rclexecm.logmsg(s)
|
rclexecm.logmsg("rclocr: %s" % s)
|
||||||
|
|
||||||
|
ocrcleanupmodule = None
|
||||||
|
@atexit.register
|
||||||
|
def finalcleanup():
|
||||||
|
if ocrcleanupmodule:
|
||||||
|
ocrcleanupmodule.cleanocr()
|
||||||
|
|
||||||
|
def signal_handler(sig, frame):
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Not all signals necessarily exist on all systems, so wrap each registration in try/except
|
||||||
|
try: signal.signal(signal.SIGHUP, signal_handler)
|
||||||
|
except: pass
|
||||||
|
try: signal.signal(signal.SIGINT, signal_handler)
|
||||||
|
except: pass
|
||||||
|
try: signal.signal(signal.SIGQUIT, signal_handler)
|
||||||
|
except: pass
|
||||||
|
try: signal.signal(signal.SIGTERM, signal_handler)
|
||||||
|
except: pass
|
||||||
|
|
||||||
|
|
||||||
def Usage():
|
def Usage():
|
||||||
_deb("Usage: rclocr.py <imagefilename>")
|
_deb("Usage: rclocr.py <imagefilename>")
|
||||||
@ -72,7 +94,7 @@ if incache:
|
|||||||
try:
|
try:
|
||||||
breakwrite(sys.stdout.buffer, data)
|
breakwrite(sys.stdout.buffer, data)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_deb("RCLOCR error writing: %s" % e)
|
_deb("error writing: %s" % e)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
@ -112,6 +134,7 @@ if not ok:
|
|||||||
|
|
||||||
# The OCR module will retrieve its specific parameters from the
|
# The OCR module will retrieve its specific parameters from the
|
||||||
# configuration
|
# configuration
|
||||||
|
ocrcleanupmodule = ocr
|
||||||
status, data = ocr.runocr(config, path)
|
status, data = ocr.runocr(config, path)
|
||||||
|
|
||||||
if not status:
|
if not status:
|
||||||
|
|||||||
@ -42,6 +42,9 @@ abbyocrdir = ""
|
|||||||
def _deb(s):
|
def _deb(s):
|
||||||
rclexecm.logmsg(s)
|
rclexecm.logmsg(s)
|
||||||
|
|
||||||
|
def cleanocr():
|
||||||
|
pass
|
||||||
|
|
||||||
# Return true if abbyy appears to be available
|
# Return true if abbyy appears to be available
|
||||||
def ocrpossible(config, path):
|
def ocrpossible(config, path):
|
||||||
global abbyyocrcmd
|
global abbyyocrcmd
|
||||||
|
|||||||
@ -21,7 +21,6 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import atexit
|
|
||||||
import tempfile
|
import tempfile
|
||||||
import subprocess
|
import subprocess
|
||||||
import glob
|
import glob
|
||||||
@ -41,8 +40,7 @@ pdftoppmcmd = None
|
|||||||
|
|
||||||
|
|
||||||
def _deb(s):
|
def _deb(s):
|
||||||
rclexecm.logmsg(s)
|
rclexecm.logmsg("rclocrtesseract: %s" % s)
|
||||||
|
|
||||||
|
|
||||||
def vacuumdir(dir):
|
def vacuumdir(dir):
|
||||||
if dir:
|
if dir:
|
||||||
@ -61,18 +59,16 @@ def _maybemaketmpdir():
|
|||||||
_deb("openfile: vacuumdir %s failed" % tmpdir)
|
_deb("openfile: vacuumdir %s failed" % tmpdir)
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
|
tmpdir = tempfile.mkdtemp(prefix='rclocrtmp')
|
||||||
|
|
||||||
|
|
||||||
def finalcleanup():
|
def cleanocr():
|
||||||
|
global tmpdir
|
||||||
if tmpdir:
|
if tmpdir:
|
||||||
vacuumdir(tmpdir)
|
vacuumdir(tmpdir)
|
||||||
os.rmdir(tmpdir)
|
os.rmdir(tmpdir)
|
||||||
|
tmpdir = None
|
||||||
|
|
||||||
atexit.register(finalcleanup)
|
|
||||||
|
|
||||||
|
|
||||||
# Return true if tesseract and the appropriate conversion program for
|
# Return true if tesseract and the appropriate conversion program for
|
||||||
# the file type (e.g. pdftoppm for pdf) appear to be available
|
# the file type (e.g. pdftoppm for pdf) appear to be available
|
||||||
def ocrpossible(config, path):
|
def ocrpossible(config, path):
|
||||||
|
|||||||
@ -33,6 +33,7 @@ import glob
|
|||||||
import traceback
|
import traceback
|
||||||
import atexit
|
import atexit
|
||||||
import signal
|
import signal
|
||||||
|
import time
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import rclconfig
|
import rclconfig
|
||||||
@ -66,11 +67,18 @@ _htmlprefix =b'''<html><head>
|
|||||||
_htmlsuffix = b'''</pre></body></html>'''
|
_htmlsuffix = b'''</pre></body></html>'''
|
||||||
|
|
||||||
def finalcleanup():
|
def finalcleanup():
|
||||||
|
global tmpdir
|
||||||
if tmpdir:
|
if tmpdir:
|
||||||
vacuumdir(tmpdir)
|
vacuumdir(tmpdir)
|
||||||
os.rmdir(tmpdir)
|
os.rmdir(tmpdir)
|
||||||
|
tmpdir = None
|
||||||
|
|
||||||
|
ocrproc = None
|
||||||
def signal_handler(signal, frame):
|
def signal_handler(signal, frame):
|
||||||
|
global ocrproc
|
||||||
|
if ocrproc:
|
||||||
|
ocrproc.wait()
|
||||||
|
ocrproc = None
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
atexit.register(finalcleanup)
|
atexit.register(finalcleanup)
|
||||||
@ -491,9 +499,11 @@ class PDFExtractor:
|
|||||||
s = self.config.getConfParam("pdfocr")
|
s = self.config.getConfParam("pdfocr")
|
||||||
if rclexecm.configparamtrue(s):
|
if rclexecm.configparamtrue(s):
|
||||||
try:
|
try:
|
||||||
cmd = [sys.executable, os.path.join(_execdir, "rclocr.py"),
|
cmd = [sys.executable, os.path.join(_execdir, "rclocr.py"), self.filename]
|
||||||
self.filename]
|
global ocrproc
|
||||||
data = subprocess.check_output(cmd)
|
ocrproc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
|
||||||
|
data, stderr = ocrproc.communicate()
|
||||||
|
ocrproc = None
|
||||||
html = _htmlprefix + rclexecm.htmlescape(data) + _htmlsuffix
|
html = _htmlprefix + rclexecm.htmlescape(data) + _htmlsuffix
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.em.rclog("%s failed: %s" % (cmd, e))
|
self.em.rclog("%s failed: %s" % (cmd, e))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user