OCR: small adjustments for Windows

This commit is contained in:
Jean-Francois Dockes 2020-02-28 09:22:03 +01:00
parent 34a20e561a
commit 1fb9421163
3 changed files with 37 additions and 13 deletions

View File

@ -31,13 +31,32 @@ import importlib.util
import rclconfig
import rclocrcache
_mswindows = (sys.platform == "win32")


def _deb(s):
    """Write a diagnostic message, prefixed with "rclocr: ", to stderr.

    The message is emitted exactly once, and only when not running on
    Windows (i.e. when _mswindows is false); on Windows nothing is printed.

    s: the message; it is formatted with "%s".
    """
    if not _mswindows:
        print("rclocr: %s" % s, file=sys.stderr)
def Usage():
    """Report the command-line synopsis through _deb() and exit with status 1."""
    synopsis = "Usage: rclocr.py <imagefilename>"
    _deb(synopsis)
    sys.exit(1)
def breakwrite(f, data, bs=4*1024):
    """Write data to f in successive pieces of at most bs items each.

    On Windows, writing big chunks can fail with a "not enough space"
    error. Seems a combined windows/python bug, depending on versions.
    See https://bugs.python.org/issue11395
    In any case, just break it up.

    f: object with a write() method accepting slices of data.
    data: sliceable sequence to write (e.g. bytes). Empty data results
        in no write() call at all.
    bs: maximum length of the slice passed to each write() call
        (default 4096).

    Raises ValueError if bs is not positive.
    """
    if bs <= 0:
        raise ValueError("breakwrite: bs must be positive, got %r" % (bs,))
    # Stepping the offset by bs yields full-size slices followed by one
    # shorter tail slice when len(data) is not a multiple of bs.
    for offset in range(0, len(data), bs):
        f.write(data[offset:offset + bs])
# Exactly one command-line argument is expected; otherwise print the
# usage message and exit (Usage() ends with sys.exit(1)).
if len(sys.argv) != 2:
Usage()
@ -50,7 +69,11 @@ cache = rclocrcache.OCRCache(config)
incache, data = cache.get(path)
if incache:
    # Cache hit: send the cached data to stdout exactly once. Go through
    # breakwrite() rather than a single big write(), which is the case
    # breakwrite() exists to avoid on Windows.
    try:
        breakwrite(sys.stdout.buffer, data)
    except Exception as e:
        # Script boundary: report the failure and exit with an error status.
        _deb("RCLOCR error writing: %s" % e)
        sys.exit(1)
    sys.exit(0)
#### Data not in cache

View File

@ -410,7 +410,8 @@ class PDFExtractor:
self.filename]
data = subprocess.check_output(cmd)
html = _htmlprefix + self.em.htmlescape(data) + _htmlsuffix
except:
except Exception as e:
self.em.rclog("%s failed: %s" % (cmd, e))
pass
if self.extrameta:

View File

@ -195,22 +195,22 @@ class ConfTree(ConfSimple):
raise TypeError("getbin: parameters must be binary not unicode")
#_debug("ConfTree::getbin: nm [%s] sk [%s]" % (nm, sk))
if sk == b'' or sk[0] != b'/'[0]:
# Note the test for root. There does not seem to be a direct
# way to do this in os.path
if not sk:
return ConfSimple.getbin(self, nm, sk)
if sk[len(sk)-1] == b'/'[0]:
sk = sk[:len(sk)-1]
# Try all sk ancestors as submaps (/a/b/c-> /a/b/c, /a/b, /a, b'')
while sk:
while True:
if sk in self.submaps:
return ConfSimple.getbin(self, nm, sk)
if sk + b'/' in self.submaps:
return ConfSimple.getbin(self, nm, sk+b'/')
i = sk.rfind(b'/')
if i == -1:
break
sk = sk[:i]
return ConfSimple.getbin(self, nm, sk + b'/')
nsk = os.path.dirname(sk)
if nsk == sk:
# sk was already root, we're done.
break;
sk = nsk
return ConfSimple.getbin(self, nm)