OCR: small adjustments for Windows
This commit is contained in:
parent
34a20e561a
commit
1fb9421163
@ -31,13 +31,32 @@ import importlib.util
|
||||
import rclconfig
|
||||
import rclocrcache
|
||||
|
||||
_mswindows = (sys.platform == "win32")
|
||||
def _deb(s):
    """Print a diagnostic message, prefixed with "rclocr: ", to stderr.

    Nothing is printed when running on Windows (_mswindows is true).

    s: the message; it is formatted with %s, so any object is accepted.
    """
    if not _mswindows:
        print("rclocr: %s" % s, file=sys.stderr)
|
||||
|
||||
def Usage():
    """Report the expected command line through _deb() and exit with status 1."""
    _deb("Usage: rclocr.py <imagefilename>")
    raise SystemExit(1)
|
||||
|
||||
def breakwrite(f, data, bs=4*1024):
    """Write data to f in successive pieces of at most bs elements.

    On Windows, writing big chunks can fail with a "not enough space"
    error. This seems to be a combined Windows/Python bug, depending on
    versions (see https://bugs.python.org/issue11395). In any case, just
    break the output up into small writes.

    f: object with a write() method accepting slices of data.
    data: sliceable sequence to output (e.g. bytes for a binary stream).
    bs: maximum size of each individual write. Defaults to 4 KiB.

    Nothing is written when data is empty. Raises ValueError if bs is not
    positive. Exceptions raised by f.write() are propagated to the caller.
    """
    if bs <= 0:
        raise ValueError("breakwrite: block size must be positive")
    # Slicing clips at the end of data, so the last piece is simply shorter.
    for offset in range(0, len(data), bs):
        f.write(data[offset:offset + bs])
|
||||
|
||||
# Exactly one command line argument is expected (see Usage()); anything
# else prints the usage message and exits.
if len(sys.argv) != 2:
    Usage()
|
||||
|
||||
@ -50,7 +69,11 @@ cache = rclocrcache.OCRCache(config)
|
||||
|
||||
incache, data = cache.get(path)
if incache:
    # Cache hit: send the stored data to stdout and exit. The data is
    # written once, in small pieces through breakwrite(), instead of with a
    # single big write.
    try:
        breakwrite(sys.stdout.buffer, data)
    except Exception as e:
        _deb("RCLOCR error writing: %s" % e)
        sys.exit(1)
    sys.exit(0)
|
||||
|
||||
#### Data not in cache
|
||||
|
||||
@ -410,7 +410,8 @@ class PDFExtractor:
|
||||
self.filename]
|
||||
data = subprocess.check_output(cmd)
|
||||
html = _htmlprefix + self.em.htmlescape(data) + _htmlsuffix
|
||||
except:
|
||||
except Exception as e:
|
||||
self.em.rclog("%s failed: %s" % (cmd, e))
|
||||
pass
|
||||
|
||||
if self.extrameta:
|
||||
|
||||
@ -195,22 +195,22 @@ class ConfTree(ConfSimple):
|
||||
raise TypeError("getbin: parameters must be binary not unicode")
|
||||
#_debug("ConfTree::getbin: nm [%s] sk [%s]" % (nm, sk))
|
||||
|
||||
if sk == b'' or sk[0] != b'/'[0]:
|
||||
# Note the test for root. There does not seem to be a direct
|
||||
# way to do this in os.path
|
||||
if not sk:
|
||||
return ConfSimple.getbin(self, nm, sk)
|
||||
|
||||
if sk[len(sk)-1] == b'/'[0]:
|
||||
sk = sk[:len(sk)-1]
|
||||
|
||||
# Try all sk ancestors as submaps (/a/b/c-> /a/b/c, /a/b, /a, b'')
|
||||
while sk:
|
||||
while True:
|
||||
if sk in self.submaps:
|
||||
return ConfSimple.getbin(self, nm, sk)
|
||||
if sk + b'/' in self.submaps:
|
||||
return ConfSimple.getbin(self, nm, sk+b'/')
|
||||
i = sk.rfind(b'/')
|
||||
if i == -1:
|
||||
break
|
||||
sk = sk[:i]
|
||||
return ConfSimple.getbin(self, nm, sk + b'/')
|
||||
nsk = os.path.dirname(sk)
|
||||
if nsk == sk:
|
||||
# sk was already root, we're done.
|
||||
break;
|
||||
sk = nsk
|
||||
|
||||
return ConfSimple.getbin(self, nm)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user