OCR: small adjustments for Windows
This commit is contained in:
parent
34a20e561a
commit
1fb9421163
@ -31,13 +31,32 @@ import importlib.util
|
|||||||
import rclconfig
|
import rclconfig
|
||||||
import rclocrcache
|
import rclocrcache
|
||||||
|
|
||||||
|
_mswindows = (sys.platform == "win32")
|
||||||
def _deb(s):
    """Print the diagnostic message s on stderr, prefixed with "rclocr: ".

    Nothing is printed when _mswindows is true (sys.platform == "win32").
    """
    if _mswindows:
        return
    print("rclocr: %s" % s, file=sys.stderr)
|
||||||
|
|
||||||
def Usage():
    """Report the expected command line through _deb(), then exit with status 1."""
    _deb("Usage: rclocr.py <imagefilename>")
    # sys.exit(1) is implemented as raising SystemExit(1); raise it directly.
    raise SystemExit(1)
|
||||||
|
|
||||||
|
def breakwrite(f, data, bs=4 * 1024):
    """Write data to f in slices of at most bs items.

    On Windows, writing big chunks in one call can fail with a
    "not enough space" error. This seems to be a combined Windows/Python
    bug, depending on versions (see https://bugs.python.org/issue11395).
    In any case, breaking the data up avoids the problem.

    Parameters:
        f: object with a write() method (e.g. sys.stdout.buffer).
        data: sliceable sequence with a length; it is passed to f.write()
            slice by slice, in order.
        bs: maximum slice size. Defaults to 4 KiB, the value that was
            previously hard-coded.

    Raises:
        ValueError: if bs is not positive.
        Whatever f.write() raises is propagated unchanged.
    """
    if bs <= 0:
        # A zero step would make range() fail with an obscure message and
        # a negative one would silently write nothing.
        raise ValueError("breakwrite: bs must be positive")
    # range() yields no offsets for empty data, so nothing is written then.
    for offset in range(0, len(data), bs):
        f.write(data[offset:offset + bs])
|
||||||
|
|
||||||
if len(sys.argv) != 2:
|
if len(sys.argv) != 2:
|
||||||
Usage()
|
Usage()
|
||||||
|
|
||||||
@ -50,7 +69,11 @@ cache = rclocrcache.OCRCache(config)
|
|||||||
|
|
||||||
incache, data = cache.get(path)
|
incache, data = cache.get(path)
|
||||||
if incache:
|
if incache:
|
||||||
sys.stdout.buffer.write(data)
|
try:
|
||||||
|
breakwrite(sys.stdout.buffer, data)
|
||||||
|
except Exception as e:
|
||||||
|
_deb("RCLOCR error writing: %s" % e)
|
||||||
|
sys.exit(1)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
#### Data not in cache
|
#### Data not in cache
|
||||||
|
|||||||
@ -410,7 +410,8 @@ class PDFExtractor:
|
|||||||
self.filename]
|
self.filename]
|
||||||
data = subprocess.check_output(cmd)
|
data = subprocess.check_output(cmd)
|
||||||
html = _htmlprefix + self.em.htmlescape(data) + _htmlsuffix
|
html = _htmlprefix + self.em.htmlescape(data) + _htmlsuffix
|
||||||
except:
|
except Exception as e:
|
||||||
|
self.em.rclog("%s failed: %s" % (cmd, e))
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if self.extrameta:
|
if self.extrameta:
|
||||||
|
|||||||
@ -195,22 +195,22 @@ class ConfTree(ConfSimple):
|
|||||||
raise TypeError("getbin: parameters must be binary not unicode")
|
raise TypeError("getbin: parameters must be binary not unicode")
|
||||||
#_debug("ConfTree::getbin: nm [%s] sk [%s]" % (nm, sk))
|
#_debug("ConfTree::getbin: nm [%s] sk [%s]" % (nm, sk))
|
||||||
|
|
||||||
if sk == b'' or sk[0] != b'/'[0]:
|
# Note the test for root. There does not seem to be a direct
|
||||||
|
# way to do this in os.path
|
||||||
|
if not sk:
|
||||||
return ConfSimple.getbin(self, nm, sk)
|
return ConfSimple.getbin(self, nm, sk)
|
||||||
|
|
||||||
if sk[len(sk)-1] == b'/'[0]:
|
|
||||||
sk = sk[:len(sk)-1]
|
|
||||||
|
|
||||||
# Try all sk ancestors as submaps (/a/b/c-> /a/b/c, /a/b, /a, b'')
|
# Try all sk ancestors as submaps (/a/b/c-> /a/b/c, /a/b, /a, b'')
|
||||||
while sk:
|
while True:
|
||||||
if sk in self.submaps:
|
if sk in self.submaps:
|
||||||
return ConfSimple.getbin(self, nm, sk)
|
return ConfSimple.getbin(self, nm, sk)
|
||||||
if sk + b'/' in self.submaps:
|
if sk + b'/' in self.submaps:
|
||||||
return ConfSimple.getbin(self, nm, sk+b'/')
|
return ConfSimple.getbin(self, nm, sk + b'/')
|
||||||
i = sk.rfind(b'/')
|
nsk = os.path.dirname(sk)
|
||||||
if i == -1:
|
if nsk == sk:
|
||||||
break
|
# sk was already root, we're done.
|
||||||
sk = sk[:i]
|
break;
|
||||||
|
sk = nsk
|
||||||
|
|
||||||
return ConfSimple.getbin(self, nm)
|
return ConfSimple.getbin(self, nm)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user