OCR: small adjustments for Windows

2020-02-28 09:22:03 +01:00 · 2020-02-28 09:22:03 +01:00 · 1fb9421163
commit 1fb9421163
parent 34a20e561a
3 changed files with 37 additions and 13 deletions
--- a/src/filters/rclocr.py
+++ b/src/filters/rclocr.py
@ -31,13 +31,32 @@ import importlib.util
 import rclconfig
 import rclocrcache
 _mswindows = (sys.platform == "win32")
 def _deb(s):
-    print("rclocr: %s" % s, file=sys.stderr)
+    if not _mswindows:
        print("rclocr: %s" % s, file=sys.stderr)
 def Usage():
    """Report the expected command line through _deb(), then exit with status 1."""
    _deb("Usage: rclocr.py <imagefilename>")
    # sys.exit(1) is exactly this: it raises SystemExit with the given status.
    raise SystemExit(1)
 def breakwrite(f, data):
    """Write data to f as a sequence of chunks of at most 4 KiB each.

    f is any object with a write() method; data is any sliceable sequence
    with a length (the caller in this script passes bytes). Nothing is
    written when data is empty.
    """
    # Writing one big chunk on Windows can fail with a "not enough space"
    # error. This looks like a combined Windows/Python bug which depends on
    # the versions involved, see https://bugs.python.org/issue11395
    # Splitting the output into small pieces avoids the problem.
    chunksize = 4 * 1024
    for start in range(0, len(data), chunksize):
        # Slicing past the end is harmless: the last piece is just shorter.
        f.write(data[start:start + chunksize])
 if len(sys.argv) != 2:
    Usage()
@ -50,7 +69,11 @@ cache = rclocrcache.OCRCache(config)
 incache, data = cache.get(path)
 if incache:
-    sys.stdout.buffer.write(data)
+    try:
        breakwrite(sys.stdout.buffer, data)
    except Exception as e:
        _deb("RCLOCR error writing: %s" % e)
        sys.exit(1)
    sys.exit(0)
 #### Data not in cache
--- a/src/filters/rclpdf.py
+++ b/src/filters/rclpdf.py
@ -410,7 +410,8 @@ class PDFExtractor:
                           self.filename]
                    data = subprocess.check_output(cmd)
                    html = _htmlprefix + self.em.htmlescape(data) + _htmlsuffix
-                except:
+                except Exception as e:
                    self.em.rclog("%s failed: %s" % (cmd, e))
                    pass
        if self.extrameta:
--- a/src/python/recoll/recoll/conftree.py
+++ b/src/python/recoll/recoll/conftree.py
@ -195,22 +195,22 @@ class ConfTree(ConfSimple):
            raise TypeError("getbin: parameters must be binary not unicode")
        #_debug("ConfTree::getbin: nm [%s] sk [%s]" % (nm, sk))
-        if sk == b'' or sk[0] != b'/'[0]:
+        # Note the test for root. There does not seem to be a direct
        # way to do this in os.path
        if not sk:
            return ConfSimple.getbin(self, nm, sk)
        if sk[len(sk)-1] == b'/'[0]:
             sk = sk[:len(sk)-1]
        # Try all sk ancestors as submaps (/a/b/c-> /a/b/c, /a/b, /a, b'')
-        while sk:
+        while True:
            if sk in self.submaps:
                return ConfSimple.getbin(self, nm, sk)
            if sk + b'/' in self.submaps:
-                return ConfSimple.getbin(self, nm, sk+b'/')
+                return ConfSimple.getbin(self, nm, sk + b'/')
-            i = sk.rfind(b'/')
+            nsk = os.path.dirname(sk)
-            if i == -1:
+            if nsk == sk:
-                break
+                # sk was already root, we're done. 
-            sk = sk[:i]
+                break;
            sk = nsk
        return ConfSimple.getbin(self, nm)