python filters: replace misc message printing with single method in rclexecm
This commit is contained in:
parent
fd5297dc73
commit
0dd609cf1a
@ -31,8 +31,26 @@ import rclconfig
|
|||||||
import cmdtalk
|
import cmdtalk
|
||||||
|
|
||||||
PY3 = (sys.version > '3')
|
PY3 = (sys.version > '3')
|
||||||
_mswindows = (sys.platform == "win32")
|
_g_mswindows = (sys.platform == "win32")
|
||||||
_execdir = os.path.dirname(sys.argv[0])
|
_g_execdir = os.path.dirname(sys.argv[0])
|
||||||
|
|
||||||
|
_g_config = rclconfig.RclConfig()
|
||||||
|
_g_debugfile = _g_config.getConfParam("filterdebuglog")
|
||||||
|
_g_errfout = None
|
||||||
|
|
||||||
|
|
||||||
|
def logmsg(msg):
    """Write one diagnostic message line for a filter script.

    If the module-level ``_g_debugfile`` path (read from the
    "filterdebuglog" configuration parameter) is set, the file is opened
    in append mode on first use and the handle is kept in ``_g_errfout``
    for later calls. Otherwise, or if the file cannot be opened, the
    message goes to stderr, except on Windows where it is dropped.

    Parameters:
        msg: any object; it is formatted with ``"%s"``.
    """
    # Only the file handle is rebound here; _g_debugfile is just read.
    global _g_errfout
    if _g_debugfile and not _g_errfout:
        try:
            _g_errfout = open(_g_debugfile, "a")
        except Exception:
            # Logging is best effort: an unusable log path must not break
            # the filter. Unlike a bare except, this lets
            # KeyboardInterrupt and SystemExit propagate.
            _g_errfout = None
    if _g_errfout:
        print("%s" % msg, file=_g_errfout)
        # The handle stays open for the life of the process, so flush
        # after each message: otherwise buffered lines are lost if the
        # filter crashes or is killed.
        _g_errfout.flush()
    elif not _g_mswindows:
        # NOTE(review): stderr is deliberately not used on Windows,
        # presumably because it is not usable there -- confirm.
        print("%s" % msg, file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
# Convert to bytes if not already such.
|
# Convert to bytes if not already such.
|
||||||
def makebytes(data):
|
def makebytes(data):
|
||||||
@ -40,6 +58,7 @@ def makebytes(data):
|
|||||||
return data.encode("UTF-8")
|
return data.encode("UTF-8")
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
# Possibly decode binary file name for use as subprocess argument,
|
# Possibly decode binary file name for use as subprocess argument,
|
||||||
# depending on platform.
|
# depending on platform.
|
||||||
def subprocfile(fn):
|
def subprocfile(fn):
|
||||||
@ -48,26 +67,22 @@ def subprocfile(fn):
|
|||||||
# to convert.
|
# to convert.
|
||||||
# On Unix all list elements get converted to bytes in the C
|
# On Unix all list elements get converted to bytes in the C
|
||||||
# _posixsubprocess module, nothing to do.
|
# _posixsubprocess module, nothing to do.
|
||||||
if PY3 and _mswindows and type(fn) != type(''):
|
if PY3 and _g_mswindows and type(fn) != type(''):
|
||||||
return fn.decode('UTF-8')
|
return fn.decode('UTF-8')
|
||||||
else:
|
else:
|
||||||
return fn
|
return fn
|
||||||
|
|
||||||
|
|
||||||
# Check for truthiness of rclconfig value.
|
# Check for truthiness of rclconfig value.
|
||||||
def configparamtrue(value):
|
def configparamtrue(value):
|
||||||
if not value:
|
if not value:
|
||||||
return False
|
return False
|
||||||
try:
|
try:
|
||||||
ivalue = int(value)
|
ivalue = int(value)
|
||||||
if ivalue:
|
return True if ivalue else False
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
except:
|
except:
|
||||||
pass
|
return True if value[0] in 'tT' else False
|
||||||
if value[0] in 'tT':
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Escape special characters in plain text for inclusion in HTML doc.
|
# Escape special characters in plain text for inclusion in HTML doc.
|
||||||
# Note: tried replacing this with a multiple replacer according to
|
# Note: tried replacing this with a multiple replacer according to
|
||||||
@ -84,8 +99,6 @@ def htmlescape(txt):
|
|||||||
return txt
|
return txt
|
||||||
|
|
||||||
|
|
||||||
my_config = rclconfig.RclConfig()
|
|
||||||
|
|
||||||
############################################
|
############################################
|
||||||
# RclExecM implements the communication protocol with the recollindex
|
# RclExecM implements the communication protocol with the recollindex
|
||||||
# process. It calls the object specific of the document type to
|
# process. It calls the object specific of the document type to
|
||||||
@ -109,7 +122,7 @@ class RclExecM(cmdtalk.CmdTalk):
|
|||||||
self.maxmembersize = self.maxmembersize * 1024
|
self.maxmembersize = self.maxmembersize * 1024
|
||||||
|
|
||||||
# Tell cmdtalk where to log
|
# Tell cmdtalk where to log
|
||||||
self.debugfile = my_config.getConfParam("filterdebuglog")
|
self.debugfile = _g_config.getConfParam("filterdebuglog")
|
||||||
# Some of our params are binary, cmdtalk should not decode them
|
# Some of our params are binary, cmdtalk should not decode them
|
||||||
self.nodecodeinput = True
|
self.nodecodeinput = True
|
||||||
|
|
||||||
@ -222,7 +235,7 @@ def which(program):
|
|||||||
|
|
||||||
def path_candidates():
|
def path_candidates():
|
||||||
yield os.path.dirname(sys.argv[0])
|
yield os.path.dirname(sys.argv[0])
|
||||||
rclpath = my_config.getConfParam("recollhelperpath")
|
rclpath = _g_config.getConfParam("recollhelperpath")
|
||||||
if rclpath:
|
if rclpath:
|
||||||
for path in rclpath.split(os.pathsep):
|
for path in rclpath.split(os.pathsep):
|
||||||
yield path
|
yield path
|
||||||
@ -244,9 +257,9 @@ def which(program):
|
|||||||
def execPythonScript(icmd):
|
def execPythonScript(icmd):
|
||||||
import subprocess
|
import subprocess
|
||||||
cmd = list(icmd)
|
cmd = list(icmd)
|
||||||
if _mswindows:
|
if _g_mswindows:
|
||||||
if not os.path.isabs(cmd[0]):
|
if not os.path.isabs(cmd[0]):
|
||||||
cmd[0] = os.path.join(_execdir, cmd[0])
|
cmd[0] = os.path.join(_g_execdir, cmd[0])
|
||||||
cmd = [sys.executable] + cmd
|
cmd = [sys.executable] + cmd
|
||||||
return subprocess.check_output(cmd)
|
return subprocess.check_output(cmd)
|
||||||
|
|
||||||
@ -347,8 +360,8 @@ def main(proto, extract):
|
|||||||
|
|
||||||
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer.
|
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer.
|
||||||
# We make a half-assed attempt to emulate:
|
# We make a half-assed attempt to emulate:
|
||||||
mimetype = my_config.mimeType(path)
|
mimetype = _g_config.mimeType(path)
|
||||||
if not mimetype and not _mswindows:
|
if not mimetype and not _g_mswindows:
|
||||||
mimetype = mimetype_with_file(path)
|
mimetype = mimetype_with_file(path)
|
||||||
if mimetype:
|
if mimetype:
|
||||||
params['mimetype'] = mimetype
|
params['mimetype'] = mimetype
|
||||||
|
|||||||
@ -29,9 +29,6 @@ import sys
|
|||||||
from struct import unpack, pack
|
from struct import unpack, pack
|
||||||
import six
|
import six
|
||||||
|
|
||||||
def debug(s):
|
|
||||||
print("%s"%s, file=sys.stderr)
|
|
||||||
|
|
||||||
PY3 = sys.version > '3'
|
PY3 = sys.version > '3'
|
||||||
|
|
||||||
if PY3:
|
if PY3:
|
||||||
|
|||||||
@ -30,11 +30,10 @@ import importlib.util
|
|||||||
|
|
||||||
import rclconfig
|
import rclconfig
|
||||||
import rclocrcache
|
import rclocrcache
|
||||||
|
import rclexecm
|
||||||
|
|
||||||
_mswindows = (sys.platform == "win32")
|
|
||||||
def _deb(s):
|
def _deb(s):
|
||||||
if not _mswindows:
|
rclexecm.logmsg(s)
|
||||||
print("rclocr: %s" % s, file=sys.stderr)
|
|
||||||
|
|
||||||
def Usage():
|
def Usage():
|
||||||
_deb("Usage: rclocr.py <imagefilename>")
|
_deb("Usage: rclocr.py <imagefilename>")
|
||||||
@ -57,6 +56,7 @@ def breakwrite(f, data):
|
|||||||
offset += tow
|
offset += tow
|
||||||
total -= tow
|
total -= tow
|
||||||
|
|
||||||
|
|
||||||
if len(sys.argv) != 2:
|
if len(sys.argv) != 2:
|
||||||
Usage()
|
Usage()
|
||||||
|
|
||||||
|
|||||||
@ -40,8 +40,7 @@ abbyyocrcmd = ""
|
|||||||
abbyocrdir = ""
|
abbyocrdir = ""
|
||||||
|
|
||||||
def _deb(s):
|
def _deb(s):
|
||||||
if not _mswindows:
|
rclexecm.logmsg(s)
|
||||||
print("rclocrabbyy: %s" % s, file=sys.stderr)
|
|
||||||
|
|
||||||
# Return true if abbyy appears to be available
|
# Return true if abbyy appears to be available
|
||||||
def ocrpossible(config, path):
|
def ocrpossible(config, path):
|
||||||
|
|||||||
@ -61,8 +61,11 @@ import urllib.parse
|
|||||||
import zlib
|
import zlib
|
||||||
import glob
|
import glob
|
||||||
|
|
||||||
|
import rclexecm
|
||||||
|
|
||||||
def _deb(s):
|
def _deb(s):
|
||||||
print("rclocrcache: %s" %s, file=sys.stderr)
|
rclexecm.logmsg(s)
|
||||||
|
|
||||||
|
|
||||||
class OCRCache(object):
|
class OCRCache(object):
|
||||||
def __init__(self, conf):
|
def __init__(self, conf):
|
||||||
@ -324,4 +327,3 @@ if __name__ == '__main__':
|
|||||||
# if not incache:
|
# if not incache:
|
||||||
# trystore(path)
|
# trystore(path)
|
||||||
#
|
#
|
||||||
|
|
||||||
|
|||||||
@ -39,9 +39,10 @@ _okexts = ('.tif', '.tiff', '.jpg', '.png', '.jpeg')
|
|||||||
tesseractcmd = None
|
tesseractcmd = None
|
||||||
pdftoppmcmd = None
|
pdftoppmcmd = None
|
||||||
|
|
||||||
|
|
||||||
def _deb(s):
|
def _deb(s):
|
||||||
if not _mswindows:
|
rclexecm.logmsg(s)
|
||||||
print("rclocrtesseract: %s" % s, file=sys.stderr)
|
|
||||||
|
|
||||||
def vacuumdir(dir):
|
def vacuumdir(dir):
|
||||||
if dir:
|
if dir:
|
||||||
@ -51,6 +52,7 @@ def vacuumdir(dir):
|
|||||||
os.unlink(path)
|
os.unlink(path)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
tmpdir = None
|
tmpdir = None
|
||||||
def _maybemaketmpdir():
|
def _maybemaketmpdir():
|
||||||
global tmpdir
|
global tmpdir
|
||||||
@ -61,13 +63,16 @@ def _maybemaketmpdir():
|
|||||||
else:
|
else:
|
||||||
tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
|
tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
|
||||||
|
|
||||||
|
|
||||||
def finalcleanup():
|
def finalcleanup():
|
||||||
if tmpdir:
|
if tmpdir:
|
||||||
vacuumdir(tmpdir)
|
vacuumdir(tmpdir)
|
||||||
os.rmdir(tmpdir)
|
os.rmdir(tmpdir)
|
||||||
|
|
||||||
|
|
||||||
atexit.register(finalcleanup)
|
atexit.register(finalcleanup)
|
||||||
|
|
||||||
|
|
||||||
# Return true if tesseract and the appropriate conversion program for
|
# Return true if tesseract and the appropriate conversion program for
|
||||||
# the file type (e.g. pdftoppm for pdf) appear to be available
|
# the file type (e.g. pdftoppm for pdf) appear to be available
|
||||||
def ocrpossible(config, path):
|
def ocrpossible(config, path):
|
||||||
@ -145,6 +150,7 @@ def _guesstesseractlang(config, path):
|
|||||||
_deb("Tesseract lang (guessed): %s" % tesseractlang)
|
_deb("Tesseract lang (guessed): %s" % tesseractlang)
|
||||||
return tesseractlang
|
return tesseractlang
|
||||||
|
|
||||||
|
|
||||||
# Process pdf file: use pdftoppm to split it into ppm pages, then run
|
# Process pdf file: use pdftoppm to split it into ppm pages, then run
|
||||||
# tesseract on each and concatenate the result. It would probably be
|
# tesseract on each and concatenate the result. It would probably be
|
||||||
# possible instead to use pdftocairo to produce a tiff, but pdftocairo
|
# possible instead to use pdftocairo to produce a tiff, but pdftocairo
|
||||||
|
|||||||
@ -9,24 +9,26 @@ import platform
|
|||||||
import subprocess
|
import subprocess
|
||||||
import glob
|
import glob
|
||||||
|
|
||||||
ftrace = sys.stderr
|
|
||||||
#ftrace = open("C:/Users/Bill/log-uncomp.txt", "w")
|
def _msg(s):
|
||||||
|
rclexecm.logmsg(s)
|
||||||
|
|
||||||
|
|
||||||
sysplat = platform.system()
|
sysplat = platform.system()
|
||||||
if sysplat != "Windows":
|
if sysplat != "Windows":
|
||||||
print("rcluncomp.py: only for Windows", file = ftrace)
|
_msg("rcluncomp.py: only for Windows")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import msvcrt
|
import msvcrt
|
||||||
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
print("setmode binary failed: %s" % str(err), file = ftrace)
|
_msg("setmode binary failed: %s" % str(err))
|
||||||
|
|
||||||
sevenz = rclexecm.which("7z")
|
sevenz = rclexecm.which("7z")
|
||||||
if not sevenz:
|
if not sevenz:
|
||||||
print("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \
|
_msg("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \
|
||||||
"in recoll.conf ?", file=ftrace)
|
"in recoll.conf ?")
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
# Params: uncompression program, input file name, temp directory.
|
# Params: uncompression program, input file name, temp directory.
|
||||||
@ -34,7 +36,7 @@ if not sevenz:
|
|||||||
|
|
||||||
infile = sys.argv[2]
|
infile = sys.argv[2]
|
||||||
outdir = sys.argv[3]
|
outdir = sys.argv[3]
|
||||||
# print("rcluncomp.py infile [%s], outdir [%s]" % (infile, outdir), file = ftrace)
|
# _msg("rcluncomp.py infile [%s], outdir [%s]" % (infile, outdir))
|
||||||
|
|
||||||
# There is apparently no way to suppress 7z output. Hopefully the
|
# There is apparently no way to suppress 7z output. Hopefully the
|
||||||
# possible deadlock described by the subprocess module doc can't occur
|
# possible deadlock described by the subprocess module doc can't occur
|
||||||
@ -47,7 +49,7 @@ try:
|
|||||||
# There should be only one file in there..
|
# There should be only one file in there..
|
||||||
print(outputname[0])
|
print(outputname[0])
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
print("%s" % (str(err),), file = ftrace)
|
_msg("%s" % (str(err),))
|
||||||
sys.exit(4)
|
sys.exit(4)
|
||||||
|
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user