python filters: replace misc message printing with single method in rclexecm
This commit is contained in:
parent
fd5297dc73
commit
0dd609cf1a
@ -31,8 +31,26 @@ import rclconfig
|
||||
import cmdtalk
|
||||
|
||||
PY3 = (sys.version > '3')
|
||||
_mswindows = (sys.platform == "win32")
|
||||
_execdir = os.path.dirname(sys.argv[0])
|
||||
_g_mswindows = (sys.platform == "win32")
|
||||
_g_execdir = os.path.dirname(sys.argv[0])
|
||||
|
||||
_g_config = rclconfig.RclConfig()
|
||||
_g_debugfile = _g_config.getConfParam("filterdebuglog")
|
||||
_g_errfout = None
|
||||
|
||||
|
||||
def logmsg(msg):
    """Write one diagnostic message line to the filter debug log.

    The file named by the "filterdebuglog" configuration parameter is
    opened in append mode on first use and then kept open in
    _g_errfout. When no log file is configured, or when it cannot be
    opened, the message goes to stderr instead, except on Windows where
    it is dropped.
    """
    global _g_errfout
    if _g_debugfile and not _g_errfout:
        try:
            _g_errfout = open(_g_debugfile, "a")
        except Exception:
            # Best effort: an unusable log file must never break a
            # filter, but do not swallow KeyboardInterrupt/SystemExit.
            _g_errfout = None
    if _g_errfout:
        print("%s" % msg, file=_g_errfout)
        # The handle stays open for the life of the process: flush so
        # that messages are not lost if the filter ends abruptly.
        _g_errfout.flush()
    elif not _g_mswindows:
        print("%s" % msg, file=sys.stderr)
|
||||
|
||||
|
||||
# Convert to bytes if not already such.
|
||||
def makebytes(data):
|
||||
@ -40,6 +58,7 @@ def makebytes(data):
|
||||
return data.encode("UTF-8")
|
||||
return data
|
||||
|
||||
|
||||
# Possibly decode binary file name for use as subprocess argument,
|
||||
# depending on platform.
|
||||
def subprocfile(fn):
|
||||
@ -48,26 +67,22 @@ def subprocfile(fn):
|
||||
# to convert.
|
||||
# On Unix all list elements get converted to bytes in the C
|
||||
# _posixsubprocess module, nothing to do.
|
||||
if PY3 and _mswindows and type(fn) != type(''):
|
||||
if PY3 and _g_mswindows and type(fn) != type(''):
|
||||
return fn.decode('UTF-8')
|
||||
else:
|
||||
return fn
|
||||
|
||||
|
||||
# Check for truthiness of rclconfig value.
|
||||
def configparamtrue(value):
    """Return True if an rclconfig parameter value means "true".

    Empty or missing values are false. Values which convert to an
    integer are true when non-zero. Any other value is true only if it
    begins with 't' or 'T'.
    """
    if not value:
        return False
    try:
        return bool(int(value))
    except (ValueError, TypeError):
        # Not a number: fall through to the textual test.
        pass
    return value[0] in 'tT'
|
||||
|
||||
|
||||
# Escape special characters in plain text for inclusion in HTML doc.
|
||||
# Note: tried replacing this with a multiple replacer according to
|
||||
@ -84,8 +99,6 @@ def htmlescape(txt):
|
||||
return txt
|
||||
|
||||
|
||||
my_config = rclconfig.RclConfig()
|
||||
|
||||
############################################
|
||||
# RclExecM implements the communication protocol with the recollindex
|
||||
# process. It calls the object specific to the document type to
|
||||
@ -109,7 +122,7 @@ class RclExecM(cmdtalk.CmdTalk):
|
||||
self.maxmembersize = self.maxmembersize * 1024
|
||||
|
||||
# Tell cmdtalk where to log
|
||||
self.debugfile = my_config.getConfParam("filterdebuglog")
|
||||
self.debugfile = _g_config.getConfParam("filterdebuglog")
|
||||
# Some of our params are binary, cmdtalk should not decode them
|
||||
self.nodecodeinput = True
|
||||
|
||||
@ -222,7 +235,7 @@ def which(program):
|
||||
|
||||
def path_candidates():
|
||||
yield os.path.dirname(sys.argv[0])
|
||||
rclpath = my_config.getConfParam("recollhelperpath")
|
||||
rclpath = _g_config.getConfParam("recollhelperpath")
|
||||
if rclpath:
|
||||
for path in rclpath.split(os.pathsep):
|
||||
yield path
|
||||
@ -244,9 +257,9 @@ def which(program):
|
||||
def execPythonScript(icmd):
|
||||
import subprocess
|
||||
cmd = list(icmd)
|
||||
if _mswindows:
|
||||
if _g_mswindows:
|
||||
if not os.path.isabs(cmd[0]):
|
||||
cmd[0] = os.path.join(_execdir, cmd[0])
|
||||
cmd[0] = os.path.join(_g_execdir, cmd[0])
|
||||
cmd = [sys.executable] + cmd
|
||||
return subprocess.check_output(cmd)
|
||||
|
||||
@ -347,8 +360,8 @@ def main(proto, extract):
|
||||
|
||||
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer.
|
||||
# We make a half-assed attempt to emulate:
|
||||
mimetype = my_config.mimeType(path)
|
||||
if not mimetype and not _mswindows:
|
||||
mimetype = _g_config.mimeType(path)
|
||||
if not mimetype and not _g_mswindows:
|
||||
mimetype = mimetype_with_file(path)
|
||||
if mimetype:
|
||||
params['mimetype'] = mimetype
|
||||
|
||||
@ -29,9 +29,6 @@ import sys
|
||||
from struct import unpack, pack
|
||||
import six
|
||||
|
||||
def debug(s):
|
||||
print("%s"%s, file=sys.stderr)
|
||||
|
||||
PY3 = sys.version > '3'
|
||||
|
||||
if PY3:
|
||||
|
||||
@ -30,11 +30,10 @@ import importlib.util
|
||||
|
||||
import rclconfig
|
||||
import rclocrcache
|
||||
import rclexecm
|
||||
|
||||
_mswindows = (sys.platform == "win32")
|
||||
def _deb(s):
|
||||
if not _mswindows:
|
||||
print("rclocr: %s" % s, file=sys.stderr)
|
||||
rclexecm.logmsg(s)
|
||||
|
||||
def Usage():
|
||||
_deb("Usage: rclocr.py <imagefilename>")
|
||||
@ -57,6 +56,7 @@ def breakwrite(f, data):
|
||||
offset += tow
|
||||
total -= tow
|
||||
|
||||
|
||||
if len(sys.argv) != 2:
|
||||
Usage()
|
||||
|
||||
|
||||
@ -40,8 +40,7 @@ abbyyocrcmd = ""
|
||||
abbyocrdir = ""
|
||||
|
||||
def _deb(s):
|
||||
if not _mswindows:
|
||||
print("rclocrabbyy: %s" % s, file=sys.stderr)
|
||||
rclexecm.logmsg(s)
|
||||
|
||||
# Return true if abbyy appears to be available
|
||||
def ocrpossible(config, path):
|
||||
|
||||
@ -61,9 +61,12 @@ import urllib.parse
|
||||
import zlib
|
||||
import glob
|
||||
|
||||
import rclexecm
|
||||
|
||||
def _deb(s):
|
||||
print("rclocrcache: %s" %s, file=sys.stderr)
|
||||
rclexecm.logmsg(s)
|
||||
|
||||
|
||||
class OCRCache(object):
|
||||
def __init__(self, conf):
|
||||
self.config = conf
|
||||
@ -324,4 +327,3 @@ if __name__ == '__main__':
|
||||
# if not incache:
|
||||
# trystore(path)
|
||||
#
|
||||
|
||||
|
||||
@ -39,9 +39,10 @@ _okexts = ('.tif', '.tiff', '.jpg', '.png', '.jpeg')
|
||||
tesseractcmd = None
|
||||
pdftoppmcmd = None
|
||||
|
||||
|
||||
def _deb(s):
|
||||
if not _mswindows:
|
||||
print("rclocrtesseract: %s" % s, file=sys.stderr)
|
||||
rclexecm.logmsg(s)
|
||||
|
||||
|
||||
def vacuumdir(dir):
|
||||
if dir:
|
||||
@ -51,6 +52,7 @@ def vacuumdir(dir):
|
||||
os.unlink(path)
|
||||
return True
|
||||
|
||||
|
||||
tmpdir = None
|
||||
def _maybemaketmpdir():
|
||||
global tmpdir
|
||||
@ -61,13 +63,16 @@ def _maybemaketmpdir():
|
||||
else:
|
||||
tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
|
||||
|
||||
|
||||
def finalcleanup():
|
||||
if tmpdir:
|
||||
vacuumdir(tmpdir)
|
||||
os.rmdir(tmpdir)
|
||||
|
||||
|
||||
atexit.register(finalcleanup)
|
||||
|
||||
|
||||
# Return true if tesseract and the appropriate conversion program for
|
||||
# the file type (e.g. pdftoppm for pdf) appear to be available
|
||||
def ocrpossible(config, path):
|
||||
@ -145,6 +150,7 @@ def _guesstesseractlang(config, path):
|
||||
_deb("Tesseract lang (guessed): %s" % tesseractlang)
|
||||
return tesseractlang
|
||||
|
||||
|
||||
# Process pdf file: use pdftoppm to split it into ppm pages, then run
|
||||
# tesseract on each and concatenate the result. It would probably be
|
||||
# possible instead to use pdftocairo to produce a tiff, but pdftocairo
|
||||
|
||||
@ -9,24 +9,26 @@ import platform
|
||||
import subprocess
|
||||
import glob
|
||||
|
||||
ftrace = sys.stderr
|
||||
#ftrace = open("C:/Users/Bill/log-uncomp.txt", "w")
|
||||
|
||||
def _msg(s):
|
||||
rclexecm.logmsg(s)
|
||||
|
||||
|
||||
sysplat = platform.system()
|
||||
if sysplat != "Windows":
|
||||
print("rcluncomp.py: only for Windows", file = ftrace)
|
||||
_msg("rcluncomp.py: only for Windows")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
import msvcrt
|
||||
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
||||
except Exception as err:
|
||||
print("setmode binary failed: %s" % str(err), file = ftrace)
|
||||
_msg("setmode binary failed: %s" % str(err))
|
||||
|
||||
sevenz = rclexecm.which("7z")
|
||||
if not sevenz:
|
||||
print("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \
|
||||
"in recoll.conf ?", file=ftrace)
|
||||
_msg("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \
|
||||
"in recoll.conf ?")
|
||||
sys.exit(2)
|
||||
|
||||
# Params: uncompression program, input file name, temp directory.
|
||||
@ -34,7 +36,7 @@ if not sevenz:
|
||||
|
||||
infile = sys.argv[2]
|
||||
outdir = sys.argv[3]
|
||||
# print("rcluncomp.py infile [%s], outdir [%s]" % (infile, outdir), file = ftrace)
|
||||
# _msg("rcluncomp.py infile [%s], outdir [%s]" % (infile, outdir))
|
||||
|
||||
# There is apparently no way to suppress 7z output. Hopefully the
|
||||
# possible deadlock described by the subprocess module doc can't occur
|
||||
@ -47,7 +49,7 @@ try:
|
||||
# There should be only one file in there..
|
||||
print(outputname[0])
|
||||
except Exception as err:
|
||||
print("%s" % (str(err),), file = ftrace)
|
||||
_msg("%s" % (str(err),))
|
||||
sys.exit(4)
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user