python filters: replace misc message printing with single method in rclexecm

This commit is contained in:
Jean-Francois Dockes 2020-09-23 18:38:22 +02:00
parent fd5297dc73
commit 0dd609cf1a
7 changed files with 58 additions and 39 deletions

View File

@ -31,8 +31,26 @@ import rclconfig
import cmdtalk
PY3 = (sys.version > '3')
_mswindows = (sys.platform == "win32")
_execdir = os.path.dirname(sys.argv[0])
_g_mswindows = (sys.platform == "win32")
_g_execdir = os.path.dirname(sys.argv[0])
_g_config = rclconfig.RclConfig()
_g_debugfile = _g_config.getConfParam("filterdebuglog")
_g_errfout = None
def logmsg(msg):
    """Write one diagnostic message line for a filter script.

    If the "filterdebuglog" configuration value (_g_debugfile) is set,
    the file is opened in append mode on first use and the handle is
    kept in _g_errfout for subsequent calls. If no log file is
    available, the message goes to stderr, except on Windows where it
    is dropped.

    msg may be any object: it is converted with str(), so a tuple does
    not break the formatting.
    """
    global _g_errfout
    if _g_debugfile and not _g_errfout:
        try:
            _g_errfout = open(_g_debugfile, "a")
        except Exception:
            # Best effort: logging must never break the filter. Fall
            # through to the stderr branch below. KeyboardInterrupt and
            # SystemExit are deliberately not caught.
            pass
    text = str(msg)
    if _g_errfout:
        print(text, file=_g_errfout)
        # The handle stays open for the life of the process: flush so
        # that the message is not lost if the process ends abruptly.
        _g_errfout.flush()
    elif not _g_mswindows:
        print(text, file=sys.stderr)
# Convert to bytes if not already such.
def makebytes(data):
@ -40,6 +58,7 @@ def makebytes(data):
return data.encode("UTF-8")
return data
# Possibly decode binary file name for use as subprocess argument,
# depending on platform.
def subprocfile(fn):
@ -48,26 +67,22 @@ def subprocfile(fn):
# to convert.
# On Unix all list elements get converted to bytes in the C
# _posixsubprocess module, nothing to do.
if PY3 and _mswindows and type(fn) != type(''):
if PY3 and _g_mswindows and type(fn) != type(''):
return fn.decode('UTF-8')
else:
return fn
# Check for truthiness of rclconfig value.
def configparamtrue(value):
if not value:
return False
try:
ivalue = int(value)
if ivalue:
return True
else:
return False
return True if ivalue else False
except:
pass
if value[0] in 'tT':
return True
return False
return True if value[0] in 'tT' else False
# Escape special characters in plain text for inclusion in HTML doc.
# Note: tried replacing this with a multiple replacer according to
@ -84,8 +99,6 @@ def htmlescape(txt):
return txt
my_config = rclconfig.RclConfig()
############################################
# RclExecM implements the communication protocol with the recollindex
# process. It calls the object specific of the document type to
@ -109,7 +122,7 @@ class RclExecM(cmdtalk.CmdTalk):
self.maxmembersize = self.maxmembersize * 1024
# Tell cmdtalk where to log
self.debugfile = my_config.getConfParam("filterdebuglog")
self.debugfile = _g_config.getConfParam("filterdebuglog")
# Some of our params are binary, cmdtalk should not decode them
self.nodecodeinput = True
@ -222,7 +235,7 @@ def which(program):
def path_candidates():
yield os.path.dirname(sys.argv[0])
rclpath = my_config.getConfParam("recollhelperpath")
rclpath = _g_config.getConfParam("recollhelperpath")
if rclpath:
for path in rclpath.split(os.pathsep):
yield path
@ -244,9 +257,9 @@ def which(program):
def execPythonScript(icmd):
import subprocess
cmd = list(icmd)
if _mswindows:
if _g_mswindows:
if not os.path.isabs(cmd[0]):
cmd[0] = os.path.join(_execdir, cmd[0])
cmd[0] = os.path.join(_g_execdir, cmd[0])
cmd = [sys.executable] + cmd
return subprocess.check_output(cmd)
@ -347,8 +360,8 @@ def main(proto, extract):
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer.
# We make a half-assed attempt to emulate:
mimetype = my_config.mimeType(path)
if not mimetype and not _mswindows:
mimetype = _g_config.mimeType(path)
if not mimetype and not _g_mswindows:
mimetype = mimetype_with_file(path)
if mimetype:
params['mimetype'] = mimetype

View File

@ -29,9 +29,6 @@ import sys
from struct import unpack, pack
import six
def debug(s):
print("%s"%s, file=sys.stderr)
PY3 = sys.version > '3'
if PY3:

View File

@ -30,11 +30,10 @@ import importlib.util
import rclconfig
import rclocrcache
import rclexecm
_mswindows = (sys.platform == "win32")
def _deb(s):
if not _mswindows:
print("rclocr: %s" % s, file=sys.stderr)
rclexecm.logmsg(s)
def Usage():
_deb("Usage: rclocr.py <imagefilename>")
@ -57,6 +56,7 @@ def breakwrite(f, data):
offset += tow
total -= tow
if len(sys.argv) != 2:
Usage()

View File

@ -40,8 +40,7 @@ abbyyocrcmd = ""
abbyocrdir = ""
def _deb(s):
if not _mswindows:
print("rclocrabbyy: %s" % s, file=sys.stderr)
rclexecm.logmsg(s)
# Return true if abbyy appears to be available
def ocrpossible(config, path):

View File

@ -61,9 +61,12 @@ import urllib.parse
import zlib
import glob
import rclexecm
def _deb(s):
print("rclocrcache: %s" %s, file=sys.stderr)
rclexecm.logmsg(s)
class OCRCache(object):
def __init__(self, conf):
self.config = conf
@ -324,4 +327,3 @@ if __name__ == '__main__':
# if not incache:
# trystore(path)
#

View File

@ -39,9 +39,10 @@ _okexts = ('.tif', '.tiff', '.jpg', '.png', '.jpeg')
tesseractcmd = None
pdftoppmcmd = None
def _deb(s):
if not _mswindows:
print("rclocrtesseract: %s" % s, file=sys.stderr)
rclexecm.logmsg(s)
def vacuumdir(dir):
if dir:
@ -51,6 +52,7 @@ def vacuumdir(dir):
os.unlink(path)
return True
tmpdir = None
def _maybemaketmpdir():
global tmpdir
@ -61,13 +63,16 @@ def _maybemaketmpdir():
else:
tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
def finalcleanup():
    """Remove the module's temporary directory, if one was created.

    Does nothing while tmpdir is unset. Otherwise the directory is
    first emptied with vacuumdir(), because os.rmdir() only removes
    empty directories, then deleted.
    """
    if not tmpdir:
        return
    vacuumdir(tmpdir)
    os.rmdir(tmpdir)
atexit.register(finalcleanup)
# Return true if tesseract and the appropriate conversion program for
# the file type (e.g. pdftoppm for pdf) appear to be available
def ocrpossible(config, path):
@ -145,6 +150,7 @@ def _guesstesseractlang(config, path):
_deb("Tesseract lang (guessed): %s" % tesseractlang)
return tesseractlang
# Process pdf file: use pdftoppm to split it into ppm pages, then run
# tesseract on each and concatenate the result. It would probably be
# possible instead to use pdftocairo to produce a tiff, but pdftocairo

View File

@ -9,24 +9,26 @@ import platform
import subprocess
import glob
ftrace = sys.stderr
#ftrace = open("C:/Users/Bill/log-uncomp.txt", "w")
def _msg(s):
    """Emit the message s through rclexecm.logmsg()."""
    rclexecm.logmsg(s)
sysplat = platform.system()
if sysplat != "Windows":
print("rcluncomp.py: only for Windows", file = ftrace)
_msg("rcluncomp.py: only for Windows")
sys.exit(1)
try:
import msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
except Exception as err:
print("setmode binary failed: %s" % str(err), file = ftrace)
_msg("setmode binary failed: %s" % str(err))
sevenz = rclexecm.which("7z")
if not sevenz:
print("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \
"in recoll.conf ?", file=ftrace)
_msg("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \
"in recoll.conf ?")
sys.exit(2)
# Params: uncompression program, input file name, temp directory.
@ -34,7 +36,7 @@ if not sevenz:
infile = sys.argv[2]
outdir = sys.argv[3]
# print("rcluncomp.py infile [%s], outdir [%s]" % (infile, outdir), file = ftrace)
# _msg("rcluncomp.py infile [%s], outdir [%s]" % (infile, outdir))
# There is apparently no way to suppress 7z output. Hopefully the
# possible deadlock described by the subprocess module doc can't occur
@ -47,7 +49,7 @@ try:
# There should be only one file in there..
print(outputname[0])
except Exception as err:
print("%s" % (str(err),), file = ftrace)
_msg("%s" % (str(err),))
sys.exit(4)
sys.exit(0)