python filters: replace misc message printing with single method in rclexecm

This commit is contained in:
Jean-Francois Dockes 2020-09-23 18:38:22 +02:00
parent fd5297dc73
commit 0dd609cf1a
7 changed files with 58 additions and 39 deletions

View File

@ -31,8 +31,26 @@ import rclconfig
import cmdtalk import cmdtalk
PY3 = (sys.version > '3') PY3 = (sys.version > '3')
_mswindows = (sys.platform == "win32") _g_mswindows = (sys.platform == "win32")
_execdir = os.path.dirname(sys.argv[0]) _g_execdir = os.path.dirname(sys.argv[0])
_g_config = rclconfig.RclConfig()
_g_debugfile = _g_config.getConfParam("filterdebuglog")
_g_errfout = None
def logmsg(msg):
    """Write one diagnostic message line for a filter script.

    Destination selection:
    - if _g_debugfile is set, it is opened in append mode on first use
      and the handle is cached in _g_errfout for the following calls;
    - otherwise, or if the open failed, the message goes to stderr,
      except when _g_mswindows is true, in which case it is dropped.
    """
    global _g_errfout
    if _g_debugfile and not _g_errfout:
        try:
            _g_errfout = open(_g_debugfile, "a")
        except Exception:
            # Logging is best effort: an unusable log file must not break
            # the filter. Unlike a bare except, this does not swallow
            # KeyboardInterrupt or SystemExit.
            pass
    if _g_errfout:
        print("%s" % msg, file=_g_errfout)
    elif not _g_mswindows:
        print("%s" % msg, file=sys.stderr)
# Convert to bytes if not already such. # Convert to bytes if not already such.
def makebytes(data): def makebytes(data):
@ -40,6 +58,7 @@ def makebytes(data):
return data.encode("UTF-8") return data.encode("UTF-8")
return data return data
# Possibly decode binary file name for use as subprocess argument, # Possibly decode binary file name for use as subprocess argument,
# depending on platform. # depending on platform.
def subprocfile(fn): def subprocfile(fn):
@ -48,26 +67,22 @@ def subprocfile(fn):
# to convert. # to convert.
# On Unix all list elements get converted to bytes in the C # On Unix all list elements get converted to bytes in the C
# _posixsubprocess module, nothing to do. # _posixsubprocess module, nothing to do.
if PY3 and _mswindows and type(fn) != type(''): if PY3 and _g_mswindows and type(fn) != type(''):
return fn.decode('UTF-8') return fn.decode('UTF-8')
else: else:
return fn return fn
# Check for truthiness of rclconfig value. # Check for truthiness of rclconfig value.
def configparamtrue(value): def configparamtrue(value):
if not value: if not value:
return False return False
try: try:
ivalue = int(value) ivalue = int(value)
if ivalue: return True if ivalue else False
return True
else:
return False
except: except:
pass return True if value[0] in 'tT' else False
if value[0] in 'tT':
return True
return False
# Escape special characters in plain text for inclusion in HTML doc. # Escape special characters in plain text for inclusion in HTML doc.
# Note: tried replacing this with a multiple replacer according to # Note: tried replacing this with a multiple replacer according to
@ -84,8 +99,6 @@ def htmlescape(txt):
return txt return txt
my_config = rclconfig.RclConfig()
############################################ ############################################
# RclExecM implements the communication protocol with the recollindex # RclExecM implements the communication protocol with the recollindex
# process. It calls the object specific of the document type to # process. It calls the object specific of the document type to
@ -109,7 +122,7 @@ class RclExecM(cmdtalk.CmdTalk):
self.maxmembersize = self.maxmembersize * 1024 self.maxmembersize = self.maxmembersize * 1024
# Tell cmdtalk where to log # Tell cmdtalk where to log
self.debugfile = my_config.getConfParam("filterdebuglog") self.debugfile = _g_config.getConfParam("filterdebuglog")
# Some of our params are binary, cmdtalk should not decode them # Some of our params are binary, cmdtalk should not decode them
self.nodecodeinput = True self.nodecodeinput = True
@ -222,7 +235,7 @@ def which(program):
def path_candidates(): def path_candidates():
yield os.path.dirname(sys.argv[0]) yield os.path.dirname(sys.argv[0])
rclpath = my_config.getConfParam("recollhelperpath") rclpath = _g_config.getConfParam("recollhelperpath")
if rclpath: if rclpath:
for path in rclpath.split(os.pathsep): for path in rclpath.split(os.pathsep):
yield path yield path
@ -244,9 +257,9 @@ def which(program):
def execPythonScript(icmd): def execPythonScript(icmd):
import subprocess import subprocess
cmd = list(icmd) cmd = list(icmd)
if _mswindows: if _g_mswindows:
if not os.path.isabs(cmd[0]): if not os.path.isabs(cmd[0]):
cmd[0] = os.path.join(_execdir, cmd[0]) cmd[0] = os.path.join(_g_execdir, cmd[0])
cmd = [sys.executable] + cmd cmd = [sys.executable] + cmd
return subprocess.check_output(cmd) return subprocess.check_output(cmd)
@ -347,8 +360,8 @@ def main(proto, extract):
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer. # Some filters (e.g. rclaudio) need/get a MIME type from the indexer.
# We make a half-assed attempt to emulate: # We make a half-assed attempt to emulate:
mimetype = my_config.mimeType(path) mimetype = _g_config.mimeType(path)
if not mimetype and not _mswindows: if not mimetype and not _g_mswindows:
mimetype = mimetype_with_file(path) mimetype = mimetype_with_file(path)
if mimetype: if mimetype:
params['mimetype'] = mimetype params['mimetype'] = mimetype

View File

@ -29,9 +29,6 @@ import sys
from struct import unpack, pack from struct import unpack, pack
import six import six
def debug(s):
print("%s"%s, file=sys.stderr)
PY3 = sys.version > '3' PY3 = sys.version > '3'
if PY3: if PY3:

View File

@ -30,11 +30,10 @@ import importlib.util
import rclconfig import rclconfig
import rclocrcache import rclocrcache
import rclexecm
_mswindows = (sys.platform == "win32")
def _deb(s): def _deb(s):
if not _mswindows: rclexecm.logmsg(s)
print("rclocr: %s" % s, file=sys.stderr)
def Usage(): def Usage():
_deb("Usage: rclocr.py <imagefilename>") _deb("Usage: rclocr.py <imagefilename>")
@ -57,6 +56,7 @@ def breakwrite(f, data):
offset += tow offset += tow
total -= tow total -= tow
if len(sys.argv) != 2: if len(sys.argv) != 2:
Usage() Usage()

View File

@ -40,8 +40,7 @@ abbyyocrcmd = ""
abbyocrdir = "" abbyocrdir = ""
def _deb(s): def _deb(s):
if not _mswindows: rclexecm.logmsg(s)
print("rclocrabbyy: %s" % s, file=sys.stderr)
# Return true if abbyy appears to be available # Return true if abbyy appears to be available
def ocrpossible(config, path): def ocrpossible(config, path):

View File

@ -61,9 +61,12 @@ import urllib.parse
import zlib import zlib
import glob import glob
import rclexecm
def _deb(s): def _deb(s):
print("rclocrcache: %s" %s, file=sys.stderr) rclexecm.logmsg(s)
class OCRCache(object): class OCRCache(object):
def __init__(self, conf): def __init__(self, conf):
self.config = conf self.config = conf
@ -324,4 +327,3 @@ if __name__ == '__main__':
# if not incache: # if not incache:
# trystore(path) # trystore(path)
# #

View File

@ -39,9 +39,10 @@ _okexts = ('.tif', '.tiff', '.jpg', '.png', '.jpeg')
tesseractcmd = None tesseractcmd = None
pdftoppmcmd = None pdftoppmcmd = None
def _deb(s): def _deb(s):
if not _mswindows: rclexecm.logmsg(s)
print("rclocrtesseract: %s" % s, file=sys.stderr)
def vacuumdir(dir): def vacuumdir(dir):
if dir: if dir:
@ -51,6 +52,7 @@ def vacuumdir(dir):
os.unlink(path) os.unlink(path)
return True return True
tmpdir = None tmpdir = None
def _maybemaketmpdir(): def _maybemaketmpdir():
global tmpdir global tmpdir
@ -61,13 +63,16 @@ def _maybemaketmpdir():
else: else:
tmpdir = tempfile.mkdtemp(prefix='rclmpdf') tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
def finalcleanup(): def finalcleanup():
if tmpdir: if tmpdir:
vacuumdir(tmpdir) vacuumdir(tmpdir)
os.rmdir(tmpdir) os.rmdir(tmpdir)
atexit.register(finalcleanup) atexit.register(finalcleanup)
# Return true if tesseract and the appropriate conversion program for # Return true if tesseract and the appropriate conversion program for
# the file type (e.g. pdftoppm for pdf) appear to be available # the file type (e.g. pdftoppm for pdf) appear to be available
def ocrpossible(config, path): def ocrpossible(config, path):
@ -145,6 +150,7 @@ def _guesstesseractlang(config, path):
_deb("Tesseract lang (guessed): %s" % tesseractlang) _deb("Tesseract lang (guessed): %s" % tesseractlang)
return tesseractlang return tesseractlang
# Process pdf file: use pdftoppm to split it into ppm pages, then run # Process pdf file: use pdftoppm to split it into ppm pages, then run
# tesseract on each and concatenate the result. It would probably be # tesseract on each and concatenate the result. It would probably be
# possible instead to use pdftocairo to produce a tiff, but pdftocairo # possible instead to use pdftocairo to produce a tiff, but pdftocairo

View File

@ -9,24 +9,26 @@ import platform
import subprocess import subprocess
import glob import glob
ftrace = sys.stderr
#ftrace = open("C:/Users/Bill/log-uncomp.txt", "w") def _msg(s):
rclexecm.logmsg(s)
sysplat = platform.system() sysplat = platform.system()
if sysplat != "Windows": if sysplat != "Windows":
print("rcluncomp.py: only for Windows", file = ftrace) _msg("rcluncomp.py: only for Windows")
sys.exit(1) sys.exit(1)
try: try:
import msvcrt import msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
except Exception as err: except Exception as err:
print("setmode binary failed: %s" % str(err), file = ftrace) _msg("setmode binary failed: %s" % str(err))
sevenz = rclexecm.which("7z") sevenz = rclexecm.which("7z")
if not sevenz: if not sevenz:
print("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \ _msg("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \
"in recoll.conf ?", file=ftrace) "in recoll.conf ?")
sys.exit(2) sys.exit(2)
# Params: uncompression program, input file name, temp directory. # Params: uncompression program, input file name, temp directory.
@ -34,7 +36,7 @@ if not sevenz:
infile = sys.argv[2] infile = sys.argv[2]
outdir = sys.argv[3] outdir = sys.argv[3]
# print("rcluncomp.py infile [%s], outdir [%s]" % (infile, outdir), file = ftrace) # _msg("rcluncomp.py infile [%s], outdir [%s]" % (infile, outdir))
# There is apparently no way to suppress 7z output. Hopefully the # There is apparently no way to suppress 7z output. Hopefully the
# possible deadlock described by the subprocess module doc can't occur # possible deadlock described by the subprocess module doc can't occur
@ -47,7 +49,7 @@ try:
# There should be only one file in there.. # There should be only one file in there..
print(outputname[0]) print(outputname[0])
except Exception as err: except Exception as err:
print("%s" % (str(err),), file = ftrace) _msg("%s" % (str(err),))
sys.exit(4) sys.exit(4)
sys.exit(0) sys.exit(0)