Python handlers: factorise tmp dir code
This commit is contained in:
parent
d942b23c85
commit
e121695a3c
@ -265,19 +265,30 @@ def execPythonScript(icmd):
|
||||
|
||||
# Temp dir helper
|
||||
class SafeTmpDir:
|
||||
def __init__(self, em):
|
||||
def __init__(self, tag, em=None):
|
||||
self.tag = tag
|
||||
self.em = em
|
||||
self.toptmp = ""
|
||||
self.tmpdir = ""
|
||||
self.toptmp = None
|
||||
self.tmpdir = None
|
||||
|
||||
def __del__(self):
|
||||
try:
|
||||
if self.toptmp:
|
||||
shutil.rmtree(self.tmpdir, True)
|
||||
if self.toptmp:
|
||||
try:
|
||||
if self.tmpdir:
|
||||
shutil.rmtree(self.tmpdir, True)
|
||||
os.rmdir(self.toptmp)
|
||||
except Exception as err:
|
||||
self.em.rclog("delete dir failed for " + self.toptmp)
|
||||
except Exception as err:
|
||||
if self.em:
|
||||
self.em.rclog("delete dir failed for " + self.toptmp)
|
||||
|
||||
def vacuumdir(self):
|
||||
if self.tmpdir:
|
||||
for fn in os.listdir(self.tmpdir):
|
||||
path = os.path.join(self.tmpdir, fn)
|
||||
if os.path.isfile(path):
|
||||
os.unlink(path)
|
||||
return True
|
||||
|
||||
def getpath(self):
|
||||
if not self.tmpdir:
|
||||
envrcltmp = os.getenv('RECOLL_TMPDIR')
|
||||
@ -286,7 +297,7 @@ class SafeTmpDir:
|
||||
else:
|
||||
self.toptmp = tempfile.mkdtemp(prefix='rcltmp')
|
||||
|
||||
self.tmpdir = os.path.join(self.toptmp, 'rclsofftmp')
|
||||
self.tmpdir = os.path.join(self.toptmp, self.tag)
|
||||
os.makedirs(self.tmpdir)
|
||||
|
||||
return self.tmpdir
|
||||
|
||||
@ -42,33 +42,25 @@ pdftoppmcmd = None
|
||||
def _deb(s):
|
||||
rclexecm.logmsg("rclocrtesseract: %s" % s)
|
||||
|
||||
def vacuumdir(dir):
|
||||
if dir:
|
||||
for fn in os.listdir(dir):
|
||||
path = os.path.join(dir, fn)
|
||||
if os.path.isfile(path):
|
||||
os.unlink(path)
|
||||
return True
|
||||
|
||||
|
||||
tmpdir = None
|
||||
|
||||
def _maybemaketmpdir():
|
||||
global tmpdir
|
||||
if tmpdir:
|
||||
if not vacuumdir(tmpdir):
|
||||
_deb("openfile: vacuumdir %s failed" % tmpdir)
|
||||
if not tmpdir.vacuumdir():
|
||||
_deb("openfile: vacuumdir %s failed" % tmpdir.getpath())
|
||||
return False
|
||||
else:
|
||||
tmpdir = tempfile.mkdtemp(prefix='rclocrtmp')
|
||||
tmpdir = rclexecm.SafeTmpDir("rclocrtesseract")
|
||||
|
||||
|
||||
def cleanocr():
|
||||
global tmpdir
|
||||
if tmpdir:
|
||||
vacuumdir(tmpdir)
|
||||
os.rmdir(tmpdir)
|
||||
del tmpdir
|
||||
tmpdir = None
|
||||
|
||||
|
||||
|
||||
# Return true if tesseract and the appropriate conversion program for
|
||||
# the file type (e.g. pdftoppm for pdf) appear to be available
|
||||
def ocrpossible(config, path):
|
||||
@ -165,12 +157,12 @@ def _pdftesseract(config, path):
|
||||
|
||||
tesseractlang = _guesstesseractlang(config, path)
|
||||
|
||||
#tesserrorfile = os.path.join(tmpdir, "tesserrorfile")
|
||||
tmpfile = os.path.join(tmpdir, "ocrXXXXXX")
|
||||
#tesserrorfile = os.path.join(tmpdir.getpath(), "tesserrorfile")
|
||||
tmpfile = os.path.join(tmpdir.getpath(), "ocrXXXXXX")
|
||||
|
||||
# Split pdf pages
|
||||
try:
|
||||
vacuumdir(tmpdir)
|
||||
tmpdir.vacuumdir()
|
||||
cmd = [pdftoppmcmd, "-r", "300", path, tmpfile]
|
||||
#_deb("Executing %s" % cmd)
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
@ -69,8 +69,7 @@ _htmlsuffix = b'''</pre></body></html>'''
|
||||
def finalcleanup():
|
||||
global tmpdir
|
||||
if tmpdir:
|
||||
vacuumdir(tmpdir)
|
||||
os.rmdir(tmpdir)
|
||||
del tmpdir
|
||||
tmpdir = None
|
||||
|
||||
ocrproc = None
|
||||
@ -93,14 +92,6 @@ except: pass
|
||||
try: signal.signal(signal.SIGTERM, signal_handler)
|
||||
except: pass
|
||||
|
||||
def vacuumdir(dir):
|
||||
if dir:
|
||||
for fn in os.listdir(dir):
|
||||
path = os.path.join(dir, fn)
|
||||
if os.path.isfile(path):
|
||||
os.unlink(path)
|
||||
return True
|
||||
|
||||
class PDFExtractor:
|
||||
def __init__(self, em):
|
||||
self.currentindex = 0
|
||||
@ -221,7 +212,7 @@ class PDFExtractor:
|
||||
# no big deal
|
||||
return True
|
||||
try:
|
||||
vacuumdir(tmpdir)
|
||||
tmpdir.vacuumdir()
|
||||
# Note: the java version of pdftk sometimes/often fails
|
||||
# here with writing to stdout:
|
||||
# Error occurred during initialization of VM
|
||||
@ -231,9 +222,9 @@ class PDFExtractor:
|
||||
# output, until we fix the error or preferably find a way
|
||||
# to do it with poppler...
|
||||
subprocess.check_call(
|
||||
[self.pdftk, self.filename, "unpack_files", "output",
|
||||
tmpdir], stdout=sys.stderr)
|
||||
self.attachlist = sorted(os.listdir(tmpdir))
|
||||
[self.pdftk, self.filename, "unpack_files", "output", tmpdir.getpath()],
|
||||
stdout=sys.stderr)
|
||||
self.attachlist = sorted(os.listdir(tmpdir.getpath()))
|
||||
return True
|
||||
except Exception as e:
|
||||
self.em.rclog("extractAttach: failed: %s" % e)
|
||||
@ -407,11 +398,12 @@ class PDFExtractor:
|
||||
def maybemaketmpdir(self):
|
||||
global tmpdir
|
||||
if tmpdir:
|
||||
if not vacuumdir(tmpdir):
|
||||
self.em.rclog("openfile: vacuumdir %s failed" % tmpdir)
|
||||
if not tmpdir.vacuumdir():
|
||||
self.em.rclog("openfile: vacuumdir %s failed" % tmpdir.getpath())
|
||||
return False
|
||||
else:
|
||||
tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
|
||||
tmpdir = rclexecm.SafeTmpDir("rclpdf", self.em)
|
||||
#self.em.rclog("Using temporary directory %s" % tmpdir.getpath())
|
||||
if self.pdftk and re.match("/snap/", self.pdftk):
|
||||
# We know this is Unix (Ubuntu actually). Check that tmpdir
|
||||
# belongs to the user as snap commands can't use /tmp to share
|
||||
@ -423,9 +415,7 @@ class PDFExtractor:
|
||||
if st.st_uid == os.getuid():
|
||||
ok = True
|
||||
if not ok:
|
||||
self.em.rclog(
|
||||
"pdftk is a snap command and needs TMPDIR to be "
|
||||
"a directory you own")
|
||||
self.em.rclog("pdftk is a snap command and needs TMPDIR to be owned by you")
|
||||
|
||||
def _process_annotations(self, html):
|
||||
doc = Poppler.Document.new_from_file(
|
||||
@ -530,7 +520,7 @@ class PDFExtractor:
|
||||
if not self.attextractdone:
|
||||
if not self.extractAttach():
|
||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||
path = os.path.join(tmpdir, ipath)
|
||||
path = os.path.join(tmpdir.getpath(), ipath)
|
||||
if os.path.isfile(path):
|
||||
f = open(path, "rb")
|
||||
docdata = f.read();
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user