rclpst: indexing / searching mostly working, possibly with issues in data
charset conversions (to be checked). Preview does not work: ipath needs conversion inside pffexport
This commit is contained in:
parent
0101e6e160
commit
f0944ae0b2
@ -25,21 +25,31 @@
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import posixpath
|
|
||||||
import pathlib
|
import pathlib
|
||||||
import tempfile
|
|
||||||
import shutil
|
|
||||||
import getopt
|
|
||||||
import traceback
|
|
||||||
import email.parser
|
import email.parser
|
||||||
import email.policy
|
import email.policy
|
||||||
import mailbox
|
import mailbox
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import rclconfig
|
import rclconfig
|
||||||
import conftree
|
import conftree
|
||||||
|
|
||||||
|
_mswindows = (sys.platform == "win32" or sys.platform == "msys")
|
||||||
|
if _mswindows:
|
||||||
|
import ntpath
|
||||||
|
met_basename = ntpath.basename
|
||||||
|
met_dirname = ntpath.dirname
|
||||||
|
met_splitext = ntpath.splitext
|
||||||
|
met_join = ntpath.join
|
||||||
|
def _backslashize(s):
|
||||||
|
return s.replace("/", "\\")
|
||||||
|
else:
|
||||||
|
met_basename = os.path.basename
|
||||||
|
met_dirname = os.path.dirname
|
||||||
|
met_splitext = os.path.splitext
|
||||||
|
met_join = os.path.join
|
||||||
|
def _backslashize(s):
|
||||||
|
return s
|
||||||
|
|
||||||
# The pffexport stream yields the email in several pieces, with some
|
# The pffexport stream yields the email in several pieces, with some
|
||||||
# data missing (e.g. attachment MIME types). We rebuild a complete
|
# data missing (e.g. attachment MIME types). We rebuild a complete
|
||||||
@ -103,7 +113,7 @@ class EmailBuilder(object):
|
|||||||
|
|
||||||
for att in self.attachments:
|
for att in self.attachments:
|
||||||
fn = att[1]
|
fn = att[1]
|
||||||
ext = os.path.splitext(fn)[1]
|
ext = met_splitext(fn)[1]
|
||||||
mime = self.mimemap.get(ext)
|
mime = self.mimemap.get(ext)
|
||||||
if not mime:
|
if not mime:
|
||||||
mime = 'application/octet-stream'
|
mime = 'application/octet-stream'
|
||||||
@ -174,8 +184,9 @@ class PFFReader(object):
|
|||||||
if name == 'filename':
|
if name == 'filename':
|
||||||
#self.log("filename: %s" % paramstr)
|
#self.log("filename: %s" % paramstr)
|
||||||
fullpath = paramstr
|
fullpath = paramstr
|
||||||
basename = os.path.basename(fullpath)
|
basename = met_basename(fullpath)
|
||||||
parentdir = os.path.basename(os.path.dirname(fullpath))
|
parentdir = met_basename(met_dirname(fullpath))
|
||||||
|
#self.log("basename [%s] parentdir [%s]" % (basename, parentdir))
|
||||||
elif name == 'data':
|
elif name == 'data':
|
||||||
if parentdir == 'Attachments':
|
if parentdir == 'Attachments':
|
||||||
#self.log("Attachment: %s" % basename)
|
#self.log("Attachment: %s" % basename)
|
||||||
@ -186,18 +197,21 @@ class PFFReader(object):
|
|||||||
if doc:
|
if doc:
|
||||||
yield((doc, ipath))
|
yield((doc, ipath))
|
||||||
elif basename == 'InternetHeaders.txt':
|
elif basename == 'InternetHeaders.txt':
|
||||||
#self.log("name: [%s] data: %s" % (name, paramstr))
|
#self.log("name: [%s] data: %s" % (name, paramstr[:20]))
|
||||||
# This part is the indispensable one. Record
|
# This part is the indispensable one. Record
|
||||||
# the ipath at this point:
|
# the ipath at this point:
|
||||||
p = pathlib.Path(fullpath)
|
if _mswindows:
|
||||||
|
p = pathlib.PureWindowsPath(fullpath)
|
||||||
|
else:
|
||||||
|
p = pathlib.Path(fullpath)
|
||||||
# Strip the top dir (/nonexistent.export/)
|
# Strip the top dir (/nonexistent.export/)
|
||||||
p = p.relative_to(*p.parts[:2])
|
p = p.relative_to(*p.parts[:2])
|
||||||
# We use the parent directory as ipath: all
|
# We use the parent directory as ipath: all
|
||||||
# the message parts are in there
|
# the message parts are in there
|
||||||
ipath = str(p.parents[0])
|
ipath = str(p.parents[0])
|
||||||
self.msg.setheaders(data)
|
self.msg.setheaders(data)
|
||||||
elif os.path.splitext(basename)[0] == 'Message':
|
elif met_splitext(basename)[0] == 'Message':
|
||||||
ext = os.path.splitext(basename)[1]
|
ext = met_splitext(basename)[1]
|
||||||
if ext == '.txt':
|
if ext == '.txt':
|
||||||
self.msg.setbody(data, 'text', 'plain')
|
self.msg.setbody(data, 'text', 'plain')
|
||||||
elif ext == '.html':
|
elif ext == '.html':
|
||||||
@ -224,13 +238,25 @@ class PstExtractor(object):
|
|||||||
def __init__(self, em):
|
def __init__(self, em):
|
||||||
self.generator = None
|
self.generator = None
|
||||||
self.em = em
|
self.em = em
|
||||||
self.target = "/nonexistent"
|
if _mswindows:
|
||||||
self.cmd = ["pffexport", "-q", "-t", self.target, "-s"]
|
self.target = "\\\\?\\c:\\nonexistent"
|
||||||
|
else:
|
||||||
|
self.target = "/nonexistent"
|
||||||
|
self.pffexport = rclexecm.which("pffexport")
|
||||||
|
if not self.pffexport:
|
||||||
|
self.pffexport = rclexecm.which("pffinstall/mingw32/bin/pffexport")
|
||||||
|
if not self.pffexport:
|
||||||
|
# No need for anything else. openfile() will return an
|
||||||
|
# error at once
|
||||||
|
return
|
||||||
|
self.cmd = [self.pffexport, "-q", "-t", self.target, "-s"]
|
||||||
|
|
||||||
def startCmd(self, filename, ipath=None):
|
def startCmd(self, filename, ipath=None):
|
||||||
fullcmd = self.cmd + [rclexecm.subprocfile(filename)]
|
fullcmd = self.cmd
|
||||||
if ipath:
|
if ipath:
|
||||||
fullcmd += ["-p", ipath]
|
fullcmd += ["-p", ipath]
|
||||||
|
fn = _backslashize(rclexecm.subprocfile(filename))
|
||||||
|
fullcmd += [fn,]
|
||||||
try:
|
try:
|
||||||
self.proc = subprocess.Popen(fullcmd, stdout=subprocess.PIPE)
|
self.proc = subprocess.Popen(fullcmd, stdout=subprocess.PIPE)
|
||||||
except subprocess.CalledProcessError as err:
|
except subprocess.CalledProcessError as err:
|
||||||
@ -244,13 +270,16 @@ class PstExtractor(object):
|
|||||||
|
|
||||||
###### File type handler api, used by rclexecm ---------->
|
###### File type handler api, used by rclexecm ---------->
|
||||||
def openfile(self, params):
|
def openfile(self, params):
|
||||||
|
if not self.pffexport:
|
||||||
|
print("RECFILTERROR HELPERNOTFOUND pffexport")
|
||||||
|
sys.exit(1);
|
||||||
self.filename = params["filename:"]
|
self.filename = params["filename:"]
|
||||||
self.em.rclog("openfile: %s" % self.filename)
|
self.em.rclog("openfile: sys.platform [%s] [%s]" % (sys.platform,self.filename))
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def getipath(self, params):
|
def getipath(self, params):
|
||||||
ipath = posixpath.join(self.target + ".export",
|
ipath = met_join(self.target + ".export",
|
||||||
params["ipath:"].decode("UTF-8"))
|
params["ipath:"].decode("UTF-8"))
|
||||||
self.em.rclog("getipath: [%s]" % ipath)
|
self.em.rclog("getipath: [%s]" % ipath)
|
||||||
if not self.startCmd(self.filename, ipath=ipath):
|
if not self.startCmd(self.filename, ipath=ipath):
|
||||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
|
|||||||
@ -94,6 +94,8 @@ application/javascript = internal text/plain
|
|||||||
# chose one.
|
# chose one.
|
||||||
application/vnd.ms-office = execm python rcldoc.py
|
application/vnd.ms-office = execm python rcldoc.py
|
||||||
|
|
||||||
|
application/vnd.ms-outlook = execm python rclpst.py
|
||||||
|
|
||||||
application/ogg = execm python rclaudio
|
application/ogg = execm python rclaudio
|
||||||
|
|
||||||
application/x-awk = internal text/plain
|
application/x-awk = internal text/plain
|
||||||
|
|||||||
@ -45,6 +45,7 @@ LIBWPD=${RCLDEPS}libwpd/libwpd-0.10.0/
|
|||||||
LIBREVENGE=${RCLDEPS}libwpd/librevenge-0.0.1.jfd/
|
LIBREVENGE=${RCLDEPS}libwpd/librevenge-0.0.1.jfd/
|
||||||
CHM=${RCLDEPS}pychm
|
CHM=${RCLDEPS}pychm
|
||||||
MISC=${RCLDEPS}misc
|
MISC=${RCLDEPS}misc
|
||||||
|
LIBPFF=${RCLDEPS}pffinstall
|
||||||
|
|
||||||
# Where to copy the Qt Dlls from:
|
# Where to copy the Qt Dlls from:
|
||||||
QTBIN=C:/Qt/Qt5.8.0/5.8/mingw53_32/bin
|
QTBIN=C:/Qt/Qt5.8.0/5.8/mingw53_32/bin
|
||||||
@ -261,6 +262,13 @@ copychm()
|
|||||||
cp -rp $CHM/chm $DEST || fatal "can't copy pychm"
|
cp -rp $CHM/chm $DEST || fatal "can't copy pychm"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
copypff()
|
||||||
|
{
|
||||||
|
DEST=$FILTERS
|
||||||
|
cp -rp $LIBPFF $DEST || fatal "can't copy pffinstall"
|
||||||
|
chkcp $LIBPFF/mingw32/bin/pffexport.exe $DEST/pffinstall/mingw32
|
||||||
|
}
|
||||||
|
|
||||||
for d in doc examples filters images translations; do
|
for d in doc examples filters images translations; do
|
||||||
test -d $DESTDIR/Share/$d || mkdir -p $DESTDIR/Share/$d || \
|
test -d $DESTDIR/Share/$d || mkdir -p $DESTDIR/Share/$d || \
|
||||||
fatal mkdir $d failed
|
fatal mkdir $d failed
|
||||||
@ -293,4 +301,5 @@ copyepub
|
|||||||
#copypyexiv2
|
#copypyexiv2
|
||||||
copywpd
|
copywpd
|
||||||
#copychm
|
#copychm
|
||||||
|
copypff
|
||||||
copypython
|
copypython
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user