Python filters beginning to work, still issues.
--HG-- branch : WINDOWSPORT
This commit is contained in:
parent
06f43c573e
commit
330c7fc30d
@ -117,23 +117,33 @@ class WordFilter:
|
|||||||
identification. Do 2 tries at most'''
|
identification. Do 2 tries at most'''
|
||||||
if self.ntry == 0:
|
if self.ntry == 0:
|
||||||
self.ntry = 1
|
self.ntry = 1
|
||||||
return (["antiword", "-t", "-i", "1", "-m", "UTF-8"],
|
cmd = rclexecm.which("antiword")
|
||||||
WordProcessData(self.em))
|
if cmd:
|
||||||
|
return ([cmd, "-t", "-i", "1", "-m", "UTF-8"],
|
||||||
|
WordProcessData(self.em))
|
||||||
|
else:
|
||||||
|
return ([],None)
|
||||||
elif self.ntry == 1:
|
elif self.ntry == 1:
|
||||||
self.ntry = 2
|
self.ntry = 2
|
||||||
# antiword failed. Check for an rtf file, or text and
|
# antiword failed. Check for an rtf file, or text and
|
||||||
# process accordingly. If the doc is actually msword, try
|
# process accordingly. If the doc is actually msword, try
|
||||||
# wvWare.
|
# wvWare.
|
||||||
mt = self.mimetype(fn)
|
mt = self.mimetype(fn)
|
||||||
|
self.em.rclog("rcldoc.py: actual MIME type %s" % mt)
|
||||||
if mt == "text/plain":
|
if mt == "text/plain":
|
||||||
return ([os.path.join(self.execdir,"rcltext")],
|
return ([python, os.path.join(self.execdir, "rcltext")],
|
||||||
WordPassData(self.em))
|
WordPassData(self.em))
|
||||||
elif mt == "text/rtf":
|
elif mt == "text/rtf":
|
||||||
return ([os.path.join(self.execdir, "rclrtf")],
|
cmd = [python, os.path.join(self.execdir, "rclrtf.py")]
|
||||||
WordPassData(self.em))
|
self.em.rclog("rcldoc.py: returning cmd %s" % cmd)
|
||||||
|
return (cmd, WordPassData(self.em))
|
||||||
elif mt == "application/msword":
|
elif mt == "application/msword":
|
||||||
return (["wvWare", "--nographics", "--charset=utf-8"],
|
cmd = rclexecm.which("wvWare")
|
||||||
WordPassData(self.em))
|
if cmd:
|
||||||
|
return ([cmd, "--nographics", "--charset=utf-8"],
|
||||||
|
WordPassData(self.em))
|
||||||
|
else:
|
||||||
|
return ([],None)
|
||||||
else:
|
else:
|
||||||
return ([],None)
|
return ([],None)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@ -49,6 +49,9 @@ class RclExecM:
|
|||||||
else:
|
else:
|
||||||
self.maxmembersize = 50 * 1024
|
self.maxmembersize = 50 * 1024
|
||||||
self.maxmembersize = self.maxmembersize * 1024
|
self.maxmembersize = self.maxmembersize * 1024
|
||||||
|
if sys.platform == "win32":
|
||||||
|
import msvcrt
|
||||||
|
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
||||||
|
|
||||||
def rclog(self, s, doexit = 0, exitvalue = 1):
|
def rclog(self, s, doexit = 0, exitvalue = 1):
|
||||||
print >> sys.stderr, "RCLMFILT:", self.myname, ":", s
|
print >> sys.stderr, "RCLMFILT:", self.myname, ":", s
|
||||||
@ -216,14 +219,15 @@ class Executor:
|
|||||||
We expect cmd as a list of command name + arguments'''
|
We expect cmd as a list of command name + arguments'''
|
||||||
|
|
||||||
try:
|
try:
|
||||||
proc = subprocess.Popen(cmd + [filename],
|
fullcmd = cmd + [filename]
|
||||||
|
proc = subprocess.Popen(fullcmd,
|
||||||
stdout = subprocess.PIPE)
|
stdout = subprocess.PIPE)
|
||||||
stdout = proc.stdout
|
stdout = proc.stdout
|
||||||
except subprocess.CalledProcessError as err:
|
except subprocess.CalledProcessError as err:
|
||||||
self.em.rclog("extractone: Popen() error: %s" % err)
|
self.em.rclog("extractone: Popen(%s) error: %s" % (fullcmd, err))
|
||||||
return (False, "")
|
return (False, "")
|
||||||
except OSError as err:
|
except OSError as err:
|
||||||
self.em.rclog("extractone: Popen OS error: %s" % err)
|
self.em.rclog("extractone: Popen(%s) OS error: %s" % (fullcmd, err))
|
||||||
return (False, "")
|
return (False, "")
|
||||||
|
|
||||||
for line in stdout:
|
for line in stdout:
|
||||||
@ -231,6 +235,7 @@ class Executor:
|
|||||||
|
|
||||||
proc.wait()
|
proc.wait()
|
||||||
if proc.returncode:
|
if proc.returncode:
|
||||||
|
# Log the full command line together with the child's exit status.
self.em.rclog("extractone: [%s] returncode %d" % (fullcmd, proc.returncode))
|
||||||
return False, postproc.wrapData()
|
return False, postproc.wrapData()
|
||||||
else:
|
else:
|
||||||
return True, postproc.wrapData()
|
return True, postproc.wrapData()
|
||||||
@ -283,12 +288,17 @@ def which(program):
|
|||||||
for ext in os.environ.get("PATHEXT", "").split(os.pathsep):
|
for ext in os.environ.get("PATHEXT", "").split(os.pathsep):
|
||||||
yield fpath + ext
|
yield fpath + ext
|
||||||
|
|
||||||
|
def path_candidates():
|
||||||
|
yield os.path.dirname(sys.argv[0])
|
||||||
|
for path in os.environ["PATH"].split(os.pathsep):
|
||||||
|
yield path
|
||||||
|
|
||||||
fpath, fname = os.path.split(program)
|
fpath, fname = os.path.split(program)
|
||||||
if fpath:
|
if fpath:
|
||||||
if is_exe(program):
|
if is_exe(program):
|
||||||
return program
|
return program
|
||||||
else:
|
else:
|
||||||
for path in os.environ["PATH"].split(os.pathsep):
|
for path in path_candidates():
|
||||||
exe_file = os.path.join(path, program)
|
exe_file = os.path.join(path, program)
|
||||||
for candidate in ext_candidates(exe_file):
|
for candidate in ext_candidates(exe_file):
|
||||||
if is_exe(candidate):
|
if is_exe(candidate):
|
||||||
|
|||||||
52
src/filters/rclrtf.py
Normal file
52
src/filters/rclrtf.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import rclexecm
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Processing the output from unrtf
|
||||||
|
class RTFProcessData:
    '''Accumulate and lightly fix up the HTML produced by unrtf.

    Lines are fed one at a time through takeLine() and the accumulated
    text is retrieved with wrapData().

    Some versions of unrtf put out a garbled charset line. Lines that
    start with '<meta http-equiv=' are therefore dropped while inside
    the HTML head, and a known-good UTF-8 declaration is inserted just
    before the line containing '</head>'. Apart from this, the data is
    passed through untouched.
    '''

    # Replacement charset declaration. The two attributes must be
    # separated by whitespace for the tag to be well-formed HTML.
    CHARSET_META = '<meta http-equiv="Content-Type" ' \
                   'content="text/html;charset=UTF-8">'

    def __init__(self, em):
        '''Set up an empty output buffer.

        em is stored on the instance; this class does not call it.
        '''
        self.em = em
        self.out = ""
        # Becomes 1 once the line containing '</head>' has been seen.
        self.gothead = 0
        self.patendhead = re.compile('''</head>''')
        self.patcharset = re.compile('''^<meta http-equiv=''')

    def takeLine(self, line):
        '''Process one line of output; a newline is appended to kept lines.'''
        if self.gothead:
            # Past the head: everything is copied as is.
            self.out += line + "\n"
        elif self.patendhead.search(line):
            # End of head: emit our own charset declaration first.
            self.out += self.CHARSET_META + "\n"
            self.out += line + "\n"
            self.gothead = 1
        elif not self.patcharset.search(line):
            # Inside the head: keep everything except http-equiv meta lines.
            self.out += line + "\n"

    def wrapData(self):
        '''Return all the text accumulated so far as a single string.'''
        return self.out
|
||||||
|
|
||||||
|
class RTFFilter:
    '''Filter object supplying the command used to convert an RTF file.'''

    def __init__(self, em):
        # Kept so that it can be handed to each RTFProcessData instance.
        self.em = em

    def reset(self):
        '''Nothing to do: the only instance attribute is em.'''
        return None

    def getCmd(self, fn):
        '''Return a (command list, output processor) pair.

        fn is not used here. When rclexecm.which() does not find
        unrtf, the pair ([], None) is returned instead.
        '''
        unrtf = rclexecm.which("unrtf")
        if not unrtf:
            return ([], None)
        return ([unrtf, "--nopict", "--html"], RTFProcessData(self.em))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Build the protocol object, wrap the RTF filter in an executor that
    # shares it, then hand both to rclexecm.main().
    em = rclexecm.RclExecM()
    rclexecm.main(em, rclexecm.Executor(em, RTFFilter(em)))
|
||||||
@ -90,6 +90,9 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
|
|||||||
LOGERR(("MHExecMultiple: getline error\n"));
|
LOGERR(("MHExecMultiple: getline error\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LOGDEB1(("MHEM:rde: line [%s]\n", ibuf.c_str()));
|
||||||
|
|
||||||
// Empty line (end of message) ?
|
// Empty line (end of message) ?
|
||||||
if (!ibuf.compare("\n")) {
|
if (!ibuf.compare("\n")) {
|
||||||
LOGDEB(("MHExecMultiple: Got empty line\n"));
|
LOGDEB(("MHExecMultiple: Got empty line\n"));
|
||||||
@ -163,7 +166,7 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_cmd.getChildPid() < 0 && !startCmd()) {
|
if (m_cmd.getChildPid() <= 0 && !startCmd()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -658,6 +658,7 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
|
|||||||
|
|
||||||
// Create the child process.
|
// Create the child process.
|
||||||
// Need a writable buffer for the command line, for some reason.
|
// Need a writable buffer for the command line, for some reason.
|
||||||
|
LOGDEB1(("ExecCmd:startExec: cmdline [%s]\n", cmdline.c_str()));
|
||||||
LPSTR buf = (LPSTR)malloc(cmdline.size() + 1);
|
LPSTR buf = (LPSTR)malloc(cmdline.size() + 1);
|
||||||
memcpy(buf, cmdline.c_str(), cmdline.size());
|
memcpy(buf, cmdline.c_str(), cmdline.size());
|
||||||
buf[cmdline.size()] = 0;
|
buf[cmdline.size()] = 0;
|
||||||
@ -818,7 +819,7 @@ int ExecCmd::receive(string& data, int cnt)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (cnt == 0)
|
if ((cnt == 0 && totread > 0) || (cnt > 0 && totread == cnt))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
LOGDEB1(("ExecCmd::receive: returning %d bytes\n", totread));
|
LOGDEB1(("ExecCmd::receive: returning %d bytes\n", totread));
|
||||||
|
|||||||
86
src/windows/mkinstdir.sh
Normal file
86
src/windows/mkinstdir.sh
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# Script to make a prototype recoll install directory from locally compiled
|
||||||
|
# software. *** Needs cygwin ***
|
||||||
|
|
||||||
|
##############
|
||||||
|
# Local values (to be adjusted)
|
||||||
|
# Target directory where we copy things.
|
||||||
|
DESTDIR=/cygdrive/c/recollinst
|
||||||
|
|
||||||
|
# Recoll src/build tree
|
||||||
|
RECOLL=/cygdrive/c/recoll/src
|
||||||
|
|
||||||
|
UNRTF=/cygdrive/c/unrtf
|
||||||
|
ANTIWORD=/cygdrive/c/recolldeps/antiword
|
||||||
|
|
||||||
|
CONFIGURATION=Debug
|
||||||
|
PLATFORM=x64
|
||||||
|
|
||||||
|
|
||||||
|
################
|
||||||
|
# Script:
|
||||||
|
|
||||||
|
FILTERS=$DESTDIR/Share/filters
|
||||||
|
|
||||||
|
# Print the arguments as a single error message on stderr, then abort
# the script with exit status 1.
fatal()
{
    # Quoted so that the message is not word-split or glob-expanded.
    printf '%s\n' "$*" >&2
    exit 1
}
|
||||||
|
|
||||||
|
# checkcopy.
|
||||||
|
# Checked copy: copy regular file $1 to $2.
# Aborts the script if $1 is not an existing regular file or if cp fails.
cc()
{
    # Expansions are quoted so that paths containing spaces stay intact.
    test -f "$1" || fatal "$1" does not exist
    cp "$1" "$2" || exit 1
}
|
||||||
|
|
||||||
|
# Copy the recoll executables, the pthread DLL, the sample configuration
# files and the filters into the install tree. Aborts on the first failure.
copyrecoll()
{
    # No trailing slash, so that the joined paths do not contain "//".
    bindir=$RECOLL/windows/$PLATFORM/$CONFIGURATION

    for f in recollindex.exe recollq.exe pthreadVC2.dll; do
        cc "$bindir/$f" "$DESTDIR"
    done

    for f in fields fragbuts.xml mimeconf mimemap mimeview \
             recoll.conf recoll.qss; do
        cc "$RECOLL/sampleconf/$f" "$DESTDIR/Share/examples"
    done

    # The glob is left outside the quotes so that it still expands.
    cp "$RECOLL"/filters/* "$FILTERS" || exit 1
}
|
||||||
|
|
||||||
|
# Copy antiword.exe and its Resources files into the filters directory.
# Aborts the script on the first failure.
copyantiword()
{
    bindir=$ANTIWORD/Win32-only/$PLATFORM/$CONFIGURATION

    # Test the same directory that is created: variable names are
    # case-sensitive, so this must be $FILTERS.
    test -d "$FILTERS/Resources" || mkdir -p "$FILTERS/Resources" || exit 1

    cc "$bindir/antiword.exe" "$FILTERS"

    # The glob is left outside the quotes so that it still expands.
    cp "$ANTIWORD"/Resources/* "$FILTERS/Resources" || exit 1
}
|
||||||
|
|
||||||
|
# Copy unrtf.exe into the filters directory, and the unrtf output
# configuration files and SYMBOL.charmap into its Share subdirectory.
# Aborts the script on the first failure.
copyunrtf()
{
    bindir=$UNRTF/Windows/$PLATFORM/$CONFIGURATION

    cc "$bindir/unrtf.exe" "$FILTERS"

    test -d "$FILTERS/Share" || mkdir -p "$FILTERS/Share" || exit 1
    # The glob is left outside the quotes so that it still expands.
    cp "$UNRTF"/outputs/*.conf "$FILTERS/Share" || exit 1
    cc "$UNRTF/outputs/SYMBOL.charmap" "$FILTERS/Share"
}
|
||||||
|
|
||||||
|
|
||||||
|
# Create the destination directories if needed, then populate them.
# Expansions are quoted so that paths containing spaces stay intact.
test -d "$DESTDIR" || mkdir -p "$DESTDIR" || exit 1
test -d "$DESTDIR/Share/examples" || mkdir -p "$DESTDIR/Share/examples" || exit 1
test -d "$FILTERS" || mkdir -p "$FILTERS" || exit 1
copyrecoll
copyunrtf
copyantiword
|
||||||
Loading…
x
Reference in New Issue
Block a user