Python filters beginning to work, still issues.
--HG-- branch : WINDOWSPORT
This commit is contained in:
parent
06f43c573e
commit
330c7fc30d
@ -117,23 +117,33 @@ class WordFilter:
|
||||
identification. Do 2 tries at most'''
|
||||
if self.ntry == 0:
|
||||
self.ntry = 1
|
||||
return (["antiword", "-t", "-i", "1", "-m", "UTF-8"],
|
||||
WordProcessData(self.em))
|
||||
cmd = rclexecm.which("antiword")
|
||||
if cmd:
|
||||
return ([cmd, "-t", "-i", "1", "-m", "UTF-8"],
|
||||
WordProcessData(self.em))
|
||||
else:
|
||||
return ([],None)
|
||||
elif self.ntry == 1:
|
||||
self.ntry = 2
|
||||
# antiword failed. Check for an rtf file, or text and
|
||||
# process accordingly. If the doc is actually msword, try
|
||||
# wvWare.
|
||||
mt = self.mimetype(fn)
|
||||
self.em.rclog("rcldoc.py: actual MIME type %s" % mt)
|
||||
if mt == "text/plain":
|
||||
return ([os.path.join(self.execdir,"rcltext")],
|
||||
return ([python, os.path.join(self.execdir, "rcltext")],
|
||||
WordPassData(self.em))
|
||||
elif mt == "text/rtf":
|
||||
return ([os.path.join(self.execdir, "rclrtf")],
|
||||
WordPassData(self.em))
|
||||
cmd = [python, os.path.join(self.execdir, "rclrtf.py")]
|
||||
self.em.rclog("rcldoc.py: returning cmd %s" % cmd)
|
||||
return (cmd, WordPassData(self.em))
|
||||
elif mt == "application/msword":
|
||||
return (["wvWare", "--nographics", "--charset=utf-8"],
|
||||
WordPassData(self.em))
|
||||
cmd = rclexecm.which("wvWare")
|
||||
if cmd:
|
||||
return ([cmd, "--nographics", "--charset=utf-8"],
|
||||
WordPassData(self.em))
|
||||
else:
|
||||
return ([],None)
|
||||
else:
|
||||
return ([],None)
|
||||
else:
|
||||
|
||||
@ -49,6 +49,9 @@ class RclExecM:
|
||||
else:
|
||||
self.maxmembersize = 50 * 1024
|
||||
self.maxmembersize = self.maxmembersize * 1024
|
||||
if sys.platform == "win32":
|
||||
import msvcrt
|
||||
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
||||
|
||||
def rclog(self, s, doexit = 0, exitvalue = 1):
|
||||
print >> sys.stderr, "RCLMFILT:", self.myname, ":", s
|
||||
@ -216,14 +219,15 @@ class Executor:
|
||||
We expect cmd as a list of command name + arguments'''
|
||||
|
||||
try:
|
||||
proc = subprocess.Popen(cmd + [filename],
|
||||
fullcmd = cmd + [filename]
|
||||
proc = subprocess.Popen(fullcmd,
|
||||
stdout = subprocess.PIPE)
|
||||
stdout = proc.stdout
|
||||
except subprocess.CalledProcessError as err:
|
||||
self.em.rclog("extractone: Popen() error: %s" % err)
|
||||
self.em.rclog("extractone: Popen(%s) error: %s" % (fullcmd, err))
|
||||
return (False, "")
|
||||
except OSError as err:
|
||||
self.em.rclog("extractone: Popen OS error: %s" % err)
|
||||
self.em.rclog("extractone: Popen(%s) OS error: %s" % (fullcmd, err))
|
||||
return (False, "")
|
||||
|
||||
for line in stdout:
|
||||
@ -231,6 +235,7 @@ class Executor:
|
||||
|
||||
proc.wait()
|
||||
if proc.returncode:
|
||||
self.em.rclog("extractone: [%s] returncode %d" % (returncode))
|
||||
return False, postproc.wrapData()
|
||||
else:
|
||||
return True, postproc.wrapData()
|
||||
@ -283,12 +288,17 @@ def which(program):
|
||||
for ext in os.environ.get("PATHEXT", "").split(os.pathsep):
|
||||
yield fpath + ext
|
||||
|
||||
def path_candidates():
|
||||
yield os.path.dirname(sys.argv[0])
|
||||
for path in os.environ["PATH"].split(os.pathsep):
|
||||
yield path
|
||||
|
||||
fpath, fname = os.path.split(program)
|
||||
if fpath:
|
||||
if is_exe(program):
|
||||
return program
|
||||
else:
|
||||
for path in os.environ["PATH"].split(os.pathsep):
|
||||
for path in path_candidates():
|
||||
exe_file = os.path.join(path, program)
|
||||
for candidate in ext_candidates(exe_file):
|
||||
if is_exe(candidate):
|
||||
|
||||
52
src/filters/rclrtf.py
Normal file
52
src/filters/rclrtf.py
Normal file
@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import rclexecm
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Processing the output from unrtf
|
||||
class RTFProcessData:
    '''Accumulate the HTML lines output by unrtf, fixing the charset line.

    Lines are fed one at a time through takeLine() and the resulting
    document is retrieved with wrapData().
    '''

    def __init__(self, em):
        '''em is kept as-is in self.em; this class does not call it.'''
        self.em = em
        # Text accumulated so far, each input line followed by "\n"
        self.out = ""
        # Becomes 1 once the line holding </head> has been seen
        self.gothead = 0
        self.patendhead = re.compile(r'''</head>''')
        self.patcharset = re.compile(r'''^<meta http-equiv=''')

    # Some versions of unrtf put out a garbled charset line.
    # Apart from this, we pass the data untouched.
    def takeLine(self, line):
        '''Append one line (without its end of line) to the output.

        While still inside the header, lines beginning with
        '<meta http-equiv=' are dropped, and a UTF-8 Content-Type meta
        line is inserted just before the line which contains </head>.
        Lines after the header are copied unchanged.
        '''
        if self.gothead:
            self.out += line + "\n"
            return

        if self.patendhead.search(line):
            # The space before 'content' is needed: HTML attributes
            # must be separated by whitespace.
            self.out += '<meta http-equiv="Content-Type" ' + \
                        'content="text/html;charset=UTF-8">' + "\n"
            self.out += line + "\n"
            self.gothead = 1
        elif not self.patcharset.search(line):
            self.out += line + "\n"

    def wrapData(self):
        '''Return everything accumulated so far as a single string.'''
        return self.out
|
||||
|
||||
class RTFFilter:
    '''Supplies the unrtf command line and its output post-processor.'''

    def __init__(self, em):
        self.em = em

    def reset(self):
        '''Nothing to reset: this filter keeps no per-file state.'''
        pass

    def getCmd(self, fn):
        '''Return (command list, post-processor) for running unrtf.

        fn is not used here. When rclexecm.which() finds no unrtf
        executable, ([], None) is returned instead.
        '''
        unrtf = rclexecm.which("unrtf")
        if not unrtf:
            return ([],None)
        return ([unrtf, "--nopict", "--html"], RTFProcessData(self.em))
|
||||
|
||||
if __name__ == '__main__':
    # Build the protocol object, wrap the RTF filter in an Executor and
    # hand both to the rclexecm main loop.
    proto = rclexecm.RclExecM()
    rclexecm.main(proto, rclexecm.Executor(proto, RTFFilter(proto)))
|
||||
@ -90,6 +90,9 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
|
||||
LOGERR(("MHExecMultiple: getline error\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
LOGDEB1(("MHEM:rde: line [%s]\n", ibuf.c_str()));
|
||||
|
||||
// Empty line (end of message) ?
|
||||
if (!ibuf.compare("\n")) {
|
||||
LOGDEB(("MHExecMultiple: Got empty line\n"));
|
||||
@ -163,7 +166,7 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_cmd.getChildPid() < 0 && !startCmd()) {
|
||||
if (m_cmd.getChildPid() <= 0 && !startCmd()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -658,6 +658,7 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
|
||||
|
||||
// Create the child process.
|
||||
// Need a writable buffer for the command line, for some reason.
|
||||
LOGDEB1(("ExecCmd:startExec: cmdline [%s]\n", cmdline.c_str()));
|
||||
LPSTR buf = (LPSTR)malloc(cmdline.size() + 1);
|
||||
memcpy(buf, cmdline.c_str(), cmdline.size());
|
||||
buf[cmdline.size()] = 0;
|
||||
@ -818,7 +819,7 @@ int ExecCmd::receive(string& data, int cnt)
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cnt == 0)
|
||||
if ((cnt == 0 && totread > 0) || (cnt > 0 && totread == cnt))
|
||||
break;
|
||||
}
|
||||
LOGDEB1(("ExecCmd::receive: returning %d bytes\n", totread));
|
||||
|
||||
86
src/windows/mkinstdir.sh
Normal file
86
src/windows/mkinstdir.sh
Normal file
@ -0,0 +1,86 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Script to make a prototype recoll install directory from locally compiled
|
||||
# software. *** Needs cygwin ***
|
||||
|
||||
##############
|
||||
# Local values (to be adjusted)
|
||||
# Target directory where we copy things.
|
||||
DESTDIR="/cygdrive/c/recollinst"

# Recoll src/build tree
RECOLL="/cygdrive/c/recoll/src"

# Trees from which the unrtf and antiword files are copied
UNRTF="/cygdrive/c/unrtf"
ANTIWORD="/cygdrive/c/recolldeps/antiword"

# Build flavour: both values are path components of the build output dirs
CONFIGURATION="Debug"
PLATFORM="x64"


################
# Script:

# Directory receiving the filter scripts and the helper executables
FILTERS="$DESTDIR/Share/filters"
|
||||
|
||||
# Print the arguments as one error message on stderr, then abort the script
# with exit status 1.
fatal()
{
    # Quoted so that the message is not subject to globbing or re-splitting
    echo "$*" >&2
    exit 1
}
|
||||
|
||||
# checkcopy.
|
||||
# Copy file $1 to $2. Aborts the script if $1 is not an existing regular
# file, or if the copy fails.
cc()
{
    # Arguments are quoted so that paths containing spaces stay in one piece
    test -f "$1" || fatal "$1" does not exist
    cp "$1" "$2" || exit 1
}
|
||||
|
||||
# Copy the recoll executables, the pthread DLL, the sample configuration
# files and the filters into the install tree.
copyrecoll()
{
    # No trailing slash here: the file names below supply their own separator
    bindir="$RECOLL/windows/$PLATFORM/$CONFIGURATION"

    cc "$bindir/recollindex.exe" "$DESTDIR"
    cc "$bindir/recollq.exe" "$DESTDIR"
    cc "$bindir/pthreadVC2.dll" "$DESTDIR"

    for conf in fields fragbuts.xml mimeconf mimemap mimeview \
                recoll.conf recoll.qss
    do
        cc "$RECOLL/sampleconf/$conf" "$DESTDIR/Share/examples"
    done

    # The glob must stay outside the quotes to be expanded
    cp "$RECOLL"/filters/* "$FILTERS" || exit 1
}
|
||||
|
||||
# Copy the antiword executable and its Resources files into the filters
# directory.
copyantiword()
{
    bindir="$ANTIWORD/Win32-only/$PLATFORM/$CONFIGURATION"

    # This used to test $Filters (undefined, wrong case), i.e. /Resources,
    # instead of the directory which is actually created and filled below.
    test -d "$FILTERS/Resources" || mkdir -p "$FILTERS/Resources" || exit 1

    cc "$bindir/antiword.exe" "$FILTERS"

    # The glob must stay outside the quotes to be expanded
    cp "$ANTIWORD"/Resources/* "$FILTERS/Resources" || exit 1
}
|
||||
|
||||
# Copy the unrtf executable into the filters directory, and its output
# configuration files and symbol charmap into the Share subdirectory.
copyunrtf()
{
    bindir="$UNRTF/Windows/$PLATFORM/$CONFIGURATION"

    cc "$bindir/unrtf.exe" "$FILTERS"

    test -d "$FILTERS/Share" || mkdir -p "$FILTERS/Share" || exit 1
    # The glob must stay outside the quotes to be expanded
    cp "$UNRTF"/outputs/*.conf "$FILTERS/Share" || exit 1
    cc "$UNRTF/outputs/SYMBOL.charmap" "$FILTERS/Share"
}
|
||||
|
||||
|
||||
# Create the missing directories of the install tree, then populate it.
for dir in $DESTDIR $DESTDIR/Share/examples $FILTERS
do
    test -d $dir || mkdir -p $dir || exit 1
done
copyrecoll
copyunrtf
copyantiword
|
||||
Loading…
x
Reference in New Issue
Block a user