From 330c7fc30da72decc51d6066ee1b3aa7bdef45c1 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 11 Sep 2015 16:16:16 +0200 Subject: [PATCH] Python filters beginning to work, still issues. --HG-- branch : WINDOWSPORT --- src/filters/rcldoc.py | 24 ++++++++--- src/filters/rclexecm.py | 18 ++++++-- src/filters/rclrtf.py | 52 ++++++++++++++++++++++ src/internfile/mh_execm.cpp | 5 ++- src/windows/execmd_w.cpp | 3 +- src/windows/mkinstdir.sh | 86 +++++++++++++++++++++++++++++++++++++ 6 files changed, 175 insertions(+), 13 deletions(-) create mode 100644 src/filters/rclrtf.py create mode 100644 src/windows/mkinstdir.sh diff --git a/src/filters/rcldoc.py b/src/filters/rcldoc.py index 8fb7a31a..0e67239c 100755 --- a/src/filters/rcldoc.py +++ b/src/filters/rcldoc.py @@ -117,23 +117,33 @@ class WordFilter: identification. Do 2 tries at most''' if self.ntry == 0: self.ntry = 1 - return (["antiword", "-t", "-i", "1", "-m", "UTF-8"], - WordProcessData(self.em)) + cmd = rclexecm.which("antiword") + if cmd: + return ([cmd, "-t", "-i", "1", "-m", "UTF-8"], + WordProcessData(self.em)) + else: + return ([],None) elif self.ntry == 1: self.ntry = 2 # antiword failed. Check for an rtf file, or text and # process accordingly. It the doc is actually msword, try # wvWare. 
mt = self.mimetype(fn) + self.em.rclog("rcldoc.py: actual MIME type %s" % mt) if mt == "text/plain": - return ([os.path.join(self.execdir,"rcltext")], + return ([python, os.path.join(self.execdir, "rcltext")], WordPassData(self.em)) elif mt == "text/rtf": - return ([os.path.join(self.execdir, "rclrtf")], - WordPassData(self.em)) + cmd = [python, os.path.join(self.execdir, "rclrtf.py")] + self.em.rclog("rcldoc.py: returning cmd %s" % cmd) + return (cmd, WordPassData(self.em)) elif mt == "application/msword": - return (["wvWare", "--nographics", "--charset=utf-8"], - WordPassData(self.em)) + cmd = rclexecm.which("wvWare") + if cmd: + return ([cmd, "--nographics", "--charset=utf-8"], + WordPassData(self.em)) + else: + return ([],None) else: return ([],None) else: diff --git a/src/filters/rclexecm.py b/src/filters/rclexecm.py index ebb659df..65a4e119 100644 --- a/src/filters/rclexecm.py +++ b/src/filters/rclexecm.py @@ -49,6 +49,9 @@ class RclExecM: else: self.maxmembersize = 50 * 1024 self.maxmembersize = self.maxmembersize * 1024 + if sys.platform == "win32": + import msvcrt + msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) def rclog(self, s, doexit = 0, exitvalue = 1): print >> sys.stderr, "RCLMFILT:", self.myname, ":", s @@ -216,14 +219,15 @@ class Executor: We expect cmd as a list of command name + arguments''' try: - proc = subprocess.Popen(cmd + [filename], + fullcmd = cmd + [filename] + proc = subprocess.Popen(fullcmd, stdout = subprocess.PIPE) stdout = proc.stdout except subprocess.CalledProcessError as err: - self.em.rclog("extractone: Popen() error: %s" % err) + self.em.rclog("extractone: Popen(%s) error: %s" % (fullcmd, err)) return (False, "") except OSError as err: - self.em.rclog("extractone: Popen OS error: %s" % err) + self.em.rclog("extractone: Popen(%s) OS error: %s" % (fullcmd, err)) return (False, "") for line in stdout: @@ -231,6 +235,7 @@ class Executor: proc.wait() if proc.returncode: + self.em.rclog("extractone: [%s] returncode %d" % 
(filename, proc.returncode)) return False, postproc.wrapData() else: return True, postproc.wrapData() @@ -283,12 +288,17 @@ def which(program): for ext in os.environ.get("PATHEXT", "").split(os.pathsep): yield fpath + ext + def path_candidates(): + yield os.path.dirname(sys.argv[0]) + for path in os.environ["PATH"].split(os.pathsep): + yield path + fpath, fname = os.path.split(program) if fpath: if is_exe(program): return program else: - for path in os.environ["PATH"].split(os.pathsep): + for path in path_candidates(): exe_file = os.path.join(path, program) for candidate in ext_candidates(exe_file): if is_exe(candidate): diff --git a/src/filters/rclrtf.py b/src/filters/rclrtf.py new file mode 100644 index 00000000..bc560380 --- /dev/null +++ b/src/filters/rclrtf.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python + +import rclexecm +import re +import sys +import os + +# Processing the output from unrtf +class RTFProcessData: + def __init__(self, em): + self.em = em + self.out = "" + self.gothead = 0 + self.patendhead = re.compile('''</head>''') + self.patcharset = re.compile('''^<meta http-equiv=''') + + # Some versions of unrtf put out a garbled charset line. + # Apart from this, we pass the data untouched. + def takeLine(self, line): + if not self.gothead: + if self.patendhead.search(line): + self.out += '<meta http-equiv="Content-Type" ' + \ + 'content="text/html;charset=UTF-8">' + "\n" + self.out += line + "\n" + self.gothead = 1 + elif not self.patcharset.search(line): + self.out += line + "\n" + else: + self.out += line + "\n" + + def wrapData(self): + return self.out + +class RTFFilter: + def __init__(self, em): + self.em = em + + def reset(self): + pass + + def getCmd(self, fn): + cmd = rclexecm.which("unrtf") + if cmd: + return ([cmd, "--nopict", "--html"], RTFProcessData(self.em)) + else: + return ([],None) + +if __name__ == '__main__': + proto = rclexecm.RclExecM() + filter = RTFFilter(proto) + extract = rclexecm.Executor(proto, filter) + rclexecm.main(proto, extract) diff --git a/src/internfile/mh_execm.cpp b/src/internfile/mh_execm.cpp index 8a7a82c4..6c47ab8c 100644 --- a/src/internfile/mh_execm.cpp +++ b/src/internfile/mh_execm.cpp @@ -90,6 +90,9 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data) LOGERR(("MHExecMultiple: getline error\n")); return false; } 
+ + LOGDEB1(("MHEM:rde: line [%s]\n", ibuf.c_str())); + // Empty line (end of message) ? if (!ibuf.compare("\n")) { LOGDEB(("MHExecMultiple: Got empty line\n")); @@ -163,7 +166,7 @@ bool MimeHandlerExecMultiple::next_document() return false; } - if (m_cmd.getChildPid() < 0 && !startCmd()) { + if (m_cmd.getChildPid() <= 0 && !startCmd()) { return false; } diff --git a/src/windows/execmd_w.cpp b/src/windows/execmd_w.cpp index 5aff3b62..becd503c 100644 --- a/src/windows/execmd_w.cpp +++ b/src/windows/execmd_w.cpp @@ -658,6 +658,7 @@ int ExecCmd::startExec(const string &cmd, const vector& args, // Create the child process. // Need a writable buffer for the command line, for some reason. + LOGDEB1(("ExecCmd:startExec: cmdline [%s]\n", cmdline.c_str())); LPSTR buf = (LPSTR)malloc(cmdline.size() + 1); memcpy(buf, cmdline.c_str(), cmdline.size()); buf[cmdline.size()] = 0; @@ -818,7 +819,7 @@ int ExecCmd::receive(string& data, int cnt) break; } } - if (cnt == 0) + if ((cnt == 0 && totread > 0) || (cnt > 0 && totread == cnt)) break; } LOGDEB1(("ExecCmd::receive: returning %d bytes\n", totread)); diff --git a/src/windows/mkinstdir.sh b/src/windows/mkinstdir.sh new file mode 100644 index 00000000..34d41607 --- /dev/null +++ b/src/windows/mkinstdir.sh @@ -0,0 +1,86 @@ +#!/bin/sh + +# Script to make a prototype recoll install directory from locally compiled +# software. *** Needs cygwin *** + +############## +# Local values (to be adjusted) +# Target directory where we copy things. +DESTDIR=/cygdrive/c/recollinst + +# Recoll src/build tree +RECOLL=/cygdrive/c/recoll/src + +UNRTF=/cygdrive/c/unrtf +ANTIWORD=/cygdrive/c/recolldeps/antiword + +CONFIGURATION=Debug +PLATFORM=x64 + + +################ +# Script: + +FILTERS=$DESTDIR/Share/filters + +fatal() +{ + echo $* + exit 1 +} + +# checkcopy. 
+cc() +{ + test -f $1 || fatal $1 does not exist + cp $1 $2 || exit 1 +} + +copyrecoll() +{ + bindir=$RECOLL/windows/$PLATFORM/$CONFIGURATION/ + + cc $bindir/recollindex.exe $DESTDIR + cc $bindir/recollq.exe $DESTDIR + cc $bindir/pthreadVC2.dll $DESTDIR + + cc $RECOLL/sampleconf/fields $DESTDIR/Share/examples + cc $RECOLL/sampleconf/fragbuts.xml $DESTDIR/Share/examples + cc $RECOLL/sampleconf/mimeconf $DESTDIR/Share/examples + cc $RECOLL/sampleconf/mimemap $DESTDIR/Share/examples + cc $RECOLL/sampleconf/mimeview $DESTDIR/Share/examples + cc $RECOLL/sampleconf/recoll.conf $DESTDIR/Share/examples + cc $RECOLL/sampleconf/recoll.qss $DESTDIR/Share/examples + + cp $RECOLL/filters/* $FILTERS || exit 1 +} + +copyantiword() +{ + bindir=$ANTIWORD/Win32-only/$PLATFORM/$CONFIGURATION + + test -d $FILTERS/Resources || mkdir -p $FILTERS/Resources || exit 1 + + cc $bindir/antiword.exe $FILTERS + + cp $ANTIWORD/Resources/* $FILTERS/Resources || exit 1 +} + +copyunrtf() +{ + bindir=$UNRTF/Windows/$PLATFORM/$CONFIGURATION + + cc $bindir/unrtf.exe $FILTERS + + test -d $FILTERS/Share || mkdir -p $FILTERS/Share || exit 1 + cp $UNRTF/outputs/*.conf $FILTERS/Share || exit 1 + cc $UNRTF/outputs/SYMBOL.charmap $FILTERS/Share +} + + +test -d $DESTDIR || mkdir -p $DESTDIR || exit 1 +test -d $DESTDIR/Share/examples || mkdir -p $DESTDIR/Share/examples || exit 1 +test -d $FILTERS || mkdir -p $FILTERS || exit 1 +copyrecoll +copyunrtf +copyantiword