diff --git a/src/filters/rcldoc.py b/src/filters/rcldoc.py
index 8fb7a31a..0e67239c 100755
--- a/src/filters/rcldoc.py
+++ b/src/filters/rcldoc.py
@@ -117,23 +117,33 @@ class WordFilter:
identification. Do 2 tries at most'''
if self.ntry == 0:
self.ntry = 1
- return (["antiword", "-t", "-i", "1", "-m", "UTF-8"],
- WordProcessData(self.em))
+ cmd = rclexecm.which("antiword")
+ if cmd:
+ return ([cmd, "-t", "-i", "1", "-m", "UTF-8"],
+ WordProcessData(self.em))
+ else:
+ return ([],None)
elif self.ntry == 1:
self.ntry = 2
# antiword failed. Check for an rtf file, or text and
# process accordingly. It the doc is actually msword, try
# wvWare.
mt = self.mimetype(fn)
+ self.em.rclog("rcldoc.py: actual MIME type %s" % mt)
if mt == "text/plain":
- return ([os.path.join(self.execdir,"rcltext")],
+ return ([python, os.path.join(self.execdir, "rcltext")],
WordPassData(self.em))
elif mt == "text/rtf":
- return ([os.path.join(self.execdir, "rclrtf")],
- WordPassData(self.em))
+ cmd = [python, os.path.join(self.execdir, "rclrtf.py")]
+ self.em.rclog("rcldoc.py: returning cmd %s" % cmd)
+ return (cmd, WordPassData(self.em))
elif mt == "application/msword":
- return (["wvWare", "--nographics", "--charset=utf-8"],
- WordPassData(self.em))
+ cmd = rclexecm.which("wvWare")
+ if cmd:
+ return ([cmd, "--nographics", "--charset=utf-8"],
+ WordPassData(self.em))
+ else:
+ return ([],None)
else:
return ([],None)
else:
diff --git a/src/filters/rclexecm.py b/src/filters/rclexecm.py
index ebb659df..65a4e119 100644
--- a/src/filters/rclexecm.py
+++ b/src/filters/rclexecm.py
@@ -49,6 +49,9 @@ class RclExecM:
else:
self.maxmembersize = 50 * 1024
self.maxmembersize = self.maxmembersize * 1024
+ if sys.platform == "win32":
+ import msvcrt
+ msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
def rclog(self, s, doexit = 0, exitvalue = 1):
print >> sys.stderr, "RCLMFILT:", self.myname, ":", s
@@ -216,14 +219,15 @@ class Executor:
We expect cmd as a list of command name + arguments'''
try:
- proc = subprocess.Popen(cmd + [filename],
+ fullcmd = cmd + [filename]
+ proc = subprocess.Popen(fullcmd,
stdout = subprocess.PIPE)
stdout = proc.stdout
except subprocess.CalledProcessError as err:
- self.em.rclog("extractone: Popen() error: %s" % err)
+ self.em.rclog("extractone: Popen(%s) error: %s" % (fullcmd, err))
return (False, "")
except OSError as err:
- self.em.rclog("extractone: Popen OS error: %s" % err)
+ self.em.rclog("extractone: Popen(%s) OS error: %s" % (fullcmd, err))
return (False, "")
for line in stdout:
@@ -231,6 +235,7 @@ class Executor:
proc.wait()
if proc.returncode:
+            self.em.rclog("extractone: [%s] returncode %d" % (filename, proc.returncode))
return False, postproc.wrapData()
else:
return True, postproc.wrapData()
@@ -283,12 +288,17 @@ def which(program):
for ext in os.environ.get("PATHEXT", "").split(os.pathsep):
yield fpath + ext
+ def path_candidates():
+ yield os.path.dirname(sys.argv[0])
+ for path in os.environ["PATH"].split(os.pathsep):
+ yield path
+
fpath, fname = os.path.split(program)
if fpath:
if is_exe(program):
return program
else:
- for path in os.environ["PATH"].split(os.pathsep):
+ for path in path_candidates():
exe_file = os.path.join(path, program)
for candidate in ext_candidates(exe_file):
if is_exe(candidate):
diff --git a/src/filters/rclrtf.py b/src/filters/rclrtf.py
new file mode 100644
index 00000000..bc560380
--- /dev/null
+++ b/src/filters/rclrtf.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+'''Recoll input handler for RTF documents: runs unrtf and fixes up its HTML.'''
+
+import rclexecm
+import re
+import sys
+import os
+
+
+class RTFProcessData:
+    '''Post-processor for the HTML produced by unrtf.
+
+    Until the </head> line is seen, lines starting with <meta http-equiv=
+    are dropped; a UTF-8 Content-Type <meta> line is inserted just before
+    </head>. All other lines are passed through, newline-terminated.
+    '''
+
+    def __init__(self, em):
+        # em is the protocol object handed to the filter; stored, not used here.
+        self.em = em
+        # Output accumulated so far.
+        self.out = ""
+        # Set to 1 once the end of the HTML header has been seen.
+        self.gothead = 0
+        self.patendhead = re.compile('''</head>''')
+        self.patcharset = re.compile('''^<meta http-equiv=''')
+
+    # NOTE(review): the method name is assumed to be the one rclexecm.Executor
+    # calls for each output line (as for the rcldoc.py processors) - confirm.
+    def takeLine(self, line):
+        '''Process one line of unrtf output and append the result to self.out.'''
+        if self.gothead:
+            self.out += line + "\n"
+        elif self.patendhead.search(line):
+            # Declare the charset ourselves, right before the header ends.
+            self.out += '<meta http-equiv="Content-Type" ' + \
+                        'content="text/html;charset=UTF-8">' + "\n"
+            self.out += line + "\n"
+            self.gothead = 1
+        elif not self.patcharset.search(line):
+            self.out += line + "\n"
+
+    def wrapData(self):
+        '''Return the whole processed document.'''
+        return self.out
+
+
+class RTFFilter:
+    '''Filter object for rclexecm.Executor: supplies the unrtf command line.'''
+
+    def __init__(self, em):
+        self.em = em
+
+    def reset(self):
+        '''Nothing to reset: the filter keeps no per-document state.'''
+        pass
+
+    def getCmd(self, fn):
+        '''Return (command list, output processor); fn itself is not used.
+
+        Returns ([], None) when rclexecm.which() finds no unrtf executable.
+        '''
+        cmd = rclexecm.which("unrtf")
+        if not cmd:
+            return ([], None)
+        return ([cmd, "--nopict", "--html"], RTFProcessData(self.em))
+
+
+if __name__ == '__main__':
+    proto = rclexecm.RclExecM()
+    rtffilter = RTFFilter(proto)
+    extract = rclexecm.Executor(proto, rtffilter)
+    rclexecm.main(proto, extract)
diff --git a/src/internfile/mh_execm.cpp b/src/internfile/mh_execm.cpp
index 8a7a82c4..6c47ab8c 100644
--- a/src/internfile/mh_execm.cpp
+++ b/src/internfile/mh_execm.cpp
@@ -90,6 +90,9 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
LOGERR(("MHExecMultiple: getline error\n"));
return false;
}
+
+ LOGDEB1(("MHEM:rde: line [%s]\n", ibuf.c_str()));
+
// Empty line (end of message) ?
if (!ibuf.compare("\n")) {
LOGDEB(("MHExecMultiple: Got empty line\n"));
@@ -163,7 +166,7 @@ bool MimeHandlerExecMultiple::next_document()
return false;
}
- if (m_cmd.getChildPid() < 0 && !startCmd()) {
+ if (m_cmd.getChildPid() <= 0 && !startCmd()) {
return false;
}
diff --git a/src/windows/execmd_w.cpp b/src/windows/execmd_w.cpp
index 5aff3b62..becd503c 100644
--- a/src/windows/execmd_w.cpp
+++ b/src/windows/execmd_w.cpp
@@ -658,6 +658,7 @@ int ExecCmd::startExec(const string &cmd, const vector& args,
// Create the child process.
// Need a writable buffer for the command line, for some reason.
+ LOGDEB1(("ExecCmd:startExec: cmdline [%s]\n", cmdline.c_str()));
LPSTR buf = (LPSTR)malloc(cmdline.size() + 1);
memcpy(buf, cmdline.c_str(), cmdline.size());
buf[cmdline.size()] = 0;
@@ -818,7 +819,7 @@ int ExecCmd::receive(string& data, int cnt)
break;
}
}
- if (cnt == 0)
+ if ((cnt == 0 && totread > 0) || (cnt > 0 && totread == cnt))
break;
}
LOGDEB1(("ExecCmd::receive: returning %d bytes\n", totread));
diff --git a/src/windows/mkinstdir.sh b/src/windows/mkinstdir.sh
new file mode 100644
index 00000000..34d41607
--- /dev/null
+++ b/src/windows/mkinstdir.sh
@@ -0,0 +1,91 @@
+#!/bin/sh
+
+# Script to make a prototype recoll install directory from locally compiled
+# software. *** Needs cygwin ***
+
+##############
+# Local values (to be adjusted)
+# Target directory where we copy things.
+DESTDIR=/cygdrive/c/recollinst
+
+# Recoll src/build tree
+RECOLL=/cygdrive/c/recoll/src
+
+# Source/build trees of the external helper programs copied below.
+UNRTF=/cygdrive/c/unrtf
+ANTIWORD=/cygdrive/c/recolldeps/antiword
+
+# Build configuration and platform: they select the binaries directory.
+CONFIGURATION=Debug
+PLATFORM=x64
+
+
+################
+# Script:
+
+# Directory receiving the filter scripts and the helper executables.
+FILTERS=$DESTDIR/Share/filters
+
+# Print the arguments as an error message on stderr, then exit with status 1.
+fatal()
+{
+    echo "$*" >&2
+    exit 1
+}
+
+# checkcopy: copy regular file $1 to $2. Aborts the script if $1 does not
+# exist or if the copy fails.
+cc()
+{
+    test -f "$1" || fatal "$1" does not exist
+    cp "$1" "$2" || exit 1
+}
+
+# Copy the recoll executables, the sample configuration files and the filters.
+copyrecoll()
+{
+    bindir=$RECOLL/windows/$PLATFORM/$CONFIGURATION
+
+    for f in recollindex.exe recollq.exe pthreadVC2.dll; do
+        cc "$bindir/$f" "$DESTDIR"
+    done
+
+    for f in fields fragbuts.xml mimeconf mimemap mimeview recoll.conf recoll.qss; do
+        cc "$RECOLL/sampleconf/$f" "$DESTDIR/Share/examples"
+    done
+
+    cp "$RECOLL"/filters/* "$FILTERS" || exit 1
+}
+
+# Copy antiword.exe and its resource files into the filters directory.
+copyantiword()
+{
+    bindir=$ANTIWORD/Win32-only/$PLATFORM/$CONFIGURATION
+
+    test -d "$FILTERS/Resources" || mkdir -p "$FILTERS/Resources" || exit 1
+
+    cc "$bindir/antiword.exe" "$FILTERS"
+
+    cp "$ANTIWORD"/Resources/* "$FILTERS/Resources" || exit 1
+}
+
+# Copy unrtf.exe plus its output .conf files and SYMBOL.charmap.
+copyunrtf()
+{
+    bindir=$UNRTF/Windows/$PLATFORM/$CONFIGURATION
+
+    cc "$bindir/unrtf.exe" "$FILTERS"
+
+    test -d "$FILTERS/Share" || mkdir -p "$FILTERS/Share" || exit 1
+    cp "$UNRTF"/outputs/*.conf "$FILTERS/Share" || exit 1
+    cc "$UNRTF/outputs/SYMBOL.charmap" "$FILTERS/Share"
+}
+
+
+# Create the target directories, then populate them.
+test -d "$DESTDIR" || mkdir -p "$DESTDIR" || exit 1
+test -d "$DESTDIR/Share/examples" || mkdir -p "$DESTDIR/Share/examples" || exit 1
+test -d "$FILTERS" || mkdir -p "$FILTERS" || exit 1
+copyrecoll
+copyunrtf
+copyantiword