diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index 0b93f888..a5084af8 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -1369,7 +1369,21 @@ bool RclConfig::getUncompressor(const string &mtype, vector& cmd) const if (stringlowercmp("uncompress", *it++)) return false; cmd.clear(); - cmd.push_back(findFilter(*it++)); + cmd.push_back(findFilter(*it)); + + // Special-case python and perl on windows: we need to also locate the + // first argument which is the script name "python somescript.py". + // On Unix, thanks to #!, we usually just run "somescript.py", but need + // the same change if we ever want to use the same cmdling as windows + if (!stringlowercmp("python", *it) || !stringlowercmp("perl", *it)) { + it++; + if (tokens.size() < 3) { + LOGERR(("getUncpressor: python/perl cmd: no script?. [%s]\n", mtype.c_str())); + } else { + *it = findFilter(*it); + } + } + cmd.insert(cmd.end(), it, tokens.end()); return true; } diff --git a/src/filters/rclmpdf.py b/src/filters/rclmpdf.py index 27f221ec..e78d76b9 100755 --- a/src/filters/rclmpdf.py +++ b/src/filters/rclmpdf.py @@ -70,6 +70,7 @@ class PDFExtractor: self.pdftk = "" self.em = em self.attextractdone = False + self.attachlist = [] # Extract all attachments if any into temporary directory def extractAttach(self): @@ -79,7 +80,8 @@ class PDFExtractor: global tmpdir if not tmpdir or not self.pdftk: - return False + # no big deal + return True try: vacuumdir(tmpdir) @@ -89,7 +91,8 @@ class PDFExtractor: return True except Exception, e: self.em.rclog("extractAttach: failed: %s" % e) - return False + # Return true anyway, pdf attachments are no big deal + return True def extractone(self, ipath): #self.em.rclog("extractone: [%s]" % ipath) @@ -186,6 +189,8 @@ class PDFExtractor: if not self.pdftotext: self.pdftotext = rclexecm.which("pdftotext") + if not self.pdftotext: + self.pdftotext = rclexecm.which("poppler/pdftotext") if not self.pdftotext: print("RECFILTERROR HELPERNOTFOUND pdftotext") sys.exit(1); @@ -209,7 +214,8 @@ class PDFExtractor: # eofnext error instead of waiting for actual eof, # which avoids a bug in recollindex up to 1.20 self.extractAttach() - + else: + self.attextractdone = True return True def getipath(self, params): @@ -218,8 +224,8 @@ class PDFExtractor: return (ok, data, ipath, eof) def getnext(self, params): + # self.em.rclog("getnext: current %d" % self.currentindex) if self.currentindex == -1: - #self.em.rclog("getnext: current -1") self.currentindex = 0 return self._selfdoc() else: diff --git a/src/filters/rcluncomp.py b/src/filters/rcluncomp.py index a354fe78..438dab62 100644 --- a/src/filters/rcluncomp.py +++ b/src/filters/rcluncomp.py @@ -9,30 +9,44 @@ import platform import subprocess import glob -sysplat = platform.system() +ftrace = sys.stderr +#ftrace = open("C:/Users/Bill/log-uncomp.txt", "w") +sysplat = platform.system() if sysplat != "Windows": - print("rcluncomp.py: only for Windows", file = sys.stderr) + print("rcluncomp.py: only for Windows", file = ftrace) + sys.exit(1) + +try: + import msvcrt + msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) +except Exception as err: + print("setmode binary failed: %s" % str(err), file = ftrace) sevenz = rclexecm.which("7z") if not sevenz: print("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \ - "in recoll.conf ?", file=sys.stderr) - sys.exit(1) -#print("rcluncomp.py: 7z is %s" % sevenz, file = sys.stderr) + "in recoll.conf ?", file=ftrace) + sys.exit(2) # Params: uncompression program, input file name, temp directory. # We ignore the uncomp program, and always use 7z on Windows infile = sys.argv[2] outdir = sys.argv[3] +# print("rcluncomp.py infile [%s], outdir [%s]" % (infile, outdir), file = ftrace) # There is apparently no way to suppress 7z output. Hopefully the # possible deadlock described by the subprocess module doc can't occur # here because there is little data printed. AFAIK nothing goes to stderr anyway -subprocess.check_output([sevenz, "e", "-bd", "-y", "-o" + outdir, infile], - stderr = subprocess.PIPE) +try: + cmd = [sevenz, "e", "-bd", "-y", "-o" + outdir, infile] + subprocess.check_output(cmd, stderr = subprocess.PIPE) + outputname = glob.glob(os.path.join(outdir, "*")) + # There should be only one file in there.. + print(outputname[0]) +except Exception as err: + print("%s" % (str(err),), file = ftrace) + sys.exit(4) -outputname = glob.glob(os.path.join(outdir, "*")) -# There should be only one file in there.. -print(outputname[0]) +sys.exit(0) diff --git a/src/utils/pathut.cpp b/src/utils/pathut.cpp index 82e19ef7..a132a8e0 100644 --- a/src/utils/pathut.cpp +++ b/src/utils/pathut.cpp @@ -145,6 +145,7 @@ string path_wingettempfilename(TCHAR *pref) // Windows will have created a temp file, we delete it. string filename = path_tchartoutf8(buf); unlink(filename.c_str()); + path_slashize(filename); return filename; } #endif diff --git a/src/windows/mimeconf b/src/windows/mimeconf index d4d29d09..0f0e14f5 100644 --- a/src/windows/mimeconf +++ b/src/windows/mimeconf @@ -149,8 +149,8 @@ text/x-python = exec python rclpython text/x-shellscript = internal text/plain text/x-srt = internal text/plain -application/xml = execm python rclxml -text/xml = execm python rclxml +application/xml = execm python rclxml.py +text/xml = execm python rclxml.py # Using these instead of the two above would index all parameter and tag # names, attribute values etc, instead of just the text content. #application/xml = internal text/plain diff --git a/src/windows/mkinstdir.sh b/src/windows/mkinstdir.sh index e224ab80..ec920fc1 100644 --- a/src/windows/mkinstdir.sh +++ b/src/windows/mkinstdir.sh @@ -62,9 +62,9 @@ RCLS=$RCLW/build-rclstartw-${QTA}-${qtsdir}/${qtsdir}/rclstartw.exe # Needed for a VS build (which we did not ever complete because of -# missing Qt VS2015 support). Needed for unrtf -CONFIGURATION=Release -PLATFORM=Win32 +# missing Qt VS2015 support). +#CONFIGURATION=Release +#PLATFORM=Win32 ################ # Script: @@ -82,12 +82,14 @@ copyqt() { cd $DESTDIR $QTBIN/windeployqt recoll.exe + chkcp $QTBIN/libwinpthread-1.dll $DESTDIR } copyxapian() { chkcp $LIBXAPIAN $DESTDIR } + copyzlib() { chkcp $ZLIB/zlib1.dll $DESTDIR @@ -133,7 +135,8 @@ copyantiword() copyunrtf() { - bindir=$UNRTF/Windows/$PLATFORM/$CONFIGURATION +# bindir=$UNRTF/Windows/$PLATFORM/$CONFIGURATION + bindir=$UNRTF/Windows/ test -d $FILTERS/Share || mkdir -p $FILTERS/Share || exit 1 chkcp $bindir/unrtf.exe $FILTERS @@ -158,8 +161,7 @@ copyepub() copypyexiv2() { cp -rp $PYEXIV2/pyexiv2 $FILTERS - # Check - chkcp $PYEXIV2/pyexiv2/exif.py $FILTERS/pyexiv2 + chkcp $PYEXIV2/libexiv2python.pyd $FILTERS/ } copyxslt() @@ -170,10 +172,12 @@ copyxslt() copypoppler() { + test -d $FILTERS/poppler || mkdir $FILTERS/poppler || \ + fatal cant create poppler dir for f in pdftotext.exe libpoppler.dll freetype6.dll jpeg62.dll \ libpng16-16.dll zlib1.dll libtiff3.dll \ libgcc_s_dw2-1.dll libstdc++-6.dll; do - chkcp $POPPLER/bin/$f $FILTERS/ + chkcp $POPPLER/bin/$f $FILTERS/poppler done } diff --git a/src/windows/qmkrecoll/recollindex.pro b/src/windows/qmkrecoll/recollindex.pro index a1d9c264..c81be767 100644 --- a/src/windows/qmkrecoll/recollindex.pro +++ b/src/windows/qmkrecoll/recollindex.pro @@ -32,6 +32,5 @@ windows { C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_5_0_MinGW_32bit-Debug/debug/librecoll.dll \ -lshlwapi -lpsapi -lkernel32 - INCLUDEPATH += ../../windows \ - C:/temp/xapian-core-1.2.8/include + INCLUDEPATH += ../../windows } diff --git a/src/windows/qmkrecoll/recollq.pro b/src/windows/qmkrecoll/recollq.pro index 3ec77d7c..60e04967 100644 --- a/src/windows/qmkrecoll/recollq.pro +++ b/src/windows/qmkrecoll/recollq.pro @@ -30,6 +30,5 @@ windows { C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_5_0_MinGW_32bit-Debug/debug/librecoll.dll \ -lshlwapi -lpsapi -lkernel32 - INCLUDEPATH += ../../windows \ - C:/recolldeps/xapian/xapian-core-1.2.8/include + INCLUDEPATH += ../../windows }