Get uncompression to work and fix a few other issues

This commit is contained in:
Jean-Francois Dockes 2015-10-13 16:48:16 +02:00
parent a02a611694
commit 8324f09d19
8 changed files with 65 additions and 28 deletions

View File

@ -1369,7 +1369,21 @@ bool RclConfig::getUncompressor(const string &mtype, vector<string>& cmd) const
if (stringlowercmp("uncompress", *it++)) if (stringlowercmp("uncompress", *it++))
return false; return false;
cmd.clear(); cmd.clear();
cmd.push_back(findFilter(*it++)); cmd.push_back(findFilter(*it));
// Special-case python and perl on Windows: we also need to locate the
// first argument, which is the script name, as in "python somescript.py".
// On Unix, thanks to #!, we usually just run "somescript.py", but we would
// need the same change if we ever wanted to use the same command line as on Windows.
if (!stringlowercmp("python", *it) || !stringlowercmp("perl", *it)) {
it++;
if (tokens.size() < 3) {
LOGERR(("getUncpressor: python/perl cmd: no script?. [%s]\n", mtype.c_str()));
} else {
*it = findFilter(*it);
}
}
cmd.insert(cmd.end(), it, tokens.end()); cmd.insert(cmd.end(), it, tokens.end());
return true; return true;
} }

View File

@ -70,6 +70,7 @@ class PDFExtractor:
self.pdftk = "" self.pdftk = ""
self.em = em self.em = em
self.attextractdone = False self.attextractdone = False
self.attachlist = []
# Extract all attachments if any into temporary directory # Extract all attachments if any into temporary directory
def extractAttach(self): def extractAttach(self):
@ -79,7 +80,8 @@ class PDFExtractor:
global tmpdir global tmpdir
if not tmpdir or not self.pdftk: if not tmpdir or not self.pdftk:
return False # no big deal
return True
try: try:
vacuumdir(tmpdir) vacuumdir(tmpdir)
@ -89,7 +91,8 @@ class PDFExtractor:
return True return True
except Exception, e: except Exception, e:
self.em.rclog("extractAttach: failed: %s" % e) self.em.rclog("extractAttach: failed: %s" % e)
return False # Return true anyway, pdf attachments are no big deal
return True
def extractone(self, ipath): def extractone(self, ipath):
#self.em.rclog("extractone: [%s]" % ipath) #self.em.rclog("extractone: [%s]" % ipath)
@ -186,6 +189,8 @@ class PDFExtractor:
if not self.pdftotext: if not self.pdftotext:
self.pdftotext = rclexecm.which("pdftotext") self.pdftotext = rclexecm.which("pdftotext")
if not self.pdftotext:
self.pdftotext = rclexecm.which("poppler/pdftotext")
if not self.pdftotext: if not self.pdftotext:
print("RECFILTERROR HELPERNOTFOUND pdftotext") print("RECFILTERROR HELPERNOTFOUND pdftotext")
sys.exit(1); sys.exit(1);
@ -209,7 +214,8 @@ class PDFExtractor:
# eofnext error instead of waiting for actual eof, # eofnext error instead of waiting for actual eof,
# which avoids a bug in recollindex up to 1.20 # which avoids a bug in recollindex up to 1.20
self.extractAttach() self.extractAttach()
else:
self.attextractdone = True
return True return True
def getipath(self, params): def getipath(self, params):
@ -218,8 +224,8 @@ class PDFExtractor:
return (ok, data, ipath, eof) return (ok, data, ipath, eof)
def getnext(self, params): def getnext(self, params):
# self.em.rclog("getnext: current %d" % self.currentindex)
if self.currentindex == -1: if self.currentindex == -1:
#self.em.rclog("getnext: current -1")
self.currentindex = 0 self.currentindex = 0
return self._selfdoc() return self._selfdoc()
else: else:

View File

@ -9,30 +9,44 @@ import platform
import subprocess import subprocess
import glob import glob
sysplat = platform.system() ftrace = sys.stderr
#ftrace = open("C:/Users/Bill/log-uncomp.txt", "w")
sysplat = platform.system()
if sysplat != "Windows": if sysplat != "Windows":
print("rcluncomp.py: only for Windows", file = sys.stderr) print("rcluncomp.py: only for Windows", file = ftrace)
sys.exit(1)
try:
import msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
except Exception as err:
print("setmode binary failed: %s" % str(err), file = ftrace)
sevenz = rclexecm.which("7z") sevenz = rclexecm.which("7z")
if not sevenz: if not sevenz:
print("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \ print("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \
"in recoll.conf ?", file=sys.stderr) "in recoll.conf ?", file=ftrace)
sys.exit(1) sys.exit(2)
#print("rcluncomp.py: 7z is %s" % sevenz, file = sys.stderr)
# Params: uncompression program, input file name, temp directory. # Params: uncompression program, input file name, temp directory.
# We ignore the uncomp program, and always use 7z on Windows # We ignore the uncomp program, and always use 7z on Windows
infile = sys.argv[2] infile = sys.argv[2]
outdir = sys.argv[3] outdir = sys.argv[3]
# print("rcluncomp.py infile [%s], outdir [%s]" % (infile, outdir), file = ftrace)
# There is apparently no way to suppress 7z output. Hopefully the # There is apparently no way to suppress 7z output. Hopefully the
# possible deadlock described by the subprocess module doc can't occur # possible deadlock described by the subprocess module doc can't occur
# here because there is little data printed. AFAIK nothing goes to stderr anyway # here because there is little data printed. AFAIK nothing goes to stderr anyway
subprocess.check_output([sevenz, "e", "-bd", "-y", "-o" + outdir, infile], try:
stderr = subprocess.PIPE) cmd = [sevenz, "e", "-bd", "-y", "-o" + outdir, infile]
subprocess.check_output(cmd, stderr = subprocess.PIPE)
outputname = glob.glob(os.path.join(outdir, "*"))
# There should be only one file in there..
print(outputname[0])
except Exception as err:
print("%s" % (str(err),), file = ftrace)
sys.exit(4)
outputname = glob.glob(os.path.join(outdir, "*")) sys.exit(0)
# There should be only one file in there..
print(outputname[0])

View File

@ -145,6 +145,7 @@ string path_wingettempfilename(TCHAR *pref)
// Windows will have created a temp file, we delete it. // Windows will have created a temp file, we delete it.
string filename = path_tchartoutf8(buf); string filename = path_tchartoutf8(buf);
unlink(filename.c_str()); unlink(filename.c_str());
path_slashize(filename);
return filename; return filename;
} }
#endif #endif

View File

@ -149,8 +149,8 @@ text/x-python = exec python rclpython
text/x-shellscript = internal text/plain text/x-shellscript = internal text/plain
text/x-srt = internal text/plain text/x-srt = internal text/plain
application/xml = execm python rclxml application/xml = execm python rclxml.py
text/xml = execm python rclxml text/xml = execm python rclxml.py
# Using these instead of the two above would index all parameter and tag # Using these instead of the two above would index all parameter and tag
# names, attribute values etc, instead of just the text content. # names, attribute values etc, instead of just the text content.
#application/xml = internal text/plain #application/xml = internal text/plain

View File

@ -62,9 +62,9 @@ RCLS=$RCLW/build-rclstartw-${QTA}-${qtsdir}/${qtsdir}/rclstartw.exe
# Needed for a VS build (which we did not ever complete because of # Needed for a VS build (which we did not ever complete because of
# missing Qt VS2015 support). Needed for unrtf # missing Qt VS2015 support).
CONFIGURATION=Release #CONFIGURATION=Release
PLATFORM=Win32 #PLATFORM=Win32
################ ################
# Script: # Script:
@ -82,12 +82,14 @@ copyqt()
{ {
cd $DESTDIR cd $DESTDIR
$QTBIN/windeployqt recoll.exe $QTBIN/windeployqt recoll.exe
chkcp $QTBIN/libwinpthread-1.dll $DESTDIR
} }
copyxapian() copyxapian()
{ {
chkcp $LIBXAPIAN $DESTDIR chkcp $LIBXAPIAN $DESTDIR
} }
copyzlib() copyzlib()
{ {
chkcp $ZLIB/zlib1.dll $DESTDIR chkcp $ZLIB/zlib1.dll $DESTDIR
@ -133,7 +135,8 @@ copyantiword()
copyunrtf() copyunrtf()
{ {
bindir=$UNRTF/Windows/$PLATFORM/$CONFIGURATION # bindir=$UNRTF/Windows/$PLATFORM/$CONFIGURATION
bindir=$UNRTF/Windows/
test -d $FILTERS/Share || mkdir -p $FILTERS/Share || exit 1 test -d $FILTERS/Share || mkdir -p $FILTERS/Share || exit 1
chkcp $bindir/unrtf.exe $FILTERS chkcp $bindir/unrtf.exe $FILTERS
@ -158,8 +161,7 @@ copyepub()
copypyexiv2() copypyexiv2()
{ {
cp -rp $PYEXIV2/pyexiv2 $FILTERS cp -rp $PYEXIV2/pyexiv2 $FILTERS
# Check chkcp $PYEXIV2/libexiv2python.pyd $FILTERS/
chkcp $PYEXIV2/pyexiv2/exif.py $FILTERS/pyexiv2
} }
copyxslt() copyxslt()
@ -170,10 +172,12 @@ copyxslt()
copypoppler() copypoppler()
{ {
test -d $FILTERS/poppler || mkdir $FILTERS/poppler || \
fatal cant create poppler dir
for f in pdftotext.exe libpoppler.dll freetype6.dll jpeg62.dll \ for f in pdftotext.exe libpoppler.dll freetype6.dll jpeg62.dll \
libpng16-16.dll zlib1.dll libtiff3.dll \ libpng16-16.dll zlib1.dll libtiff3.dll \
libgcc_s_dw2-1.dll libstdc++-6.dll; do libgcc_s_dw2-1.dll libstdc++-6.dll; do
chkcp $POPPLER/bin/$f $FILTERS/ chkcp $POPPLER/bin/$f $FILTERS/poppler
done done
} }

View File

@ -32,6 +32,5 @@ windows {
C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_5_0_MinGW_32bit-Debug/debug/librecoll.dll \ C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_5_0_MinGW_32bit-Debug/debug/librecoll.dll \
-lshlwapi -lpsapi -lkernel32 -lshlwapi -lpsapi -lkernel32
INCLUDEPATH += ../../windows \ INCLUDEPATH += ../../windows
C:/temp/xapian-core-1.2.8/include
} }

View File

@ -30,6 +30,5 @@ windows {
C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_5_0_MinGW_32bit-Debug/debug/librecoll.dll \ C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_5_0_MinGW_32bit-Debug/debug/librecoll.dll \
-lshlwapi -lpsapi -lkernel32 -lshlwapi -lpsapi -lkernel32
INCLUDEPATH += ../../windows \ INCLUDEPATH += ../../windows
C:/recolldeps/xapian/xapian-core-1.2.8/include
} }