Get uncompression to work and fix a few other issues
This commit is contained in:
parent
a02a611694
commit
8324f09d19
@ -1369,7 +1369,21 @@ bool RclConfig::getUncompressor(const string &mtype, vector<string>& cmd) const
|
||||
if (stringlowercmp("uncompress", *it++))
|
||||
return false;
|
||||
cmd.clear();
|
||||
cmd.push_back(findFilter(*it++));
|
||||
cmd.push_back(findFilter(*it));
|
||||
|
||||
// Special-case python and perl on windows: we need to also locate the
|
||||
// first argument which is the script name "python somescript.py".
|
||||
// On Unix, thanks to #!, we usually just run "somescript.py", but need
|
||||
// the same change if we ever want to use the same command line as Windows
|
||||
if (!stringlowercmp("python", *it) || !stringlowercmp("perl", *it)) {
|
||||
it++;
|
||||
if (tokens.size() < 3) {
|
||||
LOGERR(("getUncpressor: python/perl cmd: no script?. [%s]\n", mtype.c_str()));
|
||||
} else {
|
||||
*it = findFilter(*it);
|
||||
}
|
||||
}
|
||||
|
||||
cmd.insert(cmd.end(), it, tokens.end());
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -70,6 +70,7 @@ class PDFExtractor:
|
||||
self.pdftk = ""
|
||||
self.em = em
|
||||
self.attextractdone = False
|
||||
self.attachlist = []
|
||||
|
||||
# Extract all attachments if any into temporary directory
|
||||
def extractAttach(self):
|
||||
@ -79,7 +80,8 @@ class PDFExtractor:
|
||||
|
||||
global tmpdir
|
||||
if not tmpdir or not self.pdftk:
|
||||
return False
|
||||
# no big deal
|
||||
return True
|
||||
|
||||
try:
|
||||
vacuumdir(tmpdir)
|
||||
@ -89,7 +91,8 @@ class PDFExtractor:
|
||||
return True
|
||||
except Exception, e:
|
||||
self.em.rclog("extractAttach: failed: %s" % e)
|
||||
return False
|
||||
# Return true anyway, pdf attachments are no big deal
|
||||
return True
|
||||
|
||||
def extractone(self, ipath):
|
||||
#self.em.rclog("extractone: [%s]" % ipath)
|
||||
@ -186,6 +189,8 @@ class PDFExtractor:
|
||||
|
||||
if not self.pdftotext:
|
||||
self.pdftotext = rclexecm.which("pdftotext")
|
||||
if not self.pdftotext:
|
||||
self.pdftotext = rclexecm.which("poppler/pdftotext")
|
||||
if not self.pdftotext:
|
||||
print("RECFILTERROR HELPERNOTFOUND pdftotext")
|
||||
sys.exit(1);
|
||||
@ -209,7 +214,8 @@ class PDFExtractor:
|
||||
# eofnext error instead of waiting for actual eof,
|
||||
# which avoids a bug in recollindex up to 1.20
|
||||
self.extractAttach()
|
||||
|
||||
else:
|
||||
self.attextractdone = True
|
||||
return True
|
||||
|
||||
def getipath(self, params):
|
||||
@ -218,8 +224,8 @@ class PDFExtractor:
|
||||
return (ok, data, ipath, eof)
|
||||
|
||||
def getnext(self, params):
|
||||
# self.em.rclog("getnext: current %d" % self.currentindex)
|
||||
if self.currentindex == -1:
|
||||
#self.em.rclog("getnext: current -1")
|
||||
self.currentindex = 0
|
||||
return self._selfdoc()
|
||||
else:
|
||||
|
||||
@ -9,30 +9,44 @@ import platform
|
||||
import subprocess
|
||||
import glob
|
||||
|
||||
sysplat = platform.system()
|
||||
ftrace = sys.stderr
|
||||
#ftrace = open("C:/Users/Bill/log-uncomp.txt", "w")
|
||||
|
||||
sysplat = platform.system()
|
||||
if sysplat != "Windows":
|
||||
print("rcluncomp.py: only for Windows", file = sys.stderr)
|
||||
print("rcluncomp.py: only for Windows", file = ftrace)
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
import msvcrt
|
||||
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
||||
except Exception as err:
|
||||
print("setmode binary failed: %s" % str(err), file = ftrace)
|
||||
|
||||
sevenz = rclexecm.which("7z")
|
||||
if not sevenz:
|
||||
print("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \
|
||||
"in recoll.conf ?", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
#print("rcluncomp.py: 7z is %s" % sevenz, file = sys.stderr)
|
||||
"in recoll.conf ?", file=ftrace)
|
||||
sys.exit(2)
|
||||
|
||||
# Params: uncompression program, input file name, temp directory.
|
||||
# We ignore the uncomp program, and always use 7z on Windows
|
||||
|
||||
infile = sys.argv[2]
|
||||
outdir = sys.argv[3]
|
||||
# print("rcluncomp.py infile [%s], outdir [%s]" % (infile, outdir), file = ftrace)
|
||||
|
||||
# There is apparently no way to suppress 7z output. Hopefully the
|
||||
# possible deadlock described by the subprocess module doc can't occur
|
||||
# here because there is little data printed. AFAIK nothing goes to stderr anyway
|
||||
subprocess.check_output([sevenz, "e", "-bd", "-y", "-o" + outdir, infile],
|
||||
stderr = subprocess.PIPE)
|
||||
try:
|
||||
cmd = [sevenz, "e", "-bd", "-y", "-o" + outdir, infile]
|
||||
subprocess.check_output(cmd, stderr = subprocess.PIPE)
|
||||
outputname = glob.glob(os.path.join(outdir, "*"))
|
||||
# There should be only one file in there..
|
||||
print(outputname[0])
|
||||
except Exception as err:
|
||||
print("%s" % (str(err),), file = ftrace)
|
||||
sys.exit(4)
|
||||
|
||||
outputname = glob.glob(os.path.join(outdir, "*"))
|
||||
# There should be only one file in there..
|
||||
print(outputname[0])
|
||||
sys.exit(0)
|
||||
|
||||
@ -145,6 +145,7 @@ string path_wingettempfilename(TCHAR *pref)
|
||||
// Windows will have created a temp file, we delete it.
|
||||
string filename = path_tchartoutf8(buf);
|
||||
unlink(filename.c_str());
|
||||
path_slashize(filename);
|
||||
return filename;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -149,8 +149,8 @@ text/x-python = exec python rclpython
|
||||
text/x-shellscript = internal text/plain
|
||||
text/x-srt = internal text/plain
|
||||
|
||||
application/xml = execm python rclxml
|
||||
text/xml = execm python rclxml
|
||||
application/xml = execm python rclxml.py
|
||||
text/xml = execm python rclxml.py
|
||||
# Using these instead of the two above would index all parameter and tag
|
||||
# names, attribute values etc, instead of just the text content.
|
||||
#application/xml = internal text/plain
|
||||
|
||||
@ -62,9 +62,9 @@ RCLS=$RCLW/build-rclstartw-${QTA}-${qtsdir}/${qtsdir}/rclstartw.exe
|
||||
|
||||
|
||||
# Needed for a VS build (which we did not ever complete because of
|
||||
# missing Qt VS2015 support). Needed for unrtf
|
||||
CONFIGURATION=Release
|
||||
PLATFORM=Win32
|
||||
# missing Qt VS2015 support).
|
||||
#CONFIGURATION=Release
|
||||
#PLATFORM=Win32
|
||||
|
||||
################
|
||||
# Script:
|
||||
@ -82,12 +82,14 @@ copyqt()
|
||||
{
|
||||
cd $DESTDIR
|
||||
$QTBIN/windeployqt recoll.exe
|
||||
chkcp $QTBIN/libwinpthread-1.dll $DESTDIR
|
||||
}
|
||||
|
||||
copyxapian()
|
||||
{
|
||||
chkcp $LIBXAPIAN $DESTDIR
|
||||
}
|
||||
|
||||
copyzlib()
|
||||
{
|
||||
chkcp $ZLIB/zlib1.dll $DESTDIR
|
||||
@ -133,7 +135,8 @@ copyantiword()
|
||||
|
||||
copyunrtf()
|
||||
{
|
||||
bindir=$UNRTF/Windows/$PLATFORM/$CONFIGURATION
|
||||
# bindir=$UNRTF/Windows/$PLATFORM/$CONFIGURATION
|
||||
bindir=$UNRTF/Windows/
|
||||
|
||||
test -d $FILTERS/Share || mkdir -p $FILTERS/Share || exit 1
|
||||
chkcp $bindir/unrtf.exe $FILTERS
|
||||
@ -158,8 +161,7 @@ copyepub()
|
||||
copypyexiv2()
|
||||
{
|
||||
cp -rp $PYEXIV2/pyexiv2 $FILTERS
|
||||
# Check
|
||||
chkcp $PYEXIV2/pyexiv2/exif.py $FILTERS/pyexiv2
|
||||
chkcp $PYEXIV2/libexiv2python.pyd $FILTERS/
|
||||
}
|
||||
|
||||
copyxslt()
|
||||
@ -170,10 +172,12 @@ copyxslt()
|
||||
|
||||
copypoppler()
|
||||
{
|
||||
test -d $FILTERS/poppler || mkdir $FILTERS/poppler || \
|
||||
fatal cant create poppler dir
|
||||
for f in pdftotext.exe libpoppler.dll freetype6.dll jpeg62.dll \
|
||||
libpng16-16.dll zlib1.dll libtiff3.dll \
|
||||
libgcc_s_dw2-1.dll libstdc++-6.dll; do
|
||||
chkcp $POPPLER/bin/$f $FILTERS/
|
||||
chkcp $POPPLER/bin/$f $FILTERS/poppler
|
||||
done
|
||||
}
|
||||
|
||||
|
||||
@ -32,6 +32,5 @@ windows {
|
||||
C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_5_0_MinGW_32bit-Debug/debug/librecoll.dll \
|
||||
-lshlwapi -lpsapi -lkernel32
|
||||
|
||||
INCLUDEPATH += ../../windows \
|
||||
C:/temp/xapian-core-1.2.8/include
|
||||
INCLUDEPATH += ../../windows
|
||||
}
|
||||
|
||||
@ -30,6 +30,5 @@ windows {
|
||||
C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_5_0_MinGW_32bit-Debug/debug/librecoll.dll \
|
||||
-lshlwapi -lpsapi -lkernel32
|
||||
|
||||
INCLUDEPATH += ../../windows \
|
||||
C:/recolldeps/xapian/xapian-core-1.2.8/include
|
||||
INCLUDEPATH += ../../windows
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user