Get uncompression to work and fix a few other issues
This commit is contained in:
parent
a02a611694
commit
8324f09d19
@ -1369,7 +1369,21 @@ bool RclConfig::getUncompressor(const string &mtype, vector<string>& cmd) const
|
|||||||
if (stringlowercmp("uncompress", *it++))
|
if (stringlowercmp("uncompress", *it++))
|
||||||
return false;
|
return false;
|
||||||
cmd.clear();
|
cmd.clear();
|
||||||
cmd.push_back(findFilter(*it++));
|
cmd.push_back(findFilter(*it));
|
||||||
|
|
||||||
|
// Special-case python and perl on windows: we need to also locate the
|
||||||
|
// first argument which is the script name "python somescript.py".
|
||||||
|
// On Unix, thanks to #!, we usually just run "somescript.py", but need
|
||||||
|
// the same change if we ever want to use the same command line as Windows
|
||||||
|
if (!stringlowercmp("python", *it) || !stringlowercmp("perl", *it)) {
|
||||||
|
it++;
|
||||||
|
if (tokens.size() < 3) {
|
||||||
|
LOGERR(("getUncpressor: python/perl cmd: no script?. [%s]\n", mtype.c_str()));
|
||||||
|
} else {
|
||||||
|
*it = findFilter(*it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
cmd.insert(cmd.end(), it, tokens.end());
|
cmd.insert(cmd.end(), it, tokens.end());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -70,6 +70,7 @@ class PDFExtractor:
|
|||||||
self.pdftk = ""
|
self.pdftk = ""
|
||||||
self.em = em
|
self.em = em
|
||||||
self.attextractdone = False
|
self.attextractdone = False
|
||||||
|
self.attachlist = []
|
||||||
|
|
||||||
# Extract all attachments if any into temporary directory
|
# Extract all attachments if any into temporary directory
|
||||||
def extractAttach(self):
|
def extractAttach(self):
|
||||||
@ -79,7 +80,8 @@ class PDFExtractor:
|
|||||||
|
|
||||||
global tmpdir
|
global tmpdir
|
||||||
if not tmpdir or not self.pdftk:
|
if not tmpdir or not self.pdftk:
|
||||||
return False
|
# no big deal
|
||||||
|
return True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
vacuumdir(tmpdir)
|
vacuumdir(tmpdir)
|
||||||
@ -89,7 +91,8 @@ class PDFExtractor:
|
|||||||
return True
|
return True
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
self.em.rclog("extractAttach: failed: %s" % e)
|
self.em.rclog("extractAttach: failed: %s" % e)
|
||||||
return False
|
# Return true anyway, pdf attachments are no big deal
|
||||||
|
return True
|
||||||
|
|
||||||
def extractone(self, ipath):
|
def extractone(self, ipath):
|
||||||
#self.em.rclog("extractone: [%s]" % ipath)
|
#self.em.rclog("extractone: [%s]" % ipath)
|
||||||
@ -186,6 +189,8 @@ class PDFExtractor:
|
|||||||
|
|
||||||
if not self.pdftotext:
|
if not self.pdftotext:
|
||||||
self.pdftotext = rclexecm.which("pdftotext")
|
self.pdftotext = rclexecm.which("pdftotext")
|
||||||
|
if not self.pdftotext:
|
||||||
|
self.pdftotext = rclexecm.which("poppler/pdftotext")
|
||||||
if not self.pdftotext:
|
if not self.pdftotext:
|
||||||
print("RECFILTERROR HELPERNOTFOUND pdftotext")
|
print("RECFILTERROR HELPERNOTFOUND pdftotext")
|
||||||
sys.exit(1);
|
sys.exit(1);
|
||||||
@ -209,7 +214,8 @@ class PDFExtractor:
|
|||||||
# eofnext error instead of waiting for actual eof,
|
# eofnext error instead of waiting for actual eof,
|
||||||
# which avoids a bug in recollindex up to 1.20
|
# which avoids a bug in recollindex up to 1.20
|
||||||
self.extractAttach()
|
self.extractAttach()
|
||||||
|
else:
|
||||||
|
self.attextractdone = True
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def getipath(self, params):
|
def getipath(self, params):
|
||||||
@ -218,8 +224,8 @@ class PDFExtractor:
|
|||||||
return (ok, data, ipath, eof)
|
return (ok, data, ipath, eof)
|
||||||
|
|
||||||
def getnext(self, params):
|
def getnext(self, params):
|
||||||
|
# self.em.rclog("getnext: current %d" % self.currentindex)
|
||||||
if self.currentindex == -1:
|
if self.currentindex == -1:
|
||||||
#self.em.rclog("getnext: current -1")
|
|
||||||
self.currentindex = 0
|
self.currentindex = 0
|
||||||
return self._selfdoc()
|
return self._selfdoc()
|
||||||
else:
|
else:
|
||||||
|
|||||||
@ -9,30 +9,44 @@ import platform
|
|||||||
import subprocess
|
import subprocess
|
||||||
import glob
|
import glob
|
||||||
|
|
||||||
sysplat = platform.system()
|
ftrace = sys.stderr
|
||||||
|
#ftrace = open("C:/Users/Bill/log-uncomp.txt", "w")
|
||||||
|
|
||||||
|
sysplat = platform.system()
|
||||||
if sysplat != "Windows":
|
if sysplat != "Windows":
|
||||||
print("rcluncomp.py: only for Windows", file = sys.stderr)
|
print("rcluncomp.py: only for Windows", file = ftrace)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
try:
|
||||||
|
import msvcrt
|
||||||
|
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
||||||
|
except Exception as err:
|
||||||
|
print("setmode binary failed: %s" % str(err), file = ftrace)
|
||||||
|
|
||||||
sevenz = rclexecm.which("7z")
|
sevenz = rclexecm.which("7z")
|
||||||
if not sevenz:
|
if not sevenz:
|
||||||
print("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \
|
print("rcluncomp.py: can't find 7z exe. Maybe set recollhelperpath " \
|
||||||
"in recoll.conf ?", file=sys.stderr)
|
"in recoll.conf ?", file=ftrace)
|
||||||
sys.exit(1)
|
sys.exit(2)
|
||||||
#print("rcluncomp.py: 7z is %s" % sevenz, file = sys.stderr)
|
|
||||||
|
|
||||||
# Params: uncompression program, input file name, temp directory.
|
# Params: uncompression program, input file name, temp directory.
|
||||||
# We ignore the uncomp program, and always use 7z on Windows
|
# We ignore the uncomp program, and always use 7z on Windows
|
||||||
|
|
||||||
infile = sys.argv[2]
|
infile = sys.argv[2]
|
||||||
outdir = sys.argv[3]
|
outdir = sys.argv[3]
|
||||||
|
# print("rcluncomp.py infile [%s], outdir [%s]" % (infile, outdir), file = ftrace)
|
||||||
|
|
||||||
# There is apparently no way to suppress 7z output. Hopefully the
|
# There is apparently no way to suppress 7z output. Hopefully the
|
||||||
# possible deadlock described by the subprocess module doc can't occur
|
# possible deadlock described by the subprocess module doc can't occur
|
||||||
# here because there is little data printed. AFAIK nothing goes to stderr anyway
|
# here because there is little data printed. AFAIK nothing goes to stderr anyway
|
||||||
subprocess.check_output([sevenz, "e", "-bd", "-y", "-o" + outdir, infile],
|
try:
|
||||||
stderr = subprocess.PIPE)
|
cmd = [sevenz, "e", "-bd", "-y", "-o" + outdir, infile]
|
||||||
|
subprocess.check_output(cmd, stderr = subprocess.PIPE)
|
||||||
|
outputname = glob.glob(os.path.join(outdir, "*"))
|
||||||
|
# There should be only one file in there..
|
||||||
|
print(outputname[0])
|
||||||
|
except Exception as err:
|
||||||
|
print("%s" % (str(err),), file = ftrace)
|
||||||
|
sys.exit(4)
|
||||||
|
|
||||||
outputname = glob.glob(os.path.join(outdir, "*"))
|
sys.exit(0)
|
||||||
# There should be only one file in there..
|
|
||||||
print(outputname[0])
|
|
||||||
|
|||||||
@ -145,6 +145,7 @@ string path_wingettempfilename(TCHAR *pref)
|
|||||||
// Windows will have created a temp file, we delete it.
|
// Windows will have created a temp file, we delete it.
|
||||||
string filename = path_tchartoutf8(buf);
|
string filename = path_tchartoutf8(buf);
|
||||||
unlink(filename.c_str());
|
unlink(filename.c_str());
|
||||||
|
path_slashize(filename);
|
||||||
return filename;
|
return filename;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -149,8 +149,8 @@ text/x-python = exec python rclpython
|
|||||||
text/x-shellscript = internal text/plain
|
text/x-shellscript = internal text/plain
|
||||||
text/x-srt = internal text/plain
|
text/x-srt = internal text/plain
|
||||||
|
|
||||||
application/xml = execm python rclxml
|
application/xml = execm python rclxml.py
|
||||||
text/xml = execm python rclxml
|
text/xml = execm python rclxml.py
|
||||||
# Using these instead of the two above would index all parameter and tag
|
# Using these instead of the two above would index all parameter and tag
|
||||||
# names, attribute values etc, instead of just the text content.
|
# names, attribute values etc, instead of just the text content.
|
||||||
#application/xml = internal text/plain
|
#application/xml = internal text/plain
|
||||||
|
|||||||
@ -62,9 +62,9 @@ RCLS=$RCLW/build-rclstartw-${QTA}-${qtsdir}/${qtsdir}/rclstartw.exe
|
|||||||
|
|
||||||
|
|
||||||
# Needed for a VS build (which we did not ever complete because of
|
# Needed for a VS build (which we did not ever complete because of
|
||||||
# missing Qt VS2015 support). Needed for unrtf
|
# missing Qt VS2015 support).
|
||||||
CONFIGURATION=Release
|
#CONFIGURATION=Release
|
||||||
PLATFORM=Win32
|
#PLATFORM=Win32
|
||||||
|
|
||||||
################
|
################
|
||||||
# Script:
|
# Script:
|
||||||
@ -82,12 +82,14 @@ copyqt()
|
|||||||
{
|
{
|
||||||
cd $DESTDIR
|
cd $DESTDIR
|
||||||
$QTBIN/windeployqt recoll.exe
|
$QTBIN/windeployqt recoll.exe
|
||||||
|
chkcp $QTBIN/libwinpthread-1.dll $DESTDIR
|
||||||
}
|
}
|
||||||
|
|
||||||
copyxapian()
|
copyxapian()
|
||||||
{
|
{
|
||||||
chkcp $LIBXAPIAN $DESTDIR
|
chkcp $LIBXAPIAN $DESTDIR
|
||||||
}
|
}
|
||||||
|
|
||||||
copyzlib()
|
copyzlib()
|
||||||
{
|
{
|
||||||
chkcp $ZLIB/zlib1.dll $DESTDIR
|
chkcp $ZLIB/zlib1.dll $DESTDIR
|
||||||
@ -133,7 +135,8 @@ copyantiword()
|
|||||||
|
|
||||||
copyunrtf()
|
copyunrtf()
|
||||||
{
|
{
|
||||||
bindir=$UNRTF/Windows/$PLATFORM/$CONFIGURATION
|
# bindir=$UNRTF/Windows/$PLATFORM/$CONFIGURATION
|
||||||
|
bindir=$UNRTF/Windows/
|
||||||
|
|
||||||
test -d $FILTERS/Share || mkdir -p $FILTERS/Share || exit 1
|
test -d $FILTERS/Share || mkdir -p $FILTERS/Share || exit 1
|
||||||
chkcp $bindir/unrtf.exe $FILTERS
|
chkcp $bindir/unrtf.exe $FILTERS
|
||||||
@ -158,8 +161,7 @@ copyepub()
|
|||||||
copypyexiv2()
|
copypyexiv2()
|
||||||
{
|
{
|
||||||
cp -rp $PYEXIV2/pyexiv2 $FILTERS
|
cp -rp $PYEXIV2/pyexiv2 $FILTERS
|
||||||
# Check
|
chkcp $PYEXIV2/libexiv2python.pyd $FILTERS/
|
||||||
chkcp $PYEXIV2/pyexiv2/exif.py $FILTERS/pyexiv2
|
|
||||||
}
|
}
|
||||||
|
|
||||||
copyxslt()
|
copyxslt()
|
||||||
@ -170,10 +172,12 @@ copyxslt()
|
|||||||
|
|
||||||
copypoppler()
|
copypoppler()
|
||||||
{
|
{
|
||||||
|
test -d $FILTERS/poppler || mkdir $FILTERS/poppler || \
|
||||||
|
fatal cant create poppler dir
|
||||||
for f in pdftotext.exe libpoppler.dll freetype6.dll jpeg62.dll \
|
for f in pdftotext.exe libpoppler.dll freetype6.dll jpeg62.dll \
|
||||||
libpng16-16.dll zlib1.dll libtiff3.dll \
|
libpng16-16.dll zlib1.dll libtiff3.dll \
|
||||||
libgcc_s_dw2-1.dll libstdc++-6.dll; do
|
libgcc_s_dw2-1.dll libstdc++-6.dll; do
|
||||||
chkcp $POPPLER/bin/$f $FILTERS/
|
chkcp $POPPLER/bin/$f $FILTERS/poppler
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -32,6 +32,5 @@ windows {
|
|||||||
C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_5_0_MinGW_32bit-Debug/debug/librecoll.dll \
|
C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_5_0_MinGW_32bit-Debug/debug/librecoll.dll \
|
||||||
-lshlwapi -lpsapi -lkernel32
|
-lshlwapi -lpsapi -lkernel32
|
||||||
|
|
||||||
INCLUDEPATH += ../../windows \
|
INCLUDEPATH += ../../windows
|
||||||
C:/temp/xapian-core-1.2.8/include
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -30,6 +30,5 @@ windows {
|
|||||||
C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_5_0_MinGW_32bit-Debug/debug/librecoll.dll \
|
C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_5_0_MinGW_32bit-Debug/debug/librecoll.dll \
|
||||||
-lshlwapi -lpsapi -lkernel32
|
-lshlwapi -lpsapi -lkernel32
|
||||||
|
|
||||||
INCLUDEPATH += ../../windows \
|
INCLUDEPATH += ../../windows
|
||||||
C:/recolldeps/xapian/xapian-core-1.2.8/include
|
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user