Don't use a 0-sized doc to mean EOF anymore
This commit is contained in:
parent
451041e7db
commit
ebd10680f8
@ -91,17 +91,20 @@ class rclCHM:
|
||||
"""Extract one path-named internal file from the chm file"""
|
||||
|
||||
#self.em.rclog("extractone: [%s]"%(path))
|
||||
eof = (self.currentindex >= len(self.tp.contents) -1)
|
||||
iseof = rclexecm.RclExecM.noteof
|
||||
if self.currentindex >= len(self.tp.contents) -1:
|
||||
iseof = rclexecm.RclExecM.eofnext
|
||||
|
||||
res, ui = self.chm.ResolveObject(path)
|
||||
#self.em.rclog("extract: ResolveO: %d [%s]" % (res, ui))
|
||||
if res != chmlib.CHM_RESOLVE_SUCCESS:
|
||||
return (False, "", path, eof)
|
||||
return (False, "", path, iseof)
|
||||
# RetrieveObject() returns len,value
|
||||
res, doc = self.chm.RetrieveObject(ui)
|
||||
#self.em.rclog("extract: RetrieveObject: %d [%s]" % (res, doc))
|
||||
if res > 0:
|
||||
return (True, doc, path, eof)
|
||||
return (False, "", path, eof)
|
||||
return (True, doc, path, iseof)
|
||||
return (False, "", path, iseof)
|
||||
|
||||
def openfile(self, params):
|
||||
"""Open the chm file and build the contents list by extracting and
|
||||
@ -128,7 +131,7 @@ class rclCHM:
|
||||
|
||||
def getnext(self, params):
|
||||
if self.currentindex >= len(self.tp.contents):
|
||||
return (False, "", "", 1)
|
||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||
else:
|
||||
ret= self.extractone(self.tp.contents[self.currentindex])
|
||||
self.currentindex += 1
|
||||
|
||||
@ -6,6 +6,10 @@ import sys
|
||||
import os
|
||||
|
||||
class RclExecM:
|
||||
noteof = 0
|
||||
eofnext = 1
|
||||
eofnow = 2
|
||||
|
||||
def __init__(self):
|
||||
self.myname = os.path.basename(sys.argv[0])
|
||||
self.mimetype = ""
|
||||
@ -45,7 +49,7 @@ class RclExecM:
|
||||
return (paramname, paramdata)
|
||||
|
||||
# Send answer: document, ipath, possible eof.
|
||||
def answer(self, docdata, ipath, iseof):
|
||||
def answer(self, docdata, ipath, iseof = noteof):
|
||||
|
||||
print "Document:", len(docdata)
|
||||
sys.stdout.write(docdata)
|
||||
@ -59,8 +63,11 @@ class RclExecM:
|
||||
sys.stdout.write(self.mimetype)
|
||||
|
||||
# If we're at the end of the contents, say so
|
||||
if iseof:
|
||||
print "Eof: 0"
|
||||
if iseof == self.eofnow:
|
||||
print "Eofnow: 0"
|
||||
elif iseof == self.eofnext:
|
||||
print "Eofnext: 0"
|
||||
|
||||
# End of message
|
||||
print
|
||||
sys.stdout.flush()
|
||||
|
||||
@ -15,8 +15,11 @@ class IcalExtractor:
|
||||
return(False, "", "", True)
|
||||
docdata = self.contents[index].as_string()
|
||||
#self.em.rclog(docdata)
|
||||
eof = (self.currentindex >= len(self.contents) -1)
|
||||
return (True, docdata, str(index), eof)
|
||||
|
||||
iseof = rclexecm.RclExecM.noteof
|
||||
if self.currentindex >= len(self.contents) -1:
|
||||
iseof = rclexecm.RclExecM.eofnext
|
||||
return (True, docdata, str(index), iseof)
|
||||
|
||||
###### File type handler api, used by rclexecm ---------->
|
||||
def openfile(self, params):
|
||||
@ -40,7 +43,7 @@ class IcalExtractor:
|
||||
def getnext(self, params):
|
||||
if self.currentindex >= len(self.contents):
|
||||
#em.rclog("getnext: EOF hit")
|
||||
return (False, "", "", 1)
|
||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||
else:
|
||||
ret= self.extractone(self.currentindex)
|
||||
self.currentindex += 1
|
||||
|
||||
@ -95,7 +95,7 @@ sub imgTagsToHtml {
|
||||
return $output;
|
||||
}
|
||||
|
||||
# Get one line from stdin, exit on eof
|
||||
# Get one line from stdin (from recollindex), exit on eof
|
||||
sub readlineorexit {
|
||||
my $s = <STDIN>;
|
||||
unless ($s) {
|
||||
@ -131,7 +131,7 @@ sub readparam {
|
||||
}
|
||||
|
||||
#
|
||||
# Main program starts here
|
||||
# Main program starts here. Talks the rclexecm protocol
|
||||
#
|
||||
|
||||
# JFD: replaced the "use" call with a runtime load with error checking,
|
||||
@ -159,9 +159,9 @@ while (1) {
|
||||
}
|
||||
unless (defined $params{"filename:"}) {
|
||||
print STDERR "RCLIMG: no filename ??\n";
|
||||
# Recoll is requesting next subdocument, but we have none, just say
|
||||
# so:
|
||||
print "Document: 0\n\n";
|
||||
# Recoll is requesting next subdocument (it shouldn't, because we
|
||||
# returned eofnext last time), but we have none, just say so:
|
||||
print "Eofnow:0\nDocument: 0\n\n";
|
||||
next;
|
||||
}
|
||||
|
||||
@ -172,7 +172,7 @@ while (1) {
|
||||
# print STDERR "RCLIMG: writing $l bytes of data\n";
|
||||
print $data;
|
||||
# Say we have no further documents for this file
|
||||
print "Eof: 0\n";
|
||||
print "Eofnext: 0\n";
|
||||
# End of output parameters: print empty line
|
||||
print "\n";
|
||||
# print STDERR "RCLIMG: done writing data\n";
|
||||
|
||||
@ -22,8 +22,10 @@ class ZipExtractor:
|
||||
except error, err:
|
||||
self.em.rclog("extractone: failed: [%s]" % err)
|
||||
ok = False
|
||||
eof = (self.currentindex >= len(self.zip.namelist()) -1)
|
||||
return (ok, docdata, ipath, eof)
|
||||
iseof = rclexecm.RclExecM.noteof
|
||||
if self.currentindex >= len(self.zip.namelist()) -1:
|
||||
iseof = rclexecm.RclExecM.eofnext
|
||||
return (ok, docdata, ipath, iseof)
|
||||
|
||||
###### File type handler api, used by rclexecm ---------->
|
||||
def openfile(self, params):
|
||||
@ -40,7 +42,7 @@ class ZipExtractor:
|
||||
def getnext(self, params):
|
||||
if self.currentindex >= len(self.zip.namelist()):
|
||||
#self.em.rclog("getnext: EOF hit")
|
||||
return (False, "", "", 1)
|
||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||
else:
|
||||
ret= self.extractone(self.zip.namelist()[self.currentindex])
|
||||
self.currentindex += 1
|
||||
|
||||
@ -166,7 +166,8 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
|
||||
// Read answer (multiple elements)
|
||||
LOGDEB1(("MHExecMultiple: reading answer\n"));
|
||||
bool eof_received = false;
|
||||
bool eofnext_received = false;
|
||||
bool eofnow_received = false;
|
||||
string ipath;
|
||||
string mtype;
|
||||
for (int loop=0;;loop++) {
|
||||
@ -176,9 +177,13 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
}
|
||||
if (name.empty())
|
||||
break;
|
||||
if (!stringlowercmp("eof:", name)) {
|
||||
LOGDEB(("MHExecMultiple: got EOF\n"));
|
||||
eof_received = true;
|
||||
if (!stringlowercmp("eofnext:", name)) {
|
||||
LOGDEB(("MHExecMultiple: got EOFNEXT\n"));
|
||||
eofnext_received = true;
|
||||
}
|
||||
if (!stringlowercmp("eofnow:", name)) {
|
||||
LOGDEB(("MHExecMultiple: got EOFNOW\n"));
|
||||
eofnow_received = true;
|
||||
}
|
||||
if (!stringlowercmp("ipath:", name)) {
|
||||
ipath = data;
|
||||
@ -194,17 +199,21 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// The end of data can be signaled from the filter in two ways:
|
||||
// either by returning an empty document (if the filter just hits
|
||||
// eof while trying to read the doc), or with an "eof:" field
|
||||
// accompanying a normal document (if the filter hit eof at the
|
||||
// end of the current doc, which is the preferred way).
|
||||
if (m_metaData["content"].length() == 0) {
|
||||
LOGDEB(("MHExecMultiple: got empty document\n"));
|
||||
|
||||
if (eofnow_received) {
|
||||
// No more docs
|
||||
m_havedoc = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
// It used to be that eof could be signalled just by an empty document, but
|
||||
// this was wrong. Empty documents can be found e.g. in zip files and should
|
||||
// not be interpreted as eof.
|
||||
if (m_metaData["content"].length() == 0) {
|
||||
LOGDEB0(("MHExecMultiple: got empty document inside [%s]: [%s]\n",
|
||||
m_fn.c_str(), ipath.c_str()));
|
||||
}
|
||||
|
||||
// If this has an ipath, it is an internal doc from a
|
||||
// multi-document file. In this case, either the filter supplies the
|
||||
// mimetype, or the ipath MUST be a filename-like string which we can use
|
||||
@ -217,7 +226,10 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
// mimetype() won't call idFile when there is no file. Do it
|
||||
mtype = idFileMem(m_metaData["content"]);
|
||||
if (mtype.empty()) {
|
||||
LOGERR(("MHExecMultiple: cant guess mime type\n"));
|
||||
// Note this happens for example for directory zip members
|
||||
// We could recognize them by the end /, but wouldn't know
|
||||
// what to do with them anyway.
|
||||
LOGINFO(("MHExecMultiple: cant guess mime type\n"));
|
||||
mtype = "application/octet-stream";
|
||||
}
|
||||
}
|
||||
@ -238,7 +250,7 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
}
|
||||
}
|
||||
|
||||
if (eof_received)
|
||||
if (eofnext_received)
|
||||
m_havedoc = false;
|
||||
|
||||
return true;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user