Don't use a 0-sized doc to mean eof anymore

This commit is contained in:
dockes 2009-11-13 08:15:19 +00:00
parent 451041e7db
commit ebd10680f8
6 changed files with 60 additions and 33 deletions

View File

@ -91,17 +91,20 @@ class rclCHM:
"""Extract one path-named internal file from the chm file"""
#self.em.rclog("extractone: [%s]"%(path))
eof = (self.currentindex >= len(self.tp.contents) -1)
iseof = rclexecm.RclExecM.noteof
if self.currentindex >= len(self.tp.contents) -1:
iseof = rclexecm.RclExecM.eofnext
res, ui = self.chm.ResolveObject(path)
#self.em.rclog("extract: ResolveO: %d [%s]" % (res, ui))
if res != chmlib.CHM_RESOLVE_SUCCESS:
return (False, "", path, eof)
return (False, "", path, iseof)
# RetrieveObject() returns len,value
res, doc = self.chm.RetrieveObject(ui)
#self.em.rclog("extract: RetrieveObject: %d [%s]" % (res, doc))
if res > 0:
return (True, doc, path, eof)
return (False, "", path, eof)
return (True, doc, path, iseof)
return (False, "", path, iseof)
def openfile(self, params):
"""Open the chm file and build the contents list by extracting and
@ -128,7 +131,7 @@ class rclCHM:
def getnext(self, params):
if self.currentindex >= len(self.tp.contents):
return (False, "", "", 1)
return (False, "", "", rclexecm.RclExecM.eofnow)
else:
ret= self.extractone(self.tp.contents[self.currentindex])
self.currentindex += 1

View File

@ -6,6 +6,10 @@ import sys
import os
class RclExecM:
noteof = 0
eofnext = 1
eofnow = 2
def __init__(self):
self.myname = os.path.basename(sys.argv[0])
self.mimetype = ""
@ -45,7 +49,7 @@ class RclExecM:
return (paramname, paramdata)
# Send answer: document, ipath, possible eof.
def answer(self, docdata, ipath, iseof):
def answer(self, docdata, ipath, iseof = noteof):
print "Document:", len(docdata)
sys.stdout.write(docdata)
@ -59,8 +63,11 @@ class RclExecM:
sys.stdout.write(self.mimetype)
# If we're at the end of the contents, say so
if iseof:
print "Eof: 0"
if iseof == self.eofnow:
print "Eofnow: 0"
elif iseof == self.eofnext:
print "Eofnext: 0"
# End of message
print
sys.stdout.flush()

View File

@ -15,8 +15,11 @@ class IcalExtractor:
return(False, "", "", True)
docdata = self.contents[index].as_string()
#self.em.rclog(docdata)
eof = (self.currentindex >= len(self.contents) -1)
return (True, docdata, str(index), eof)
iseof = rclexecm.RclExecM.noteof
if self.currentindex >= len(self.contents) -1:
iseof = rclexecm.RclExecM.eofnext
return (True, docdata, str(index), iseof)
###### File type handler api, used by rclexecm ---------->
def openfile(self, params):
@ -40,7 +43,7 @@ class IcalExtractor:
def getnext(self, params):
if self.currentindex >= len(self.contents):
#em.rclog("getnext: EOF hit")
return (False, "", "", 1)
return (False, "", "", rclexecm.RclExecM.eofnow)
else:
ret= self.extractone(self.currentindex)
self.currentindex += 1

View File

@ -95,7 +95,7 @@ sub imgTagsToHtml {
return $output;
}
# Get one line from stdin, exit on eof
# Get one line from stdin (from recollindex), exit on eof
sub readlineorexit {
my $s = <STDIN>;
unless ($s) {
@ -131,7 +131,7 @@ sub readparam {
}
#
# Main program starts here
# Main program starts here. Talks the rclexecm protocol
#
# JFD: replaced the "use" call with a runtime load with error checking,
@ -159,9 +159,9 @@ while (1) {
}
unless (defined $params{"filename:"}) {
print STDERR "RCLIMG: no filename ??\n";
# Recoll is requesting next subdocument, but we have none, just say
# so:
print "Document: 0\n\n";
# Recoll is requesting the next subdocument (it shouldn't, because we
# returned eofnext last time), but we have none, so just say so:
print "Eofnow:0\nDocument: 0\n\n";
next;
}
@ -172,7 +172,7 @@ while (1) {
# print STDERR "RCLIMG: writing $l bytes of data\n";
print $data;
# Say we have no further documents for this file
print "Eof: 0\n";
print "Eofnext: 0\n";
# End of output parameters: print empty line
print "\n";
# print STDERR "RCLIMG: done writing data\n";

View File

@ -22,8 +22,10 @@ class ZipExtractor:
except error, err:
self.em.rclog("extractone: failed: [%s]" % err)
ok = False
eof = (self.currentindex >= len(self.zip.namelist()) -1)
return (ok, docdata, ipath, eof)
iseof = rclexecm.RclExecM.noteof
if self.currentindex >= len(self.zip.namelist()) -1:
iseof = rclexecm.RclExecM.eofnext
return (ok, docdata, ipath, iseof)
###### File type handler api, used by rclexecm ---------->
def openfile(self, params):
@ -40,7 +42,7 @@ class ZipExtractor:
def getnext(self, params):
if self.currentindex >= len(self.zip.namelist()):
#self.em.rclog("getnext: EOF hit")
return (False, "", "", 1)
return (False, "", "", rclexecm.RclExecM.eofnow)
else:
ret= self.extractone(self.zip.namelist()[self.currentindex])
self.currentindex += 1

View File

@ -166,7 +166,8 @@ bool MimeHandlerExecMultiple::next_document()
// Read answer (multiple elements)
LOGDEB1(("MHExecMultiple: reading answer\n"));
bool eof_received = false;
bool eofnext_received = false;
bool eofnow_received = false;
string ipath;
string mtype;
for (int loop=0;;loop++) {
@ -176,9 +177,13 @@ bool MimeHandlerExecMultiple::next_document()
}
if (name.empty())
break;
if (!stringlowercmp("eof:", name)) {
LOGDEB(("MHExecMultiple: got EOF\n"));
eof_received = true;
if (!stringlowercmp("eofnext:", name)) {
LOGDEB(("MHExecMultiple: got EOFNEXT\n"));
eofnext_received = true;
}
if (!stringlowercmp("eofnow:", name)) {
LOGDEB(("MHExecMultiple: got EOFNOW\n"));
eofnow_received = true;
}
if (!stringlowercmp("ipath:", name)) {
ipath = data;
@ -194,17 +199,21 @@ bool MimeHandlerExecMultiple::next_document()
return false;
}
}
// The end of data can be signaled from the filter in two ways:
// either by returning an empty document (if the filter just hits
// eof while trying to read the doc), or with an "eof:" field
// accompanying a normal document (if the filter hit eof at the
// end of the current doc, which is the preferred way).
if (m_metaData["content"].length() == 0) {
LOGDEB(("MHExecMultiple: got empty document\n"));
if (eofnow_received) {
// No more docs
m_havedoc = false;
return false;
}
// It used to be that eof could be signalled just by an empty document, but
// this was wrong. Empty documents can be found e.g. in zip files and should
// not be interpreted as eof.
if (m_metaData["content"].length() == 0) {
LOGDEB0(("MHExecMultiple: got empty document inside [%s]: [%s]\n",
m_fn.c_str(), ipath.c_str()));
}
// If this has an ipath, it is an internal doc from a
// multi-document file. In this case, either the filter supplies the
// mimetype, or the ipath MUST be a filename-like string which we can use
@ -217,7 +226,10 @@ bool MimeHandlerExecMultiple::next_document()
// mimetype() won't call idFile when there is no file. Do it
mtype = idFileMem(m_metaData["content"]);
if (mtype.empty()) {
LOGERR(("MHExecMultiple: cant guess mime type\n"));
// Note: this happens, for example, for directory zip members.
// We could recognize them by the trailing '/', but wouldn't know
// what to do with them anyway.
LOGINFO(("MHExecMultiple: cant guess mime type\n"));
mtype = "application/octet-stream";
}
}
@ -238,7 +250,7 @@ bool MimeHandlerExecMultiple::next_document()
}
}
if (eof_received)
if (eofnext_received)
m_havedoc = false;
return true;