Don't use a 0-sized doc to mean EOF anymore

This commit is contained in:
dockes 2009-11-13 08:15:19 +00:00
parent 451041e7db
commit ebd10680f8
6 changed files with 60 additions and 33 deletions

View File

@ -91,17 +91,20 @@ class rclCHM:
"""Extract one path-named internal file from the chm file""" """Extract one path-named internal file from the chm file"""
#self.em.rclog("extractone: [%s]"%(path)) #self.em.rclog("extractone: [%s]"%(path))
eof = (self.currentindex >= len(self.tp.contents) -1) iseof = rclexecm.RclExecM.noteof
if self.currentindex >= len(self.tp.contents) -1:
iseof = rclexecm.RclExecM.eofnext
res, ui = self.chm.ResolveObject(path) res, ui = self.chm.ResolveObject(path)
#self.em.rclog("extract: ResolveO: %d [%s]" % (res, ui)) #self.em.rclog("extract: ResolveO: %d [%s]" % (res, ui))
if res != chmlib.CHM_RESOLVE_SUCCESS: if res != chmlib.CHM_RESOLVE_SUCCESS:
return (False, "", path, eof) return (False, "", path, iseof)
# RetrieveObject() returns len,value # RetrieveObject() returns len,value
res, doc = self.chm.RetrieveObject(ui) res, doc = self.chm.RetrieveObject(ui)
#self.em.rclog("extract: RetrieveObject: %d [%s]" % (res, doc)) #self.em.rclog("extract: RetrieveObject: %d [%s]" % (res, doc))
if res > 0: if res > 0:
return (True, doc, path, eof) return (True, doc, path, iseof)
return (False, "", path, eof) return (False, "", path, iseof)
def openfile(self, params): def openfile(self, params):
"""Open the chm file and build the contents list by extracting and """Open the chm file and build the contents list by extracting and
@ -128,7 +131,7 @@ class rclCHM:
def getnext(self, params): def getnext(self, params):
if self.currentindex >= len(self.tp.contents): if self.currentindex >= len(self.tp.contents):
return (False, "", "", 1) return (False, "", "", rclexecm.RclExecM.eofnow)
else: else:
ret= self.extractone(self.tp.contents[self.currentindex]) ret= self.extractone(self.tp.contents[self.currentindex])
self.currentindex += 1 self.currentindex += 1

View File

@ -6,6 +6,10 @@ import sys
import os import os
class RclExecM: class RclExecM:
noteof = 0
eofnext = 1
eofnow = 2
def __init__(self): def __init__(self):
self.myname = os.path.basename(sys.argv[0]) self.myname = os.path.basename(sys.argv[0])
self.mimetype = "" self.mimetype = ""
@ -45,7 +49,7 @@ class RclExecM:
return (paramname, paramdata) return (paramname, paramdata)
# Send answer: document, ipath, possible eof. # Send answer: document, ipath, possible eof.
def answer(self, docdata, ipath, iseof): def answer(self, docdata, ipath, iseof = noteof):
print "Document:", len(docdata) print "Document:", len(docdata)
sys.stdout.write(docdata) sys.stdout.write(docdata)
@ -59,8 +63,11 @@ class RclExecM:
sys.stdout.write(self.mimetype) sys.stdout.write(self.mimetype)
# If we're at the end of the contents, say so # If we're at the end of the contents, say so
if iseof: if iseof == self.eofnow:
print "Eof: 0" print "Eofnow: 0"
elif iseof == self.eofnext:
print "Eofnext: 0"
# End of message # End of message
print print
sys.stdout.flush() sys.stdout.flush()

View File

@ -15,8 +15,11 @@ class IcalExtractor:
return(False, "", "", True) return(False, "", "", True)
docdata = self.contents[index].as_string() docdata = self.contents[index].as_string()
#self.em.rclog(docdata) #self.em.rclog(docdata)
eof = (self.currentindex >= len(self.contents) -1)
return (True, docdata, str(index), eof) iseof = rclexecm.RclExecM.noteof
if self.currentindex >= len(self.contents) -1:
iseof = rclexecm.RclExecM.eofnext
return (True, docdata, str(index), iseof)
###### File type handler api, used by rclexecm ----------> ###### File type handler api, used by rclexecm ---------->
def openfile(self, params): def openfile(self, params):
@ -40,7 +43,7 @@ class IcalExtractor:
def getnext(self, params): def getnext(self, params):
if self.currentindex >= len(self.contents): if self.currentindex >= len(self.contents):
#em.rclog("getnext: EOF hit") #em.rclog("getnext: EOF hit")
return (False, "", "", 1) return (False, "", "", rclexecm.RclExecM.eofnow)
else: else:
ret= self.extractone(self.currentindex) ret= self.extractone(self.currentindex)
self.currentindex += 1 self.currentindex += 1

View File

@ -95,7 +95,7 @@ sub imgTagsToHtml {
return $output; return $output;
} }
# Get one line from stdin, exit on eof # Get one line from stdin (from recollindex), exit on eof
sub readlineorexit { sub readlineorexit {
my $s = <STDIN>; my $s = <STDIN>;
unless ($s) { unless ($s) {
@ -131,7 +131,7 @@ sub readparam {
} }
# #
# Main program starts here # Main program starts here. Talks the rclexecm protocol
# #
# JFD: replaced the "use" call with a runtime load with error checking, # JFD: replaced the "use" call with a runtime load with error checking,
@ -159,9 +159,9 @@ while (1) {
} }
unless (defined $params{"filename:"}) { unless (defined $params{"filename:"}) {
print STDERR "RCLIMG: no filename ??\n"; print STDERR "RCLIMG: no filename ??\n";
# Recoll is requesting next subdocument, but we have none, just say # Recoll is requesting next subdocument (it shouldn't, because we
# so: # returned eofnext last time), but we have none, just say so:
print "Document: 0\n\n"; print "Eofnow:0\nDocument: 0\n\n";
next; next;
} }
@ -172,7 +172,7 @@ while (1) {
# print STDERR "RCLIMG: writing $l bytes of data\n"; # print STDERR "RCLIMG: writing $l bytes of data\n";
print $data; print $data;
# Say we have no further documents for this file # Say we have no further documents for this file
print "Eof: 0\n"; print "Eofnext: 0\n";
# End of output parameters: print empty line # End of output parameters: print empty line
print "\n"; print "\n";
# print STDERR "RCLIMG: done writing data\n"; # print STDERR "RCLIMG: done writing data\n";

View File

@ -22,8 +22,10 @@ class ZipExtractor:
except error, err: except error, err:
self.em.rclog("extractone: failed: [%s]" % err) self.em.rclog("extractone: failed: [%s]" % err)
ok = False ok = False
eof = (self.currentindex >= len(self.zip.namelist()) -1) iseof = rclexecm.RclExecM.noteof
return (ok, docdata, ipath, eof) if self.currentindex >= len(self.zip.namelist()) -1:
iseof = rclexecm.RclExecM.eofnext
return (ok, docdata, ipath, iseof)
###### File type handler api, used by rclexecm ----------> ###### File type handler api, used by rclexecm ---------->
def openfile(self, params): def openfile(self, params):
@ -40,7 +42,7 @@ class ZipExtractor:
def getnext(self, params): def getnext(self, params):
if self.currentindex >= len(self.zip.namelist()): if self.currentindex >= len(self.zip.namelist()):
#self.em.rclog("getnext: EOF hit") #self.em.rclog("getnext: EOF hit")
return (False, "", "", 1) return (False, "", "", rclexecm.RclExecM.eofnow)
else: else:
ret= self.extractone(self.zip.namelist()[self.currentindex]) ret= self.extractone(self.zip.namelist()[self.currentindex])
self.currentindex += 1 self.currentindex += 1

View File

@ -166,7 +166,8 @@ bool MimeHandlerExecMultiple::next_document()
// Read answer (multiple elements) // Read answer (multiple elements)
LOGDEB1(("MHExecMultiple: reading answer\n")); LOGDEB1(("MHExecMultiple: reading answer\n"));
bool eof_received = false; bool eofnext_received = false;
bool eofnow_received = false;
string ipath; string ipath;
string mtype; string mtype;
for (int loop=0;;loop++) { for (int loop=0;;loop++) {
@ -176,9 +177,13 @@ bool MimeHandlerExecMultiple::next_document()
} }
if (name.empty()) if (name.empty())
break; break;
if (!stringlowercmp("eof:", name)) { if (!stringlowercmp("eofnext:", name)) {
LOGDEB(("MHExecMultiple: got EOF\n")); LOGDEB(("MHExecMultiple: got EOFNEXT\n"));
eof_received = true; eofnext_received = true;
}
if (!stringlowercmp("eofnow:", name)) {
LOGDEB(("MHExecMultiple: got EOFNOW\n"));
eofnow_received = true;
} }
if (!stringlowercmp("ipath:", name)) { if (!stringlowercmp("ipath:", name)) {
ipath = data; ipath = data;
@ -194,17 +199,21 @@ bool MimeHandlerExecMultiple::next_document()
return false; return false;
} }
} }
// The end of data can be signaled from the filter in two ways:
// either by returning an empty document (if the filter just hits if (eofnow_received) {
// eof while trying to read the doc), or with an "eof:" field // No more docs
// accompanying a normal document (if the filter hit eof at the
// end of the current doc, which is the preferred way).
if (m_metaData["content"].length() == 0) {
LOGDEB(("MHExecMultiple: got empty document\n"));
m_havedoc = false; m_havedoc = false;
return false; return false;
} }
// It used to be that eof could be signalled just by an empty document, but
// this was wrong. Empty documents can be found, e.g. in zip files, and should
// not be interpreted as eof.
if (m_metaData["content"].length() == 0) {
LOGDEB0(("MHExecMultiple: got empty document inside [%s]: [%s]\n",
m_fn.c_str(), ipath.c_str()));
}
// If this has an ipath, it is an internal doc from a // If this has an ipath, it is an internal doc from a
// multi-document file. In this case, either the filter supplies the // multi-document file. In this case, either the filter supplies the
// mimetype, or the ipath MUST be a filename-like string which we can use // mimetype, or the ipath MUST be a filename-like string which we can use
@ -217,7 +226,10 @@ bool MimeHandlerExecMultiple::next_document()
// mimetype() won't call idFile when there is no file. Do it // mimetype() won't call idFile when there is no file. Do it
mtype = idFileMem(m_metaData["content"]); mtype = idFileMem(m_metaData["content"]);
if (mtype.empty()) { if (mtype.empty()) {
LOGERR(("MHExecMultiple: cant guess mime type\n")); // Note this happens for example for directory zip members
// We could recognize them by the end /, but wouldn't know
// what to do with them anyway.
LOGINFO(("MHExecMultiple: cant guess mime type\n"));
mtype = "application/octet-stream"; mtype = "application/octet-stream";
} }
} }
@ -238,7 +250,7 @@ bool MimeHandlerExecMultiple::next_document()
} }
} }
if (eof_received) if (eofnext_received)
m_havedoc = false; m_havedoc = false;
return true; return true;