Don't use a 0-sized doc to mean EOF now
This commit is contained in:
parent
451041e7db
commit
ebd10680f8
@ -91,17 +91,20 @@ class rclCHM:
|
|||||||
"""Extract one path-named internal file from the chm file"""
|
"""Extract one path-named internal file from the chm file"""
|
||||||
|
|
||||||
#self.em.rclog("extractone: [%s]"%(path))
|
#self.em.rclog("extractone: [%s]"%(path))
|
||||||
eof = (self.currentindex >= len(self.tp.contents) -1)
|
iseof = rclexecm.RclExecM.noteof
|
||||||
|
if self.currentindex >= len(self.tp.contents) -1:
|
||||||
|
iseof = rclexecm.RclExecM.eofnext
|
||||||
|
|
||||||
res, ui = self.chm.ResolveObject(path)
|
res, ui = self.chm.ResolveObject(path)
|
||||||
#self.em.rclog("extract: ResolveO: %d [%s]" % (res, ui))
|
#self.em.rclog("extract: ResolveO: %d [%s]" % (res, ui))
|
||||||
if res != chmlib.CHM_RESOLVE_SUCCESS:
|
if res != chmlib.CHM_RESOLVE_SUCCESS:
|
||||||
return (False, "", path, eof)
|
return (False, "", path, iseof)
|
||||||
# RetrieveObject() returns len,value
|
# RetrieveObject() returns len,value
|
||||||
res, doc = self.chm.RetrieveObject(ui)
|
res, doc = self.chm.RetrieveObject(ui)
|
||||||
#self.em.rclog("extract: RetrieveObject: %d [%s]" % (res, doc))
|
#self.em.rclog("extract: RetrieveObject: %d [%s]" % (res, doc))
|
||||||
if res > 0:
|
if res > 0:
|
||||||
return (True, doc, path, eof)
|
return (True, doc, path, iseof)
|
||||||
return (False, "", path, eof)
|
return (False, "", path, iseof)
|
||||||
|
|
||||||
def openfile(self, params):
|
def openfile(self, params):
|
||||||
"""Open the chm file and build the contents list by extracting and
|
"""Open the chm file and build the contents list by extracting and
|
||||||
@ -128,7 +131,7 @@ class rclCHM:
|
|||||||
|
|
||||||
def getnext(self, params):
|
def getnext(self, params):
|
||||||
if self.currentindex >= len(self.tp.contents):
|
if self.currentindex >= len(self.tp.contents):
|
||||||
return (False, "", "", 1)
|
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
else:
|
else:
|
||||||
ret= self.extractone(self.tp.contents[self.currentindex])
|
ret= self.extractone(self.tp.contents[self.currentindex])
|
||||||
self.currentindex += 1
|
self.currentindex += 1
|
||||||
|
|||||||
@ -6,6 +6,10 @@ import sys
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
class RclExecM:
|
class RclExecM:
|
||||||
|
noteof = 0
|
||||||
|
eofnext = 1
|
||||||
|
eofnow = 2
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.myname = os.path.basename(sys.argv[0])
|
self.myname = os.path.basename(sys.argv[0])
|
||||||
self.mimetype = ""
|
self.mimetype = ""
|
||||||
@ -45,7 +49,7 @@ class RclExecM:
|
|||||||
return (paramname, paramdata)
|
return (paramname, paramdata)
|
||||||
|
|
||||||
# Send answer: document, ipath, possible eof.
|
# Send answer: document, ipath, possible eof.
|
||||||
def answer(self, docdata, ipath, iseof):
|
def answer(self, docdata, ipath, iseof = noteof):
|
||||||
|
|
||||||
print "Document:", len(docdata)
|
print "Document:", len(docdata)
|
||||||
sys.stdout.write(docdata)
|
sys.stdout.write(docdata)
|
||||||
@ -59,8 +63,11 @@ class RclExecM:
|
|||||||
sys.stdout.write(self.mimetype)
|
sys.stdout.write(self.mimetype)
|
||||||
|
|
||||||
# If we're at the end of the contents, say so
|
# If we're at the end of the contents, say so
|
||||||
if iseof:
|
if iseof == self.eofnow:
|
||||||
print "Eof: 0"
|
print "Eofnow: 0"
|
||||||
|
elif iseof == self.eofnext:
|
||||||
|
print "Eofnext: 0"
|
||||||
|
|
||||||
# End of message
|
# End of message
|
||||||
print
|
print
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|||||||
@ -15,8 +15,11 @@ class IcalExtractor:
|
|||||||
return(False, "", "", True)
|
return(False, "", "", True)
|
||||||
docdata = self.contents[index].as_string()
|
docdata = self.contents[index].as_string()
|
||||||
#self.em.rclog(docdata)
|
#self.em.rclog(docdata)
|
||||||
eof = (self.currentindex >= len(self.contents) -1)
|
|
||||||
return (True, docdata, str(index), eof)
|
iseof = rclexecm.RclExecM.noteof
|
||||||
|
if self.currentindex >= len(self.contents) -1:
|
||||||
|
iseof = rclexecm.RclExecM.eofnext
|
||||||
|
return (True, docdata, str(index), iseof)
|
||||||
|
|
||||||
###### File type handler api, used by rclexecm ---------->
|
###### File type handler api, used by rclexecm ---------->
|
||||||
def openfile(self, params):
|
def openfile(self, params):
|
||||||
@ -40,7 +43,7 @@ class IcalExtractor:
|
|||||||
def getnext(self, params):
|
def getnext(self, params):
|
||||||
if self.currentindex >= len(self.contents):
|
if self.currentindex >= len(self.contents):
|
||||||
#em.rclog("getnext: EOF hit")
|
#em.rclog("getnext: EOF hit")
|
||||||
return (False, "", "", 1)
|
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
else:
|
else:
|
||||||
ret= self.extractone(self.currentindex)
|
ret= self.extractone(self.currentindex)
|
||||||
self.currentindex += 1
|
self.currentindex += 1
|
||||||
|
|||||||
@ -95,7 +95,7 @@ sub imgTagsToHtml {
|
|||||||
return $output;
|
return $output;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Get one line from stdin, exit on eof
|
# Get one line from stdin (from recollindex), exit on eof
|
||||||
sub readlineorexit {
|
sub readlineorexit {
|
||||||
my $s = <STDIN>;
|
my $s = <STDIN>;
|
||||||
unless ($s) {
|
unless ($s) {
|
||||||
@ -131,7 +131,7 @@ sub readparam {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#
|
#
|
||||||
# Main program starts here
|
# Main program starts here. Talks the rclexecm protocol
|
||||||
#
|
#
|
||||||
|
|
||||||
# JFD: replaced the "use" call with a runtime load with error checking,
|
# JFD: replaced the "use" call with a runtime load with error checking,
|
||||||
@ -159,9 +159,9 @@ while (1) {
|
|||||||
}
|
}
|
||||||
unless (defined $params{"filename:"}) {
|
unless (defined $params{"filename:"}) {
|
||||||
print STDERR "RCLIMG: no filename ??\n";
|
print STDERR "RCLIMG: no filename ??\n";
|
||||||
# Recoll is requesting next subdocument, but we have none, just say
|
# Recoll is requesting the next subdocument (it shouldn't, because we
|
||||||
# so:
|
# returned eofnext last time), but we have none, just say so:
|
||||||
print "Document: 0\n\n";
|
print "Eofnow:0\nDocument: 0\n\n";
|
||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -172,7 +172,7 @@ while (1) {
|
|||||||
# print STDERR "RCLIMG: writing $l bytes of data\n";
|
# print STDERR "RCLIMG: writing $l bytes of data\n";
|
||||||
print $data;
|
print $data;
|
||||||
# Say we have no further documents for this file
|
# Say we have no further documents for this file
|
||||||
print "Eof: 0\n";
|
print "Eofnext: 0\n";
|
||||||
# End of output parameters: print empty line
|
# End of output parameters: print empty line
|
||||||
print "\n";
|
print "\n";
|
||||||
# print STDERR "RCLIMG: done writing data\n";
|
# print STDERR "RCLIMG: done writing data\n";
|
||||||
|
|||||||
@ -22,8 +22,10 @@ class ZipExtractor:
|
|||||||
except error, err:
|
except error, err:
|
||||||
self.em.rclog("extractone: failed: [%s]" % err)
|
self.em.rclog("extractone: failed: [%s]" % err)
|
||||||
ok = False
|
ok = False
|
||||||
eof = (self.currentindex >= len(self.zip.namelist()) -1)
|
iseof = rclexecm.RclExecM.noteof
|
||||||
return (ok, docdata, ipath, eof)
|
if self.currentindex >= len(self.zip.namelist()) -1:
|
||||||
|
iseof = rclexecm.RclExecM.eofnext
|
||||||
|
return (ok, docdata, ipath, iseof)
|
||||||
|
|
||||||
###### File type handler api, used by rclexecm ---------->
|
###### File type handler api, used by rclexecm ---------->
|
||||||
def openfile(self, params):
|
def openfile(self, params):
|
||||||
@ -40,7 +42,7 @@ class ZipExtractor:
|
|||||||
def getnext(self, params):
|
def getnext(self, params):
|
||||||
if self.currentindex >= len(self.zip.namelist()):
|
if self.currentindex >= len(self.zip.namelist()):
|
||||||
#self.em.rclog("getnext: EOF hit")
|
#self.em.rclog("getnext: EOF hit")
|
||||||
return (False, "", "", 1)
|
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
else:
|
else:
|
||||||
ret= self.extractone(self.zip.namelist()[self.currentindex])
|
ret= self.extractone(self.zip.namelist()[self.currentindex])
|
||||||
self.currentindex += 1
|
self.currentindex += 1
|
||||||
|
|||||||
@ -166,7 +166,8 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
|
|
||||||
// Read answer (multiple elements)
|
// Read answer (multiple elements)
|
||||||
LOGDEB1(("MHExecMultiple: reading answer\n"));
|
LOGDEB1(("MHExecMultiple: reading answer\n"));
|
||||||
bool eof_received = false;
|
bool eofnext_received = false;
|
||||||
|
bool eofnow_received = false;
|
||||||
string ipath;
|
string ipath;
|
||||||
string mtype;
|
string mtype;
|
||||||
for (int loop=0;;loop++) {
|
for (int loop=0;;loop++) {
|
||||||
@ -176,9 +177,13 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
}
|
}
|
||||||
if (name.empty())
|
if (name.empty())
|
||||||
break;
|
break;
|
||||||
if (!stringlowercmp("eof:", name)) {
|
if (!stringlowercmp("eofnext:", name)) {
|
||||||
LOGDEB(("MHExecMultiple: got EOF\n"));
|
LOGDEB(("MHExecMultiple: got EOFNEXT\n"));
|
||||||
eof_received = true;
|
eofnext_received = true;
|
||||||
|
}
|
||||||
|
if (!stringlowercmp("eofnow:", name)) {
|
||||||
|
LOGDEB(("MHExecMultiple: got EOFNOW\n"));
|
||||||
|
eofnow_received = true;
|
||||||
}
|
}
|
||||||
if (!stringlowercmp("ipath:", name)) {
|
if (!stringlowercmp("ipath:", name)) {
|
||||||
ipath = data;
|
ipath = data;
|
||||||
@ -194,17 +199,21 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// The end of data can be signaled from the filter in two ways:
|
|
||||||
// either by returning an empty document (if the filter just hits
|
if (eofnow_received) {
|
||||||
// eof while trying to read the doc), or with an "eof:" field
|
// No more docs
|
||||||
// accompanying a normal document (if the filter hit eof at the
|
|
||||||
// end of the current doc, which is the preferred way).
|
|
||||||
if (m_metaData["content"].length() == 0) {
|
|
||||||
LOGDEB(("MHExecMultiple: got empty document\n"));
|
|
||||||
m_havedoc = false;
|
m_havedoc = false;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// It used to be that eof could be signalled just by an empty document, but
|
||||||
|
// this was wrong. Empty documents can be found, e.g., in zip files and should
|
||||||
|
// not be interpreted as eof.
|
||||||
|
if (m_metaData["content"].length() == 0) {
|
||||||
|
LOGDEB0(("MHExecMultiple: got empty document inside [%s]: [%s]\n",
|
||||||
|
m_fn.c_str(), ipath.c_str()));
|
||||||
|
}
|
||||||
|
|
||||||
// If this has an ipath, it is an internal doc from a
|
// If this has an ipath, it is an internal doc from a
|
||||||
// multi-document file. In this case, either the filter supplies the
|
// multi-document file. In this case, either the filter supplies the
|
||||||
// mimetype, or the ipath MUST be a filename-like string which we can use
|
// mimetype, or the ipath MUST be a filename-like string which we can use
|
||||||
@ -217,7 +226,10 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
// mimetype() won't call idFile when there is no file. Do it
|
// mimetype() won't call idFile when there is no file. Do it
|
||||||
mtype = idFileMem(m_metaData["content"]);
|
mtype = idFileMem(m_metaData["content"]);
|
||||||
if (mtype.empty()) {
|
if (mtype.empty()) {
|
||||||
LOGERR(("MHExecMultiple: cant guess mime type\n"));
|
// Note this happens for example for directory zip members
|
||||||
|
// We could recognize them by the end /, but wouldn't know
|
||||||
|
// what to do with them anyway.
|
||||||
|
LOGINFO(("MHExecMultiple: cant guess mime type\n"));
|
||||||
mtype = "application/octet-stream";
|
mtype = "application/octet-stream";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -238,7 +250,7 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (eof_received)
|
if (eofnext_received)
|
||||||
m_havedoc = false;
|
m_havedoc = false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user