Don't use a 0-sized doc to mean EOF any more

2009-11-13 08:15:19 +00:00 · 2009-11-13 08:15:19 +00:00 · ebd10680f8
commit ebd10680f8
parent 451041e7db
6 changed files with 60 additions and 33 deletions
--- a/src/filters/rclchm
+++ b/src/filters/rclchm
@ -91,17 +91,20 @@ class rclCHM:
        """Extract one path-named internal file from the chm file"""
        #self.em.rclog("extractone: [%s]"%(path))
-        eof = (self.currentindex >= len(self.tp.contents) -1)
+        iseof = rclexecm.RclExecM.noteof
        if self.currentindex >= len(self.tp.contents) -1:
            iseof = rclexecm.RclExecM.eofnext
        res, ui = self.chm.ResolveObject(path)
        #self.em.rclog("extract: ResolveO: %d [%s]" % (res, ui))
        if res != chmlib.CHM_RESOLVE_SUCCESS:
-            return (False, "", path, eof)
+            return (False, "", path, iseof)
        # RetrieveObject() returns len,value
        res, doc = self.chm.RetrieveObject(ui)
        #self.em.rclog("extract: RetrieveObject: %d [%s]" % (res, doc))
        if res > 0:
-            return (True, doc, path, eof)
+            return (True, doc, path, iseof)
-        return (False, "", path, eof)
+        return (False, "", path, iseof)
    def openfile(self, params):
        """Open the chm file and build the contents list by extracting and
@ -128,7 +131,7 @@ class rclCHM:
    def getnext(self, params):
        if self.currentindex >= len(self.tp.contents):
-            return (False, "", "", 1)
+            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret= self.extractone(self.tp.contents[self.currentindex])
            self.currentindex += 1
--- a/src/filters/rclexecm.py
+++ b/src/filters/rclexecm.py
@ -6,6 +6,10 @@ import sys
 import os
 class RclExecM:
    noteof  = 0
    eofnext = 1
    eofnow = 2
    def __init__(self):
        self.myname = os.path.basename(sys.argv[0])
        self.mimetype = ""
@ -45,7 +49,7 @@ class RclExecM:
        return (paramname, paramdata)
    # Send answer: document, ipath, possible eof.
-    def answer(self, docdata, ipath, iseof):
+    def answer(self, docdata, ipath, iseof = noteof):
        print "Document:", len(docdata)
        sys.stdout.write(docdata)
@ -59,8 +63,11 @@ class RclExecM:
            sys.stdout.write(self.mimetype)
        # If we're at the end of the contents, say so
-        if iseof:
+        if iseof == self.eofnow:
-            print "Eof: 0"
+            print "Eofnow: 0"
        elif iseof == self.eofnext:
            print "Eofnext: 0"
        # End of message
        print
        sys.stdout.flush()
--- a/src/filters/rclics
+++ b/src/filters/rclics
@ -15,8 +15,11 @@ class IcalExtractor:
            return(False, "", "", True)
        docdata = self.contents[index].as_string()
 	#self.em.rclog(docdata)
-        eof = (self.currentindex >= len(self.contents) -1)
+
-        return (True, docdata, str(index), eof)
+        iseof = rclexecm.RclExecM.noteof
        if self.currentindex >= len(self.contents) -1:
            iseof = rclexecm.RclExecM.eofnext
        return (True, docdata, str(index), iseof)
    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
@ -40,7 +43,7 @@ class IcalExtractor:
    def getnext(self, params):
        if self.currentindex >= len(self.contents):
            #em.rclog("getnext: EOF hit")
-            return (False, "", "", 1)
+            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret= self.extractone(self.currentindex)
            self.currentindex += 1
--- a/src/filters/rclimg
+++ b/src/filters/rclimg
@ -95,7 +95,7 @@ sub imgTagsToHtml {
    return $output;
 }
-# Get one line from stdin, exit on eof
+# Get one line from stdin (from recollindex), exit on eof
 sub readlineorexit {
    my $s = <STDIN>;
    unless ($s) {
@ -131,7 +131,7 @@ sub readparam {
 }
 #
-# Main program starts here
+# Main program starts here. Talks the rclexecm protocol
 #
 # JFD: replaced the "use" call with a runtime load with error checking,
@ -159,9 +159,9 @@ while (1) {
    }
    unless (defined $params{"filename:"}) {
        print STDERR "RCLIMG: no filename ??\n";
-	# Recoll is requesting next subdocument, but we have none, just say
+	# Recoll is requesting next subdocument (it shouldn't, because we 
-	# so:
+	# returned eofnext last time), but we have none, just say so:
-        print "Document: 0\n\n";
+        print "Eofnow:0\nDocument: 0\n\n";
 	next;
    }
@ -172,7 +172,7 @@ while (1) {
    # print STDERR "RCLIMG: writing $l bytes of data\n";
    print $data;
    # Say we have no further documents for this file
-    print "Eof: 0\n";
+    print "Eofnext: 0\n";
    # End of output parameters: print empty line
    print "\n";
    # print STDERR "RCLIMG: done writing data\n";
--- a/src/filters/rclzip
+++ b/src/filters/rclzip
@ -22,8 +22,10 @@ class ZipExtractor:
        except error, err:
            self.em.rclog("extractone: failed: [%s]" % err)
            ok = False
-        eof = (self.currentindex >= len(self.zip.namelist()) -1)
+        iseof = rclexecm.RclExecM.noteof
-        return (ok, docdata, ipath, eof)
+        if self.currentindex >= len(self.zip.namelist()) -1:
            iseof = rclexecm.RclExecM.eofnext
        return (ok, docdata, ipath, iseof)
    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
@ -40,7 +42,7 @@ class ZipExtractor:
    def getnext(self, params):
        if self.currentindex >= len(self.zip.namelist()):
            #self.em.rclog("getnext: EOF hit")
-            return (False, "", "", 1)
+            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret= self.extractone(self.zip.namelist()[self.currentindex])
            self.currentindex += 1
--- a/src/internfile/mh_execm.cpp
+++ b/src/internfile/mh_execm.cpp
@ -166,7 +166,8 @@ bool MimeHandlerExecMultiple::next_document()
    // Read answer (multiple elements)
    LOGDEB1(("MHExecMultiple: reading answer\n"));
-    bool eof_received = false;
+    bool eofnext_received = false;
    bool eofnow_received = false;
    string ipath;
    string mtype;
    for (int loop=0;;loop++) {
@ -176,9 +177,13 @@ bool MimeHandlerExecMultiple::next_document()
        }
        if (name.empty())
            break;
-        if (!stringlowercmp("eof:", name)) {
+        if (!stringlowercmp("eofnext:", name)) {
-            LOGDEB(("MHExecMultiple: got EOF\n"));
+            LOGDEB(("MHExecMultiple: got EOFNEXT\n"));
-            eof_received = true;
+            eofnext_received = true;
        }
        if (!stringlowercmp("eofnow:", name)) {
            LOGDEB(("MHExecMultiple: got EOFNOW\n"));
            eofnow_received = true;
        }
        if (!stringlowercmp("ipath:", name)) {
            ipath = data;
@ -194,17 +199,21 @@ bool MimeHandlerExecMultiple::next_document()
            return false;
        }
    }
-    // The end of data can be signaled from the filter in two ways:
+
-    // either by returning an empty document (if the filter just hits
+    if (eofnow_received) {
-    // eof while trying to read the doc), or with an "eof:" field
+        // No more docs
    // accompanying a normal document (if the filter hit eof at the
    // end of the current doc, which is the preferred way).
    if (m_metaData["content"].length() == 0) {
        LOGDEB(("MHExecMultiple: got empty document\n"));
        m_havedoc = false;
        return false;
    }
    // It used to be that eof could be signalled just by an empty document, but
    // this was wrong. Empty documents can be found, e.g., in zip files and
    // should not be interpreted as eof.
    if (m_metaData["content"].length() == 0) {
        LOGDEB0(("MHExecMultiple: got empty document inside [%s]: [%s]\n", 
                m_fn.c_str(), ipath.c_str()));
    }
    // If this has an ipath, it is an internal doc from a
    // multi-document file. In this case, either the filter supplies the 
    // mimetype, or the ipath MUST be a filename-like string which we can use
@ -217,7 +226,10 @@ bool MimeHandlerExecMultiple::next_document()
                // mimetype() won't call idFile when there is no file. Do it
                mtype = idFileMem(m_metaData["content"]);
                if (mtype.empty()) {
-                    LOGERR(("MHExecMultiple: cant guess mime type\n"));
+                    // Note this happens for example for directory zip members
                    // We could recognize them by the end /, but wouldn't know
                    // what to do with them anyway.
                    LOGINFO(("MHExecMultiple: cant guess mime type\n"));
                    mtype = "application/octet-stream";
                }
            }
@ -238,7 +250,7 @@ bool MimeHandlerExecMultiple::next_document()
        }
    }
-    if (eof_received)
+    if (eofnext_received)
        m_havedoc = false;
    return true;