diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index 4d885a6f..6a50c04a 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -102,6 +102,12 @@ string FileInterner::getLastIpathElt(const string& ipath) } } +bool FileInterner::ipathContains(const string& parent, const string& child) +{ + return child.find(parent) == 0 && + child.find(cstr_isep, parent.size()) == parent.size(); +} + // Constructor: identify the input file, possibly create an // uncompressed temporary copy, and create the top filter for the // uncompressed file type. diff --git a/src/internfile/internfile.h b/src/internfile/internfile.h index 69d88b47..d2392036 100644 --- a/src/internfile/internfile.h +++ b/src/internfile/internfile.h @@ -206,6 +206,9 @@ class FileInterner { /** Return last element in ipath, like basename */ static std::string getLastIpathElt(const std::string& ipath); + /** Check that 2nd param is child of first */ + static bool ipathContains(const std::string& parent, + const std::string& child); /** * Build sig for doc coming from rcldb. This is here because we know how * to query the right backend. Used to check up-to-dateness at query time */ diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 51077278..51caccc1 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -55,6 +55,7 @@ using namespace std; #include "termproc.h" #include "expansiondbs.h" #include "rclinit.h" +#include "internfile.h" // Recoll index format version is stored in user metadata. When this change, // we can't open the db and will have to reindex. @@ -2119,15 +2120,23 @@ bool Db::hasSubDocs(const Doc &idoc) LOGERR(("Db::hasSubDocs: no input udi or empty\n")); return false; } + LOGDEB1(("Db::hasSubDocs: idxi %d inudi [%s]\n", idoc.idxi, inudi.c_str())); + + // Not sure why we perform both the subDocs() call and the test on + // has_children. The former will return docs if the input is a + // file-level document, but the latter should be true both in this + // case and if the input is already a subdoc, so the first test + // should be redundant. Does not hurt much in any case, to be + // checked one day. vector docids; if (!m_ndb->subDocs(inudi, idoc.idxi, docids)) { - LOGDEB(("Db:getSubDocs: lower level subdocs failed\n")); + LOGDEB(("Db::hasSubDocs: lower level subdocs failed\n")); return false; } if (!docids.empty()) return true; - // Check if doc has an has_children term + // Check if doc has an "has_children" term if (m_ndb->hasTerm(inudi, idoc.idxi, has_children_term)) return true; return false; @@ -2148,6 +2157,8 @@ bool Db::getSubDocs(const Doc &idoc, vector& subdocs) string rootudi; string ipath = idoc.ipath; + LOGDEB0(("Db::getSubDocs: idxi %d inudi [%s] ipath [%s]\n", + idoc.idxi, inudi.c_str(), ipath.c_str())); if (ipath.empty()) { // File-level doc. Use it as root rootudi = inudi; @@ -2178,7 +2189,7 @@ bool Db::getSubDocs(const Doc &idoc, vector& subdocs) // Retrieve all subdoc xapian ids for the root vector docids; if (!m_ndb->subDocs(rootudi, idoc.idxi, docids)) { - LOGDEB(("Db:getSubDocs: lower level subdocs failed\n")); + LOGDEB(("Db::getSubDocs: lower level subdocs failed\n")); return false; } @@ -2199,8 +2210,10 @@ bool Db::getSubDocs(const Doc &idoc, vector& subdocs) LOGERR(("Db::getSubDocs: doc conversion error\n")); return false; } - if (ipath.empty() || doc.ipath.find(ipath) == 0) - subdocs.push_back(doc); + if (ipath.empty() || + FileInterner::ipathContains(ipath, doc.ipath)) { + subdocs.push_back(doc); + } } return true; } catch (const Xapian::DatabaseModifiedError &e) {