diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index a18eb409..3ccb0c3e 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -316,6 +316,8 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, // Set xdocid at once so that we can call whatDbIdx() doc.xdocid = docid; + doc.haspages = hasPages(docid); + // Compute what index this comes from, and check for path translations string dbdir = m_rcldb->m_basedir; if (!m_rcldb->m_extraDbs.empty()) { @@ -364,6 +366,21 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, return true; } +bool Db::Native::hasPages(Xapian::docid docid) +{ + string ermsg; + Xapian::PositionIterator pos; + XAPTRY(pos = xrdb.positionlist_begin(docid, page_break_term); + if (pos != xrdb.positionlist_end(docid, page_break_term)) { + return true; + }, + xrdb, ermsg); + if (!ermsg.empty()) { + LOGERR(("Db::Native::hasPages: xapian error: %s\n", ermsg.c_str())); + } + return false; +} + // Return the positions list for the page break term bool Db::Native::getPagePositions(Xapian::docid docid, vector<int>& vpos) { diff --git a/src/rcldb/rcldb_p.h b/src/rcldb/rcldb_p.h index 8e32e02a..da9859f9 100644 --- a/src/rcldb/rcldb_p.h +++ b/src/rcldb/rcldb_p.h @@ -150,6 +150,9 @@ class Db::Native { * */ bool subDocs(const string &udi, vector<Xapian::docid>& docids); + + /** Check if a page position list is defined */ + bool hasPages(Xapian::docid id); }; // This is the word position offset at which we index the body text diff --git a/src/rcldb/rclquery.cpp b/src/rcldb/rclquery.cpp index cf9b250b..0400f318 100644 --- a/src/rcldb/rclquery.cpp +++ b/src/rcldb/rclquery.cpp @@ -264,7 +264,7 @@ bool Query::getQueryTerms(vector<string>& terms) return true; } -int Query::makeDocAbstract(Doc &doc, +int Query::makeDocAbstract(const Doc &doc, vector<Snippet>& abstract, int maxoccs, int ctxwords) { @@ -284,7 +284,7 @@ int Query::makeDocAbstract(Doc &doc, return ret; } -bool Query::makeDocAbstract(Doc &doc, vector<string>& abstract) +bool 
Query::makeDocAbstract(const Doc &doc, vector<string>& abstract) { vector<Snippet> vpabs; if (!makeDocAbstract(doc, vpabs)) @@ -293,7 +293,6 @@ bool Query::makeDocAbstract(Doc &doc, vector<string>& abstract) it != vpabs.end(); it++) { string chunk; if (it->page > 0) { - doc.haspages = true; ostringstream ss; ss << it->page; chunk += string(" [p ") + ss.str() + "] "; @@ -304,7 +303,7 @@ bool Query::makeDocAbstract(Doc &doc, vector<string>& abstract) return true; } -bool Query::makeDocAbstract(Doc &doc, string& abstract) +bool Query::makeDocAbstract(const Doc &doc, string& abstract) { vector<Snippet> vpabs; if (!makeDocAbstract(doc, vpabs)) @@ -317,9 +316,9 @@ bool Query::makeDocAbstract(Doc &doc, string& abstract) return m_reason.empty() ? true : false; } -int Query::getFirstMatchPage(Doc &doc, string& term) +int Query::getFirstMatchPage(const Doc &doc, string& term) { - LOGDEB1(("Db::getFirstMatchPages\n"));; + LOGDEB1(("Db::getFirstMatchPage\n"));; if (!m_nq) { LOGERR(("Query::getFirstMatchPage: no nq\n")); return false; diff --git a/src/rcldb/rclquery.h b/src/rcldb/rclquery.h index 79886ef2..438d9e1c 100644 --- a/src/rcldb/rclquery.h +++ b/src/rcldb/rclquery.h @@ -106,14 +106,15 @@ class Query { /** Build synthetic abstract for document, extracting chunks relevant for * the input query. 
This uses index data only (no access to the file) */ // Abstract returned as one string - bool makeDocAbstract(Doc &doc, std::string& abstract); + bool makeDocAbstract(const Doc &doc, std::string& abstract); // Returned as a snippets vector - bool makeDocAbstract(Doc &doc, std::vector<std::string>& abstract); + bool makeDocAbstract(const Doc &doc, std::vector<std::string>& abstract); // Returned as a vector of pair<page,snippet> page is 0 if unknown - int makeDocAbstract(Doc &doc, std::vector<Snippet>& abst, + int makeDocAbstract(const Doc &doc, std::vector<Snippet>& abst, int maxoccs= -1, int ctxwords = -1); - /** Retrieve page number for first match for term */ - int getFirstMatchPage(Doc &doc, std::string& term); + /** Retrieve page number for first match for "significant" query term + * @param term returns the chosen term */ + int getFirstMatchPage(const Doc &doc, std::string& term); /** Retrieve a reference to the searchData we are using */ RefCntr<SearchData> getSD()