From 8b129f605850e913913ef9b7f6185e333bd4381d Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Tue, 21 Jun 2022 16:32:12 +0200 Subject: [PATCH] Query: include line numbers in snippets --- src/rcldb/rclabsfromtext.cpp | 23 ++++++++++++++++------- src/rcldb/rclquery.h | 11 ++++++----- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/rcldb/rclabsfromtext.cpp b/src/rcldb/rclabsfromtext.cpp index 4195b1c4..9a09c076 100644 --- a/src/rcldb/rclabsfromtext.cpp +++ b/src/rcldb/rclabsfromtext.cpp @@ -94,13 +94,14 @@ struct MatchFragment { unsigned int hitpos; // "best term" for this match (e.g. for use as ext app search term) string term; - + int line; + MatchFragment(int sta, int sto, double c, #ifdef COMPUTE_HLZONES vector>& hl, #endif - unsigned int pos, string& trm) - : start(sta), stop(sto), coef(c), hitpos(pos) { + unsigned int pos, string& trm, int ln) + : start(sta), stop(sto), coef(c), hitpos(pos), line(ln) { #ifdef COMPUTE_HLZONES hlzones.swap(hl); #endif @@ -137,9 +138,13 @@ public: } } + virtual void newline(int) override { + m_line++; + } + // Accept a word and its position. If the word is a matched term, // add/update fragment definition. - virtual bool takeword(const std::string& term, int pos, int bts, int bte) { + virtual bool takeword(const std::string& term, int pos, int bts, int bte) override { LOGDEB1("takeword: [" << term << "] bytepos: "< "<> m_prevterms; // Data about the fragment we are building pair m_curfrag{0,0}; + int m_curfragline{0}; double m_curfragcoef{0.0}; unsigned int m_remainingWords{0}; unsigned int m_extcount{0}; @@ -372,6 +380,7 @@ private: // "best" term string m_curterm; double m_curtermcoef{0.0}; + int m_line{1}; // Group terms, extracted from m_hdata unordered_set m_gterms; @@ -492,7 +501,7 @@ int Query::Native::abstractFromText( page = 0; } LOGDEB0("=== FRAGMENT: p. " << page << " Coef: " << entry.coef << ": " << frag << endl); - vabs.push_back(Snippet(page, frag).setTerm(entry.term)); + vabs.push_back(Snippet(page, frag, entry.line).setTerm(entry.term)); if (count++ >= maxtotaloccs) break; } diff --git a/src/rcldb/rclquery.h b/src/rcldb/rclquery.h index fd8874d3..36b70010 100644 --- a/src/rcldb/rclquery.h +++ b/src/rcldb/rclquery.h @@ -36,18 +36,19 @@ enum abstract_result { ABSRES_TERMMISS = 4 }; -// Snippet entry for makeDocAbstract +// Snippet data out of makeDocAbstract class Snippet { public: - Snippet(int page, const std::string& snip) - : page(page), snippet(snip) {} + Snippet(int page, const std::string& snip, int ln = 0) + : page(page), snippet(snip), line(ln) {} Snippet& setTerm(const std::string& trm) { term = trm; return *this; } - int page; - std::string term; + int page{0}; std::string snippet; + int line{0}; + std::string term; };