Query: include line numbers in snippets

This commit is contained in:
Jean-Francois Dockes 2022-06-21 16:32:12 +02:00
parent 389daa134c
commit 8b129f6058
2 changed files with 22 additions and 12 deletions

View File

@@ -94,13 +94,14 @@ struct MatchFragment {
unsigned int hitpos; unsigned int hitpos;
// "best term" for this match (e.g. for use as ext app search term) // "best term" for this match (e.g. for use as ext app search term)
string term; string term;
int line;
MatchFragment(int sta, int sto, double c, MatchFragment(int sta, int sto, double c,
#ifdef COMPUTE_HLZONES #ifdef COMPUTE_HLZONES
vector<pair<int,int>>& hl, vector<pair<int,int>>& hl,
#endif #endif
unsigned int pos, string& trm) unsigned int pos, string& trm, int ln)
: start(sta), stop(sto), coef(c), hitpos(pos) { : start(sta), stop(sto), coef(c), hitpos(pos), line(ln) {
#ifdef COMPUTE_HLZONES #ifdef COMPUTE_HLZONES
hlzones.swap(hl); hlzones.swap(hl);
#endif #endif
@@ -137,9 +138,13 @@ public:
} }
} }
virtual void newline(int) override {
m_line++;
}
// Accept a word and its position. If the word is a matched term, // Accept a word and its position. If the word is a matched term,
// add/update fragment definition. // add/update fragment definition.
virtual bool takeword(const std::string& term, int pos, int bts, int bte) { virtual bool takeword(const std::string& term, int pos, int bts, int bte) override {
LOGDEB1("takeword: [" << term << "] bytepos: "<<bts<<":"<<bte<<endl); LOGDEB1("takeword: [" << term << "] bytepos: "<<bts<<":"<<bte<<endl);
// Limit time taken with monster documents. The resulting abstract will be incorrect or // Limit time taken with monster documents. The resulting abstract will be incorrect or
// inexistent, but this is better than taking forever (the default cutoff value comes from // inexistent, but this is better than taking forever (the default cutoff value comes from
@@ -189,6 +194,7 @@ public:
#endif #endif
m_curterm = term; m_curterm = term;
m_curtermcoef = coef; m_curtermcoef = coef;
m_curfragline = m_line;
} else { } else {
LOGDEB2("Extending current fragment: "<<m_remainingWords<<" -> "<<m_ctxwords<< "\n"); LOGDEB2("Extending current fragment: "<<m_remainingWords<<" -> "<<m_ctxwords<< "\n");
m_extcount++; m_extcount++;
@@ -257,7 +263,8 @@ public:
m_curhlzones, m_curhlzones,
#endif #endif
m_curhitpos, m_curhitpos,
m_curterm m_curterm,
m_curfragline
)); ));
m_totalcoef += m_curfragcoef; m_totalcoef += m_curfragcoef;
m_curfragcoef = 0.0; m_curfragcoef = 0.0;
@@ -283,7 +290,7 @@ public:
#ifdef COMPUTE_HLZONES #ifdef COMPUTE_HLZONES
m_curhlzones, m_curhlzones,
#endif #endif
m_curhitpos, m_curterm)); m_curhitpos, m_curterm, m_curfragline));
m_totalcoef += m_curfragcoef; m_totalcoef += m_curfragcoef;
m_curfragcoef = 0.0; m_curfragcoef = 0.0;
m_curtermcoef = 0.0; m_curtermcoef = 0.0;
@@ -358,6 +365,7 @@ private:
deque<pair<int,int>> m_prevterms; deque<pair<int,int>> m_prevterms;
// Data about the fragment we are building // Data about the fragment we are building
pair<int,int> m_curfrag{0,0}; pair<int,int> m_curfrag{0,0};
int m_curfragline{0};
double m_curfragcoef{0.0}; double m_curfragcoef{0.0};
unsigned int m_remainingWords{0}; unsigned int m_remainingWords{0};
unsigned int m_extcount{0}; unsigned int m_extcount{0};
@@ -372,6 +380,7 @@ private:
// "best" term // "best" term
string m_curterm; string m_curterm;
double m_curtermcoef{0.0}; double m_curtermcoef{0.0};
int m_line{1};
// Group terms, extracted from m_hdata // Group terms, extracted from m_hdata
unordered_set<string> m_gterms; unordered_set<string> m_gterms;
@@ -492,7 +501,7 @@ int Query::Native::abstractFromText(
page = 0; page = 0;
} }
LOGDEB0("=== FRAGMENT: p. " << page << " Coef: " << entry.coef << ": " << frag << endl); LOGDEB0("=== FRAGMENT: p. " << page << " Coef: " << entry.coef << ": " << frag << endl);
vabs.push_back(Snippet(page, frag).setTerm(entry.term)); vabs.push_back(Snippet(page, frag, entry.line).setTerm(entry.term));
if (count++ >= maxtotaloccs) if (count++ >= maxtotaloccs)
break; break;
} }

View File

@@ -36,18 +36,19 @@ enum abstract_result {
ABSRES_TERMMISS = 4 ABSRES_TERMMISS = 4
}; };
// Snippet entry for makeDocAbstract // Snippet data out of makeDocAbstract
class Snippet { class Snippet {
public: public:
Snippet(int page, const std::string& snip) Snippet(int page, const std::string& snip, int ln = 0)
: page(page), snippet(snip) {} : page(page), snippet(snip), line(ln) {}
Snippet& setTerm(const std::string& trm) { Snippet& setTerm(const std::string& trm) {
term = trm; term = trm;
return *this; return *this;
} }
int page; int page{0};
std::string term;
std::string snippet; std::string snippet;
int line{0};
std::string term;
}; };