From 736051fcd6363a66826e99d92c412ad6a0fe2f20 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 28 Jun 2019 14:20:47 +0200 Subject: [PATCH] GUI snippets window: add options for the max list length and for sorting the snippets by page number --- src/qtgui/guiutils.cpp | 11 +- src/qtgui/guiutils.h | 15 +-- src/qtgui/snippets_w.cpp | 4 +- src/qtgui/uiprefs.ui | 64 +++++++++-- src/qtgui/uiprefs_w.cpp | 5 +- src/query/docseq.h | 198 ++++++++++++++++------------------- src/query/docseqdb.cpp | 147 +++++++++++++------------- src/query/docseqdb.h | 16 +-- src/rcldb/rclabsfromtext.cpp | 34 ++++-- src/rcldb/rclabstract.cpp | 9 +- src/rcldb/rclquery.cpp | 4 +- src/rcldb/rclquery.h | 2 +- src/rcldb/rclquery_p.h | 5 +- 13 files changed, 290 insertions(+), 224 deletions(-) diff --git a/src/qtgui/guiutils.cpp b/src/qtgui/guiutils.cpp index 3d958068..ef6f7480 100644 --- a/src/qtgui/guiutils.cpp +++ b/src/qtgui/guiutils.cpp @@ -164,10 +164,6 @@ void rwSettings(bool writing) } } - // Abstract snippet separator - SETTING_RW(prefs.abssep, "/Recoll/prefs/reslist/abssep", String,"…"); - if (!writing && prefs.abssep == "") - prefs.abssep = "…"; SETTING_RW(prefs.reslistdateformat, "/Recoll/prefs/reslist/dateformat", String," %Y-%m-%d %H:%M:%S %z"); if (!writing && prefs.reslistdateformat == "") @@ -224,6 +220,13 @@ void rwSettings(bool writing) Int, 250); SETTING_RW(prefs.syntAbsCtx, "/Recoll/prefs/query/syntAbsCtx", Int, 4); + // Abstract snippet separator + SETTING_RW(prefs.abssep, "/Recoll/prefs/reslist/abssep", String,"…"); + if (!writing && prefs.abssep == "") + prefs.abssep = "…"; + SETTING_RW(prefs.snipwMaxLength, "/Recoll/prefs/snipwin/maxlen", Int, 1000); + SETTING_RW(prefs.snipwSortByPage,"/Recoll/prefs/snipwin/bypage", Bool, false); + SETTING_RW(prefs.autoSuffs, "/Recoll/prefs/query/autoSuffs", String, ""); SETTING_RW(prefs.autoSuffsEnable, "/Recoll/prefs/query/autoSuffsEnable", Bool, false); diff --git a/src/qtgui/guiutils.h b/src/qtgui/guiutils.h index 396e3ff4..139a5e5c 100644 --- a/src/qtgui/guiutils.h +++ b/src/qtgui/guiutils.h @@ -57,8 +57,6 @@ class PrefsPack { QString reslistformat; string creslistformat; QString reslistheadertext; - // Abstract snippet separator - QString abssep; // Date strftime format QString reslistdateformat; string creslistdateformat; @@ -86,6 +84,15 @@ class PrefsPack { // Abstract preferences. Building abstracts can slow result display bool queryBuildAbstract; bool queryReplaceAbstract; + // Synthetized abstract length (chars) and word context size (words) + int syntAbsLen; + int syntAbsCtx; + // Abstract snippet separator + QString abssep; + // Snippets window max list size + int snipwMaxLength; + // Snippets window sort by page (dflt: by weight) + bool snipwSortByPage; bool startWithAdvSearchOpen; // Try to display html if it exists in the internfile stack. bool previewHtml; @@ -122,10 +129,6 @@ class PrefsPack { QStringList restableFields; vector restableColWidths; - // Synthetized abstract length and word context size - int syntAbsLen; - int syntAbsCtx; - // Remembered term match mode int termMatchType; diff --git a/src/qtgui/snippets_w.cpp b/src/qtgui/snippets_w.cpp index 6663f78a..28c9add7 100644 --- a/src/qtgui/snippets_w.cpp +++ b/src/qtgui/snippets_w.cpp @@ -107,6 +107,7 @@ void SnippetsW::init() connect(nextPB, SIGNAL(clicked()), this, SLOT(slotEditFindNext())); connect(prevPB, SIGNAL(clicked()), this, SLOT(slotEditFindPrevious())); + delete browserw; #if defined(USING_WEBKIT) browserw = new QWebView(this); verticalLayout->insertWidget(0, browserw); @@ -165,7 +166,8 @@ void SnippetsW::init() setWindowTitle(title); vector vpabs; - m_source->getAbstract(m_doc, vpabs); + m_source->getAbstract(m_doc, vpabs, + prefs.snipwMaxLength, prefs.snipwSortByPage); HighlightData hdata; m_source->getTerms(hdata); diff --git a/src/qtgui/uiprefs.ui b/src/qtgui/uiprefs.ui index 6727a80b..f7f3dfb7 100644 --- a/src/qtgui/uiprefs.ui +++ b/src/qtgui/uiprefs.ui @@ -45,15 +45,15 @@ - - Query terms highlighting in results. <br>Maybe try something like "color:red;background:yellow" for something more lively than the default blue... - 50 0 + + Query terms highlighting in results. <br>Maybe try something like "color:red;background:yellow" for something more lively than the default blue... + @@ -137,12 +137,12 @@ + + Make links inside the preview window clickable, and start an external browser when they are clicked. + Activate links in preview. - - Make links inside the preview window clickable, and start an external browser when they are clicked. - false @@ -527,6 +527,52 @@ + + + + + + + 1 + 0 + + + + Maximum number of snippets displayed in the snippets window + + + false + + + + + + + 1 + + + 10000000 + + + 10 + + + 1000 + + + + + + + + + Sort snippets by page number (default: by weigth). + + + false + + + @@ -903,12 +949,12 @@ May be slow for big documents. - - Paths translations - Set path translations for the selected index or for the main one if no selection exists. + + Paths translations + diff --git a/src/qtgui/uiprefs_w.cpp b/src/qtgui/uiprefs_w.cpp index 9a7bc09d..9c8f85f8 100644 --- a/src/qtgui/uiprefs_w.cpp +++ b/src/qtgui/uiprefs_w.cpp @@ -184,7 +184,8 @@ void UIPrefsDialog::setFromPrefs() string nm = path_getsimple((const char *)snipCssFile.toLocal8Bit()); snipCssPB->setText(QString::fromLocal8Bit(nm.c_str())); } - + snipwMaxLenSB->setValue(prefs.snipwMaxLength); + snipwByPageCB->setChecked(prefs.snipwSortByPage); paraFormat = prefs.reslistformat; headerText = prefs.reslistheadertext; @@ -301,6 +302,8 @@ void UIPrefsDialog::accept() prefs.reslistformat = prefs.dfltResListFormat; paraFormat = prefs.reslistformat; } + prefs.snipwMaxLength = snipwMaxLenSB->value(); + prefs.snipwSortByPage = snipwByPageCB->isChecked(); prefs.creslistformat = (const char*)prefs.reslistformat.toUtf8(); diff --git a/src/query/docseq.h b/src/query/docseq.h index 7c590488..be73e42e 100644 --- a/src/query/docseq.h +++ b/src/query/docseq.h @@ -39,7 +39,7 @@ struct ResListEntry { /** Sort specification. */ class DocSeqSortSpec { - public: +public: DocSeqSortSpec() : desc(false) {} bool isNotNull() const {return !field.empty();} void reset() {field.erase();} @@ -50,12 +50,12 @@ class DocSeqSortSpec { /** Filtering spec. This is only used to filter by doc category for now, hence the rather specialized interface */ class DocSeqFiltSpec { - public: +public: DocSeqFiltSpec() {} enum Crit {DSFS_MIMETYPE, DSFS_QLANG, DSFS_PASSALL}; void orCrit(Crit crit, const std::string& value) { - crits.push_back(crit); - values.push_back(value); + crits.push_back(crit); + values.push_back(value); } std::vector crits; std::vector values; @@ -75,7 +75,7 @@ class DocSeqFiltSpec { the current one will have to do for now. */ class DocSequence { - public: +public: DocSequence(const std::string &t) : m_title(t) {} virtual ~DocSequence() {} @@ -92,29 +92,26 @@ class DocSequence { /** Get next page of documents. This accumulates entries into the result * list parameter (doesn't reset it). */ virtual int getSeqSlice(int offs, int cnt, - std::vector& result); + std::vector& result); /** Get abstract for document. This is special because it may take time. * The default is to return the input doc's abstract fields, but some * sequences can compute a better value (ie: docseqdb) */ virtual bool getAbstract(Rcl::Doc& doc, std::vector& abs) { - abs.push_back(doc.meta[Rcl::Doc::keyabs]); - return true; + abs.push_back(doc.meta[Rcl::Doc::keyabs]); + return true; } - virtual bool getAbstract(Rcl::Doc& doc, - std::vector& abs) - { - abs.push_back(Rcl::Snippet(0, doc.meta[Rcl::Doc::keyabs])); - return true; + virtual bool getAbstract(Rcl::Doc& doc, std::vector& abs, + int, bool) { + abs.push_back(Rcl::Snippet(0, doc.meta[Rcl::Doc::keyabs])); + return true; } - virtual int getFirstMatchPage(Rcl::Doc&, std::string&) - { - return -1; + virtual int getFirstMatchPage(Rcl::Doc&, std::string&) { + return -1; } /** Get duplicates. */ - virtual bool docDups(const Rcl::Doc&, std::vector&) - { - return false; + virtual bool docDups(const Rcl::Doc&, std::vector&) { + return false; } virtual bool getEnclosing(Rcl::Doc&, Rcl::Doc&); @@ -124,43 +121,44 @@ class DocSequence { /** Get title for result list */ virtual std::string title() - { - return m_title; - } + { + return m_title; + } /** Can do snippets ? */ virtual bool snippetsCapable() - { - return false; - } + { + return false; + } /** Get description for underlying query */ virtual std::string getDescription() = 0; /** Get search terms (for highlighting abstracts). Some sequences * may have no associated search terms. Implement this for them. */ - virtual void getTerms(HighlightData& hld) - { - hld.clear(); - } + virtual void getTerms(HighlightData& hld) + { + hld.clear(); + } virtual std::list expand(Rcl::Doc &) - { - return std::list(); - } + { + return std::list(); + } virtual std::string getReason() - { - return m_reason; - } + { + return m_reason; + } /** Optional functionality. */ virtual bool canFilter() {return false;} virtual bool canSort() {return false;} virtual bool setFiltSpec(const DocSeqFiltSpec &) {return false;} virtual bool setSortSpec(const DocSeqSortSpec &) {return false;} - virtual std::shared_ptr getSourceSeq() {return std::shared_ptr();} + virtual std::shared_ptr getSourceSeq() { + return std::shared_ptr();} - static void set_translations(const std::string& sort, const std::string& filt) - { - o_sort_trans = sort; - o_filt_trans = filt; + static void set_translations(const std::string& sort, + const std::string& filt) { + o_sort_trans = sort; + o_filt_trans = filt; } @@ -172,7 +170,7 @@ protected: static std::string o_filt_trans; std::string m_reason; - private: +private: std::string m_title; }; @@ -182,75 +180,65 @@ protected: class DocSeqModifier : public DocSequence { public: DocSeqModifier(std::shared_ptr iseq) - : DocSequence(""), m_seq(iseq) - {} + : DocSequence(""), m_seq(iseq) + {} virtual ~DocSeqModifier() {} - virtual bool getAbstract(Rcl::Doc& doc, std::vector& abs) - { - if (!m_seq) - return false; - return m_seq->getAbstract(doc, abs); + virtual bool getAbstract(Rcl::Doc& doc, std::vector& abs) { + if (!m_seq) + return false; + return m_seq->getAbstract(doc, abs); } - virtual bool getAbstract(Rcl::Doc& doc, - std::vector& abs) - { - if (!m_seq) - return false; - return m_seq->getAbstract(doc, abs); + virtual bool getAbstract(Rcl::Doc& doc, std::vector& abs, + int maxlen, bool bypage) override { + if (!m_seq) + return false; + return m_seq->getAbstract(doc, abs, maxlen, bypage); } /** Get duplicates. */ - virtual bool docDups(const Rcl::Doc& doc, std::vector& dups) - { - if (!m_seq) - return false; - return m_seq->docDups(doc, dups); + virtual bool docDups(const Rcl::Doc& doc, std::vector& dups) { + if (!m_seq) + return false; + return m_seq->docDups(doc, dups); } - virtual bool snippetsCapable() - { - if (!m_seq) - return false; - return m_seq->snippetsCapable(); + virtual bool snippetsCapable() { + if (!m_seq) + return false; + return m_seq->snippetsCapable(); } - virtual std::string getDescription() - { - if (!m_seq) - return ""; - return m_seq->getDescription(); + virtual std::string getDescription() { + if (!m_seq) + return ""; + return m_seq->getDescription(); } - virtual void getTerms(HighlightData& hld) - { - if (!m_seq) - return; - m_seq->getTerms(hld); + virtual void getTerms(HighlightData& hld) { + if (!m_seq) + return; + m_seq->getTerms(hld); } - virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc) - { - if (!m_seq) - return false; - return m_seq->getEnclosing(doc, pdoc); + virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc) { + if (!m_seq) + return false; + return m_seq->getEnclosing(doc, pdoc); } - virtual std::string getReason() - { - if (!m_seq) - return string(); - return m_seq->getReason(); + virtual std::string getReason() { + if (!m_seq) + return string(); + return m_seq->getReason(); } - virtual std::string title() - { - return m_seq->title(); + virtual std::string title() { + return m_seq->title(); } - virtual std::shared_ptr getSourceSeq() - { - return m_seq; + virtual std::shared_ptr getSourceSeq() { + return m_seq; } protected: virtual std::shared_ptr getDb() { - if (!m_seq) - return 0; - return m_seq->getDb(); + if (!m_seq) + return 0; + return m_seq->getDb(); } std::shared_ptr m_seq; @@ -263,23 +251,21 @@ class RclConfig; class DocSource : public DocSeqModifier { public: DocSource(RclConfig *config, std::shared_ptr iseq) - : DocSeqModifier(iseq), m_config(config) - {} + : DocSeqModifier(iseq), m_config(config) + {} virtual bool canFilter() {return true;} virtual bool canSort() {return true;} virtual bool setFiltSpec(const DocSeqFiltSpec &); virtual bool setSortSpec(const DocSeqSortSpec &); - virtual bool getDoc(int num, Rcl::Doc &doc, std::string *sh = 0) - { - if (!m_seq) - return false; - return m_seq->getDoc(num, doc, sh); + virtual bool getDoc(int num, Rcl::Doc &doc, std::string *sh = 0) { + if (!m_seq) + return false; + return m_seq->getDoc(num, doc, sh); } - virtual int getResCnt() - { - if (!m_seq) - return 0; - return m_seq->getResCnt(); + virtual int getResCnt() { + if (!m_seq) + return 0; + return m_seq->getResCnt(); } virtual std::string title(); private: @@ -290,4 +276,4 @@ private: DocSeqSortSpec m_sspec; }; -#endif /* _DOCSEQ_H_INCLUDED_ */ +#endif /* _DOCSEQ_H_ */ diff --git a/src/query/docseqdb.cpp b/src/query/docseqdb.cpp index f26d0c02..5ed61b6d 100644 --- a/src/query/docseqdb.cpp +++ b/src/query/docseqdb.cpp @@ -30,7 +30,7 @@ using std::list; DocSequenceDb::DocSequenceDb(std::shared_ptr db, std::shared_ptr q, const string &t, - std::shared_ptr sdata) + std::shared_ptr sdata) : DocSequence(t), m_db(db), m_q(q), m_sdata(sdata), m_fsdata(sdata), m_rescnt(-1), m_queryBuildAbstract(true), @@ -56,7 +56,7 @@ bool DocSequenceDb::getDoc(int num, Rcl::Doc &doc, string *sh) { std::unique_lock locker(o_dblock); if (!setQuery()) - return false; + return false; if (sh) sh->erase(); return m_q->getDoc(num, doc); } @@ -65,9 +65,9 @@ int DocSequenceDb::getResCnt() { std::unique_lock locker(o_dblock); if (!setQuery()) - return false; + return false; if (m_rescnt < 0) { - m_rescnt= m_q->getResCnt(); + m_rescnt= m_q->getResCnt(); } return m_rescnt; } @@ -76,32 +76,33 @@ static const string cstr_mre("[...]"); // This one only gets called to fill-up the snippets window // We ignore most abstract/snippets preferences. -bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector& vpabs) +bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector& vpabs, + int maxlen, bool sortbypage) { - LOGDEB("DocSequenceDb::getAbstract/pair\n" ); + LOGDEB("DocSequenceDb::getAbstract/pair\n"); std::unique_lock locker(o_dblock); if (!setQuery()) - return false; + return false; // Have to put the limit somewhere. - int maxoccs = 1000; int ret = Rcl::ABSRES_ERROR; if (m_q->whatDb()) { - ret = m_q->makeDocAbstract(doc, vpabs, maxoccs, - m_q->whatDb()->getAbsCtxLen()+ 2); + ret = m_q->makeDocAbstract( + doc, vpabs, maxlen, m_q->whatDb()->getAbsCtxLen() + 2, sortbypage); } - LOGDEB("DocSequenceDb::getAbstract: got ret " << (ret) << " vpabs len " << (vpabs.size()) << "\n" ); + LOGDEB("DocSequenceDb::getAbstract: got ret " << ret << " vpabs len " << + vpabs.size() << "\n"); if (vpabs.empty()) { - return true; + return true; } // If the list was probably truncated, indicate it. if (ret & Rcl::ABSRES_TRUNC) { - vpabs.push_back(Rcl::Snippet(-1, cstr_mre)); + vpabs.push_back(Rcl::Snippet(-1, cstr_mre)); } if (ret & Rcl::ABSRES_TERMMISS) { - vpabs.insert(vpabs.begin(), - Rcl::Snippet(-1, "(Words missing in snippets)")); + vpabs.insert(vpabs.begin(), + Rcl::Snippet(-1, "(Words missing in snippets)")); } return true; @@ -111,13 +112,13 @@ bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector& vabs) { std::unique_lock locker(o_dblock); if (!setQuery()) - return false; + return false; if (m_q->whatDb() && - m_queryBuildAbstract && (doc.syntabs || m_queryReplaceAbstract)) { - m_q->makeDocAbstract(doc, vabs); + m_queryBuildAbstract && (doc.syntabs || m_queryReplaceAbstract)) { + m_q->makeDocAbstract(doc, vabs); } if (vabs.empty()) - vabs.push_back(doc.meta[Rcl::Doc::keyabs]); + vabs.push_back(doc.meta[Rcl::Doc::keyabs]); return true; } @@ -125,9 +126,9 @@ int DocSequenceDb::getFirstMatchPage(Rcl::Doc &doc, string& term) { std::unique_lock locker(o_dblock); if (!setQuery()) - return false; + return false; if (m_q->whatDb()) { - return m_q->getFirstMatchPage(doc, term); + return m_q->getFirstMatchPage(doc, term); } return -1; } @@ -136,7 +137,7 @@ list DocSequenceDb::expand(Rcl::Doc &doc) { std::unique_lock locker(o_dblock); if (!setQuery()) - return list(); + return list(); vector v = m_q->expand(doc); return list(v.begin(), v.end()); } @@ -145,58 +146,58 @@ string DocSequenceDb::title() { string qual; if (m_isFiltered && !m_isSorted) - qual = string(" (") + o_filt_trans + string(")"); + qual = string(" (") + o_filt_trans + string(")"); else if (!m_isFiltered && m_isSorted) - qual = string(" (") + o_sort_trans + string(")"); + qual = string(" (") + o_sort_trans + string(")"); else if (m_isFiltered && m_isSorted) - qual = string(" (") + o_sort_trans + string(",") + o_filt_trans + + qual = string(" (") + o_sort_trans + string(",") + o_filt_trans + string(")"); return DocSequence::title() + qual; } bool DocSequenceDb::setFiltSpec(const DocSeqFiltSpec &fs) { - LOGDEB("DocSequenceDb::setFiltSpec\n" ); + LOGDEB("DocSequenceDb::setFiltSpec\n"); std::unique_lock locker(o_dblock); if (fs.isNotNull()) { - // We build a search spec by adding a filtering layer to the base one. - m_fsdata = std::shared_ptr( - new Rcl::SearchData(Rcl::SCLT_AND, m_sdata->getStemLang())); - Rcl::SearchDataClauseSub *cl = - new Rcl::SearchDataClauseSub(m_sdata); - m_fsdata->addClause(cl); + // We build a search spec by adding a filtering layer to the base one. + m_fsdata = std::shared_ptr( + new Rcl::SearchData(Rcl::SCLT_AND, m_sdata->getStemLang())); + Rcl::SearchDataClauseSub *cl = + new Rcl::SearchDataClauseSub(m_sdata); + m_fsdata->addClause(cl); - for (unsigned int i = 0; i < fs.crits.size(); i++) { - switch (fs.crits[i]) { - case DocSeqFiltSpec::DSFS_MIMETYPE: - m_fsdata->addFiletype(fs.values[i]); - break; - case DocSeqFiltSpec::DSFS_QLANG: - { - if (!m_q) - break; - - string reason; - Rcl::SearchData *sd = - wasaStringToRcl(m_q->whatDb()->getConf(), - m_sdata->getStemLang(), - fs.values[i], reason); - if (sd) { - Rcl::SearchDataClauseSub *cl1 = - new Rcl::SearchDataClauseSub( - std::shared_ptr(sd)); - m_fsdata->addClause(cl1); - } - } - break; - default: - break; - } - } - m_isFiltered = true; + for (unsigned int i = 0; i < fs.crits.size(); i++) { + switch (fs.crits[i]) { + case DocSeqFiltSpec::DSFS_MIMETYPE: + m_fsdata->addFiletype(fs.values[i]); + break; + case DocSeqFiltSpec::DSFS_QLANG: + { + if (!m_q) + break; + + string reason; + Rcl::SearchData *sd = + wasaStringToRcl(m_q->whatDb()->getConf(), + m_sdata->getStemLang(), + fs.values[i], reason); + if (sd) { + Rcl::SearchDataClauseSub *cl1 = + new Rcl::SearchDataClauseSub( + std::shared_ptr(sd)); + m_fsdata->addClause(cl1); + } + } + break; + default: + break; + } + } + m_isFiltered = true; } else { - m_fsdata = m_sdata; - m_isFiltered = false; + m_fsdata = m_sdata; + m_isFiltered = false; } m_needSetQuery = true; return true; @@ -204,14 +205,15 @@ bool DocSequenceDb::setFiltSpec(const DocSeqFiltSpec &fs) bool DocSequenceDb::setSortSpec(const DocSeqSortSpec &spec) { - LOGDEB("DocSequenceDb::setSortSpec: fld [" << (spec.field) << "] " << (spec.desc ? "desc" : "asc") << "\n" ); + LOGDEB("DocSequenceDb::setSortSpec: fld [" << spec.field << "] " << + (spec.desc ? "desc" : "asc") << "\n"); std::unique_lock locker(o_dblock); if (spec.isNotNull()) { - m_q->setSortBy(spec.field, !spec.desc); - m_isSorted = true; + m_q->setSortBy(spec.field, !spec.desc); + m_isSorted = true; } else { - m_q->setSortBy(string(), true); - m_isSorted = false; + m_q->setSortBy(string(), true); + m_isSorted = false; } m_needSetQuery = true; return true; @@ -220,14 +222,15 @@ bool DocSequenceDb::setSortSpec(const DocSeqSortSpec &spec) bool DocSequenceDb::setQuery() { if (!m_needSetQuery) - return true; + return true; m_needSetQuery = false; m_rescnt = -1; m_lastSQStatus = m_q->setQuery(m_fsdata); if (!m_lastSQStatus) { - m_reason = m_q->getReason(); - LOGERR("DocSequenceDb::setQuery: rclquery::setQuery failed: " << (m_reason) << "\n" ); + m_reason = m_q->getReason(); + LOGERR("DocSequenceDb::setQuery: rclquery::setQuery failed: " << + m_reason << "\n"); } return m_lastSQStatus; } @@ -236,9 +239,9 @@ bool DocSequenceDb::docDups(const Rcl::Doc& doc, std::vector& dups) { if (m_q->whatDb()) { std::unique_lock locker(o_dblock); - return m_q->whatDb()->docDups(doc, dups); + return m_q->whatDb()->docDups(doc, dups); } else { - return false; + return false; } } diff --git a/src/query/docseqdb.h b/src/query/docseqdb.h index b42327d9..c47ec465 100644 --- a/src/query/docseqdb.h +++ b/src/query/docseqdb.h @@ -16,9 +16,10 @@ */ #ifndef _DOCSEQDB_H_INCLUDED_ #define _DOCSEQDB_H_INCLUDED_ -#include "docseq.h" + #include +#include "docseq.h" #include "searchdata.h" #include "rclquery.h" @@ -26,21 +27,22 @@ class DocSequenceDb : public DocSequence { public: DocSequenceDb(std::shared_ptr db, - std::shared_ptr q, const string &t, + std::shared_ptr q, const std::string &t, std::shared_ptr sdata); virtual ~DocSequenceDb() {} - virtual bool getDoc(int num, Rcl::Doc &doc, string * = 0); + virtual bool getDoc(int num, Rcl::Doc &doc, std::string * = 0); virtual int getResCnt(); virtual void getTerms(HighlightData& hld); // Called to fill-up the snippets window. Ignoers // buildabstract/replaceabstract and syntabslen - virtual bool getAbstract(Rcl::Doc &doc, vector&); + virtual bool getAbstract(Rcl::Doc &doc, std::vector&, + int maxlen, bool sortbypage) override; - virtual bool getAbstract(Rcl::Doc &doc, vector&); + virtual bool getAbstract(Rcl::Doc &doc, std::vector&); virtual int getFirstMatchPage(Rcl::Doc&, std::string& term); virtual bool docDups(const Rcl::Doc& doc, std::vector& dups); - virtual string getDescription(); + virtual std::string getDescription(); virtual std::list expand(Rcl::Doc &doc); virtual bool canFilter() {return true;} virtual bool setFiltSpec(const DocSeqFiltSpec &filtspec); @@ -56,7 +58,7 @@ class DocSequenceDb : public DocSequence { { return true; } - virtual string title(); + virtual std::string title(); protected: virtual std::shared_ptr getDb() { diff --git a/src/rcldb/rclabsfromtext.cpp b/src/rcldb/rclabsfromtext.cpp index a08a3503..d56ad5cc 100644 --- a/src/rcldb/rclabsfromtext.cpp +++ b/src/rcldb/rclabsfromtext.cpp @@ -138,12 +138,14 @@ public: if (maxtermcount && termcount++ > maxtermcount) { LOGINF("Rclabsfromtext: stopping because maxtermcount reached: "<< maxtermcount << endl); + retflags |= ABSRES_TRUNC; return false; } // Also limit the number of fragments (just in case safety) if (m_fragments.size() > maxtermcount / 100) { LOGINF("Rclabsfromtext: stopping because maxfragments reached: "<< maxtermcount/100 << endl); + retflags |= ABSRES_TRUNC; return false; } // Remember recent past @@ -326,6 +328,10 @@ public: return; } + + int getretflags() { + return retflags; + } private: // Past terms because we need to go back for context before a hit @@ -364,6 +370,7 @@ private: unsigned int termcount{0}; unsigned int maxtermcount{0}; + int retflags{0}; }; int Query::Native::abstractFromText( @@ -375,7 +382,8 @@ int Query::Native::abstractFromText( int ctxwords, unsigned int maxtotaloccs, vector& vabs, - Chrono& chron + Chrono& chron, + bool sortbypage ) { (void)chron; @@ -423,13 +431,21 @@ int Query::Native::abstractFromText( // Sort the fragments by decreasing weight const vector& res1 = splitter.getFragments(); vector result(res1.begin(), res1.end()); - std::sort(result.begin(), result.end(), - [](const MatchFragment& a, - const MatchFragment& b) -> bool { - return a.coef > b.coef; - } - ); - + if (sortbypage) { + std::sort(result.begin(), result.end(), + [](const MatchFragment& a, + const MatchFragment& b) -> bool { + return a.hitpos < b.hitpos; + } + ); + } else { + std::sort(result.begin(), result.end(), + [](const MatchFragment& a, + const MatchFragment& b) -> bool { + return a.coef > b.coef; + } + ); + } vector vpbreaks; ndb->getPagePositions(docid, vpbreaks); @@ -464,7 +480,7 @@ int Query::Native::abstractFromText( if (count++ >= maxtotaloccs) break; } - return ABSRES_OK; + return ABSRES_OK | splitter.getretflags(); } } diff --git a/src/rcldb/rclabstract.cpp b/src/rcldb/rclabstract.cpp index 63bd4b4a..6a651f7b 100644 --- a/src/rcldb/rclabstract.cpp +++ b/src/rcldb/rclabstract.cpp @@ -623,11 +623,12 @@ int Query::Native::abstractFromIndex( // @param[out] vabs the abstract is returned as a vector of snippets. int Query::Native::makeAbstract(Xapian::docid docid, vector& vabs, - int imaxoccs, int ictxwords) + int imaxoccs, int ictxwords, bool sortbypage) { chron.restart(); - LOGABS("makeAbstract: docid " << docid << " imaxoccs " << - imaxoccs << " ictxwords " << ictxwords << "\n"); + LOGDEB("makeAbstract: docid " << docid << " imaxoccs " << + imaxoccs << " ictxwords " << ictxwords << " sort by page " << + sortbypage << "\n"); // The (unprefixed) terms matched by this document vector matchedTerms; @@ -675,7 +676,7 @@ int Query::Native::makeAbstract(Xapian::docid docid, if (ndb->m_storetext) { return abstractFromText(ndb, docid, matchedTerms, byQ, totalweight, ctxwords, maxtotaloccs, vabs, - chron); + chron, sortbypage); } else { return abstractFromIndex(ndb, docid, matchedTerms, byQ, totalweight, ctxwords, maxtotaloccs, vabs, diff --git a/src/rcldb/rclquery.cpp b/src/rcldb/rclquery.cpp index 7d9e66e1..690729f7 100644 --- a/src/rcldb/rclquery.cpp +++ b/src/rcldb/rclquery.cpp @@ -267,7 +267,7 @@ bool Query::getQueryTerms(vector& terms) } int Query::makeDocAbstract(const Doc &doc, vector& abstract, - int maxoccs, int ctxwords) + int maxoccs, int ctxwords, bool sortbypage) { LOGDEB("makeDocAbstract: maxoccs " << maxoccs << " ctxwords " << ctxwords << "\n"); @@ -276,7 +276,7 @@ int Query::makeDocAbstract(const Doc &doc, vector& abstract, return ABSRES_ERROR; } int ret = ABSRES_ERROR; - XAPTRY(ret = m_nq->makeAbstract(doc.xdocid, abstract, maxoccs, ctxwords), + XAPTRY(ret = m_nq->makeAbstract(doc.xdocid, abstract, maxoccs, ctxwords, sortbypage), m_db->m_ndb->xrdb, m_reason); if (!m_reason.empty()) { LOGDEB("makeDocAbstract: makeAbstract: reason: " << m_reason << "\n"); diff --git a/src/rcldb/rclquery.h b/src/rcldb/rclquery.h index 82068655..b1fd6255 100644 --- a/src/rcldb/rclquery.h +++ b/src/rcldb/rclquery.h @@ -107,7 +107,7 @@ public: bool makeDocAbstract(const Doc &doc, std::vector& abstract); // Returned as a vector of pair page is 0 if unknown int makeDocAbstract(const Doc &doc, std::vector& abst, - int maxoccs= -1, int ctxwords = -1); + int maxoccs= -1, int ctxwords = -1, bool sortbypage=false); /** Retrieve page number for first match for "significant" query term * @param term returns the chosen term */ int getFirstMatchPage(const Doc &doc, std::string& term); diff --git a/src/rcldb/rclquery_p.h b/src/rcldb/rclquery_p.h index c4eb7b31..7d44e626 100644 --- a/src/rcldb/rclquery_p.h +++ b/src/rcldb/rclquery_p.h @@ -55,7 +55,7 @@ public: /** Return a list of terms which matched for a specific result document */ bool getMatchTerms(unsigned long xdocid, std::vector& terms); int makeAbstract(Xapian::docid id, std::vector&, - int maxoccs = -1, int ctxwords = -1); + int maxoccs, int ctxwords, bool sortbypage); int getFirstMatchPage(Xapian::docid docid, std::string& term); void setDbWideQTermsFreqs(); double qualityTerms(Xapian::docid docid, @@ -109,7 +109,8 @@ public: int ctxwords, unsigned int maxtotaloccs, vector& vabs, - Chrono& chron + Chrono& chron, + bool sortbypage ); };