From 20e845709e5651b8736f673bc2a0145b09b551c2 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sat, 29 Aug 2020 09:43:21 +0200 Subject: [PATCH] recollq: added option -p to be used with -A for showing page-numbered snippets instead of abstract --- src/doc/man/recollq.1 | 7 ++- src/query/recollq.cpp | 139 ++++++++++++++++++++++++------------------ src/rcldb/rclquery.h | 16 ++--- 3 files changed, 93 insertions(+), 69 deletions(-) diff --git a/src/doc/man/recollq.1 b/src/doc/man/recollq.1 index 07aed737..16885e19 100644 --- a/src/doc/man/recollq.1 +++ b/src/doc/man/recollq.1 @@ -25,6 +25,9 @@ recollq \- command line / standard output Recoll query command. .B \-A ] [ +.B \-p +] +[ .B \-e ] [ @@ -106,7 +109,9 @@ is set, the whole metadata array will be dumped for each document. .PP If .B \-A -is set, the document abstracts will be printed. +is set, the document abstracts will be printed. With an additional +.B \-p +option, snippets with page numbers (when available) will be shown instead. .PP .B \-S diff --git a/src/query/recollq.cpp b/src/query/recollq.cpp index c91fab9a..e2110f04 100644 --- a/src/query/recollq.cpp +++ b/src/query/recollq.cpp @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -57,8 +58,27 @@ bool dump_contents(RclConfig *rclconfig, Rcl::Doc& idoc) return true; } +string make_abstract(Rcl::Doc& doc, Rcl::Query& query, bool asSnippets) +{ + string abstract; + if (asSnippets) { + std::vector snippets; + std::ostringstream str; + if (query.makeDocAbstract(doc, snippets, -1, -1, true)) { + for (const auto snippet : snippets) { + str << snippet.page << " : " << snippet.snippet << endl; + } + } + abstract = str.str(); + } else { + query.makeDocAbstract(doc, abstract); + abstract += "\n"; + } + return abstract; +} + void output_fields(vector fields, Rcl::Doc& doc, - Rcl::Query& query, Rcl::Db&, bool printnames) + Rcl::Query& query, Rcl::Db&, bool printnames, bool asSnippets) { if (fields.empty()) { map::const_iterator it; @@ -70,9 +90,7 @@ void output_fields(vector fields, Rcl::Doc& doc, it != fields.end(); it++) { string out; if (!it->compare("abstract")) { - string abstract; - query.makeDocAbstract(doc, abstract); - base64_encode(abstract, out); + base64_encode(make_abstract(doc, query, asSnippets), out); } else if (!it->compare("xdocid")) { char cdocid[30]; sprintf(cdocid, "%lu", (unsigned long)doc.xdocid); @@ -94,45 +112,46 @@ void output_fields(vector fields, Rcl::Doc& doc, static char *thisprog; static char usage [] = -" -P: Show the date span for all the documents present in the index.\n" -" [-o|-a|-f] [-q] \n" -" Runs a recoll query and displays result lines. \n" -" Default: will interpret the argument(s) as a xesam query string.\n" -" Query elements: \n" -" * Implicit AND, exclusion, field spec: t1 -t2 title:t3\n" -" * OR has priority: t1 OR t2 t3 OR t4 means (t1 OR t2) AND (t3 OR t4)\n" -" * Phrase: \"t1 t2\" (needs additional quoting on cmd line)\n" -" -o Emulate the GUI simple search in ANY TERM mode.\n" -" -a Emulate the GUI simple search in ALL TERMS mode.\n" -" -f Emulate the GUI simple search in filename mode.\n" -" -q is just ignored (compatibility with the recoll GUI command line).\n" -"Common options:\n" -" -c : specify config directory, overriding $RECOLL_CONFDIR.\n" -" -C : collapse duplicates\n" -" -d also dump file contents.\n" -" -n [first-] define the result slice. The default value for [first]\n" -" is 0. Without the option, the default max count is 2000.\n" -" Use n=0 for no limit.\n" -" -b : basic. Just output urls, no mime types or titles.\n" -" -Q : no result lines, just the processed query and result count.\n" -" -m : dump the whole document meta[] array for each result.\n" -" -A : output the document abstracts.\n" -" -S fld : sort by field .\n" -" -D : sort descending.\n" -" -s stemlang : set stemming language to use (must exist in index...).\n" -" Use -s \"\" to turn off stem expansion.\n" -" -T : use the parameter (Thesaurus) for word expansion.\n" -" -i : additional index, several can be given.\n" -" -e use url encoding (%xx) for urls.\n" -" -E use exact result count instead of lower bound estimate.\n" -" -F : output exactly these fields for each result.\n" -" The field values are encoded in base64, output in one line and \n" -" separated by one space character. This is the recommended format \n" -" for use by other programs. Use a normal query with option -m to \n" -" see the field names. Use -F '' to output all fields, but you probably\n" -" also want option -N in this case.\n" -" -N : with -F, print the (plain text) field names before the field values.\n" -; + " -P: Show the date span for all the documents present in the index.\n" + " [-o|-a|-f] [-q] \n" + " Runs a recoll query and displays result lines. \n" + " Default: will interpret the argument(s) as a xesam query string.\n" + " Query elements: \n" + " * Implicit AND, exclusion, field spec: t1 -t2 title:t3\n" + " * OR has priority: t1 OR t2 t3 OR t4 means (t1 OR t2) AND (t3 OR t4)\n" + " * Phrase: \"t1 t2\" (needs additional quoting on cmd line)\n" + " -o Emulate the GUI simple search in ANY TERM mode.\n" + " -a Emulate the GUI simple search in ALL TERMS mode.\n" + " -f Emulate the GUI simple search in filename mode.\n" + " -q is just ignored (compatibility with the recoll GUI command line).\n" + "Common options:\n" + " -c : specify config directory, overriding $RECOLL_CONFDIR.\n" + " -C : collapse duplicates\n" + " -d also dump file contents.\n" + " -n [first-] define the result slice. The default value for [first]\n" + " is 0. Without the option, the default max count is 2000.\n" + " Use n=0 for no limit.\n" + " -b : basic. Just output urls, no mime types or titles.\n" + " -Q : no result lines, just the processed query and result count.\n" + " -m : dump the whole document meta[] array for each result.\n" + " -A : output the document abstracts.\n" + " -p : show snippets, with page numbers instead of abstract.\n" + " -S fld : sort by field .\n" + " -D : sort descending.\n" + " -s stemlang : set stemming language to use (must exist in index...).\n" + " Use -s \"\" to turn off stem expansion.\n" + " -T : use the parameter (Thesaurus) for word expansion.\n" + " -i : additional index, several can be given.\n" + " -e use url encoding (%xx) for urls.\n" + " -E use exact result count instead of lower bound estimate.\n" + " -F : output exactly these fields for each result.\n" + " The field values are encoded in base64, output in one line and \n" + " separated by one space character. This is the recommended format \n" + " for use by other programs. Use a normal query with option -m to \n" + " see the field names. Use -F '' to output all fields, but you probably\n" + " also want option -N in this case.\n" + " -N : with -F, print the (plain text) field names before the field values.\n" + ; static void Usage(void) @@ -167,18 +186,19 @@ static int op_flags; #define OPT_n 0x4000 // GUI: -o same #define OPT_o 0x8000 -#define OPT_P 0x10000 -#define OPT_Q 0x20000 +#define OPT_p 0x10000 +#define OPT_P 0x20000 +#define OPT_Q 0x40000 // GUI: -q same -#define OPT_q 0x40000 -#define OPT_S 0x80000 -#define OPT_s 0x100000 -#define OPT_T 0x200000 +#define OPT_q 0x80000 +#define OPT_S 0x100000 +#define OPT_s 0x2000000 +#define OPT_T 0x4000000 // GUI: -t use command line, us: ignored -#define OPT_t 0x400000 +#define OPT_t 0x800000 // GUI uses -v : show version. Us: usage // GUI uses -w : open minimized -#define OPT_E 0x800000 +#define OPT_E 0x1000000 int recollq(RclConfig **cfp, int argc, char **argv) { @@ -245,6 +265,7 @@ int recollq(RclConfig **cfp, int argc, char **argv) argc--; goto b1; case 'o': op_flags |= OPT_o; break; case 'P': op_flags |= OPT_P; break; + case 'p': op_flags |= OPT_p; break; case 'q': op_flags |= OPT_q; break; case 'Q': op_flags |= OPT_Q; break; case 'S': op_flags |= OPT_S; if (argc < 2) Usage(); @@ -402,7 +423,8 @@ endopts: break; if (op_flags & OPT_F) { - output_fields(fields, doc, query, rcldb, op_flags & OPT_N); + output_fields(fields, doc, query, rcldb, + op_flags & OPT_N, op_flags & OPT_p); continue; } @@ -435,11 +457,13 @@ endopts: } } if (op_flags & OPT_A) { - string abstract; - if (query.makeDocAbstract(doc, abstract)) { - cout << "ABSTRACT" << endl; - cout << abstract << endl; - cout << "/ABSTRACT" << endl; + bool asSnippets = (op_flags & OPT_p) != 0; + string abstract = make_abstract(doc, query, asSnippets); + string marker = asSnippets ? "SNIPPETS" : "ABSTRACT"; + if (!abstract.empty()) { + cout << marker << endl; + cout << abstract; + cout << string("/") + marker << endl; } } } @@ -450,4 +474,3 @@ endopts: return 0; } - diff --git a/src/rcldb/rclquery.h b/src/rcldb/rclquery.h index b712c634..76d04016 100644 --- a/src/rcldb/rclquery.h +++ b/src/rcldb/rclquery.h @@ -40,11 +40,8 @@ enum abstract_result { class Snippet { public: Snippet(int page, const std::string& snip) - : page(page), snippet(snip) - { - } - Snippet& setTerm(const std::string& trm) - { + : page(page), snippet(snip) {} + Snippet& setTerm(const std::string& trm) { term = trm; return *this; } @@ -65,6 +62,9 @@ public: Query(Db *db); ~Query(); + Query(const Query &) = delete; + Query& operator=(const Query &) = delete; + /** Get explanation about last error */ std::string getReason() const { return m_reason; @@ -114,7 +114,7 @@ public: bool makeDocAbstract(const Doc &doc, std::vector& abstract); // Returned as a vector of pair page is 0 if unknown int makeDocAbstract(const Doc &doc, std::vector& abst, - int maxoccs= -1, int ctxwords = -1, bool sortbypage=false); + int maxoccs= -1, int ctxwords= -1,bool sortbypage=false); /** Retrieve page number for first match for "significant" query term * @param term returns the chosen term */ int getFirstMatchPage(const Doc &doc, std::string& term); @@ -146,10 +146,6 @@ private: int m_resCnt; std::shared_ptr m_sd; int m_snipMaxPosWalk; - - /* Copyconst and assignement private and forbidden */ - Query(const Query &) {} - Query & operator=(const Query &) {return *this;}; }; #ifndef NO_NAMESPACES