recollq: added option -p to be used with -A for showing page-numbered snippets instead of abstract
This commit is contained in:
parent
92f636be39
commit
20e845709e
@ -25,6 +25,9 @@ recollq \- command line / standard output Recoll query command.
|
|||||||
.B \-A
|
.B \-A
|
||||||
]
|
]
|
||||||
[
|
[
|
||||||
|
.B \-p
|
||||||
|
]
|
||||||
|
[
|
||||||
.B \-e
|
.B \-e
|
||||||
]
|
]
|
||||||
[
|
[
|
||||||
@ -106,7 +109,9 @@ is set, the whole metadata array will be dumped for each document.
|
|||||||
.PP
|
.PP
|
||||||
If
|
If
|
||||||
.B \-A
|
.B \-A
|
||||||
is set, the document abstracts will be printed.
|
is set, the document abstracts will be printed. With an additional
|
||||||
|
.B \-p
|
||||||
|
option, snippets with page numbers (when available) will be shown instead.
|
||||||
.PP
|
.PP
|
||||||
.B \-S
|
.B \-S
|
||||||
<fieldname>
|
<fieldname>
|
||||||
|
|||||||
@ -24,6 +24,7 @@
|
|||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
@ -57,8 +58,27 @@ bool dump_contents(RclConfig *rclconfig, Rcl::Doc& idoc)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build the text that recollq prints (or base64-encodes) as a document's
// abstract.
//
// @param doc        document to summarize.
// @param query      query used to generate the abstract or snippets.
// @param asSnippets when true, return one line per snippet, formatted as
//                   "<page> : <snippet text>"; when false, return the plain
//                   abstract followed by a single newline.
// @return the formatted text, or an empty string when the query could not
//         produce an abstract / snippets, so that callers can rely on
//         empty() to decide whether there is anything to print.
string make_abstract(Rcl::Doc& doc, Rcl::Query& query, bool asSnippets)
{
    if (asSnippets) {
        std::vector<Rcl::Snippet> snippets;
        std::ostringstream str;
        // -1 / -1 are the declared defaults for maxoccs and ctxwords; the
        // final 'true' is the sortbypage argument.
        if (query.makeDocAbstract(doc, snippets, -1, -1, true)) {
            // Bind by reference: a Snippet holds strings, copying each one
            // per iteration is wasted work.
            for (const auto& snippet : snippets) {
                // '\n' instead of endl: flushing a string stream is useless.
                str << snippet.page << " : " << snippet.snippet << '\n';
            }
        }
        return str.str();
    }

    string abstract;
    if (!query.makeDocAbstract(doc, abstract)) {
        // Report failure as "no abstract" instead of a lone newline, which
        // would defeat the caller's empty() test and print an empty block.
        return string();
    }
    abstract += "\n";
    return abstract;
}
|
||||||
|
|
||||||
void output_fields(vector<string> fields, Rcl::Doc& doc,
|
void output_fields(vector<string> fields, Rcl::Doc& doc,
|
||||||
Rcl::Query& query, Rcl::Db&, bool printnames)
|
Rcl::Query& query, Rcl::Db&, bool printnames, bool asSnippets)
|
||||||
{
|
{
|
||||||
if (fields.empty()) {
|
if (fields.empty()) {
|
||||||
map<string,string>::const_iterator it;
|
map<string,string>::const_iterator it;
|
||||||
@ -70,9 +90,7 @@ void output_fields(vector<string> fields, Rcl::Doc& doc,
|
|||||||
it != fields.end(); it++) {
|
it != fields.end(); it++) {
|
||||||
string out;
|
string out;
|
||||||
if (!it->compare("abstract")) {
|
if (!it->compare("abstract")) {
|
||||||
string abstract;
|
base64_encode(make_abstract(doc, query, asSnippets), out);
|
||||||
query.makeDocAbstract(doc, abstract);
|
|
||||||
base64_encode(abstract, out);
|
|
||||||
} else if (!it->compare("xdocid")) {
|
} else if (!it->compare("xdocid")) {
|
||||||
char cdocid[30];
|
char cdocid[30];
|
||||||
sprintf(cdocid, "%lu", (unsigned long)doc.xdocid);
|
sprintf(cdocid, "%lu", (unsigned long)doc.xdocid);
|
||||||
@ -94,45 +112,46 @@ void output_fields(vector<string> fields, Rcl::Doc& doc,
|
|||||||
|
|
||||||
static char *thisprog;
|
static char *thisprog;
|
||||||
static char usage [] =
|
static char usage [] =
|
||||||
" -P: Show the date span for all the documents present in the index.\n"
|
" -P: Show the date span for all the documents present in the index.\n"
|
||||||
" [-o|-a|-f] [-q] <query string>\n"
|
" [-o|-a|-f] [-q] <query string>\n"
|
||||||
" Runs a recoll query and displays result lines. \n"
|
" Runs a recoll query and displays result lines. \n"
|
||||||
" Default: will interpret the argument(s) as a xesam query string.\n"
|
" Default: will interpret the argument(s) as a xesam query string.\n"
|
||||||
" Query elements: \n"
|
" Query elements: \n"
|
||||||
" * Implicit AND, exclusion, field spec: t1 -t2 title:t3\n"
|
" * Implicit AND, exclusion, field spec: t1 -t2 title:t3\n"
|
||||||
" * OR has priority: t1 OR t2 t3 OR t4 means (t1 OR t2) AND (t3 OR t4)\n"
|
" * OR has priority: t1 OR t2 t3 OR t4 means (t1 OR t2) AND (t3 OR t4)\n"
|
||||||
" * Phrase: \"t1 t2\" (needs additional quoting on cmd line)\n"
|
" * Phrase: \"t1 t2\" (needs additional quoting on cmd line)\n"
|
||||||
" -o Emulate the GUI simple search in ANY TERM mode.\n"
|
" -o Emulate the GUI simple search in ANY TERM mode.\n"
|
||||||
" -a Emulate the GUI simple search in ALL TERMS mode.\n"
|
" -a Emulate the GUI simple search in ALL TERMS mode.\n"
|
||||||
" -f Emulate the GUI simple search in filename mode.\n"
|
" -f Emulate the GUI simple search in filename mode.\n"
|
||||||
" -q is just ignored (compatibility with the recoll GUI command line).\n"
|
" -q is just ignored (compatibility with the recoll GUI command line).\n"
|
||||||
"Common options:\n"
|
"Common options:\n"
|
||||||
" -c <configdir> : specify config directory, overriding $RECOLL_CONFDIR.\n"
|
" -c <configdir> : specify config directory, overriding $RECOLL_CONFDIR.\n"
|
||||||
" -C : collapse duplicates\n"
|
" -C : collapse duplicates\n"
|
||||||
" -d also dump file contents.\n"
|
" -d also dump file contents.\n"
|
||||||
" -n [first-]<cnt> define the result slice. The default value for [first]\n"
|
" -n [first-]<cnt> define the result slice. The default value for [first]\n"
|
||||||
" is 0. Without the option, the default max count is 2000.\n"
|
" is 0. Without the option, the default max count is 2000.\n"
|
||||||
" Use n=0 for no limit.\n"
|
" Use n=0 for no limit.\n"
|
||||||
" -b : basic. Just output urls, no mime types or titles.\n"
|
" -b : basic. Just output urls, no mime types or titles.\n"
|
||||||
" -Q : no result lines, just the processed query and result count.\n"
|
" -Q : no result lines, just the processed query and result count.\n"
|
||||||
" -m : dump the whole document meta[] array for each result.\n"
|
" -m : dump the whole document meta[] array for each result.\n"
|
||||||
" -A : output the document abstracts.\n"
|
" -A : output the document abstracts.\n"
|
||||||
" -S fld : sort by field <fld>.\n"
|
" -p : show snippets, with page numbers instead of abstract.\n"
|
||||||
" -D : sort descending.\n"
|
" -S fld : sort by field <fld>.\n"
|
||||||
" -s stemlang : set stemming language to use (must exist in index...).\n"
|
" -D : sort descending.\n"
|
||||||
" Use -s \"\" to turn off stem expansion.\n"
|
" -s stemlang : set stemming language to use (must exist in index...).\n"
|
||||||
" -T <synonyms file>: use the parameter (Thesaurus) for word expansion.\n"
|
" Use -s \"\" to turn off stem expansion.\n"
|
||||||
" -i <dbdir> : additional index, several can be given.\n"
|
" -T <synonyms file>: use the parameter (Thesaurus) for word expansion.\n"
|
||||||
" -e use url encoding (%xx) for urls.\n"
|
" -i <dbdir> : additional index, several can be given.\n"
|
||||||
" -E use exact result count instead of lower bound estimate.\n"
|
" -e use url encoding (%xx) for urls.\n"
|
||||||
" -F <field name list> : output exactly these fields for each result.\n"
|
" -E use exact result count instead of lower bound estimate.\n"
|
||||||
" The field values are encoded in base64, output in one line and \n"
|
" -F <field name list> : output exactly these fields for each result.\n"
|
||||||
" separated by one space character. This is the recommended format \n"
|
" The field values are encoded in base64, output in one line and \n"
|
||||||
" for use by other programs. Use a normal query with option -m to \n"
|
" separated by one space character. This is the recommended format \n"
|
||||||
" see the field names. Use -F '' to output all fields, but you probably\n"
|
" for use by other programs. Use a normal query with option -m to \n"
|
||||||
" also want option -N in this case.\n"
|
" see the field names. Use -F '' to output all fields, but you probably\n"
|
||||||
" -N : with -F, print the (plain text) field names before the field values.\n"
|
" also want option -N in this case.\n"
|
||||||
;
|
" -N : with -F, print the (plain text) field names before the field values.\n"
|
||||||
|
;
|
||||||
|
|
||||||
static void
|
static void
|
||||||
Usage(void)
|
Usage(void)
|
||||||
@ -167,18 +186,19 @@ static int op_flags;
|
|||||||
#define OPT_n 0x4000
|
#define OPT_n 0x4000
|
||||||
// GUI: -o same
|
// GUI: -o same
|
||||||
#define OPT_o 0x8000
|
#define OPT_o 0x8000
|
||||||
#define OPT_P 0x10000
|
#define OPT_p 0x10000
|
||||||
#define OPT_Q 0x20000
|
#define OPT_P 0x20000
|
||||||
|
#define OPT_Q 0x40000
|
||||||
// GUI: -q same
|
// GUI: -q same
|
||||||
#define OPT_q 0x40000
|
#define OPT_q 0x80000
|
||||||
#define OPT_S 0x80000
|
#define OPT_S 0x100000
|
||||||
#define OPT_s 0x100000
|
#define OPT_s 0x2000000
|
||||||
#define OPT_T 0x200000
|
#define OPT_T 0x4000000
|
||||||
// GUI: -t use command line, us: ignored
|
// GUI: -t use command line, us: ignored
|
||||||
#define OPT_t 0x400000
|
#define OPT_t 0x800000
|
||||||
// GUI uses -v : show version. Us: usage
|
// GUI uses -v : show version. Us: usage
|
||||||
// GUI uses -w : open minimized
|
// GUI uses -w : open minimized
|
||||||
#define OPT_E 0x800000
|
#define OPT_E 0x1000000
|
||||||
|
|
||||||
int recollq(RclConfig **cfp, int argc, char **argv)
|
int recollq(RclConfig **cfp, int argc, char **argv)
|
||||||
{
|
{
|
||||||
@ -245,6 +265,7 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
|||||||
argc--; goto b1;
|
argc--; goto b1;
|
||||||
case 'o': op_flags |= OPT_o; break;
|
case 'o': op_flags |= OPT_o; break;
|
||||||
case 'P': op_flags |= OPT_P; break;
|
case 'P': op_flags |= OPT_P; break;
|
||||||
|
case 'p': op_flags |= OPT_p; break;
|
||||||
case 'q': op_flags |= OPT_q; break;
|
case 'q': op_flags |= OPT_q; break;
|
||||||
case 'Q': op_flags |= OPT_Q; break;
|
case 'Q': op_flags |= OPT_Q; break;
|
||||||
case 'S': op_flags |= OPT_S; if (argc < 2) Usage();
|
case 'S': op_flags |= OPT_S; if (argc < 2) Usage();
|
||||||
@ -402,7 +423,8 @@ endopts:
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
if (op_flags & OPT_F) {
|
if (op_flags & OPT_F) {
|
||||||
output_fields(fields, doc, query, rcldb, op_flags & OPT_N);
|
output_fields(fields, doc, query, rcldb,
|
||||||
|
op_flags & OPT_N, op_flags & OPT_p);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -435,11 +457,13 @@ endopts:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (op_flags & OPT_A) {
|
if (op_flags & OPT_A) {
|
||||||
string abstract;
|
bool asSnippets = (op_flags & OPT_p) != 0;
|
||||||
if (query.makeDocAbstract(doc, abstract)) {
|
string abstract = make_abstract(doc, query, asSnippets);
|
||||||
cout << "ABSTRACT" << endl;
|
string marker = asSnippets ? "SNIPPETS" : "ABSTRACT";
|
||||||
cout << abstract << endl;
|
if (!abstract.empty()) {
|
||||||
cout << "/ABSTRACT" << endl;
|
cout << marker << endl;
|
||||||
|
cout << abstract;
|
||||||
|
cout << string("/") + marker << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -450,4 +474,3 @@ endopts:
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -40,11 +40,8 @@ enum abstract_result {
|
|||||||
class Snippet {
|
class Snippet {
|
||||||
public:
|
public:
|
||||||
Snippet(int page, const std::string& snip)
|
Snippet(int page, const std::string& snip)
|
||||||
: page(page), snippet(snip)
|
: page(page), snippet(snip) {}
|
||||||
{
|
Snippet& setTerm(const std::string& trm) {
|
||||||
}
|
|
||||||
Snippet& setTerm(const std::string& trm)
|
|
||||||
{
|
|
||||||
term = trm;
|
term = trm;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
@ -65,6 +62,9 @@ public:
|
|||||||
Query(Db *db);
|
Query(Db *db);
|
||||||
~Query();
|
~Query();
|
||||||
|
|
||||||
|
Query(const Query &) = delete;
|
||||||
|
Query& operator=(const Query &) = delete;
|
||||||
|
|
||||||
/** Get explanation about last error */
|
/** Get explanation about last error */
|
||||||
std::string getReason() const {
|
std::string getReason() const {
|
||||||
return m_reason;
|
return m_reason;
|
||||||
@ -114,7 +114,7 @@ public:
|
|||||||
bool makeDocAbstract(const Doc &doc, std::vector<std::string>& abstract);
|
bool makeDocAbstract(const Doc &doc, std::vector<std::string>& abstract);
|
||||||
// Returned as a vector of pair<page,snippet> page is 0 if unknown
|
// Returned as a vector of pair<page,snippet> page is 0 if unknown
|
||||||
int makeDocAbstract(const Doc &doc, std::vector<Snippet>& abst,
|
int makeDocAbstract(const Doc &doc, std::vector<Snippet>& abst,
|
||||||
int maxoccs= -1, int ctxwords = -1, bool sortbypage=false);
|
int maxoccs= -1, int ctxwords= -1,bool sortbypage=false);
|
||||||
/** Retrieve page number for first match for "significant" query term
|
/** Retrieve page number for first match for "significant" query term
|
||||||
* @param term returns the chosen term */
|
* @param term returns the chosen term */
|
||||||
int getFirstMatchPage(const Doc &doc, std::string& term);
|
int getFirstMatchPage(const Doc &doc, std::string& term);
|
||||||
@ -146,10 +146,6 @@ private:
|
|||||||
int m_resCnt;
|
int m_resCnt;
|
||||||
std::shared_ptr<SearchData> m_sd;
|
std::shared_ptr<SearchData> m_sd;
|
||||||
int m_snipMaxPosWalk;
|
int m_snipMaxPosWalk;
|
||||||
|
|
||||||
/* Copyconst and assignement private and forbidden */
|
|
||||||
Query(const Query &) {}
|
|
||||||
Query & operator=(const Query &) {return *this;};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
#ifndef NO_NAMESPACES
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user