recollq: add option to extract result document to file + doc and test
This commit is contained in:
parent
7d028e7ca9
commit
ea133d7d60
@ -59,6 +59,10 @@ recollq \- command line / standard output Recoll query command.
|
||||
.B \-F
|
||||
<quoted space separated field name list>
|
||||
]
|
||||
[
|
||||
.B \--extract-to
|
||||
<file path>
|
||||
]
|
||||
<query string>
|
||||
|
||||
.B recollq \-P
|
||||
@ -120,9 +124,10 @@ sorts the results according to the specified field. Use
|
||||
for descending order.
|
||||
.PP
|
||||
.B \-n
|
||||
<cnt>
|
||||
<[first-]cnt>
|
||||
can be used to set the maximum number of results that should be
|
||||
printed. The default is 2000. Use a value of 0 for no limit.
|
||||
printed. The default is 2000. Use a value of 0 for no limit. If the argument is of the form
|
||||
first-cnt, it also defines the first result to output (from 0).
|
||||
.PP
|
||||
.B \-s
|
||||
<language>
|
||||
@ -144,6 +149,11 @@ base64 and separated by one space character. Empty fields are indicated by
|
||||
consecutive space characters. There is one additional space character at
|
||||
the end of each line.
|
||||
.PP
|
||||
.B \--extract-to
|
||||
<file path>
|
||||
Will extract the first result document of the query to the argument path, which must not exist. Use
|
||||
-n first-cnt to select the document.
|
||||
.PP
|
||||
.B recollq \-P
|
||||
(Period) will print the minimum and maximum modification years for
|
||||
documents in the index.
|
||||
|
||||
@ -3852,42 +3852,48 @@ fs.inotify.max_user_watches=32768
|
||||
The Usage string follows:</para>
|
||||
<programlisting><![CDATA[
|
||||
recollq: usage:
|
||||
-P: Show the date span for all the documents present in the index
|
||||
-P: Show the date span for all the documents present in the index.
|
||||
[-o|-a|-f] [-q] <query string>
|
||||
Runs a recoll query and displays result lines.
|
||||
Default: will interpret the argument(s) as a xesam query string
|
||||
Default: will interpret the argument(s) as a xesam query string.
|
||||
Query elements:
|
||||
* Implicit AND, exclusion, field spec: t1 -t2 title:t3
|
||||
* OR has priority: t1 OR t2 t3 OR t4 means (t1 OR t2) AND (t3 OR t4)
|
||||
* Phrase: "t1 t2" (needs additional quoting on cmd line)
|
||||
-o Emulate the GUI simple search in ANY TERM mode
|
||||
-a Emulate the GUI simple search in ALL TERMS mode
|
||||
-f Emulate the GUI simple search in filename mode
|
||||
-q is just ignored (compatibility with the recoll GUI command line)
|
||||
-o Emulate the GUI simple search in ANY TERM mode.
|
||||
-a Emulate the GUI simple search in ALL TERMS mode.
|
||||
-f Emulate the GUI simple search in filename mode.
|
||||
-q is just ignored (compatibility with the recoll GUI command line).
|
||||
Common options:
|
||||
-c <configdir> : specify config directory, overriding $RECOLL_CONFDIR
|
||||
-d also dump file contents
|
||||
-c <configdir> : specify config directory, overriding $RECOLL_CONFDIR.
|
||||
-C : collapse duplicates
|
||||
-d also dump file contents.
|
||||
-n [first-]<cnt> define the result slice. The default value for [first]
|
||||
is 0. Without the option, the default max count is 2000.
|
||||
Use n=0 for no limit
|
||||
-b : basic. Just output urls, no mime types or titles
|
||||
-Q : no result lines, just the processed query and result count
|
||||
-m : dump the whole document meta[] array for each result
|
||||
-A : output the document abstracts
|
||||
-S fld : sort by field <fld>
|
||||
-D : sort descending
|
||||
-s stemlang : set stemming language to use (must exist in index...)
|
||||
Use -s "" to turn off stem expansion
|
||||
-T <synonyms file>: use the parameter (Thesaurus) for word expansion
|
||||
-i <dbdir> : additional index, several can be given
|
||||
-e use url encoding (%xx) for urls
|
||||
Use n=0 for no limit.
|
||||
-b : basic. Just output urls, no mime types or titles.
|
||||
-Q : no result lines, just the processed query and result count.
|
||||
-m : dump the whole document meta[] array for each result.
|
||||
-A : output the document abstracts.
|
||||
-p <cnt> : show <cnt> snippets, with page numbers instead of the concatenated abstract.
|
||||
-g <cnt> : show <cnt> snippets, with line numbers instead of the concatenated abstract.
|
||||
-S fld : sort by field <fld>.
|
||||
-D : sort descending.
|
||||
-s stemlang : set stemming language to use (must exist in index...).
|
||||
Use -s "" to turn off stem expansion.
|
||||
-T <synonyms file>: use the parameter (Thesaurus) for word expansion.
|
||||
-i <dbdir> : additional index, several can be given.
|
||||
-e use url encoding (%xx) for urls.
|
||||
-E use exact result count instead of lower bound estimate.
|
||||
-F <field name list> : output exactly these fields for each result.
|
||||
The field values are encoded in base64, output in one line and
|
||||
separated by one space character. This is the recommended format
|
||||
for use by other programs. Use a normal query with option -m to
|
||||
see the field names. Use -F '' to output all fields, but you probably
|
||||
also want option -N in this case
|
||||
-N : with -F, print the (plain text) field names before the field values
|
||||
also want option -N in this case.
|
||||
-N : with -F, print the (plain text) field names before the field values.
|
||||
--extract_to <filepath> : extract the first result to filepath, which must not exist.
|
||||
Use a -n option with an offset to select the appropriate result.
|
||||
]]></programlisting>
|
||||
|
||||
<para>Sample execution:</para>
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2006 J.F.Dockes
|
||||
/* Copyright (C) 2006-2022 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -22,6 +22,7 @@
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <getopt.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
@ -42,6 +43,8 @@
|
||||
#include "smallut.h"
|
||||
#include "chrono.h"
|
||||
#include "base64.h"
|
||||
#include "rclutil.h"
|
||||
#include "internfile.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -51,9 +54,9 @@ bool dump_contents(RclConfig *rclconfig, Rcl::Doc& idoc)
|
||||
Rcl::Doc fdoc;
|
||||
string ipath = idoc.ipath;
|
||||
if (interner.internfile(fdoc, ipath)) {
|
||||
cout << fdoc.text << endl;
|
||||
cout << fdoc.text << "\n";
|
||||
} else {
|
||||
cout << "Cant turn to text:" << idoc.url << " | " << idoc.ipath << endl;
|
||||
cout << "Cant turn to text:" << idoc.url << " | " << idoc.ipath << "\n";
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -67,7 +70,7 @@ string make_abstract(Rcl::Doc& doc, Rcl::Query& query, bool asSnippets,
|
||||
std::ostringstream str;
|
||||
if (query.makeDocAbstract(doc, snippets, snipcount, -1, true)) {
|
||||
for (const auto& snippet : snippets) {
|
||||
str << (showlines ? snippet.line : snippet.page) << " : " << snippet.snippet << endl;
|
||||
str << (showlines ? snippet.line : snippet.page) << " : " << snippet.snippet << "\n";
|
||||
}
|
||||
}
|
||||
abstract = str.str();
|
||||
@ -83,7 +86,6 @@ void output_fields(vector<string> fields, Rcl::Doc& doc,
|
||||
bool asSnippets, int snipcnt, bool showlines)
|
||||
{
|
||||
if (fields.empty()) {
|
||||
map<string,string>::const_iterator it;
|
||||
for (const auto& entry : doc.meta) {
|
||||
fields.push_back(entry.first);
|
||||
}
|
||||
@ -103,11 +105,13 @@ void output_fields(vector<string> fields, Rcl::Doc& doc,
|
||||
// fields. This is a problem when printing names and using strtok, but
|
||||
// have to keep the old behaviour when printnames is not set.
|
||||
if (!(out.empty() && printnames)) {
|
||||
if (printnames)
|
||||
cout << fld << " " << out << " ";
|
||||
if (printnames) {
|
||||
cout << fld << " ";
|
||||
}
|
||||
cout << out << " ";
|
||||
}
|
||||
}
|
||||
cout << endl;
|
||||
cout << "\n";
|
||||
}
|
||||
|
||||
static char *thisprog;
|
||||
@ -151,170 +155,147 @@ static char usage [] =
|
||||
" for use by other programs. Use a normal query with option -m to \n"
|
||||
" see the field names. Use -F '' to output all fields, but you probably\n"
|
||||
" also want option -N in this case.\n"
|
||||
" -N : with -F, print the (plain text) field names before the field values.\n"
|
||||
" -N : with -F, print the (plain text) field names before the field values.\n"
|
||||
" --extract_to <filepath> : extract the first result to filepath, which must not exist.\n"
|
||||
" Use a -n option with an offset to select the appropriate result.\n"
|
||||
;
|
||||
|
||||
static void
|
||||
Usage(void)
|
||||
static void Usage(std::ostream &os = std::cerr)
|
||||
{
|
||||
cerr << thisprog << ": usage:" << endl << usage;
|
||||
os << thisprog << ": usage:" << "\n" << usage;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// BEWARE COMPATIBILITY WITH recoll OPTIONS letters
|
||||
static int op_flags;
|
||||
|
||||
#define OPT_A 0x1
|
||||
// GUI: -a same
|
||||
#define OPT_a 0x2
|
||||
#define OPT_b 0x4
|
||||
#define OPT_C 0x8
|
||||
// GUI: -c same
|
||||
#define OPT_c 0x10
|
||||
#define OPT_D 0x20
|
||||
#define OPT_d 0x40
|
||||
#define OPT_e 0x80
|
||||
#define OPT_F 0x100
|
||||
#define OPT_g 0x200
|
||||
// GUI: -f same
|
||||
#define OPT_f 0x400
|
||||
// GUI uses -h for help. us: usage
|
||||
#define OPT_i 0x800
|
||||
// GUI uses -L to set language of messages
|
||||
// GUI: -l specifies query language, which is the default. Accept and ignore
|
||||
#define OPT_l 0x1000
|
||||
#define OPT_m 0x2000
|
||||
#define OPT_N 0x4000
|
||||
#define OPT_n 0x8000
|
||||
// GUI: -o same
|
||||
#define OPT_o 0x10000
|
||||
#define OPT_p 0x20000
|
||||
#define OPT_P 0x40000
|
||||
#define OPT_Q 0x80000
|
||||
// GUI: -q same
|
||||
#define OPT_q 0x100000
|
||||
#define OPT_S 0x200000
|
||||
#define OPT_s 0x400000
|
||||
#define OPT_T 0x800000
|
||||
// GUI: -t use command line, us: ignored
|
||||
#define OPT_t 0x100000
|
||||
// GUI uses -v : show version. Us: usage
|
||||
// GUI uses -w : open minimized
|
||||
#define OPT_E 0x200000
|
||||
#define OPT_A 0x1
|
||||
#define OPT_a 0x2
|
||||
#define OPT_b 0x4
|
||||
#define OPT_C 0x8
|
||||
#define OPT_D 0x10
|
||||
#define OPT_d 0x20
|
||||
#define OPT_e 0x40
|
||||
#define OPT_F 0x80
|
||||
#define OPT_g 0x100
|
||||
#define OPT_f 0x200
|
||||
#define OPT_l 0x400
|
||||
#define OPT_m 0x800
|
||||
#define OPT_N 0x1000
|
||||
#define OPT_o 0x2000
|
||||
#define OPT_p 0x4000
|
||||
#define OPT_P 0x8000
|
||||
#define OPT_Q 0x10000
|
||||
#define OPT_q 0x20000
|
||||
#define OPT_S 0x40000
|
||||
#define OPT_E 0x80000
|
||||
|
||||
static struct option long_options[] = {
|
||||
#define OPTION_EXTRACT 1000
|
||||
{"extract-to", required_argument, 0, OPTION_EXTRACT},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
{
|
||||
thisprog = argv[0];
|
||||
|
||||
string a_config;
|
||||
string sortfield;
|
||||
string stemlang("english");
|
||||
list<string> extra_dbs;
|
||||
string sf;
|
||||
vector<string> fields;
|
||||
string syngroupsfn;
|
||||
|
||||
int firstres = 0;
|
||||
int maxcount = 2000;
|
||||
int snipcnt = -1;
|
||||
std::string extractfile;
|
||||
|
||||
thisprog = argv[0];
|
||||
argc--; argv++;
|
||||
|
||||
while (argc > 0 && **argv == '-') {
|
||||
(*argv)++;
|
||||
if (!(**argv))
|
||||
/* Cas du "adb - core" */
|
||||
Usage();
|
||||
while (**argv)
|
||||
switch (*(*argv)++) {
|
||||
case '-':
|
||||
// -- : end of options
|
||||
if (*(*argv) != 0)
|
||||
Usage();
|
||||
goto endopts;
|
||||
case 'A': op_flags |= OPT_A; break;
|
||||
case 'a': op_flags |= OPT_a; break;
|
||||
case 'b': op_flags |= OPT_b; break;
|
||||
case 'C': op_flags |= OPT_C; break;
|
||||
case 'c': op_flags |= OPT_c; if (argc < 2) Usage();
|
||||
a_config = *(++argv);
|
||||
argc--; goto b1;
|
||||
case 'd': op_flags |= OPT_d; break;
|
||||
case 'D': op_flags |= OPT_D; break;
|
||||
case 'E': op_flags |= OPT_E; break;
|
||||
case 'e': op_flags |= OPT_e; break;
|
||||
case 'f': op_flags |= OPT_f; break;
|
||||
case 'F': op_flags |= OPT_F; if (argc < 2) Usage();
|
||||
sf = *(++argv);
|
||||
argc--; goto b1;
|
||||
case 'i': op_flags |= OPT_i; if (argc < 2) Usage();
|
||||
extra_dbs.push_back(*(++argv));
|
||||
argc--; goto b1;
|
||||
case 'l': op_flags |= OPT_l; break;
|
||||
case 'm': op_flags |= OPT_m; break;
|
||||
case 'N': op_flags |= OPT_N; break;
|
||||
case 'n': op_flags |= OPT_n; if (argc < 2) Usage();
|
||||
{
|
||||
string rescnt = *(++argv);
|
||||
string::size_type dash = rescnt.find("-");
|
||||
if (dash != string::npos) {
|
||||
firstres = atoi(rescnt.substr(0, dash).c_str());
|
||||
if (dash < rescnt.size()-1) {
|
||||
maxcount = atoi(rescnt.substr(dash+1).c_str());
|
||||
}
|
||||
} else {
|
||||
maxcount = atoi(rescnt.c_str());
|
||||
int ret;
|
||||
while ((ret = getopt_long(argc, argv, "+AabCc:DdEefF:hi:lmNn:oPp:g:QqS:s:tT:v",
|
||||
long_options, NULL)) != -1) {
|
||||
switch (ret) {
|
||||
case 'A': op_flags |= OPT_A; break;
|
||||
// GUI: -a same
|
||||
case 'a': op_flags |= OPT_a; break;
|
||||
case 'b': op_flags |= OPT_b; break;
|
||||
case 'C': op_flags |= OPT_C; break;
|
||||
// GUI: -c same
|
||||
case 'c': a_config = optarg; break;
|
||||
case 'd': op_flags |= OPT_d; break;
|
||||
case 'D': op_flags |= OPT_D; break;
|
||||
case 'E': op_flags |= OPT_E; break;
|
||||
case 'e': op_flags |= OPT_e; break;
|
||||
case 'F': op_flags |= OPT_F; sf = optarg; break;
|
||||
// GUI: -f same
|
||||
case 'f': op_flags |= OPT_f; break;
|
||||
// GUI: -h same
|
||||
case 'h': Usage(std::cout); // Usage exits
|
||||
case 'i': extra_dbs.push_back(optarg); break;
|
||||
// GUI uses -L to set language of messages
|
||||
// GUI: -l specifies query language, which is the default. Accept and ignore
|
||||
case 'l': op_flags |= OPT_l; break;
|
||||
case 'm': op_flags |= OPT_m; break;
|
||||
case 'N': op_flags |= OPT_N; break;
|
||||
case 'n':
|
||||
{
|
||||
string rescnt = optarg;
|
||||
string::size_type dash = rescnt.find("-");
|
||||
if (dash != string::npos) {
|
||||
firstres = atoi(rescnt.substr(0, dash).c_str());
|
||||
if (dash < rescnt.size()-1) {
|
||||
maxcount = atoi(rescnt.substr(dash+1).c_str());
|
||||
}
|
||||
if (maxcount <= 0) maxcount = INT_MAX;
|
||||
} else {
|
||||
maxcount = atoi(rescnt.c_str());
|
||||
}
|
||||
argc--; goto b1;
|
||||
case 'o': op_flags |= OPT_o; break;
|
||||
case 'P': op_flags |= OPT_P; break;
|
||||
case 'p':
|
||||
op_flags |= OPT_p;
|
||||
goto porg;
|
||||
case 'g':
|
||||
op_flags |= OPT_g;
|
||||
goto porg;
|
||||
if (maxcount <= 0) maxcount = INT_MAX;
|
||||
}
|
||||
break;
|
||||
// GUI: -o same
|
||||
case 'o': op_flags |= OPT_o; break;
|
||||
case 'P': op_flags |= OPT_P; break;
|
||||
case 'p':
|
||||
op_flags |= OPT_p;
|
||||
goto porg;
|
||||
case 'g':
|
||||
op_flags |= OPT_g;
|
||||
{
|
||||
porg:
|
||||
if (argc < 2)
|
||||
Usage();
|
||||
const char *cp = *(++argv);
|
||||
const char *cp = optarg;
|
||||
char *cpe;
|
||||
snipcnt = strtol(cp, &cpe, 10);
|
||||
if (*cpe != 0)
|
||||
Usage();
|
||||
argc--; goto b1;
|
||||
}
|
||||
case 'q': op_flags |= OPT_q; break;
|
||||
case 'Q': op_flags |= OPT_Q; break;
|
||||
case 'S': op_flags |= OPT_S; if (argc < 2) Usage();
|
||||
sortfield = *(++argv);
|
||||
argc--; goto b1;
|
||||
case 's': op_flags |= OPT_s; if (argc < 2) Usage();
|
||||
stemlang = *(++argv);
|
||||
argc--; goto b1;
|
||||
case 't': op_flags |= OPT_t; break;
|
||||
case 'T': op_flags |= OPT_T; if (argc < 2) Usage();
|
||||
syngroupsfn = *(++argv);
|
||||
argc--; goto b1;
|
||||
default: Usage(); break;
|
||||
}
|
||||
b1: argc--; argv++;
|
||||
break;
|
||||
case 'Q': op_flags |= OPT_Q; break;
|
||||
// GUI: -q same
|
||||
case 'q': op_flags |= OPT_q; break;
|
||||
case 'S': op_flags |= OPT_S; sortfield = optarg; break;
|
||||
case 's': stemlang = optarg; break;
|
||||
// GUI: -t use command line, us: ignored
|
||||
case 't': break;
|
||||
case 'T': syngroupsfn = optarg; break;
|
||||
// GUI: -v same
|
||||
case 'v': std::cout << Rcl::version_string() << "\n"; return 0;
|
||||
// GUI uses -w : open minimized
|
||||
case OPTION_EXTRACT: extractfile=optarg;break;
|
||||
}
|
||||
}
|
||||
endopts:
|
||||
|
||||
string reason;
|
||||
*cfp = recollinit(0, 0, 0, reason, &a_config);
|
||||
RclConfig *rclconfig = *cfp;
|
||||
if (!rclconfig || !rclconfig->ok()) {
|
||||
fprintf(stderr, "Recoll init failed: %s\n", reason.c_str());
|
||||
std::cerr << "Recoll init failed: " << reason << "\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (argc < 1 && !(op_flags & OPT_P)) {
|
||||
Usage();
|
||||
}
|
||||
vector<string> fields;
|
||||
if (op_flags & OPT_F) {
|
||||
if (op_flags & (OPT_b|OPT_d|OPT_b|OPT_Q|OPT_m|OPT_A))
|
||||
Usage();
|
||||
@ -322,44 +303,43 @@ endopts:
|
||||
}
|
||||
Rcl::Db rcldb(rclconfig);
|
||||
if (!extra_dbs.empty()) {
|
||||
for (list<string>::iterator it = extra_dbs.begin();
|
||||
it != extra_dbs.end(); it++) {
|
||||
if (!rcldb.addQueryDb(*it)) {
|
||||
cerr << "Can't add index: " << *it << endl;
|
||||
for (const auto& db : extra_dbs) {
|
||||
if (!rcldb.addQueryDb(db)) {
|
||||
cerr << "Can't add index: " << db << "\n";
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!syngroupsfn.empty()) {
|
||||
if (!rcldb.setSynGroupsFile(syngroupsfn)) {
|
||||
cerr << "Can't use synonyms file: " << syngroupsfn << endl;
|
||||
cerr << "Can't use synonyms file: " << syngroupsfn << "\n";
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (!rcldb.open(Rcl::Db::DbRO)) {
|
||||
cerr << "Cant open database in " << rclconfig->getDbDir() <<
|
||||
" reason: " << rcldb.getReason() << endl;
|
||||
" reason: " << rcldb.getReason() << "\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (op_flags & OPT_P) {
|
||||
int minyear, maxyear;
|
||||
if (!rcldb.maxYearSpan(&minyear, &maxyear)) {
|
||||
cerr << "maxYearSpan failed: " << rcldb.getReason() << endl;
|
||||
exit(1);
|
||||
cerr << "maxYearSpan failed: " << rcldb.getReason() << "\n";
|
||||
return 1;
|
||||
} else {
|
||||
cout << "Min year " << minyear << " Max year " << maxyear << endl;
|
||||
exit(0);
|
||||
cout << "Min year " << minyear << " Max year " << maxyear << "\n";
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (argc < 1) {
|
||||
if (optind >= argc) {
|
||||
Usage();
|
||||
}
|
||||
string qs = *argv++;argc--;
|
||||
while (argc > 0) {
|
||||
qs += string(" ") + *argv++;argc--;
|
||||
string qs;
|
||||
while (optind < argc) {
|
||||
qs += std::string(argv[optind++]) + " ";
|
||||
}
|
||||
|
||||
{
|
||||
@ -367,11 +347,11 @@ endopts:
|
||||
string charset = rclconfig->getDefCharset(true);
|
||||
int ercnt;
|
||||
if (!transcode(qs, uq, charset, "UTF-8", &ercnt)) {
|
||||
fprintf(stderr, "Can't convert command line args to utf-8\n");
|
||||
exit(1);
|
||||
std::cerr << "Can't convert command line args to utf-8\n";
|
||||
return 1;
|
||||
} else if (ercnt) {
|
||||
fprintf(stderr, "%d errors while converting arguments from %s "
|
||||
"to utf-8\n", ercnt, charset.c_str());
|
||||
std::cerr <<
|
||||
ercnt << " errors while converting arguments from " << charset << "to utf-8\n";
|
||||
}
|
||||
qs = uq;
|
||||
}
|
||||
@ -380,13 +360,12 @@ endopts:
|
||||
|
||||
if (op_flags & (OPT_a|OPT_o|OPT_f)) {
|
||||
sd = std::make_shared<Rcl::SearchData>(Rcl::SCLT_OR, stemlang);
|
||||
Rcl::SearchDataClause *clp = 0;
|
||||
Rcl::SearchDataClause *clp;
|
||||
if (op_flags & OPT_f) {
|
||||
clp = new Rcl::SearchDataClauseFilename(qs);
|
||||
} else {
|
||||
clp = new Rcl::SearchDataClauseSimple((op_flags & OPT_o)?
|
||||
Rcl::SCLT_OR : Rcl::SCLT_AND,
|
||||
qs);
|
||||
clp = new Rcl::SearchDataClauseSimple(
|
||||
(op_flags & OPT_o) ? Rcl::SCLT_OR : Rcl::SCLT_AND, qs);
|
||||
}
|
||||
if (sd)
|
||||
sd->addClause(clp);
|
||||
@ -395,7 +374,7 @@ endopts:
|
||||
}
|
||||
|
||||
if (!sd) {
|
||||
cerr << "Query string interpretation failed: " << reason << endl;
|
||||
std::cerr << "Query string interpretation failed: " << reason << "\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -409,8 +388,8 @@ endopts:
|
||||
}
|
||||
Chrono chron;
|
||||
if (!query.setQuery(rq)) {
|
||||
cerr << "Query setup failed: " << query.getReason() << endl;
|
||||
return(1);
|
||||
std::cerr << "Query setup failed: " << query.getReason() << "\n";
|
||||
return 1;
|
||||
}
|
||||
int cnt;
|
||||
if (op_flags & OPT_E) {
|
||||
@ -419,29 +398,37 @@ endopts:
|
||||
cnt = query.getResCnt();
|
||||
}
|
||||
if (!(op_flags & OPT_b)) {
|
||||
cout << "Recoll query: " << rq->getDescription() << endl;
|
||||
std::cout << "Recoll query: " << rq->getDescription() << "\n";
|
||||
if (firstres == 0) {
|
||||
if (cnt <= maxcount)
|
||||
cout << cnt << " results" << endl;
|
||||
std::cout << cnt << " results" << "\n";
|
||||
else
|
||||
cout << cnt << " results (printing " << maxcount << " max):"
|
||||
<< endl;
|
||||
std::cout << cnt << " results (printing " << maxcount << " max):" << "\n";
|
||||
} else {
|
||||
cout << "Printing at most " << cnt - (firstres+maxcount) <<
|
||||
" results from first " << firstres << endl;
|
||||
std::cout << "Printing at most " << cnt - (firstres+maxcount) <<
|
||||
" results from first " << firstres << "\n";
|
||||
}
|
||||
}
|
||||
if (op_flags & OPT_Q)
|
||||
cout << "Query setup took " << chron.millis() << " mS" << endl;
|
||||
|
||||
if (op_flags & OPT_Q)
|
||||
return(0);
|
||||
if (op_flags & OPT_Q) {
|
||||
std::cout << "Query setup took " << chron.millis() << " mS" << "\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (int i = firstres; i < firstres + maxcount; i++) {
|
||||
Rcl::Doc doc;
|
||||
if (!query.getDoc(i, doc))
|
||||
break;
|
||||
|
||||
if (!extractfile.empty()) {
|
||||
if (path_exists(extractfile)) {
|
||||
std::cerr << "Output file must not exist.\n";
|
||||
return 1;
|
||||
}
|
||||
TempFile unused;
|
||||
auto ret = FileInterner::idocToFile(unused, extractfile, rclconfig, doc, false);
|
||||
return ret ? 0 : 1;
|
||||
}
|
||||
|
||||
if (op_flags & OPT_F) {
|
||||
output_fields(fields, doc, query, rcldb,
|
||||
op_flags & OPT_N, op_flags & (OPT_p|OPT_g), snipcnt, op_flags & OPT_g);
|
||||
@ -452,7 +439,7 @@ endopts:
|
||||
doc.url = url_encode(doc.url);
|
||||
|
||||
if (op_flags & OPT_b) {
|
||||
cout << doc.url << endl;
|
||||
cout << doc.url << "\n";
|
||||
} else {
|
||||
string titleorfn = doc.meta[Rcl::Doc::keytt];
|
||||
if (titleorfn.empty())
|
||||
@ -470,10 +457,10 @@ endopts:
|
||||
<< "[" << doc.url << "]" << "\t"
|
||||
<< "[" << titleorfn << "]" << "\t"
|
||||
<< doc.fbytes << "\tbytes" << "\t"
|
||||
<< endl;
|
||||
<< "\n";
|
||||
if (op_flags & OPT_m) {
|
||||
for (const auto& ent : doc.meta) {
|
||||
cout << ent.first << " = " << ent.second << endl;
|
||||
cout << ent.first << " = " << ent.second << "\n";
|
||||
}
|
||||
}
|
||||
if (op_flags & OPT_A) {
|
||||
@ -482,9 +469,9 @@ endopts:
|
||||
string abstract = make_abstract(doc, query, asSnippets, snipcnt, showlines);
|
||||
string marker = asSnippets ? "SNIPPETS" : "ABSTRACT";
|
||||
if (!abstract.empty()) {
|
||||
cout << marker << endl;
|
||||
cout << marker << "\n";
|
||||
cout << abstract;
|
||||
cout << string("/") + marker << endl;
|
||||
cout << string("/") + marker << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
21
tests/extracto/extracto.sh
Executable file
21
tests/extracto/extracto.sh
Executable file
@ -0,0 +1,21 @@
|
||||
#!/bin/sh
|
||||
|
||||
topdir=`dirname $0`/..
|
||||
. $topdir/shared.sh
|
||||
|
||||
initvariables $0
|
||||
|
||||
(
|
||||
tmpfile=$(
|
||||
echo 'mkstemp(template)' |
|
||||
m4 -D template="${TMPDIR:-/tmp}/rcltsttmpXXXXXX"
|
||||
) || exit 1
|
||||
trap "rm -f -- '$tmpfile'" EXIT
|
||||
rm -f "$tmpfile"
|
||||
recollq --extract-to "$tmpfile" -q HtmlAttachment_uniqueTerm
|
||||
md5sum "$tmpfile" | awk '{print $1}'
|
||||
) 2> $mystderr | egrep -v '^Recoll query: ' > $mystdout
|
||||
|
||||
diff -w ${myname}.txt $mystdout > $mydiffs 2>&1
|
||||
|
||||
checkresult
|
||||
2
tests/extracto/extracto.txt
Normal file
2
tests/extracto/extracto.txt
Normal file
@ -0,0 +1,2 @@
|
||||
1 results
|
||||
90c700bb60b7df96e56cca46561f0597
|
||||
@ -7,8 +7,8 @@ initvariables $0
|
||||
|
||||
(
|
||||
|
||||
recollq -S url -q '"simulating shock turbulence interactions"'
|
||||
recollq -S url Utf8pathunique
|
||||
recollq -S fmtime -q '"simulating shock turbulence interactions"'
|
||||
recollq -S fmtime Utf8pathunique
|
||||
|
||||
) 2> $mystderr | egrep -v '^Recoll query: ' > $mystdout
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user