add fully parseable base64-encoded output mode for use by external programs

This commit is contained in:
Jean-Francois Dockes 2011-10-04 08:02:57 +02:00
parent a3898343a7
commit 3e533298c0

View File

@ -43,6 +43,7 @@ using namespace std;
#include "transcode.h" #include "transcode.h"
#include "textsplit.h" #include "textsplit.h"
#include "smallut.h" #include "smallut.h"
#include "base64.h"
bool dump_contents(RclConfig *rclconfig, TempDir& tmpdir, Rcl::Doc& idoc) bool dump_contents(RclConfig *rclconfig, TempDir& tmpdir, Rcl::Doc& idoc)
{ {
@ -58,6 +59,23 @@ bool dump_contents(RclConfig *rclconfig, TempDir& tmpdir, Rcl::Doc& idoc)
return true; return true;
} }
// Print the requested fields of one result document on a single line, in
// the machine-parseable format selected by the -F option.
//
// Every value is base64-encoded and followed by exactly one space character
// (so the line ends with a space); the line is then terminated with endl.
// The field name "abstract" is special: its value is not read from the
// document metadata but generated by calling rcldb.makeDocAbstract().
// A field name with no entry in doc.meta produces the encoding of an empty
// string.
//
// fields: names of the fields to output, in output order.
// doc:    result document supplying the metadata values.
// query:  query passed on to makeDocAbstract() for the "abstract" field.
// rcldb:  database object used to build the abstract.
void output_fields(const vector<string>& fields, Rcl::Doc& doc,
                   Rcl::Query& query, Rcl::Db& rcldb)
{
    // Encoded in place of the value when a requested field is not set.
    const string novalue;

    for (vector<string>::const_iterator it = fields.begin();
         it != fields.end(); ++it) {
        string out;
        if (*it == "abstract") {
            string abstract;
            rcldb.makeDocAbstract(doc, &query, abstract);
            base64_encode(abstract, out);
        } else {
            // Use find() rather than operator[]: the latter would insert an
            // empty entry into doc.meta for every unknown field name.
            map<string, string>::const_iterator fld = doc.meta.find(*it);
            base64_encode(fld != doc.meta.end() ? fld->second : novalue, out);
        }
        cout << out << " ";
    }
    cout << endl;
}
static char *thisprog; static char *thisprog;
static char usage [] = static char usage [] =
@ -83,6 +101,12 @@ static char usage [] =
" -S fld : sort by field name\n" " -S fld : sort by field name\n"
" -D : sort descending\n" " -D : sort descending\n"
" -i <dbdir> : additional index, several can be given\n" " -i <dbdir> : additional index, several can be given\n"
" -e use url encoding (%xx) for urls\n"
" -F <field name list> : output exactly these fields for each result.\n"
" The field values are encoded in base64, output in one line and \n"
" separated by one space character. This is the recommended format \n"
" for use by other programs. Use a normal query with option -m to \n"
" see the field names.\n"
; ;
static void static void
Usage(void) Usage(void)
@ -92,26 +116,31 @@ Usage(void)
} }
// ATTENTION A LA COMPATIBILITE AVEC LES OPTIONS DE recoll // ATTENTION A LA COMPATIBILITE AVEC LES OPTIONS DE recoll
// OPT_q and OPT_t are ignored // -q, -t and -l are accepted and ignored
// -a/f/o -c have the same meaning
// -h is not used
static int op_flags; static int op_flags;
#define OPT_o 0x2 #define OPT_A 0x1
#define OPT_a 0x4 #define OPT_a 0x2
#define OPT_b 0x4
#define OPT_c 0x8 #define OPT_c 0x8
#define OPT_d 0x10 #define OPT_D 0x10
#define OPT_n 0x20 #define OPT_d 0x20
#define OPT_b 0x40 #define OPT_f 0x40
#define OPT_f 0x80 #define OPT_i 0x80
#define OPT_l 0x100 #define OPT_l 0x100
#define OPT_q 0x200 #define OPT_m 0x200
#define OPT_t 0x400 #define OPT_n 0x400
#define OPT_m 0x800 #define OPT_o 0x800
#define OPT_D 0x1000 #define OPT_P 0x1000
#define OPT_S 0x2000 #define OPT_Q 0x2000
#define OPT_s 0x4000 #define OPT_q 0x4000
#define OPT_A 0x8000 #define OPT_S 0x8000
#define OPT_i 0x10000 #define OPT_s 0x10000
#define OPT_P 0x20000 #define OPT_t 0x20000
#define OPT_Q 0x40000 #define OPT_e 0x40000
#define OPT_F 0x80000
int recollq(RclConfig **cfp, int argc, char **argv) int recollq(RclConfig **cfp, int argc, char **argv)
{ {
@ -119,6 +148,8 @@ int recollq(RclConfig **cfp, int argc, char **argv)
string sortfield; string sortfield;
string stemlang("english"); string stemlang("english");
list<string> extra_dbs; list<string> extra_dbs;
string sf;
vector<string> fields;
int limit = 2000; int limit = 2000;
thisprog = argv[0]; thisprog = argv[0];
@ -139,7 +170,11 @@ int recollq(RclConfig **cfp, int argc, char **argv)
argc--; goto b1; argc--; goto b1;
case 'd': op_flags |= OPT_d; break; case 'd': op_flags |= OPT_d; break;
case 'D': op_flags |= OPT_D; break; case 'D': op_flags |= OPT_D; break;
case 'e': op_flags |= OPT_e; break;
case 'f': op_flags |= OPT_f; break; case 'f': op_flags |= OPT_f; break;
case 'F': op_flags |= OPT_F; if (argc < 2) Usage();
sf = *(++argv);
argc--; goto b1;
case 'i': op_flags |= OPT_i; if (argc < 2) Usage(); case 'i': op_flags |= OPT_i; if (argc < 2) Usage();
extra_dbs.push_back(*(++argv)); extra_dbs.push_back(*(++argv));
argc--; goto b1; argc--; goto b1;
@ -176,7 +211,11 @@ int recollq(RclConfig **cfp, int argc, char **argv)
if (argc < 1 && !(op_flags & OPT_P)) { if (argc < 1 && !(op_flags & OPT_P)) {
Usage(); Usage();
} }
if (op_flags & OPT_F) {
if (op_flags & (OPT_b|OPT_d|OPT_b|OPT_Q|OPT_m|OPT_A))
Usage();
stringToStrings(sf, fields);
}
Rcl::Db rcldb(rclconfig); Rcl::Db rcldb(rclconfig);
if (!extra_dbs.empty()) { if (!extra_dbs.empty()) {
for (list<string>::iterator it = extra_dbs.begin(); for (list<string>::iterator it = extra_dbs.begin();
@ -291,16 +330,24 @@ int recollq(RclConfig **cfp, int argc, char **argv)
if (!query.getDoc(i, doc)) if (!query.getDoc(i, doc))
break; break;
if (op_flags & OPT_F) {
output_fields(fields, doc, query, rcldb);
continue;
}
if (op_flags & OPT_e)
doc.url = url_encode(doc.url);
if (op_flags & OPT_b) { if (op_flags & OPT_b) {
cout << doc.url.c_str() << endl; cout << doc.url << endl;
} else { } else {
char cpc[20]; char cpc[20];
sprintf(cpc, "%d", doc.pc); sprintf(cpc, "%d", doc.pc);
cout cout
<< doc.mimetype.c_str() << "\t" << doc.mimetype << "\t"
<< "[" << doc.url.c_str() << "]" << "\t" << "[" << doc.url << "]" << "\t"
<< "[" << doc.meta[Rcl::Doc::keytt].c_str() << "]" << "\t" << "[" << doc.meta[Rcl::Doc::keytt] << "]" << "\t"
<< doc.fbytes.c_str() << "\tbytes" << "\t" << doc.fbytes << "\tbytes" << "\t"
<< endl; << endl;
if (op_flags & OPT_m) { if (op_flags & OPT_m) {
for (map<string,string>::const_iterator it = doc.meta.begin(); for (map<string,string>::const_iterator it = doc.meta.begin();