diff --git a/src/query/recollq.cpp b/src/query/recollq.cpp index 154d29da..01e386db 100644 --- a/src/query/recollq.cpp +++ b/src/query/recollq.cpp @@ -49,15 +49,15 @@ bool dump_contents(RclConfig *rclconfig, Rcl::Doc& idoc) Rcl::Doc fdoc; string ipath = idoc.ipath; if (interner.internfile(fdoc, ipath)) { - cout << fdoc.text << endl; + cout << fdoc.text << endl; } else { - cout << "Cant turn to text:" << idoc.url << " | " << idoc.ipath << endl; + cout << "Cant turn to text:" << idoc.url << " | " << idoc.ipath << endl; } return true; } void output_fields(vector fields, Rcl::Doc& doc, - Rcl::Query& query, Rcl::Db& rcldb, bool printnames) + Rcl::Query& query, Rcl::Db& rcldb, bool printnames) { if (fields.empty()) { map::const_iterator it; @@ -66,19 +66,19 @@ void output_fields(vector fields, Rcl::Doc& doc, } } for (vector::const_iterator it = fields.begin(); - it != fields.end(); it++) { - string out; - if (!it->compare("abstract")) { - string abstract; - query.makeDocAbstract(doc, abstract); - base64_encode(abstract, out); + it != fields.end(); it++) { + string out; + if (!it->compare("abstract")) { + string abstract; + query.makeDocAbstract(doc, abstract); + base64_encode(abstract, out); } else if (!it->compare("xdocid")) { char cdocid[30]; sprintf(cdocid, "%lu", (unsigned long)doc.xdocid); base64_encode(cdocid, out); - } else { - base64_encode(doc.meta[*it], out); - } + } else { + base64_encode(doc.meta[*it], out); + } // Before printnames existed, recollq printed a single blank for empty // fields. This is a problem when printing names and using strtok, but // have to keep the old behaviour when printnames is not set. @@ -93,45 +93,45 @@ void output_fields(vector fields, Rcl::Doc& doc, static char *thisprog; static char usage [] = -" -P: Show the date span for all the documents present in the index.\n" -" [-o|-a|-f] [-q] \n" -" Runs a recoll query and displays result lines. \n" -" Default: will interpret the argument(s) as a xesam query string.\n" -" Query elements: \n" -" * Implicit AND, exclusion, field spec: t1 -t2 title:t3\n" -" * OR has priority: t1 OR t2 t3 OR t4 means (t1 OR t2) AND (t3 OR t4)\n" -" * Phrase: \"t1 t2\" (needs additional quoting on cmd line)\n" -" -o Emulate the GUI simple search in ANY TERM mode.\n" -" -a Emulate the GUI simple search in ALL TERMS mode.\n" -" -f Emulate the GUI simple search in filename mode.\n" -" -q is just ignored (compatibility with the recoll GUI command line).\n" -"Common options:\n" -" -c : specify config directory, overriding $RECOLL_CONFDIR.\n" -" -C : collapse duplicates\n" -" -d also dump file contents.\n" -" -n [first-] define the result slice. The default value for [first]\n" -" is 0. Without the option, the default max count is 2000.\n" -" Use n=0 for no limit.\n" -" -b : basic. Just output urls, no mime types or titles.\n" -" -Q : no result lines, just the processed query and result count.\n" -" -m : dump the whole document meta[] array for each result.\n" -" -A : output the document abstracts.\n" -" -S fld : sort by field .\n" -" -D : sort descending.\n" -" -s stemlang : set stemming language to use (must exist in index...).\n" -" Use -s \"\" to turn off stem expansion.\n" -" -T : use the parameter (Thesaurus) for word expansion.\n" -" -i : additional index, several can be given.\n" -" -e use url encoding (%xx) for urls.\n" -" -E use exact result count instead of lower bound estimate" -" -F : output exactly these fields for each result.\n" -" The field values are encoded in base64, output in one line and \n" -" separated by one space character. This is the recommended format \n" -" for use by other programs. Use a normal query with option -m to \n" -" see the field names. Use -F '' to output all fields, but you probably\n" -" also want option -N in this case.\n" -" -N : with -F, print the (plain text) field names before the field values.\n" -; + " -P: Show the date span for all the documents present in the index.\n" + " [-o|-a|-f] [-q] \n" + " Runs a recoll query and displays result lines. \n" + " Default: will interpret the argument(s) as a xesam query string.\n" + " Query elements: \n" + " * Implicit AND, exclusion, field spec: t1 -t2 title:t3\n" + " * OR has priority: t1 OR t2 t3 OR t4 means (t1 OR t2) AND (t3 OR t4)\n" + " * Phrase: \"t1 t2\" (needs additional quoting on cmd line)\n" + " -o Emulate the GUI simple search in ANY TERM mode.\n" + " -a Emulate the GUI simple search in ALL TERMS mode.\n" + " -f Emulate the GUI simple search in filename mode.\n" + " -q is just ignored (compatibility with the recoll GUI command line).\n" + "Common options:\n" + " -c : specify config directory, overriding $RECOLL_CONFDIR.\n" + " -C : collapse duplicates\n" + " -d also dump file contents.\n" + " -n [first-] define the result slice. The default value for [first]\n" + " is 0. Without the option, the default max count is 2000.\n" + " Use n=0 for no limit.\n" + " -b : basic. Just output urls, no mime types or titles.\n" + " -Q : no result lines, just the processed query and result count.\n" + " -m : dump the whole document meta[] array for each result.\n" + " -A : output the document abstracts.\n" + " -S fld : sort by field .\n" + " -D : sort descending.\n" + " -s stemlang : set stemming language to use (must exist in index...).\n" + " Use -s \"\" to turn off stem expansion.\n" + " -T : use the parameter (Thesaurus) for word expansion.\n" + " -i : additional index, several can be given.\n" + " -e use url encoding (%xx) for urls.\n" + " -E use exact result count instead of lower bound estimate" + " -F : output exactly these fields for each result.\n" + " The field values are encoded in base64, output in one line and \n" + " separated by one space character. This is the recommended format \n" + " for use by other programs. Use a normal query with option -m to \n" + " see the field names. Use -F '' to output all fields, but you probably\n" + " also want option -N in this case.\n" + " -N : with -F, print the (plain text) field names before the field values.\n" + ; static void Usage(void) { @@ -200,61 +200,61 @@ int recollq(RclConfig **cfp, int argc, char **argv) Usage(); while (**argv) switch (*(*argv)++) { - case '-': - // -- : end of options - if (*(*argv) != 0) - Usage(); - goto endopts; + case '-': + // -- : end of options + if (*(*argv) != 0) + Usage(); + goto endopts; case 'A': op_flags |= OPT_A; break; case 'a': op_flags |= OPT_a; break; case 'b': op_flags |= OPT_b; break; case 'C': op_flags |= OPT_C; break; - case 'c': op_flags |= OPT_c; if (argc < 2) Usage(); - a_config = *(++argv); - argc--; goto b1; + case 'c': op_flags |= OPT_c; if (argc < 2) Usage(); + a_config = *(++argv); + argc--; goto b1; case 'd': op_flags |= OPT_d; break; case 'D': op_flags |= OPT_D; break; case 'E': op_flags |= OPT_E; break; case 'e': op_flags |= OPT_e; break; case 'f': op_flags |= OPT_f; break; - case 'F': op_flags |= OPT_F; if (argc < 2) Usage(); - sf = *(++argv); - argc--; goto b1; - case 'i': op_flags |= OPT_i; if (argc < 2) Usage(); - extra_dbs.push_back(*(++argv)); - argc--; goto b1; + case 'F': op_flags |= OPT_F; if (argc < 2) Usage(); + sf = *(++argv); + argc--; goto b1; + case 'i': op_flags |= OPT_i; if (argc < 2) Usage(); + extra_dbs.push_back(*(++argv)); + argc--; goto b1; case 'l': op_flags |= OPT_l; break; case 'm': op_flags |= OPT_m; break; case 'N': op_flags |= OPT_N; break; - case 'n': op_flags |= OPT_n; if (argc < 2) Usage(); - { - string rescnt = *(++argv); - string::size_type dash = rescnt.find("-"); - if (dash != string::npos) { - firstres = atoi(rescnt.substr(0, dash).c_str()); - if (dash < rescnt.size()-1) { - maxcount = atoi(rescnt.substr(dash+1).c_str()); - } - } else { - maxcount = atoi(rescnt.c_str()); - } - if (maxcount <= 0) maxcount = INT_MAX; - } - argc--; goto b1; + case 'n': op_flags |= OPT_n; if (argc < 2) Usage(); + { + string rescnt = *(++argv); + string::size_type dash = rescnt.find("-"); + if (dash != string::npos) { + firstres = atoi(rescnt.substr(0, dash).c_str()); + if (dash < rescnt.size()-1) { + maxcount = atoi(rescnt.substr(dash+1).c_str()); + } + } else { + maxcount = atoi(rescnt.c_str()); + } + if (maxcount <= 0) maxcount = INT_MAX; + } + argc--; goto b1; case 'o': op_flags |= OPT_o; break; case 'P': op_flags |= OPT_P; break; case 'q': op_flags |= OPT_q; break; case 'Q': op_flags |= OPT_Q; break; - case 'S': op_flags |= OPT_S; if (argc < 2) Usage(); - sortfield = *(++argv); - argc--; goto b1; - case 's': op_flags |= OPT_s; if (argc < 2) Usage(); - stemlang = *(++argv); - argc--; goto b1; + case 'S': op_flags |= OPT_S; if (argc < 2) Usage(); + sortfield = *(++argv); + argc--; goto b1; + case 's': op_flags |= OPT_s; if (argc < 2) Usage(); + stemlang = *(++argv); + argc--; goto b1; case 't': op_flags |= OPT_t; break; - case 'T': op_flags |= OPT_T; if (argc < 2) Usage(); - syngroupsfn = *(++argv); - argc--; goto b1; + case 'T': op_flags |= OPT_T; if (argc < 2) Usage(); + syngroupsfn = *(++argv); + argc--; goto b1; default: Usage(); break; } b1: argc--; argv++; @@ -265,17 +265,17 @@ endopts: *cfp = recollinit(0, 0, 0, reason, &a_config); RclConfig *rclconfig = *cfp; if (!rclconfig || !rclconfig->ok()) { - fprintf(stderr, "Recoll init failed: %s\n", reason.c_str()); - exit(1); + fprintf(stderr, "Recoll init failed: %s\n", reason.c_str()); + exit(1); } if (argc < 1 && !(op_flags & OPT_P)) { - Usage(); + Usage(); } if (op_flags & OPT_F) { - if (op_flags & (OPT_b|OPT_d|OPT_b|OPT_Q|OPT_m|OPT_A)) - Usage(); - stringToStrings(sf, fields); + if (op_flags & (OPT_b|OPT_d|OPT_b|OPT_Q|OPT_m|OPT_A)) + Usage(); + stringToStrings(sf, fields); } Rcl::Db rcldb(rclconfig); if (!extra_dbs.empty()) { @@ -295,9 +295,9 @@ endopts: } if (!rcldb.open(Rcl::Db::DbRO)) { - cerr << "Cant open database in " << rclconfig->getDbDir() << - " reason: " << rcldb.getReason() << endl; - exit(1); + cerr << "Cant open database in " << rclconfig->getDbDir() << + " reason: " << rcldb.getReason() << endl; + exit(1); } if (op_flags & OPT_P) { @@ -312,48 +312,48 @@ endopts: } if (argc < 1) { - Usage(); + Usage(); } string qs = *argv++;argc--; while (argc > 0) { - qs += string(" ") + *argv++;argc--; + qs += string(" ") + *argv++;argc--; } { - string uq; - string charset = rclconfig->getDefCharset(true); - int ercnt; - if (!transcode(qs, uq, charset, "UTF-8", &ercnt)) { - fprintf(stderr, "Can't convert command line args to utf-8\n"); - exit(1); - } else if (ercnt) { - fprintf(stderr, "%d errors while converting arguments from %s " - "to utf-8\n", ercnt, charset.c_str()); - } - qs = uq; + string uq; + string charset = rclconfig->getDefCharset(true); + int ercnt; + if (!transcode(qs, uq, charset, "UTF-8", &ercnt)) { + fprintf(stderr, "Can't convert command line args to utf-8\n"); + exit(1); + } else if (ercnt) { + fprintf(stderr, "%d errors while converting arguments from %s " + "to utf-8\n", ercnt, charset.c_str()); + } + qs = uq; } Rcl::SearchData *sd = 0; if (op_flags & (OPT_a|OPT_o|OPT_f)) { - sd = new Rcl::SearchData(Rcl::SCLT_OR, stemlang); - Rcl::SearchDataClause *clp = 0; - if (op_flags & OPT_f) { - clp = new Rcl::SearchDataClauseFilename(qs); - } else { - clp = new Rcl::SearchDataClauseSimple((op_flags & OPT_o)? + sd = new Rcl::SearchData(Rcl::SCLT_OR, stemlang); + Rcl::SearchDataClause *clp = 0; + if (op_flags & OPT_f) { + clp = new Rcl::SearchDataClauseFilename(qs); + } else { + clp = new Rcl::SearchDataClauseSimple((op_flags & OPT_o)? Rcl::SCLT_OR : Rcl::SCLT_AND, qs); - } - if (sd) - sd->addClause(clp); + } + if (sd) + sd->addClause(clp); } else { - sd = wasaStringToRcl(rclconfig, stemlang, qs, reason); + sd = wasaStringToRcl(rclconfig, stemlang, qs, reason); } if (!sd) { - cerr << "Query string interpretation failed: " << reason << endl; - return 1; + cerr << "Query string interpretation failed: " << reason << endl; + return 1; } std::shared_ptr rq(sd); @@ -362,12 +362,12 @@ endopts: query.setCollapseDuplicates(true); } if (op_flags & OPT_S) { - query.setSortBy(sortfield, (op_flags & OPT_D) ? false : true); + query.setSortBy(sortfield, (op_flags & OPT_D) ? false : true); } Chrono chron; if (!query.setQuery(rq)) { - cerr << "Query setup failed: " << query.getReason() << endl; - return(1); + cerr << "Query setup failed: " << query.getReason() << endl; + return(1); } int cnt; if (op_flags & OPT_E) { @@ -376,62 +376,62 @@ endopts: cnt = query.getResCnt(); } if (!(op_flags & OPT_b)) { - cout << "Recoll query: " << rq->getDescription() << endl; - if (firstres == 0) { - if (cnt <= maxcount) - cout << cnt << " results" << endl; - else - cout << cnt << " results (printing " << maxcount << " max):" - << endl; - } else { - cout << "Printing at most " << cnt - (firstres+maxcount) << - " results from first " << firstres << endl; - } + cout << "Recoll query: " << rq->getDescription() << endl; + if (firstres == 0) { + if (cnt <= maxcount) + cout << cnt << " results" << endl; + else + cout << cnt << " results (printing " << maxcount << " max):" + << endl; + } else { + cout << "Printing at most " << cnt - (firstres+maxcount) << + " results from first " << firstres << endl; + } } if (op_flags & OPT_Q) - cout << "Query setup took " << chron.millis() << " mS" << endl; + cout << "Query setup took " << chron.millis() << " mS" << endl; if (op_flags & OPT_Q) - return(0); + return(0); for (int i = firstres; i < firstres + maxcount; i++) { - Rcl::Doc doc; - if (!query.getDoc(i, doc)) - break; + Rcl::Doc doc; + if (!query.getDoc(i, doc)) + break; - if (op_flags & OPT_F) { - output_fields(fields, doc, query, rcldb, op_flags & OPT_N); - continue; - } + if (op_flags & OPT_F) { + output_fields(fields, doc, query, rcldb, op_flags & OPT_N); + continue; + } - if (op_flags & OPT_e) - doc.url = url_encode(doc.url); + if (op_flags & OPT_e) + doc.url = url_encode(doc.url); - if (op_flags & OPT_b) { - cout << doc.url << endl; - } else { - string titleorfn = doc.meta[Rcl::Doc::keytt]; - if (titleorfn.empty()) - titleorfn = doc.meta[Rcl::Doc::keyfn]; - if (titleorfn.empty()) { + if (op_flags & OPT_b) { + cout << doc.url << endl; + } else { + string titleorfn = doc.meta[Rcl::Doc::keytt]; + if (titleorfn.empty()) + titleorfn = doc.meta[Rcl::Doc::keyfn]; + if (titleorfn.empty()) { string url; printableUrl(rclconfig->getDefCharset(), doc.url, url); titleorfn = path_getsimple(url); } - char cpc[20]; - sprintf(cpc, "%d", doc.pc); - cout - << doc.mimetype << "\t" - << "[" << doc.url << "]" << "\t" - << "[" << titleorfn << "]" << "\t" - << doc.fbytes << "\tbytes" << "\t" - << endl; - if (op_flags & OPT_m) { - for (const auto ent : doc.meta) { - cout << ent.first << " = " << ent.second << endl; - } - } + char cpc[20]; + sprintf(cpc, "%d", doc.pc); + cout + << doc.mimetype << "\t" + << "[" << doc.url << "]" << "\t" + << "[" << titleorfn << "]" << "\t" + << doc.fbytes << "\tbytes" << "\t" + << endl; + if (op_flags & OPT_m) { + for (const auto ent : doc.meta) { + cout << ent.first << " = " << ent.second << endl; + } + } if (op_flags & OPT_A) { string abstract; if (query.makeDocAbstract(doc, abstract)) { @@ -443,7 +443,7 @@ endopts: } if (op_flags & OPT_d) { dump_contents(rclconfig, doc); - } + } } return 0;