This commit is contained in:
Jean-Francois Dockes 2022-09-07 09:48:32 +02:00
parent 84159dd54a
commit 30bf5df03e

View File

@ -39,8 +39,9 @@ using namespace std;
#include "log.h"
#include "rclinit.h"
#include "indexer.h"
#include "rclconfig.h"
#include "smallut.h"
#include "readfile.h"
#include "pathut.h"
#include "rclutil.h"
#include "cancelcheck.h"
@ -51,27 +52,118 @@ using namespace std;
// Command line options.
// op_flags is a bit set built from the OPT_xxx values below; each flag must use a distinct bit.
static int op_flags;
#define OPT_H 0x1
#define OPT_L 0x2
#define OPT_c 0x4
#define OPT_e 0x8
#define OPT_f 0x10
#define OPT_h 0x20
#define OPT_i 0x40
#define OPT_l 0x80
#define OPT_p 0x100
#define OPT_v 0x200
#define OPT_x 0x400
#define OPT_n 0x800
#define OPT_A 0x1000
#define OPT_B 0x2000
#define OPT_C 0x4000

// Return values for long options which have no single-character equivalent. They are chosen
// above the char range so that they can't collide with a short option letter.
#define OPTVAL_RECOLL_CONFIG 1000
#define OPTVAL_HELP 1001

// Long option table for getopt_long(). The has_arg field of each entry must agree with the
// short option string used in main() (a letter followed by ':' takes an argument).
static struct option long_options[] = {
    {"regexp", required_argument, 0, 'e'},
    {"file", required_argument, 0, 'f'},
    // -v is a plain flag: it must not consume the following argument.
    {"invert-match", no_argument, 0, 'v'},
    {"word-regexp", no_argument, 0, 'w'}, // Unimplemented
    {"line-regexp", no_argument, 0, 'x'},
    {"config", required_argument, 0, OPTVAL_RECOLL_CONFIG},
    {"count", no_argument, 0, 'c'},
    {"files-without-match", no_argument, 0, 'L'},
    {"files-with-match", no_argument, 0, 'l'},
    {"with-filename", no_argument, 0, 'H'},
    {"no-filename", no_argument, 0, 'h'},
    {"line-number", no_argument, 0, 'n'},
    {"help", no_argument, 0, OPTVAL_HELP},
    {"after-context", required_argument, 0, 'A'},
    {"before-context", required_argument, 0, 'B'},
    {"context", required_argument, 0, 'C'},
    {0, 0, 0, 0}
};
// Single compiled expression: assigned in main() from the positional pattern argument and
// used for matching in grepit().
SimpleRegexp *exp_p;
// Compiled search expressions. add_expressions() appends to this list, grepit() iterates it.
std::vector<SimpleRegexp *> g_expressions;
// Flags passed to the SimpleRegexp constructor. The -i option ORs in SimpleRegexp::SRE_ICASE.
int g_reflags = SimpleRegexp::SRE_NOSUB;
// Recoll configuration object.
static RclConfig *config;
// Working directory before we change: it's simpler to change early
// but some options need the original for computing absolute paths.
static std::string orig_cwd;
// Top of the tree being walked, set by recursive_grep() (with a trailing slash when it is a
// directory). grepit() removes a prefix of this length from the paths it prints.
static std::string current_topdir;
// Number of context lines to print before (-B, -C) and after (-A, -C) each selected line.
static int beforecontext;
static int aftercontext;
// Search the text of one document and print the results on stdout.
//
// The text is split into lines on "\n". A line is selected when at least one of the
// expressions in g_expressions matches it, or, with -v (OPT_v), when none does, so that -v is
// the exact inverse of the normal selection whatever the number of patterns.
//
// Output is controlled by op_flags:
//  - OPT_H: prefix the output with the document path (made relative to current_topdir) and
//    the ipath.
//  - OPT_n: prefix each printed line with its own line number, context lines included.
//  - OPT_c: only count the selected lines, print the count at the end.
//  - OPT_l / OPT_L: print the prefix only, for documents with / without selected lines
//    (main() also sets OPT_c for these so that the lines are counted and not printed).
//  - beforecontext / aftercontext: number of context lines printed around each selected line.
//
// @param doc the document to search: this uses doc.text, doc.url and doc.ipath.
void grepit(const Rcl::Doc& doc)
{
    std::vector<std::string> lines;
    stringToTokens(doc.text, lines, "\n");

    // Output prefix. This stays empty when file names are not requested.
    std::string ppath;
    if (op_flags & OPT_H) {
        ppath = fileurltolocalpath(doc.url);
        if (ppath.size() > current_topdir.size()) {
            ppath = ppath.substr(current_topdir.size());
        }
        ppath += ":";
        ppath += doc.ipath + "::";
    }

    const bool invert = (op_flags & OPT_v) != 0;
    const bool numbered = (op_flags & OPT_n) != 0;
    const int nlines = int(lines.size());

    // Print the line at index idx (0-based) with the prefix and, if requested, its number.
    auto printline = [&](int idx) {
        std::cout << ppath;
        if (numbered) {
            std::cout << ulltodecstr(idx + 1) << ":";
        }
        std::cout << lines[idx] << "\n";
    };

    int matchcount = 0;
    for (int idx = 0; idx < nlines; idx++) {
        bool anymatch = false;
        for (const auto e_p : g_expressions) {
            if (e_p->simpleMatch(lines[idx])) {
                anymatch = true;
                break;
            }
        }
        // Selected lines: matching ones normally, non-matching ones with -v.
        if (anymatch == invert) {
            continue;
        }

        if (op_flags & OPT_c) {
            matchcount++;
            continue;
        }

        for (int i = std::max(0, idx - beforecontext); i < idx; i++) {
            printline(i);
        }
        printline(idx);
        if (aftercontext && idx < nlines - 1) {
            for (int i = idx + 1; i < std::min(nlines, idx + aftercontext + 1); i++) {
                printline(i);
            }
            std::cout << "--\n";
        }
    }

    // NOTE(review): for -l/-L the prefix printed here still carries the ":<ipath>::" suffix and
    // is empty when OPT_H is not set. Confirm that this is the intended output.
    if (op_flags & OPT_L) {
        if (matchcount == 0) {
            std::cout << ppath << "\n";
        }
    } else if (op_flags & OPT_l) {
        if (matchcount) {
            std::cout << ppath << "\n";
        }
    } else if (op_flags & OPT_c) {
        std::cout << ppath << matchcount << "\n";
    }
}
@ -150,6 +242,10 @@ bool recursive_grep(RclConfig *config, const string& top, const vector<string>&
list<string> files;
WalkerCB cb(files, selpats, config);
FsTreeWalker walker;
current_topdir = top;
if (path_isdir(top)) {
path_catslash(current_topdir);
}
walker.walk(top, cb);
return true;
}
@ -188,56 +284,96 @@ std::string thisprog;
// Help text printed by Usage(). Keep it in sync with long_options[] and with the short option
// string passed to getopt_long() in main().
static const char usage [] =
"\n"
"rclgrep [--help]\n"
"    Print help\n"
"rclgrep [options] PATTERNS [<path [path ...]>]\n"
"rclgrep [options] [-e PATTERNS] [-f FILE] [<path [path ...]>]\n"
"    Search files.\n"
"Options:\n"
"    --config <configdir> : specify config directory, overriding $RECOLL_CONFDIR\n"
"    -e PATTERNS, --regexp=PATTERNS : patterns to search for. Can be given multiple times\n"
"    -f FILE, --file=FILE : read the patterns from FILE\n"
"    -i : ignore case when matching\n"
"    -v : select the non-matching lines\n"
"    -x, --line-regexp : only select lines which match as a whole\n"
"    -c, --count : only print a count of the selected lines\n"
"    -L, --files-without-match : only list the files with no selected lines\n"
"    -l, --files-with-match : only list the files with selected lines\n"
"    -H, --with-filename : print the file name with the output lines\n"
"    -h, --no-filename : do not print the file name with the output lines\n"
"    -n, --line-number : print the line number with the output lines\n"
"    -A NUM, --after-context=NUM : print NUM lines of context after each selected line\n"
"    -B NUM, --before-context=NUM : print NUM lines of context before each selected line\n"
"    -C NUM, --context=NUM : print NUM lines of context around each selected line\n"
"    -p PATTERN : file name selection pattern. Can be given multiple times\n"
;
static void Usage()
static void Usage(FILE* fp = stdout)
{
FILE *fp = (op_flags & OPT_h) ? stdout : stderr;
fprintf(fp, "%s: Usage: %s", path_getsimple(thisprog).c_str(), usage);
fprintf(fp, "Recoll version: %s\n", Rcl::version_string().c_str());
exit((op_flags & OPT_h)==0);
exit(1);
}
// Compile a list of patterns and append the resulting expressions to g_expressions.
//
// @param exps one or several patterns, separated by newline characters.
//
// With -x (OPT_x) each pattern is wrapped as ^(pattern)$ so that it can only match a whole
// line. All expressions are compiled with the flags currently set in g_reflags, so the
// options which affect these (-i, -x) must have been processed before this is called.
// The expressions are allocated with new and stored as raw pointers; nothing here frees them.
static void add_expressions(const std::string& exps)
{
    std::vector<std::string> vexps;
    stringToTokens(exps, vexps, "\n");
    const bool wholeline = (op_flags & OPT_x) != 0;
    for (const auto& pattern : vexps) {
        const std::string pat = wholeline ? "^(" + pattern + ")$" : pattern;
        g_expressions.push_back(new SimpleRegexp(pat, g_reflags));
    }
}
// Raw pattern strings, as collected from the -e options, the -f files and the positional
// pattern argument. Each element may hold several newline-separated patterns. These are
// compiled by buildexps() once all the options have been seen.
std::vector<std::string> g_expstrings;
// Hand every collected pattern string from g_expstrings to add_expressions(), in the order
// in which they were stored.
static void buildexps()
{
    for (auto it = g_expstrings.begin(); it != g_expstrings.end(); ++it) {
        add_expressions(*it);
    }
}
// Read the whole contents of a patterns file and store them as one entry in g_expstrings.
//
// @param fn path of the file to read.
// If the file can't be read, a message is printed on stderr and the process exits with
// status 1.
static void exps_from_file(const std::string& fn)
{
    std::string contents;
    std::string err;
    if (file_to_string(fn, contents, -1, -1, &err)) {
        g_expstrings.push_back(contents);
        return;
    }
    std::cerr << "Could not read " << fn << " : " << err << "\n";
    exit(1);
}
int main(int argc, char *argv[])
{
int ret;
std::string a_config;
vector<string> selpatterns;
int reflags = SimpleRegexp::SRE_NOSUB;
while ((ret = getopt_long(argc, argv, "c:fhip:", long_options, NULL)) != -1) {
while ((ret = getopt_long(argc, argv, "A:B:C:ce:f:hHiLlnp:vx", long_options, NULL)) != -1) {
switch (ret) {
case 'c': op_flags |= OPT_c; a_config = optarg; break;
case 'f': op_flags |= OPT_f; break;
case 'A': op_flags |= OPT_A; aftercontext = atoi(optarg); break;
case 'B': op_flags |= OPT_B; beforecontext = atoi(optarg); break;
case 'C': op_flags |= OPT_C; aftercontext = beforecontext = atoi(optarg); break;
case 'c': op_flags |= OPT_c; break;
case 'e': op_flags |= OPT_e; g_expstrings.push_back(optarg); break;
case 'f': op_flags |= OPT_f; exps_from_file(optarg);break;
case 'h': op_flags |= OPT_h; break;
case 'i': op_flags |= OPT_i; reflags |= SimpleRegexp::SRE_ICASE; break;
case 'H': op_flags |= OPT_H; break;
case 'i': op_flags |= OPT_i; g_reflags |= SimpleRegexp::SRE_ICASE; break;
case 'L': op_flags |= OPT_L|OPT_c; break;
case 'l': op_flags |= OPT_l|OPT_c; break;
case 'n': op_flags |= OPT_n; break;
case 'p': op_flags |= OPT_p; selpatterns.push_back(optarg); break;
case 'v': op_flags |= OPT_v; break;
case 'x': op_flags |= OPT_x; break;
case OPTVAL_RECOLL_CONFIG: a_config = optarg; break;
case OPTVAL_HELP: Usage(stdout); break;
default: Usage(); break;
}
}
int aremain = argc - optind;
if (op_flags & OPT_h)
Usage();
if (aremain == 0)
Usage();
std::string pattern = argv[optind++];
aremain--;
exp_p = new SimpleRegexp(pattern, reflags);
if (!(op_flags & (OPT_e|OPT_f))) {
if (aremain == 0)
Usage();
std::string patterns = argv[optind++];
aremain--;
g_expstrings.push_back(patterns);
}
buildexps();
// If there are more than 1 file args and -h was not used, we want to print file names.
if ((aremain > 1 || (aremain == 1 && path_isdir(argv[optind]))) && !(op_flags & OPT_h)) {
op_flags |= OPT_H;
}
string reason;
int flags = 0;
@ -265,12 +401,6 @@ int main(int argc, char *argv[])
std::vector<std::string> paths;
if (aremain == 0) {
// Read from stdin
char line[1024];
while (fgets(line, 1023, stdin)) {
string sl(line);
trimstring(sl, "\n\r");
paths.push_back(sl);
}
} else {
while (aremain--) {
paths.push_back(argv[optind++]);