diff --git a/src/index/rclgrep.cpp b/src/index/rclgrep.cpp
index 8fdd3908..5004e573 100644
--- a/src/index/rclgrep.cpp
+++ b/src/index/rclgrep.cpp
@@ -39,8 +39,9 @@ using namespace std;
 
 #include "log.h"
 #include "rclinit.h"
-#include "indexer.h"
+#include "rclconfig.h"
 #include "smallut.h"
+#include "readfile.h"
 #include "pathut.h"
 #include "rclutil.h"
 #include "cancelcheck.h"
@@ -51,27 +52,132 @@ using namespace std;
 
 // Command line options
 static int op_flags;
-#define OPT_c 0x2
-#define OPT_f 0x40
-#define OPT_h 0x80
-#define OPT_i 0x200
-#define OPT_p 0x10000
+// One bit per option letter, OR'ed into op_flags while parsing the command line.
+#define OPT_H 0x1
+#define OPT_L 0x2
+#define OPT_c 0x4
+#define OPT_e 0x8
+#define OPT_f 0x10
+#define OPT_h 0x20
+#define OPT_i 0x40
+#define OPT_l 0x80
+#define OPT_p 0x100
+#define OPT_v 0x200
+#define OPT_x 0x400
+#define OPT_n 0x800
+#define OPT_A 0x1000
+#define OPT_B 0x2000
+#define OPT_C 0x4000
+
+// Return values for the long options which have no single-letter equivalent.
+#define OPTVAL_RECOLL_CONFIG 1000
+#define OPTVAL_HELP 1001
 
 static struct option long_options[] = {
+    {"regexp", required_argument, 0, 'e'},
+    {"file", required_argument, 0, 'f'},
+    // -v is a plain flag ("v" has no ':' in the getopt string): it takes no argument.
+    {"invert-match", 0, 0, 'v'},
+    {"word-regexp", 0, 0, 'w'}, // Unimplemented
+    {"line-regexp", 0, 0, 'x'},
+    {"config", required_argument, 0, OPTVAL_RECOLL_CONFIG},
+    {"count", 0, 0, 'c'},
+    {"files-without-match", 0, 0, 'L'},
+    {"files-with-match", 0, 0, 'l'},
+    {"with-filename", 0, 0, 'H'},
+    {"no-filename", 0, 0, 'h'},
+    {"line-number", 0, 0, 'n'},
+    {"help", 0, 0, OPTVAL_HELP},
+    {"after-context", required_argument, 0, 'A'},
+    {"before-context", required_argument, 0, 'B'},
+    {"context", required_argument, 0, 'C'},
     {0, 0, 0, 0}
 };
 
-SimpleRegexp *exp_p;
+// Compiled search expressions and the flags used to compile them.
+std::vector<SimpleRegexp*> g_expressions;
+int g_reflags = SimpleRegexp::SRE_NOSUB;
+
+static RclConfig *config;
+
+// Working directory before we change: it's simpler to change early
+// but some options need the original for computing absolute paths.
+static std::string orig_cwd;
+// Top of the tree being walked, set by recursive_grep().
+static std::string current_topdir;
+// Number of context lines printed before / after each selected line (-A, -B, -C).
+static int beforecontext;
+static int aftercontext;
 
+// Search the text of one document, line by line, and print the selected lines (or, with
+// -c, -l or -L, the count / path only). A line is selected when at least one expression
+// matches it; -v inverts the selection.
 void grepit(const Rcl::Doc& doc)
 {
-    std::vector<std::string> lines;
+    std::vector<std::string> lines;
+    int matchcount = 0;
     stringToTokens(doc.text, lines, "\n");
-    for (const auto& line: lines) {
-        //std::cout << "LINE:[" << line << "]\n";
-        if (exp_p->simpleMatch(line)) {
-            std::cout << fileurltolocalpath(doc.url) << ":" << doc.ipath << "::" << line << "\n";
+
+    std::string ppath;
+    if (op_flags & OPT_H) {
+        ppath = fileurltolocalpath(doc.url);
+        // Only strip the top directory when it really is a prefix of the path.
+        if (ppath.size() > current_topdir.size() &&
+            ppath.compare(0, current_topdir.size(), current_topdir) == 0) {
+            ppath = ppath.substr(current_topdir.size());
         }
+        ppath += ":";
+        ppath += doc.ipath + "::";
+    }
+    // Print line idx with its prefix. With -n every printed line, context included, shows
+    // its own 1-based number instead of the number of the selected line.
+    auto printline = [&](int idx) {
+        std::cout << ppath;
+        if (op_flags & OPT_n) {
+            std::cout << ulltodecstr(idx + 1) << ":";
+        }
+        std::cout << lines[idx] << "\n";
+    };
+    const bool invert = (op_flags & OPT_v) != 0;
+    const int nlines = int(lines.size());
+    for (int idx = 0; idx < nlines; idx++) {
+        // Selected if any expression matches (grep semantics for multiple patterns). No
+        // expression at all behaves like an empty pattern, which matches every line.
+        bool anymatch = g_expressions.empty();
+        for (const auto e_p : g_expressions) {
+            if (e_p->simpleMatch(lines[idx])) {
+                anymatch = true;
+                break;
+            }
+        }
+        if (anymatch == invert) {
+            continue;
+        }
+        if (op_flags & OPT_c) {
+            matchcount++;
+            continue;
+        }
+        for (int i = std::max(0, idx - beforecontext); i < idx; i++) {
+            printline(i);
+        }
+        printline(idx);
+        if (aftercontext && idx < nlines - 1) {
+            for (int i = idx + 1; i < std::min(nlines, idx + aftercontext + 1); i++) {
+                printline(i);
+            }
+            std::cout << "--\n";
+        }
+    }
+    if (op_flags & OPT_L) {
+        if (matchcount == 0) {
+            std::cout << ppath << "\n";
+        }
+    } else if (op_flags & OPT_l) {
+        if (matchcount) {
+            std::cout << ppath << "\n";
+        }
+    } else if (op_flags & OPT_c) {
+        std::cout << ppath << matchcount << "\n";
     }
 }
 
@@ -150,6 +256,10 @@ bool recursive_grep(RclConfig *config, const string& top, const vector<string>&
     list<string> files;
     WalkerCB cb(files, selpats,
config);
     FsTreeWalker walker;
+    current_topdir = top;
+    if (path_isdir(top)) {
+        path_catslash(current_topdir);
+    }
     walker.walk(top, cb);
     return true;
 }
@@ -188,56 +284,104 @@ std::string thisprog;
 
 static const char usage [] =
 "\n"
-"rclgrep [-h] \n"
+"rclgrep [--help] \n"
 " Print help\n"
 "rclgrep [-f] []\n"
-" Index individual files. No database purge or stem database updates\n"
-" Will read paths on stdin if none is given as argument\n"
-" -f : ignore skippedPaths and skippedNames while doing this\n"
-"Common options:\n"
-" -c : specify config directory, overriding $RECOLL_CONFDIR\n"
+" Search files.\n"
+" --config DIR : specify config directory, overriding $RECOLL_CONFDIR\n"
+" -e PATTERNS, --regexp=PATTERNS patterns to search for. Can be given multiple times\n"
 ;
 
-static void Usage()
+// Print the usage message on fp and exit: status 0 when the message was requested
+// (--help, on stdout), 1 when it reports a command line error (the default, on stderr).
+static void Usage(FILE* fp = stderr)
 {
-    FILE *fp = (op_flags & OPT_h) ? stdout : stderr;
     fprintf(fp, "%s: Usage: %s", path_getsimple(thisprog).c_str(), usage);
-    fprintf(fp, "Recoll version: %s\n", Rcl::version_string().c_str());
-    exit((op_flags & OPT_h)==0);
+    exit(fp == stdout ? 0 : 1);
 }
 
-static RclConfig *config;
+// Compile each newline-separated pattern found in exps and append it to g_expressions.
+// With -x the pattern is anchored so that it has to match the whole line.
+static void add_expressions(const std::string& exps)
+{
+    std::vector<std::string> vexps;
+    stringToTokens(exps, vexps, "\n");
+    for (const auto& pattern : vexps) {
+        if (op_flags & OPT_x) {
+            auto newpat = std::string("^(") + pattern + ")$";
+            g_expressions.push_back(new SimpleRegexp(newpat, g_reflags));
+        } else {
+            g_expressions.push_back(new SimpleRegexp(pattern, g_reflags));
+        }
+    }
+}
 
-// Working directory before we change: it's simpler to change early
-// but some options need the original for computing absolute paths.
-static std::string orig_cwd;
+// Pattern strings collected from the command line (-e, -f or first argument). They are
+// compiled by buildexps() after option parsing, so that -i and -x apply to all of them.
+std::vector<std::string> g_expstrings;
+// Compile all the collected pattern strings.
+static void buildexps()
+{
+    for (const auto& s : g_expstrings)
+        add_expressions(s);
+}
+
+// Read the contents of file fn (option -f) and queue it as patterns. Exits on read error.
+static void exps_from_file(const std::string& fn)
+{
+    std::string data;
+    std::string reason;
+    if (!file_to_string(fn, data, -1, -1, &reason)) {
+        std::cerr << "Could not read " << fn << " : " << reason << "\n";
+        exit(1);
+    }
+    g_expstrings.push_back(data);
+}
 
 int main(int argc, char *argv[])
 {
     int ret;
     std::string a_config;
     vector<string> selpatterns;
-    int reflags = SimpleRegexp::SRE_NOSUB;
 
-    while ((ret = getopt_long(argc, argv, "c:fhip:", long_options, NULL)) != -1) {
+    while ((ret = getopt_long(argc, argv, "A:B:C:ce:f:hHiLlnp:vx", long_options, NULL)) != -1) {
         switch (ret) {
-        case 'c': op_flags |= OPT_c; a_config = optarg; break;
-        case 'f': op_flags |= OPT_f; break;
+        case 'A': op_flags |= OPT_A; aftercontext = atoi(optarg); break;
+        case 'B': op_flags |= OPT_B; beforecontext = atoi(optarg); break;
+        case 'C': op_flags |= OPT_C; aftercontext = beforecontext = atoi(optarg); break;
+        case 'c': op_flags |= OPT_c; break;
+        case 'e': op_flags |= OPT_e; g_expstrings.push_back(optarg); break;
+        case 'f': op_flags |= OPT_f; exps_from_file(optarg);break;
         case 'h': op_flags |= OPT_h; break;
-        case 'i': op_flags |= OPT_i; reflags |= SimpleRegexp::SRE_ICASE; break;
+        case 'H': op_flags |= OPT_H; break;
+        case 'i': op_flags |= OPT_i; g_reflags |= SimpleRegexp::SRE_ICASE; break;
+        case 'L': op_flags |= OPT_L|OPT_c; break;
+        case 'l': op_flags |= OPT_l|OPT_c; break;
+        case 'n': op_flags |= OPT_n; break;
         case 'p': op_flags |= OPT_p; selpatterns.push_back(optarg); break;
+        case 'v': op_flags |= OPT_v; break;
+        case 'x': op_flags |= OPT_x; break;
+        case OPTVAL_RECOLL_CONFIG: a_config = optarg; break;
+        case OPTVAL_HELP: Usage(stdout); break;
         default: Usage(); break;
         }
     }
     int aremain = argc - optind;
 
-    if (op_flags & OPT_h)
-        Usage();
-    if (aremain == 0)
-        Usage();
-    std::string pattern = argv[optind++];
-    aremain--;
-    exp_p = new SimpleRegexp(pattern, reflags);
+    if (!(op_flags & (OPT_e|OPT_f))) {
+        if (aremain == 0)
+            Usage();
+        std::string patterns = argv[optind++];
+        aremain--;
+        g_expstrings.push_back(patterns);
+    }
+
+    buildexps();
+
+    // If there are more than 1 file args and -h was not used, we want to print file names.
+    if ((aremain > 1 || (aremain == 1 && path_isdir(argv[optind]))) && !(op_flags & OPT_h)) {
+        op_flags |= OPT_H;
+    }
 
     string reason;
     int flags = 0;
@@ -265,12 +409,7 @@ int main(int argc, char *argv[])
     std::vector<std::string> paths;
     if (aremain == 0) {
         // Read from stdin
-        char line[1024];
-        while (fgets(line, 1023, stdin)) {
-            string sl(line);
-            trimstring(sl, "\n\r");
-            paths.push_back(sl);
-        }
+        // NOTE(review): nothing is read here any more, so paths stays empty - confirm intended
     } else {
         while (aremain--) {
             paths.push_back(argv[optind++]);