From 485a077737180a50cd66263c60f5fedfbf4b2fbe Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Thu, 25 Mar 2021 13:54:14 +0100 Subject: [PATCH] Convert recollindex to using getopt_long and add options to compact and burst the webcache --- src/doc/man/recollindex.1 | 47 +++++------ src/doc/user/usermanual.html | 21 ++++- src/doc/user/usermanual.xml | 11 +++ src/index/recollindex.cpp | 146 +++++++++++++++++++++-------------- 4 files changed, 140 insertions(+), 85 deletions(-) diff --git a/src/doc/man/recollindex.1 b/src/doc/man/recollindex.1 index b2d9a1d6..9656c9da 100644 --- a/src/doc/man/recollindex.1 +++ b/src/doc/man/recollindex.1 @@ -8,8 +8,7 @@ recollindex \- indexing command for the Recoll full text search system .B recollindex [ .B \-c - -] +] [ .B \-z|\-Z ] @@ -20,13 +19,11 @@ recollindex \- indexing command for the Recoll full text search system .B recollindex [ .B \-c - -] +] .B \-m [ .B \-w - -] +] [ .B \-D ] @@ -43,8 +40,7 @@ recollindex \- indexing command for the Recoll full text search system .B recollindex [ .B \-c - -] +] .B \-i [ .B \-Z \-k \-f \-P @@ -54,8 +50,7 @@ recollindex \- indexing command for the Recoll full text search system .B recollindex [ .B \-c - -] +] .B \-r [ .B \-Z \-K \-e \-f @@ -69,39 +64,35 @@ pattern .B recollindex [ .B \-c - -] +] .B \-e [] .br .B recollindex [ .B \-c - -] -.B \-l +] +.B \-l|-S|-E .br .B recollindex [ .B \-c - -] +] .B \-s .br .B recollindex [ .B \-c - -] -.B \-S +] +.B \--webcache-compact .br .B recollindex [ .B \-c - -] -.B \-E +] +.B \--webcache-burst + .SH DESCRIPTION The @@ -297,6 +288,16 @@ package, which must be installed on the system. .B recollindex \-E will check the configuration file for topdirs and other relevant paths existence (to help catch typos). +.PP +.B recollindex \--webcache-compact +will recover the space wasted by erased page instances inside the Web +cache. It may temporarily need to use twice the disk space used by the Web +cache. 
+.PP +.B recollindex \--webcache-burst <destdir> +will extract all entries from the Web cache to files created inside +<destdir>. Each cache entry is extracted as two files, for the data and metadata. + .SH SEE ALSO .PP diff --git a/src/doc/user/usermanual.html b/src/doc/user/usermanual.html index 809e63f9..321df680 100644 --- a/src/doc/user/usermanual.html +++ b/src/doc/user/usermanual.html @@ -423,7 +423,7 @@ alink="#0000FF">

List of Tables

-
3.1. Keyboard shortcuts
+
3.1. Keyboard shortcuts
@@ -2004,6 +2004,23 @@ recollindex -c "$confdir" tool to list and edit the contents of the Web cache. (ToolsWebcache editor)

+

The recollindex command has + two options to help manage the Web cache:

+
+
    +
  • --webcache-compact will recover the + space from erased entries. It may need to use twice the + disk space currently needed for the Web cache.
  • +
  • --webcache-burst destdir will + extract all current entries into pairs of metadata and + data files created inside destdir.
  • +
+

You can find more details on Web indexing, its usage and configuration in a

- +

Table 3.1. Keyboard shortcuts

diff --git a/src/doc/user/usermanual.xml b/src/doc/user/usermanual.xml index aad9cb27..7b5a1e18 100644 --- a/src/doc/user/usermanual.xml +++ b/src/doc/user/usermanual.xml @@ -1300,6 +1300,17 @@ recollindex -c "$confdir" The &RCL; GUI has a tool to list and edit the contents of the Web cache. (ToolsWebcache editor) + The recollindex command has two options to + help manage the Web cache: + + will recover + the space from erased entries. It may need to use twice the disk space + currently needed for the Web cache. + + will extract all current entries into pairs of metadata and data + files created + inside destdir + You can find more details on Web indexing, its usage and configuration in a Recoll 'Howto' diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index 68011e62..b68cc09c 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -28,6 +28,7 @@ #endif #include "safefcntl.h" #include "safeunistd.h" +#include #include #include @@ -59,6 +60,7 @@ using namespace std; #include "execmd.h" #include "checkretryfailed.h" #include "idxstatus.h" +#include "circache.h" // Command line options static int op_flags; @@ -87,6 +89,15 @@ static int op_flags; #define OPT_Z 0x800000 #define OPT_z 0x1000000 +#define OPTVAL_WEBCACHE_COMPACT 1000 +#define OPTVAL_WEBCACHE_BURST 1001 + +static struct option long_options[] = { + {"webcache-compact", 0, 0, OPTVAL_WEBCACHE_COMPACT}, + {"webcache-burst", required_argument, 0, OPTVAL_WEBCACHE_BURST}, + {0, 0, 0, 0} +}; + ReExec *o_reexec; // Globals for atexit cleanup @@ -547,8 +558,8 @@ static void flushIdxReasons() out.open(reasonsfile, ofstream::out|ofstream::trunc); idxreasons.write(out); } catch (...) 
{ - cerr << "Could not write reasons file " << reasonsfile << endl; - idxreasons.write(cerr); + std::cerr << "Could not write reasons file " << reasonsfile << endl; + idxreasons.write(std::cerr); } } } @@ -563,17 +574,18 @@ static void flushIdxReasons() #if USE_WMAIN #define WARGTOSTRING(w) wchartoutf8(w) -static vector argstovector(int argc, wchar_t **argv) +static vector argstovector(int argc, wchar_t **argv, vector& storage) #else #define WARGTOSTRING(w) (w) - static vector argstovector(int argc, char **argv) + static vector argstovector(int argc, char **argv, vector& storage) #endif { + vector args(argc+1); + storage.resize(argc+1); thisprog = path_absolute(WARGTOSTRING(argv[0])); - argc--; argv++; - vector args; for (int i = 0; i < argc; i++) { - args.push_back(WARGTOSTRING(argv[i])); + storage[i] = WARGTOSTRING(argv[i]); + args[i] = storage[i].c_str(); } return args; } @@ -604,73 +616,68 @@ int main(int argc, char *argv[]) o_reexec->init(argc, argv); #endif - // The bizarre conversion to vector stayed from the time when we - // used a file for passing options. 
- vector args = argstovector(argc, argv); + // Only actually useful on Windows: convert wargs to utf-8 chars + vector astore; + vector args = argstovector(argc, argv, astore); vector selpatterns; int sleepsecs{60}; string a_config; - unsigned int aremain = args.size(); - unsigned int argidx = 0; - for (; argidx < args.size(); argidx++) { - const string& arg{args[argidx]}; - aremain = args.size() - argidx; - if (arg[0] != '-') { - break; - } - for (unsigned int cidx = 1; cidx < arg.size(); cidx++) { - switch (arg[cidx]) { - case 'c': op_flags |= OPT_c; if (aremain < 2) Usage(); - a_config = args[argidx+1]; argidx++; goto b1; + int ret; + bool webcache_compact{false}; + bool webcache_burst{false}; + std::string burstdir; + while ((ret = getopt_long(argc, (char *const*)&args[0], "c:CDdEefhikKlmnPp:rR:sS:w:xZz", + long_options, NULL)) != -1) { + switch (ret) { + case 'c': op_flags |= OPT_c; a_config = optarg; break; #ifdef RCL_MONITOR - case 'C': op_flags |= OPT_C; break; - case 'D': op_flags |= OPT_D; break; + case 'C': op_flags |= OPT_C; break; + case 'D': op_flags |= OPT_D; break; #endif #if defined(HAVE_POSIX_FADVISE) - case 'd': op_flags |= OPT_d; break; + case 'd': op_flags |= OPT_d; break; #endif - case 'E': op_flags |= OPT_E; break; - case 'e': op_flags |= OPT_e; break; - case 'f': op_flags |= OPT_f; break; - case 'h': op_flags |= OPT_h; break; - case 'i': op_flags |= OPT_i; break; - case 'k': op_flags |= OPT_k; break; - case 'K': op_flags |= OPT_K; break; - case 'l': op_flags |= OPT_l; break; - case 'm': op_flags |= OPT_m; break; - case 'n': op_flags |= OPT_n; break; - case 'P': op_flags |= OPT_P; break; - case 'p': op_flags |= OPT_p; if (aremain < 2) Usage(); - selpatterns.push_back(args[argidx+1]); argidx++; goto b1; - case 'r': op_flags |= OPT_r; break; - case 'R': op_flags |= OPT_R; if (aremain < 2) Usage(); - reasonsfile = args[argidx+1]; argidx++; goto b1; - case 's': op_flags |= OPT_s; break; + case 'E': op_flags |= OPT_E; break; + case 'e': op_flags |= 
OPT_e; break; + case 'f': op_flags |= OPT_f; break; + case 'h': op_flags |= OPT_h; break; + case 'i': op_flags |= OPT_i; break; + case 'k': op_flags |= OPT_k; break; + case 'K': op_flags |= OPT_K; break; + case 'l': op_flags |= OPT_l; break; + case 'm': op_flags |= OPT_m; break; + case 'n': op_flags |= OPT_n; break; + case 'P': op_flags |= OPT_P; break; + case 'p': op_flags |= OPT_p; selpatterns.push_back(optarg); break; + case 'r': op_flags |= OPT_r; break; + case 'R': op_flags |= OPT_R; reasonsfile = optarg; break; + case 's': op_flags |= OPT_s; break; #ifdef RCL_USE_ASPELL - case 'S': op_flags |= OPT_S; break; + case 'S': op_flags |= OPT_S; break; #endif - case 'w': op_flags |= OPT_w; if (aremain < 2) Usage(); - if ((sscanf(args[argidx+1].c_str(), "%d", &sleepsecs)) != 1) - Usage(); - argidx++; goto b1; - case 'x': op_flags |= OPT_x; break; - case 'Z': op_flags |= OPT_Z; break; - case 'z': op_flags |= OPT_z; break; - default: Usage(); break; - } + case 'w': op_flags |= OPT_w; + if ((sscanf(optarg, "%d", &sleepsecs)) != 1) + Usage(); + break; + case 'x': op_flags |= OPT_x; break; + case 'Z': op_flags |= OPT_Z; break; + case 'z': op_flags |= OPT_z; break; + + case OPTVAL_WEBCACHE_COMPACT: webcache_compact = true; break; + case OPTVAL_WEBCACHE_BURST: burstdir = optarg; webcache_burst = true;break; + + default: Usage(); break; } - b1: - ; } - aremain = args.size() - argidx; + int aremain = argc - optind; if (op_flags & OPT_h) Usage(); #ifndef RCL_MONITOR if (op_flags & (OPT_m | OPT_w|OPT_x)) { - cerr << "-m not available: real-time monitoring was not " + std::cerr << "-m not available: real-time monitoring was not " "configured in this build\n"; exit(1); } @@ -693,9 +700,28 @@ int main(int argc, char *argv[]) if (config == 0 || !config->ok()) { addIdxReason("init", reason); flushIdxReasons(); - cerr << "Configuration problem: " << reason << endl; + std::cerr << "Configuration problem: " << reason << endl; exit(1); } + + // Auxiliary, non-index-related things. 
Avoids having a separate binary. + if (webcache_compact || webcache_burst) { + std::string ccdir = config->getWebcacheDir(); + std::string reason; + if (webcache_compact) { + if (!CirCache::compact(ccdir, &reason)) { + std::cerr << "Web cache compact failed: " << reason << "\n"; + exit(1); + } + } else if (webcache_burst) { + if (!CirCache::burst(ccdir, burstdir, &reason)) { + std::cerr << "Web cache burst failed: " << reason << "\n"; + exit(1); + } + } + exit(0); + } + #ifndef _WIN32 o_reexec->atexit(cleanup); #endif @@ -789,7 +815,7 @@ int main(int argc, char *argv[]) if (op_flags & OPT_r) { if (aremain != 1) Usage(); - string top = args[argidx++]; aremain--; + string top = args[optind++]; aremain--; top = path_canon(top, &orig_cwd); bool status = recursive_index(config, top, selpatterns); if (confindexer && !confindexer->getReason().empty()) { @@ -810,7 +836,7 @@ int main(int argc, char *argv[]) } } else { while (aremain--) { - filenames.push_back(args[argidx++]); + filenames.push_back(args[optind++]); } } @@ -833,7 +859,7 @@ int main(int argc, char *argv[]) } else if (op_flags & OPT_s) { if (aremain != 1) Usage(); - string lang = args[argidx++]; aremain--; + string lang = args[optind++]; aremain--; exit(!createstemdb(config, lang)); #ifdef RCL_USE_ASPELL