From 90dab89303e8a42366cbc16b31f2612f398c687e Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Wed, 15 Nov 2017 11:02:21 +0100 Subject: [PATCH] Added option -P to recollindex to force purge pass with -i --- src/doc/man/recollindex.1 | 38 ++++++++---------------- src/index/indexer.cpp | 3 ++ src/index/indexer.h | 3 ++ src/index/recollindex.cpp | 61 ++++++++++++++++++++++----------------- 4 files changed, 53 insertions(+), 52 deletions(-) diff --git a/src/doc/man/recollindex.1 b/src/doc/man/recollindex.1 index 2340fd83..5b668cdb 100644 --- a/src/doc/man/recollindex.1 +++ b/src/doc/man/recollindex.1 @@ -40,40 +40,25 @@ recollindex \- indexing command for the Recoll full text search system .B \-n|-k ] .br -.B recollindex +.B recollindex [ .B \-c ] -.B \-i +.B \-i [ -.B \-Z -] -[ -.B \-k -] -[ -.B \-f +.B \-Z \-k \-f \-P ] [] .br .B recollindex [ .B \-c - + ] .B \-r [ -.B \-Z -] -[ -.B \-K -] -[ -.B \-e -] -[ -.B \-f +.B \-Z \-K \-e \-f ] [ .B \-p @@ -121,8 +106,7 @@ pattern .SH DESCRIPTION The .B recollindex -utility allows you to perform indexing operations for the Recoll text -search system. +command is the Recoll indexer. .PP As indexing can sometimes take a long time, the command can be interrupted by sending an interrupt (Ctrl-C, SIGINT) or terminate (SIGTERM) @@ -207,12 +191,14 @@ configuration variables will be used, so that some files may be skipped. You can tell recollindex to ignore skippedPaths and skippedNames by setting the .B -\-f -option. This allows fully custom file selection for a given subtree, +\-f option. This allows fully custom file selection for a given subtree, for which you would add the top directory to skippedPaths, and use any custom tool to generate the file list (ie: a tool from a source code -control system). -.PP +control system). When run this way, the indexer normally does not perform +the deleted files purge pass, because it cannot be sure to have seen all +the existing files. You can force a purge pass with +.B +\-P. 
.PP .B recollindex \-e will erase data for individual files from the database. The stem expansion diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index a5b1e1c8..68134a17 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -211,6 +211,9 @@ bool ConfIndexer::indexFiles(list& ifiles, int flag) } } #endif + if (flag & IxFDoPurge) { + m_db.purge(); + } // The close would be done in our destructor, but we want status here if (!m_db.close()) { LOGERR("ConfIndexer::index: error closing database in " << diff --git a/src/index/indexer.h b/src/index/indexer.h index fb51206b..563010f9 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -107,6 +107,9 @@ class ConfIndexer { IxFQuickShallow = 4, // Do not retry files which previously failed ('+' sigs) IxFNoRetryFailed = 8, + // Do perform purge pass even if we can't be sure we saw + // all files + IxFDoPurge = 16, }; /** Run indexers */ diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index 05757315..2bb0542d 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -61,28 +61,31 @@ using namespace std; // Command line options static int op_flags; #define OPT_MOINS 0x1 -#define OPT_C 0x2 -#define OPT_D 0x4 -#define OPT_E 0x8 -#define OPT_K 0x10 -#define OPT_R 0x20 -#define OPT_S 0x40 -#define OPT_Z 0x80 -#define OPT_b 0x100 -#define OPT_c 0x200 -#define OPT_e 0x400 -#define OPT_f 0x800 -#define OPT_h 0x1000 -#define OPT_i 0x2000 -#define OPT_k 0x4000 -#define OPT_l 0x8000 -#define OPT_m 0x10000 -#define OPT_n 0x20000 -#define OPT_r 0x40000 -#define OPT_s 0x80000 -#define OPT_w 0x100000 -#define OPT_x 0x200000 -#define OPT_z 0x400000 +#define OPT_C 0x1000000 /* own bit: 0x1 is already taken by OPT_MOINS */ +#define OPT_D 0x2 +#define OPT_E 0x4 +#define OPT_K 0x8 +#define OPT_P 0x10 +#define OPT_R 0x20 +#define OPT_S 0x40 +#define OPT_Z 0x80 +#define OPT_b 0x100 +#define OPT_c 0x200 +#define OPT_e 0x400 +#define OPT_f 0x800 +#define OPT_h 0x1000 +#define OPT_i 0x2000 +#define OPT_k 0x4000 +#define OPT_l 0x8000 
+#define OPT_m 0x10000 +#define OPT_n 0x20000 +#define OPT_p 0x40000 +#define OPT_r 0x80000 +#define OPT_s 0x100000 +#define OPT_w 0x200000 +#define OPT_x 0x400000 +#define OPT_z 0x800000 + ReExec *o_reexec; // Globals for atexit cleanup @@ -261,6 +264,9 @@ bool indexfiles(RclConfig *config, list &filenames) indexerFlags |= ConfIndexer::IxFNoRetryFailed; if (op_flags & OPT_f) indexerFlags |= ConfIndexer::IxFIgnoreSkip; + if (op_flags & OPT_P) { + indexerFlags |= ConfIndexer::IxFDoPurge; + } return confindexer->indexFiles(filenames, indexerFlags); } @@ -351,10 +357,12 @@ static const char usage [] = " -x disables exit on end of x11 session\n" #endif /* DISABLE_X11MON */ #endif /* RCL_MONITOR */ -"recollindex -e \n" -" Purge data for individual files. No stem database updates\n" -"recollindex -i [-f] [-Z] \n" +"recollindex -e []\n" +" Purge data for individual files. No stem database updates.\n" +" Reads paths on stdin if none is given as argument.\n" +"recollindex -i [-f] [-P] [-Z] []\n" " Index individual files. No database purge or stem database updates\n" +" Will read paths on stdin if none is given as argument\n" " -f : ignore skippedPaths and skippedNames while doing this\n" "recollindex -r [-K] [-f] [-Z] [-p pattern] \n" " Recursive partial reindex. \n" @@ -468,7 +476,8 @@ int main(int argc, char **argv) case 'l': op_flags |= OPT_l; break; case 'm': op_flags |= OPT_m; break; case 'n': op_flags |= OPT_n; break; - case 'p': if (argc < 2) Usage(); + case 'P': op_flags |= OPT_P; break; + case 'p': op_flags |= OPT_p; if (argc < 2) Usage(); selpatterns.push_back(*(++argv)); argc--; goto b1; case 'r': op_flags |= OPT_r; break;