diff --git a/src/doc/man/recollindex.1 b/src/doc/man/recollindex.1 index 1e9d618a..2340fd83 100644 --- a/src/doc/man/recollindex.1 +++ b/src/doc/man/recollindex.1 @@ -67,8 +67,18 @@ recollindex \- indexing command for the Recoll full text search system .B \-Z ] [ +.B \-K +] +[ +.B \-e +] +[ .B \-f ] +[ +.B \-p +pattern +] .br .B recollindex @@ -208,6 +218,14 @@ control system). will erase data for individual files from the database. The stem expansion databases will not be updated. .PP +Options +.B +\-i +and +.B +\-e +can be combined. This will first perform the purge, then the indexing. +.PP With options .B \-i or @@ -215,24 +233,30 @@ or , if no file names are given on the command line, they will be read from stdin, so that you could for example run: .PP -find /path/to/dir \-print | recollindex \-e -.PP -followed by -.PP -find /path/to/dir \-print | recollindex \-i +find /path/to/dir \-print | recollindex \-e \-i .PP to force the reindexing of a directory tree (which has to exist inside the file system area defined by .I topdirs in recoll.conf). You could mostly accomplish the same thing with .PP -.B find /path/to/dir \-print | recollindex \-f \-Z +find /path/to/dir \-print | recollindex \-Z \-i +.PP +The latter will perform a less thorough job of purging stale sub-documents +though. .PP .B recollindex \-r -mostly works like \-i, but the parameter is a single directory, which will +mostly works like +.B \-i +, but the parameter is a single directory, which will be recursively updated. This mostly does nothing more than .B find topdir | recollindex \-i -but it may be more convenient to use when started from another program. +but it may be more convenient to use when started from another +program. This retries failed files by default, use option +.B \-K +to change. One or multiple +.B \-p +options can be used to set shell-type selection patterns (e.g.: *.pdf). .PP .B recollindex \-l will list the names of available language stemmers. 
diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index 083741d8..f3704e76 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #ifndef _WIN32 #include #include @@ -78,7 +79,7 @@ static int op_flags; #define OPT_r 0x40000 #define OPT_k 0x80000 #define OPT_E 0x100000 - +#define OPT_K 0x200000 ReExec *o_reexec; // Globals for atexit cleanup @@ -195,27 +196,46 @@ void rclIxIonice(const RclConfig *config) class MakeListWalkerCB : public FsTreeWalkerCB { public: - MakeListWalkerCB(list& files) - : m_files(files) + MakeListWalkerCB(list& files, const vector& selpats) + : m_files(files), m_pats(selpats) { } virtual FsTreeWalker::Status processone(const string& fn, const struct stat *, FsTreeWalker::CbFlag flg) { - if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwRegular) - m_files.push_back(fn); + if (flg== FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwRegular){ + if (m_pats.empty()) { + cerr << "Selecting " << fn << endl; + m_files.push_back(fn); + } else { + for (vector::const_iterator it = m_pats.begin(); + it != m_pats.end(); it++) { + if (fnmatch(it->c_str(), fn.c_str(), 0) == 0) { + m_files.push_back(fn); + break; + } + } + } + } return FsTreeWalker::FtwOk; } list& m_files; + const vector& m_pats; }; -// Build a list of things to index and call indexfiles. -bool recursive_index(RclConfig *config, const string& top) +// Build a list of things to index, then call purgefiles and/or +// indexfiles. 
This is basically the same as find xxx | recollindex +// -i [-e] without the find (so, simpler but less powerful) +bool recursive_index(RclConfig *config, const string& top, + const vector& selpats) { list files; - MakeListWalkerCB cb(files); + MakeListWalkerCB cb(files, selpats); FsTreeWalker walker; walker.walk(top, cb); + if (op_flags & OPT_e) { + purgefiles(config, files); + } return indexfiles(config, files); } @@ -232,9 +252,13 @@ bool indexfiles(RclConfig *config, list &filenames) if (filenames.empty()) return true; makeIndexerOrExit(config, (op_flags & OPT_Z) != 0); - return confindexer->indexFiles(filenames, (op_flags&OPT_f) ? - ConfIndexer::IxFIgnoreSkip : - ConfIndexer::IxFNone); + // The default is to retry failed files + int indexerFlags = ConfIndexer::IxFNone; + if (op_flags & OPT_K) + indexerFlags |= ConfIndexer::IxFNoRetryFailed; + if (op_flags & OPT_f) + indexerFlags |= ConfIndexer::IxFIgnoreSkip; + return confindexer->indexFiles(filenames, indexerFlags); } // Delete a list of files. Same comments about call contexts as indexfiles. @@ -348,8 +372,11 @@ static const char usage [] = "recollindex -i [-f] [-Z] \n" " Index individual files. No database purge or stem database updates\n" " -f : ignore skippedPaths and skippedNames while doing this\n" -"recollindex -r [-f] [-Z] \n" -" Recursive partial reindex\n" +"recollindex -r [-K] [-f] [-Z] [-p pattern] \n" +" Recursive partial reindex. 
\n" +" -p : filter file names, multiple instances are allowed, e.g.: \n" +" -p *.odt -p *.pdf\n" +" -K : skip previously failed files (they are retried by default)\n" "recollindex -l\n" " List available stemming languages\n" "recollindex -s \n" @@ -398,6 +425,7 @@ int main(int argc, char **argv) { string a_config; int sleepsecs = 60; + vector selpatterns; // The reexec struct is used by the daemon to shed memory after // the initial indexing pass and to restart when the configuration @@ -430,9 +458,13 @@ int main(int argc, char **argv) case 'h': op_flags |= OPT_h; break; case 'i': op_flags |= OPT_i; break; case 'k': op_flags |= OPT_k; break; + case 'K': op_flags |= OPT_K; break; case 'l': op_flags |= OPT_l; break; case 'm': op_flags |= OPT_m; break; case 'n': op_flags |= OPT_n; break; + case 'p': if (argc < 2) Usage(); + selpatterns.push_back(*(++argv)); + argc--; goto b1; case 'r': op_flags |= OPT_r; break; case 's': op_flags |= OPT_s; break; #ifdef RCL_USE_ASPELL @@ -545,7 +577,15 @@ int main(int argc, char **argv) rclIxIonice(config); #endif - if (op_flags & (OPT_i|OPT_e)) { + if (op_flags & OPT_r) { + if (argc != 1) + Usage(); + string top = *argv++; argc--; + bool status = recursive_index(config, top, selpatterns); + if (confindexer && !confindexer->getReason().empty()) + cerr << confindexer->getReason() << endl; + exit(status ? 0 : 1); + } else if (op_flags & (OPT_i|OPT_e)) { lockorexit(&pidfile); list filenames; @@ -577,14 +617,6 @@ int main(int argc, char **argv) if (confindexer && !confindexer->getReason().empty()) cerr << confindexer->getReason() << endl; exit(status ? 0 : 1); - } else if (op_flags & OPT_r) { - if (argc != 1) - Usage(); - string top = *argv++; argc--; - bool status = recursive_index(config, top); - if (confindexer && !confindexer->getReason().empty()) - cerr << confindexer->getReason() << endl; - exit(status ? 
0 : 1); } else if (op_flags & OPT_l) { if (argc != 0) Usage(); diff --git a/src/qtgui/rclm_idx.cpp b/src/qtgui/rclm_idx.cpp index 50cc8510..226c6f9e 100644 --- a/src/qtgui/rclm_idx.cpp +++ b/src/qtgui/rclm_idx.cpp @@ -26,6 +26,7 @@ #include "transcode.h" #include "indexer.h" #include "rclmain_w.h" +#include "specialindex.h" using namespace std; @@ -116,25 +117,25 @@ void RclMain::periodic100() m_indexerState = IXST_RUNNINGMINE; fileToggleIndexingAction->setText(tr("Stop &Indexing")); fileToggleIndexingAction->setEnabled(true); - fileRetryFailedAction->setEnabled(false); fileRebuildIndexAction->setEnabled(false); + actionSpecial_Indexing->setEnabled(false); periodictimer->setInterval(200); } else { Pidfile pidfile(theconfig->getPidfile()); if (pidfile.open() == 0) { m_indexerState = IXST_NOTRUNNING; fileToggleIndexingAction->setText(tr("Update &Index")); - fileRetryFailedAction->setEnabled(true); fileToggleIndexingAction->setEnabled(true); fileRebuildIndexAction->setEnabled(true); + actionSpecial_Indexing->setEnabled(true); periodictimer->setInterval(1000); } else { // Real time or externally started batch indexer running m_indexerState = IXST_RUNNINGNOTMINE; fileToggleIndexingAction->setText(tr("Stop &Indexing")); fileToggleIndexingAction->setEnabled(true); - fileRetryFailedAction->setEnabled(false); fileRebuildIndexAction->setEnabled(false); + actionSpecial_Indexing->setEnabled(false); periodictimer->setInterval(200); } } @@ -229,8 +230,6 @@ void RclMain::toggleIndexing() args.clear(); args.push_back("-c"); args.push_back(theconfig->getConfDir()); - if (fileRetryFailedAction->isChecked()) - args.push_back("-k"); m_idxproc = new ExecCmd; m_idxproc->startExec("recollindex", args, false, false); } @@ -249,6 +248,10 @@ void RclMain::rebuildIndex() return; //?? 
Should not have been called case IXST_NOTRUNNING: { + if (m_idxproc) { + LOGERR(("RclMain::rebuildIndex: current indexer exec not null\n")); + return; + } int rep = QMessageBox::warning(0, tr("Erasing index"), tr("Reset the index and start " @@ -274,6 +277,126 @@ void RclMain::rebuildIndex() } } +void SpecIdxW::onBrowsePB_clicked() +{ + QString dir = myGetFileName(true, tr("Top indexed entity"), true); + targLE->setText(dir); +} + +bool SpecIdxW::noRetryFailed() +{ + return noRetryFailedCB->isChecked(); +} + +bool SpecIdxW::eraseFirst() +{ + return eraseBeforeCB->isChecked(); +} + +std::vector SpecIdxW::selpatterns() +{ + vector pats; + string text = qs2utf8s(selPatsLE->text()); + if (!text.empty()) { + stringToStrings(text, pats); + } + return pats; +} + +std::string SpecIdxW::toptarg() +{ + return qs2utf8s(targLE->text()); +} + +void SpecIdxW::onTargLE_textChanged(const QString& text) +{ + if (text.isEmpty()) + selPatsLE->setEnabled(false); + else + selPatsLE->setEnabled(true); +} + +static string execToString(const string& cmd, const vector& args) +{ + string command = cmd + " "; + for (vector::const_iterator it = args.begin(); + it != args.end(); it++) { + command += "{" + *it + "} "; + } + return command; +} + +void RclMain::specialIndex() +{ + LOGDEB(("RclMain::specialIndex\n")); + switch (m_indexerState) { + case IXST_UNKNOWN: + case IXST_RUNNINGMINE: + case IXST_RUNNINGNOTMINE: + return; //?? Should not have been called + case IXST_NOTRUNNING: + default: + break; + } + if (m_idxproc) { + LOGERR(("RclMain::rebuildIndex: current indexer exec not null\n")); + return; + } + if (!specidx) // ?? 
+ return; + + vector args; + args.push_back("-c"); + args.push_back(theconfig->getConfDir()); + + string top = specidx->toptarg(); + if (!top.empty()) { + args.push_back("-r"); + } + + if (specidx->eraseFirst()) { + if (top.empty()) { + args.push_back("-Z"); + } else { + args.push_back("-e"); + } + } + + // The default for retrying differ depending if -r is set + if (top.empty()) { + if (!specidx->noRetryFailed()) { + args.push_back("-k"); + } + } else { + if (specidx->noRetryFailed()) { + args.push_back("-K"); + } + } + + vector selpats = specidx->selpatterns(); + if (!selpats.empty() && top.empty()) { + QMessageBox::warning(0, tr("Selection patterns need topdir"), + tr("Selection patterns can only be used with a " + "start directory"), + QMessageBox::Ok, + QMessageBox::NoButton); + return; + } + + for (vector::const_iterator it = selpats.begin(); + it != selpats.end(); it++) { + args.push_back("-p"); + args.push_back(*it); + } + if (!top.empty()) { + args.push_back(top); + } + m_idxproc = new ExecCmd; + LOGINFO(("specialIndex: exec: %s\n", + execToString("recollindex", args).c_str())); + m_idxproc->startExec("recollindex", args, false, false); +} + void RclMain::updateIdxForDocs(vector& docs) { if (m_idxproc) { @@ -297,6 +420,6 @@ void RclMain::updateIdxForDocs(vector& docs) fileToggleIndexingAction->setText(tr("Stop &Indexing")); } fileToggleIndexingAction->setEnabled(false); - fileRetryFailedAction->setEnabled(false); + actionSpecial_Indexing->setEnabled(false); } diff --git a/src/qtgui/rclm_wins.cpp b/src/qtgui/rclm_wins.cpp index 98e68e27..05e79482 100644 --- a/src/qtgui/rclm_wins.cpp +++ b/src/qtgui/rclm_wins.cpp @@ -28,6 +28,7 @@ #include "rtitool.h" #include "snippets_w.h" #include "fragbuts.h" +#include "specialindex.h" #include "rclmain_w.h" using namespace std; @@ -94,6 +95,19 @@ void RclMain::showFragButs() } } +void RclMain::showSpecIdx() +{ + if (specidx == 0) { + specidx = new SpecIdxW(0); + connect(specidx, SIGNAL(accepted()), this, 
SLOT(specialIndex())); + specidx->show(); + } else { + // Close and reopen, in hope that makes us visible... + specidx->close(); + specidx->show(); + } +} + void RclMain::showIndexConfig() { showIndexConfig(false); diff --git a/src/qtgui/rclmain.ui b/src/qtgui/rclmain.ui index bb690cbc..6d89c117 100644 --- a/src/qtgui/rclmain.ui +++ b/src/qtgui/rclmain.ui @@ -73,8 +73,8 @@ &File - + @@ -497,6 +497,14 @@ Load saved query + + + Special Indexing + + + Indexing with special options + + diff --git a/src/qtgui/rclmain_w.cpp b/src/qtgui/rclmain_w.cpp index 8a266089..34ddcb79 100644 --- a/src/qtgui/rclmain_w.cpp +++ b/src/qtgui/rclmain_w.cpp @@ -327,6 +327,8 @@ void RclMain::init() this, SLOT(showSpellDialog())); connect(actionQuery_Fragments, SIGNAL(triggered()), this, SLOT(showFragButs())); + connect(actionSpecial_Indexing, SIGNAL(triggered()), + this, SLOT(showSpecIdx())); connect(indexConfigAction, SIGNAL(triggered()), this, SLOT(showIndexConfig())); connect(indexScheduleAction, SIGNAL(triggered()), diff --git a/src/qtgui/rclmain_w.h b/src/qtgui/rclmain_w.h index 599d07ba..3e989e73 100644 --- a/src/qtgui/rclmain_w.h +++ b/src/qtgui/rclmain_w.h @@ -40,6 +40,7 @@ class ResTable; class CronToolW; class RTIToolW; class FragButs; +class SpecIdxW; #include "ui_rclmain.h" @@ -69,6 +70,7 @@ public: rtiTool(0), spellform(0), fragbuts(0), + specidx(0), periodictimer(0), restable(0), displayingTable(0), @@ -114,11 +116,13 @@ public slots: virtual void periodic100(); virtual void toggleIndexing(); virtual void rebuildIndex(); + virtual void specialIndex(); virtual void startSearch(STD_SHARED_PTR sdata, bool issimple); virtual void previewClosed(Preview *w); virtual void showAdvSearchDialog(); virtual void showSpellDialog(); virtual void showFragButs(); + virtual void showSpecIdx(); virtual void showAboutDialog(); virtual void showMissingHelpers(); virtual void showActiveTypes(); @@ -191,6 +195,7 @@ private: RTIToolW *rtiTool; SpellW *spellform; FragButs *fragbuts; + SpecIdxW 
*specidx; QTimer *periodictimer; ResTable *restable; bool displayingTable; diff --git a/src/qtgui/specialindex.h b/src/qtgui/specialindex.h new file mode 100644 index 00000000..1ed775e3 --- /dev/null +++ b/src/qtgui/specialindex.h @@ -0,0 +1,53 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#ifndef _SPECIDX_W_H_INCLUDED_ +#define _SPECIDX_W_H_INCLUDED_ + +#include +#include + +#include "ui_specialindex.h" + +class QPushButton; + +class SpecIdxW : public QDialog, public Ui::SpecIdxW { + Q_OBJECT + +public: + + SpecIdxW(QWidget * parent = 0) + : QDialog(parent) + { + setupUi(this); + selPatsLE->setEnabled(false); + connect(browsePB, SIGNAL(clicked()), this, SLOT(onBrowsePB_clicked())); + connect(targLE, SIGNAL(textChanged(const QString&)), + this, SLOT(onTargLE_textChanged(const QString&))); + } + bool noRetryFailed(); + bool eraseFirst(); + std::vector selpatterns(); + std::string toptarg(); + +public slots: + + void onTargLE_textChanged(const QString&); + void onBrowsePB_clicked(); +}; + + +#endif /* _SPECIDX_W_H_INCLUDED_ */ diff --git a/src/qtgui/specialindex.ui b/src/qtgui/specialindex.ui new file mode 100644 index 00000000..8acfeb12 --- /dev/null +++ b/src/qtgui/specialindex.ui @@ -0,0 +1,148 @@ + + + SpecIdxW + + + Qt::WindowModal + + + + 0 + 0 + 413 + 191 + + + + Special 
Indexing + + + + + + + + Do not retry previously failed files. + + + + + + + Else only modified or failed files will be processed. + + + Erase selected files data before indexing. + + + + + + + + + + 8 + 0 + + + + + 300 + 0 + + + + Directory to recursively index + + + + + + + Browse + + + false + + + + + + + Start directory (else use regular topdirs): + + + false + + + + + + + + + + + Leave empty to select all files. You can use multiple space-separated shell-type patterns.<br>Patterns with embedded spaces should be quoted with double quotes.<br>Can only be used if the start target is set. + + + Selection patterns: + + + + + + + + + + + + + + Qt::Horizontal + + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + + + + + + + buttonBox + accepted() + SpecIdxW + accept() + + + 248 + 254 + + + 157 + 274 + + + + + buttonBox + rejected() + SpecIdxW + reject() + + + 316 + 260 + + + 286 + 274 + + + + +