This commit is contained in:
Jean-Francois Dockes 2020-07-15 10:47:18 +02:00
parent 3cf7fb3b65
commit 96ba5acd32
14 changed files with 505 additions and 533 deletions

View File

@ -1,18 +0,0 @@
# Makefile for two small test programs: subtreelist and mimetype.
PROGS = subtreelist mimetype
# Default target: build every program listed in PROGS.
all: $(PROGS)
# subtreelist: linked from subtreelist.o and $(LIBRECOLL).
SUBTREELIST_OBJS= subtreelist.o
subtreelist : $(SUBTREELIST_OBJS)
$(CXX) $(ALL_CXXFLAGS) -o subtreelist $(SUBTREELIST_OBJS) \
$(LIBRECOLL)
# subtreelist.cpp is compiled with TEST_SUBTREELIST defined.
subtreelist.o : subtreelist.cpp
$(CXX) $(ALL_CXXFLAGS) -DTEST_SUBTREELIST -c subtreelist.cpp
# mimetype: linked from trmimetype.o and $(LIBRECOLL).
MIMETYPE_OBJS= trmimetype.o
mimetype : $(MIMETYPE_OBJS)
$(CXX) $(ALL_CXXFLAGS) -o mimetype $(MIMETYPE_OBJS) $(LIBRECOLL)
# mimetype.cpp is compiled with TEST_MIMETYPE defined, and the object is
# written under a distinct name (trmimetype.o, not mimetype.o).
trmimetype.o : mimetype.cpp
$(CXX) $(ALL_CXXFLAGS) -DTEST_MIMETYPE -c -o trmimetype.o \
mimetype.cpp
# NOTE(review): ALL_CXXFLAGS and LIBRECOLL are not set in this file;
# presumably they come from this include -- confirm.
include ../utils/utmkdefs.mk

View File

@ -23,7 +23,7 @@
// Current status of an indexing operation. This is updated in // Current status of an indexing operation. This is updated in
// $RECOLL_CONFDIR/idxstatus.txt // $RECOLL_CONFDIR/idxstatus.txt
class DbIxStatus { class DbIxStatus {
public: public:
enum Phase {DBIXS_NONE, enum Phase {DBIXS_NONE,
DBIXS_FILES, DBIXS_PURGE, DBIXS_STEMDB, DBIXS_CLOSING, DBIXS_FILES, DBIXS_PURGE, DBIXS_STEMDB, DBIXS_CLOSING,
DBIXS_MONITOR, DBIXS_MONITOR,

View File

@ -432,4 +432,3 @@ vector<string> ConfIndexer::getStemmerNames()
{ {
return Rcl::Db::getStemmerNames(); return Rcl::Db::getStemmerNames();
} }

View File

@ -34,7 +34,7 @@ class WebQueueIndexer;
/** Callback to say what we're doing. If the update func returns false, we /** Callback to say what we're doing. If the update func returns false, we
* stop as soon as possible without corrupting state */ * stop as soon as possible without corrupting state */
class DbIxStatusUpdater { class DbIxStatusUpdater {
public: public:
#ifdef IDX_THREADS #ifdef IDX_THREADS
std::mutex m_mutex; std::mutex m_mutex;
#endif #endif
@ -62,7 +62,7 @@ class DbIxStatusUpdater {
* database(s). * database(s).
*/ */
class ConfIndexer { class ConfIndexer {
public: public:
enum runStatus {IndexerOk, IndexerError}; enum runStatus {IndexerOk, IndexerError};
ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0); ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0);
virtual ~ConfIndexer(); virtual ~ConfIndexer();
@ -113,7 +113,7 @@ class ConfIndexer {
/** Set in place reset mode */ /** Set in place reset mode */
void setInPlaceReset() {m_db.setInPlaceReset();} void setInPlaceReset() {m_db.setInPlaceReset();}
private: private:
RclConfig *m_config; RclConfig *m_config;
Rcl::Db m_db; Rcl::Db m_db;
FsIndexer *m_fsindexer; FsIndexer *m_fsindexer;

View File

@ -45,7 +45,7 @@ using std::multimap;
* Monitoring event: something changed in the filesystem * Monitoring event: something changed in the filesystem
*/ */
class RclMonEvent { class RclMonEvent {
public: public:
enum EvType {RCLEVT_NONE= 0, RCLEVT_MODIFY=1, RCLEVT_DELETE=2, enum EvType {RCLEVT_NONE= 0, RCLEVT_MODIFY=1, RCLEVT_DELETE=2,
RCLEVT_DIRCREATE=3, RCLEVT_ISDIR=0x10}; RCLEVT_DIRCREATE=3, RCLEVT_ISDIR=0x10};
string m_path; string m_path;
@ -76,7 +76,7 @@ enum RclMonitorOption {RCLMON_NONE=0, RCLMON_NOFORK=1, RCLMON_NOX11=2,
*/ */
class RclEQData; class RclEQData;
class RclMonEventQueue { class RclMonEventQueue {
public: public:
RclMonEventQueue(); RclMonEventQueue();
~RclMonEventQueue(); ~RclMonEventQueue();
/** Wait for event or timeout. Returns with the queue locked */ /** Wait for event or timeout. Returns with the queue locked */
@ -94,7 +94,7 @@ class RclMonEventQueue {
void setConfig(RclConfig *conf); void setConfig(RclConfig *conf);
RclConfig *getConfig(); RclConfig *getConfig();
private: private:
RclEQData *m_data; RclEQData *m_data;
}; };

View File

@ -427,80 +427,80 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
string thisprog; string thisprog;
static const char usage [] = static const char usage [] =
"\n" "\n"
"recollindex [-h] \n" "recollindex [-h] \n"
" Print help\n" " Print help\n"
"recollindex [-z|-Z] [-k]\n" "recollindex [-z|-Z] [-k]\n"
" Index everything according to configuration file\n" " Index everything according to configuration file\n"
" -z : reset database before starting indexing\n" " -z : reset database before starting indexing\n"
" -Z : in place reset: consider all documents as changed. Can also\n" " -Z : in place reset: consider all documents as changed. Can also\n"
" be combined with -i or -r but not -m\n" " be combined with -i or -r but not -m\n"
" -k : retry files on which we previously failed\n" " -k : retry files on which we previously failed\n"
#ifdef RCL_MONITOR #ifdef RCL_MONITOR
"recollindex -m [-w <secs>] -x [-D] [-C]\n" "recollindex -m [-w <secs>] -x [-D] [-C]\n"
" Perform real time indexing. Don't become a daemon if -D is set.\n" " Perform real time indexing. Don't become a daemon if -D is set.\n"
" -w sets number of seconds to wait before starting.\n" " -w sets number of seconds to wait before starting.\n"
" -C disables monitoring config for changes/reexecuting.\n" " -C disables monitoring config for changes/reexecuting.\n"
" -n disables initial incremental indexing (!and purge!).\n" " -n disables initial incremental indexing (!and purge!).\n"
#ifndef DISABLE_X11MON #ifndef DISABLE_X11MON
" -x disables exit on end of x11 session\n" " -x disables exit on end of x11 session\n"
#endif /* DISABLE_X11MON */ #endif /* DISABLE_X11MON */
#endif /* RCL_MONITOR */ #endif /* RCL_MONITOR */
"recollindex -e [<filepath [path ...]>]\n" "recollindex -e [<filepath [path ...]>]\n"
" Purge data for individual files. No stem database updates.\n" " Purge data for individual files. No stem database updates.\n"
" Reads paths on stdin if none is given as argument.\n" " Reads paths on stdin if none is given as argument.\n"
"recollindex -i [-f] [-Z] [<filepath [path ...]>]\n" "recollindex -i [-f] [-Z] [<filepath [path ...]>]\n"
" Index individual files. No database purge or stem database updates\n" " Index individual files. No database purge or stem database updates\n"
" Will read paths on stdin if none is given as argument\n" " Will read paths on stdin if none is given as argument\n"
" -f : ignore skippedPaths and skippedNames while doing this\n" " -f : ignore skippedPaths and skippedNames while doing this\n"
"recollindex -r [-K] [-f] [-Z] [-p pattern] <top> \n" "recollindex -r [-K] [-f] [-Z] [-p pattern] <top> \n"
" Recursive partial reindex. \n" " Recursive partial reindex. \n"
" -p : filter file names, multiple instances are allowed, e.g.: \n" " -p : filter file names, multiple instances are allowed, e.g.: \n"
" -p *.odt -p *.pdf\n" " -p *.odt -p *.pdf\n"
" -K : skip previously failed files (they are retried by default)\n" " -K : skip previously failed files (they are retried by default)\n"
"recollindex -l\n" "recollindex -l\n"
" List available stemming languages\n" " List available stemming languages\n"
"recollindex -s <lang>\n" "recollindex -s <lang>\n"
" Build stem database for additional language <lang>\n" " Build stem database for additional language <lang>\n"
"recollindex -E\n" "recollindex -E\n"
" Check configuration file for topdirs and other paths existence\n" " Check configuration file for topdirs and other paths existence\n"
#ifdef FUTURE_IMPROVEMENT #ifdef FUTURE_IMPROVEMENT
"recollindex -W\n" "recollindex -W\n"
" Process the Web queue\n" " Process the Web queue\n"
#endif #endif
#ifdef RCL_USE_ASPELL #ifdef RCL_USE_ASPELL
"recollindex -S\n" "recollindex -S\n"
" Build aspell spelling dictionary.>\n" " Build aspell spelling dictionary.>\n"
#endif #endif
"Common options:\n" "Common options:\n"
" -c <configdir> : specify config directory, overriding $RECOLL_CONFDIR\n" " -c <configdir> : specify config directory, overriding $RECOLL_CONFDIR\n"
; ;
static void Usage() static void Usage()
{ {
FILE *fp = (op_flags & OPT_h) ? stdout : stderr; FILE *fp = (op_flags & OPT_h) ? stdout : stderr;
fprintf(fp, "%s: Usage: %s", path_getsimple(thisprog).c_str(), usage); fprintf(fp, "%s: Usage: %s", path_getsimple(thisprog).c_str(), usage);
fprintf(fp, "Recoll version: %s\n", Rcl::version_string().c_str()); fprintf(fp, "Recoll version: %s\n", Rcl::version_string().c_str());
exit((op_flags & OPT_h)==0); exit((op_flags & OPT_h)==0);
} }
static RclConfig *config; static RclConfig *config;
static void lockorexit(Pidfile *pidfile, RclConfig *config) static void lockorexit(Pidfile *pidfile, RclConfig *config)
{ {
PRETEND_USE(config); PRETEND_USE(config);
pid_t pid; pid_t pid;
if ((pid = pidfile->open()) != 0) { if ((pid = pidfile->open()) != 0) {
if (pid > 0) { if (pid > 0) {
cerr << "Can't become exclusive indexer: " << pidfile->getreason() cerr << "Can't become exclusive indexer: " << pidfile->getreason()
<< ". Return (other pid?): " << pid << endl; << ". Return (other pid?): " << pid << endl;
#ifndef _WIN32 #ifndef _WIN32
// Have a look at the status file. If the other process is // Have a look at the status file. If the other process is
// a monitor we can tell it to start an incremental pass // a monitor we can tell it to start an incremental pass
// by touching the configuration file // by touching the configuration file
DbIxStatus status; DbIxStatus status;
readIdxStatus(config, status); readIdxStatus(config, status);
if (status.hasmonitor) { if (status.hasmonitor) {
string cmd("touch "); string cmd("touch ");
string path = path_cat(config->getConfDir(), "recoll.conf"); string path = path_cat(config->getConfDir(), "recoll.conf");
cmd += path; cmd += path;
@ -511,19 +511,19 @@ static void lockorexit(Pidfile *pidfile, RclConfig *config)
cerr << "Monitoring indexer process was notified of " cerr << "Monitoring indexer process was notified of "
"indexing request\n"; "indexing request\n";
} }
} }
#endif #endif
} else { } else {
cerr << "Can't become exclusive indexer: " << pidfile->getreason() cerr << "Can't become exclusive indexer: " << pidfile->getreason()
<< endl; << endl;
} }
exit(1); exit(1);
} }
if (pidfile->write_pid() != 0) { if (pidfile->write_pid() != 0) {
cerr << "Can't become exclusive indexer: " << pidfile->getreason() << cerr << "Can't become exclusive indexer: " << pidfile->getreason() <<
endl; endl;
exit(1); exit(1);
} }
} }
static string reasonsfile; static string reasonsfile;
@ -559,7 +559,7 @@ static void flushIdxReasons()
static vector<string> argstovector(int argc, wchar_t **argv) static vector<string> argstovector(int argc, wchar_t **argv)
#else #else
#define WARGTOSTRING(w) (w) #define WARGTOSTRING(w) (w)
static vector<string> argstovector(int argc, char **argv) static vector<string> argstovector(int argc, char **argv)
#endif #endif
{ {
thisprog = path_absolute(WARGTOSTRING(argv[0])); thisprog = path_absolute(WARGTOSTRING(argv[0]));
@ -599,7 +599,7 @@ static std::string orig_cwd;
#if USE_WMAIN #if USE_WMAIN
int wmain(int argc, wchar_t *argv[]) int wmain(int argc, wchar_t *argv[])
#else #else
int main(int argc, char *argv[]) int main(int argc, char *argv[])
#endif #endif
{ {
// The reexec struct is used by the daemon to shed memory after // The reexec struct is used by the daemon to shed memory after

View File

@ -15,7 +15,6 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/ */
#ifndef TEST_SUBTREELIST
#include "autoconfig.h" #include "autoconfig.h"
#include <memory> #include <memory>
@ -33,7 +32,8 @@ bool subtreelist(RclConfig *config, const string& top,
LOGDEB("subtreelist: top: [" << (top) << "]\n" ); LOGDEB("subtreelist: top: [" << (top) << "]\n" );
Rcl::Db rcldb(config); Rcl::Db rcldb(config);
if (!rcldb.open(Rcl::Db::DbRO)) { if (!rcldb.open(Rcl::Db::DbRO)) {
LOGERR("subtreelist: can't open database in [" << (config->getDbDir()) << "]: " << (rcldb.getReason()) << "\n" ); LOGERR("subtreelist: can't open database in [" << config->getDbDir() <<
"]: " << rcldb.getReason() << "\n");
return false; return false;
} }
@ -56,76 +56,3 @@ bool subtreelist(RclConfig *config, const string& top,
} }
return true; return true;
} }
#else // TEST
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <iostream>
#include <vector>
#include <string>
using namespace std;
#include "subtreelist.h"
#include "rclconfig.h"
#include "rclinit.h"
// Program name as invoked; main() assigns argv[0] to it.
static char *thisprog;
// Usage text, printed after the program name by Usage().
static char usage [] =
" <path> : list document paths in this tree\n"
;
// Print "<thisprog>: usage:" followed by the usage text on cerr, then
// terminate the process with exit status 1. Never returns.
static void
Usage(void)
{
cerr << thisprog << ": usage:" << endl << usage;
exit(1);
}
// Option flags word and the OPT_o bit. Neither is referenced by the code
// visible in this block.
static int op_flags;
#define OPT_o 0x2
// Test driver for subtreelist(): takes one path argument, initializes the
// Recoll configuration with recollinit(), calls subtreelist() for that path
// and prints every returned entry on its own line on cout.
// Exits with 0 on success, and with 1 on a usage error, on configuration
// initialization failure or when subtreelist() returns false.
int main(int argc, char **argv)
{
string top;
thisprog = argv[0];
argc--; argv++;
// Walk the leading arguments which begin with '-'.
while (argc > 0 && **argv == '-') {
(*argv)++;
if (!(**argv))
/* Case of a lone "-" argument, as in "adb - core" */
Usage();
// No option letter is defined: every character falls into the default
// case, which calls Usage() (and so exits).
while (**argv)
switch (*(*argv)++) {
default: Usage(); break;
}
argc--; argv++;
}
// The tree path is mandatory.
if (argc < 1)
Usage();
top = *argv++;argc--;
string reason;
RclConfig *config = recollinit(0, 0, 0, reason, 0);
if (!config || !config->ok()) {
fprintf(stderr, "Recoll init failed: %s\n", reason.c_str());
exit(1);
}
vector<string> paths;
if (!subtreelist(config, top, paths)) {
cerr << "subtreelist failed" << endl;
exit(1);
}
// Print one entry per line.
for (vector<string>::const_iterator it = paths.begin();
it != paths.end(); it++) {
cout << *it << endl;
}
exit(0);
}
#endif

View File

@ -35,7 +35,7 @@ class CirCache;
class RclConfig; class RclConfig;
class WebStore; class WebStore;
namespace Rcl { namespace Rcl {
class Db; class Db;
} }
class WebQueueIndexer : public FsTreeWalkerCB { class WebQueueIndexer : public FsTreeWalkerCB {

View File

@ -0,0 +1,64 @@
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <iostream>
#include <vector>
#include <string>
using namespace std;
#include "subtreelist.h"
#include "rclconfig.h"
#include "rclinit.h"
// Program name as invoked; main() assigns argv[0] to it.
static char *thisprog;
// Usage text, printed right after the program name by Usage().
static char usage [] = " <path> : list document paths in this tree\n";

/**
 * Print the usage message on the standard error stream and terminate the
 * process with exit status 1. This function never returns.
 */
static void Usage(void)
{
    std::cerr << thisprog << ": usage:" << std::endl;
    std::cerr << usage;
    exit(1);
}
// Option flags word and the OPT_o bit. Neither is referenced by the code
// visible in this block.
static int op_flags;
#define OPT_o 0x2

/**
 * Test driver for subtreelist().
 *
 * Expects exactly one meaningful argument: the top of the tree to list.
 * The Recoll configuration is initialized through recollinit(), then
 * subtreelist() is called for that path and every returned entry is
 * printed on its own line on the standard output.
 *
 * Exit status: 0 on success; 1 on a usage error, when the configuration
 * can't be initialized, or when subtreelist() reports a failure.
 */
int main(int argc, char **argv)
{
    thisprog = argv[0];
    --argc;
    ++argv;

    // Walk the leading arguments which begin with '-'. No option letter is
    // defined at this point, so any of them ends up in Usage(), which exits.
    for (; argc > 0 && **argv == '-'; --argc, ++argv) {
        ++(*argv);
        if (**argv == '\0') {
            // A lone "-" argument (as in "adb - core") is not accepted.
            Usage();
        }
        while (**argv != '\0') {
            const char optchar = *(*argv)++;
            switch (optchar) {
            default:
                Usage();
                break;
            }
        }
    }

    // The tree path is mandatory.
    if (argc < 1) {
        Usage();
    }
    const std::string top(*argv);
    ++argv;
    --argc;

    std::string reason;
    RclConfig *config = recollinit(0, 0, 0, reason, 0);
    const bool configUsable = config && config->ok();
    if (!configUsable) {
        fprintf(stderr, "Recoll init failed: %s\n", reason.c_str());
        exit(1);
    }

    std::vector<std::string> paths;
    const bool listed = subtreelist(config, top, paths);
    if (!listed) {
        std::cerr << "subtreelist failed" << std::endl;
        exit(1);
    }

    // One entry per line.
    for (const std::string& path : paths) {
        std::cout << path << std::endl;
    }
    exit(0);
}