arrange so that a default first indexing run for a given user runs a quick shallow pass, so that queries return some results quickly after indexing starts, avoiding user frustration
This commit is contained in:
parent
719f37ded7
commit
f05cae7344
@ -178,7 +178,7 @@ bool FsIndexer::init()
|
||||
}
|
||||
|
||||
// Recursively index each directory in the topdirs:
|
||||
bool FsIndexer::index()
|
||||
bool FsIndexer::index(bool quickshallow)
|
||||
{
|
||||
Chrono chron;
|
||||
if (!init())
|
||||
@ -193,6 +193,11 @@ bool FsIndexer::index()
|
||||
}
|
||||
|
||||
m_walker.setSkippedPaths(m_config->getSkippedPaths());
|
||||
if (quickshallow) {
|
||||
m_walker.setOpts(m_walker.getOpts() | FsTreeWalker::FtwSkipDotFiles);
|
||||
m_walker.setMaxDepth(2);
|
||||
}
|
||||
|
||||
for (vector<string>::const_iterator it = m_tdl.begin();
|
||||
it != m_tdl.end(); it++) {
|
||||
LOGDEB(("FsIndexer::index: Indexing %s into %s\n", it->c_str(),
|
||||
@ -204,11 +209,13 @@ bool FsIndexer::index()
|
||||
|
||||
// Adjust the "follow symlinks" option
|
||||
bool follow;
|
||||
int opts = m_walker.getOpts();
|
||||
if (m_config->getConfParam("followLinks", &follow) && follow) {
|
||||
m_walker.setOpts(FsTreeWalker::FtwFollow);
|
||||
opts |= FsTreeWalker::FtwFollow;
|
||||
} else {
|
||||
m_walker.setOpts(FsTreeWalker::FtwOptNone);
|
||||
opts &= ~FsTreeWalker::FtwFollow;
|
||||
}
|
||||
m_walker.setOpts(opts);
|
||||
|
||||
int abslen;
|
||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||
|
||||
@ -60,7 +60,7 @@ class FsIndexer : public FsTreeWalkerCB {
|
||||
* We open the database,
|
||||
* then call a file system walk for each top-level directory.
|
||||
*/
|
||||
bool index();
|
||||
bool index(bool quickshallow = 0);
|
||||
|
||||
/** Index a list of files. No db cleaning or stemdb updating */
|
||||
bool indexFiles(std::list<std::string> &files, ConfIndexer::IxFlag f =
|
||||
|
||||
@ -50,6 +50,46 @@ ConfIndexer::~ConfIndexer()
|
||||
deleteZ(m_beagler);
|
||||
}
|
||||
|
||||
// Determine if this is likely the first time that the user runs
|
||||
// indexing. We don't look at the xapiandb as this may have been
|
||||
// explicitely removed for valid reasons, but at the indexing status
|
||||
// file, which should be unexistant-or-empty only before any indexing
|
||||
// has ever run
|
||||
bool ConfIndexer::runFirstIndexing()
|
||||
{
|
||||
// Indexing status file existing and not empty ?
|
||||
struct stat st;
|
||||
if (stat(m_config->getIdxStatusFile().c_str(), &st) == 0 &&
|
||||
st.st_size > 0) {
|
||||
LOGDEB0(("ConfIndexer::runFirstIndexing: no: status file not empty\n"));
|
||||
exit(1);
|
||||
return false;
|
||||
}
|
||||
// And only do this if the user has kept the default topdirs (~).
|
||||
vector<string>tdl = m_config->getTopdirs();
|
||||
if (tdl.size() != 1 || tdl[0].compare(path_canon(path_tildexpand("~")))) {
|
||||
LOGDEB0(("ConfIndexer::runFirstIndexing: no: not home only\n"));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ConfIndexer::firstFsIndexingSequence()
|
||||
{
|
||||
LOGDEB(("ConfIndexer::firstFsIndexingSequence\n"));
|
||||
deleteZ(m_fsindexer);
|
||||
m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
|
||||
if (!m_fsindexer) {
|
||||
return false;
|
||||
}
|
||||
int flushmb = m_db.getFlushMb();
|
||||
m_db.setFlushMb(2);
|
||||
m_fsindexer->index(true);
|
||||
m_db.doFlush();
|
||||
m_db.setFlushMb(flushmb);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ConfIndexer::index(bool resetbefore, ixType typestorun)
|
||||
{
|
||||
Rcl::Db::OpenMode mode = resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd;
|
||||
@ -61,6 +101,9 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun)
|
||||
|
||||
m_config->setKeyDir(cstr_null);
|
||||
if (typestorun & IxTFs) {
|
||||
if (runFirstIndexing()) {
|
||||
firstFsIndexingSequence();
|
||||
}
|
||||
deleteZ(m_fsindexer);
|
||||
m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
|
||||
if (!m_fsindexer || !m_fsindexer->index()) {
|
||||
|
||||
@ -138,6 +138,13 @@ class ConfIndexer {
|
||||
BeagleQueueIndexer *m_beagler;
|
||||
DbIxStatusUpdater *m_updater;
|
||||
string m_reason;
|
||||
|
||||
// The first time we index, we do things a bit differently to
|
||||
// avoid user frustration (make at least some results available
|
||||
// fast by using several passes, the first ones to index common
|
||||
// interesting locations).
|
||||
bool runFirstIndexing();
|
||||
bool firstFsIndexingSequence();
|
||||
};
|
||||
|
||||
#endif /* _INDEXER_H_INCLUDED_ */
|
||||
|
||||
@ -1266,20 +1266,30 @@ bool Db::maybeflush(off_t moretext)
|
||||
if ((m_curtxtsz - m_flushtxtsz) / MB >= m_flushMb) {
|
||||
LOGDEB(("Db::add/delete: txt size >= %d Mb, flushing\n",
|
||||
m_flushMb));
|
||||
string ermsg;
|
||||
try {
|
||||
m_ndb->xwdb.flush();
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
LOGERR(("Db::add: flush() failed: %s\n", ermsg.c_str()));
|
||||
return false;
|
||||
}
|
||||
m_flushtxtsz = m_curtxtsz;
|
||||
return doFlush();
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Db::doFlush()
|
||||
{
|
||||
if (!m_ndb) {
|
||||
LOGERR(("Db::doFLush: no ndb??\n"));
|
||||
return false;
|
||||
}
|
||||
string ermsg;
|
||||
try {
|
||||
m_ndb->xwdb.flush();
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
LOGERR(("Db::doFlush: flush() failed: %s\n", ermsg.c_str()));
|
||||
return false;
|
||||
}
|
||||
m_flushtxtsz = m_curtxtsz;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Test if doc given by udi has changed since last indexed (test sigs)
|
||||
bool Db::needUpdate(const string &udi, const string& sig)
|
||||
{
|
||||
|
||||
@ -377,6 +377,19 @@ class Db {
|
||||
*/
|
||||
static void setInPlaceReset() {o_inPlaceReset = true;}
|
||||
|
||||
/** Flush interval get/set. This is used by the first indexing
|
||||
pass to override the config value and flush more rapidly
|
||||
initially so that the user can quickly play with queries */
|
||||
int getFlushMb()
|
||||
{
|
||||
return m_flushMb;
|
||||
}
|
||||
void setFlushMb(int mb)
|
||||
{
|
||||
m_flushMb = mb;
|
||||
}
|
||||
bool doFlush();
|
||||
|
||||
/* This has to be public for access by embedded Query::Native */
|
||||
Native *m_ndb;
|
||||
private:
|
||||
|
||||
@ -20,6 +20,7 @@
|
||||
|
||||
#ifndef TEST_FSTREEWALK
|
||||
|
||||
#include <stdio.h>
|
||||
#include <dirent.h>
|
||||
#include <sys/stat.h>
|
||||
#include <errno.h>
|
||||
@ -58,8 +59,15 @@ public:
|
||||
};
|
||||
|
||||
class FsTreeWalker::Internal {
|
||||
public:
|
||||
Internal(int opts)
|
||||
: options(opts), depthswitch(4), maxdepth(-1), errors(0)
|
||||
{
|
||||
}
|
||||
int options;
|
||||
int depthswitch;
|
||||
int maxdepth;
|
||||
int basedepth;
|
||||
stringstream reason;
|
||||
vector<string> skippedNames;
|
||||
vector<string> skippedPaths;
|
||||
@ -74,17 +82,11 @@ class FsTreeWalker::Internal {
|
||||
reason << call << "(" << param << ") : " << errno << " : " <<
|
||||
strerror(errno) << endl;
|
||||
}
|
||||
friend class FsTreeWalker;
|
||||
};
|
||||
|
||||
FsTreeWalker::FsTreeWalker(int opts)
|
||||
{
|
||||
data = new Internal;
|
||||
if (data) {
|
||||
data->options = opts;
|
||||
data->depthswitch = 4;
|
||||
data->errors = 0;
|
||||
}
|
||||
data = new Internal(opts);
|
||||
}
|
||||
|
||||
FsTreeWalker::~FsTreeWalker()
|
||||
@ -92,11 +94,30 @@ FsTreeWalker::~FsTreeWalker()
|
||||
delete data;
|
||||
}
|
||||
|
||||
void FsTreeWalker::setOpts(Options opts, int depthswitch)
|
||||
void FsTreeWalker::setOpts(int opts)
|
||||
{
|
||||
if (data) {
|
||||
data->options = opts;
|
||||
data->depthswitch = depthswitch;
|
||||
}
|
||||
}
|
||||
int FsTreeWalker::getOpts()
|
||||
{
|
||||
if (data) {
|
||||
return data->options;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
void FsTreeWalker::setDepthSwitch(int ds)
|
||||
{
|
||||
if (data) {
|
||||
data->depthswitch = ds;
|
||||
}
|
||||
}
|
||||
void FsTreeWalker::setMaxDepth(int md)
|
||||
{
|
||||
if (data) {
|
||||
data->maxdepth = md;
|
||||
}
|
||||
}
|
||||
|
||||
@ -198,8 +219,7 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
|
||||
data->options |= FtwTravNatural;
|
||||
}
|
||||
|
||||
int basedepth = slashcount(top); // Only used for breadthThenDepth
|
||||
|
||||
data->basedepth = slashcount(top); // Only used for breadthxx
|
||||
struct stat st;
|
||||
// We always follow symlinks at this point. Makes more sense.
|
||||
if (stat(top.c_str(), &st) == -1) {
|
||||
@ -240,7 +260,7 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
|
||||
if (data->options & FtwTravBreadthThenDepth) {
|
||||
// Check if new depth warrants switch to depth first
|
||||
// traversal (will happen on next loop iteration).
|
||||
int curdepth = slashcount(dir) - basedepth;
|
||||
int curdepth = slashcount(dir) - data->basedepth;
|
||||
if (curdepth >= data->depthswitch) {
|
||||
//fprintf(stderr, "SWITCHING TO DEPTH FIRST\n");
|
||||
data->options &= ~FtwTravMask;
|
||||
@ -309,6 +329,13 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
int curdepth = slashcount(top) - data->basedepth;
|
||||
if (data->maxdepth >= 0 && curdepth >= data->maxdepth) {
|
||||
LOGDEB1(("FsTreeWalker::iwalk: Maxdepth reached: [%s]\n", top.c_str()));
|
||||
return status;
|
||||
}
|
||||
|
||||
// This is a directory, read it and process entries:
|
||||
|
||||
// Detect if directory already seen. This could just be several
|
||||
@ -345,6 +372,9 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
|
||||
while ((ent = readdir(d)) != 0) {
|
||||
string fn;
|
||||
struct stat st;
|
||||
// Maybe skip dotfiles
|
||||
if ((data->options & FtwSkipDotFiles) && ent->d_name[0] == '.')
|
||||
continue;
|
||||
// Skip . and ..
|
||||
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
|
||||
continue;
|
||||
@ -439,6 +469,8 @@ static int op_flags;
|
||||
#define OPT_m 0x80
|
||||
#define OPT_L 0x100
|
||||
#define OPT_w 0x200
|
||||
#define OPT_M 0x400
|
||||
#define OPT_D 0x800
|
||||
|
||||
class myCB : public FsTreeWalkerCB {
|
||||
public:
|
||||
@ -489,6 +521,8 @@ static char usage [] =
|
||||
" -d : use almost depth first (dir files, then subdirs)\n"
|
||||
" -m : use breadth up to 4 deep then switch to -d\n"
|
||||
" -w : unset default FNM_PATHNAME when using fnmatch() to match skipped paths\n"
|
||||
" -M <depth>: limit depth (works with -b/m/d)\n"
|
||||
" -D : skip dotfiles\n"
|
||||
;
|
||||
static void
|
||||
Usage(void)
|
||||
@ -501,70 +535,80 @@ int main(int argc, const char **argv)
|
||||
{
|
||||
vector<string> patterns;
|
||||
vector<string> paths;
|
||||
int maxdepth = -1;
|
||||
|
||||
thisprog = argv[0];
|
||||
argc--; argv++;
|
||||
while (argc > 0 && **argv == '-') {
|
||||
(*argv)++;
|
||||
if (!(**argv))
|
||||
/* Cas du "adb - core" */
|
||||
Usage();
|
||||
while (**argv)
|
||||
switch (*(*argv)++) {
|
||||
case 'b': op_flags |= OPT_b; break;
|
||||
case 'c': op_flags |= OPT_c; break;
|
||||
case 'd': op_flags |= OPT_d; break;
|
||||
case 'D': op_flags |= OPT_D; break;
|
||||
case 'L': op_flags |= OPT_L; break;
|
||||
case 'm': op_flags |= OPT_m; break;
|
||||
case 'M': op_flags |= OPT_M; if (argc < 2) Usage();
|
||||
maxdepth = atoi(*(++argv));
|
||||
argc--;
|
||||
goto b1;
|
||||
case 'p': op_flags |= OPT_p; if (argc < 2) Usage();
|
||||
patterns.push_back(*(++argv));
|
||||
argc--;
|
||||
goto b1;
|
||||
case 'P': op_flags |= OPT_P; if (argc < 2) Usage();
|
||||
paths.push_back(*(++argv));
|
||||
argc--;
|
||||
goto b1;
|
||||
case 'r': op_flags |= OPT_r; break;
|
||||
case 'w': op_flags |= OPT_w; break;
|
||||
default: Usage(); break;
|
||||
}
|
||||
b1: argc--; argv++;
|
||||
}
|
||||
|
||||
while (argc > 0 && **argv == '-') {
|
||||
(*argv)++;
|
||||
if (!(**argv))
|
||||
/* Cas du "adb - core" */
|
||||
Usage();
|
||||
while (**argv)
|
||||
switch (*(*argv)++) {
|
||||
case 'b': op_flags |= OPT_b; break;
|
||||
case 'c': op_flags |= OPT_c; break;
|
||||
case 'd': op_flags |= OPT_d; break;
|
||||
case 'L': op_flags |= OPT_L; break;
|
||||
case 'm': op_flags |= OPT_m; break;
|
||||
case 'p': op_flags |= OPT_p; if (argc < 2) Usage();
|
||||
patterns.push_back(*(++argv));
|
||||
argc--;
|
||||
goto b1;
|
||||
case 'P': op_flags |= OPT_P; if (argc < 2) Usage();
|
||||
paths.push_back(*(++argv));
|
||||
argc--;
|
||||
goto b1;
|
||||
case 'r': op_flags |= OPT_r; break;
|
||||
case 'w': op_flags |= OPT_w; break;
|
||||
default: Usage(); break;
|
||||
}
|
||||
b1: argc--; argv++;
|
||||
}
|
||||
if (argc != 1)
|
||||
Usage();
|
||||
string topdir = *argv++;argc--;
|
||||
|
||||
if (argc != 1)
|
||||
Usage();
|
||||
string topdir = *argv++;argc--;
|
||||
int opt = 0;
|
||||
if (op_flags & OPT_r)
|
||||
opt |= FsTreeWalker::FtwNoRecurse;
|
||||
if (op_flags & OPT_c)
|
||||
opt |= FsTreeWalker::FtwNoCanon;
|
||||
if (op_flags & OPT_L)
|
||||
opt |= FsTreeWalker::FtwFollow;
|
||||
if (op_flags & OPT_D)
|
||||
opt |= FsTreeWalker::FtwSkipDotFiles;
|
||||
|
||||
int opt = 0;
|
||||
if (op_flags & OPT_r)
|
||||
opt |= FsTreeWalker::FtwNoRecurse;
|
||||
if (op_flags & OPT_c)
|
||||
opt |= FsTreeWalker::FtwNoCanon;
|
||||
if (op_flags & OPT_L)
|
||||
opt |= FsTreeWalker::FtwFollow;
|
||||
if (op_flags & OPT_b)
|
||||
opt |= FsTreeWalker::FtwTravBreadth;
|
||||
else if (op_flags & OPT_d)
|
||||
opt |= FsTreeWalker::FtwTravFilesThenDirs;
|
||||
else if (op_flags & OPT_m)
|
||||
opt |= FsTreeWalker::FtwTravBreadthThenDepth;
|
||||
|
||||
if (op_flags & OPT_b)
|
||||
opt |= FsTreeWalker::FtwTravBreadth;
|
||||
else if (op_flags & OPT_d)
|
||||
opt |= FsTreeWalker::FtwTravFilesThenDirs;
|
||||
else if (op_flags & OPT_m)
|
||||
opt |= FsTreeWalker::FtwTravBreadthThenDepth;
|
||||
|
||||
string reason;
|
||||
if (!recollinit(0, 0, reason)) {
|
||||
fprintf(stderr, "Init failed: %s\n", reason.c_str());
|
||||
exit(1);
|
||||
}
|
||||
if (op_flags & OPT_w) {
|
||||
FsTreeWalker::setNoFnmPathname();
|
||||
}
|
||||
FsTreeWalker walker(opt);
|
||||
walker.setSkippedNames(patterns);
|
||||
walker.setSkippedPaths(paths);
|
||||
myCB cb;
|
||||
walker.walk(topdir, cb);
|
||||
if (walker.getErrCnt() > 0)
|
||||
cout << walker.getReason();
|
||||
string reason;
|
||||
if (!recollinit(0, 0, reason)) {
|
||||
fprintf(stderr, "Init failed: %s\n", reason.c_str());
|
||||
exit(1);
|
||||
}
|
||||
if (op_flags & OPT_w) {
|
||||
FsTreeWalker::setNoFnmPathname();
|
||||
}
|
||||
FsTreeWalker walker;
|
||||
walker.setOpts(opt);
|
||||
walker.setMaxDepth(maxdepth);
|
||||
walker.setSkippedNames(patterns);
|
||||
walker.setSkippedPaths(paths);
|
||||
myCB cb;
|
||||
walker.walk(topdir, cb);
|
||||
if (walker.getErrCnt() > 0)
|
||||
cout << walker.getReason();
|
||||
}
|
||||
|
||||
#endif // TEST_FSTREEWALK
|
||||
|
||||
@ -56,7 +56,7 @@ class FsTreeWalker {
|
||||
enum Status {FtwOk=0, FtwError=1, FtwStop=2,
|
||||
FtwStatAll = FtwError|FtwStop};
|
||||
enum Options {FtwOptNone = 0, FtwNoRecurse = 1, FtwFollow = 2,
|
||||
FtwNoCanon = 4,
|
||||
FtwNoCanon = 4, FtwSkipDotFiles = 8,
|
||||
// Tree walking options. Natural is close to depth first: process
|
||||
// directory entries as we see them, recursing into subdirectories at
|
||||
// once
|
||||
@ -76,7 +76,10 @@ class FsTreeWalker {
|
||||
FsTreeWalker(int opts = FtwTravNatural);
|
||||
~FsTreeWalker();
|
||||
|
||||
void setOpts(Options opts, int depthswitch = 4);
|
||||
void setOpts(int opts);
|
||||
int getOpts();
|
||||
void setDepthSwitch(int);
|
||||
void setMaxDepth(int);
|
||||
|
||||
/**
|
||||
* Begin file system walk.
|
||||
@ -110,8 +113,8 @@ class FsTreeWalker {
|
||||
|
||||
private:
|
||||
Status iwalk(const string &dir, struct stat *stp, FsTreeWalkerCB& cb);
|
||||
class Internal;
|
||||
Internal *data;
|
||||
class Internal;
|
||||
Internal *data;
|
||||
};
|
||||
|
||||
class FsTreeWalkerCB {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user