Arrange for the default first indexing run for a given user to begin with a quick, shallow pass, so that queries return some results soon after indexing starts, avoiding user frustration.

This commit is contained in:
Jean-Francois Dockes 2013-04-18 14:29:32 +02:00
parent 719f37ded7
commit f05cae7344
8 changed files with 214 additions and 87 deletions

View File

@ -178,7 +178,7 @@ bool FsIndexer::init()
}
// Recursively index each directory in the topdirs:
bool FsIndexer::index()
bool FsIndexer::index(bool quickshallow)
{
Chrono chron;
if (!init())
@ -193,6 +193,11 @@ bool FsIndexer::index()
}
m_walker.setSkippedPaths(m_config->getSkippedPaths());
if (quickshallow) {
m_walker.setOpts(m_walker.getOpts() | FsTreeWalker::FtwSkipDotFiles);
m_walker.setMaxDepth(2);
}
for (vector<string>::const_iterator it = m_tdl.begin();
it != m_tdl.end(); it++) {
LOGDEB(("FsIndexer::index: Indexing %s into %s\n", it->c_str(),
@ -204,11 +209,13 @@ bool FsIndexer::index()
// Adjust the "follow symlinks" option
bool follow;
int opts = m_walker.getOpts();
if (m_config->getConfParam("followLinks", &follow) && follow) {
m_walker.setOpts(FsTreeWalker::FtwFollow);
opts |= FsTreeWalker::FtwFollow;
} else {
m_walker.setOpts(FsTreeWalker::FtwOptNone);
opts &= ~FsTreeWalker::FtwFollow;
}
m_walker.setOpts(opts);
int abslen;
if (m_config->getConfParam("idxabsmlen", &abslen))

View File

@ -60,7 +60,7 @@ class FsIndexer : public FsTreeWalkerCB {
* We open the database,
* then call a file system walk for each top-level directory.
*/
bool index();
bool index(bool quickshallow = 0);
/** Index a list of files. No db cleaning or stemdb updating */
bool indexFiles(std::list<std::string> &files, ConfIndexer::IxFlag f =

View File

@ -50,6 +50,46 @@ ConfIndexer::~ConfIndexer()
deleteZ(m_beagler);
}
// Determine if this is likely the first time that the user runs
// indexing. We don't look at the xapiandb as this may have been
// explicitly removed for valid reasons, but at the indexing status
// file, which should be nonexistent-or-empty only before any indexing
// has ever run.
//
// Returns true only if both conditions hold:
//  - the indexing status file cannot be stat()ed or has a zero size,
//  - the configured topdirs list has exactly one entry, equal to the
//    canonical form of the expanded "~".
// Returns false otherwise. This is a pure test: it must never
// terminate the process, because it is evaluated on every file system
// indexing run, and a non-empty status file is the normal case for
// all runs after the first one.
bool ConfIndexer::runFirstIndexing()
{
    // Indexing status file existing and not empty ?
    struct stat st;
    if (stat(m_config->getIdxStatusFile().c_str(), &st) == 0 &&
        st.st_size > 0) {
        LOGDEB0(("ConfIndexer::runFirstIndexing: no: status file not empty\n"));
        return false;
    }

    // And only do this if the user has kept the default topdirs (~).
    vector<string> tdl = m_config->getTopdirs();
    if (tdl.size() != 1 ||
        tdl[0].compare(path_canon(path_tildexpand("~"))) != 0) {
        LOGDEB0(("ConfIndexer::runFirstIndexing: no: not home only\n"));
        return false;
    }
    return true;
}
// Run the special file system pass used for a first indexing run:
// a fresh FsIndexer is created and asked for a "quick shallow" walk
// (index(true)), with the database flush threshold temporarily lowered
// to 2 (Mb) so that data reaches the index early. The index is then
// flushed explicitly and the previous threshold is restored.
//
// Returns false only if the FsIndexer could not be created. The result
// of the shallow index() call itself is not examined here.
bool ConfIndexer::firstFsIndexingSequence()
{
    LOGDEB(("ConfIndexer::firstFsIndexingSequence\n"));

    // Start from a clean file system indexer object.
    deleteZ(m_fsindexer);
    m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
    if (m_fsindexer == 0) {
        return false;
    }

    // Remember the configured flush interval, and use a small one for
    // the duration of the shallow pass.
    const int savedFlushMb = m_db.getFlushMb();
    m_db.setFlushMb(2);

    m_fsindexer->index(true);

    // Make what was indexed so far visible, then put the flush
    // interval back to what it was.
    m_db.doFlush();
    m_db.setFlushMb(savedFlushMb);
    return true;
}
bool ConfIndexer::index(bool resetbefore, ixType typestorun)
{
Rcl::Db::OpenMode mode = resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd;
@ -61,6 +101,9 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun)
m_config->setKeyDir(cstr_null);
if (typestorun & IxTFs) {
if (runFirstIndexing()) {
firstFsIndexingSequence();
}
deleteZ(m_fsindexer);
m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
if (!m_fsindexer || !m_fsindexer->index()) {

View File

@ -138,6 +138,13 @@ class ConfIndexer {
BeagleQueueIndexer *m_beagler;
DbIxStatusUpdater *m_updater;
string m_reason;
// The first time we index, we do things a bit differently to
// avoid user frustration (make at least some results available
// fast by using several passes, the first ones to index common
// interesting locations).
bool runFirstIndexing();
bool firstFsIndexingSequence();
};
#endif /* _INDEXER_H_INCLUDED_ */

View File

@ -1266,20 +1266,30 @@ bool Db::maybeflush(off_t moretext)
if ((m_curtxtsz - m_flushtxtsz) / MB >= m_flushMb) {
LOGDEB(("Db::add/delete: txt size >= %d Mb, flushing\n",
m_flushMb));
string ermsg;
try {
m_ndb->xwdb.flush();
} XCATCHERROR(ermsg);
if (!ermsg.empty()) {
LOGERR(("Db::add: flush() failed: %s\n", ermsg.c_str()));
return false;
}
m_flushtxtsz = m_curtxtsz;
return doFlush();
}
}
return true;
}
bool Db::doFlush()
{
if (!m_ndb) {
LOGERR(("Db::doFLush: no ndb??\n"));
return false;
}
string ermsg;
try {
m_ndb->xwdb.flush();
} XCATCHERROR(ermsg);
if (!ermsg.empty()) {
LOGERR(("Db::doFlush: flush() failed: %s\n", ermsg.c_str()));
return false;
}
m_flushtxtsz = m_curtxtsz;
return true;
}
// Test if doc given by udi has changed since last indexed (test sigs)
bool Db::needUpdate(const string &udi, const string& sig)
{

View File

@ -377,6 +377,19 @@ class Db {
*/
static void setInPlaceReset() {o_inPlaceReset = true;}
/** Flush interval get/set. This is used by the first indexing
    pass to override the config value and flush more rapidly
    initially so that the user can quickly play with queries.
    @return the current flush interval value (m_flushMb). */
int getFlushMb() { return m_flushMb; }
/** Set the flush interval value (m_flushMb).
    @param mb new value; stored as is, no validation is performed. */
void setFlushMb(int mb) { m_flushMb = mb; }
bool doFlush();
/* This has to be public for access by embedded Query::Native */
Native *m_ndb;
private:

View File

@ -20,6 +20,7 @@
#ifndef TEST_FSTREEWALK
#include <stdio.h>
#include <dirent.h>
#include <sys/stat.h>
#include <errno.h>
@ -58,8 +59,15 @@ public:
};
class FsTreeWalker::Internal {
public:
Internal(int opts)
: options(opts), depthswitch(4), maxdepth(-1), errors(0)
{
}
int options;
int depthswitch;
int maxdepth;
int basedepth;
stringstream reason;
vector<string> skippedNames;
vector<string> skippedPaths;
@ -74,17 +82,11 @@ class FsTreeWalker::Internal {
reason << call << "(" << param << ") : " << errno << " : " <<
strerror(errno) << endl;
}
friend class FsTreeWalker;
};
FsTreeWalker::FsTreeWalker(int opts)
{
data = new Internal;
if (data) {
data->options = opts;
data->depthswitch = 4;
data->errors = 0;
}
data = new Internal(opts);
}
FsTreeWalker::~FsTreeWalker()
@ -92,11 +94,30 @@ FsTreeWalker::~FsTreeWalker()
delete data;
}
void FsTreeWalker::setOpts(Options opts, int depthswitch)
void FsTreeWalker::setOpts(int opts)
{
if (data) {
data->options = opts;
data->depthswitch = depthswitch;
}
}
// Return the current option bits, or 0 if the internal data object
// is not available.
int FsTreeWalker::getOpts()
{
    return data ? data->options : 0;
}
// Store the depth switch value in the internal data object.
// Does nothing if the internal data object is not available.
void FsTreeWalker::setDepthSwitch(int ds)
{
    if (!data)
        return;
    data->depthswitch = ds;
}
// Store the maximum depth value in the internal data object.
// Does nothing if the internal data object is not available.
void FsTreeWalker::setMaxDepth(int md)
{
    if (!data)
        return;
    data->maxdepth = md;
}
@ -198,8 +219,7 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
data->options |= FtwTravNatural;
}
int basedepth = slashcount(top); // Only used for breadthThenDepth
data->basedepth = slashcount(top); // Only used for breadthxx
struct stat st;
// We always follow symlinks at this point. Makes more sense.
if (stat(top.c_str(), &st) == -1) {
@ -240,7 +260,7 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
if (data->options & FtwTravBreadthThenDepth) {
// Check if new depth warrants switch to depth first
// traversal (will happen on next loop iteration).
int curdepth = slashcount(dir) - basedepth;
int curdepth = slashcount(dir) - data->basedepth;
if (curdepth >= data->depthswitch) {
//fprintf(stderr, "SWITCHING TO DEPTH FIRST\n");
data->options &= ~FtwTravMask;
@ -309,6 +329,13 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
return status;
}
int curdepth = slashcount(top) - data->basedepth;
if (data->maxdepth >= 0 && curdepth >= data->maxdepth) {
LOGDEB1(("FsTreeWalker::iwalk: Maxdepth reached: [%s]\n", top.c_str()));
return status;
}
// This is a directory, read it and process entries:
// Detect if directory already seen. This could just be several
@ -345,6 +372,9 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
while ((ent = readdir(d)) != 0) {
string fn;
struct stat st;
// Maybe skip dotfiles
if ((data->options & FtwSkipDotFiles) && ent->d_name[0] == '.')
continue;
// Skip . and ..
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
continue;
@ -439,6 +469,8 @@ static int op_flags;
#define OPT_m 0x80
#define OPT_L 0x100
#define OPT_w 0x200
#define OPT_M 0x400
#define OPT_D 0x800
class myCB : public FsTreeWalkerCB {
public:
@ -489,6 +521,8 @@ static char usage [] =
" -d : use almost depth first (dir files, then subdirs)\n"
" -m : use breadth up to 4 deep then switch to -d\n"
" -w : unset default FNM_PATHNAME when using fnmatch() to match skipped paths\n"
" -M <depth>: limit depth (works with -b/m/d)\n"
" -D : skip dotfiles\n"
;
static void
Usage(void)
@ -501,70 +535,80 @@ int main(int argc, const char **argv)
{
vector<string> patterns;
vector<string> paths;
int maxdepth = -1;
thisprog = argv[0];
argc--; argv++;
while (argc > 0 && **argv == '-') {
(*argv)++;
if (!(**argv))
/* Cas du "adb - core" */
Usage();
while (**argv)
switch (*(*argv)++) {
case 'b': op_flags |= OPT_b; break;
case 'c': op_flags |= OPT_c; break;
case 'd': op_flags |= OPT_d; break;
case 'D': op_flags |= OPT_D; break;
case 'L': op_flags |= OPT_L; break;
case 'm': op_flags |= OPT_m; break;
case 'M': op_flags |= OPT_M; if (argc < 2) Usage();
maxdepth = atoi(*(++argv));
argc--;
goto b1;
case 'p': op_flags |= OPT_p; if (argc < 2) Usage();
patterns.push_back(*(++argv));
argc--;
goto b1;
case 'P': op_flags |= OPT_P; if (argc < 2) Usage();
paths.push_back(*(++argv));
argc--;
goto b1;
case 'r': op_flags |= OPT_r; break;
case 'w': op_flags |= OPT_w; break;
default: Usage(); break;
}
b1: argc--; argv++;
}
while (argc > 0 && **argv == '-') {
(*argv)++;
if (!(**argv))
/* Cas du "adb - core" */
Usage();
while (**argv)
switch (*(*argv)++) {
case 'b': op_flags |= OPT_b; break;
case 'c': op_flags |= OPT_c; break;
case 'd': op_flags |= OPT_d; break;
case 'L': op_flags |= OPT_L; break;
case 'm': op_flags |= OPT_m; break;
case 'p': op_flags |= OPT_p; if (argc < 2) Usage();
patterns.push_back(*(++argv));
argc--;
goto b1;
case 'P': op_flags |= OPT_P; if (argc < 2) Usage();
paths.push_back(*(++argv));
argc--;
goto b1;
case 'r': op_flags |= OPT_r; break;
case 'w': op_flags |= OPT_w; break;
default: Usage(); break;
}
b1: argc--; argv++;
}
if (argc != 1)
Usage();
string topdir = *argv++;argc--;
if (argc != 1)
Usage();
string topdir = *argv++;argc--;
int opt = 0;
if (op_flags & OPT_r)
opt |= FsTreeWalker::FtwNoRecurse;
if (op_flags & OPT_c)
opt |= FsTreeWalker::FtwNoCanon;
if (op_flags & OPT_L)
opt |= FsTreeWalker::FtwFollow;
if (op_flags & OPT_D)
opt |= FsTreeWalker::FtwSkipDotFiles;
int opt = 0;
if (op_flags & OPT_r)
opt |= FsTreeWalker::FtwNoRecurse;
if (op_flags & OPT_c)
opt |= FsTreeWalker::FtwNoCanon;
if (op_flags & OPT_L)
opt |= FsTreeWalker::FtwFollow;
if (op_flags & OPT_b)
opt |= FsTreeWalker::FtwTravBreadth;
else if (op_flags & OPT_d)
opt |= FsTreeWalker::FtwTravFilesThenDirs;
else if (op_flags & OPT_m)
opt |= FsTreeWalker::FtwTravBreadthThenDepth;
if (op_flags & OPT_b)
opt |= FsTreeWalker::FtwTravBreadth;
else if (op_flags & OPT_d)
opt |= FsTreeWalker::FtwTravFilesThenDirs;
else if (op_flags & OPT_m)
opt |= FsTreeWalker::FtwTravBreadthThenDepth;
string reason;
if (!recollinit(0, 0, reason)) {
fprintf(stderr, "Init failed: %s\n", reason.c_str());
exit(1);
}
if (op_flags & OPT_w) {
FsTreeWalker::setNoFnmPathname();
}
FsTreeWalker walker(opt);
walker.setSkippedNames(patterns);
walker.setSkippedPaths(paths);
myCB cb;
walker.walk(topdir, cb);
if (walker.getErrCnt() > 0)
cout << walker.getReason();
string reason;
if (!recollinit(0, 0, reason)) {
fprintf(stderr, "Init failed: %s\n", reason.c_str());
exit(1);
}
if (op_flags & OPT_w) {
FsTreeWalker::setNoFnmPathname();
}
FsTreeWalker walker;
walker.setOpts(opt);
walker.setMaxDepth(maxdepth);
walker.setSkippedNames(patterns);
walker.setSkippedPaths(paths);
myCB cb;
walker.walk(topdir, cb);
if (walker.getErrCnt() > 0)
cout << walker.getReason();
}
#endif // TEST_FSTREEWALK

View File

@ -56,7 +56,7 @@ class FsTreeWalker {
enum Status {FtwOk=0, FtwError=1, FtwStop=2,
FtwStatAll = FtwError|FtwStop};
enum Options {FtwOptNone = 0, FtwNoRecurse = 1, FtwFollow = 2,
FtwNoCanon = 4,
FtwNoCanon = 4, FtwSkipDotFiles = 8,
// Tree walking options. Natural is close to depth first: process
// directory entries as we see them, recursing into subdirectories at
// once
@ -76,7 +76,10 @@ class FsTreeWalker {
FsTreeWalker(int opts = FtwTravNatural);
~FsTreeWalker();
void setOpts(Options opts, int depthswitch = 4);
void setOpts(int opts);
int getOpts();
void setDepthSwitch(int);
void setMaxDepth(int);
/**
* Begin file system walk.
@ -110,8 +113,8 @@ class FsTreeWalker {
private:
Status iwalk(const string &dir, struct stat *stp, FsTreeWalkerCB& cb);
class Internal;
Internal *data;
class Internal;
Internal *data;
};
class FsTreeWalkerCB {