moved common db code from fsindexer to confindexer
This commit is contained in:
parent
0fe1574439
commit
cb3aa9bc1a
@ -49,10 +49,6 @@ static char rcsid[] = "@(#$Id: $ (C) 2009 J.F.Dockes";
|
|||||||
#include "wipedir.h"
|
#include "wipedir.h"
|
||||||
#include "fileudi.h"
|
#include "fileudi.h"
|
||||||
|
|
||||||
#ifdef RCL_USE_ASPELL
|
|
||||||
#include "rclaspell.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// When using extended attributes, we have to use the ctime.
|
// When using extended attributes, we have to use the ctime.
|
||||||
// This is quite an expensive price to pay...
|
// This is quite an expensive price to pay...
|
||||||
#ifdef RCL_USE_XATTR
|
#ifdef RCL_USE_XATTR
|
||||||
@ -71,36 +67,49 @@ using namespace std;
|
|||||||
|
|
||||||
FsIndexer::~FsIndexer() {
|
FsIndexer::~FsIndexer() {
|
||||||
// Maybe clean up temporary directory
|
// Maybe clean up temporary directory
|
||||||
if (m_tmpdir.length()) {
|
if (!m_tmpdir.empty()) {
|
||||||
wipedir(m_tmpdir);
|
wipedir(m_tmpdir);
|
||||||
if (rmdir(m_tmpdir.c_str()) < 0) {
|
if (rmdir(m_tmpdir.c_str()) < 0) {
|
||||||
LOGERR(("FsIndexer::~FsIndexer: cannot clear temp dir %s\n",
|
LOGERR(("FsIndexer::~FsIndexer: cannot clear temp dir %s\n",
|
||||||
m_tmpdir.c_str()));
|
m_tmpdir.c_str()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m_db.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
list<string> FsIndexer::getStemmerNames()
|
bool FsIndexer::init()
|
||||||
{
|
{
|
||||||
return Rcl::Db::getStemmerNames();
|
if (m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0) {
|
||||||
|
if (!maketmpdir(m_tmpdir, m_reason)) {
|
||||||
|
LOGERR(("FsIndexer: cannot create temporary directory: %s\n",
|
||||||
|
m_reason.c_str()));
|
||||||
|
m_tmpdir.erase();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Index each directory in the topdirs for a given db
|
// Recursively index each directory in the topdirs:
|
||||||
bool FsIndexer::indexTrees(bool resetbefore, list<string> *topdirs)
|
bool FsIndexer::index(bool resetbefore)
|
||||||
{
|
{
|
||||||
if (!init(resetbefore))
|
list<string> topdirs = m_config->getTopdirs();
|
||||||
|
if (topdirs.empty()) {
|
||||||
|
LOGERR(("FsIndexer::indexTrees: no valid topdirs in config\n"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!init())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (m_updater) {
|
if (m_updater) {
|
||||||
m_updater->status.reset();
|
m_updater->status.reset();
|
||||||
m_updater->status.dbtotdocs = m_db.docCnt();
|
m_updater->status.dbtotdocs = m_db->docCnt();
|
||||||
}
|
}
|
||||||
|
|
||||||
m_walker.setSkippedPaths(m_config->getSkippedPaths());
|
m_walker.setSkippedPaths(m_config->getSkippedPaths());
|
||||||
|
|
||||||
for (list<string>::const_iterator it = topdirs->begin();
|
for (list<string>::const_iterator it = topdirs.begin();
|
||||||
it != topdirs->end(); it++) {
|
it != topdirs.end(); it++) {
|
||||||
LOGDEB(("FsIndexer::index: Indexing %s into %s\n", it->c_str(),
|
LOGDEB(("FsIndexer::index: Indexing %s into %s\n", it->c_str(),
|
||||||
getDbDir().c_str()));
|
getDbDir().c_str()));
|
||||||
|
|
||||||
@ -118,7 +127,7 @@ bool FsIndexer::indexTrees(bool resetbefore, list<string> *topdirs)
|
|||||||
|
|
||||||
int abslen;
|
int abslen;
|
||||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||||
m_db.setAbstractParams(abslen, -1, -1);
|
m_db->setAbstractParams(abslen, -1, -1);
|
||||||
|
|
||||||
// Set up skipped patterns for this subtree. This probably should be
|
// Set up skipped patterns for this subtree. This probably should be
|
||||||
// done in the directory change code in processone() instead.
|
// done in the directory change code in processone() instead.
|
||||||
@ -131,30 +140,7 @@ bool FsIndexer::indexTrees(bool resetbefore, list<string> *topdirs)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (m_updater) {
|
|
||||||
m_updater->status.fn.erase();
|
|
||||||
m_updater->status.phase = DbIxStatus::DBIXS_PURGE;
|
|
||||||
m_updater->update();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get rid of all database entries that don't exist in the
|
|
||||||
// filesystem anymore.
|
|
||||||
m_db.purge();
|
|
||||||
|
|
||||||
createStemmingDatabases();
|
|
||||||
createAspellDict();
|
|
||||||
|
|
||||||
if (m_updater) {
|
|
||||||
m_updater->status.phase = DbIxStatus::DBIXS_CLOSING;
|
|
||||||
m_updater->status.fn.erase();
|
|
||||||
m_updater->update();
|
|
||||||
}
|
|
||||||
// The close would be done in our destructor, but we want status here
|
|
||||||
if (!m_db.close()) {
|
|
||||||
LOGERR(("FsIndexer::index: error closing database in %s\n",
|
|
||||||
getDbDir().c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
string missing;
|
string missing;
|
||||||
FileInterner::getMissingDescription(missing);
|
FileInterner::getMissingDescription(missing);
|
||||||
if (!missing.empty()) {
|
if (!missing.empty()) {
|
||||||
@ -165,107 +151,13 @@ bool FsIndexer::indexTrees(bool resetbefore, list<string> *topdirs)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create stemming databases. We also remove those which are not
|
|
||||||
// configured.
|
|
||||||
bool FsIndexer::createStemmingDatabases()
|
|
||||||
{
|
|
||||||
string slangs;
|
|
||||||
if (m_config->getConfParam("indexstemminglanguages", slangs)) {
|
|
||||||
list<string> langs;
|
|
||||||
stringToStrings(slangs, langs);
|
|
||||||
|
|
||||||
// Get the list of existing stem dbs from the database (some may have
|
|
||||||
// been manually created, we just keep those from the config
|
|
||||||
list<string> dblangs = m_db.getStemLangs();
|
|
||||||
list<string>::const_iterator it;
|
|
||||||
for (it = dblangs.begin(); it != dblangs.end(); it++) {
|
|
||||||
if (find(langs.begin(), langs.end(), *it) == langs.end())
|
|
||||||
m_db.deleteStemDb(*it);
|
|
||||||
}
|
|
||||||
for (it = langs.begin(); it != langs.end(); it++) {
|
|
||||||
if (m_updater) {
|
|
||||||
m_updater->status.phase = DbIxStatus::DBIXS_STEMDB;
|
|
||||||
m_updater->status.fn = *it;
|
|
||||||
m_updater->update();
|
|
||||||
}
|
|
||||||
m_db.createStemDb(*it);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool FsIndexer::init(bool resetbefore, bool rdonly)
|
|
||||||
{
|
|
||||||
if (!rdonly && (m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0)) {
|
|
||||||
string reason;
|
|
||||||
if (!maketmpdir(m_tmpdir, reason)) {
|
|
||||||
LOGERR(("FsIndexer: cannot create temporary directory: %s\n",
|
|
||||||
reason.c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Rcl::Db::OpenMode mode = rdonly ? Rcl::Db::DbRO :
|
|
||||||
resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd;
|
|
||||||
if (!m_db.open(mode)) {
|
|
||||||
LOGERR(("FsIndexer: error opening database %s\n", getDbDir().c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool FsIndexer::createStemDb(const string &lang)
|
|
||||||
{
|
|
||||||
if (!init(false, true))
|
|
||||||
return false;
|
|
||||||
return m_db.createStemDb(lang);
|
|
||||||
}
|
|
||||||
|
|
||||||
// The language for the aspell dictionary is handled internally by the aspell
|
|
||||||
// module, either from a configuration variable or the NLS environment.
|
|
||||||
bool FsIndexer::createAspellDict()
|
|
||||||
{
|
|
||||||
LOGDEB2(("FsIndexer::createAspellDict()\n"));
|
|
||||||
#ifdef RCL_USE_ASPELL
|
|
||||||
// For the benefit of the real-time indexer, we only initialize
|
|
||||||
// noaspell from the configuration once. It can then be set to
|
|
||||||
// true if dictionary generation fails, which avoids retrying
|
|
||||||
// it forever.
|
|
||||||
static int noaspell = -12345;
|
|
||||||
if (noaspell == -12345) {
|
|
||||||
noaspell = false;
|
|
||||||
m_config->getConfParam("noaspell", &noaspell);
|
|
||||||
}
|
|
||||||
if (noaspell)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
if (!init(false, true))
|
|
||||||
return false;
|
|
||||||
Aspell aspell(m_config);
|
|
||||||
string reason;
|
|
||||||
if (!aspell.init(reason)) {
|
|
||||||
LOGERR(("FsIndexer::createAspellDict: aspell init failed: %s\n",
|
|
||||||
reason.c_str()));
|
|
||||||
noaspell = true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
LOGDEB(("FsIndexer::createAspellDict: creating dictionary\n"));
|
|
||||||
if (!aspell.buildDict(m_db, reason)) {
|
|
||||||
LOGERR(("FsIndexer::createAspellDict: aspell buildDict failed: %s\n",
|
|
||||||
reason.c_str()));
|
|
||||||
noaspell = true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Index individual files, out of a full tree run. No database purging
|
* Index individual files, out of a full tree run. No database purging
|
||||||
*/
|
*/
|
||||||
bool FsIndexer::indexFiles(const list<string> &filenames)
|
bool FsIndexer::indexFiles(const list<string> &filenames)
|
||||||
{
|
{
|
||||||
bool called_init = false;
|
if (!init())
|
||||||
|
return false;
|
||||||
|
|
||||||
list<string>::const_iterator it;
|
list<string>::const_iterator it;
|
||||||
for (it = filenames.begin(); it != filenames.end(); it++) {
|
for (it = filenames.begin(); it != filenames.end(); it++) {
|
||||||
@ -273,7 +165,7 @@ bool FsIndexer::indexFiles(const list<string> &filenames)
|
|||||||
m_config->setKeyDir(dir);
|
m_config->setKeyDir(dir);
|
||||||
int abslen;
|
int abslen;
|
||||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||||
m_db.setAbstractParams(abslen, -1, -1);
|
m_db->setAbstractParams(abslen, -1, -1);
|
||||||
struct stat stb;
|
struct stat stb;
|
||||||
if (lstat(it->c_str(), &stb) != 0) {
|
if (lstat(it->c_str(), &stb) != 0) {
|
||||||
LOGERR(("FsIndexer::indexFiles: lstat(%s): %s", it->c_str(),
|
LOGERR(("FsIndexer::indexFiles: lstat(%s): %s", it->c_str(),
|
||||||
@ -306,12 +198,6 @@ bool FsIndexer::indexFiles(const list<string> &filenames)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Defer opening db until really needed.
|
|
||||||
if (!called_init) {
|
|
||||||
if (!init())
|
|
||||||
return false;
|
|
||||||
called_init = true;
|
|
||||||
}
|
|
||||||
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
|
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
|
||||||
FsTreeWalker::FtwOk) {
|
FsTreeWalker::FtwOk) {
|
||||||
LOGERR(("FsIndexer::indexFiles: processone failed\n"));
|
LOGERR(("FsIndexer::indexFiles: processone failed\n"));
|
||||||
@ -321,12 +207,6 @@ bool FsIndexer::indexFiles(const list<string> &filenames)
|
|||||||
false; // Need a statement here to make compiler happy ??
|
false; // Need a statement here to make compiler happy ??
|
||||||
}
|
}
|
||||||
|
|
||||||
// The close would be done in our destructor, but we want status here
|
|
||||||
if (!m_db.close()) {
|
|
||||||
LOGERR(("FsIndexer::indexfiles: error closing database in %s\n",
|
|
||||||
getDbDir().c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -341,18 +221,12 @@ bool FsIndexer::purgeFiles(const list<string> &filenames)
|
|||||||
for (it = filenames.begin(); it != filenames.end(); it++) {
|
for (it = filenames.begin(); it != filenames.end(); it++) {
|
||||||
string udi;
|
string udi;
|
||||||
make_udi(*it, "", udi);
|
make_udi(*it, "", udi);
|
||||||
if (!m_db.purgeFile(udi)) {
|
if (!m_db->purgeFile(udi)) {
|
||||||
LOGERR(("FsIndexer::purgeFiles: Database error\n"));
|
LOGERR(("FsIndexer::purgeFiles: Database error\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// The close would be done in our destructor, but we want status here
|
|
||||||
if (!m_db.close()) {
|
|
||||||
LOGERR(("FsIndexer::purgefiles: error closing database in %s\n",
|
|
||||||
getDbDir().c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -424,7 +298,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
|
|
||||||
int abslen;
|
int abslen;
|
||||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||||
m_db.setAbstractParams(abslen, -1, -1);
|
m_db->setAbstractParams(abslen, -1, -1);
|
||||||
|
|
||||||
// Adjust local fields from config for this subtree
|
// Adjust local fields from config for this subtree
|
||||||
if (m_havelocalfields)
|
if (m_havelocalfields)
|
||||||
@ -450,7 +324,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
string sig = cbuf;
|
string sig = cbuf;
|
||||||
string udi;
|
string udi;
|
||||||
make_udi(fn, "", udi);
|
make_udi(fn, "", udi);
|
||||||
if (!m_db.needUpdate(udi, sig)) {
|
if (!m_db->needUpdate(udi, sig)) {
|
||||||
LOGDEB(("processone: up to date: %s\n", fn.c_str()));
|
LOGDEB(("processone: up to date: %s\n", fn.c_str()));
|
||||||
if (m_updater) {
|
if (m_updater) {
|
||||||
// Status bar update, abort request etc.
|
// Status bar update, abort request etc.
|
||||||
@ -542,7 +416,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
// of the file document.
|
// of the file document.
|
||||||
string udi;
|
string udi;
|
||||||
make_udi(fn, ipath, udi);
|
make_udi(fn, ipath, udi);
|
||||||
if (!m_db.addOrUpdate(udi, ipath.empty() ? "" : parent_udi, doc))
|
if (!m_db->addOrUpdate(udi, ipath.empty() ? "" : parent_udi, doc))
|
||||||
return FsTreeWalker::FtwError;
|
return FsTreeWalker::FtwError;
|
||||||
|
|
||||||
// Tell what we are doing and check for interrupt request
|
// Tell what we are doing and check for interrupt request
|
||||||
@ -574,7 +448,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
// Document signature for up to date checks.
|
// Document signature for up to date checks.
|
||||||
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
||||||
fileDoc.sig = cbuf;
|
fileDoc.sig = cbuf;
|
||||||
if (!m_db.addOrUpdate(parent_udi, "", fileDoc))
|
if (!m_db->addOrUpdate(parent_udi, "", fileDoc))
|
||||||
return FsTreeWalker::FtwError;
|
return FsTreeWalker::FtwError;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -41,25 +41,21 @@ class FsIndexer : public FsTreeWalkerCB {
|
|||||||
* @param cnf Configuration data
|
* @param cnf Configuration data
|
||||||
* @param updfunc Status updater callback
|
* @param updfunc Status updater callback
|
||||||
*/
|
*/
|
||||||
FsIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0)
|
FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc = 0)
|
||||||
: m_config(cnf), m_db(cnf), m_updater(updfunc)
|
: m_config(cnf), m_db(db), m_updater(updfunc)
|
||||||
{
|
{
|
||||||
m_havelocalfields = m_config->hasNameAnywhere("localfields");
|
m_havelocalfields = m_config->hasNameAnywhere("localfields");
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~FsIndexer();
|
virtual ~FsIndexer();
|
||||||
|
|
||||||
/** Top level file system tree index method for updating a
|
/**
|
||||||
given database.
|
* Top level file system tree index method for updating a given database.
|
||||||
|
*
|
||||||
The list is supposed to have all the filename space for the
|
* We create the temporary directory, open the database,
|
||||||
db, and we shall purge entries for non-existing files at the
|
* then call a file system walk for each top-level directory.
|
||||||
end. We create the temporary directory, open the database,
|
*/
|
||||||
then call a file system walk for each top-level directory.
|
bool index(bool resetbefore);
|
||||||
When walking is done, we create the stem databases and close
|
|
||||||
the main db.
|
|
||||||
*/
|
|
||||||
bool indexTrees(bool resetbefore, std::list<string> *topdirs);
|
|
||||||
|
|
||||||
/** Index a list of files. No db cleaning or stemdb updating */
|
/** Index a list of files. No db cleaning or stemdb updating */
|
||||||
bool indexFiles(const std::list<string> &files);
|
bool indexFiles(const std::list<string> &files);
|
||||||
@ -67,30 +63,16 @@ class FsIndexer : public FsTreeWalkerCB {
|
|||||||
/** Purge a list of files. */
|
/** Purge a list of files. */
|
||||||
bool purgeFiles(const std::list<string> &files);
|
bool purgeFiles(const std::list<string> &files);
|
||||||
|
|
||||||
/** Stemming reset to config: create needed, delete unconfigured */
|
|
||||||
bool createStemmingDatabases();
|
|
||||||
|
|
||||||
/** Create stem database for given language */
|
|
||||||
bool createStemDb(const string &lang);
|
|
||||||
|
|
||||||
/** Create misspelling expansion dictionary if aspell i/f is available */
|
|
||||||
bool createAspellDict();
|
|
||||||
|
|
||||||
/** Tree walker callback method */
|
/** Tree walker callback method */
|
||||||
FsTreeWalker::Status
|
FsTreeWalker::Status
|
||||||
processone(const string &, const struct stat *, FsTreeWalker::CbFlag);
|
processone(const string &fn, const struct stat *, FsTreeWalker::CbFlag);
|
||||||
|
|
||||||
/** Return my db dir */
|
|
||||||
string getDbDir() {return m_config->getDbDir();}
|
|
||||||
|
|
||||||
/** List possible stemmer names */
|
|
||||||
static list<string> getStemmerNames();
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
FsTreeWalker m_walker;
|
FsTreeWalker m_walker;
|
||||||
RclConfig *m_config;
|
RclConfig *m_config;
|
||||||
Rcl::Db m_db;
|
Rcl::Db *m_db;
|
||||||
string m_tmpdir;
|
string m_tmpdir;
|
||||||
|
string m_reason;
|
||||||
DbIxStatusUpdater *m_updater;
|
DbIxStatusUpdater *m_updater;
|
||||||
|
|
||||||
// The configuration can set attribute fields to be inherited by
|
// The configuration can set attribute fields to be inherited by
|
||||||
@ -100,9 +82,10 @@ class FsIndexer : public FsTreeWalkerCB {
|
|||||||
bool m_havelocalfields;
|
bool m_havelocalfields;
|
||||||
map<string, string> m_localfields;
|
map<string, string> m_localfields;
|
||||||
|
|
||||||
bool init(bool rst = false, bool rdonly = false);
|
bool init();
|
||||||
void localfieldsfromconf();
|
void localfieldsfromconf();
|
||||||
void setlocalfields(Rcl::Doc& doc);
|
void setlocalfields(Rcl::Doc& doc);
|
||||||
|
string getDbDir() {return m_config->getDbDir();}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _fsindexer_h_included_ */
|
#endif /* _fsindexer_h_included_ */
|
||||||
|
|||||||
@ -28,6 +28,9 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.71 2008-12-17 08:01:40 dockes Exp
|
|||||||
|
|
||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "indexer.h"
|
#include "indexer.h"
|
||||||
|
#ifdef RCL_USE_ASPELL
|
||||||
|
#include "rclaspell.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
ConfIndexer::~ConfIndexer()
|
ConfIndexer::~ConfIndexer()
|
||||||
{
|
{
|
||||||
@ -36,55 +39,164 @@ ConfIndexer::~ConfIndexer()
|
|||||||
|
|
||||||
bool ConfIndexer::index(bool resetbefore)
|
bool ConfIndexer::index(bool resetbefore)
|
||||||
{
|
{
|
||||||
list<string> tdl = m_config->getTopdirs();
|
Rcl::Db::OpenMode mode = resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd;
|
||||||
if (tdl.empty()) {
|
if (!m_db.open(mode)) {
|
||||||
m_reason = "Top directory list (topdirs param.) not found in config"
|
LOGERR(("ConfIndexer: error opening database %s\n",
|
||||||
"or Directory list parse error";
|
m_config->getDbDir().c_str()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// (In theory) Each top level directory to be indexed can be
|
m_config->setKeyDir("");
|
||||||
// associated with a different database. We first group the
|
m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
|
||||||
// directories by database: it is important that all directories
|
bool ret = m_fsindexer->index(resetbefore);
|
||||||
// for a database be indexed at once so that deleted file cleanup
|
deleteZ(m_fsindexer);
|
||||||
// works.
|
|
||||||
// In practice we have a single db per configuration, but this
|
if (m_updater) {
|
||||||
// code doesn't hurt anyway
|
m_updater->status.fn.erase();
|
||||||
list<string>::iterator dirit;
|
m_updater->status.phase = DbIxStatus::DBIXS_PURGE;
|
||||||
map<string, list<string> > dbmap;
|
m_updater->update();
|
||||||
map<string, list<string> >::iterator dbit;
|
}
|
||||||
for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) {
|
// Get rid of all database entries that don't exist in the
|
||||||
string dbdir;
|
// filesystem anymore.
|
||||||
string doctopdir = *dirit;
|
m_db.purge();
|
||||||
m_config->setKeyDir(doctopdir);
|
|
||||||
dbdir = m_config->getDbDir();
|
if (m_updater) {
|
||||||
if (dbdir.empty()) {
|
m_updater->status.phase = DbIxStatus::DBIXS_CLOSING;
|
||||||
LOGERR(("ConfIndexer::index: no database directory in "
|
m_updater->status.fn.erase();
|
||||||
"configuration for %s\n", doctopdir.c_str()));
|
m_updater->update();
|
||||||
m_reason = "No database directory set for " + doctopdir;
|
}
|
||||||
return false;
|
// The close would be done in our destructor, but we want status here
|
||||||
}
|
if (!m_db.close()) {
|
||||||
dbit = dbmap.find(dbdir);
|
LOGERR(("ConfIndexer::index: error closing database in %s\n",
|
||||||
if (dbit == dbmap.end()) {
|
m_config->getDbDir().c_str()));
|
||||||
list<string> l;
|
return false;
|
||||||
l.push_back(doctopdir);
|
}
|
||||||
dbmap[dbdir] = l;
|
|
||||||
} else {
|
createStemmingDatabases();
|
||||||
dbit->second.push_back(doctopdir);
|
createAspellDict();
|
||||||
}
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ConfIndexer::indexFiles(const std::list<string> &files)
|
||||||
|
{
|
||||||
|
if (!m_db.open(Rcl::Db::DbUpd)) {
|
||||||
|
LOGERR(("ConfIndexer: indexFiles error opening database %s\n",
|
||||||
|
m_config->getDbDir().c_str()));
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
m_config->setKeyDir("");
|
m_config->setKeyDir("");
|
||||||
|
m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
|
||||||
|
bool ret = m_fsindexer->indexFiles(files);
|
||||||
|
deleteZ(m_fsindexer);
|
||||||
|
// The close would be done in our destructor, but we want status here
|
||||||
|
if (!m_db.close()) {
|
||||||
|
LOGERR(("ConfIndexer::index: error closing database in %s\n",
|
||||||
|
m_config->getDbDir().c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
// The dbmap now has dbdir as key and directory lists as values.
|
bool ConfIndexer::purgeFiles(const std::list<string> &files)
|
||||||
// Index each directory group in turn
|
{
|
||||||
for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
|
if (!m_db.open(Rcl::Db::DbUpd)) {
|
||||||
m_fsindexer = new FsIndexer(m_config, m_updater);
|
LOGERR(("ConfIndexer: purgeFiles error opening database %s\n",
|
||||||
if (!m_fsindexer->indexTrees(resetbefore, &dbit->second)) {
|
m_config->getDbDir().c_str()));
|
||||||
deleteZ(m_fsindexer);
|
return false;
|
||||||
m_reason = "Failed indexing in " + dbit->first;
|
}
|
||||||
return false;
|
m_config->setKeyDir("");
|
||||||
|
m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
|
||||||
|
bool ret = m_fsindexer->purgeFiles(files);
|
||||||
|
deleteZ(m_fsindexer);
|
||||||
|
// The close would be done in our destructor, but we want status here
|
||||||
|
if (!m_db.close()) {
|
||||||
|
LOGERR(("ConfIndexer::index: error closing database in %s\n",
|
||||||
|
m_config->getDbDir().c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create stemming databases. We also remove those which are not
|
||||||
|
// configured.
|
||||||
|
bool ConfIndexer::createStemmingDatabases()
|
||||||
|
{
|
||||||
|
string slangs;
|
||||||
|
if (m_config->getConfParam("indexstemminglanguages", slangs)) {
|
||||||
|
list<string> langs;
|
||||||
|
stringToStrings(slangs, langs);
|
||||||
|
|
||||||
|
// Get the list of existing stem dbs from the database (some may have
|
||||||
|
// been manually created, we just keep those from the config
|
||||||
|
list<string> dblangs = m_db.getStemLangs();
|
||||||
|
list<string>::const_iterator it;
|
||||||
|
for (it = dblangs.begin(); it != dblangs.end(); it++) {
|
||||||
|
if (find(langs.begin(), langs.end(), *it) == langs.end())
|
||||||
|
m_db.deleteStemDb(*it);
|
||||||
|
}
|
||||||
|
for (it = langs.begin(); it != langs.end(); it++) {
|
||||||
|
if (m_updater) {
|
||||||
|
m_updater->status.phase = DbIxStatus::DBIXS_STEMDB;
|
||||||
|
m_updater->status.fn = *it;
|
||||||
|
m_updater->update();
|
||||||
|
}
|
||||||
|
m_db.createStemDb(*it);
|
||||||
}
|
}
|
||||||
deleteZ(m_fsindexer);
|
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ConfIndexer::createStemDb(const string &lang)
|
||||||
|
{
|
||||||
|
if (!m_db.open(Rcl::Db::DbRO)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return m_db.createStemDb(lang);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The language for the aspell dictionary is handled internally by the aspell
|
||||||
|
// module, either from a configuration variable or the NLS environment.
|
||||||
|
bool ConfIndexer::createAspellDict()
|
||||||
|
{
|
||||||
|
LOGDEB2(("FsIndexer::createAspellDict()\n"));
|
||||||
|
#ifdef RCL_USE_ASPELL
|
||||||
|
// For the benefit of the real-time indexer, we only initialize
|
||||||
|
// noaspell from the configuration once. It can then be set to
|
||||||
|
// true if dictionary generation fails, which avoids retrying
|
||||||
|
// it forever.
|
||||||
|
static int noaspell = -12345;
|
||||||
|
if (noaspell == -12345) {
|
||||||
|
noaspell = false;
|
||||||
|
m_config->getConfParam("noaspell", &noaspell);
|
||||||
|
}
|
||||||
|
if (noaspell)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (!m_db.open(Rcl::Db::DbRO)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
Aspell aspell(m_config);
|
||||||
|
string reason;
|
||||||
|
if (!aspell.init(reason)) {
|
||||||
|
LOGERR(("FsIndexer::createAspellDict: aspell init failed: %s\n",
|
||||||
|
reason.c_str()));
|
||||||
|
noaspell = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
LOGDEB(("FsIndexer::createAspellDict: creating dictionary\n"));
|
||||||
|
if (!aspell.buildDict(m_db, reason)) {
|
||||||
|
LOGERR(("FsIndexer::createAspellDict: aspell buildDict failed: %s\n",
|
||||||
|
reason.c_str()));
|
||||||
|
noaspell = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
list<string> ConfIndexer::getStemmerNames()
|
||||||
|
{
|
||||||
|
return Rcl::Db::getStemmerNames();
|
||||||
|
}
|
||||||
|
|||||||
@ -70,15 +70,37 @@ class ConfIndexer {
|
|||||||
public:
|
public:
|
||||||
enum runStatus {IndexerOk, IndexerError};
|
enum runStatus {IndexerOk, IndexerError};
|
||||||
ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0)
|
ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0)
|
||||||
: m_config(cnf), m_fsindexer(0), m_updater(updfunc)
|
: m_config(cnf), m_db(cnf), m_fsindexer(0), m_updater(updfunc)
|
||||||
{}
|
{}
|
||||||
virtual ~ConfIndexer();
|
virtual ~ConfIndexer();
|
||||||
|
|
||||||
/** Worker function: does the actual indexing */
|
/** Worker function: does the actual indexing */
|
||||||
bool index(bool resetbefore = false);
|
bool index(bool resetbefore = false);
|
||||||
|
|
||||||
const string &getReason() {return m_reason;}
|
const string &getReason() {return m_reason;}
|
||||||
|
|
||||||
|
/** Stemming reset to config: create needed, delete unconfigured */
|
||||||
|
bool createStemmingDatabases();
|
||||||
|
|
||||||
|
/** Create stem database for given language */
|
||||||
|
bool createStemDb(const string &lang);
|
||||||
|
|
||||||
|
/** Create misspelling expansion dictionary if aspell i/f is available */
|
||||||
|
bool createAspellDict();
|
||||||
|
|
||||||
|
/** List possible stemmer names */
|
||||||
|
static list<string> getStemmerNames();
|
||||||
|
|
||||||
|
/** Index a list of files. No db cleaning or stemdb updating */
|
||||||
|
bool indexFiles(const std::list<string> &files);
|
||||||
|
|
||||||
|
/** Purge a list of files. */
|
||||||
|
bool purgeFiles(const std::list<string> &files);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
RclConfig *m_config;
|
RclConfig *m_config;
|
||||||
FsIndexer *m_fsindexer; // Object to process directories for a given db
|
Rcl::Db m_db;
|
||||||
|
FsIndexer *m_fsindexer;
|
||||||
DbIxStatusUpdater *m_updater;
|
DbIxStatusUpdater *m_updater;
|
||||||
string m_reason;
|
string m_reason;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -47,13 +47,11 @@ using namespace std;
|
|||||||
|
|
||||||
// Globals for atexit cleanup
|
// Globals for atexit cleanup
|
||||||
static ConfIndexer *confindexer;
|
static ConfIndexer *confindexer;
|
||||||
static FsIndexer *fsindexer;
|
|
||||||
|
|
||||||
// This is set as an atexit routine,
|
// This is set as an atexit routine,
|
||||||
static void cleanup()
|
static void cleanup()
|
||||||
{
|
{
|
||||||
deleteZ(confindexer);
|
deleteZ(confindexer);
|
||||||
deleteZ(fsindexer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Global stop request flag. This is checked in a number of places in the
|
// Global stop request flag. This is checked in a number of places in the
|
||||||
@ -81,11 +79,11 @@ static void sigcleanup(int sig)
|
|||||||
stopindexing = 1;
|
stopindexing = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool makeFsIndexer(RclConfig *config)
|
static bool makeIndexer(RclConfig *config)
|
||||||
{
|
{
|
||||||
if (!fsindexer)
|
if (!confindexer)
|
||||||
fsindexer = new FsIndexer(config, &updater);
|
confindexer = new ConfIndexer(config, &updater);
|
||||||
return fsindexer ? true : false;
|
return confindexer ? true : false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The list of top directories/files won't change during program run,
|
// The list of top directories/files won't change during program run,
|
||||||
@ -97,7 +95,7 @@ static list<string> o_tdl;
|
|||||||
//
|
//
|
||||||
// This is called either from the command line or from the monitor. In
|
// This is called either from the command line or from the monitor. In
|
||||||
// this case we're called repeatedly in the same process, and the
|
// this case we're called repeatedly in the same process, and the
|
||||||
// fsindexer is only created once by makeFsIndexer (but the db is
|
// confindexer is only created once by makeIndexer (but the db is
|
||||||
// flushed anyway)
|
// flushed anyway)
|
||||||
bool indexfiles(RclConfig *config, const list<string> &filenames)
|
bool indexfiles(RclConfig *config, const list<string> &filenames)
|
||||||
{
|
{
|
||||||
@ -141,10 +139,10 @@ bool indexfiles(RclConfig *config, const list<string> &filenames)
|
|||||||
// go:
|
// go:
|
||||||
config->setKeyDir(path_getfather(*myfiles.begin()));
|
config->setKeyDir(path_getfather(*myfiles.begin()));
|
||||||
|
|
||||||
if (!makeFsIndexer(config))
|
if (!makeIndexer(config))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return fsindexer->indexFiles(myfiles);
|
return confindexer->indexFiles(myfiles);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete a list of files. Same comments about call contexts as indexfiles.
|
// Delete a list of files. Same comments about call contexts as indexfiles.
|
||||||
@ -175,21 +173,21 @@ bool purgefiles(RclConfig *config, const list<string> &filenames)
|
|||||||
// go:
|
// go:
|
||||||
config->setKeyDir(path_getfather(*myfiles.begin()));
|
config->setKeyDir(path_getfather(*myfiles.begin()));
|
||||||
|
|
||||||
if (!makeFsIndexer(config))
|
if (!makeIndexer(config))
|
||||||
return false;
|
return false;
|
||||||
return fsindexer->purgeFiles(myfiles);
|
return confindexer->purgeFiles(myfiles);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create stemming and spelling databases
|
// Create stemming and spelling databases
|
||||||
bool createAuxDbs(RclConfig *config)
|
bool createAuxDbs(RclConfig *config)
|
||||||
{
|
{
|
||||||
if (!makeFsIndexer(config))
|
if (!makeIndexer(config))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!fsindexer->createStemmingDatabases())
|
if (!confindexer->createStemmingDatabases())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!fsindexer->createAspellDict())
|
if (!confindexer->createAspellDict())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -198,9 +196,9 @@ bool createAuxDbs(RclConfig *config)
|
|||||||
// Create additional stem database
|
// Create additional stem database
|
||||||
static bool createstemdb(RclConfig *config, const string &lang)
|
static bool createstemdb(RclConfig *config, const string &lang)
|
||||||
{
|
{
|
||||||
if (!makeFsIndexer(config))
|
if (!makeIndexer(config))
|
||||||
return false;
|
return false;
|
||||||
return fsindexer->createStemDb(lang);
|
return confindexer->createStemDb(lang);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *thisprog;
|
static const char *thisprog;
|
||||||
@ -354,7 +352,7 @@ int main(int argc, const char **argv)
|
|||||||
} else if (op_flags & OPT_l) {
|
} else if (op_flags & OPT_l) {
|
||||||
if (argc != 0)
|
if (argc != 0)
|
||||||
Usage();
|
Usage();
|
||||||
list<string> stemmers = FsIndexer::getStemmerNames();
|
list<string> stemmers = ConfIndexer::getStemmerNames();
|
||||||
for (list<string>::const_iterator it = stemmers.begin();
|
for (list<string>::const_iterator it = stemmers.begin();
|
||||||
it != stemmers.end(); it++) {
|
it != stemmers.end(); it++) {
|
||||||
cout << *it << endl;
|
cout << *it << endl;
|
||||||
@ -397,9 +395,9 @@ int main(int argc, const char **argv)
|
|||||||
|
|
||||||
#ifdef RCL_USE_ASPELL
|
#ifdef RCL_USE_ASPELL
|
||||||
} else if (op_flags & OPT_S) {
|
} else if (op_flags & OPT_S) {
|
||||||
if (!makeFsIndexer(config))
|
if (!makeIndexer(config))
|
||||||
exit(1);
|
exit(1);
|
||||||
exit(!fsindexer->createAspellDict());
|
exit(!confindexer->createAspellDict());
|
||||||
#endif // ASPELL
|
#endif // ASPELL
|
||||||
} else if (op_flags & OPT_b) {
|
} else if (op_flags & OPT_b) {
|
||||||
BeagleQueueIndexer beagler(config);
|
BeagleQueueIndexer beagler(config);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user