dbindexer->fsindexer, split into its own file
This commit is contained in:
parent
69dcb93059
commit
d14601bde9
582
src/index/fsindexer.cpp
Normal file
582
src/index/fsindexer.cpp
Normal file
@ -0,0 +1,582 @@
|
|||||||
|
#ifndef lint
|
||||||
|
static char rcsid[] = "@(#$Id: $ (C) 2009 J.F.Dockes";
|
||||||
|
#endif
|
||||||
|
/*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "autoconfig.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <cstring>
|
||||||
|
#include <fnmatch.h>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <list>
|
||||||
|
#include <map>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "pathut.h"
|
||||||
|
#include "conftree.h"
|
||||||
|
#include "rclconfig.h"
|
||||||
|
#include "fstreewalk.h"
|
||||||
|
#include "rcldb.h"
|
||||||
|
#include "readfile.h"
|
||||||
|
#include "indexer.h"
|
||||||
|
#include "fsindexer.h"
|
||||||
|
#include "csguess.h"
|
||||||
|
#include "transcode.h"
|
||||||
|
#include "debuglog.h"
|
||||||
|
#include "internfile.h"
|
||||||
|
#include "smallut.h"
|
||||||
|
#include "wipedir.h"
|
||||||
|
#include "fileudi.h"
|
||||||
|
|
||||||
|
#ifdef RCL_USE_ASPELL
|
||||||
|
#include "rclaspell.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// When using extended attributes, we have to use the ctime.
|
||||||
|
// This is quite an expensive price to pay...
|
||||||
|
#ifdef RCL_USE_XATTR
|
||||||
|
#define RCL_STTIME st_ctime
|
||||||
|
#else
|
||||||
|
#define RCL_STTIME st_mtime
|
||||||
|
#endif // RCL_USE_XATTR
|
||||||
|
|
||||||
|
#ifndef NO_NAMESPACES
|
||||||
|
using namespace std;
|
||||||
|
#endif /* NO_NAMESPACES */
|
||||||
|
|
||||||
|
#ifndef deleteZ
|
||||||
|
#define deleteZ(X) {delete X;X = 0;}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
 * Destructor: if a temporary directory was recorded in m_tmpdir, clear
 * it with wipedir() and remove it, then close the database.
 * A failure to remove the directory is only logged.
 */
FsIndexer::~FsIndexer()
{
    if (!m_tmpdir.empty()) {
        // Clear the directory before trying to remove the directory itself
        wipedir(m_tmpdir);
        const int rmstatus = rmdir(m_tmpdir.c_str());
        if (rmstatus < 0) {
            LOGERR(("FsIndexer::~FsIndexer: cannot clear temp dir %s\n",
                    m_tmpdir.c_str()));
        }
    }
    m_db.close();
}
|
||||||
|
|
||||||
|
/** Return the stemmer names as reported by Rcl::Db (plain forwarder). */
list<string> FsIndexer::getStemmerNames()
{
    list<string> names = Rcl::Db::getStemmerNames();
    return names;
}
|
||||||
|
|
||||||
|
// Index each directory in the topdirs for a given db
|
||||||
|
bool FsIndexer::indexTrees(bool resetbefore, list<string> *topdirs)
|
||||||
|
{
|
||||||
|
if (!init(resetbefore))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (m_updater) {
|
||||||
|
m_updater->status.reset();
|
||||||
|
m_updater->status.dbtotdocs = m_db.docCnt();
|
||||||
|
}
|
||||||
|
|
||||||
|
m_walker.setSkippedPaths(m_config->getSkippedPaths());
|
||||||
|
|
||||||
|
for (list<string>::const_iterator it = topdirs->begin();
|
||||||
|
it != topdirs->end(); it++) {
|
||||||
|
LOGDEB(("FsIndexer::index: Indexing %s into %s\n", it->c_str(),
|
||||||
|
getDbDir().c_str()));
|
||||||
|
|
||||||
|
// Set the current directory in config so that subsequent
|
||||||
|
// getConfParams() will get local values
|
||||||
|
m_config->setKeyDir(*it);
|
||||||
|
|
||||||
|
// Adjust the "follow symlinks" option
|
||||||
|
bool follow;
|
||||||
|
if (m_config->getConfParam("followLinks", &follow) && follow) {
|
||||||
|
m_walker.setOpts(FsTreeWalker::FtwFollow);
|
||||||
|
} else {
|
||||||
|
m_walker.setOpts(FsTreeWalker::FtwOptNone);
|
||||||
|
}
|
||||||
|
|
||||||
|
int abslen;
|
||||||
|
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||||
|
m_db.setAbstractParams(abslen, -1, -1);
|
||||||
|
|
||||||
|
// Set up skipped patterns for this subtree. This probably should be
|
||||||
|
// done in the directory change code in processone() instead.
|
||||||
|
m_walker.setSkippedNames(m_config->getSkippedNames());
|
||||||
|
|
||||||
|
// Walk the directory tree
|
||||||
|
if (m_walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
|
||||||
|
LOGERR(("FsIndexer::index: error while indexing %s: %s\n",
|
||||||
|
it->c_str(), m_walker.getReason().c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (m_updater) {
|
||||||
|
m_updater->status.fn.erase();
|
||||||
|
m_updater->status.phase = DbIxStatus::DBIXS_PURGE;
|
||||||
|
m_updater->update();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get rid of all database entries that don't exist in the
|
||||||
|
// filesystem anymore.
|
||||||
|
m_db.purge();
|
||||||
|
|
||||||
|
createStemmingDatabases();
|
||||||
|
createAspellDict();
|
||||||
|
|
||||||
|
if (m_updater) {
|
||||||
|
m_updater->status.phase = DbIxStatus::DBIXS_CLOSING;
|
||||||
|
m_updater->status.fn.erase();
|
||||||
|
m_updater->update();
|
||||||
|
}
|
||||||
|
// The close would be done in our destructor, but we want status here
|
||||||
|
if (!m_db.close()) {
|
||||||
|
LOGERR(("FsIndexer::index: error closing database in %s\n",
|
||||||
|
getDbDir().c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
string missing;
|
||||||
|
FileInterner::getMissingDescription(missing);
|
||||||
|
if (!missing.empty()) {
|
||||||
|
LOGINFO(("FsIndexer::index missing helper program(s):\n%s\n",
|
||||||
|
missing.c_str()));
|
||||||
|
}
|
||||||
|
m_config->storeMissingHelperDesc(missing);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create stemming databases. We also remove those which are not
|
||||||
|
// configured.
|
||||||
|
bool FsIndexer::createStemmingDatabases()
|
||||||
|
{
|
||||||
|
string slangs;
|
||||||
|
if (m_config->getConfParam("indexstemminglanguages", slangs)) {
|
||||||
|
list<string> langs;
|
||||||
|
stringToStrings(slangs, langs);
|
||||||
|
|
||||||
|
// Get the list of existing stem dbs from the database (some may have
|
||||||
|
// been manually created, we just keep those from the config
|
||||||
|
list<string> dblangs = m_db.getStemLangs();
|
||||||
|
list<string>::const_iterator it;
|
||||||
|
for (it = dblangs.begin(); it != dblangs.end(); it++) {
|
||||||
|
if (find(langs.begin(), langs.end(), *it) == langs.end())
|
||||||
|
m_db.deleteStemDb(*it);
|
||||||
|
}
|
||||||
|
for (it = langs.begin(); it != langs.end(); it++) {
|
||||||
|
if (m_updater) {
|
||||||
|
m_updater->status.phase = DbIxStatus::DBIXS_STEMDB;
|
||||||
|
m_updater->status.fn = *it;
|
||||||
|
m_updater->update();
|
||||||
|
}
|
||||||
|
m_db.createStemDb(*it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool FsIndexer::init(bool resetbefore, bool rdonly)
|
||||||
|
{
|
||||||
|
if (!rdonly && (m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0)) {
|
||||||
|
string reason;
|
||||||
|
if (!maketmpdir(m_tmpdir, reason)) {
|
||||||
|
LOGERR(("FsIndexer: cannot create temporary directory: %s\n",
|
||||||
|
reason.c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Rcl::Db::OpenMode mode = rdonly ? Rcl::Db::DbRO :
|
||||||
|
resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd;
|
||||||
|
if (!m_db.open(mode)) {
|
||||||
|
LOGERR(("FsIndexer: error opening database %s\n", getDbDir().c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool FsIndexer::createStemDb(const string &lang)
|
||||||
|
{
|
||||||
|
if (!init(false, true))
|
||||||
|
return false;
|
||||||
|
return m_db.createStemDb(lang);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The language for the aspell dictionary is handled internally by the aspell
|
||||||
|
// module, either from a configuration variable or the NLS environment.
|
||||||
|
bool FsIndexer::createAspellDict()
|
||||||
|
{
|
||||||
|
LOGDEB2(("FsIndexer::createAspellDict()\n"));
|
||||||
|
#ifdef RCL_USE_ASPELL
|
||||||
|
// For the benefit of the real-time indexer, we only initialize
|
||||||
|
// noaspell from the configuration once. It can then be set to
|
||||||
|
// true if dictionary generation fails, which avoids retrying
|
||||||
|
// it forever.
|
||||||
|
static int noaspell = -12345;
|
||||||
|
if (noaspell == -12345) {
|
||||||
|
noaspell = false;
|
||||||
|
m_config->getConfParam("noaspell", &noaspell);
|
||||||
|
}
|
||||||
|
if (noaspell)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (!init(false, true))
|
||||||
|
return false;
|
||||||
|
Aspell aspell(m_config);
|
||||||
|
string reason;
|
||||||
|
if (!aspell.init(reason)) {
|
||||||
|
LOGERR(("FsIndexer::createAspellDict: aspell init failed: %s\n",
|
||||||
|
reason.c_str()));
|
||||||
|
noaspell = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
LOGDEB(("FsIndexer::createAspellDict: creating dictionary\n"));
|
||||||
|
if (!aspell.buildDict(m_db, reason)) {
|
||||||
|
LOGERR(("FsIndexer::createAspellDict: aspell buildDict failed: %s\n",
|
||||||
|
reason.c_str()));
|
||||||
|
noaspell = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Index individual files, out of a full tree run. No database purging
|
||||||
|
*/
|
||||||
|
bool FsIndexer::indexFiles(const list<string> &filenames)
|
||||||
|
{
|
||||||
|
bool called_init = false;
|
||||||
|
|
||||||
|
list<string>::const_iterator it;
|
||||||
|
for (it = filenames.begin(); it != filenames.end(); it++) {
|
||||||
|
string dir = path_getfather(*it);
|
||||||
|
m_config->setKeyDir(dir);
|
||||||
|
int abslen;
|
||||||
|
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||||
|
m_db.setAbstractParams(abslen, -1, -1);
|
||||||
|
struct stat stb;
|
||||||
|
if (lstat(it->c_str(), &stb) != 0) {
|
||||||
|
LOGERR(("FsIndexer::indexFiles: lstat(%s): %s", it->c_str(),
|
||||||
|
strerror(errno)));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we get to indexing directory names one day, will need to test
|
||||||
|
// against dbdir here to avoid modification loops (with rclmon).
|
||||||
|
if (!S_ISREG(stb.st_mode)) {
|
||||||
|
LOGDEB2(("FsIndexer::indexFiles: %s: not a regular file\n",
|
||||||
|
it->c_str()));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
static string lstdir;
|
||||||
|
static list<string> skpl;
|
||||||
|
if (lstdir.compare(dir)) {
|
||||||
|
LOGDEB(("Recomputing list of skipped names\n"));
|
||||||
|
skpl = m_config->getSkippedNames();
|
||||||
|
lstdir = dir;
|
||||||
|
}
|
||||||
|
if (!skpl.empty()) {
|
||||||
|
list<string>::const_iterator skit;
|
||||||
|
string fn = path_getsimple(*it);
|
||||||
|
for (skit = skpl.begin(); skit != skpl.end(); skit++) {
|
||||||
|
if (fnmatch(skit->c_str(), fn.c_str(), 0) == 0) {
|
||||||
|
LOGDEB(("Skipping [%s] :matches skip list\n", fn.c_str()));
|
||||||
|
goto skipped;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Defer opening db until really needed.
|
||||||
|
if (!called_init) {
|
||||||
|
if (!init())
|
||||||
|
return false;
|
||||||
|
called_init = true;
|
||||||
|
}
|
||||||
|
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
|
||||||
|
FsTreeWalker::FtwOk) {
|
||||||
|
LOGERR(("FsIndexer::indexFiles: processone failed\n"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
skipped:
|
||||||
|
false; // Need a statement here to make compiler happy ??
|
||||||
|
}
|
||||||
|
|
||||||
|
// The close would be done in our destructor, but we want status here
|
||||||
|
if (!m_db.close()) {
|
||||||
|
LOGERR(("FsIndexer::indexfiles: error closing database in %s\n",
|
||||||
|
getDbDir().c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Purge docs for given files out of the database */
|
||||||
|
bool FsIndexer::purgeFiles(const list<string> &filenames)
|
||||||
|
{
|
||||||
|
if (!init())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
list<string>::const_iterator it;
|
||||||
|
for (it = filenames.begin(); it != filenames.end(); it++) {
|
||||||
|
string udi;
|
||||||
|
make_udi(*it, "", udi);
|
||||||
|
if (!m_db.purgeFile(udi)) {
|
||||||
|
LOGERR(("FsIndexer::purgeFiles: Database error\n"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The close would be done in our destructor, but we want status here
|
||||||
|
if (!m_db.close()) {
|
||||||
|
LOGERR(("FsIndexer::purgefiles: error closing database in %s\n",
|
||||||
|
getDbDir().c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Local fields can be set for fs subtrees in the configuration file
|
||||||
|
void FsIndexer::localfieldsfromconf()
|
||||||
|
{
|
||||||
|
LOGDEB(("FsIndexer::localfieldsfromconf\n"));
|
||||||
|
m_localfields.clear();
|
||||||
|
string sfields;
|
||||||
|
if (!m_config->getConfParam("localfields", sfields))
|
||||||
|
return;
|
||||||
|
list<string> lfields;
|
||||||
|
if (!stringToStrings(sfields, lfields)) {
|
||||||
|
LOGERR(("FsIndexer::localfieldsfromconf: bad syntax for [%s]\n",
|
||||||
|
sfields.c_str()));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (list<string>::const_iterator it = lfields.begin();
|
||||||
|
it != lfields.end(); it++) {
|
||||||
|
ConfSimple conf(*it, 1, true);
|
||||||
|
list<string> nmlst = conf.getNames("");
|
||||||
|
for (list<string>::const_iterator it1 = nmlst.begin();
|
||||||
|
it1 != nmlst.end(); it1++) {
|
||||||
|
conf.get(*it1, m_localfields[*it1]);
|
||||||
|
LOGDEB2(("FsIndexer::localfieldsfromconf: [%s] => [%s]\n",
|
||||||
|
(*it1).c_str(), m_localfields[*it1].c_str()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
void FsIndexer::setlocalfields(Rcl::Doc& doc)
|
||||||
|
{
|
||||||
|
for (map<string, string>::const_iterator it = m_localfields.begin();
|
||||||
|
it != m_localfields.end(); it++) {
|
||||||
|
// Should local fields override those coming from the document
|
||||||
|
// ? I think not, but not too sure
|
||||||
|
if (doc.meta.find(it->second) == doc.meta.end()) {
|
||||||
|
doc.meta[it->first] = it->second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// This method gets called for every file and directory found by the
|
||||||
|
/// tree walker.
|
||||||
|
///
|
||||||
|
/// It checks with the db if the file has changed and needs to be
|
||||||
|
/// reindexed. If so, it calls internfile() which will identify the
|
||||||
|
/// file type and call an appropriate handler to convert the document into
|
||||||
|
/// internal format, which we then add to the database.
|
||||||
|
///
|
||||||
|
/// Accent and majuscule handling are performed by the db module when doing
|
||||||
|
/// the actual indexing work. The Rcl::Doc created by internfile()
|
||||||
|
/// mostly contains pretty raw utf8 data.
|
||||||
|
FsTreeWalker::Status
|
||||||
|
FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||||
|
FsTreeWalker::CbFlag flg)
|
||||||
|
{
|
||||||
|
if (m_updater && !m_updater->update()) {
|
||||||
|
return FsTreeWalker::FtwStop;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we're changing directories, possibly adjust parameters (set
|
||||||
|
// the current directory in configuration object)
|
||||||
|
if (flg == FsTreeWalker::FtwDirEnter ||
|
||||||
|
flg == FsTreeWalker::FtwDirReturn) {
|
||||||
|
m_config->setKeyDir(fn);
|
||||||
|
|
||||||
|
int abslen;
|
||||||
|
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||||
|
m_db.setAbstractParams(abslen, -1, -1);
|
||||||
|
|
||||||
|
// Adjust local fields from config for this subtree
|
||||||
|
if (m_havelocalfields)
|
||||||
|
localfieldsfromconf();
|
||||||
|
|
||||||
|
if (flg == FsTreeWalker::FtwDirReturn)
|
||||||
|
return FsTreeWalker::FtwOk;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////
|
||||||
|
// Check db up to date ? Doing this before file type
|
||||||
|
// identification means that, if usesystemfilecommand is switched
|
||||||
|
// from on to off it may happen that some files which are now
|
||||||
|
// without mime type will not be purged from the db, resulting
|
||||||
|
// in possible 'cannot intern file' messages at query time...
|
||||||
|
|
||||||
|
// Document signature. This is based on m/ctime and size and used
|
||||||
|
// for the uptodate check (the value computed here is checked
|
||||||
|
// against the stored one). Changing the computation forces a full
|
||||||
|
// reindex of course.
|
||||||
|
char cbuf[100];
|
||||||
|
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
||||||
|
string sig = cbuf;
|
||||||
|
string udi;
|
||||||
|
make_udi(fn, "", udi);
|
||||||
|
if (!m_db.needUpdate(udi, sig)) {
|
||||||
|
LOGDEB(("processone: up to date: %s\n", fn.c_str()));
|
||||||
|
if (m_updater) {
|
||||||
|
// Status bar update, abort request etc.
|
||||||
|
m_updater->status.fn = fn;
|
||||||
|
if (!m_updater->update()) {
|
||||||
|
return FsTreeWalker::FtwStop;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return FsTreeWalker::FtwOk;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOGDEB0(("processone: processing: [%s] %s\n",
|
||||||
|
displayableBytes(stp->st_size).c_str(), fn.c_str()));
|
||||||
|
|
||||||
|
FileInterner interner(fn, stp, m_config, m_tmpdir, FileInterner::FIF_none);
|
||||||
|
|
||||||
|
// File name transcoded to utf8 for indexation.
|
||||||
|
string charset = m_config->getDefCharset(true);
|
||||||
|
// If this fails, the file name won't be indexed, no big deal
|
||||||
|
// Note that we used to do the full path here, but I ended up believing
|
||||||
|
// that it made more sense to use only the file name
|
||||||
|
string utf8fn; int ercnt;
|
||||||
|
if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) {
|
||||||
|
LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n",
|
||||||
|
charset.c_str(), path_getsimple(fn).c_str()));
|
||||||
|
} else if (ercnt) {
|
||||||
|
LOGDEB(("processone: fn transcode %d errors from [%s] to UTF-8: %s\n",
|
||||||
|
ercnt, charset.c_str(), path_getsimple(fn).c_str()));
|
||||||
|
}
|
||||||
|
LOGDEB2(("processone: fn transcoded from [%s] to [%s] (%s->%s)\n",
|
||||||
|
path_getsimple(fn).c_str(), utf8fn.c_str(), charset.c_str(),
|
||||||
|
"UTF-8"));
|
||||||
|
|
||||||
|
string parent_udi;
|
||||||
|
make_udi(fn, "", parent_udi);
|
||||||
|
Rcl::Doc doc;
|
||||||
|
const string plus("+");
|
||||||
|
char ascdate[20];
|
||||||
|
sprintf(ascdate, "%ld", long(stp->st_mtime));
|
||||||
|
|
||||||
|
FileInterner::Status fis = FileInterner::FIAgain;
|
||||||
|
bool hadNullIpath = false;
|
||||||
|
while (fis == FileInterner::FIAgain) {
|
||||||
|
doc.erase();
|
||||||
|
string ipath;
|
||||||
|
fis = interner.internfile(doc, ipath);
|
||||||
|
|
||||||
|
// Index at least the file name even if there was an error.
|
||||||
|
// We'll change the signature to ensure that the indexing will
|
||||||
|
// be retried every time.
|
||||||
|
|
||||||
|
|
||||||
|
// Internal access path for multi-document files
|
||||||
|
if (ipath.empty())
|
||||||
|
hadNullIpath = true;
|
||||||
|
else
|
||||||
|
doc.ipath = ipath;
|
||||||
|
|
||||||
|
// Set file name, mod time and url if not done by filter
|
||||||
|
if (doc.fmtime.empty())
|
||||||
|
doc.fmtime = ascdate;
|
||||||
|
if (doc.url.empty())
|
||||||
|
doc.url = string("file://") + fn;
|
||||||
|
if (doc.utf8fn.empty())
|
||||||
|
doc.utf8fn = utf8fn;
|
||||||
|
|
||||||
|
char cbuf[100];
|
||||||
|
sprintf(cbuf, "%ld", (long)stp->st_size);
|
||||||
|
doc.fbytes = cbuf;
|
||||||
|
// Document signature for up to date checks: concatenate
|
||||||
|
// m/ctime and size. Looking for changes only, no need to
|
||||||
|
// parseback so no need for reversible formatting. Also set,
|
||||||
|
// but never used, for subdocs.
|
||||||
|
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
||||||
|
doc.sig = cbuf;
|
||||||
|
// If there was an error, ensure indexing will be
|
||||||
|
// retried. This is for the once missing, later installed
|
||||||
|
// filter case. It can make indexing much slower (if there are
|
||||||
|
// myriads of such files, the ext script is executed for them
|
||||||
|
// and fails every time)
|
||||||
|
if (fis == FileInterner::FIError) {
|
||||||
|
doc.sig += plus;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Possibly add fields from local config
|
||||||
|
if (m_havelocalfields)
|
||||||
|
setlocalfields(doc);
|
||||||
|
// Add document to database. If there is an ipath, add it as a children
|
||||||
|
// of the file document.
|
||||||
|
string udi;
|
||||||
|
make_udi(fn, ipath, udi);
|
||||||
|
if (!m_db.addOrUpdate(udi, ipath.empty() ? "" : parent_udi, doc))
|
||||||
|
return FsTreeWalker::FtwError;
|
||||||
|
|
||||||
|
// Tell what we are doing and check for interrupt request
|
||||||
|
if (m_updater) {
|
||||||
|
++(m_updater->status.docsdone);
|
||||||
|
m_updater->status.fn = fn;
|
||||||
|
if (!ipath.empty())
|
||||||
|
m_updater->status.fn += "|" + ipath;
|
||||||
|
if (!m_updater->update()) {
|
||||||
|
return FsTreeWalker::FtwStop;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we had no instance with a null ipath, we create an empty
|
||||||
|
// document to stand for the file itself, to be used mainly for up
|
||||||
|
// to date checks. Typically this happens for an mbox file.
|
||||||
|
if (hadNullIpath == false) {
|
||||||
|
LOGDEB1(("Creating empty doc for file\n"));
|
||||||
|
Rcl::Doc fileDoc;
|
||||||
|
fileDoc.fmtime = ascdate;
|
||||||
|
fileDoc.utf8fn = utf8fn;
|
||||||
|
fileDoc.mimetype = interner.getMimetype();
|
||||||
|
fileDoc.url = string("file://") + fn;
|
||||||
|
|
||||||
|
char cbuf[100];
|
||||||
|
sprintf(cbuf, "%ld", (long)stp->st_size);
|
||||||
|
fileDoc.fbytes = cbuf;
|
||||||
|
// Document signature for up to date checks.
|
||||||
|
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
||||||
|
fileDoc.sig = cbuf;
|
||||||
|
if (!m_db.addOrUpdate(parent_udi, "", fileDoc))
|
||||||
|
return FsTreeWalker::FtwError;
|
||||||
|
}
|
||||||
|
|
||||||
|
return FsTreeWalker::FtwOk;
|
||||||
|
}
|
||||||
108
src/index/fsindexer.h
Normal file
108
src/index/fsindexer.h
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
/*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
#ifndef _fsindexer_h_included_
|
||||||
|
#define _fsindexer_h_included_
|
||||||
|
/* @(#$Id: $ (C) 2009 J.F.Dockes */
|
||||||
|
|
||||||
|
#include "fstreewalk.h"
|
||||||
|
#include "rcldb.h"
|
||||||
|
|
||||||
|
class DbIxStatusUpdater;
|
||||||
|
|
||||||
|
/** Index selected parts of the file system
|
||||||
|
|
||||||
|
Tree indexing: we inherits FsTreeWalkerCB so that, the processone()
|
||||||
|
method is called by the file-system tree walk code for each file and
|
||||||
|
directory. We keep all state needed while indexing, and finally call
|
||||||
|
the methods to purge the db of stale entries and create the stemming
|
||||||
|
databases.
|
||||||
|
|
||||||
|
Single file(s) indexing: there are also calls to index or purge lists of files.
|
||||||
|
No database purging or stem db updating in this case.
|
||||||
|
*/
|
||||||
|
class FsIndexer : public FsTreeWalkerCB {
|
||||||
|
public:
|
||||||
|
/** Constructor does nothing but store parameters
|
||||||
|
*
|
||||||
|
* @param cnf Configuration data
|
||||||
|
* @param updfunc Status updater callback
|
||||||
|
*/
|
||||||
|
FsIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0)
|
||||||
|
: m_config(cnf), m_db(cnf), m_updater(updfunc)
|
||||||
|
{
|
||||||
|
m_havelocalfields = m_config->hasNameAnywhere("localfields");
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~FsIndexer();
|
||||||
|
|
||||||
|
/** Top level file system tree index method for updating a
|
||||||
|
given database.
|
||||||
|
|
||||||
|
The list is supposed to have all the filename space for the
|
||||||
|
db, and we shall purge entries for non-existing files at the
|
||||||
|
end. We create the temporary directory, open the database,
|
||||||
|
then call a file system walk for each top-level directory.
|
||||||
|
When walking is done, we create the stem databases and close
|
||||||
|
the main db.
|
||||||
|
*/
|
||||||
|
bool indexTrees(bool resetbefore, std::list<string> *topdirs);
|
||||||
|
|
||||||
|
/** Index a list of files. No db cleaning or stemdb updating */
|
||||||
|
bool indexFiles(const std::list<string> &files);
|
||||||
|
|
||||||
|
/** Purge a list of files. */
|
||||||
|
bool purgeFiles(const std::list<string> &files);
|
||||||
|
|
||||||
|
/** Stemming reset to config: create needed, delete unconfigured */
|
||||||
|
bool createStemmingDatabases();
|
||||||
|
|
||||||
|
/** Create stem database for given language */
|
||||||
|
bool createStemDb(const string &lang);
|
||||||
|
|
||||||
|
/** Create misspelling expansion dictionary if aspell i/f is available */
|
||||||
|
bool createAspellDict();
|
||||||
|
|
||||||
|
/** Tree walker callback method */
|
||||||
|
FsTreeWalker::Status
|
||||||
|
processone(const string &, const struct stat *, FsTreeWalker::CbFlag);
|
||||||
|
|
||||||
|
/** Return my db dir */
|
||||||
|
string getDbDir() {return m_config->getDbDir();}
|
||||||
|
|
||||||
|
/** List possible stemmer names */
|
||||||
|
static list<string> getStemmerNames();
|
||||||
|
|
||||||
|
private:
|
||||||
|
FsTreeWalker m_walker;
|
||||||
|
RclConfig *m_config;
|
||||||
|
Rcl::Db m_db;
|
||||||
|
string m_tmpdir;
|
||||||
|
DbIxStatusUpdater *m_updater;
|
||||||
|
|
||||||
|
// The configuration can set attribute fields to be inherited by
|
||||||
|
// all files in a file system area. Ie: set "apptag = thunderbird"
|
||||||
|
// inside ~/.thunderbird. The boolean is set at init to avoid
|
||||||
|
// further wasteful processing if no local fields are set.
|
||||||
|
bool m_havelocalfields;
|
||||||
|
map<string, string> m_localfields;
|
||||||
|
|
||||||
|
bool init(bool rst = false, bool rdonly = false);
|
||||||
|
void localfieldsfromconf();
|
||||||
|
void setlocalfields(Rcl::Doc& doc);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* _fsindexer_h_included_ */
|
||||||
@ -25,569 +25,13 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.71 2008-12-17 08:01:40 dockes Exp
|
|||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <cstring>
|
|
||||||
#include <fnmatch.h>
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <list>
|
|
||||||
#include <map>
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "pathut.h"
|
|
||||||
#include "conftree.h"
|
|
||||||
#include "rclconfig.h"
|
|
||||||
#include "fstreewalk.h"
|
|
||||||
#include "rcldb.h"
|
|
||||||
#include "readfile.h"
|
|
||||||
#include "indexer.h"
|
|
||||||
#include "csguess.h"
|
|
||||||
#include "transcode.h"
|
|
||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "internfile.h"
|
#include "indexer.h"
|
||||||
#include "smallut.h"
|
|
||||||
#include "wipedir.h"
|
|
||||||
#include "fileudi.h"
|
|
||||||
|
|
||||||
#ifdef RCL_USE_ASPELL
|
|
||||||
#include "rclaspell.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// When using extended attributes, we have to use the ctime.
|
|
||||||
// This is quite an expensive price to pay...
|
|
||||||
#ifdef RCL_USE_XATTR
|
|
||||||
#define RCL_STTIME st_ctime
|
|
||||||
#else
|
|
||||||
#define RCL_STTIME st_mtime
|
|
||||||
#endif // RCL_USE_XATTR
|
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
|
||||||
using namespace std;
|
|
||||||
#endif /* NO_NAMESPACES */
|
|
||||||
|
|
||||||
#ifndef deleteZ
|
|
||||||
#define deleteZ(X) {delete X;X = 0;}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
DbIndexer::~DbIndexer() {
|
|
||||||
// Maybe clean up temporary directory
|
|
||||||
if (m_tmpdir.length()) {
|
|
||||||
wipedir(m_tmpdir);
|
|
||||||
if (rmdir(m_tmpdir.c_str()) < 0) {
|
|
||||||
LOGERR(("DbIndexer::~DbIndexer: cannot clear temp dir %s\n",
|
|
||||||
m_tmpdir.c_str()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
m_db.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
list<string> DbIndexer::getStemmerNames()
|
|
||||||
{
|
|
||||||
return Rcl::Db::getStemmerNames();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Index each directory in the topdirs for a given db
|
|
||||||
bool DbIndexer::indexDb(bool resetbefore, list<string> *topdirs)
|
|
||||||
{
|
|
||||||
if (!init(resetbefore))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (m_updater) {
|
|
||||||
m_updater->status.reset();
|
|
||||||
m_updater->status.dbtotdocs = m_db.docCnt();
|
|
||||||
}
|
|
||||||
|
|
||||||
m_walker.setSkippedPaths(m_config->getSkippedPaths());
|
|
||||||
|
|
||||||
for (list<string>::const_iterator it = topdirs->begin();
|
|
||||||
it != topdirs->end(); it++) {
|
|
||||||
LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(),
|
|
||||||
getDbDir().c_str()));
|
|
||||||
|
|
||||||
// Set the current directory in config so that subsequent
|
|
||||||
// getConfParams() will get local values
|
|
||||||
m_config->setKeyDir(*it);
|
|
||||||
|
|
||||||
// Adjust the "follow symlinks" option
|
|
||||||
bool follow;
|
|
||||||
if (m_config->getConfParam("followLinks", &follow) && follow) {
|
|
||||||
m_walker.setOpts(FsTreeWalker::FtwFollow);
|
|
||||||
} else {
|
|
||||||
m_walker.setOpts(FsTreeWalker::FtwOptNone);
|
|
||||||
}
|
|
||||||
|
|
||||||
int abslen;
|
|
||||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
|
||||||
m_db.setAbstractParams(abslen, -1, -1);
|
|
||||||
|
|
||||||
// Set up skipped patterns for this subtree. This probably should be
|
|
||||||
// done in the directory change code in processone() instead.
|
|
||||||
m_walker.setSkippedNames(m_config->getSkippedNames());
|
|
||||||
|
|
||||||
// Walk the directory tree
|
|
||||||
if (m_walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
|
|
||||||
LOGERR(("DbIndexer::index: error while indexing %s: %s\n",
|
|
||||||
it->c_str(), m_walker.getReason().c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (m_updater) {
|
|
||||||
m_updater->status.fn.erase();
|
|
||||||
m_updater->status.phase = DbIxStatus::DBIXS_PURGE;
|
|
||||||
m_updater->update();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get rid of all database entries that don't exist in the
|
|
||||||
// filesystem anymore.
|
|
||||||
m_db.purge();
|
|
||||||
|
|
||||||
createStemmingDatabases();
|
|
||||||
createAspellDict();
|
|
||||||
|
|
||||||
if (m_updater) {
|
|
||||||
m_updater->status.phase = DbIxStatus::DBIXS_CLOSING;
|
|
||||||
m_updater->status.fn.erase();
|
|
||||||
m_updater->update();
|
|
||||||
}
|
|
||||||
// The close would be done in our destructor, but we want status here
|
|
||||||
if (!m_db.close()) {
|
|
||||||
LOGERR(("DbIndexer::index: error closing database in %s\n",
|
|
||||||
getDbDir().c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
string missing;
|
|
||||||
FileInterner::getMissingDescription(missing);
|
|
||||||
if (!missing.empty()) {
|
|
||||||
LOGINFO(("DbIndexer::index missing helper program(s):\n%s\n",
|
|
||||||
missing.c_str()));
|
|
||||||
}
|
|
||||||
m_config->storeMissingHelperDesc(missing);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create stemming databases. We also remove those which are not
|
|
||||||
// configured.
|
|
||||||
bool DbIndexer::createStemmingDatabases()
|
|
||||||
{
|
|
||||||
string slangs;
|
|
||||||
if (m_config->getConfParam("indexstemminglanguages", slangs)) {
|
|
||||||
list<string> langs;
|
|
||||||
stringToStrings(slangs, langs);
|
|
||||||
|
|
||||||
// Get the list of existing stem dbs from the database (some may have
|
|
||||||
// been manually created, we just keep those from the config
|
|
||||||
list<string> dblangs = m_db.getStemLangs();
|
|
||||||
list<string>::const_iterator it;
|
|
||||||
for (it = dblangs.begin(); it != dblangs.end(); it++) {
|
|
||||||
if (find(langs.begin(), langs.end(), *it) == langs.end())
|
|
||||||
m_db.deleteStemDb(*it);
|
|
||||||
}
|
|
||||||
for (it = langs.begin(); it != langs.end(); it++) {
|
|
||||||
if (m_updater) {
|
|
||||||
m_updater->status.phase = DbIxStatus::DBIXS_STEMDB;
|
|
||||||
m_updater->status.fn = *it;
|
|
||||||
m_updater->update();
|
|
||||||
}
|
|
||||||
m_db.createStemDb(*it);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DbIndexer::init(bool resetbefore, bool rdonly)
|
|
||||||
{
|
|
||||||
if (!rdonly && (m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0)) {
|
|
||||||
string reason;
|
|
||||||
if (!maketmpdir(m_tmpdir, reason)) {
|
|
||||||
LOGERR(("DbIndexer: cannot create temporary directory: %s\n",
|
|
||||||
reason.c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Rcl::Db::OpenMode mode = rdonly ? Rcl::Db::DbRO :
|
|
||||||
resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd;
|
|
||||||
if (!m_db.open(mode)) {
|
|
||||||
LOGERR(("DbIndexer: error opening database %s\n", getDbDir().c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DbIndexer::createStemDb(const string &lang)
|
|
||||||
{
|
|
||||||
if (!init(false, true))
|
|
||||||
return false;
|
|
||||||
return m_db.createStemDb(lang);
|
|
||||||
}
|
|
||||||
|
|
||||||
// The language for the aspell dictionary is handled internally by the aspell
|
|
||||||
// module, either from a configuration variable or the NLS environment.
|
|
||||||
bool DbIndexer::createAspellDict()
|
|
||||||
{
|
|
||||||
LOGDEB2(("DbIndexer::createAspellDict()\n"));
|
|
||||||
#ifdef RCL_USE_ASPELL
|
|
||||||
// For the benefit of the real-time indexer, we only initialize
|
|
||||||
// noaspell from the configuration once. It can then be set to
|
|
||||||
// true if dictionary generation fails, which avoids retrying
|
|
||||||
// it forever.
|
|
||||||
static int noaspell = -12345;
|
|
||||||
if (noaspell == -12345) {
|
|
||||||
noaspell = false;
|
|
||||||
m_config->getConfParam("noaspell", &noaspell);
|
|
||||||
}
|
|
||||||
if (noaspell)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
if (!init(false, true))
|
|
||||||
return false;
|
|
||||||
Aspell aspell(m_config);
|
|
||||||
string reason;
|
|
||||||
if (!aspell.init(reason)) {
|
|
||||||
LOGERR(("DbIndexer::createAspellDict: aspell init failed: %s\n",
|
|
||||||
reason.c_str()));
|
|
||||||
noaspell = true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
LOGDEB(("DbIndexer::createAspellDict: creating dictionary\n"));
|
|
||||||
if (!aspell.buildDict(m_db, reason)) {
|
|
||||||
LOGERR(("DbIndexer::createAspellDict: aspell buildDict failed: %s\n",
|
|
||||||
reason.c_str()));
|
|
||||||
noaspell = true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Index individual files, out of a full tree run. No database purging
|
|
||||||
*/
|
|
||||||
bool DbIndexer::indexFiles(const list<string> &filenames)
|
|
||||||
{
|
|
||||||
bool called_init = false;
|
|
||||||
|
|
||||||
list<string>::const_iterator it;
|
|
||||||
for (it = filenames.begin(); it != filenames.end(); it++) {
|
|
||||||
string dir = path_getfather(*it);
|
|
||||||
m_config->setKeyDir(dir);
|
|
||||||
int abslen;
|
|
||||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
|
||||||
m_db.setAbstractParams(abslen, -1, -1);
|
|
||||||
struct stat stb;
|
|
||||||
if (lstat(it->c_str(), &stb) != 0) {
|
|
||||||
LOGERR(("DbIndexer::indexFiles: lstat(%s): %s", it->c_str(),
|
|
||||||
strerror(errno)));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we get to indexing directory names one day, will need to test
|
|
||||||
// against dbdir here to avoid modification loops (with rclmon).
|
|
||||||
if (!S_ISREG(stb.st_mode)) {
|
|
||||||
LOGDEB2(("DbIndexer::indexFiles: %s: not a regular file\n",
|
|
||||||
it->c_str()));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
static string lstdir;
|
|
||||||
static list<string> skpl;
|
|
||||||
if (lstdir.compare(dir)) {
|
|
||||||
LOGDEB(("Recomputing list of skipped names\n"));
|
|
||||||
skpl = m_config->getSkippedNames();
|
|
||||||
lstdir = dir;
|
|
||||||
}
|
|
||||||
if (!skpl.empty()) {
|
|
||||||
list<string>::const_iterator skit;
|
|
||||||
string fn = path_getsimple(*it);
|
|
||||||
for (skit = skpl.begin(); skit != skpl.end(); skit++) {
|
|
||||||
if (fnmatch(skit->c_str(), fn.c_str(), 0) == 0) {
|
|
||||||
LOGDEB(("Skipping [%s] :matches skip list\n", fn.c_str()));
|
|
||||||
goto skipped;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Defer opening db until really needed.
|
|
||||||
if (!called_init) {
|
|
||||||
if (!init())
|
|
||||||
return false;
|
|
||||||
called_init = true;
|
|
||||||
}
|
|
||||||
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
|
|
||||||
FsTreeWalker::FtwOk) {
|
|
||||||
LOGERR(("DbIndexer::indexFiles: processone failed\n"));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
skipped:
|
|
||||||
false; // Need a statement here to make compiler happy ??
|
|
||||||
}
|
|
||||||
|
|
||||||
// The close would be done in our destructor, but we want status here
|
|
||||||
if (!m_db.close()) {
|
|
||||||
LOGERR(("DbIndexer::indexfiles: error closing database in %s\n",
|
|
||||||
getDbDir().c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/** Purge docs for given files out of the database */
|
|
||||||
bool DbIndexer::purgeFiles(const list<string> &filenames)
|
|
||||||
{
|
|
||||||
if (!init())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
list<string>::const_iterator it;
|
|
||||||
for (it = filenames.begin(); it != filenames.end(); it++) {
|
|
||||||
string udi;
|
|
||||||
make_udi(*it, "", udi);
|
|
||||||
if (!m_db.purgeFile(udi)) {
|
|
||||||
LOGERR(("DbIndexer::purgeFiles: Database error\n"));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// The close would be done in our destructor, but we want status here
|
|
||||||
if (!m_db.close()) {
|
|
||||||
LOGERR(("DbIndexer::purgefiles: error closing database in %s\n",
|
|
||||||
getDbDir().c_str()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Local fields can be set for fs subtrees in the configuration file
|
|
||||||
void DbIndexer::localfieldsfromconf()
|
|
||||||
{
|
|
||||||
LOGDEB(("DbIndexer::localfieldsfromconf\n"));
|
|
||||||
m_localfields.clear();
|
|
||||||
string sfields;
|
|
||||||
if (!m_config->getConfParam("localfields", sfields))
|
|
||||||
return;
|
|
||||||
list<string> lfields;
|
|
||||||
if (!stringToStrings(sfields, lfields)) {
|
|
||||||
LOGERR(("DbIndexer::localfieldsfromconf: bad syntax for [%s]\n",
|
|
||||||
sfields.c_str()));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
for (list<string>::const_iterator it = lfields.begin();
|
|
||||||
it != lfields.end(); it++) {
|
|
||||||
ConfSimple conf(*it, 1, true);
|
|
||||||
list<string> nmlst = conf.getNames("");
|
|
||||||
for (list<string>::const_iterator it1 = nmlst.begin();
|
|
||||||
it1 != nmlst.end(); it1++) {
|
|
||||||
conf.get(*it1, m_localfields[*it1]);
|
|
||||||
LOGDEB2(("DbIndexer::localfieldsfromconf: [%s] => [%s]\n",
|
|
||||||
(*it1).c_str(), m_localfields[*it1].c_str()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
void DbIndexer::setlocalfields(Rcl::Doc& doc)
|
|
||||||
{
|
|
||||||
for (map<string, string>::const_iterator it = m_localfields.begin();
|
|
||||||
it != m_localfields.end(); it++) {
|
|
||||||
// Should local fields override those coming from the document
|
|
||||||
// ? I think not, but not too sure
|
|
||||||
if (doc.meta.find(it->second) == doc.meta.end()) {
|
|
||||||
doc.meta[it->first] = it->second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// This method gets called for every file and directory found by the
|
|
||||||
/// tree walker.
|
|
||||||
///
|
|
||||||
/// It checks with the db if the file has changed and needs to be
|
|
||||||
/// reindexed. If so, it calls internfile() which will identify the
|
|
||||||
/// file type and call an appropriate handler to convert the document into
|
|
||||||
/// internal format, which we then add to the database.
|
|
||||||
///
|
|
||||||
/// Accent and majuscule handling are performed by the db module when doing
|
|
||||||
/// the actual indexing work. The Rcl::Doc created by internfile()
|
|
||||||
/// mostly contains pretty raw utf8 data.
|
|
||||||
FsTreeWalker::Status
|
|
||||||
DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|
||||||
FsTreeWalker::CbFlag flg)
|
|
||||||
{
|
|
||||||
if (m_updater && !m_updater->update()) {
|
|
||||||
return FsTreeWalker::FtwStop;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we're changing directories, possibly adjust parameters (set
|
|
||||||
// the current directory in configuration object)
|
|
||||||
if (flg == FsTreeWalker::FtwDirEnter ||
|
|
||||||
flg == FsTreeWalker::FtwDirReturn) {
|
|
||||||
m_config->setKeyDir(fn);
|
|
||||||
|
|
||||||
int abslen;
|
|
||||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
|
||||||
m_db.setAbstractParams(abslen, -1, -1);
|
|
||||||
|
|
||||||
// Adjust local fields from config for this subtree
|
|
||||||
if (m_havelocalfields)
|
|
||||||
localfieldsfromconf();
|
|
||||||
|
|
||||||
if (flg == FsTreeWalker::FtwDirReturn)
|
|
||||||
return FsTreeWalker::FtwOk;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////
|
|
||||||
// Check db up to date ? Doing this before file type
|
|
||||||
// identification means that, if usesystemfilecommand is switched
|
|
||||||
// from on to off it may happen that some files which are now
|
|
||||||
// without mime type will not be purged from the db, resulting
|
|
||||||
// in possible 'cannot intern file' messages at query time...
|
|
||||||
|
|
||||||
// Document signature. This is based on m/ctime and size and used
|
|
||||||
// for the uptodate check (the value computed here is checked
|
|
||||||
// against the stored one). Changing the computation forces a full
|
|
||||||
// reindex of course.
|
|
||||||
char cbuf[100];
|
|
||||||
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
|
||||||
string sig = cbuf;
|
|
||||||
string udi;
|
|
||||||
make_udi(fn, "", udi);
|
|
||||||
if (!m_db.needUpdate(udi, sig)) {
|
|
||||||
LOGDEB(("processone: up to date: %s\n", fn.c_str()));
|
|
||||||
if (m_updater) {
|
|
||||||
// Status bar update, abort request etc.
|
|
||||||
m_updater->status.fn = fn;
|
|
||||||
if (!m_updater->update()) {
|
|
||||||
return FsTreeWalker::FtwStop;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return FsTreeWalker::FtwOk;
|
|
||||||
}
|
|
||||||
|
|
||||||
LOGDEB0(("processone: processing: [%s] %s\n",
|
|
||||||
displayableBytes(stp->st_size).c_str(), fn.c_str()));
|
|
||||||
|
|
||||||
FileInterner interner(fn, stp, m_config, m_tmpdir, FileInterner::FIF_none);
|
|
||||||
|
|
||||||
// File name transcoded to utf8 for indexation.
|
|
||||||
string charset = m_config->getDefCharset(true);
|
|
||||||
// If this fails, the file name won't be indexed, no big deal
|
|
||||||
// Note that we used to do the full path here, but I ended up believing
|
|
||||||
// that it made more sense to use only the file name
|
|
||||||
string utf8fn; int ercnt;
|
|
||||||
if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) {
|
|
||||||
LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n",
|
|
||||||
charset.c_str(), path_getsimple(fn).c_str()));
|
|
||||||
} else if (ercnt) {
|
|
||||||
LOGDEB(("processone: fn transcode %d errors from [%s] to UTF-8: %s\n",
|
|
||||||
ercnt, charset.c_str(), path_getsimple(fn).c_str()));
|
|
||||||
}
|
|
||||||
LOGDEB2(("processone: fn transcoded from [%s] to [%s] (%s->%s)\n",
|
|
||||||
path_getsimple(fn).c_str(), utf8fn.c_str(), charset.c_str(),
|
|
||||||
"UTF-8"));
|
|
||||||
|
|
||||||
string parent_udi;
|
|
||||||
make_udi(fn, "", parent_udi);
|
|
||||||
Rcl::Doc doc;
|
|
||||||
const string plus("+");
|
|
||||||
char ascdate[20];
|
|
||||||
sprintf(ascdate, "%ld", long(stp->st_mtime));
|
|
||||||
|
|
||||||
FileInterner::Status fis = FileInterner::FIAgain;
|
|
||||||
bool hadNullIpath = false;
|
|
||||||
while (fis == FileInterner::FIAgain) {
|
|
||||||
doc.erase();
|
|
||||||
string ipath;
|
|
||||||
fis = interner.internfile(doc, ipath);
|
|
||||||
|
|
||||||
// Index at least the file name even if there was an error.
|
|
||||||
// We'll change the signature to ensure that the indexing will
|
|
||||||
// be retried every time.
|
|
||||||
|
|
||||||
|
|
||||||
// Internal access path for multi-document files
|
|
||||||
if (ipath.empty())
|
|
||||||
hadNullIpath = true;
|
|
||||||
else
|
|
||||||
doc.ipath = ipath;
|
|
||||||
|
|
||||||
// Set file name, mod time and url if not done by filter
|
|
||||||
if (doc.fmtime.empty())
|
|
||||||
doc.fmtime = ascdate;
|
|
||||||
if (doc.url.empty())
|
|
||||||
doc.url = string("file://") + fn;
|
|
||||||
if (doc.utf8fn.empty())
|
|
||||||
doc.utf8fn = utf8fn;
|
|
||||||
|
|
||||||
char cbuf[100];
|
|
||||||
sprintf(cbuf, "%ld", (long)stp->st_size);
|
|
||||||
doc.fbytes = cbuf;
|
|
||||||
// Document signature for up to date checks: concatenate
|
|
||||||
// m/ctime and size. Looking for changes only, no need to
|
|
||||||
// parseback so no need for reversible formatting. Also set,
|
|
||||||
// but never used, for subdocs.
|
|
||||||
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
|
||||||
doc.sig = cbuf;
|
|
||||||
// If there was an error, ensure indexing will be
|
|
||||||
// retried. This is for the once missing, later installed
|
|
||||||
// filter case. It can make indexing much slower (if there are
|
|
||||||
// myriads of such files, the ext script is executed for them
|
|
||||||
// and fails every time)
|
|
||||||
if (fis == FileInterner::FIError) {
|
|
||||||
doc.sig += plus;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Possibly add fields from local config
|
|
||||||
if (m_havelocalfields)
|
|
||||||
setlocalfields(doc);
|
|
||||||
// Add document to database. If there is an ipath, add it as a children
|
|
||||||
// of the file document.
|
|
||||||
string udi;
|
|
||||||
make_udi(fn, ipath, udi);
|
|
||||||
if (!m_db.addOrUpdate(udi, ipath.empty() ? "" : parent_udi, doc))
|
|
||||||
return FsTreeWalker::FtwError;
|
|
||||||
|
|
||||||
// Tell what we are doing and check for interrupt request
|
|
||||||
if (m_updater) {
|
|
||||||
++(m_updater->status.docsdone);
|
|
||||||
m_updater->status.fn = fn;
|
|
||||||
if (!ipath.empty())
|
|
||||||
m_updater->status.fn += "|" + ipath;
|
|
||||||
if (!m_updater->update()) {
|
|
||||||
return FsTreeWalker::FtwStop;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we had no instance with a null ipath, we create an empty
|
|
||||||
// document to stand for the file itself, to be used mainly for up
|
|
||||||
// to date checks. Typically this happens for an mbox file.
|
|
||||||
if (hadNullIpath == false) {
|
|
||||||
LOGDEB1(("Creating empty doc for file\n"));
|
|
||||||
Rcl::Doc fileDoc;
|
|
||||||
fileDoc.fmtime = ascdate;
|
|
||||||
fileDoc.utf8fn = utf8fn;
|
|
||||||
fileDoc.mimetype = interner.getMimetype();
|
|
||||||
fileDoc.url = string("file://") + fn;
|
|
||||||
|
|
||||||
char cbuf[100];
|
|
||||||
sprintf(cbuf, "%ld", (long)stp->st_size);
|
|
||||||
fileDoc.fbytes = cbuf;
|
|
||||||
// Document signature for up to date checks.
|
|
||||||
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
|
||||||
fileDoc.sig = cbuf;
|
|
||||||
if (!m_db.addOrUpdate(parent_udi, "", fileDoc))
|
|
||||||
return FsTreeWalker::FtwError;
|
|
||||||
}
|
|
||||||
|
|
||||||
return FsTreeWalker::FtwOk;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
// ConfIndexer methods: ConfIndexer is the top-level object, which could
|
|
||||||
// in theory index multiple directories to multiple databases. In practice we
|
|
||||||
// have a single database per configuration.
|
|
||||||
|
|
||||||
ConfIndexer::~ConfIndexer()
|
ConfIndexer::~ConfIndexer()
|
||||||
{
|
{
|
||||||
deleteZ(m_dbindexer);
|
deleteZ(m_fsindexer);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ConfIndexer::index(bool resetbefore)
|
bool ConfIndexer::index(bool resetbefore)
|
||||||
@ -634,13 +78,13 @@ bool ConfIndexer::index(bool resetbefore)
|
|||||||
// The dbmap now has dbdir as key and directory lists as values.
|
// The dbmap now has dbdir as key and directory lists as values.
|
||||||
// Index each directory group in turn
|
// Index each directory group in turn
|
||||||
for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
|
for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
|
||||||
m_dbindexer = new DbIndexer(m_config, m_updater);
|
m_fsindexer = new FsIndexer(m_config, m_updater);
|
||||||
if (!m_dbindexer->indexDb(resetbefore, &dbit->second)) {
|
if (!m_fsindexer->indexTrees(resetbefore, &dbit->second)) {
|
||||||
deleteZ(m_dbindexer);
|
deleteZ(m_fsindexer);
|
||||||
m_reason = "Failed indexing in " + dbit->first;
|
m_reason = "Failed indexing in " + dbit->first;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
deleteZ(m_dbindexer);
|
deleteZ(m_fsindexer);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -29,8 +29,7 @@ using std::map;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "fstreewalk.h"
|
#include "fsindexer.h"
|
||||||
#include "rcldb.h"
|
|
||||||
|
|
||||||
/* Forward decl for lower level indexing object */
|
/* Forward decl for lower level indexing object */
|
||||||
class DbIndexer;
|
class DbIndexer;
|
||||||
@ -71,7 +70,7 @@ class ConfIndexer {
|
|||||||
public:
|
public:
|
||||||
enum runStatus {IndexerOk, IndexerError};
|
enum runStatus {IndexerOk, IndexerError};
|
||||||
ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0)
|
ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0)
|
||||||
: m_config(cnf), m_dbindexer(0), m_updater(updfunc)
|
: m_config(cnf), m_fsindexer(0), m_updater(updfunc)
|
||||||
{}
|
{}
|
||||||
virtual ~ConfIndexer();
|
virtual ~ConfIndexer();
|
||||||
/** Worker function: doe the actual indexing */
|
/** Worker function: doe the actual indexing */
|
||||||
@ -79,96 +78,9 @@ class ConfIndexer {
|
|||||||
const string &getReason() {return m_reason;}
|
const string &getReason() {return m_reason;}
|
||||||
private:
|
private:
|
||||||
RclConfig *m_config;
|
RclConfig *m_config;
|
||||||
DbIndexer *m_dbindexer; // Object to process directories for a given db
|
FsIndexer *m_fsindexer; // Object to process directories for a given db
|
||||||
DbIxStatusUpdater *m_updater;
|
DbIxStatusUpdater *m_updater;
|
||||||
string m_reason;
|
string m_reason;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Index things into one database
|
|
||||||
|
|
||||||
Tree indexing: we inherits FsTreeWalkerCB so that, the processone()
|
|
||||||
method is called by the file-system tree walk code for each file and
|
|
||||||
directory. We keep all state needed while indexing, and finally call
|
|
||||||
the methods to purge the db of stale entries and create the stemming
|
|
||||||
databases.
|
|
||||||
|
|
||||||
Single file(s) indexing: no database purging or stem db updating.
|
|
||||||
*/
|
|
||||||
class DbIndexer : public FsTreeWalkerCB {
|
|
||||||
public:
|
|
||||||
/** Constructor does nothing but store parameters
|
|
||||||
*
|
|
||||||
* @param cnf Configuration data
|
|
||||||
* @param updfunc Status updater callback
|
|
||||||
*/
|
|
||||||
DbIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0)
|
|
||||||
: m_config(cnf), m_db(cnf), m_updater(updfunc)
|
|
||||||
{
|
|
||||||
m_havelocalfields = m_config->hasNameAnywhere("localfields");
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual ~DbIndexer();
|
|
||||||
|
|
||||||
/** Top level file system tree index method for updating a
|
|
||||||
given database.
|
|
||||||
|
|
||||||
The list is supposed to have all the filename space for the
|
|
||||||
db, and we shall purge entries for non-existing files at the
|
|
||||||
end. We create the temporary directory, open the database,
|
|
||||||
then call a file system walk for each top-level directory.
|
|
||||||
When walking is done, we create the stem databases and close
|
|
||||||
the main db.
|
|
||||||
*/
|
|
||||||
bool indexDb(bool resetbefore, std::list<string> *topdirs);
|
|
||||||
|
|
||||||
/** Index a list of files. No db cleaning or stemdb updating */
|
|
||||||
bool indexFiles(const std::list<string> &files);
|
|
||||||
|
|
||||||
/** Purge a list of files. */
|
|
||||||
bool purgeFiles(const std::list<string> &files);
|
|
||||||
|
|
||||||
/** Stemming reset to config: create needed, delete unconfigured */
|
|
||||||
bool createStemmingDatabases();
|
|
||||||
|
|
||||||
/** Create stem database for given language */
|
|
||||||
bool createStemDb(const string &lang);
|
|
||||||
|
|
||||||
/** Create misspelling expansion dictionary if aspell i/f is available */
|
|
||||||
bool createAspellDict();
|
|
||||||
|
|
||||||
/** Tree walker callback method */
|
|
||||||
FsTreeWalker::Status
|
|
||||||
processone(const string &, const struct stat *, FsTreeWalker::CbFlag);
|
|
||||||
|
|
||||||
/** Return my db dir */
|
|
||||||
string getDbDir() {return m_config->getDbDir();}
|
|
||||||
|
|
||||||
/** List possible stemmer names */
|
|
||||||
static list<string> getStemmerNames();
|
|
||||||
|
|
||||||
private:
|
|
||||||
FsTreeWalker m_walker;
|
|
||||||
RclConfig *m_config;
|
|
||||||
Rcl::Db m_db;
|
|
||||||
string m_tmpdir;
|
|
||||||
DbIxStatusUpdater *m_updater;
|
|
||||||
|
|
||||||
// The configuration can set attribute fields to be inherited by
|
|
||||||
// all files in a file system area. Ie: set "apptag = thunderbird"
|
|
||||||
// inside ~/.thunderbird. The boolean is set at init to avoid
|
|
||||||
// further wasteful processing if no local fields are set.
|
|
||||||
bool m_havelocalfields;
|
|
||||||
map<string, string> m_localfields;
|
|
||||||
|
|
||||||
bool init(bool rst = false, bool rdonly = false);
|
|
||||||
void localfieldsfromconf();
|
|
||||||
void setlocalfields(Rcl::Doc& doc);
|
|
||||||
};
|
|
||||||
|
|
||||||
/** Helper methods in recollindex.cpp for initial checks/setup to index
|
|
||||||
* a list of files (either from the monitor or the command line) */
|
|
||||||
extern bool indexfiles(RclConfig *config, const list<string> &filenames);
|
|
||||||
extern bool purgefiles(RclConfig *config, const list<string> &filenames);
|
|
||||||
extern bool createAuxDbs(RclConfig *config);
|
|
||||||
|
|
||||||
#endif /* _INDEXER_H_INCLUDED_ */
|
#endif /* _INDEXER_H_INCLUDED_ */
|
||||||
|
|||||||
@ -39,7 +39,7 @@ static char rcsid[] = "@(#$Id: rclmonprc.cpp,v 1.14 2008-11-18 13:25:48 dockes E
|
|||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "rclmon.h"
|
#include "rclmon.h"
|
||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "indexer.h"
|
#include "recollindex.h"
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
#include "x11mon.h"
|
#include "x11mon.h"
|
||||||
|
|
||||||
@ -168,8 +168,6 @@ RclConfig *RclMonEventQueue::getConfig()
|
|||||||
return m_data->m_config;
|
return m_data->m_config;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int stopindexing;
|
|
||||||
|
|
||||||
bool RclMonEventQueue::ok()
|
bool RclMonEventQueue::ok()
|
||||||
{
|
{
|
||||||
if (m_data == 0) {
|
if (m_data == 0) {
|
||||||
|
|||||||
@ -42,16 +42,18 @@ using namespace std;
|
|||||||
#include "cancelcheck.h"
|
#include "cancelcheck.h"
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
#include "beaglequeue.h"
|
#include "beaglequeue.h"
|
||||||
|
#include "recollindex.h"
|
||||||
|
#include "fsindexer.h"
|
||||||
|
|
||||||
// Globals for atexit cleanup
|
// Globals for atexit cleanup
|
||||||
static ConfIndexer *confindexer;
|
static ConfIndexer *confindexer;
|
||||||
static DbIndexer *dbindexer;
|
static FsIndexer *fsindexer;
|
||||||
|
|
||||||
// This is set as an atexit routine,
|
// This is set as an atexit routine,
|
||||||
static void cleanup()
|
static void cleanup()
|
||||||
{
|
{
|
||||||
deleteZ(confindexer);
|
deleteZ(confindexer);
|
||||||
deleteZ(dbindexer);
|
deleteZ(fsindexer);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Global stop request flag. This is checked in a number of place in the
|
// Global stop request flag. This is checked in a number of place in the
|
||||||
@ -79,11 +81,11 @@ static void sigcleanup(int sig)
|
|||||||
stopindexing = 1;
|
stopindexing = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool makeDbIndexer(RclConfig *config)
|
static bool makeFsIndexer(RclConfig *config)
|
||||||
{
|
{
|
||||||
if (!dbindexer)
|
if (!fsindexer)
|
||||||
dbindexer = new DbIndexer(config, &updater);
|
fsindexer = new FsIndexer(config, &updater);
|
||||||
return dbindexer ? true : false;
|
return fsindexer ? true : false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The list of top directories/files wont change during program run,
|
// The list of top directories/files wont change during program run,
|
||||||
@ -95,7 +97,7 @@ static list<string> o_tdl;
|
|||||||
//
|
//
|
||||||
// This is called either from the command line or from the monitor. In
|
// This is called either from the command line or from the monitor. In
|
||||||
// this case we're called repeatedly in the same process, and the
|
// this case we're called repeatedly in the same process, and the
|
||||||
// dbIndexer is only created once by makeDbIndexer (but the db is
|
// fsindexer is only created once by makeFsIndexer (but the db is
|
||||||
// flushed anyway)
|
// flushed anyway)
|
||||||
bool indexfiles(RclConfig *config, const list<string> &filenames)
|
bool indexfiles(RclConfig *config, const list<string> &filenames)
|
||||||
{
|
{
|
||||||
@ -139,10 +141,10 @@ bool indexfiles(RclConfig *config, const list<string> &filenames)
|
|||||||
// go:
|
// go:
|
||||||
config->setKeyDir(path_getfather(*myfiles.begin()));
|
config->setKeyDir(path_getfather(*myfiles.begin()));
|
||||||
|
|
||||||
if (!makeDbIndexer(config))
|
if (!makeFsIndexer(config))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return dbindexer->indexFiles(myfiles);
|
return fsindexer->indexFiles(myfiles);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete a list of files. Same comments about call contexts as indexfiles.
|
// Delete a list of files. Same comments about call contexts as indexfiles.
|
||||||
@ -173,21 +175,21 @@ bool purgefiles(RclConfig *config, const list<string> &filenames)
|
|||||||
// go:
|
// go:
|
||||||
config->setKeyDir(path_getfather(*myfiles.begin()));
|
config->setKeyDir(path_getfather(*myfiles.begin()));
|
||||||
|
|
||||||
if (!makeDbIndexer(config))
|
if (!makeFsIndexer(config))
|
||||||
return false;
|
return false;
|
||||||
return dbindexer->purgeFiles(myfiles);
|
return fsindexer->purgeFiles(myfiles);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create stemming and spelling databases
|
// Create stemming and spelling databases
|
||||||
bool createAuxDbs(RclConfig *config)
|
bool createAuxDbs(RclConfig *config)
|
||||||
{
|
{
|
||||||
if (!makeDbIndexer(config))
|
if (!makeFsIndexer(config))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!dbindexer->createStemmingDatabases())
|
if (!fsindexer->createStemmingDatabases())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!dbindexer->createAspellDict())
|
if (!fsindexer->createAspellDict())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -196,9 +198,9 @@ bool createAuxDbs(RclConfig *config)
|
|||||||
// Create additional stem database
|
// Create additional stem database
|
||||||
static bool createstemdb(RclConfig *config, const string &lang)
|
static bool createstemdb(RclConfig *config, const string &lang)
|
||||||
{
|
{
|
||||||
if (!makeDbIndexer(config))
|
if (!makeFsIndexer(config))
|
||||||
return false;
|
return false;
|
||||||
return dbindexer->createStemDb(lang);
|
return fsindexer->createStemDb(lang);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *thisprog;
|
static const char *thisprog;
|
||||||
@ -352,7 +354,7 @@ int main(int argc, const char **argv)
|
|||||||
} else if (op_flags & OPT_l) {
|
} else if (op_flags & OPT_l) {
|
||||||
if (argc != 0)
|
if (argc != 0)
|
||||||
Usage();
|
Usage();
|
||||||
list<string> stemmers = DbIndexer::getStemmerNames();
|
list<string> stemmers = FsIndexer::getStemmerNames();
|
||||||
for (list<string>::const_iterator it = stemmers.begin();
|
for (list<string>::const_iterator it = stemmers.begin();
|
||||||
it != stemmers.end(); it++) {
|
it != stemmers.end(); it++) {
|
||||||
cout << *it << endl;
|
cout << *it << endl;
|
||||||
@ -395,9 +397,9 @@ int main(int argc, const char **argv)
|
|||||||
|
|
||||||
#ifdef RCL_USE_ASPELL
|
#ifdef RCL_USE_ASPELL
|
||||||
} else if (op_flags & OPT_S) {
|
} else if (op_flags & OPT_S) {
|
||||||
if (!makeDbIndexer(config))
|
if (!makeFsIndexer(config))
|
||||||
exit(1);
|
exit(1);
|
||||||
exit(!dbindexer->createAspellDict());
|
exit(!fsindexer->createAspellDict());
|
||||||
#endif // ASPELL
|
#endif // ASPELL
|
||||||
} else if (op_flags & OPT_b) {
|
} else if (op_flags & OPT_b) {
|
||||||
BeagleQueueIndexer beagler(config);
|
BeagleQueueIndexer beagler(config);
|
||||||
|
|||||||
29
src/index/recollindex.h
Normal file
29
src/index/recollindex.h
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
/*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
#ifndef _recollindex_h_included_
|
||||||
|
#define _recollindex_h_included_
|
||||||
|
/* @(#$Id: $ (C) 2009 J.F.Dockes */
|
||||||
|
|
||||||
|
/** Helper methods in recollindex.cpp for initial checks/setup to index
|
||||||
|
* a list of files (either from the monitor or the command line) */
|
||||||
|
extern bool indexfiles(RclConfig *config, const list<string> &filenames);
|
||||||
|
extern bool purgefiles(RclConfig *config, const list<string> &filenames);
|
||||||
|
extern bool createAuxDbs(RclConfig *config);
|
||||||
|
|
||||||
|
extern int stopindexing;
|
||||||
|
|
||||||
|
#endif /* _recollindex_h_included_ */
|
||||||
Loading…
x
Reference in New Issue
Block a user