recollindex -i now checks that the files are descendants of topdirs

This commit is contained in:
dockes 2006-10-12 14:46:02 +00:00
parent 2c9cc166de
commit 64b74d99d5
4 changed files with 68 additions and 21 deletions

View File

@ -97,7 +97,7 @@ LIBXAPIAN=`$XAPIAN_CONFIG --libs`
# libstdc++.la in the lib list
# Walk the words of $LIBXAPIAN one at a time: words matching one of the
# listed patterns (C++ runtime, -lm, -lgcc_s, and -lc in the second pattern
# line) are dropped, every other word is appended to tmpxaplib.
# NOTE(review): the two nearly identical pattern lines look like the before
# and after versions of a single line in a diff listing -- confirm against
# the real configure script.
for i in $LIBXAPIAN ; do
case $i in
*stdc++*|-lm|-lgcc_s);;
*stdc++*|-lm|-lgcc_s|-lc);;
*) tmpxaplib="$tmpxaplib $i";;
esac
done

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.36 2006-10-11 14:16:25 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.37 2006-10-12 14:46:02 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -371,22 +371,33 @@ ConfIndexer::~ConfIndexer()
deleteZ(m_dbindexer);
}
bool ConfIndexer::index(bool resetbefore)
// Build the list of top-level directories from the "topdirs" configuration
// parameter: the raw value is split with stringToStrings() and every entry
// is then replaced by the result of path_tildexpand().
// Returns the resulting list; it is returned empty when the parameter cannot
// be read from the configuration.
// NOTE(review): this listing appears to interleave pre- and post-change
// lines of a diff (tdl is declared twice, and the m_config / m_reason /
// "return false" lines do not fit a free function returning list<string>)
// -- confirm against the actual indexer.cpp before relying on it.
// NOTE(review): the log messages below still name ConfIndexer::index rather
// than this function.
list<string> topdirsToList(RclConfig *conf)
{
list<string> tdl;
// Retrieve the list of directories to be indexed.
string topdirs;
if (!m_config->getConfParam("topdirs", topdirs)) {
if (!conf->getConfParam("topdirs", topdirs)) {
LOGERR(("ConfIndexer::index: no top directories in configuration\n"));
m_reason = "Top directory list (topdirs param.) not found in config";
return false;
// Parameter missing: hand back the (still empty) list.
return tdl;
}
list<string> tdl; // List of directories to be indexed
if (!stringToStrings(topdirs, tdl)) {
LOGERR(("ConfIndexer::index: parse error for directory list\n"));
m_reason = "Directory list parse error";
}
// NOTE(review): there is no early return after a parse failure, so whatever
// stringToStrings() left in tdl is still expanded and returned -- TODO
// confirm this is intended.
for (list<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
*it = path_tildexpand(*it);
}
return tdl;
}
bool ConfIndexer::index(bool resetbefore)
{
list<string> tdl = topdirsToList(m_config);
if (tdl.empty()) {
m_reason = "Top directory list (topdirs param.) not found in config"
"or Directory list parse error";
return false;
}
// Each top level directory to be indexed can be associated with a
// different database. We first group the directories by database:
// it is important that all directories for a database be indexed
@ -396,7 +407,7 @@ bool ConfIndexer::index(bool resetbefore)
map<string, list<string> >::iterator dbit;
for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) {
string dbdir;
string doctopdir = path_tildexpand(*dirit);
string doctopdir = *dirit;
{ // Check top dirs. Must not be symlinks
struct stat st;
if (lstat(doctopdir.c_str(), &st) < 0) {

View File

@ -16,7 +16,7 @@
*/
#ifndef _INDEXER_H_INCLUDED_
#define _INDEXER_H_INCLUDED_
/* @(#$Id: indexer.h,v 1.17 2006-10-11 14:16:26 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: indexer.h,v 1.18 2006-10-12 14:46:02 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
@ -138,4 +138,7 @@ class DbIndexer : public FsTreeWalkerCB {
bool init(bool rst = false);
};
/**
 * Utility function: read the "topdirs" parameter from conf and return it as
 * a list of directory names, each one passed through path_tildexpand().
 * The returned list is empty when the parameter is not set.
 */
list<string> topdirsToList(RclConfig *conf);
#endif /* _INDEXER_H_INCLUDED_ */

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.21 2006-10-11 14:16:26 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.22 2006-10-12 14:46:02 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -44,10 +44,10 @@ DbIndexer *dbindexer;
static bool makeDbIndexer(RclConfig *config)
{
if (dbindexer)
if (dbindexer) {
delete dbindexer;
// Note that we do not bother to check for multiple databases,
// which are currently a fiction anyway.
dbindexer = 0;
}
string dbdir = config->getDbDir();
if (dbdir.empty()) {
fprintf(stderr, "makeDbIndexer: no database directory in "
@ -64,13 +64,46 @@ static bool indexfiles(RclConfig *config, const list<string> &filenames)
// Index the files named in filenames. Each name is canonicalized with
// path_canon() and kept only if it starts with one of the canonicalized
// top directories obtained from topdirsToList(); rejected names are
// reported on stderr.
// Returns true when there is nothing to index, false when the top directory
// list is empty or no indexer could be created, and otherwise the result of
// dbindexer->indexFiles().
// NOTE(review): this listing appears to interleave pre- and post-change
// lines of a diff (an early setKeyDir / makeDbIndexer / indexFiles sequence
// on the unfiltered list is followed by a second, filtered one) -- confirm
// against the actual recollindex.cpp.
if (filenames.empty())
return true;
config->setKeyDir(path_getfather(*filenames.begin()));
makeDbIndexer(config);
if (dbindexer)
return dbindexer->indexFiles(filenames);
else
list<string> tdl = topdirsToList(config);
if (tdl.empty()) {
// NOTE(review): the two adjacent literals are concatenated without a
// separating space ("...not found inconfig or...") and the message has no
// trailing newline.
fprintf(stderr, "Top directory list (topdirs param.) not found in"
"config or Directory list parse error");
return false;
}
// Canonicalize the top directories so they can be compared with the
// canonicalized file names below.
for (list<string>::iterator dit= tdl.begin(); dit!= tdl.end(); dit++) {
*dit = path_canon(*dit);
}
// Files that passed the containment check, in their canonical form.
list<string> myfiles;
for (list<string>::const_iterator it = filenames.begin();
it != filenames.end(); it++) {
string fn = path_canon(*it);
bool ok = false;
for (list<string>::iterator dit= tdl.begin(); dit!= tdl.end(); dit++) {
// NOTE(review): this is a plain string-prefix test, not a path-component
// test; unless path_canon() guarantees a trailing separator on directories,
// a file under "/a/bc" would also match top directory "/a/b" -- TODO confirm.
if (fn.find(*dit) == 0) {
myfiles.push_back(fn);
ok = true;
break;
}
}
if (!ok) {
fprintf(stderr, "File %s not in indexed area\n", fn.c_str());
}
}
// Nothing left after filtering: this is reported as success.
if (myfiles.empty())
return true;
// Note: we should sort the file names against the topdirs here
// and check for different databases. But we can for now only have
// one database per config, so we set the keydir from the first
// file (which is not really needed...), create the indexer/db and
// go:
config->setKeyDir(path_getfather(*myfiles.begin()));
if (!makeDbIndexer(config) || !dbindexer)
return false;
else
return dbindexer->indexFiles(myfiles);
}
// Create additional stem database