recollindex -i now checks that the files are descendants of topdirs

This commit is contained in:
dockes 2006-10-12 14:46:02 +00:00
parent 2c9cc166de
commit 64b74d99d5
4 changed files with 68 additions and 21 deletions

View File

@ -97,7 +97,7 @@ LIBXAPIAN=`$XAPIAN_CONFIG --libs`
# libstdc++.la in the lib list # libstdc++.la in the lib list
# Filter the words of $LIBXAPIAN into tmpxaplib: entries matching the first
# case pattern hit an empty arm and are dropped, every other entry is
# appended to $tmpxaplib. The two columns of this hunk differ only in the
# pattern list: the right-hand one additionally drops -lc.
for i in $LIBXAPIAN ; do for i in $LIBXAPIAN ; do
case $i in case $i in
*stdc++*|-lm|-lgcc_s);; *stdc++*|-lm|-lgcc_s|-lc);;
*) tmpxaplib="$tmpxaplib $i";; *) tmpxaplib="$tmpxaplib $i";;
esac esac
done done

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.36 2006-10-11 14:16:25 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: indexer.cpp,v 1.37 2006-10-12 14:46:02 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -371,19 +371,30 @@ ConfIndexer::~ConfIndexer()
deleteZ(m_dbindexer); deleteZ(m_dbindexer);
} }
// NOTE(review): this range is a side-by-side diff hunk. On two-column lines
// the left text appears to be the previous ConfIndexer::index() prologue and
// the right text the new free function topdirsToList(); the comments below
// describe the right-hand (new) code.
//
// topdirsToList(conf): reads the "topdirs" parameter from conf, splits it
// into tdl with stringToStrings(), replaces every entry with
// path_tildexpand(entry) and returns the list by value.
// Returns the still-empty list when getConfParam("topdirs") fails.
// NOTE(review): both LOGERR texts keep the "ConfIndexer::index" prefix even
// though the new code no longer runs inside that method.
bool ConfIndexer::index(bool resetbefore) list<string> topdirsToList(RclConfig *conf)
{ {
list<string> tdl;
// Retrieve the list of directories to be indexed. // Retrieve the list of directories to be indexed.
string topdirs; string topdirs;
if (!m_config->getConfParam("topdirs", topdirs)) { if (!conf->getConfParam("topdirs", topdirs)) {
LOGERR(("ConfIndexer::index: no top directories in configuration\n")); LOGERR(("ConfIndexer::index: no top directories in configuration\n"));
m_reason = "Top directory list (topdirs param.) not found in config"; return tdl;
return false;
}
list<string> tdl; // List of directories to be indexed
// NOTE(review): in the new code a stringToStrings() failure is only logged;
// tdl is not cleared and execution continues into the loop below, so whether
// callers get an empty list on a parse error depends on what
// stringToStrings() leaves in tdl -- confirm.
if (!stringToStrings(topdirs, tdl)) { if (!stringToStrings(topdirs, tdl)) {
LOGERR(("ConfIndexer::index: parse error for directory list\n")); LOGERR(("ConfIndexer::index: parse error for directory list\n"));
m_reason = "Directory list parse error"; }
// Apply path_tildexpand() to every entry in place, so callers receive the
// expanded paths.
for (list<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
*it = path_tildexpand(*it);
}
return tdl;
}
bool ConfIndexer::index(bool resetbefore)
{
list<string> tdl = topdirsToList(m_config);
if (tdl.empty()) {
m_reason = "Top directory list (topdirs param.) not found in config"
"or Directory list parse error";
return false; return false;
} }
@ -396,7 +407,7 @@ bool ConfIndexer::index(bool resetbefore)
map<string, list<string> >::iterator dbit; map<string, list<string> >::iterator dbit;
for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) { for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) {
string dbdir; string dbdir;
string doctopdir = path_tildexpand(*dirit); string doctopdir = *dirit;
{ // Check top dirs. Must not be symlinks { // Check top dirs. Must not be symlinks
struct stat st; struct stat st;
if (lstat(doctopdir.c_str(), &st) < 0) { if (lstat(doctopdir.c_str(), &st) < 0) {

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _INDEXER_H_INCLUDED_ #ifndef _INDEXER_H_INCLUDED_
#define _INDEXER_H_INCLUDED_ #define _INDEXER_H_INCLUDED_
/* @(#$Id: indexer.h,v 1.17 2006-10-11 14:16:26 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: indexer.h,v 1.18 2006-10-12 14:46:02 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
@ -138,4 +138,7 @@ class DbIndexer : public FsTreeWalkerCB {
bool init(bool rst = false); bool init(bool rst = false);
}; };
/**
 * Utility function to turn the "topdirs" configuration parameter into a
 * proper list.
 *
 * The parameter value is read from conf, split with stringToStrings() and
 * each resulting entry is passed through path_tildexpand().
 *
 * @param conf configuration object queried with getConfParam("topdirs").
 * @return the list of top directories; empty when the parameter is not
 *         found. Callers in this change treat an empty list as an error.
 */
list<string> topdirsToList(RclConfig *conf);
#endif /* _INDEXER_H_INCLUDED_ */ #endif /* _INDEXER_H_INCLUDED_ */

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.21 2006-10-11 14:16:26 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.22 2006-10-12 14:46:02 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -44,10 +44,10 @@ DbIndexer *dbindexer;
static bool makeDbIndexer(RclConfig *config) static bool makeDbIndexer(RclConfig *config)
{ {
if (dbindexer) if (dbindexer) {
delete dbindexer; delete dbindexer;
// Note that we do not bother to check for multiple databases, dbindexer = 0;
// which are currently a fiction anyway. }
string dbdir = config->getDbDir(); string dbdir = config->getDbDir();
if (dbdir.empty()) { if (dbdir.empty()) {
fprintf(stderr, "makeDbIndexer: no database directory in " fprintf(stderr, "makeDbIndexer: no database directory in "
@ -64,13 +64,46 @@ static bool indexfiles(RclConfig *config, const list<string> &filenames)
// Body of indexfiles(RclConfig *config, const list<string> &filenames); the
// signature is only visible in the hunk header above. This is a side-by-side
// diff hunk: on two-column lines the left text is the previous body and the
// right text the new one. The comments below describe the new (right-hand)
// code.
//
// New behaviour: an empty filenames list returns true at once. Otherwise the
// top directories come from topdirsToList(config), both they and the file
// names go through path_canon(), and only files whose canonical name starts
// with one of the canonical top directories are kept in myfiles. The rest
// are reported on stderr. The kept files are handed to
// dbindexer->indexFiles().
if (filenames.empty()) if (filenames.empty())
return true; return true;
config->setKeyDir(path_getfather(*filenames.begin())); list<string> tdl = topdirsToList(config);
// NOTE(review): the two adjacent literals of the fprintf below concatenate
// to "...not found inconfig or Directory list parse error" (missing space),
// and the message has no trailing newline.
if (tdl.empty()) {
makeDbIndexer(config); fprintf(stderr, "Top directory list (topdirs param.) not found in"
if (dbindexer) "config or Directory list parse error");
return dbindexer->indexFiles(filenames);
else
return false; return false;
}
// Replace each top directory with its path_canon() form so it can be
// compared against the path_canon() form of each file name.
for (list<string>::iterator dit= tdl.begin(); dit!= tdl.end(); dit++) {
*dit = path_canon(*dit);
}
list<string> myfiles;
for (list<string>::const_iterator it = filenames.begin();
it != filenames.end(); it++) {
string fn = path_canon(*it);
bool ok = false;
for (list<string>::iterator dit= tdl.begin(); dit!= tdl.end(); dit++) {
// NOTE(review): this is a plain string-prefix test, not a path-component
// test. Unless path_canon() output for a directory ends with a separator,
// a top directory "/a/b" would also accept "/a/bc/file" -- confirm.
if (fn.find(*dit) == 0) {
myfiles.push_back(fn);
ok = true;
break;
}
}
if (!ok) {
fprintf(stderr, "File %s not in indexed area\n", fn.c_str());
}
}
// Nothing left to index after filtering: reported as success (true), the
// rejected files having been listed on stderr above.
if (myfiles.empty())
return true;
// Note: we should sort the file names against the topdirs here
// and check for different databases. But we can for now only have
// one database per config, so we set the keydir from the first
// file (which is not really needed...), create the indexer/db and
// go:
config->setKeyDir(path_getfather(*myfiles.begin()));
// Fail if the indexer could not be created; otherwise return the result of
// indexing the filtered list.
if (!makeDbIndexer(config) || !dbindexer)
return false;
else
return dbindexer->indexFiles(myfiles);
} }
// Create additional stem database // Create additional stem database