From 64b74d99d5f88aa442783ed383ceb5ef4d3ccfa8 Mon Sep 17 00:00:00 2001 From: dockes Date: Thu, 12 Oct 2006 14:46:02 +0000 Subject: [PATCH] recollindex -i now checks that the files are descendants of topdirs --- src/configure.ac | 2 +- src/index/indexer.cpp | 29 ++++++++++++++------- src/index/indexer.h | 5 +++- src/index/recollindex.cpp | 53 +++++++++++++++++++++++++++++++-------- 4 files changed, 68 insertions(+), 21 deletions(-) diff --git a/src/configure.ac b/src/configure.ac index fefab67e..92af523a 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -97,7 +97,7 @@ LIBXAPIAN=`$XAPIAN_CONFIG --libs` # libstdc++.la in the lib list for i in $LIBXAPIAN ; do case $i in - *stdc++*|-lm|-lgcc_s);; + *stdc++*|-lm|-lgcc_s|-lc);; *) tmpxaplib="$tmpxaplib $i";; esac done diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index d77b876e..27b8a903 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: indexer.cpp,v 1.36 2006-10-11 14:16:25 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: indexer.cpp,v 1.37 2006-10-12 14:46:02 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -371,22 +371,33 @@ ConfIndexer::~ConfIndexer() deleteZ(m_dbindexer); } -bool ConfIndexer::index(bool resetbefore) +list topdirsToList(RclConfig *conf) { + list tdl; // Retrieve the list of directories to be indexed. string topdirs; - if (!m_config->getConfParam("topdirs", topdirs)) { + if (!conf->getConfParam("topdirs", topdirs)) { LOGERR(("ConfIndexer::index: no top directories in configuration\n")); - m_reason = "Top directory list (topdirs param.) not found in config"; - return false; + return tdl; } - list tdl; // List of directories to be indexed if (!stringToStrings(topdirs, tdl)) { LOGERR(("ConfIndexer::index: parse error for directory list\n")); - m_reason = "Directory list parse error"; + } + for (list::iterator it = tdl.begin(); it != tdl.end(); it++) { + *it = path_tildexpand(*it); + } + return tdl; +} + +bool ConfIndexer::index(bool resetbefore) +{ + list tdl = topdirsToList(m_config); + if (tdl.empty()) { + m_reason = "Top directory list (topdirs param.) not found in config" + "or Directory list parse error"; return false; } - + // Each top level directory to be indexed can be associated with a // different database. We first group the directories by database: // it is important that all directories for a database be indexed @@ -396,7 +407,7 @@ bool ConfIndexer::index(bool resetbefore) map >::iterator dbit; for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) { string dbdir; - string doctopdir = path_tildexpand(*dirit); + string doctopdir = *dirit; { // Check top dirs. Must not be symlinks struct stat st; if (lstat(doctopdir.c_str(), &st) < 0) { diff --git a/src/index/indexer.h b/src/index/indexer.h index 8af1d2e8..ccbc737a 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -16,7 +16,7 @@ */ #ifndef _INDEXER_H_INCLUDED_ #define _INDEXER_H_INCLUDED_ -/* @(#$Id: indexer.h,v 1.17 2006-10-11 14:16:26 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: indexer.h,v 1.18 2006-10-12 14:46:02 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -138,4 +138,7 @@ class DbIndexer : public FsTreeWalkerCB { bool init(bool rst = false); }; +/** utility function to turn topdirs into a proper list */ +list topdirsToList(RclConfig *conf); + #endif /* _INDEXER_H_INCLUDED_ */ diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index 0ee8c0ce..07fc11c2 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.21 2006-10-11 14:16:26 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.22 2006-10-12 14:46:02 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -44,10 +44,10 @@ DbIndexer *dbindexer; static bool makeDbIndexer(RclConfig *config) { - if (dbindexer) + if (dbindexer) { delete dbindexer; - // Note that we do not bother to check for multiple databases, - // which are currently a fiction anyway. + dbindexer = 0; + } string dbdir = config->getDbDir(); if (dbdir.empty()) { fprintf(stderr, "makeDbIndexer: no database directory in " @@ -64,13 +64,46 @@ static bool indexfiles(RclConfig *config, const list &filenames) if (filenames.empty()) return true; - config->setKeyDir(path_getfather(*filenames.begin())); - - makeDbIndexer(config); - if (dbindexer) - return dbindexer->indexFiles(filenames); - else + list tdl = topdirsToList(config); + if (tdl.empty()) { + fprintf(stderr, "Top directory list (topdirs param.) not found in" + "config or Directory list parse error"); return false; + } + for (list::iterator dit= tdl.begin(); dit!= tdl.end(); dit++) { + *dit = path_canon(*dit); + } + + list myfiles; + for (list::const_iterator it = filenames.begin(); + it != filenames.end(); it++) { + string fn = path_canon(*it); + bool ok = false; + for (list::iterator dit= tdl.begin(); dit!= tdl.end(); dit++) { + if (fn.find(*dit) == 0) { + myfiles.push_back(fn); + ok = true; + break; + } + } + if (!ok) { + fprintf(stderr, "File %s not in indexed area\n", fn.c_str()); + } + } + if (myfiles.empty()) + return true; + + // Note: we should sort the file names against the topdirs here + // and check for different databases. But we can for now only have + // one database per config, so we set the keydir from the first + // file (which is not really needed...), create the indexer/db and + // go: + config->setKeyDir(path_getfather(*myfiles.begin())); + + if (!makeDbIndexer(config) || !dbindexer) + return false; + else + return dbindexer->indexFiles(myfiles); } // Create additional stem database