implemented stem databases
This commit is contained in:
parent
3fc0738c81
commit
1a897c47b3
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.5 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.6 2005-02-10 15:21:12 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
@ -34,7 +34,7 @@ using namespace std;
|
||||
/**
|
||||
* Bunch holder for data used while indexing a directory tree
|
||||
*/
|
||||
class DbIndexer {
|
||||
class DbIndexer : public FsTreeWalkerCB {
|
||||
FsTreeWalker walker;
|
||||
RclConfig *config;
|
||||
string dbdir;
|
||||
@ -46,7 +46,7 @@ class DbIndexer {
|
||||
: config(cnf), dbdir(dbd), topdirs(top)
|
||||
{ }
|
||||
|
||||
~DbIndexer() {
|
||||
virtual ~DbIndexer() {
|
||||
if (tmpdir.length()) {
|
||||
wipedir(tmpdir);
|
||||
if (rmdir(tmpdir.c_str()) < 0) {
|
||||
@ -55,9 +55,9 @@ class DbIndexer {
|
||||
}
|
||||
}
|
||||
}
|
||||
friend FsTreeWalker::Status
|
||||
indexfile(void *, const std::string &, const struct stat *,
|
||||
FsTreeWalker::CbFlag);
|
||||
|
||||
FsTreeWalker::Status
|
||||
processone(const std::string &, const struct stat *, FsTreeWalker::CbFlag);
|
||||
|
||||
bool index();
|
||||
};
|
||||
@ -79,7 +79,7 @@ bool DbIndexer::index()
|
||||
it != topdirs->end(); it++) {
|
||||
LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(),
|
||||
dbdir.c_str()));
|
||||
if (walker.walk(*it, indexfile, this) != FsTreeWalker::FtwOk) {
|
||||
if (walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
|
||||
LOGERR(("DbIndexer::index: error while indexing %s\n",
|
||||
it->c_str()));
|
||||
db.close();
|
||||
@ -87,6 +87,18 @@ bool DbIndexer::index()
|
||||
}
|
||||
}
|
||||
db.purge();
|
||||
|
||||
// Create stemming databases
|
||||
string slangs;
|
||||
if (config->getConfParam("indexstemminglanguages", slangs)) {
|
||||
list<string> langs;
|
||||
ConfTree::stringToStrings(slangs, langs);
|
||||
for (list<string>::const_iterator it = langs.begin();
|
||||
it != langs.end(); it++) {
|
||||
db.createStemDb(*it);
|
||||
}
|
||||
}
|
||||
|
||||
if (!db.close()) {
|
||||
LOGERR(("DbIndexer::index: error closing database in %s\n",
|
||||
dbdir.c_str()));
|
||||
@ -105,26 +117,24 @@ bool DbIndexer::index()
|
||||
* the actual indexing work.
|
||||
*/
|
||||
FsTreeWalker::Status
|
||||
indexfile(void *cdata, const std::string &fn, const struct stat *stp,
|
||||
FsTreeWalker::CbFlag flg)
|
||||
DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||
FsTreeWalker::CbFlag flg)
|
||||
{
|
||||
DbIndexer *me = (DbIndexer *)cdata;
|
||||
|
||||
// If we're changing directories, possibly adjust parameters.
|
||||
if (flg == FsTreeWalker::FtwDirEnter ||
|
||||
flg == FsTreeWalker::FtwDirReturn) {
|
||||
me->config->setKeyDir(fn);
|
||||
config->setKeyDir(fn);
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
// Check db up to date ?
|
||||
if (!me->db.needUpdate(fn, stp)) {
|
||||
if (!db.needUpdate(fn, stp)) {
|
||||
LOGDEB(("indexfile: up to date: %s\n", fn.c_str()));
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
Rcl::Doc doc;
|
||||
if (!internfile(fn, me->config, doc, me->tmpdir))
|
||||
if (!internfile(fn, config, doc, tmpdir))
|
||||
return FsTreeWalker::FtwOk;
|
||||
|
||||
// Set up common fields:
|
||||
@ -133,7 +143,7 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp,
|
||||
doc.mtime = ascdate;
|
||||
|
||||
// Do database-specific work to update document data
|
||||
if (!me->db.add(fn, doc))
|
||||
if (!db.add(fn, doc))
|
||||
return FsTreeWalker::FtwError;
|
||||
|
||||
return FsTreeWalker::FtwOk;
|
||||
|
||||
@ -86,11 +86,7 @@ class myTextSplitCB : public TextSplitCB {
|
||||
static string plaintorich(const string &in, const list<string>& terms,
|
||||
list<pair<int, int> >&termoffsets)
|
||||
{
|
||||
{string t;
|
||||
for (list<string>::const_iterator it = terms.begin();
|
||||
it != terms.end();it++) t += "'" + *it + "' ";
|
||||
LOGDEB(("plaintorich: terms: %s\n", t.c_str()));
|
||||
}
|
||||
LOGDEB(("plaintorich: terms: %s\n", stringlistdisp(terms).c_str()));
|
||||
|
||||
myTextSplitCB cb(terms);
|
||||
TextSplit splitter(&cb, true);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.23 2005-02-08 14:45:54 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.24 2005-02-10 15:21:12 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
@ -16,8 +16,11 @@ using namespace std;
|
||||
#include "unacpp.h"
|
||||
#include "conftree.h"
|
||||
#include "debuglog.h"
|
||||
#include "pathut.h"
|
||||
#include "smallut.h"
|
||||
|
||||
#include "xapian.h"
|
||||
#include <xapian/stem.h>
|
||||
|
||||
// Data for a xapian database. There could actually be 2 different
|
||||
// ones for indexing or query as there is not much in common.
|
||||
@ -25,6 +28,8 @@ class Native {
|
||||
public:
|
||||
bool isopen;
|
||||
bool iswritable;
|
||||
string basedir;
|
||||
|
||||
// Indexing
|
||||
Xapian::WritableDatabase wdb;
|
||||
vector<bool> updated;
|
||||
@ -102,9 +107,6 @@ bool Rcl::Db::open(const string& dir, OpenMode mode)
|
||||
ndb->iswritable = true;
|
||||
break;
|
||||
case DbTrunc:
|
||||
ndb->wdb =
|
||||
Xapian::WritableDatabase(dir, Xapian::DB_CREATE_OR_OVERWRITE);
|
||||
ndb->iswritable = true;
|
||||
break;
|
||||
case DbRO:
|
||||
default:
|
||||
@ -113,6 +115,7 @@ bool Rcl::Db::open(const string& dir, OpenMode mode)
|
||||
break;
|
||||
}
|
||||
ndb->isopen = true;
|
||||
ndb->basedir = dir;
|
||||
return true;
|
||||
} catch (const Xapian::Error &e) {
|
||||
ermsg = e.get_msg();
|
||||
@ -399,17 +402,152 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Compute name of stem db for given base database and language
|
||||
static string stemdbname(const string& basename, string lang)
|
||||
{
|
||||
string nm = basename;
|
||||
path_cat(nm, string("stem_") + lang);
|
||||
return nm;
|
||||
}
|
||||
|
||||
// Predicate: true when c is a 7-bit value outside of 'a'..'z'.
// Values of 128 and above (bytes of multibyte UTF-8 sequences) and the
// lowercase ascii letters yield false.
inline static bool
p_notlowerorutf(unsigned int c)
{
    return c < 'a' || (c > 'z' && c < 128);
}
|
||||
|
||||
/**
|
||||
* Create database of stem to parents associations for a given language.
|
||||
* We walk the list of all terms, stem them, and create another Xapian db
|
||||
* with documents indexed by a single term (the stem), and with the list of
|
||||
* parent terms in the document data.
|
||||
*/
|
||||
bool Rcl::Db::createStemDb(const string& lang)
|
||||
{
|
||||
LOGDEB(("Rcl::Db::createStemDb(%s)\n", lang.c_str()));
|
||||
if (pdata == 0)
|
||||
return false;
|
||||
Native *ndb = (Native *)pdata;
|
||||
if (ndb->isopen == false || ndb->iswritable == false)
|
||||
return false;
|
||||
|
||||
// First build the in-memory stem database:
|
||||
// We walk the list of all terms, and stem each.
|
||||
// If the stem is identical to the term, no need to create an entry
|
||||
// Else, we add an entry to the multimap.
|
||||
// At the end, we only save stem-terms associations with several terms, the
|
||||
// others are not useful
|
||||
multimap<string, string> assocs;
|
||||
// Statistics
|
||||
int nostem=0; // Dont even try: not-alphanum (incomplete for now)
|
||||
int stemconst=0; // Stem == term
|
||||
int stemdiff=0; // Count of all different stems
|
||||
int stemmultiple = 0; // Count of stems with multiple derivatives
|
||||
try {
|
||||
Xapian::Stem stemmer(lang);
|
||||
Xapian::TermIterator it;
|
||||
for (it = ndb->wdb.allterms_begin();
|
||||
it != ndb->wdb.allterms_end(); it++) {
|
||||
// If it has any non-lowercase 7bit char, cant be stemmable
|
||||
string::iterator sit = (*it).begin(), eit = sit + (*it).length();
|
||||
if ((sit = find_if(sit, eit, p_notlowerorutf)) != eit) {
|
||||
++nostem;
|
||||
// LOGDEB(("stemskipped: '%s', because of 0x%x\n",
|
||||
// (*it).c_str(), *sit));
|
||||
continue;
|
||||
}
|
||||
string stem = stemmer.stem_word(*it);
|
||||
//cerr << "word " << *it << " stem " << stem << endl;
|
||||
if (stem == *it) {
|
||||
++stemconst;
|
||||
continue;
|
||||
}
|
||||
assocs.insert(pair<string,string>(stem, *it));
|
||||
}
|
||||
} catch (...) {
|
||||
LOGERR(("Stem database build failed: no stemmer for %s ? \n",
|
||||
lang.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create xapian database for stem relations
|
||||
string stemdbdir = stemdbname(ndb->basedir, lang);
|
||||
string ermsg = "NOERROR";
|
||||
Xapian::WritableDatabase sdb;
|
||||
try {
|
||||
sdb = Xapian::WritableDatabase(stemdbdir,
|
||||
Xapian::DB_CREATE_OR_OVERWRITE);
|
||||
} catch (const Xapian::Error &e) {
|
||||
ermsg = e.get_msg();
|
||||
} catch (const string &s) {
|
||||
ermsg = s;
|
||||
} catch (const char *s) {
|
||||
ermsg = s;
|
||||
} catch (...) {
|
||||
ermsg = "Caught unknown exception";
|
||||
}
|
||||
if (ermsg != "NOERROR") {
|
||||
LOGERR(("Rcl::Db::createstemdb: exception while opening '%s': %s\n",
|
||||
stemdbdir.c_str(), ermsg.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Enter pseud-docs in db. Walk the multimap, only enter
|
||||
// associations where there are several parent terms
|
||||
string stem;
|
||||
list<string> derivs;
|
||||
for (multimap<string,string>::const_iterator it = assocs.begin();
|
||||
it != assocs.end(); it++) {
|
||||
if (stem == it->first) {
|
||||
// Staying with same stem
|
||||
derivs.push_back(it->second);
|
||||
// cerr << " " << it->second << endl;
|
||||
} else {
|
||||
// Changing stems
|
||||
++stemdiff;
|
||||
if (derivs.size() > 1) {
|
||||
// Previous stem has multiple derivatives. Enter in db
|
||||
++stemmultiple;
|
||||
Xapian::Document newdocument;
|
||||
newdocument.add_term(stem);
|
||||
// The doc data is just parents=blank-separated-list
|
||||
string record = "parents=";
|
||||
for (list<string>::const_iterator it = derivs.begin();
|
||||
it != derivs.end(); it++) {
|
||||
record += *it + " ";
|
||||
}
|
||||
record += "\n";
|
||||
LOGDEB1(("stemdocument data: %s\n", record.c_str()));
|
||||
newdocument.set_data(record);
|
||||
try {
|
||||
sdb.replace_document(stem, newdocument);
|
||||
} catch (...) {
|
||||
LOGERR(("Rcl::Db::createstemdb: replace failed\n"));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
derivs.clear();
|
||||
stem = it->first;
|
||||
derivs.push_back(it->second);
|
||||
// cerr << "\n" << stem << " " << it->second;
|
||||
}
|
||||
}
|
||||
LOGDEB(("Stem map size: %d stems %d mult %d no %d const %d\n",
|
||||
assocs.size(), stemdiff, stemmultiple, nostem, stemconst));
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is called at the end of an indexing session, to delete the
|
||||
* documents for files that are no longer there. We also build the
|
||||
* stem database while we are at it.
|
||||
*/
|
||||
bool Rcl::Db::purge()
|
||||
{
|
||||
LOGDEB(("Rcl::Db::purge\n"));
|
||||
// There seems to be problems with the document delete code, when
|
||||
// we do this, the database is not actually updated. Especially,
|
||||
// if we delete a bunch of docs, so that there is a hole in the
|
||||
// docids at the beginning, we can't add anything (appears to work
|
||||
// and does nothing). Maybe related to the exceptions below when
|
||||
// trying to delete a nonexistent document ?
|
||||
// Flushing before trying the deletes seems to work around the problem
|
||||
|
||||
if (pdata == 0)
|
||||
return false;
|
||||
Native *ndb = (Native *)pdata;
|
||||
@ -418,6 +556,13 @@ bool Rcl::Db::purge()
|
||||
if (ndb->isopen == false || ndb->iswritable == false)
|
||||
return false;
|
||||
|
||||
// There seems to be problems with the document delete code, when
|
||||
// we do this, the database is not actually updated. Especially,
|
||||
// if we delete a bunch of docs, so that there is a hole in the
|
||||
// docids at the beginning, we can't add anything (appears to work
|
||||
// and does nothing). Maybe related to the exceptions below when
|
||||
// trying to delete a nonexistent document ?
|
||||
// Flushing before trying the deletes seems to work around the problem
|
||||
ndb->wdb.flush();
|
||||
for (Xapian::docid did = 1; did < ndb->updated.size(); ++did) {
|
||||
if (!ndb->updated[did]) {
|
||||
@ -429,6 +574,7 @@ bool Rcl::Db::purge()
|
||||
}
|
||||
}
|
||||
}
|
||||
ndb->wdb.flush();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -446,46 +592,57 @@ class wsQData : public TextSplitCB {
|
||||
return s;
|
||||
}
|
||||
bool takeword(const std::string &term, int , int, int) {
|
||||
LOGDEB(("Takeword: %s\n", term.c_str()));
|
||||
LOGDEB1(("wsQData::takeword: %s\n", term.c_str()));
|
||||
terms.push_back(term);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
#include <xapian/stem.h>
|
||||
|
||||
// Expand term to list of all terms which expand to the same term.
|
||||
// This is currently awfully inefficient as we actually stem the whole
|
||||
// db term list ! Need to build an efficient structure when finishing
|
||||
// indexing, but good enough for testing
|
||||
// Expand term to list of all terms which stem to the same term.
|
||||
static list<string> stemexpand(Native *ndb, string term, const string& lang)
|
||||
{
|
||||
list<string> explist;
|
||||
try {
|
||||
Xapian::Stem stemmer(lang);
|
||||
string stem = stemmer.stem_word(term);
|
||||
LOGDEB(("stemexpand: term '%s' stem '%s'\n",
|
||||
term.c_str(), stem.c_str()));
|
||||
Xapian::TermIterator it;
|
||||
for (it = ndb->db.allterms_begin();
|
||||
it != ndb->db.allterms_end(); it++) {
|
||||
string stem1 = stemmer.stem_word(*it);
|
||||
if (stem == stem1)
|
||||
explist.push_back(*it);
|
||||
}
|
||||
if (explist.size() == 0)
|
||||
LOGDEB(("stemexpand: '%s' -> '%s'\n", term.c_str(), stem.c_str()));
|
||||
// Try to fetch the doc from the stem db
|
||||
string stemdbdir = stemdbname(ndb->basedir, lang);
|
||||
Xapian::Database sdb(stemdbdir);
|
||||
LOGDEB1(("Rcl::Db::stemexpand: %s lastdocid: %d\n",
|
||||
stemdbdir.c_str(), sdb.get_lastdocid()));
|
||||
if (!sdb.term_exists(stem)) {
|
||||
LOGDEB1(("Rcl::Db::stemexpand: no term for %s\n", stem.c_str()));
|
||||
explist.push_back(term);
|
||||
if (1) {
|
||||
string expanded;
|
||||
for (list<string>::const_iterator it = explist.begin();
|
||||
it != explist.end(); it++) {
|
||||
expanded += *it + " ";
|
||||
}
|
||||
LOGDEB(("stemexpand: expanded list: %s\n", expanded.c_str()));
|
||||
return explist;
|
||||
}
|
||||
Xapian::PostingIterator did = sdb.postlist_begin(stem);
|
||||
if (did == sdb.postlist_end(stem)) {
|
||||
LOGDEB1(("Rcl::Db::stemexpand: no term(1) for %s\n",stem.c_str()));
|
||||
explist.push_back(term);
|
||||
return explist;
|
||||
}
|
||||
Xapian::Document doc = sdb.get_document(*did);
|
||||
string data = doc.get_data();
|
||||
// No need for a conftree, but we need to massage the data a little
|
||||
string::size_type pos = data.find_first_of("=");
|
||||
++pos;
|
||||
string::size_type pos1 = data.find_last_of("\n");
|
||||
if (pos == string::npos || pos1 == string::npos ||pos1 <= pos) { // ??
|
||||
explist.push_back(term);
|
||||
return explist;
|
||||
}
|
||||
ConfTree::stringToStrings(data.substr(pos, pos1-pos), explist);
|
||||
if (find(explist.begin(), explist.end(), term) == explist.end()) {
|
||||
explist.push_back(term);
|
||||
}
|
||||
LOGDEB(("Rcl::Db::stemexpand: %s -> %s\n", stem.c_str(),
|
||||
stringlistdisp(explist).c_str()));
|
||||
} catch (...) {
|
||||
LOGERR(("Stemming failed: no stemmer for %s ? \n", lang.c_str()));
|
||||
LOGERR(("stemexpand: error accessing stem db\n"));
|
||||
explist.push_back(term);
|
||||
return explist;
|
||||
}
|
||||
return explist;
|
||||
}
|
||||
@ -519,7 +676,8 @@ bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
||||
wsQData splitData;
|
||||
TextSplit splitter(&splitData, true);
|
||||
splitter.text_to_words(*it);
|
||||
LOGDEB(("Splitter term count: %d\n", splitData.terms.size()));
|
||||
LOGDEB1(("Rcl::Db::setquery: splitter term count: %d\n",
|
||||
splitData.terms.size()));
|
||||
switch(splitData.terms.size()) {
|
||||
case 0: continue;// ??
|
||||
case 1: {
|
||||
@ -578,7 +736,7 @@ int Rcl::Db::getResCnt()
|
||||
|
||||
bool Rcl::Db::getDoc(int i, Doc &doc, int *percent)
|
||||
{
|
||||
LOGDEB(("Rcl::Db::getDoc: %d\n", i));
|
||||
LOGDEB1(("Rcl::Db::getDoc: %d\n", i));
|
||||
Native *ndb = (Native *)pdata;
|
||||
if (!ndb || !ndb->enquire) {
|
||||
LOGERR(("Rcl::Db::getDoc: no query opened\n"));
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.11 2005-02-08 14:45:54 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rcldb.h,v 1.12 2005-02-10 15:21:12 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
@ -72,6 +72,7 @@ class Db {
|
||||
bool add(const string &filename, const Doc &doc);
|
||||
bool needUpdate(const string &filename, const struct stat *stp);
|
||||
bool purge();
|
||||
bool createStemDb(const string &lang);
|
||||
|
||||
// Query-related functions
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: execmd.cpp,v 1.4 2005-02-08 09:34:47 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: execmd.cpp,v 1.5 2005-02-10 15:21:12 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#ifndef TEST_EXECMD
|
||||
#include <unistd.h>
|
||||
@ -131,7 +131,7 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
|
||||
close(pipeout[0]);
|
||||
if (pipeout[1] >= 0)
|
||||
close(pipeout[1]);
|
||||
LOGDEB(("ExecCmd::doexec: father got status 0x%x\n", status));
|
||||
LOGDEB1(("ExecCmd::doexec: father got status 0x%x\n", status));
|
||||
return status;
|
||||
} else {
|
||||
if (input) {
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: fstreewalk.cpp,v 1.2 2004-12-12 08:58:12 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: fstreewalk.cpp,v 1.3 2005-02-10 15:21:12 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#ifndef TEST_FSTREEWALK
|
||||
@ -53,8 +53,8 @@ int FsTreeWalker::getErrCnt()
|
||||
return data->errors;
|
||||
}
|
||||
|
||||
FsTreeWalker::Status FsTreeWalker::walk(const string &top, CbType fun,
|
||||
void *cdata)
|
||||
FsTreeWalker::Status FsTreeWalker::walk(const string &top,
|
||||
FsTreeWalkerCB& cb)
|
||||
{
|
||||
Status status = FtwOk;
|
||||
struct stat st;
|
||||
@ -68,12 +68,12 @@ FsTreeWalker::Status FsTreeWalker::walk(const string &top, CbType fun,
|
||||
return FtwError;
|
||||
}
|
||||
if (S_ISDIR(st.st_mode)) {
|
||||
if ((status = fun(cdata, top, &st, FtwDirEnter)) &
|
||||
if ((status = cb.processone(top, &st, FtwDirEnter)) &
|
||||
(FtwStop|FtwError)) {
|
||||
return status;
|
||||
}
|
||||
} else if (S_ISREG(st.st_mode)) {
|
||||
return fun(cdata, top, &st, FtwRegular);
|
||||
return cb.processone(top, &st, FtwRegular);
|
||||
} else {
|
||||
return status;
|
||||
}
|
||||
@ -110,17 +110,17 @@ FsTreeWalker::Status FsTreeWalker::walk(const string &top, CbType fun,
|
||||
}
|
||||
if (S_ISDIR(st.st_mode)) {
|
||||
if (data->options & FtwNoRecurse) {
|
||||
status = fun(cdata, fn, &st, FtwDirEnter);
|
||||
status = cb.processone(fn, &st, FtwDirEnter);
|
||||
} else {
|
||||
status=walk(fn, fun, cdata);
|
||||
status=walk(fn, cb);
|
||||
}
|
||||
if (status & (FtwStop|FtwError))
|
||||
goto out;
|
||||
if ((status = fun(cdata, top, &st, FtwDirReturn))
|
||||
if ((status = cb.processone(top, &st, FtwDirReturn))
|
||||
& (FtwStop|FtwError))
|
||||
goto out;
|
||||
} else if (S_ISREG(st.st_mode)) {
|
||||
if ((status = fun(cdata, fn, &st, FtwRegular)) &
|
||||
if ((status = cb.processone(fn, &st, FtwRegular)) &
|
||||
(FtwStop|FtwError)) {
|
||||
goto out;
|
||||
}
|
||||
@ -143,17 +143,22 @@ FsTreeWalker::Status FsTreeWalker::walk(const string &top, CbType fun,
|
||||
|
||||
using namespace std;
|
||||
|
||||
FsTreeWalker::Status walkfunc(void *, const string &path,
|
||||
const struct stat *st,
|
||||
FsTreeWalker::CbFlag flg)
|
||||
{
|
||||
if (flg == FsTreeWalker::FtwDirEnter) {
|
||||
cout << "[Entering " << path << "]" << endl;
|
||||
} else if (flg == FsTreeWalker::FtwRegular) {
|
||||
cout << path << endl;
|
||||
class myCB : public FsTreeWalkerCB {
|
||||
public:
|
||||
FsTreeWalker::Status processone(const string &path,
|
||||
const struct stat *st,
|
||||
FsTreeWalker::CbFlag flg)
|
||||
{
|
||||
if (flg == FsTreeWalker::FtwDirEnter) {
|
||||
cout << "[Entering " << path << "]" << endl;
|
||||
} else if (flg == FsTreeWalker::FtwDirReturn) {
|
||||
cout << "[Returning to " << path << "]" << endl;
|
||||
} else if (flg == FsTreeWalker::FtwRegular) {
|
||||
cout << path << endl;
|
||||
}
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
};
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
@ -162,7 +167,8 @@ int main(int argc, const char **argv)
|
||||
exit(1);
|
||||
}
|
||||
FsTreeWalker walker;
|
||||
walker.walk(argv[1], walkfunc, 0);
|
||||
myCB cb;
|
||||
walker.walk(argv[1], cb);
|
||||
if (walker.getErrCnt() > 0)
|
||||
cout << walker.getReason();
|
||||
}
|
||||
|
||||
@ -1,9 +1,14 @@
|
||||
#ifndef _FSTREEWALK_H_INCLUDED_
|
||||
#define _FSTREEWALK_H_INCLUDED_
|
||||
/* @(#$Id: fstreewalk.h,v 1.1 2004-12-10 18:13:13 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: fstreewalk.h,v 1.2 2005-02-10 15:21:12 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
using std::string;
|
||||
#endif
|
||||
|
||||
class FsTreeWalkerCB;
|
||||
|
||||
class FsTreeWalker {
|
||||
public:
|
||||
@ -12,12 +17,9 @@ class FsTreeWalker {
|
||||
FtwStatAll = FtwError|FtwStop};
|
||||
enum Options {FtwOptNone = 0, FtwNoRecurse = 1, FtwFollow = 2};
|
||||
|
||||
typedef Status (*CbType)(void *cdata,
|
||||
const std::string &, const struct stat *, CbFlag);
|
||||
|
||||
FsTreeWalker(Options opts = FtwOptNone);
|
||||
~FsTreeWalker();
|
||||
Status walk(const std::string &dir, CbType fun, void *cdata);
|
||||
Status walk(const std::string &dir, FsTreeWalkerCB& cb);
|
||||
std::string getReason();
|
||||
int getErrCnt();
|
||||
private:
|
||||
@ -25,4 +27,11 @@ class FsTreeWalker {
|
||||
Internal *data;
|
||||
};
|
||||
|
||||
class FsTreeWalkerCB {
|
||||
public:
|
||||
virtual ~FsTreeWalkerCB() {}
|
||||
virtual FsTreeWalker::Status
|
||||
processone(const string &, const struct stat *, FsTreeWalker::CbFlag)
|
||||
= 0;
|
||||
};
|
||||
#endif /* _FSTREEWALK_H_INCLUDED_ */
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.4 2005-02-10 15:21:12 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#ifndef TEST_SMALLUT
|
||||
#include <string>
|
||||
@ -48,6 +48,17 @@ bool maketmpdir(string& tdir)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Format a string list for display: each element is enclosed in square
// brackets, and the elements are separated by single spaces. An empty
// list yields an empty string.
string stringlistdisp(const list<string>& sl)
{
    string out;
    list<string>::const_iterator cur = sl.begin();
    while (cur != sl.end()) {
        // Separator goes before every element except the first one
        if (!out.empty())
            out += " ";
        out += "[";
        out += *cur;
        out += "]";
        ++cur;
    }
    return out;
}
|
||||
|
||||
|
||||
int stringicmp(const string & s1, const string& s2)
|
||||
{
|
||||
string::const_iterator it1 = s1.begin();
|
||||
|
||||
@ -1,14 +1,16 @@
|
||||
#ifndef _SMALLUT_H_INCLUDED_
|
||||
#define _SMALLUT_H_INCLUDED_
|
||||
/* @(#$Id: smallut.h,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: smallut.h,v 1.4 2005-02-10 15:21:12 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
#include <string>
|
||||
|
||||
#include <list>
|
||||
using std::string;
|
||||
using std::list;
|
||||
|
||||
extern int stringicmp(const string& s1, const string& s2);
|
||||
extern int stringlowercmp(const string& alreadylower, const string& s2);
|
||||
extern int stringuppercmp(const string& alreadyupper, const string& s2);
|
||||
|
||||
extern bool maketmpdir(string& tdir);
|
||||
extern string stringlistdisp(const list<string>& strs);
|
||||
|
||||
#endif /* _SMALLUT_H_INCLUDED_ */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user