From 5ca462cdff19d971cd454a2d2bbaf508a3751cc3 Mon Sep 17 00:00:00 2001 From: dockes Date: Tue, 14 Dec 2004 17:54:16 +0000 Subject: [PATCH] *** empty log message *** --- src/common/rclconfig.cpp | 64 +++++++++++++++ src/common/rclconfig.h | 45 +++++++++++ src/common/textsplit.cpp | 114 +++++++++++++++----------- src/common/textsplit.h | 31 +++++++ src/index/indexer.h | 23 ++++++ src/index/mimetype.cpp | 7 +- src/index/mimetype.h | 3 +- src/index/recollindex.cpp | 166 +++++++++++++++++++++++++++++++++----- src/rcldb/rcldb.cpp | 121 +++++++++++++++++++++++++++ src/rcldb/rcldb.h | 32 ++++++++ src/utils/execmd.cpp | 7 +- src/utils/pathut.cpp | 25 +++++- src/utils/pathut.h | 3 +- src/utils/readfile.cpp | 49 +++++++++++ src/utils/readfile.h | 13 +++ 15 files changed, 626 insertions(+), 77 deletions(-) create mode 100644 src/common/rclconfig.cpp create mode 100644 src/common/rclconfig.h create mode 100644 src/common/textsplit.h create mode 100644 src/index/indexer.h create mode 100644 src/rcldb/rcldb.cpp create mode 100644 src/rcldb/rcldb.h create mode 100644 src/utils/readfile.cpp create mode 100644 src/utils/readfile.h diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp new file mode 100644 index 00000000..bd2f1cdc --- /dev/null +++ b/src/common/rclconfig.cpp @@ -0,0 +1,64 @@ +#ifndef lint +static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes"; +#endif + +#include + +#include "rclconfig.h" +#include "pathut.h" +#include "conftree.h" + +using namespace std; + +ConfTree *getConfig() +{ +} + +RclConfig::RclConfig() + : m_ok(false), conf(0), mimemap(0), mimeconf(0) +{ + const char *cp = getenv("RECOLL_CONFDIR"); + if (cp) { + confdir = cp; + } else { + confdir = path_home(); + confdir += ".recoll/"; + } + string cfilename = confdir; + path_cat(cfilename, "recoll.conf"); + + // Maybe we should try to open readonly here as, else, this will + // casually create a configuration file + conf = new 
ConfTree(cfilename.c_str(), 0); + if (conf == 0) { + cerr << "No configuration" << endl; + return; + } + + string mimemapfile; + if (!conf->get("mimemapfile", mimemapfile, "")) { + mimemapfile = "mimemap"; + } + string mpath = confdir; + path_cat(mpath, mimemapfile); + mimemap = new ConfTree(mpath.c_str()); + if (mimemap == 0) { + cerr << "No mime map file" << endl; + return; + } + string mimeconffile; + if (!conf->get("mimeconffile", mimeconffile, "")) { + mimeconffile = "mimeconf"; + } + mpath = confdir; + + path_cat(mpath, mimeconffile); + mimeconf = new ConfTree(mpath.c_str()); + if (mimeconf == 0) { + cerr << "No mime conf file" << endl; + return; + } + mimeconf->list(); + m_ok = true; + return; +} diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h new file mode 100644 index 00000000..32aacdd0 --- /dev/null +++ b/src/common/rclconfig.h @@ -0,0 +1,45 @@ +#ifndef _RCLCONFIG_H_INCLUDED_ +#define _RCLCONFIG_H_INCLUDED_ +/* @(#$Id: rclconfig.h,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes */ + +#include "conftree.h" + +class RclConfig { + int m_ok; + string confdir; // Directory where the files are stored + ConfTree *conf; // Parsed main configuration + string keydir; // Current directory used for parameter fetches. + string defcharset; // These are stored locally to avoid a config lookup + string deflang; // each time. + // Note: this will have to change if/when we support per directory maps + ConfTree *mimemap; + ConfTree *mimeconf; + public: + RclConfig(); + ~RclConfig() {delete conf;delete mimemap;delete mimeconf;} + bool ok() {return m_ok;} + ConfTree *getConfig() {return m_ok ? conf : 0;} + ConfTree *getMimeMap() {return m_ok ? mimemap : 0;} + ConfTree *getMimeConf() {return m_ok ? 
mimeconf : 0;} + bool getConfParam(const string &name, string &value) + { + if (conf == 0) + return false; + return conf->get(name, value, keydir); + } + const string &getDefCharset() { + return defcharset; + } + const string &getDefLang() { + return deflang; + } + void setKeyDir(const string &dir) + { + keydir = dir; + conf->get("defaultcharset", defcharset, keydir); + conf->get("defaultlanguage", deflang, keydir); + } +}; + + +#endif /* _RCLCONFIG_H_INCLUDED_ */ diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp index e71f11d0..d6895857 100644 --- a/src/common/textsplit.cpp +++ b/src/common/textsplit.cpp @@ -1,14 +1,33 @@ #ifndef lint -static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.2 2004-12-14 17:49:11 dockes Exp $ (C) 2004 J.F.Dockes"; #endif +#ifndef TEST_TEXTSPLIT #include #include +#include "textsplit.h" + using namespace std; +/** + * Splitting a text into words. The code in this file will work with any + * charset where the basic separators (.,- etc.) have their ascii values + * (ok for UTF-8, ascii, iso8859* and quite a few others). + * + * We work in a way which would make it quite difficult to handle non-ascii + * separator chars (en-dash,etc.). We would then need to actually parse the + * utf-8 stream, and use a different way to classify the characters (instead + * of a 256 slot array). + * + * We are also not using capitalization information. + */ + // Character classes: we have three main groups, and then some chars // are their own class because they want special handling. +// We have an array with 256 slots where we keep the character states. +// The array could be fully static, but we use a small function to fill it +// once. 
enum CharClass {LETTER=256, SPACE=257, DIGIT=258}; static int charclasses[256]; static void setcharclasses() @@ -38,7 +57,7 @@ static void setcharclasses() init = 1; } -static void emitterm(string &w, int *posp, bool doerase = true) +void TextSplit::emitterm(string &w, int pos, bool doerase = true) { // Maybe trim end of word. These are chars that we would keep inside // a word or span, but not at the end @@ -55,22 +74,27 @@ static void emitterm(string &w, int *posp, bool doerase = true) } breakloop: if (w.length()) { - if (posp) - *posp++; - cout << w << endl; + if (termsink) + termsink(cdata, w, pos); } if (doerase) w.erase(); } -void text_to_words(const string &in) +/* + * We basically emit a word every time we see a separator, but some chars are + * handled specially so that special cases, ie, c++ and dockes@okyz.com etc, + * are handled properly, + */ +void TextSplit::text_to_words(const string &in) { setcharclasses(); string span; string word; bool number = false; - int pos = 0; + int wordpos = 0; int spanpos = 0; + for (int i = 0; i < in.length(); i++) { int c = in[i]; int cc = charclasses[c]; @@ -78,11 +102,13 @@ void text_to_words(const string &in) case SPACE: SPACE: if (word.length()) { - if (span.length() != word.length()) - emitterm(span, &spanpos); - emitterm(word, &pos); + if (span.length() != word.length()) { + emitterm(span, spanpos); + } + emitterm(word, wordpos++); number = false; } + spanpos = wordpos; span.erase(); break; case '-': @@ -94,9 +120,10 @@ void text_to_words(const string &in) span += c; } } else { - if (span.length() != word.length()) - emitterm(span, &spanpos, false); - emitterm(word, &pos); + if (span.length() != word.length()) { + emitterm(span, spanpos, false); + } + emitterm(word, wordpos++); number = false; span += c; } @@ -104,9 +131,10 @@ void text_to_words(const string &in) case '\'': case '@': if (word.length()) { - if (span.length() != word.length()) - emitterm(span, &spanpos, false); - emitterm(word, &pos); + if 
(span.length() != word.length()) { + emitterm(span, spanpos, false); + } + emitterm(word, wordpos++); number = false; } else word += c; @@ -117,7 +145,7 @@ void text_to_words(const string &in) word += c; } else { if (word.length()) { - emitterm(word, &pos); + emitterm(word, wordpos++); number = false; } else word += c; @@ -139,8 +167,8 @@ void text_to_words(const string &in) // if '-' is the last char before end of line, just // ignore the line change. This is the right thing to // do almost always. We'd then need a way to check if - // the - was added as part of the sleep or was really there, - // but this would need a dictionary. + // the - was added as part of the word hyphenation, or was + // there in the first place, but this would need a dictionary. } else { // Handle like a normal separator goto SPACE; @@ -162,42 +190,35 @@ void text_to_words(const string &in) } if (word.length()) { if (span.length() != word.length()) - emitterm(span, &spanpos); - emitterm(word, &pos); + emitterm(span, spanpos); + emitterm(word, wordpos); } } -#if 1 || TEST_TEXTSPLIT +#else // TEST driver -> + #include #include #include -int -file_to_string(const string &fn, string &data) + +#include + +#include "textsplit.h" +#include "readfile.h" + +using namespace std; + +int termsink(void *, const string &term, int pos) { - int fd = open(fn.c_str(), 0); - if (fd < 0) { - perror("open"); - return -1; - } - char buf[4096]; - for (;;) { - int n = read(fd, buf, 4096); - if (n < 0) { - perror("read"); - close(fd); - return -1; - } - if (n == 0) - break; - data.append(buf, n); - } - close(fd); + cout << pos << " " << term << endl; return 0; } + static string teststring = "jfd@okyz.com " "Ceci. 
Est;Oui 1.24 n@d @net .net t@v@c c# c++ -10 o'brien l'ami " + "a 134 +134 -14 -1.5 +1.5 1.54e10 a" "@^#$(#$(*)" "one\n\rtwo\nthree-\nfour" "[olala][ululu]" @@ -206,15 +227,16 @@ static string teststring = int main(int argc, char **argv) { + TextSplit splitter(termsink, 0); if (argc == 2) { string data; - if (file_to_string(argv[1], data) < 0) + if (!file_to_string(argv[1], data)) exit(1); - text_to_words(data); + splitter.text_to_words(data); } else { - cout << teststring << endl; text_to_words(teststring); + cout << teststring << endl; + splitter.text_to_words(teststring); } } #endif // TEST - diff --git a/src/common/textsplit.h b/src/common/textsplit.h new file mode 100644 index 00000000..edd9d79b --- /dev/null +++ b/src/common/textsplit.h @@ -0,0 +1,31 @@ +#ifndef _TEXTSPLIT_H_INCLUDED_ +#define _TEXTSPLIT_H_INCLUDED_ +/* @(#$Id: textsplit.h,v 1.1 2004-12-14 17:49:11 dockes Exp $ (C) 2004 J.F.Dockes */ + +#include + +/** + * Split text into words. + * See comments at top of .cpp for more explanations. + * This uses a callback function. It could be done with an iterator instead, + * but it's much simpler this way... + */ +class TextSplit { + public: + typedef int (*TermSink)(void *cdata, const std::string & term, int pos); + private: + TermSink termsink; + void *cdata; + void emitterm(std::string &term, int pos, bool doerase); + public: + /** + * Constructor: just store callback and client data + */ + TextSplit(TermSink t, void *c) : termsink(t), cdata(c) {} + /** + * Split text, emit words and positions.
+ */ + void text_to_words(const std::string &in); +}; + +#endif /* _TEXTSPLIT_H_INCLUDED_ */ diff --git a/src/index/indexer.h b/src/index/indexer.h new file mode 100644 index 00000000..b38b624f --- /dev/null +++ b/src/index/indexer.h @@ -0,0 +1,23 @@ +#ifndef _INDEXER_H_INCLUDED_ +#define _INDEXER_H_INCLUDED_ +/* @(#$Id: indexer.h,v 1.1 2004-12-14 17:53:51 dockes Exp $ (C) 2004 J.F.Dockes */ + +#include "rclconfig.h" + +/* Definition for document interner functions */ +typedef Rcl::Doc* (*MimeHandlerFunc)(RclConfig *, const string &, + const string &); + + +#if 0 +class FsIndexer { + const ConfTree &conf; + public: + enum runStatus {IndexerOk, IndexerError}; + Indexer(const ConfTree &cnf): conf(cnf) {} + virtual ~Indexer() {} + runStatus run() = 0; +}; +#endif + +#endif /* _INDEXER_H_INCLUDED_ */ diff --git a/src/index/mimetype.cpp b/src/index/mimetype.cpp index 3eb1b9cc..92144760 100644 --- a/src/index/mimetype.cpp +++ b/src/index/mimetype.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -11,6 +11,9 @@ using std::string; string mimetype(const string &filename, ConfTree *mtypes) { + if (mtypes == 0) + return ""; + // If filename has a suffix and we find it in the map, we're done string::size_type dot = filename.find_last_of("."); if (dot != string::npos) { @@ -26,6 +29,8 @@ string mimetype(const string &filename, ConfTree *mtypes) return ""; } + + #ifdef _TEST_MIMETYPE_ #include const char *tvec[] = { diff --git a/src/index/mimetype.h b/src/index/mimetype.h index d3165aa6..da22e165 100644 --- a/src/index/mimetype.h +++ b/src/index/mimetype.h @@ -1,6 +1,6 @@ #ifndef _MIMETYPE_H_INCLUDED_ #define _MIMETYPE_H_INCLUDED_ -/* @(#$Id: mimetype.h,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: mimetype.h,v 1.2 2004-12-14 17:54:16 dockes 
Exp $ (C) 2004 J.F.Dockes */ #include #include "conftree.h" @@ -13,4 +13,5 @@ */ string mimetype(const std::string &filename, ConfTree *mtypes); + #endif /* _MIMETYPE_H_INCLUDED_ */ diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index 0e96bd28..780709ee 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -1,7 +1,9 @@ #ifndef lint -static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes"; #endif +#include + #include #include "pathut.h" @@ -9,43 +11,156 @@ static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.1 2004-12-13 15:42:16 dockes #include "rclconfig.h" #include "fstreewalk.h" #include "mimetype.h" +#include "rcldb.h" +#include "readfile.h" +#include "indexer.h" using namespace std; + +Rcl::Doc* textPlainToDoc(RclConfig *conf, const string &fn, + const string &mtype) +{ + return 0; +} + +static map ihandlers; +class IHandler_Init { + public: + IHandler_Init() { + ihandlers["text/plain"] = textPlainToDoc; + } +}; +static IHandler_Init ihandleriniter; + +/** + * Return handler function for given mime type + */ +MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers) +{ + // Return handler definition for mime type + string hs; + if (!mhandlers->get(mtype, hs, "")) + return 0; + + // Break definition into type and name + vector toks; + ConfTree::stringToStrings(hs, toks); + if (toks.size() < 1) { + cerr << "Bad mimeconf line for " << mtype << endl; + return 0; + } + + // Retrieve handler function according to type + if (!strcasecmp(toks[0].c_str(), "internal")) { + cerr << "Internal Handler" << endl; + map::const_iterator it = + ihandlers.find(mtype); + if (it == ihandlers.end()) { + cerr << "Internal handler not found for " << mtype << endl; + return 0; + } + cerr << "Got handler" << endl; + return it->second; + } else if 
(!strcasecmp(toks[0].c_str(), "dll")) { + if (toks.size() != 2) + return 0; + return 0; + } else if (!strcasecmp(toks[0].c_str(), "exec")) { + if (toks.size() != 2) + return 0; + return 0; + } else { + return 0; + } +} + class DirIndexer { FsTreeWalker walker; RclConfig *config; string topdir; + string dbdir; + Rcl::Db db; public: - DirIndexer(RclConfig *cnf, const string &top) - : config(cnf), topdir(top) - { - } + DirIndexer(RclConfig *cnf, const string &dbd, const string &top) + : config(cnf), topdir(top), dbdir(dbd) + { } + friend FsTreeWalker::Status indexfile(void *, const std::string &, const struct stat *, FsTreeWalker::CbFlag); - void index() - { - walker.walk(topdir, indexfile, this); - } + + void index(); }; +void DirIndexer::index() +{ +#if 0 + if (!db.open(dbdir, Rcl::Db::DbUpd)) { + cerr << "Error opening database in " << dbdir << " for " << + topdir << endl; + return; + } +#endif + walker.walk(topdir, indexfile, this); +#if 0 + if (!db.close()) { + cerr << "Error closing database in " << dbdir << " for " << + topdir << endl; + return; + } +#endif +} + FsTreeWalker::Status -indexfile(void *cdata, const std::string &fn, - const struct stat *stp, FsTreeWalker::CbFlag flg) +indexfile(void *cdata, const std::string &fn, const struct stat *stp, + FsTreeWalker::CbFlag flg) { DirIndexer *me = (DirIndexer *)cdata; - if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwDirReturn) { - // Possibly adjust defaults + + if (flg == FsTreeWalker::FtwDirEnter || + flg == FsTreeWalker::FtwDirReturn) { + me->config->setKeyDir(fn); cout << "indexfile: [" << fn << "]" << endl; + cout << " defcharset: " << me->config->getDefCharset() + << " deflang: " << me->config->getDefLang() << endl; + return FsTreeWalker::FtwOk; } - string mtype = mimetype(fn, me->config->getMimeMap()); - if (mtype.length() > 0) - cout << "indexfile: " << mtype << " " << fn << endl; - else - cout << "indexfile: " << "(nomime)" << " " << fn << endl; + string mime = mimetype(fn, 
me->config->getMimeMap()); + if (mime.length() == 0) { + cout << "indexfile: " << "(no mime)" << " " << fn << endl; + // No mime type ?? pass on. + return FsTreeWalker::FtwOk; + } + + cout << "indexfile: " << mime << " " << fn << endl; + + // Look for appropriate handler + MimeHandlerFunc fun = getMimeHandler(mime, me->config->getMimeConf()); + if (!fun) { + // No handler for this type, for now :( + return FsTreeWalker::FtwOk; + } + + // Check if file has already been indexed, and has changed since + // - Make path term, + // - query db: postlist_begin->docid + // - fetch doc (get_document(docid) + // - check date field, maybe skip + + // Turn file into a document. The document has fields for title, body + // etc., all text converted to utf8 + Rcl::Doc *doc = fun(me->config, fn, mime); + +#if 0 + // Set up xapian document, add postings and misc fields, + // add to or update database. + dbadd(doc); +#endif + + return FsTreeWalker::FtwOk; } @@ -63,11 +178,18 @@ int main(int argc, const char **argv) cerr << "No top directories in configuration" << endl; exit(1); } - list tdl; + vector tdl; if (ConfTree::stringToStrings(topdirs, tdl)) { - for (list::iterator it = tdl.begin(); it != tdl.end(); it++) { - cout << *it << endl; - DirIndexer indexer(config, *it); + for (int i = 0; i < tdl.size(); i++) { + string topdir = tdl[i]; + cout << topdir << endl; + string dbdir; + if (conf->get("dbdir", dbdir, topdir) == 0) { + cerr << "No database directory in configuration for " + << topdir << endl; + exit(1); + } + DirIndexer indexer(config, dbdir, topdir); indexer.index(); } } diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp new file mode 100644 index 00000000..3e1e134c --- /dev/null +++ b/src/rcldb/rcldb.cpp @@ -0,0 +1,121 @@ +#ifndef lint +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes"; +#endif + +#include +#include +#include + +using namespace std; + +#include "rcldb.h" + +#include "xapian.h" + +// Data for a xapian 
database +class Native { + public: + bool isopen; + bool iswritable; + class Xapian::Database db; + class Xapian::WritableDatabase wdb; + vector updated; + + Native() : isopen(false), iswritable(false) {} + +}; + +Rcl::Db::Db() +{ + pdata = new Native; +} + +Rcl::Db::~Db() +{ + if (pdata == 0) + return; + Native *ndb = (Native *)pdata; + try { + // There is nothing to do for an ro db. + if (ndb->isopen == false || ndb->iswritable == false) { + delete ndb; + return; + } + ndb->wdb.flush(); + delete ndb; + } catch (const Xapian::Error &e) { + cout << "Exception: " << e.get_msg() << endl; + } catch (const string &s) { + cout << "Exception: " << s << endl; + } catch (const char *s) { + cout << "Exception: " << s << endl; + } catch (...) { + cout << "Caught unknown exception" << endl; + } +} + +bool Rcl::Db::open(const string& dir, OpenMode mode) +{ + if (pdata == 0) + return false; + Native *ndb = (Native *)pdata; + try { + switch (mode) { + case DbUpd: + ndb->wdb = Xapian::Auto::open(dir, Xapian::DB_CREATE_OR_OPEN); + ndb->updated.resize(ndb->wdb.get_lastdocid() + 1); + ndb->iswritable = true; + break; + case DbTrunc: + ndb->wdb = Xapian::Auto::open(dir, Xapian::DB_CREATE_OR_OVERWRITE); + ndb->iswritable = true; + break; + case DbRO: + default: + ndb->iswritable = false; + cerr << "Not ready to open RO yet" << endl; + exit(1); + } + ndb->isopen = true; + return true; + } catch (const Xapian::Error &e) { + cout << "Exception: " << e.get_msg() << endl; + } catch (const string &s) { + cout << "Exception: " << s << endl; + } catch (const char *s) { + cout << "Exception: " << s << endl; + } catch (...) 
{ + cout << "Caught unknown exception" << endl; + } + return false; +} +bool Rcl::Db::close() +{ + if (pdata == 0) + return false; + Native *ndb = (Native *)pdata; + if (ndb->isopen == false) + return true; + try { + if (ndb->isopen == true && ndb->iswritable == true) { + ndb->wdb.flush(); + } + delete ndb; + } catch (const Xapian::Error &e) { + cout << "Exception: " << e.get_msg() << endl; + return false; + } catch (const string &s) { + cout << "Exception: " << s << endl; + return false; + } catch (const char *s) { + cout << "Exception: " << s << endl; + return false; + } catch (...) { + cout << "Caught unknown exception" << endl; + return false; + } + pdata = new Native; + if (pdata) + return true; + return false; +} diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h new file mode 100644 index 00000000..fb586eed --- /dev/null +++ b/src/rcldb/rcldb.h @@ -0,0 +1,32 @@ +#ifndef _DB_H_INCLUDED_ +#define _DB_H_INCLUDED_ +/* @(#$Id: rcldb.h,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes */ + +#include + +namespace Rcl { + +/** + * Wrapper class for the native database. 
+ */ +class Db { + void *pdata; + public: + Db(); + ~Db(); + enum OpenMode {DbRO, DbUpd, DbTrunc}; + bool open(const std::string &dbdir, OpenMode mode); + bool close(); +}; + +class Doc { + public: + string title; + string abstract; + string keywords; + string text; +}; + +} + +#endif /* _DB_H_INCLUDED_ */ diff --git a/src/utils/execmd.cpp b/src/utils/execmd.cpp index 1ef3d6ac..961749df 100644 --- a/src/utils/execmd.cpp +++ b/src/utils/execmd.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: execmd.cpp,v 1.1 2004-12-12 08:58:12 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: execmd.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -14,6 +14,7 @@ static char rcsid[] = "@(#$Id: execmd.cpp,v 1.1 2004-12-12 08:58:12 dockes Exp $ #include #include "execmd.h" +#include "pathut.h" using namespace std; #define MAX(A,B) (A>B?A:B) @@ -152,7 +153,7 @@ ExecCmd::doexec(const string &cmd, const list args, } // Fill up argv - argv[0] = cmd.c_str(); + argv[0] = path_getsimple(cmd).c_str(); i = 1; for (it = args.begin(); it != args.end(); it++) { argv[i++] = it->c_str(); @@ -160,7 +161,7 @@ ExecCmd::doexec(const string &cmd, const list args, argv[i] = 0; #if 0 - {int i = 0;cerr << "cmd: " << cmd << endl << "ARGS:" << endl; + {int i = 0;cerr << "cmd: " << cmd << endl << "ARGS: " << endl; while (argv[i]) cerr << argv[i++] << endl;} #endif diff --git a/src/utils/pathut.cpp b/src/utils/pathut.cpp index 4881c732..a8d90c2b 100644 --- a/src/utils/pathut.cpp +++ b/src/utils/pathut.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: pathut.cpp,v 1.1 2004-12-10 18:13:14 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: pathut.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #ifndef TEST_PATHUT @@ -31,6 +31,20 @@ std::string path_getfather(const std::string &s) { return father; } +std::string path_getsimple(const std::string &s) { + std::string simple = s; + + if 
(simple.empty()) + return simple; + + std::string::size_type slp = simple.rfind('/'); + if (slp == std::string::npos) + return simple; + + simple.erase(0, slp+1); + return simple; +} + std::string path_home() { uid_t uid = getuid(); @@ -53,13 +67,18 @@ using namespace std; const char *tstvec[] = {"", "/", "/dir", "/dir/", "/dir1/dir2", "/dir1/dir2", - "./dir", "./dir1/", "dir", "../dir"}; + "./dir", "./dir1/", "dir", "../dir", "/dir/toto.c", + "/dir/.c", +}; int main(int argc, const char **argv) { for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) { - cout << tstvec[i] << " -> " << path_getfather(tstvec[i]) << endl; + cout << tstvec[i] << " FATHER " << path_getfather(tstvec[i]) << endl; + } + for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) { + cout << tstvec[i] << " SIMPLE " << path_getsimple(tstvec[i]) << endl; } return 0; } diff --git a/src/utils/pathut.h b/src/utils/pathut.h index a4e253b4..c191bb61 100644 --- a/src/utils/pathut.h +++ b/src/utils/pathut.h @@ -1,6 +1,6 @@ #ifndef _PATHUT_H_INCLUDED_ #define _PATHUT_H_INCLUDED_ -/* @(#$Id: pathut.h,v 1.1 2004-12-10 18:13:14 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: pathut.h,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes */ #include @@ -13,6 +13,7 @@ inline void path_cat(std::string &s1, const std::string &s2) { s1 += s2; } +extern std::string path_getsimple(const std::string &s); extern std::string path_getfather(const std::string &s); extern std::string path_home(); diff --git a/src/utils/readfile.cpp b/src/utils/readfile.cpp new file mode 100644 index 00000000..83dbaa09 --- /dev/null +++ b/src/utils/readfile.cpp @@ -0,0 +1,49 @@ +#ifndef lint +static char rcsid[] = "@(#$Id: readfile.cpp,v 1.1 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes"; +#endif + +#include +#include +#ifndef O_STREAMING +#define O_STREAMING 0 +#endif +#include + +#include +using std::string; + +#include "readfile.h" + +bool file_to_string(const string &fn, string &data) +{ + bool ret = false; + + int fd 
= open(fn.c_str(), O_RDONLY|O_STREAMING); + if (fd < 0) { + // perror("open"); + return false; + } + char buf[4096]; + for (;;) { + int n = read(fd, buf, 4096); + if (n < 0) { + // perror("read"); + goto out; + } + if (n == 0) + break; + + try { + data.append(buf, n); + } catch (...) { + // fprintf(stderr, "file_to_string: out of memory\n"); + goto out; + } + } + + ret = true; + out: + if (fd >= 0) + close(fd); + return ret; +} diff --git a/src/utils/readfile.h b/src/utils/readfile.h new file mode 100644 index 00000000..bb668ff6 --- /dev/null +++ b/src/utils/readfile.h @@ -0,0 +1,13 @@ +#ifndef _READFILE_H_INCLUDED_ +#define _READFILE_H_INCLUDED_ +/* @(#$Id: readfile.h,v 1.1 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes */ + +#include + +/** + * Read whole file into string. + * @return true for ok, false else + */ +bool file_to_string(const std::string &filename, std::string &data); + +#endif /* _READFILE_H_INCLUDED_ */