diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index 1468f633..c1cd0f2f 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.4 2005-01-25 14:37:20 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.5 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -9,22 +9,18 @@ static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.4 2005-01-25 14:37:20 dockes Ex #include "conftree.h" #include "debuglog.h" -static DebugLog debuglog; -DebugLog *dbl = &debuglog; -class loginitializer { - public: - loginitializer() { - dbl->setlogfilename("stderr"); - dbl->setloglevel(10); - } -}; -static loginitializer lgntlzr; - using namespace std; RclConfig::RclConfig() : m_ok(false), conf(0), mimemap(0), mimeconf(0) { + static int loginit = 0; + if (!loginit) { + DebugLog::setfilename("stderr"); + DebugLog::getdbl()->setloglevel(10); + loginit = 1; + } + const char *cp = getenv("RECOLL_CONFDIR"); if (cp) { confdir = cp; diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp new file mode 100644 index 00000000..74f55a69 --- /dev/null +++ b/src/index/indexer.cpp @@ -0,0 +1,203 @@ +#ifndef lint +static char rcsid[] = "@(#$Id: indexer.cpp,v 1.1 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes"; +#endif +#include + +#include + +#include +#include +#include + +#include "pathut.h" +#include "conftree.h" +#include "rclconfig.h" +#include "fstreewalk.h" +#include "mimetype.h" +#include "rcldb.h" +#include "readfile.h" +#include "indexer.h" +#include "csguess.h" +#include "transcode.h" +#include "mimehandler.h" +#include "debuglog.h" + +using namespace std; + +#ifndef deleteZ +#define deleteZ(X) {delete X;X = 0;} +#endif + +/** + * Bunch holder for data used while indexing a directory tree + */ +class DbIndexer { + FsTreeWalker walker; + RclConfig *config; + string dbdir; + list *topdirs; + Rcl::Db db; + public: + DbIndexer(RclConfig *cnf, const string &dbd, list *top) + : config(cnf), dbdir(dbd), topdirs(top) + { } + + friend FsTreeWalker::Status + indexfile(void *, const std::string &, const struct stat *, + FsTreeWalker::CbFlag); + + bool index(); +}; + +bool DbIndexer::index() +{ + if (!db.open(dbdir, Rcl::Db::DbUpd)) { + LOGERR(("DbIndexer::index: error opening database in %s\n", + dbdir.c_str())); + return false; + } + for (list::const_iterator it = topdirs->begin(); + it != topdirs->end(); it++) { + LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(), + dbdir.c_str())); + if (walker.walk(*it, indexfile, this) != FsTreeWalker::FtwOk) { + LOGERR(("DbIndexer::index: error while indexing %s\n", + it->c_str())); + db.close(); + return false; + } + } + db.purge(); + if (!db.close()) { + LOGERR(("DbIndexer::index: error closing database in %s\n", + dbdir.c_str())); + return false; + } + return true; +} + +/** + * This function gets called for every file and directory found by the + * tree walker. It checks with the db if the file has changed and needs to + * be reindexed. If so, it calls an appropriate handler depending on the mime + * type, which is responsible for populating an Rcl::Doc. + * Accent and majuscule handling are performed by the db module when doing + * the actual indexing work. + */ +FsTreeWalker::Status +indexfile(void *cdata, const std::string &fn, const struct stat *stp, + FsTreeWalker::CbFlag flg) +{ + DbIndexer *me = (DbIndexer *)cdata; + + // If we're changing directories, possibly adjust parameters. + if (flg == FsTreeWalker::FtwDirEnter || + flg == FsTreeWalker::FtwDirReturn) { + me->config->setKeyDir(fn); + return FsTreeWalker::FtwOk; + } + + string mime = mimetype(fn, me->config->getMimeMap()); + if (mime.empty()) { + // No mime type ?? pass on. + LOGDEB(("indexfile: (no mime) [%s]\n", fn.c_str())); + return FsTreeWalker::FtwOk; + } + + // Look for appropriate handler + MimeHandlerFunc fun = getMimeHandler(mime, me->config->getMimeConf()); + if (!fun) { + // No handler for this type, for now :( + LOGDEB(("indexfile: %s : no handler\n", mime.c_str())); + return FsTreeWalker::FtwOk; + } + + LOGDEB(("indexfile: %s [%s]\n", mime.c_str(), fn.c_str())); + + // Check db up to date ? + if (!me->db.needUpdate(fn, stp)) + return FsTreeWalker::FtwOk; + + // Turn file into a document. The document has fields for title, body + // etc., all text converted to utf8 + Rcl::Doc doc; + if (!fun(me->config, fn, mime, doc)) + return FsTreeWalker::FtwOk; + + // Set up common fields: + doc.mimetype = mime; + char ascdate[20]; + sprintf(ascdate, "%ld", long(stp->st_mtime)); + doc.mtime = ascdate; + + // Do database-specific work to update document data + if (!me->db.add(fn, doc)) + return FsTreeWalker::FtwError; + + return FsTreeWalker::FtwOk; +} + +ConfIndexer::~ConfIndexer() +{ + deleteZ(indexer); +} + +bool ConfIndexer::index() +{ + ConfTree *conf = config->getConfig(); + + // Retrieve the list of directories to be indexed. + string topdirs; + if (conf->get("topdirs", topdirs, "") == 0) { + LOGERR(("ConfIndexer::index: no top directories in configuration\n")); + return false; + } + + // Group the directories by database: it is important that all + // directories for a database be indexed at once so that deleted + // file cleanup works + vector tdl; // List of directories to be indexed + if (!ConfTree::stringToStrings(topdirs, tdl)) { + LOGERR(("ConfIndexer::index: parse error for directory list\n")); + return false; + } + + vector::iterator dirit; + map > dbmap; + map >::iterator dbit; + for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) { + string db; + string dir = path_tildexpand(*dirit); + if (conf->get("dbdir", db, dir) == 0) { + LOGERR(("ConfIndexer::index: no database directory in " + "configuration for %s\n", dir.c_str())); + return false; + } + db = path_tildexpand(db); + dbit = dbmap.find(db); + if (dbit == dbmap.end()) { + list l; + l.push_back(dir); + dbmap[db] = l; + } else { + dbit->second.push_back(dir); + } + } + + for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) { + //cout << dbit->first << " -> "; + //list::const_iterator dit; + //for (dit = dbit->second.begin(); dit != dbit->second.end(); dit++) { + // cout << *dit << " "; + //} + //cout << endl; + + indexer = new DbIndexer(config, dbit->first, &dbit->second); + if (!indexer->index()) { + deleteZ(indexer); + return false; + } + deleteZ(indexer); + } + return true; +} diff --git a/src/index/indexer.h b/src/index/indexer.h index ff5f9237..33eba3b5 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -1,18 +1,17 @@ #ifndef _INDEXER_H_INCLUDED_ #define _INDEXER_H_INCLUDED_ -/* @(#$Id: indexer.h,v 1.3 2005-01-25 14:37:21 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: indexer.h,v 1.4 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes */ #include "rclconfig.h" - -#if 0 -class FsIndexer { - const ConfTree &conf; +class DbIndexer; +class ConfIndexer { + RclConfig *config; + DbIndexer *indexer; public: enum runStatus {IndexerOk, IndexerError}; - Indexer(const ConfTree &cnf): conf(cnf) {} - virtual ~Indexer() {} - runStatus run() = 0; + ConfIndexer(RclConfig *cnf) : config(cnf), indexer(0) {} + virtual ~ConfIndexer(); + bool index(); }; -#endif #endif /* _INDEXER_H_INCLUDED_ */ diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index d515bb32..e73cc83b 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -1,139 +1,13 @@ #ifndef lint -static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.7 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.8 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes"; #endif -#include +#include +#include -#include - -#include -#include -#include - -#include "pathut.h" -#include "conftree.h" -#include "rclconfig.h" -#include "fstreewalk.h" -#include "mimetype.h" -#include "rcldb.h" -#include "readfile.h" #include "indexer.h" -#include "csguess.h" -#include "transcode.h" -#include "mimehandler.h" -#include "debuglog.h" -using namespace std; - - -/** - * Bunch holder for data used while indexing a directory tree - */ -class DirIndexer { - FsTreeWalker walker; - RclConfig *config; - list *topdirs; - string dbdir; - Rcl::Db db; - public: - DirIndexer(RclConfig *cnf, const string &dbd, list *top) - : config(cnf), topdirs(top), dbdir(dbd) - { } - - friend FsTreeWalker::Status - indexfile(void *, const std::string &, const struct stat *, - FsTreeWalker::CbFlag); - - bool index(); -}; - -bool DirIndexer::index() -{ - if (!db.open(dbdir, Rcl::Db::DbUpd)) { - LOGERR(("DirIndexer::index: error opening database in %s\n", - dbdir.c_str())); - return false; - } - for (list::const_iterator it = topdirs->begin(); - it != topdirs->end(); it++) { - LOGDEB(("DirIndexer::index: Indexing %s into %s\n", it->c_str(), - dbdir.c_str())); - if (walker.walk(*it, indexfile, this) != FsTreeWalker::FtwOk) { - LOGERR(("DirIndexer::index: error while indexing %s\n", - it->c_str())); - db.close(); - return false; - } - } - db.purge(); - if (!db.close()) { - LOGERR(("DirIndexer::index: error closing database in %s\n", - dbdir.c_str())); - return false; - } - return true; -} - -/** - * This function gets called for every file and directory found by the - * tree walker. It checks with the db if the file has changed and needs to - * be reindexed. If so, it calls an appropriate handler depending on the mime - * type, which is responsible for populating an Rcl::Doc. - * Accent and majuscule handling are performed by the db module when doing - * the actual indexing work. - */ -FsTreeWalker::Status -indexfile(void *cdata, const std::string &fn, const struct stat *stp, - FsTreeWalker::CbFlag flg) -{ - DirIndexer *me = (DirIndexer *)cdata; - - if (flg == FsTreeWalker::FtwDirEnter || - flg == FsTreeWalker::FtwDirReturn) { - me->config->setKeyDir(fn); - return FsTreeWalker::FtwOk; - } - - string mime = mimetype(fn, me->config->getMimeMap()); - if (mime.length() == 0) { - LOGDEB(("indexfile: (no mime) [%s]\n", fn.c_str())); - // No mime type ?? pass on. - return FsTreeWalker::FtwOk; - } - - LOGDEB(("indexfile: %s [%s]\n", mime.c_str(), fn.c_str())); - - // Look for appropriate handler - MimeHandlerFunc fun = getMimeHandler(mime, me->config->getMimeConf()); - if (!fun) { - // No handler for this type, for now :( - return FsTreeWalker::FtwOk; - } - - if (!me->db.needUpdate(fn, stp)) - return FsTreeWalker::FtwOk; - - // Turn file into a document. The document has fields for title, body - // etc., all text converted to utf8 - Rcl::Doc doc; - if (!fun(me->config, fn, mime, doc)) - return FsTreeWalker::FtwOk; - - // Set up common fields: - doc.mimetype = mime; - char ascdate[20]; - sprintf(ascdate, "%ld", long(stp->st_mtime)); - doc.mtime = ascdate; - - // Set up xapian document, add postings and misc fields, - // add to or update database. - if (!me->db.add(fn, doc)) - return FsTreeWalker::FtwError; - - return FsTreeWalker::FtwOk; -} - -DirIndexer *indexer; +ConfIndexer *indexer; static void cleanup() { @@ -161,61 +35,11 @@ int main(int argc, const char **argv) signal(SIGTERM, sigcleanup); RclConfig config; - if (!config.ok()) - cerr << "Config could not be built" << endl; - - ConfTree *conf = config.getConfig(); - - // Retrieve the list of directories to be indexed. - string topdirs; - if (conf->get("topdirs", topdirs, "") == 0) { - cerr << "No top directories in configuration" << endl; + if (!config.ok()) { + fprintf(stderr, "Config could not be built\n"); exit(1); } - - // Group the directories by database: it is important that all - // directories for a database be indexed at once so that deleted - // file cleanup works - vector tdl; // List of directories to be indexed - if (!ConfTree::stringToStrings(topdirs, tdl)) { - cerr << "Parse error for directory list" << endl; - exit(1); - } - - vector::iterator dirit; - map > dbmap; - map >::iterator dbit; - for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) { - string db; - if (conf->get("dbdir", db, *dirit) == 0) { - cerr << "No database directory in configuration for " - << *dirit << endl; - exit(1); - } - dbit = dbmap.find(db); - if (dbit == dbmap.end()) { - list l; - l.push_back(*dirit); - dbmap[db] = l; - } else { - dbit->second.push_back(*dirit); - } - } - - for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) { - cout << dbit->first << " -> "; - list::const_iterator dit; - for (dit = dbit->second.begin(); dit != dbit->second.end(); dit++) { - cout << *dit << " "; - } - cout << endl; - indexer = new DirIndexer(&config, dbit->first, &dbit->second); - if (!indexer->index()) { - delete indexer; - indexer = 0; - exit(1); - } - delete indexer; - indexer = 0; - } + indexer = new ConfIndexer(&config); + + exit(!indexer->index()); } diff --git a/src/lib/Makefile b/src/lib/Makefile index 055ab798..c1f89c85 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -8,13 +8,14 @@ LIBS = librcl.a all: $(LIBS) OBJS = conftree.o csguess.o debuglog.o \ - fstreewalk.o html.o htmlparse.o \ + fstreewalk.o html.o htmlparse.o indexer.o \ mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \ rclconfig.o rcldb.o readfile.o smallut.o \ textsplit.o transcode.o \ unacpp.o unac.o SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \ ../utils/fstreewalk.cpp ../common/html.cpp ../common/htmlparse.cpp \ + ../index/indexer.cpp \ ../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \ ../common/myhtmlparse.cpp ../utils/pathut.cpp \ ../common/rclconfig.cpp ../common/rcldb.cpp ../utils/readfile.cpp \ @@ -41,6 +42,8 @@ html.o : ../common/html.cpp $(CXX) $(CXXFLAGS) -c $< htmlparse.o : ../common/htmlparse.cpp $(CXX) $(CXXFLAGS) -c $< +indexer.o : ../index/indexer.cpp + $(CXX) $(CXXFLAGS) -c $< mimehandler.o : ../common/mimehandler.cpp $(CXX) $(CXXFLAGS) -c $< mimeparse.o : ../utils/mimeparse.cpp diff --git a/src/qtgui/idxthread.cpp b/src/qtgui/idxthread.cpp new file mode 100644 index 00000000..edb0ac9b --- /dev/null +++ b/src/qtgui/idxthread.cpp @@ -0,0 +1,50 @@ +#include +#include + +#include "indexer.h" +#include "debuglog.h" + +class IdxThread : public QThread { + virtual void run(); + public: + ConfIndexer *indexer; +}; + +int startindexing; +int indexingdone; +bool indexingstatus; +int stopidxthread; + +void IdxThread::run() +{ + DebugLog::getdbl()->setloglevel(DEBDEB1); + for (;;) { + if (stopidxthread) { + delete indexer; + return; + } + if (startindexing) { + indexingdone = indexingstatus = startindexing = 0; + fprintf(stderr, "Index thread :start index\n"); + indexingstatus = indexer->index(); + indexingdone = 1; + } + msleep(100); + } +} + +static IdxThread idxthread; + +void start_idxthread(RclConfig *cnf) +{ + ConfIndexer *ix = new ConfIndexer(cnf); + idxthread.indexer = ix; + idxthread.start(); +} + +void stop_idxthread() +{ + stopidxthread = 1; + while (idxthread.running()) + sleep(1); +} diff --git a/src/qtgui/main.cpp b/src/qtgui/main.cpp index dfbef209..2b5b6fa0 100644 --- a/src/qtgui/main.cpp +++ b/src/qtgui/main.cpp @@ -1,35 +1,54 @@ #include #include +#include +#include + #include #include "recollmain.h" #include "rcldb.h" #include "rclconfig.h" +#include "pathut.h" RclConfig *rclconfig; Rcl::Db *rcldb; -static void cleanup() +extern void start_idxthread(RclConfig *cnf); +extern void stop_idxthread(); +extern int startindexing; + +void recollCleanup() { + stop_idxthread(); delete rcldb; rcldb = 0; delete rclconfig; rclconfig = 0; } + +int recollNeedsExit; + static void sigcleanup(int sig) { fprintf(stderr, "sigcleanup\n"); - cleanup(); - exit(1); + // Cant call exit from here, because the atexit cleanup does some + // thread stuff that we can't do from signal context. + // Just set a flag and let the watchdog timer do the work + recollNeedsExit = 1; } + + int main( int argc, char ** argv ) { - QApplication a( argc, argv ); + QApplication a(argc, argv); RecollMain w; w.show(); - a.connect( &a, SIGNAL( lastWindowClosed() ), &a, SLOT( quit() ) ); + a.connect(&a, SIGNAL(lastWindowClosed()), &a, SLOT(quit())); + QTimer *timer = new QTimer(&a); + w.connect(timer, SIGNAL(timeout()), &w, SLOT(checkExit())); + timer->start(100); - atexit(cleanup); + atexit(recollCleanup); if (signal(SIGHUP, SIG_IGN) != SIG_IGN) signal(SIGHUP, sigcleanup); if (signal(SIGINT, SIG_IGN) != SIG_IGN) @@ -54,14 +73,19 @@ int main( int argc, char ** argv ) QString("No db directory in configuration")); exit(1); } - + dbdir = path_tildexpand(dbdir); + rcldb = new Rcl::Db; if (!rcldb->open(dbdir, Rcl::Db::DbRO)) { - QMessageBox::critical(0, "Recoll", - QString("Could not open database in ") + - QString(dbdir)); - exit(1); + startindexing = 1; + QMessageBox::information(0, "Recoll", + QString("Could not open database in ") + + QString(dbdir) + ". Starting indexation"); + startindexing = 1; } + + start_idxthread(rclconfig); + return a.exec(); } diff --git a/src/qtgui/recoll.pro b/src/qtgui/recoll.pro index 84317e09..f4d8a2be 100644 --- a/src/qtgui/recoll.pro +++ b/src/qtgui/recoll.pro @@ -3,7 +3,8 @@ LANGUAGE = C++ CONFIG += qt warn_on release -SOURCES += main.cpp +SOURCES += main.cpp \ + idxthread.cpp FORMS = recollmain.ui diff --git a/src/qtgui/recollmain.ui b/src/qtgui/recollmain.ui index 63e3957b..a74e47c1 100644 --- a/src/qtgui/recollmain.ui +++ b/src/qtgui/recollmain.ui @@ -214,12 +214,6 @@ - - fileExitAction - activated() - RecollMain - fileExit() - fileExitAction activated() @@ -272,6 +266,7 @@ fileExit() + checkExit() reslistTE_doubleClicked( int par, int car ) reslistTE_clicked( int par, int car ) queryText_returnPressed() diff --git a/src/qtgui/recollmain.ui.h b/src/qtgui/recollmain.ui.h index 4130b230..ef987e64 100644 --- a/src/qtgui/recollmain.ui.h +++ b/src/qtgui/recollmain.ui.h @@ -27,12 +27,19 @@ extern RclConfig *rclconfig; extern Rcl::Db *rcldb; - +extern void recollCleanup(); void RecollMain::fileExit() { + LOGDEB(("RecollMain: fileExit\n")); exit(0); } +extern int recollNeedsExit; +void RecollMain::checkExit() +{ + if (recollNeedsExit) + fileExit(); +} static string plaintorich(const string &in) { @@ -175,12 +182,30 @@ void RecollMain::reslistTE_clicked(int par, int car) void RecollMain::queryText_returnPressed() { LOGDEB(("RecollMain::queryText_returnPressed()\n")); + if (!rcldb->isopen()) { + string dbdir; + if (rclconfig->getConfParam(string("dbdir"), dbdir) == 0) { + QMessageBox::critical(0, "Recoll", + QString("No db directory in configuration")); + exit(1); + } + dbdir = path_tildexpand(dbdir); + if (!rcldb->open(dbdir, Rcl::Db::DbRO)) { + QMessageBox::information(0, "Recoll", + QString("Could not open database in ") + + QString(dbdir) + " wait for indexing " + + "to complete?"); + return; + } + + } reslist_current = -1; reslist_winfirst = -1; QCString u8 = queryText->text().utf8(); - - rcldb->setQuery(string((const char *)u8)); + + if (!rcldb->setQuery(string((const char *)u8))) + return; listNextPB_clicked(); } @@ -218,13 +243,18 @@ void RecollMain::listNextPB_clicked() Rcl::Doc doc; doc.erase(); int percent; - if (!rcldb->getDoc(reslist_winfirst + i, doc, &percent)) - break; - int resCnt = rcldb->getResCnt(); - int last = MIN(resCnt, reslist_winfirst+respagesize); if (i == 0) { reslistTE->clear(); previewTextEdit->clear(); + } + if (!rcldb->getDoc(reslist_winfirst + i, doc, &percent)) { + if (i == 0) + reslist_winfirst = -1; + break; + } + int resCnt = rcldb->getResCnt(); + int last = MIN(resCnt, reslist_winfirst+respagesize); + if (i == 0) { reslistTE->append("

"); char line[80]; sprintf(line, "

Displaying results %d-%d out of %d
", @@ -276,3 +306,4 @@ void RecollMain::listNextPB_clicked() reslist_winfirst = 0; } } + diff --git a/src/query/qtry.cpp b/src/query/qtry.cpp index 994b3169..56541ba3 100644 --- a/src/query/qtry.cpp +++ b/src/query/qtry.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: qtry.cpp,v 1.3 2005-01-26 11:47:27 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: qtry.cpp,v 1.4 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes"; #endif // Tests with the query interface @@ -15,6 +15,7 @@ static char rcsid[] = "@(#$Id: qtry.cpp,v 1.3 2005-01-26 11:47:27 dockes Exp $ ( #include "rcldb.h" #include "transcode.h" #include "mimehandler.h" +#include "pathut.h" using namespace std; @@ -72,7 +73,7 @@ int main(int argc, char **argv) cerr << "No database directory in configuration" << endl; exit(1); } - + dbdir = path_tildexpand(dbdir); Rcl::Db *rcldb = new Rcl::Db; if (!rcldb->open(dbdir, Rcl::Db::DbRO)) { diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index db501287..7e5e44a5 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.13 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.14 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -87,7 +87,7 @@ bool Rcl::Db::open(const string& dir, OpenMode mode) LOGERR(("Rcl::Db::open: already open\n")); return false; } - + string ermsg; try { switch (mode) { case DbUpd: @@ -110,15 +110,16 @@ bool Rcl::Db::open(const string& dir, OpenMode mode) ndb->isopen = true; return true; } catch (const Xapian::Error &e) { - cerr << "Exception: " << e.get_msg() << endl; + ermsg = e.get_msg(); } catch (const string &s) { - cerr << "Exception: " << s << endl; + ermsg = s; } catch (const char *s) { - cerr << "Exception: " << s << endl; + ermsg = s; } catch (...) { - cerr << "Caught unknown exception" << endl; + ermsg = "Caught unknown exception"; } - LOGERR(("Rcl::Db::open: got exception\n")); + LOGERR(("Rcl::Db::open: exception while opening '%s': %s\n", + dir.c_str(), ermsg.c_str())); return false; } @@ -156,6 +157,14 @@ bool Rcl::Db::close() return false; } +bool Rcl::Db::isopen() +{ + if (pdata == 0) + return false; + Native *ndb = (Native *)pdata; + return ndb->isopen; +} + // A small class to hold state while splitting text class wsData { public: @@ -366,6 +375,10 @@ static bool splitQCb(void *cdata, const std::string &term, int ) bool Rcl::Db::setQuery(const std::string &querystring) { LOGDEB(("Rcl::Db::setQuery: %s\n", querystring.c_str())); + Native *ndb = (Native *)pdata; + if (!ndb) + return false; + wsQData splitData; TextSplit splitter(splitQCb, &splitData); @@ -375,7 +388,6 @@ bool Rcl::Db::setQuery(const std::string &querystring) } splitter.text_to_words(noacc); - Native *ndb = (Native *)pdata; ndb->query = Xapian::Query(Xapian::Query::OP_OR, splitData.terms.begin(), splitData.terms.end()); diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 5af9eaf4..c7e8614b 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -1,6 +1,6 @@ #ifndef _DB_H_INCLUDED_ #define _DB_H_INCLUDED_ -/* @(#$Id: rcldb.h,v 1.7 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rcldb.h,v 1.8 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes */ #include @@ -61,6 +61,7 @@ class Db { enum OpenMode {DbRO, DbUpd, DbTrunc}; bool open(const std::string &dbdir, OpenMode mode); bool close(); + bool isopen(); // Update-related functions bool add(const std::string &filename, const Doc &doc); diff --git a/src/utils/Makefile b/src/utils/Makefile index 476c104a..0abd1e27 100644 --- a/src/utils/Makefile +++ b/src/utils/Makefile @@ -1,5 +1,5 @@ -CXXFLAGS = -I. +CXXFLAGS = -I. -g BIGLIB = ../lib/librcl.a @@ -15,7 +15,7 @@ trfstreewalk.o : fstreewalk.cpp fstreewalk.h PATHUT_OBJS= trpathut.o pathut.o trpathut : $(PATHUT_OBJS) - $(CXX) -o trpathut $(PATHUT_OBJS) + $(CXX) $(CXXFLAGS) -o trpathut $(PATHUT_OBJS) trpathut.o : pathut.cpp pathut.h $(CXX) -o trpathut.o -c $(CXXFLAGS) \ -DTEST_PATHUT pathut.cpp diff --git a/src/utils/pathut.cpp b/src/utils/pathut.cpp index a8d90c2b..0a8ce919 100644 --- a/src/utils/pathut.cpp +++ b/src/utils/pathut.cpp @@ -1,15 +1,17 @@ #ifndef lint -static char rcsid[] = "@(#$Id: pathut.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: pathut.cpp,v 1.3 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #ifndef TEST_PATHUT #include +#include #include "pathut.h" +using std::string; -std::string path_getfather(const std::string &s) { - std::string father = s; +string path_getfather(const string &s) { + string father = s; // ?? if (father.empty()) @@ -22,8 +24,8 @@ std::string path_getfather(const std::string &s) { father.erase(father.length()-1); } - std::string::size_type slp = father.rfind('/'); - if (slp == std::string::npos) + string::size_type slp = father.rfind('/'); + if (slp == string::npos) return "./"; father.erase(slp); @@ -31,33 +33,57 @@ std::string path_getfather(const std::string &s) { return father; } -std::string path_getsimple(const std::string &s) { - std::string simple = s; +string path_getsimple(const string &s) { + string simple = s; if (simple.empty()) return simple; - std::string::size_type slp = simple.rfind('/'); - if (slp == std::string::npos) + string::size_type slp = simple.rfind('/'); + if (slp == string::npos) return simple; simple.erase(0, slp+1); return simple; } -std::string path_home() +string path_home() { uid_t uid = getuid(); struct passwd *entry = getpwuid(uid); - if (entry == 0) + if (entry == 0) { + const char *cp = getenv("HOME"); + if (cp) + return cp; + else return "/"; + } - std::string homedir = entry->pw_dir; + string homedir = entry->pw_dir; path_catslash(homedir); return homedir; } +extern string path_tildexpand(const string &s) +{ + if (s.empty() || s[0] != '~') + return s; + string o = s; + if (s.length() == 1) { + o.replace(0, 1, path_home()); + } else if (s[1] == '/') { + o.replace(0, 2, path_home()); + } else { + string::size_type pos = s.find('/'); + int l = (pos == string::npos) ? s.length() - 1 : pos - 1; + struct passwd *entry = getpwnam(s.substr(1, l).c_str()); + if (entry) + o.replace(0, l+1, entry->pw_dir); + } + return o; +} + #else // TEST_PATHUT #include @@ -71,15 +97,29 @@ const char *tstvec[] = {"", "/", "/dir", "/dir/", "/dir1/dir2", "/dir/.c", }; +const string ttvec[] = {"/dir", "", "~", "~/sub", "~root", "~root/sub", + "~nosuch", "~nosuch/sub"}; +int nttvec = sizeof(ttvec) / sizeof(string); + int main(int argc, const char **argv) { - +#if 0 for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) { cout << tstvec[i] << " FATHER " << path_getfather(tstvec[i]) << endl; } for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) { cout << tstvec[i] << " SIMPLE " << path_getsimple(tstvec[i]) << endl; } +#endif + string s; + + for (int i = 0; i < nttvec; i++) { + cout << "tildexp: '" << ttvec[i] << "' -> '" << + path_tildexpand(ttvec[i]) << "'" << endl; + } + + + return 0; } diff --git a/src/utils/pathut.h b/src/utils/pathut.h index c191bb61..e5e2666e 100644 --- a/src/utils/pathut.h +++ b/src/utils/pathut.h @@ -1,6 +1,6 @@ #ifndef _PATHUT_H_INCLUDED_ #define _PATHUT_H_INCLUDED_ -/* @(#$Id: pathut.h,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: pathut.h,v 1.3 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes */ #include @@ -16,5 +16,6 @@ inline void path_cat(std::string &s1, const std::string &s2) { extern std::string path_getsimple(const std::string &s); extern std::string path_getfather(const std::string &s); extern std::string path_home(); +extern std::string path_tildexpand(const std::string &s); #endif /* _PATHUT_H_INCLUDED_ */ diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp new file mode 100644 index 00000000..23bfa636 --- /dev/null +++ b/src/utils/smallut.cpp @@ -0,0 +1,148 @@ +#ifndef lint +static char rcsid[] = "@(#$Id: smallut.cpp,v 1.1 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes"; +#endif + +#ifndef TEST_SMALLUT +#include +#include "smallut.h" + +#include + +#define MIN(A,B) ((A)<(B)?(A):(B)) + +int stringicmp(const string & s1, const string& s2) +{ + string::const_iterator it1 = s1.begin(); + string::const_iterator it2 = s2.begin(); + int size1 = s1.length(), size2 = s2.length(); + char c1, c2; + + if (size1 > size2) { + while (it1 != s1.end()) { + c1 = ::toupper(*it1); + c2 = ::toupper(*it2); + if (c1 != c2) { + return c1 > c2 ? 1 : -1; + } + ++it1; ++it2; + } + return size1 == size2 ? 0 : 1; + } else { + while (it2 != s2.end()) { + c1 = ::toupper(*it1); + c2 = ::toupper(*it2); + if (c1 != c2) { + return c1 > c2 ? 1 : -1; + } + ++it1; ++it2; + } + return size1 == size2 ? 0 : -1; + } +} + +// s1 is already lowercase +int stringlowercmp(const string & s1, const string& s2) +{ + string::const_iterator it1 = s1.begin(); + string::const_iterator it2 = s2.begin(); + int size1 = s1.length(), size2 = s2.length(); + char c2; + + if (size1 > size2) { + while (it1 != s1.end()) { + c2 = ::tolower(*it2); + if (*it1 != c2) { + return *it1 > c2 ? 1 : -1; + } + ++it1; ++it2; + } + return size1 == size2 ? 0 : 1; + } else { + while (it2 != s2.end()) { + c2 = ::tolower(*it2); + if (*it1 != c2) { + return *it1 > c2 ? 1 : -1; + } + ++it1; ++it2; + } + return size1 == size2 ? 0 : -1; + } +} + +// s1 is already uppercase +int stringuppercmp(const string & s1, const string& s2) +{ + string::const_iterator it1 = s1.begin(); + string::const_iterator it2 = s2.begin(); + int size1 = s1.length(), size2 = s2.length(); + char c2; + + if (size1 > size2) { + while (it1 != s1.end()) { + c2 = ::toupper(*it2); + if (*it1 != c2) { + return *it1 > c2 ? 1 : -1; + } + ++it1; ++it2; + } + return size1 == size2 ? 0 : 1; + } else { + while (it2 != s2.end()) { + c2 = ::toupper(*it2); + if (*it1 != c2) { + return *it1 > c2 ? 1 : -1; + } + ++it1; ++it2; + } + return size1 == size2 ? 0 : -1; + } +} + +#else + +#include +#include "smallut.h" + +struct spair { + const char *s1; + const char *s2; +}; +struct spair pairs[] = { + {"", ""}, + {"", "a"}, + {"a", ""}, + {"a", "a"}, + {"A", "a"}, + {"a", "A"}, + {"A", "A"}, + {"12", "12"}, + {"a", "ab"}, + {"ab", "a"}, + {"A", "Ab"}, + {"a", "Ab"}, +}; +int npairs = sizeof(pairs) / sizeof(struct spair); + +int main(int argc, char **argv) +{ + for (int i = 0; i < npairs; i++) { + { + int c = stringicmp(pairs[i].s1, pairs[i].s2); + printf("'%s' %s '%s' ", pairs[i].s1, + c == 0 ? "==" : c < 0 ? "<" : ">", pairs[i].s2); + } + { + int cl = stringlowercmp(pairs[i].s1, pairs[i].s2); + printf("L '%s' %s '%s' ", pairs[i].s1, + cl == 0 ? "==" : cl < 0 ? "<" : ">", pairs[i].s2); + } + { + int cu = stringuppercmp(pairs[i].s1, pairs[i].s2); + printf("U '%s' %s '%s' ", pairs[i].s1, + cu == 0 ? "==" : cu < 0 ? "<" : ">", pairs[i].s2); + } + printf("\n"); + } +} + +#endif diff --git a/src/utils/smallut.h b/src/utils/smallut.h new file mode 100644 index 00000000..6bc7aa3f --- /dev/null +++ b/src/utils/smallut.h @@ -0,0 +1,13 @@ +#ifndef _SMALLUT_H_INCLUDED_ +#define _SMALLUT_H_INCLUDED_ +/* @(#$Id: smallut.h,v 1.1 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes */ +#include + +using std::string; + +extern int stringicmp(const string& s1, const string& s2); +extern int stringlowercmp(const string& alreadylower, const string& s2); +extern int stringuppercmp(const string& alreadyupper, const string& s2); + + +#endif /* _SMALLUT_H_INCLUDED_ */