diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index ca715dc9..aa6a1ec5 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -16,7 +16,7 @@ */ #ifndef _RCLCONFIG_H_INCLUDED_ #define _RCLCONFIG_H_INCLUDED_ -/* @(#$Id: rclconfig.h,v 1.32 2007-06-02 08:30:41 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rclconfig.h,v 1.33 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -36,6 +36,11 @@ class RclConfig { public: RclConfig(const string *argcnf = 0); + // Main programs should implement this, it avoids having to carry + // the configuration parameter everywhere. Places where several + // instances might be needed will take care of themselves. + static RclConfig* getMainConfig(); + bool ok() {return m_ok;} const string &getReason() {return m_reason;} /** Return the directory where this config is stored */ diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index 2a7d2577..a8d8df0c 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: indexer.cpp,v 1.57 2007-06-02 08:30:41 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: indexer.cpp,v 1.58 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -186,10 +186,6 @@ bool DbIndexer::init(bool resetbefore, bool rdonly) return false; } - int idxflushmb; - if (m_config->getConfParam("idxflushmb", &idxflushmb)) - m_db.setFlushMb(idxflushmb); - return true; } diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index 8cf959fa..0fa2c4d0 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.32 2007-05-21 13:30:21 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.33 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -252,6 +252,12 @@ Usage(void) exit((op_flags & OPT_h)==0); } +static RclConfig *config; +RclConfig *RclConfig::getMainConfig() +{ + return config; +} + int main(int argc, const char **argv) { string a_config; @@ -303,8 +309,7 @@ int main(int argc, const char **argv) string reason; RclInitFlags flags = (op_flags & OPT_m) && !(op_flags&OPT_D) ? RCLINIT_DAEMON : RCLINIT_NONE; - RclConfig *config = - recollinit(flags, cleanup, sigcleanup, reason, &a_config); + config = recollinit(flags, cleanup, sigcleanup, reason, &a_config); if (config == 0 || !config->ok()) { cerr << "Configuration problem: " << reason << endl; exit(1); diff --git a/src/mk/FreeBSD b/src/mk/FreeBSD index 87896190..43b12ab2 100644 --- a/src/mk/FreeBSD +++ b/src/mk/FreeBSD @@ -4,5 +4,6 @@ include $(depth)/mk/localdefs ALL_CXXFLAGS = $(CXXFLAGS) $(COMMONCXXFLAGS) $(LOCALCXXFLAGS) \ -pthread \ -DHAVE_VASPRINTF=1 \ - -DHAVE_MKDTEMP=1 + -DHAVE_MKDTEMP=1 \ + -DSTATFS_INCLUDE="" LIBSYS = diff --git a/src/qtgui/main.cpp b/src/qtgui/main.cpp index 948193c9..8ebed039 100644 --- a/src/qtgui/main.cpp +++ b/src/qtgui/main.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: main.cpp,v 1.60 2007-06-02 08:30:41 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: main.cpp,v 1.61 2007-06-08 16:47:19 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -78,6 +78,11 @@ Rcl::Db *rcldb; Aspell *aspell; #endif +RclConfig* RclConfig::getMainConfig() +{ + return rclconfig; +} + RclHistory *g_dynconf; int recollNeedsExit; static string dbdir; diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 9dabfe72..312eb952 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.111 2007-06-02 08:30:42 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.112 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -33,6 +33,7 @@ static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.111 2007-06-02 08:30:42 dockes Exp using namespace std; #endif /* NO_NAMESPACES */ +#include "rclconfig.h" #include "rcldb.h" #include "stemdb.h" #include "textsplit.h" @@ -498,9 +499,16 @@ string Native::makeAbstract(Xapian::docid docid, const list& iterms) Db::Db() : m_ndb(0), m_qOpts(QO_NONE), m_idxAbsTruncLen(250), m_synthAbsLen(250), - m_synthAbsWordCtxLen(4), m_flushmb(-1), m_mode(Db::DbRO) + m_synthAbsWordCtxLen(4), m_flushMb(-1), + m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0), + m_maxFsOccupPc(0), m_mode(Db::DbRO) { m_ndb = new Native(this); + RclConfig *config = RclConfig::getMainConfig(); + if (config) { + config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc); + config->getConfParam("idxflushmb", &m_flushMb); + } } Db::~Db() @@ -824,17 +832,30 @@ void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen) m_synthAbsWordCtxLen = syntctxlen; } +static const int MB = 1024 * 1024; + // Add document in internal form to the database: index the terms in // the title abstract and body and add special terms for file name, // date, mime type ... , create the document data record (more // metadata), and update database -bool Db::add(const string &fn, const Doc &idoc, - const struct stat *stp) +bool Db::add(const string &fn, const Doc &idoc, const struct stat *stp) { LOGDEB1(("Db::add: fn %s\n", fn.c_str())); if (m_ndb == 0) return false; + // Check file system full every mbyte of indexed text. + if (m_maxFsOccupPc > 0 && (m_curtxtsz - m_occtxtsz) / MB >= 1) { + LOGDEB(("Db::add: checking file system usage\n")); + int pc; + if (fsocc(m_basedir, &pc) && pc >= m_maxFsOccupPc) { + LOGERR(("Db::add: stop indexing: file system " + "%d%% full > max %d%%\n", pc, m_maxFsOccupPc)); + return false; + } + m_occtxtsz = m_curtxtsz; + } + Doc doc = idoc; // Truncate abstract, title and keywords to reasonable lengths. If @@ -1069,11 +1090,11 @@ bool Db::add(const string &fn, const Doc &idoc, } // Test if we're over the flush threshold (limit memory usage): - if (m_flushmb > 0) { - m_curtxtsz += doc.text.length(); - if (m_curtxtsz / (1024*1024) >= m_flushmb) { + m_curtxtsz += doc.text.length(); + if (m_flushMb > 0) { + if ((m_curtxtsz - m_flushtxtsz) / MB >= m_flushMb) { ermsg.erase(); - LOGDEB(("Db::add: text size >= %d Mb, flushing\n", m_flushmb)); + LOGDEB(("Db::add: text size >= %d Mb, flushing\n", m_flushMb)); try { m_ndb->wdb.flush(); } catch (const Xapian::Error &e) { @@ -1087,7 +1108,7 @@ bool Db::add(const string &fn, const Doc &idoc, LOGERR(("Db::add: flush() failed: %s\n", ermsg.c_str())); return false; } - m_curtxtsz = 0; + m_flushtxtsz = m_curtxtsz; } } diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index bfd88f15..abebc6e2 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -16,7 +16,7 @@ */ #ifndef _DB_H_INCLUDED_ #define _DB_H_INCLUDED_ -/* @(#$Id: rcldb.h,v 1.49 2007-06-08 16:05:25 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rcldb.h,v 1.50 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -119,9 +119,6 @@ class Db { /** Delete stem expansion database for given language. */ bool deleteStemDb(const string &lang); - /** Adjust flush threshold */ - void setFlushMb(int mb) {m_flushmb = mb;} - /* Query-related methods ************************************/ @@ -213,12 +210,19 @@ private: int m_synthAbsWordCtxLen; // Flush threshold. Megabytes of text indexed before we flush. - int m_flushmb; - // Text bytes indexed since last flush + int m_flushMb; + // Text bytes indexed since beginning long long m_curtxtsz; + // Text bytes at last flush + long long m_flushtxtsz; + // Text bytes at last fsoccup check + long long m_occtxtsz; + + // Maximum file system occupation percentage + int m_maxFsOccupPc; // Database directory - string m_basedir; + string m_basedir; // List of directories for additional databases to query list m_extraDbs;