added file system usage check
This commit is contained in:
parent
23adf64d7c
commit
6d8040c2f0
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _RCLCONFIG_H_INCLUDED_
|
#ifndef _RCLCONFIG_H_INCLUDED_
|
||||||
#define _RCLCONFIG_H_INCLUDED_
|
#define _RCLCONFIG_H_INCLUDED_
|
||||||
/* @(#$Id: rclconfig.h,v 1.32 2007-06-02 08:30:41 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rclconfig.h,v 1.33 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -36,6 +36,11 @@ class RclConfig {
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
RclConfig(const string *argcnf = 0);
|
RclConfig(const string *argcnf = 0);
|
||||||
|
// Main programs should implement this; it avoids having to carry
|
||||||
|
// the configuration parameter everywhere. Places where several
|
||||||
|
// instances might be needed will take care of themselves.
|
||||||
|
static RclConfig* getMainConfig();
|
||||||
|
|
||||||
bool ok() {return m_ok;}
|
bool ok() {return m_ok;}
|
||||||
const string &getReason() {return m_reason;}
|
const string &getReason() {return m_reason;}
|
||||||
/** Return the directory where this config is stored */
|
/** Return the directory where this config is stored */
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.57 2007-06-02 08:30:41 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.58 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -186,10 +186,6 @@ bool DbIndexer::init(bool resetbefore, bool rdonly)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int idxflushmb;
|
|
||||||
if (m_config->getConfParam("idxflushmb", &idxflushmb))
|
|
||||||
m_db.setFlushMb(idxflushmb);
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.32 2007-05-21 13:30:21 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.33 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -252,6 +252,12 @@ Usage(void)
|
|||||||
exit((op_flags & OPT_h)==0);
|
exit((op_flags & OPT_h)==0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static RclConfig *config;
|
||||||
|
RclConfig *RclConfig::getMainConfig()
|
||||||
|
{
|
||||||
|
return config;
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, const char **argv)
|
int main(int argc, const char **argv)
|
||||||
{
|
{
|
||||||
string a_config;
|
string a_config;
|
||||||
@ -303,8 +309,7 @@ int main(int argc, const char **argv)
|
|||||||
string reason;
|
string reason;
|
||||||
RclInitFlags flags = (op_flags & OPT_m) && !(op_flags&OPT_D) ?
|
RclInitFlags flags = (op_flags & OPT_m) && !(op_flags&OPT_D) ?
|
||||||
RCLINIT_DAEMON : RCLINIT_NONE;
|
RCLINIT_DAEMON : RCLINIT_NONE;
|
||||||
RclConfig *config =
|
config = recollinit(flags, cleanup, sigcleanup, reason, &a_config);
|
||||||
recollinit(flags, cleanup, sigcleanup, reason, &a_config);
|
|
||||||
if (config == 0 || !config->ok()) {
|
if (config == 0 || !config->ok()) {
|
||||||
cerr << "Configuration problem: " << reason << endl;
|
cerr << "Configuration problem: " << reason << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
|
|||||||
@ -4,5 +4,6 @@ include $(depth)/mk/localdefs
|
|||||||
ALL_CXXFLAGS = $(CXXFLAGS) $(COMMONCXXFLAGS) $(LOCALCXXFLAGS) \
|
ALL_CXXFLAGS = $(CXXFLAGS) $(COMMONCXXFLAGS) $(LOCALCXXFLAGS) \
|
||||||
-pthread \
|
-pthread \
|
||||||
-DHAVE_VASPRINTF=1 \
|
-DHAVE_VASPRINTF=1 \
|
||||||
-DHAVE_MKDTEMP=1
|
-DHAVE_MKDTEMP=1 \
|
||||||
|
-DSTATFS_INCLUDE="<sys/mount.h>"
|
||||||
LIBSYS =
|
LIBSYS =
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: main.cpp,v 1.60 2007-06-02 08:30:41 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: main.cpp,v 1.61 2007-06-08 16:47:19 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -78,6 +78,11 @@ Rcl::Db *rcldb;
|
|||||||
Aspell *aspell;
|
Aspell *aspell;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
RclConfig* RclConfig::getMainConfig()
|
||||||
|
{
|
||||||
|
return rclconfig;
|
||||||
|
}
|
||||||
|
|
||||||
RclHistory *g_dynconf;
|
RclHistory *g_dynconf;
|
||||||
int recollNeedsExit;
|
int recollNeedsExit;
|
||||||
static string dbdir;
|
static string dbdir;
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.111 2007-06-02 08:30:42 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.112 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -33,6 +33,7 @@ static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.111 2007-06-02 08:30:42 dockes Exp
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
#endif /* NO_NAMESPACES */
|
#endif /* NO_NAMESPACES */
|
||||||
|
|
||||||
|
#include "rclconfig.h"
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
#include "stemdb.h"
|
#include "stemdb.h"
|
||||||
#include "textsplit.h"
|
#include "textsplit.h"
|
||||||
@ -498,9 +499,16 @@ string Native::makeAbstract(Xapian::docid docid, const list<string>& iterms)
|
|||||||
|
|
||||||
Db::Db()
|
Db::Db()
|
||||||
: m_ndb(0), m_qOpts(QO_NONE), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
: m_ndb(0), m_qOpts(QO_NONE), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
||||||
m_synthAbsWordCtxLen(4), m_flushmb(-1), m_mode(Db::DbRO)
|
m_synthAbsWordCtxLen(4), m_flushMb(-1),
|
||||||
|
m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0),
|
||||||
|
m_maxFsOccupPc(0), m_mode(Db::DbRO)
|
||||||
{
|
{
|
||||||
m_ndb = new Native(this);
|
m_ndb = new Native(this);
|
||||||
|
RclConfig *config = RclConfig::getMainConfig();
|
||||||
|
if (config) {
|
||||||
|
config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
|
||||||
|
config->getConfParam("idxflushmb", &m_flushMb);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Db::~Db()
|
Db::~Db()
|
||||||
@ -824,17 +832,30 @@ void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen)
|
|||||||
m_synthAbsWordCtxLen = syntctxlen;
|
m_synthAbsWordCtxLen = syntctxlen;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const int MB = 1024 * 1024;
|
||||||
|
|
||||||
// Add document in internal form to the database: index the terms in
|
// Add document in internal form to the database: index the terms in
|
||||||
// the title abstract and body and add special terms for file name,
|
// the title abstract and body and add special terms for file name,
|
||||||
// date, mime type ... , create the document data record (more
|
// date, mime type ... , create the document data record (more
|
||||||
// metadata), and update database
|
// metadata), and update database
|
||||||
bool Db::add(const string &fn, const Doc &idoc,
|
bool Db::add(const string &fn, const Doc &idoc, const struct stat *stp)
|
||||||
const struct stat *stp)
|
|
||||||
{
|
{
|
||||||
LOGDEB1(("Db::add: fn %s\n", fn.c_str()));
|
LOGDEB1(("Db::add: fn %s\n", fn.c_str()));
|
||||||
if (m_ndb == 0)
|
if (m_ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// Check file system occupation after every megabyte of indexed text.
|
||||||
|
if (m_maxFsOccupPc > 0 && (m_curtxtsz - m_occtxtsz) / MB >= 1) {
|
||||||
|
LOGDEB(("Db::add: checking file system usage\n"));
|
||||||
|
int pc;
|
||||||
|
if (fsocc(m_basedir, &pc) && pc >= m_maxFsOccupPc) {
|
||||||
|
LOGERR(("Db::add: stop indexing: file system "
|
||||||
|
"%d%% full > max %d%%\n", pc, m_maxFsOccupPc));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
m_occtxtsz = m_curtxtsz;
|
||||||
|
}
|
||||||
|
|
||||||
Doc doc = idoc;
|
Doc doc = idoc;
|
||||||
|
|
||||||
// Truncate abstract, title and keywords to reasonable lengths. If
|
// Truncate abstract, title and keywords to reasonable lengths. If
|
||||||
@ -1069,11 +1090,11 @@ bool Db::add(const string &fn, const Doc &idoc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Test if we're over the flush threshold (limit memory usage):
|
// Test if we're over the flush threshold (limit memory usage):
|
||||||
if (m_flushmb > 0) {
|
m_curtxtsz += doc.text.length();
|
||||||
m_curtxtsz += doc.text.length();
|
if (m_flushMb > 0) {
|
||||||
if (m_curtxtsz / (1024*1024) >= m_flushmb) {
|
if ((m_curtxtsz - m_flushtxtsz) / MB >= m_flushMb) {
|
||||||
ermsg.erase();
|
ermsg.erase();
|
||||||
LOGDEB(("Db::add: text size >= %d Mb, flushing\n", m_flushmb));
|
LOGDEB(("Db::add: text size >= %d Mb, flushing\n", m_flushMb));
|
||||||
try {
|
try {
|
||||||
m_ndb->wdb.flush();
|
m_ndb->wdb.flush();
|
||||||
} catch (const Xapian::Error &e) {
|
} catch (const Xapian::Error &e) {
|
||||||
@ -1087,7 +1108,7 @@ bool Db::add(const string &fn, const Doc &idoc,
|
|||||||
LOGERR(("Db::add: flush() failed: %s\n", ermsg.c_str()));
|
LOGERR(("Db::add: flush() failed: %s\n", ermsg.c_str()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
m_curtxtsz = 0;
|
m_flushtxtsz = m_curtxtsz;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _DB_H_INCLUDED_
|
#ifndef _DB_H_INCLUDED_
|
||||||
#define _DB_H_INCLUDED_
|
#define _DB_H_INCLUDED_
|
||||||
/* @(#$Id: rcldb.h,v 1.49 2007-06-08 16:05:25 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rcldb.h,v 1.50 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
@ -119,9 +119,6 @@ class Db {
|
|||||||
/** Delete stem expansion database for given language. */
|
/** Delete stem expansion database for given language. */
|
||||||
bool deleteStemDb(const string &lang);
|
bool deleteStemDb(const string &lang);
|
||||||
|
|
||||||
/** Adjust flush threshold */
|
|
||||||
void setFlushMb(int mb) {m_flushmb = mb;}
|
|
||||||
|
|
||||||
|
|
||||||
/* Query-related methods ************************************/
|
/* Query-related methods ************************************/
|
||||||
|
|
||||||
@ -213,12 +210,19 @@ private:
|
|||||||
int m_synthAbsWordCtxLen;
|
int m_synthAbsWordCtxLen;
|
||||||
|
|
||||||
// Flush threshold. Megabytes of text indexed before we flush.
|
// Flush threshold. Megabytes of text indexed before we flush.
|
||||||
int m_flushmb;
|
int m_flushMb;
|
||||||
// Text bytes indexed since last flush
|
// Text bytes indexed since beginning
|
||||||
long long m_curtxtsz;
|
long long m_curtxtsz;
|
||||||
|
// Text bytes at last flush
|
||||||
|
long long m_flushtxtsz;
|
||||||
|
// Text bytes indexed at the last file system occupation check
|
||||||
|
long long m_occtxtsz;
|
||||||
|
|
||||||
|
// Maximum file system occupation percentage
|
||||||
|
int m_maxFsOccupPc;
|
||||||
|
|
||||||
// Database directory
|
// Database directory
|
||||||
string m_basedir;
|
string m_basedir;
|
||||||
|
|
||||||
// List of directories for additional databases to query
|
// List of directories for additional databases to query
|
||||||
list<string> m_extraDbs;
|
list<string> m_extraDbs;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user