roughly auto-config threads depending on cpu count

This commit is contained in:
Jean-Francois Dockes 2013-04-25 09:00:50 +02:00
parent 2b80c77c23
commit ec68cd95ba
11 changed files with 236 additions and 10 deletions

View File

@ -35,6 +35,7 @@ using std::list;
#endif
#include <iostream>
#include <sstream>
#include <cstdlib>
#include <cstring>
using namespace std;
@ -48,6 +49,9 @@ using namespace std;
#include "textsplit.h"
#include "readfile.h"
#include "fstreewalk.h"
#include "cpuconf.h"
typedef pair<int,int> RclPII;
// Static, logically const, RclConfig members are initialized once from the
// first object build during process initialization.
@ -218,6 +222,10 @@ RclConfig::RclConfig(const string *argcnf)
if (!readFieldsConfig(cnferrloc))
return;
// Default is no threading
m_thrConf = create_vector<RclPII>
(RclPII(-1, 0))(RclPII(-1, 0))(RclPII(-1, 0));
m_ptrans = new ConfSimple(path_cat(m_confdir, "ptrans").c_str());
m_ok = true;
@ -368,14 +376,83 @@ bool RclConfig::getConfParam(const string &name, vector<int> *vip) const
return true;
}
pair<int,int> RclConfig::getThrConf(ThrStage who) const
/**
 * Compute the threading configuration and store it in m_thrConf as three
 * (queue size, thread count) pairs.
 *
 * m_thrConf is first reset to the "no threading" default, (-1, 0) for each
 * entry. The "thrQSizes" and "thrTCounts" configuration parameters are then
 * examined:
 *  - no "thrQSizes" value: the default is kept;
 *  - first queue size is 0: values are chosen from the CPU count reported
 *    by getCpuConf();
 *  - first queue size is negative: the default is kept (threads disabled);
 *  - otherwise both lists must hold exactly 3 values, which are recorded
 *    as they are; if not, the default is kept.
 * Whatever was chosen is logged at debug level before returning.
 */
void RclConfig::initThrConf()
{
// Default is no threading
m_thrConf = create_vector<RclPII>
(RclPII(-1, 0))(RclPII(-1, 0))(RclPII(-1, 0));
// vq receives the "thrQSizes" values, vt the "thrTCounts" values
vector<int> vq;
vector<int> vt;
if (!getConfParam("thrQSizes", &vq) || !getConfParam("thrTCounts", &vt)) {
if (!getConfParam("thrQSizes", &vq)) {
LOGINFO(("RclConfig::initThrConf: no thread info (queues)\n"));
goto out;
}
// If the first queue size is 0, autoconf is requested.
if (vq.size() > 0 && vq[0] == 0) {
LOGDEB(("RclConfig::initThrConf: autoconf requested\n"));
CpuConf cpus;
if (!getCpuConf(cpus) || cpus.ncpus < 1) {
LOGERR(("RclConfig::initThrConf: could not retrieve cpu conf\n"));
// Fall back to a single CPU, which keeps the no-threading default below
cpus.ncpus = 1;
}
// Arbitrarily set threads config based on number of CPUS. This also
// depends on the IO setup actually, so we're bound to be wrong...
if (cpus.ncpus == 1) {
// Somewhat counter-intuitively (because of possible IO//)
// it seems that the best config here is no threading
} else if (cpus.ncpus < 4) {
// Untested so let's guess...
m_thrConf = create_vector<RclPII>
(RclPII(2, 2))(RclPII(2, 2))(RclPII(2, 1));
} else if (cpus.ncpus < 6) {
m_thrConf = create_vector<RclPII>
(RclPII(2, 4))(RclPII(2, 2))(RclPII(2, 1));
} else {
m_thrConf = create_vector<RclPII>
(RclPII(2, 5))(RclPII(2, 3))(RclPII(2, 1));
}
goto out;
} else if (vq.size() > 0 && vq[0] < 0) {
// threads disabled by config
goto out;
}
// Explicit configuration: the thread counts are needed too
if (!getConfParam("thrTCounts", &vt) ) {
LOGINFO(("RclConfig::initThrConf: no thread info (threads)\n"));
goto out;
}
// Both lists must have exactly one value per entry of m_thrConf
if (vq.size() != 3 || vt.size() != 3) {
LOGINFO(("RclConfig::initThrConf: bad thread info vector sizes\n"));
goto out;
}
// Normal case: record info from config
m_thrConf.clear();
for (unsigned int i = 0; i < 3; i++) {
m_thrConf.push_back(RclPII(vq[i], vt[i]));
}
// Common exit point: log the configuration which ended up in m_thrConf
out:
ostringstream sconf;
for (unsigned int i = 0; i < 3; i++) {
sconf << "(" << m_thrConf[i].first << ", " << m_thrConf[i].second <<
") ";
}
LOGDEB(("RclConfig::initThrConf: chosen config (ql,nt): %s\n",
sconf.str().c_str()));
}
/**
 * Return the (queue size, thread count) pair stored in m_thrConf for the
 * stage designated by 'who'.
 *
 * If m_thrConf does not hold exactly 3 entries, an error is logged and
 * (-1, -1) is returned.
 */
pair<int,int> RclConfig::getThrConf(ThrStage who) const
{
// Guard the indexing below: exactly 3 entries are expected
if (m_thrConf.size() != 3) {
LOGERR(("RclConfig::getThrConf: bad data in rclconfig\n"));
return pair<int,int>(-1,-1);
}
return pair<int,int>(vq[who], vt[who]);
return m_thrConf[who];
}
vector<string> RclConfig::getTopdirs() const
@ -1257,6 +1334,7 @@ void RclConfig::initFrom(const RclConfig& r)
m_stpsuffstate.init(this, mimemap, r.m_stpsuffstate.paramname);
m_skpnstate.init(this, m_conf, r.m_skpnstate.paramname);
m_rmtstate.init(this, m_conf, r.m_rmtstate.paramname);
m_thrConf = r.m_thrConf;
}
#else // -> Test

View File

@ -252,6 +252,11 @@ class RclConfig {
* command string returned by getMimeHandlerDef */
string findFilter(const string& cmd) const;
/** Thread config init is not done automatically because not all
programs need it and it uses debuglog so that it's better to
call it after primary init */
void initThrConf();
~RclConfig() {
freeAll();
}
@ -303,6 +308,7 @@ class RclConfig {
// Limiting set of mime types to be processed. Normally empty.
ParamStale m_rmtstate;
set<string> m_restrictMTypes;
vector<pair<int, int> > m_thrConf;
/** Create initial user configuration */
bool initUserConfig();

View File

@ -125,7 +125,6 @@ RclConfig *recollinit(RclInitFlags flags,
}
}
// Make sure the locale charset is initialized (so that multiple
// threads don't try to do it at once).
config->getDefCharset();
@ -146,6 +145,9 @@ RclConfig *recollinit(RclInitFlags flags,
#ifndef IDX_THREADS
ExecCmd::useVfork(true);
#else
// Keep threads init behind log init, but make sure it's done before
// we do the vfork choice !
config->initThrConf();
bool intern_noThr = config->getThrConf(RclConfig::ThrIntern).first == -1;
bool split_noThr = config->getThrConf(RclConfig::ThrSplit).first == -1;
bool write_noThr = config->getThrConf(RclConfig::ThrDbWrite).first == -1;

View File

@ -8,8 +8,8 @@ LIBS = librecoll.a $(LIBRECOLL)
all: $(LIBS)
OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o bglfetcher.o fetcher.o fsfetcher.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o mimehandler.o myhtmlparse.o txtdcode.o uncomp.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o daterange.o expansiondbs.o rclabstract.o rcldb.o rcldoc.o rcldups.o rclquery.o rclterms.o searchdata.o searchdataxml.o stemdb.o stoplist.o synfamily.o unac.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o strmatcher.o transcode.o wipedir.o x11mon.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime.o convert.o iodevice.o iofactory.o
DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp bglfetcher.dep.stamp fetcher.dep.stamp fsfetcher.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp mimehandler.dep.stamp myhtmlparse.dep.stamp txtdcode.dep.stamp uncomp.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp daterange.dep.stamp expansiondbs.dep.stamp rclabstract.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rcldups.dep.stamp rclquery.dep.stamp rclterms.dep.stamp searchdata.dep.stamp searchdataxml.dep.stamp stemdb.dep.stamp stoplist.dep.stamp synfamily.dep.stamp unac.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp strmatcher.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp
OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o bglfetcher.o fetcher.o fsfetcher.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o mimehandler.o myhtmlparse.o txtdcode.o uncomp.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o daterange.o expansiondbs.o rclabstract.o rcldb.o rcldoc.o rcldups.o rclquery.o rclterms.o searchdata.o searchdataxml.o stemdb.o stoplist.o synfamily.o unac.o base64.o circache.o closefrom.o conftree.o copyfile.o cpuconf.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o strmatcher.o transcode.o wipedir.o x11mon.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime.o convert.o iodevice.o iofactory.o
DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp bglfetcher.dep.stamp fetcher.dep.stamp fsfetcher.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp mimehandler.dep.stamp myhtmlparse.dep.stamp txtdcode.dep.stamp uncomp.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp daterange.dep.stamp expansiondbs.dep.stamp rclabstract.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rcldups.dep.stamp rclquery.dep.stamp rclterms.dep.stamp searchdata.dep.stamp searchdataxml.dep.stamp stemdb.dep.stamp stoplist.dep.stamp synfamily.dep.stamp unac.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp cpuconf.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp strmatcher.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp
librecoll.a : $(DEPS) $(OBJS)
ar ru librecoll.a $(OBJS)
@ -137,6 +137,8 @@ conftree.o : ../utils/conftree.cpp $(depth)/mk/localdefs
$(CXX) $(ALL_CXXFLAGS) -c ../utils/conftree.cpp
copyfile.o : ../utils/copyfile.cpp $(depth)/mk/localdefs
$(CXX) $(ALL_CXXFLAGS) -c ../utils/copyfile.cpp
cpuconf.o : ../utils/cpuconf.cpp $(depth)/mk/localdefs
$(CXX) $(ALL_CXXFLAGS) -c ../utils/cpuconf.cpp
debuglog.o : ../utils/debuglog.cpp $(depth)/mk/localdefs
$(CXX) $(ALL_CXXFLAGS) -c ../utils/debuglog.cpp
ecrontab.o : ../utils/ecrontab.cpp $(depth)/mk/localdefs
@ -364,6 +366,9 @@ conftree.dep.stamp : ../utils/conftree.cpp $(depth)/mk/localdefs
copyfile.dep.stamp : ../utils/copyfile.cpp $(depth)/mk/localdefs
$(CXX) -M $(ALL_CXXFLAGS) ../utils/copyfile.cpp > copyfile.dep
touch copyfile.dep.stamp
cpuconf.dep.stamp : ../utils/cpuconf.cpp $(depth)/mk/localdefs
$(CXX) -M $(ALL_CXXFLAGS) ../utils/cpuconf.cpp > cpuconf.dep
touch cpuconf.dep.stamp
debuglog.dep.stamp : ../utils/debuglog.cpp $(depth)/mk/localdefs
$(CXX) -M $(ALL_CXXFLAGS) ../utils/debuglog.cpp > debuglog.dep
touch debuglog.dep.stamp
@ -475,6 +480,7 @@ include circache.dep
include closefrom.dep
include conftree.dep
include copyfile.dep
include cpuconf.dep
include debuglog.dep
include ecrontab.dep
include execmd.dep

View File

@ -62,6 +62,7 @@ ${depth}/utils/circache.cpp \
${depth}/utils/closefrom.cpp \
${depth}/utils/conftree.cpp \
${depth}/utils/copyfile.cpp \
${depth}/utils/cpuconf.cpp \
${depth}/utils/debuglog.cpp \
${depth}/utils/ecrontab.cpp \
${depth}/utils/execmd.cpp \

View File

@ -141,8 +141,15 @@ dbdir = xapiandb
# value to -1 disables a queue (replaced by a direct call). The thrTCounts
# values define the number of threads to start for each queue. The last
# value can only be one (as Xapian is single-threaded).
thrQSizes = 2 2 2
thrTCounts = 4 2 1
# If the first element in thrQSizes is 0, recollindex will attempt to set
# roughly guesstimated values based on the number of CPUs.
#
# The following are the best setup on my core i5 system (4 cores, no
# hyperthreading, multiple disks).
#thrQSizes = 2 2 2
#thrTCounts = 4 2 1
# The default is to let recoll guess.
thrQSizes = 0
# Maximum file system occupation before we stop indexing. The default value
# is 0, meaning no checking. The value is a percentage, corresponding to

View File

@ -37,6 +37,12 @@ trreadfile.o : readfile.cpp readfile.h
$(CXX) -o trreadfile.o -c $(ALL_CXXFLAGS) \
-DTEST_READFILE readfile.cpp
CPUCONF_OBJS= trcpuconf.o
trcpuconf : $(CPUCONF_OBJS)
$(CXX) -o trcpuconf $(CPUCONF_OBJS) $(LIBRECOLL) $(LIBICONV) $(LIBSYS)
trcpuconf.o : cpuconf.cpp cpuconf.h
$(CXX) -o trcpuconf.o -c $(ALL_CXXFLAGS) -DTEST_CPUCONF cpuconf.cpp
CIRCACHE_OBJS= trcircache.o
trcircache : $(CIRCACHE_OBJS)
$(CXX) -o trcircache $(CIRCACHE_OBJS) \

86
src/utils/cpuconf.cpp Normal file
View File

@ -0,0 +1,86 @@
/* Copyright (C) 2013 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef TEST_CPUCONF
#include "autoconfig.h"
#include "cpuconf.h"
#include "execmd.h"
#include "smallut.h"
#if defined(__gnu_linux__)
/**
 * Linux: count the lines starting with "processor" in /proc/cpuinfo.
 *
 * @param conf receives the CPU count in its ncpus field; values outside
 *   the 1..100 range are replaced by 1.
 * @return false when ExecCmd::backtick() reports failure (conf is then left
 *   untouched), true otherwise.
 */
bool getCpuConf(CpuConf& conf)
{
    // The counting is delegated to a small shell pipeline
    vector<string> countCmd;
    countCmd.push_back("sh");
    countCmd.push_back("-c");
    countCmd.push_back("egrep ^processor /proc/cpuinfo | wc -l");

    string output;
    if (ExecCmd::backtick(countCmd, output)) {
        // Unparseable output yields 0 from atoi(), which is out of range too
        const int counted = atoi(output.c_str());
        conf.ncpus = (counted >= 1 && counted <= 100) ? counted : 1;
        return true;
    }
    return false;
}
#elif defined(__FreeBSD__)
/**
 * FreeBSD: read the CPU count from the hw.ncpu sysctl variable.
 *
 * sysctl is run with -n so that only the value is printed. Without it the
 * output starts with the variable name ("hw.ncpu: 4"), atoi() returns 0 and
 * the count silently degrades to 1.
 *
 * @param conf receives the CPU count in its ncpus field; values outside
 *   the 1..100 range are replaced by 1.
 * @return false when ExecCmd::backtick() reports failure (conf is then left
 *   untouched), true otherwise.
 */
bool getCpuConf(CpuConf& conf)
{
    vector<string> cmdv = create_vector<string>("sysctl")("-n")("hw.ncpu");
    string result;
    if (!ExecCmd::backtick(cmdv, result))
        return false;
    conf.ncpus = atoi(result.c_str());
    // Reject unparseable (0) or implausible values
    if (conf.ncpus < 1 || conf.ncpus > 100)
        conf.ncpus = 1;
    return true;
}
//#elif defined(__APPLE__)
#else // Any other system
/**
 * Generic fallback for systems with no specific detection code: pretend
 * there is a single CPU.
 *
 * @param cpus receives a count of 1 in its ncpus field.
 * @return always true.
 */
bool getCpuConf(CpuConf& cpus)
{
    // The parameter is 'cpus' and the field is 'ncpus': the previous
    // 'cpu.cpus' named neither and did not compile.
    cpus.ncpus = 1;
    return true;
}
#endif
#else // TEST_CPUCONF
#include <stdlib.h>
#include <iostream>
using namespace std;
#include "cpuconf.h"
// Test driver
int main(int argc, const char **argv)
{
CpuConf cpus;
if (!getCpuConf(cpus)) {
cerr << "getCpuConf failed" << endl;
exit(1);
}
cout << "Cpus: " << cpus.ncpus << endl;
exit(0);
}
#endif // TEST_CPUCONF

34
src/utils/cpuconf.h Normal file
View File

@ -0,0 +1,34 @@
/* Copyright (C) 2013 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _CPUCONF_H_INCLUDED_
#define _CPUCONF_H_INCLUDED_
/** Guess how many CPUs there are on this machine, to help with configuring
threads */
struct CpuConf {
    // Count of virtual CPUs (hyperthreading included). This is the only
    // value we care about for now.
    int ncpus;
    // int ncores;
    // int nsockets;

    // A freshly built object describes a single CPU
    CpuConf() : ncpus(1) {}
};
extern bool getCpuConf(CpuConf& conf);
#endif /* _CPUCONF_H_INCLUDED_ */

View File

@ -536,7 +536,7 @@ int ExecCmd::send(const string& data)
break;
int n = con->send(data.c_str() + nwritten, data.length() - nwritten);
if (n < 0) {
LOGERR(("ExecCmd::doexec: send failed\n"));
LOGERR(("ExecCmd::send: send failed\n"));
return -1;
}
nwritten += n;

View File

@ -201,7 +201,7 @@ inline void leftzeropad(string& s, unsigned len)
// Code for static initialization of an stl map. Somewhat like Boost.assign.
// Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c
// Example use: map<int, int> m = create_map (1,2) (3,4) (5,6) (7,8);
// Example use: map<int, int> m = create_map<int, int> (1,2) (3,4) (5,6) (7,8);
template <typename T, typename U>
class create_map