diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp
index ece3add9..1b8aeae7 100644
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@@ -35,6 +35,7 @@ using std::list;
 #endif
 
 #include
+#include <sstream>
 #include
 #include
 using namespace std;
@@ -48,6 +49,9 @@ using namespace std;
 #include "textsplit.h"
 #include "readfile.h"
 #include "fstreewalk.h"
+#include "cpuconf.h"
+
+typedef pair<int,int> RclPII;
 
 // Static, logically const, RclConfig members are initialized once from the
 // first object build during process initialization.
@@ -218,6 +222,10 @@ RclConfig::RclConfig(const string *argcnf)
     if (!readFieldsConfig(cnferrloc))
         return;
 
+    // Default is no threading: a queue size of -1 for each of the 3 stages
+    m_thrConf = create_vector<RclPII>
+        (RclPII(-1, 0))(RclPII(-1, 0))(RclPII(-1, 0));
+
     m_ptrans = new ConfSimple(path_cat(m_confdir, "ptrans").c_str());
 
     m_ok = true;
@@ -368,14 +376,83 @@ bool RclConfig::getConfParam(const string &name, vector *vip) const
     return true;
 }
 
-pair<int,int> RclConfig::getThrConf(ThrStage who) const
+void RclConfig::initThrConf()
 {
+    // Default is no threading: a queue size of -1 for each of the 3 stages
+    m_thrConf = create_vector<RclPII>
+        (RclPII(-1, 0))(RclPII(-1, 0))(RclPII(-1, 0));
+
     vector<int> vq;
     vector<int> vt;
-    if (!getConfParam("thrQSizes", &vq) || !getConfParam("thrTCounts", &vt)) {
+    if (!getConfParam("thrQSizes", &vq)) {
+        LOGINFO(("RclConfig::initThrConf: no thread info (queues)\n"));
+        goto out;
+    }
+
+    // If the first queue size is 0, autoconf is requested (thrTCounts is not read).
+    if (vq.size() > 0 && vq[0] == 0) {
+        LOGDEB(("RclConfig::initThrConf: autoconf requested\n"));
+        CpuConf cpus;
+        if (!getCpuConf(cpus) || cpus.ncpus < 1) {
+            LOGERR(("RclConfig::initThrConf: could not retrieve cpu conf\n"));
+            cpus.ncpus = 1;
+        }
+        // Arbitrarily set threads config based on number of CPUS. This also
+        // depends on the IO setup actually, so we're bound to be wrong...
+        if (cpus.ncpus == 1) {
+            // Somewhat counter-intuitively (because of possible IO parallelism)
+            // it seems that the best config here is no threading: keep the default
+        } else if (cpus.ncpus < 4) {
+            // 2 or 3 CPUs. Untested so let's guess...
+            m_thrConf = create_vector<RclPII>
+                (RclPII(2, 2))(RclPII(2, 2))(RclPII(2, 1));
+        } else if (cpus.ncpus < 6) {
+            m_thrConf = create_vector<RclPII>
+                (RclPII(2, 4))(RclPII(2, 2))(RclPII(2, 1));
+        } else {
+            m_thrConf = create_vector<RclPII>
+                (RclPII(2, 5))(RclPII(2, 3))(RclPII(2, 1));
+        }
+        goto out;
+    } else if (vq.size() > 0 && vq[0] < 0) {
+        // Negative first queue size: threads disabled by config, keep the default
+        goto out;
+    }
+
+    if (!getConfParam("thrTCounts", &vt) ) {
+        LOGINFO(("RclConfig::initThrConf: no thread info (threads)\n"));
+        goto out;
+    }
+
+    if (vq.size() != 3 || vt.size() != 3) {
+        LOGINFO(("RclConfig::initThrConf: bad thread info vector sizes\n"));
+        goto out;
+    }
+
+    // Normal case: record the (queue size, thread count) pair of each stage
+    m_thrConf.clear();
+    for (unsigned int i = 0; i < 3; i++) {
+        m_thrConf.push_back(RclPII(vq[i], vt[i]));
+    }
+
+out:
+    ostringstream sconf; // textual dump of the chosen config, for the log only
+    for (unsigned int i = 0; i < 3; i++) {
+        sconf << "(" << m_thrConf[i].first << ", " << m_thrConf[i].second <<
+            ") ";
+    }
+
+    LOGDEB(("RclConfig::initThrConf: chosen config (ql,nt): %s\n",
+            sconf.str().c_str()));
+}
+
+pair<int,int> RclConfig::getThrConf(ThrStage who) const
+{
+    if (m_thrConf.size() != 3) { // the vector was never set up with its 3 entries
+        LOGERR(("RclConfig::getThrConf: bad data in rclconfig\n"));
         return pair<int,int>(-1,-1);
     }
-    return pair<int,int>(vq[who], vt[who]);
+    return m_thrConf[who]; // (queue size, thread count) for the requested stage
 }
 
 vector RclConfig::getTopdirs() const
@@ -1257,6 +1334,7 @@ void RclConfig::initFrom(const RclConfig& r)
     m_stpsuffstate.init(this, mimemap, r.m_stpsuffstate.paramname);
     m_skpnstate.init(this, m_conf, r.m_skpnstate.paramname);
     m_rmtstate.init(this, m_conf, r.m_rmtstate.paramname);
+    m_thrConf = r.m_thrConf;
 }
 
 #else // -> Test
diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h
index b914f77a..3b5dbd8e 100644
--- a/src/common/rclconfig.h
+++ b/src/common/rclconfig.h
@@ -252,6 +252,11 @@ class RclConfig {
      * command
string returned by getMimeHandlerDef */ string findFilter(const string& cmd) const; + /** Thread config init is not done automatically because not all + programs need it and it uses debuglog so that it's better to + call it after primary init */ + void initThrConf(); + ~RclConfig() { freeAll(); } @@ -303,6 +308,7 @@ class RclConfig { // Limiting set of mime types to be processed. Normally empty. ParamStale m_rmtstate; set m_restrictMTypes; + vector > m_thrConf; /** Create initial user configuration */ bool initUserConfig(); diff --git a/src/common/rclinit.cpp b/src/common/rclinit.cpp index a17daff7..91b61d7d 100644 --- a/src/common/rclinit.cpp +++ b/src/common/rclinit.cpp @@ -125,7 +125,6 @@ RclConfig *recollinit(RclInitFlags flags, } } - // Make sure the locale charset is initialized (so that multiple // threads don't try to do it at once). config->getDefCharset(); @@ -146,6 +145,9 @@ RclConfig *recollinit(RclInitFlags flags, #ifndef IDX_THREADS ExecCmd::useVfork(true); #else + // Keep threads init behind log init, but make sure it's done before + // we do the vfork choice ! 
+ config->initThrConf(); bool intern_noThr = config->getThrConf(RclConfig::ThrIntern).first == -1; bool split_noThr = config->getThrConf(RclConfig::ThrSplit).first == -1; bool write_noThr = config->getThrConf(RclConfig::ThrDbWrite).first == -1; diff --git a/src/lib/Makefile b/src/lib/Makefile index 2ef93806..9d030a4d 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -8,8 +8,8 @@ LIBS = librecoll.a $(LIBRECOLL) all: $(LIBS) -OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o bglfetcher.o fetcher.o fsfetcher.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o mimehandler.o myhtmlparse.o txtdcode.o uncomp.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o daterange.o expansiondbs.o rclabstract.o rcldb.o rcldoc.o rcldups.o rclquery.o rclterms.o searchdata.o searchdataxml.o stemdb.o stoplist.o synfamily.o unac.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o strmatcher.o transcode.o wipedir.o x11mon.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime.o convert.o iodevice.o iofactory.o -DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp bglfetcher.dep.stamp fetcher.dep.stamp fsfetcher.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp mimehandler.dep.stamp myhtmlparse.dep.stamp txtdcode.dep.stamp uncomp.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp 
dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp daterange.dep.stamp expansiondbs.dep.stamp rclabstract.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rcldups.dep.stamp rclquery.dep.stamp rclterms.dep.stamp searchdata.dep.stamp searchdataxml.dep.stamp stemdb.dep.stamp stoplist.dep.stamp synfamily.dep.stamp unac.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp strmatcher.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp +OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o bglfetcher.o fetcher.o fsfetcher.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o mimehandler.o myhtmlparse.o txtdcode.o uncomp.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o daterange.o expansiondbs.o rclabstract.o rcldb.o rcldoc.o rcldups.o rclquery.o rclterms.o searchdata.o searchdataxml.o stemdb.o stoplist.o synfamily.o unac.o base64.o circache.o closefrom.o conftree.o copyfile.o cpuconf.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o strmatcher.o transcode.o wipedir.o x11mon.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime.o convert.o iodevice.o iofactory.o +DEPS = rclaspell.dep.stamp 
beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp bglfetcher.dep.stamp fetcher.dep.stamp fsfetcher.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp mimehandler.dep.stamp myhtmlparse.dep.stamp txtdcode.dep.stamp uncomp.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp daterange.dep.stamp expansiondbs.dep.stamp rclabstract.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rcldups.dep.stamp rclquery.dep.stamp rclterms.dep.stamp searchdata.dep.stamp searchdataxml.dep.stamp stemdb.dep.stamp stoplist.dep.stamp synfamily.dep.stamp unac.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp cpuconf.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp strmatcher.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp librecoll.a : $(DEPS) $(OBJS) ar ru librecoll.a $(OBJS) @@ -137,6 +137,8 @@ conftree.o : ../utils/conftree.cpp $(depth)/mk/localdefs $(CXX) $(ALL_CXXFLAGS) -c ../utils/conftree.cpp copyfile.o : ../utils/copyfile.cpp $(depth)/mk/localdefs $(CXX) $(ALL_CXXFLAGS) -c ../utils/copyfile.cpp +cpuconf.o : ../utils/cpuconf.cpp $(depth)/mk/localdefs + $(CXX) $(ALL_CXXFLAGS) -c ../utils/cpuconf.cpp debuglog.o : 
../utils/debuglog.cpp $(depth)/mk/localdefs $(CXX) $(ALL_CXXFLAGS) -c ../utils/debuglog.cpp ecrontab.o : ../utils/ecrontab.cpp $(depth)/mk/localdefs @@ -364,6 +366,9 @@ conftree.dep.stamp : ../utils/conftree.cpp $(depth)/mk/localdefs copyfile.dep.stamp : ../utils/copyfile.cpp $(depth)/mk/localdefs $(CXX) -M $(ALL_CXXFLAGS) ../utils/copyfile.cpp > copyfile.dep touch copyfile.dep.stamp +cpuconf.dep.stamp : ../utils/cpuconf.cpp $(depth)/mk/localdefs + $(CXX) -M $(ALL_CXXFLAGS) ../utils/cpuconf.cpp > cpuconf.dep + touch cpuconf.dep.stamp debuglog.dep.stamp : ../utils/debuglog.cpp $(depth)/mk/localdefs $(CXX) -M $(ALL_CXXFLAGS) ../utils/debuglog.cpp > debuglog.dep touch debuglog.dep.stamp @@ -475,6 +480,7 @@ include circache.dep include closefrom.dep include conftree.dep include copyfile.dep +include cpuconf.dep include debuglog.dep include ecrontab.dep include execmd.dep diff --git a/src/lib/mkMake b/src/lib/mkMake index 6b14f836..93806b52 100755 --- a/src/lib/mkMake +++ b/src/lib/mkMake @@ -62,6 +62,7 @@ ${depth}/utils/circache.cpp \ ${depth}/utils/closefrom.cpp \ ${depth}/utils/conftree.cpp \ ${depth}/utils/copyfile.cpp \ +${depth}/utils/cpuconf.cpp \ ${depth}/utils/debuglog.cpp \ ${depth}/utils/ecrontab.cpp \ ${depth}/utils/execmd.cpp \ diff --git a/src/sampleconf/recoll.conf.in b/src/sampleconf/recoll.conf.in index 667bfd8f..b515e1ad 100644 --- a/src/sampleconf/recoll.conf.in +++ b/src/sampleconf/recoll.conf.in @@ -141,8 +141,15 @@ dbdir = xapiandb # value to -1 disables a queue (replaced by a direct call). The thrTcounts # values define the number of threads to start for each queue. The last # value can only be one (as Xapian is single-threaded). -thrQSizes = 2 2 2 -thrTCounts = 4 2 1 +# If the first element in thrQSizes is 0, recollindex will attempt to set +# roughly guesstimated values based on the number of CPUs. +# +# The following is the best setup on my core i5 system (4 cores, no +# hyperthreading, multiple disks).
+#thrQSizes = 2 2 2 +#thrTCounts = 4 2 1 +# The default is to let recoll guess. +thrQSizes = 0 # Maximum file system occupation before we stop indexing. The default value # is 0, meaning no checking. The value is a percentage, corresponding to diff --git a/src/utils/Makefile b/src/utils/Makefile index 0014d6b9..ffb93812 100644 --- a/src/utils/Makefile +++ b/src/utils/Makefile @@ -37,6 +37,12 @@ trreadfile.o : readfile.cpp readfile.h $(CXX) -o trreadfile.o -c $(ALL_CXXFLAGS) \ -DTEST_READFILE readfile.cpp +CPUCONF_OBJS= trcpuconf.o +trcpuconf : $(CPUCONF_OBJS) + $(CXX) -o trcpuconf $(CPUCONF_OBJS) $(LIBRECOLL) $(LIBICONV) $(LIBSYS) +trcpuconf.o : cpuconf.cpp cpuconf.h + $(CXX) -o trcpuconf.o -c $(ALL_CXXFLAGS) -DTEST_CPUCONF cpuconf.cpp + CIRCACHE_OBJS= trcircache.o trcircache : $(CIRCACHE_OBJS) $(CXX) -o trcircache $(CIRCACHE_OBJS) \ diff --git a/src/utils/cpuconf.cpp b/src/utils/cpuconf.cpp new file mode 100644 index 00000000..8ee08846 --- /dev/null +++ b/src/utils/cpuconf.cpp @@ -0,0 +1,86 @@ +/* Copyright (C) 2013 J.F.Dockes + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */
+
+#ifndef TEST_CPUCONF
+
+#include "autoconfig.h"
+#include "cpuconf.h"
+#include "execmd.h"
+#include "smallut.h"
+
+#if defined(__gnu_linux__) // Linux: count the "processor" lines of /proc/cpuinfo
+bool getCpuConf(CpuConf& conf)
+{
+    vector<string> cmdv = create_vector<string>("sh")("-c")
+        ("egrep ^processor /proc/cpuinfo | wc -l");
+
+    string result;
+    if (!ExecCmd::backtick(cmdv, result))
+        return false;
+    conf.ncpus = atoi(result.c_str());
+    if (conf.ncpus < 1 || conf.ncpus > 100) // unparsable or implausible: assume 1
+        conf.ncpus = 1;
+    return true;
+}
+
+#elif defined(__FreeBSD__) // FreeBSD: read the hw.ncpu sysctl variable
+bool getCpuConf(CpuConf& conf)
+{
+    vector<string> cmdv = create_vector<string>("sysctl")("-n")("hw.ncpu"); // -n: value only, else atoi() sees "hw.ncpu: N" and yields 0
+
+    string result;
+    if (!ExecCmd::backtick(cmdv, result))
+        return false;
+    conf.ncpus = atoi(result.c_str());
+    if (conf.ncpus < 1 || conf.ncpus > 100) // unparsable or implausible: assume 1
+        conf.ncpus = 1;
+    return true;
+}
+//#elif defined(__APPLE__)
+
+#else // Any other system
+
+// Generic fallback: no detection is attempted, pretend there is one CPU
+bool getCpuConf(CpuConf& cpus)
+{
+    cpus.ncpus = 1;
+    return true;
+}
+#endif
+
+
+#else // TEST_CPUCONF
+
+#include <stdlib.h>
+
+#include <iostream>
+using namespace std;
+
+#include "cpuconf.h"
+
+// Test driver: print the detected CPU count, exit with status 1 on failure
+int main(int argc, const char **argv)
+{
+    CpuConf cpus;
+    if (!getCpuConf(cpus)) {
+        cerr << "getCpuConf failed" << endl;
+        exit(1);
+    }
+    cout << "Cpus: " << cpus.ncpus << endl;
+    exit(0);
+}
+#endif // TEST_CPUCONF
diff --git a/src/utils/cpuconf.h b/src/utils/cpuconf.h
new file mode 100644
index 00000000..76bbb02e
--- /dev/null
+++ b/src/utils/cpuconf.h
@@ -0,0 +1,34 @@
+/* Copyright (C) 2013 J.F.Dockes
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#ifndef _CPUCONF_H_INCLUDED_
+#define _CPUCONF_H_INCLUDED_
+
+/** CPU count guessed for this machine by getCpuConf(), to help with
+    configuring threads. ncpus starts at 1 until a detection overwrites it. */
+struct CpuConf {
+    CpuConf()
+        : ncpus(1)
+    {}
+    // Logical CPUs, hyperthreads included; the only value we care about for now
+    int ncpus;
+//    int ncores;
+//    int nsockets;
+};
+
+extern bool getCpuConf(CpuConf& conf); // fills conf.ncpus; false if the count could not be retrieved
+
+#endif /* _CPUCONF_H_INCLUDED_ */
diff --git a/src/utils/execmd.cpp b/src/utils/execmd.cpp
index 968f9355..4071b65d 100644
--- a/src/utils/execmd.cpp
+++ b/src/utils/execmd.cpp
@@ -536,7 +536,7 @@ int ExecCmd::send(const string& data)
             break;
         int n = con->send(data.c_str() + nwritten, data.length() - nwritten);
         if (n < 0) {
-            LOGERR(("ExecCmd::doexec: send failed\n"));
+            LOGERR(("ExecCmd::send: send failed\n"));
             return -1;
         }
         nwritten += n;
diff --git a/src/utils/smallut.h b/src/utils/smallut.h
index ae8791df..11a57b27 100644
--- a/src/utils/smallut.h
+++ b/src/utils/smallut.h
@@ -201,7 +201,7 @@ inline void leftzeropad(string& s, unsigned len)
 
 // Code for static initialization of an stl map. Somewhat like Boost.assign.
 // Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c
-// Example use: map m = create_map (1,2) (3,4) (5,6) (7,8);
+// Example use: map m = create_map (1,2) (3,4) (5,6) (7,8);
 template
 class create_map