From 62a8bff5552b408abdd519a69057d9bcfab37490 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sun, 18 Nov 2012 08:33:33 +0100 Subject: [PATCH] use vfork when possible + small cleanups in mt init --- src/common/rclinit.cpp | 9 +- src/utils/execmd.cpp | 249 +++++++++++++++++++++++------------------ src/utils/execmd.h | 14 ++- src/utils/pathut.cpp | 18 +-- src/utils/smallut.cpp | 8 ++ src/utils/smallut.h | 2 + 6 files changed, 175 insertions(+), 125 deletions(-) diff --git a/src/common/rclinit.cpp b/src/common/rclinit.cpp index 313f4ef6..333c4424 100644 --- a/src/common/rclinit.cpp +++ b/src/common/rclinit.cpp @@ -31,6 +31,7 @@ #include "pathut.h" #include "unac.h" #include "smallut.h" +#include "execmd.h" static const int catchedSigs[] = {SIGHUP, SIGINT, SIGQUIT, SIGTERM, SIGUSR1, SIGUSR2}; @@ -112,16 +113,18 @@ RclConfig *recollinit(RclInitFlags flags, // Init unac locking unac_init_mt(); - // Init pathut static values + // Init smallut and pathut static values pathut_init_mt(); + smallut_init_mt(); // Init Unac translation exceptions string unacex; if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty()) unac_set_except_translations(unacex.c_str()); - // Init langtocode() static table - langtocode(""); +#ifndef IDX_THREADS + ExecCmd::useVfork(true); +#endif int flushmb; if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) { diff --git a/src/utils/execmd.cpp b/src/utils/execmd.cpp index 5c13b6f1..a3d0d0bd 100644 --- a/src/utils/execmd.cpp +++ b/src/utils/execmd.cpp @@ -28,10 +28,6 @@ #include #include -#if !defined(PUTENV_ARG_CONST) -#include -#endif - #include #include #include @@ -44,8 +40,10 @@ using namespace std; #include "smallut.h" #include "netcon.h" #include "closefrom.h" -#include "ptmutex.h" +extern char **environ; + +bool ExecCmd::o_useVfork = false; /* From FreeBSD's which command */ static bool exec_is_there(const char *candidate) @@ -167,6 +165,80 @@ ExecCmd::~ExecCmd() ExecCmdRsrc(this); } +// In child process. Set up pipes and exec command. +// This must not return. _exit() on error. +// *** This can be called after a vfork, so no modification of the +// process memory at all is allowed *** +// The LOGXX calls should not be there, but they occur only after "impossible" +// errors, which we would most definitely want to have a hint about +inline void ExecCmd::dochild(const string &cmd, const char **argv, + const char **envv, + bool has_input, bool has_output) +{ + // Start our own process group + if (setpgid(0, getpid())) { + LOGINFO(("ExecCmd::dochild: setpgid(0, %d) failed: errno %d\n", + getpid(), errno)); + } + + // Restore SIGTERM to default. Really, signal handling should be + // specified when creating the execmd. Help Recoll get rid of its + // filter children though. To be fixed one day... Not sure that + // all of this is needed. But an ignored sigterm and the masks are + // normally inherited. + if (signal(SIGTERM, SIG_DFL) == SIG_ERR) { + LOGERR(("ExecCmd::dochild: signal() failed, errno %d\n", errno)); + } + sigset_t sset; + sigfillset(&sset); + pthread_sigmask(SIG_UNBLOCK, &sset, 0); + sigprocmask(SIG_UNBLOCK, &sset, 0); + + if (has_input) { + close(m_pipein[1]); + if (m_pipein[0] != 0) { + dup2(m_pipein[0], 0); + close(m_pipein[0]); + } + } + if (has_output) { + close(m_pipeout[0]); + if (m_pipeout[1] != 1) { + if (dup2(m_pipeout[1], 1) < 0) { + LOGERR(("ExecCmd::doexec: dup2(2) failed. errno %d\n", errno)); + } + if (close(m_pipeout[1]) < 0) { + LOGERR(("ExecCmd::doexec: close(2) failed. errno %d\n", errno)); + } + } + } + // Do we need to redirect stderr ? + if (!m_stderrFile.empty()) { + int fd = open(m_stderrFile.c_str(), O_WRONLY|O_CREAT +#ifdef O_APPEND + |O_APPEND +#endif + , 0600); + if (fd < 0) { + close(2); + } else { + if (fd != 2) { + dup2(fd, 2); + } + lseek(2, 0, 2); + } + } + + // Close all descriptors except 0,1,2 + libclf_closefrom(3); + + execve(cmd.c_str(), (char *const*)argv, (char *const*)envv); + // Hu ho + LOGERR(("ExecCmd::doexec: execvp(%s) failed. errno %d\n", cmd.c_str(), + errno)); + _exit(127); +} + int ExecCmd::startExec(const string &cmd, const vector& args, bool has_input, bool has_output) { @@ -192,19 +264,76 @@ int ExecCmd::startExec(const string &cmd, const vector& args, return -1; } - m_pid = fork(); + +//////////// vfork setup section + // We do here things that we could/should do after a fork(), but + // not a vfork(). Does no harm to do it here in both cases, except + // that it needs cleanup (as compared to doing it just before + // exec()). + + // Allocate arg vector (2 more for arg0 + final 0) + typedef const char *Ccharp; + Ccharp *argv; + argv = (Ccharp *)malloc((args.size()+2) * sizeof(char *)); + if (argv == 0) { + LOGERR(("ExecCmd::doexec: malloc() failed. errno %d\n", errno)); + exit(1); + } + // Fill up argv + argv[0] = cmd.c_str(); + int i = 1; + vector::const_iterator it; + for (it = args.begin(); it != args.end(); it++) { + argv[i++] = it->c_str(); + } + argv[i] = 0; + + Ccharp *envv; + int envsize; + for (envsize = 0; ; envsize++) + if (environ[envsize] == 0) + break; + envv = (Ccharp *)malloc((envsize + m_env.size() + 2) * sizeof(char *)); + int eidx; + for (eidx = 0; eidx < envsize; eidx++) + envv[eidx] = environ[eidx]; + for (vector::const_iterator it = m_env.begin(); + it != m_env.end(); it++) { + envv[eidx++] = it->c_str(); + } + envv[eidx] = 0; + + // As we are going to use execve, not execvp, do the PATH + // thing. If the command is not found, exe will be empty and the + // exec will fail, which is what we want. + string exe; + which(cmd, exe); +//////////////////////////////// + + if (o_useVfork) { + m_pid = vfork(); + } else { + m_pid = fork(); + } if (m_pid < 0) { LOGERR(("ExecCmd::startExec: fork(2) failed. errno %d\n", errno)); return -1; } if (m_pid == 0) { - e.inactivate(); // needed ? - dochild(cmd, args, has_input, has_output); + // e.inactivate() is not needed. As we do not return, the call + // stack won't be unwound and destructors of local objects + // won't be called. + dochild(exe, argv, envv, has_input, has_output); // dochild does not return. Just in case... _exit(1); } // Father process +//////////////////// + // Vfork cleanup section + free(argv); + free(envv); +/////////////////// // Set the process group for the child. This is also done in the // child process see wikipedia(Process_group) @@ -497,110 +626,6 @@ bool ExecCmd::maybereap(int *status) } } -// In child process. Set up pipes, environment, and exec command. -// This must not return. exit() on error. -void ExecCmd::dochild(const string &cmd, const vector& args, - bool has_input, bool has_output) -{ - // Start our own process group - if (setpgid(0, getpid())) { - LOGINFO(("ExecCmd::dochild: setpgid(0, %d) failed: errno %d\n", - getpid(), errno)); - } - - // Restore SIGTERM to default. Really, signal handling should be - // specified when creating the execmd. Help Recoll get rid of its - // filter children though. To be fixed one day... Not sure that - // all of this is needed. But an ignored sigterm and the masks are - // normally inherited. - if (signal(SIGTERM, SIG_DFL) == SIG_ERR) { - LOGERR(("ExecCmd::dochild: signal() failed, errno %d\n", errno)); - } - sigset_t sset; - sigfillset(&sset); - pthread_sigmask(SIG_UNBLOCK, &sset, 0); - sigprocmask(SIG_UNBLOCK, &sset, 0); - - if (has_input) { - close(m_pipein[1]); - m_pipein[1] = -1; - if (m_pipein[0] != 0) { - dup2(m_pipein[0], 0); - close(m_pipein[0]); - m_pipein[0] = -1; - } - } - if (has_output) { - close(m_pipeout[0]); - m_pipeout[0] = -1; - if (m_pipeout[1] != 1) { - if (dup2(m_pipeout[1], 1) < 0) { - LOGERR(("ExecCmd::doexec: dup2(2) failed. errno %d\n", errno)); - } - if (close(m_pipeout[1]) < 0) { - LOGERR(("ExecCmd::doexec: close(2) failed. errno %d\n", errno)); - } - m_pipeout[1] = -1; - } - } - // Do we need to redirect stderr ? - if (!m_stderrFile.empty()) { - int fd = open(m_stderrFile.c_str(), O_WRONLY|O_CREAT -#ifdef O_APPEND - |O_APPEND -#endif - , 0600); - if (fd < 0) { - close(2); - } else { - if (fd != 2) { - dup2(fd, 2); - } - lseek(2, 0, 2); - } - } - - // Close all descriptors except 0,1,2 - libclf_closefrom(3); - - // Allocate arg vector (2 more for arg0 + final 0) - typedef const char *Ccharp; - Ccharp *argv; - argv = (Ccharp *)malloc((args.size()+2) * sizeof(char *)); - if (argv == 0) { - LOGERR(("ExecCmd::doexec: malloc() failed. errno %d\n", errno)); - exit(1); - } - - // Fill up argv - argv[0] = cmd.c_str(); - int i = 1; - vector::const_iterator it; - for (it = args.begin(); it != args.end(); it++) { - argv[i++] = it->c_str(); - } - argv[i] = 0; - -#if 0 - {int i = 0;cerr << "cmd: " << cmd << endl << "ARGS: " << endl; - while (argv[i]) cerr << argv[i++] << endl;} -#endif - - for (vector::const_iterator it = m_env.begin(); - it != m_env.end(); it++) { -#ifdef PUTENV_ARG_CONST - ::putenv(it->c_str()); -#else - ::putenv(strdup(it->c_str())); -#endif - } - execvp(cmd.c_str(), (char *const*)argv); - // Hu ho - LOGERR(("ExecCmd::doexec: execvp(%s) failed. errno %d\n", cmd.c_str(), - errno)); - _exit(127); -} - ReExec::ReExec(int argc, char *args[]) { init(argc, args); diff --git a/src/utils/execmd.h b/src/utils/execmd.h index cc61b49b..3ef81416 100644 --- a/src/utils/execmd.h +++ b/src/utils/execmd.h @@ -73,6 +73,12 @@ class ExecCmdProvide { */ class ExecCmd { public: + // Use vfork instead of fork. This must not be called in a multithreaded + // program. + static void useVfork(bool on) + { + o_useVfork = on; + } /** * Add/replace environment variable before executing command. This must * be called before doexec() to have an effect (possibly multiple @@ -155,7 +161,7 @@ class ExecCmd { */ void zapChild() {setKill(); (void)wait();} - ExecCmd() + ExecCmd() : m_advise(0), m_provide(0), m_timeoutMs(1000) { reset(); @@ -174,6 +180,8 @@ class ExecCmd { friend class ExecCmdRsrc; private: + static bool o_useVfork; + vector m_env; ExecCmdAdvise *m_advise; ExecCmdProvide *m_provide; @@ -200,8 +208,8 @@ class ExecCmd { sigemptyset(&m_blkcld); } // Child process code - void dochild(const string &cmd, const vector& args, - bool has_input, bool has_output); + inline void dochild(const string &cmd, const char **argv, + const char **envv, bool has_input, bool has_output); /* Copyconst and assignment private and forbidden */ ExecCmd(const ExecCmd &) {} ExecCmd& operator=(const ExecCmd &) {return *this;}; diff --git a/src/utils/pathut.cpp b/src/utils/pathut.cpp index ec0ea87a..40bde2a3 100644 --- a/src/utils/pathut.cpp +++ b/src/utils/pathut.cpp @@ -95,14 +95,18 @@ bool fsocc(const string &path, int *pc, long *blocks) return true; } -static const char *tmplocation() +static const string& tmplocation() { - const char *tmpdir = getenv("RECOLL_TMPDIR"); - if (!tmpdir) - tmpdir = getenv("TMPDIR"); - if (!tmpdir) - tmpdir = "/tmp"; - return tmpdir; + static string stmpdir; + if (stmpdir.empty()) { + const char *tmpdir = getenv("RECOLL_TMPDIR"); + if (tmpdir == 0) + tmpdir = getenv("TMPDIR"); + if (tmpdir == 0) + tmpdir = "/tmp"; + stmpdir = string(tmpdir); + } + return stmpdir; } bool maketmpdir(string& tdir, string& reason) diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp index cc469e25..c416d017 100644 --- a/src/utils/smallut.cpp +++ b/src/utils/smallut.cpp @@ -1157,6 +1157,14 @@ string localelang() return locale.substr(0, under); } +// Initialization for static stuff to be called from main thread before going +// multiple +void smallut_init_mt() +{ + // Init langtocode() static table + langtocode(""); +} + #else // TEST_SMALLUT #include diff --git a/src/utils/smallut.h b/src/utils/smallut.h index ff6c0a5b..e783b599 100644 --- a/src/utils/smallut.h +++ b/src/utils/smallut.h @@ -236,4 +236,6 @@ public: #define MAX(A,B) (((A)>(B)) ? (A) : (B)) #endif +void smallut_init_mt(); + #endif /* _SMALLUT_H_INCLUDED_ */