From d551483a957d982d7f913060615dfe6fc24f2daa Mon Sep 17 00:00:00 2001 From: dockes Date: Wed, 11 Oct 2006 16:09:45 +0000 Subject: [PATCH] improve execcmd to avoid allocating an allterms buffer when creating dico --- src/aspell/rclaspell.cpp | 54 ++++++++++++----- src/utils/execmd.cpp | 124 ++++++++++++++++++++++++++++++++------- src/utils/execmd.h | 31 ++++++++-- 3 files changed, 168 insertions(+), 41 deletions(-) diff --git a/src/aspell/rclaspell.cpp b/src/aspell/rclaspell.cpp index 98d62a6f..dcd15878 100644 --- a/src/aspell/rclaspell.cpp +++ b/src/aspell/rclaspell.cpp @@ -1,6 +1,6 @@ #ifndef TEST_RCLASPELL #ifndef lint -static char rcsid[] = "@(#$Id: rclaspell.cpp,v 1.3 2006-10-11 14:16:25 dockes Exp $ (C) 2006 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rclaspell.cpp,v 1.4 2006-10-11 16:09:45 dockes Exp $ (C) 2006 J.F.Dockes"; #endif #ifdef HAVE_CONFIG_H #include "autoconfig.h" @@ -174,6 +174,36 @@ string Aspell::dicPath() string("aspdict.") + m_lang + string(".rws")); } + +class AspExecPv : public ExecCmdProvide { +public: + string *m_input; // pointer to string used as input buffer to command + Rcl::TermIter *m_tit; + Rcl::Db &m_db; + AspExecPv(string *i, Rcl::TermIter *tit, Rcl::Db &db) + : m_input(i), m_tit(tit), m_db(db) + {} + void newData() { + while (m_db.termWalkNext(m_tit, *m_input)) { + // Filter out terms beginning with upper case (special stuff) and + // containing numbers + if (m_input->empty()) + continue; + if ('A' <= m_input->at(0) && m_input->at(0) <= 'Z') + continue; + if (m_input->find_first_of("0123456789+-._@") != string::npos) + continue; + // Got a non-empty sort-of appropriate term, let's send it to + // aspell + m_input->append("\n"); + return; + } + // End of data. Tell so. Exec will close cmd. + m_input->erase(); + } +}; + + bool Aspell::buildDict(Rcl::Db &db, string &reason) { if (!ok()) @@ -194,24 +224,16 @@ bool Aspell::buildDict(Rcl::Db &db, string &reason) reason = "termWalkOpen failed\n"; return false; } - string allterms, term; - while (db.termWalkNext(tit, term)) { - // Filter out terms beginning with upper case (special stuff) and - // containing numbers - if (term.empty()) - continue; - if ('A' <= term.at(0) && term.at(0) <= 'Z') - continue; - if (term.find_first_of("0123456789+-._@") != string::npos) - continue; - allterms += term + "\n"; - // std::cout << "[" << term << "]" << std::endl; - } - db.termWalkClose(tit); - if (aspell.doexec(m_data->m_exec, args, &allterms)) { + + string termbuf; + AspExecPv pv(&termbuf, tit, db); + aspell.setProvide(&pv); + + if (aspell.doexec(m_data->m_exec, args, &termbuf)) { reason = string("aspell dictionary creation command failed. Check the language data files for lang = ") + m_lang; return false; } + db.termWalkClose(tit); return true; } diff --git a/src/utils/execmd.cpp b/src/utils/execmd.cpp index bb57e53d..59a78fde 100644 --- a/src/utils/execmd.cpp +++ b/src/utils/execmd.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: execmd.cpp,v 1.18 2006-10-09 16:37:08 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: execmd.cpp,v 1.19 2006-10-11 16:09:45 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -93,7 +93,7 @@ public: } }; - + int ExecCmd::doexec(const string &cmd, const list& args, const string *inputstring, string *output) { @@ -181,9 +181,21 @@ int ExecCmd::doexec(const string &cmd, const list& args, } nwritten += n; if (nwritten == inputlen) { - // cerr << "Closing output" << endl; - close(e.pipein[1]); - e.pipein[1] = -1; + if (m_provide) { + m_provide->newData(); + if (inputstring->empty()) { + close(e.pipein[1]); + e.pipein[1] = -1; + } else { + input = inputstring->data(); + inputlen = inputstring->length(); + nwritten = 0; + } + } else { + // cerr << "Closing output" << endl; + close(e.pipein[1]); + e.pipein[1] = -1; + } } } if (e.pipeout[0] > 0 && FD_ISSET(e.pipeout[0], &readfds)) { @@ -300,6 +312,9 @@ int ExecCmd::doexec(const string &cmd, const list& args, return -1; } + + +//////////////////////////////////////////////////////////////////// #else // TEST #include #include @@ -324,34 +339,101 @@ public: } }; -int main(int argc, const char **argv) +class MEPv : public ExecCmdProvide { +public: + FILE *m_fp; + string *m_input; + MEPv(string *i) + : m_input(i) + { + m_fp = fopen("/etc/group", "r"); + } + ~MEPv() { + if (m_fp) + fclose(m_fp); + } + void newData() { + char line[1024]; + if (m_fp && fgets(line, 1024, m_fp)) { + m_input->assign((const char *)line); + } else { + m_input->erase(); + } + } +}; + + +static char *thisprog; +static char usage [] = +"execmd cmd [arg1 arg2 ...]\n" +" \n\n" +; +static void Usage(void) { + fprintf(stderr, "%s: usage:\n%s", thisprog, usage); + exit(1); +} + +static int op_flags; +#define OPT_MOINS 0x1 +#define OPT_s 0x2 +#define OPT_b 0x4 + +int main(int argc, char **argv) +{ + int count = 10; + + thisprog = argv[0]; + argc--; argv++; + + while (argc > 0 && **argv == '-') { + (*argv)++; + if (!(**argv)) + /* Cas du "adb - core" */ + Usage(); + while (**argv) + switch (*(*argv)++) { + case 's': op_flags |= OPT_s; break; + case 'b': op_flags |= OPT_b; if (argc < 2) Usage(); + if ((sscanf(*(++argv), "%d", &count)) != 1) + Usage(); + argc--; + goto b1; + default: Usage(); break; + } + b1: argc--; argv++; + } + + if (argc < 1) + Usage(); + + string cmd = *argv++; argc--; + list l; + while (argc > 0) { + l.push_back(*argv++); argc--; + } + DebugLog::getdbl()->setloglevel(DEBDEB1); DebugLog::setfilename("stderr"); - if (argc < 2) { - cerr << "Usage: execmd cmd arg1 arg2 ..." << endl; - exit(1); - } - const string cmd = argv[1]; - list l; - for (int i = 2; i < argc; i++) { - l.push_back(argv[i]); - } + ExecCmd mexec; MEAdv adv; adv.cmd = &mexec; mexec.setAdvise(&adv); mexec.setTimeout(500); mexec.setStderr("/tmp/trexecStderr"); - - string input, output; - input = data; - string *ip = 0; - ip = &input; mexec.putenv("TESTVARIABLE1=TESTVALUE1"); mexec.putenv("TESTVARIABLE2=TESTVALUE2"); mexec.putenv("TESTVARIABLE3=TESTVALUE3"); + string input, output; + // input = data; + string *ip = 0; + ip = &input; + + MEPv pv(&input); + mexec.setProvide(&pv); + int status = -1; try { status = mexec.doexec(cmd, l, ip, &output); @@ -360,7 +442,7 @@ int main(int argc, const char **argv) } fprintf(stderr, "Status: 0x%x\n", status); - cout << "Output:" << output << endl; + cout << "Output:[" << output << "]" << endl; exit (status >> 8); } #endif // TEST diff --git a/src/utils/execmd.h b/src/utils/execmd.h index 383134f2..005362ee 100644 --- a/src/utils/execmd.h +++ b/src/utils/execmd.h @@ -16,7 +16,7 @@ */ #ifndef _EXECMD_H_INCLUDED_ #define _EXECMD_H_INCLUDED_ -/* @(#$Id: execmd.h,v 1.9 2006-10-09 16:37:08 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: execmd.h,v 1.10 2006-10-11 16:09:45 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -32,9 +32,28 @@ class ExecCmdAdvise { virtual void newData(int cnt) = 0; }; +/** Callback function object to get more input data. Data has to be provided + * in the initial input string, set it to empty to signify eof + */ +class ExecCmdProvide { + public: + virtual ~ExecCmdProvide() {} + virtual void newData() = 0; +}; + /** * Execute command possibly taking both input and output (will do * asynchronous io as appropriate for things to work). + * + * Input to the command can be provided either once in a parameter to doexec + * or provided in chunks by setting a callback which will be called to + * request new data. In this case, the 'input' parameter to doexec may be + * empty (but not null) + * + * Output from the command is normally returned in a single string, but a + * callback can be set to be called whenever new data arrives, in which case + * it is permissible to consume the data and erase the string. + * */ class ExecCmd { public: @@ -67,10 +86,11 @@ class ExecCmd { void putenv(const string &envassign); /** - * Set function object to call whenever new data is available or on - * select timeout. + * Set function objects to call whenever new data is available or on + * select timeout / whenever new data is needed to send. */ void setAdvise(ExecCmdAdvise *adv) {m_advise = adv;} + void setProvide(ExecCmdProvide *p) {m_provide = p;} /** * Set select timeout in milliseconds. The default is 1 S. @@ -93,11 +113,14 @@ class ExecCmd { */ void setCancel() {m_cancelRequest = true;} - ExecCmd() : m_advise(0), m_cancelRequest(false), m_timeoutMs(1000) {} + ExecCmd() + : m_advise(0), m_provide(0), m_cancelRequest(false), m_timeoutMs(1000) + {} private: list m_env; ExecCmdAdvise *m_advise; + ExecCmdProvide *m_provide; bool m_cancelRequest; int m_timeoutMs; string m_stderrFile;