Improve ExecCmd to avoid allocating an allterms buffer when creating the aspell dictionary

This commit is contained in:
dockes 2006-10-11 16:09:45 +00:00
parent 99372e587b
commit d551483a95
3 changed files with 168 additions and 41 deletions

View File

@ -1,6 +1,6 @@
#ifndef TEST_RCLASPELL
#ifndef lint
static char rcsid[] = "@(#$Id: rclaspell.cpp,v 1.3 2006-10-11 14:16:25 dockes Exp $ (C) 2006 J.F.Dockes";
static char rcsid[] = "@(#$Id: rclaspell.cpp,v 1.4 2006-10-11 16:09:45 dockes Exp $ (C) 2006 J.F.Dockes";
#endif
#ifdef HAVE_CONFIG_H
#include "autoconfig.h"
@ -174,6 +174,36 @@ string Aspell::dicPath()
string("aspdict.") + m_lang + string(".rws"));
}
class AspExecPv : public ExecCmdProvide {
public:
string *m_input; // pointer to string used as input buffer to command
Rcl::TermIter *m_tit;
Rcl::Db &m_db;
AspExecPv(string *i, Rcl::TermIter *tit, Rcl::Db &db)
: m_input(i), m_tit(tit), m_db(db)
{}
void newData() {
while (m_db.termWalkNext(m_tit, *m_input)) {
// Filter out terms beginning with upper case (special stuff) and
// containing numbers
if (m_input->empty())
continue;
if ('A' <= m_input->at(0) && m_input->at(0) <= 'Z')
continue;
if (m_input->find_first_of("0123456789+-._@") != string::npos)
continue;
// Got a non-empty sort-of appropriate term, let's send it to
// aspell
m_input->append("\n");
return;
}
// End of data. Tell so. Exec will close cmd.
m_input->erase();
}
};
bool Aspell::buildDict(Rcl::Db &db, string &reason)
{
if (!ok())
@ -194,24 +224,16 @@ bool Aspell::buildDict(Rcl::Db &db, string &reason)
reason = "termWalkOpen failed\n";
return false;
}
string allterms, term;
while (db.termWalkNext(tit, term)) {
// Filter out terms beginning with upper case (special stuff) and
// containing numbers
if (term.empty())
continue;
if ('A' <= term.at(0) && term.at(0) <= 'Z')
continue;
if (term.find_first_of("0123456789+-._@") != string::npos)
continue;
allterms += term + "\n";
// std::cout << "[" << term << "]" << std::endl;
}
db.termWalkClose(tit);
if (aspell.doexec(m_data->m_exec, args, &allterms)) {
string termbuf;
AspExecPv pv(&termbuf, tit, db);
aspell.setProvide(&pv);
if (aspell.doexec(m_data->m_exec, args, &termbuf)) {
reason = string("aspell dictionary creation command failed. Check the language data files for lang = ") + m_lang;
return false;
}
db.termWalkClose(tit);
return true;
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: execmd.cpp,v 1.18 2006-10-09 16:37:08 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: execmd.cpp,v 1.19 2006-10-11 16:09:45 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -93,7 +93,7 @@ public:
}
};
int ExecCmd::doexec(const string &cmd, const list<string>& args,
const string *inputstring, string *output)
{
@ -181,9 +181,21 @@ int ExecCmd::doexec(const string &cmd, const list<string>& args,
}
nwritten += n;
if (nwritten == inputlen) {
// cerr << "Closing output" << endl;
close(e.pipein[1]);
e.pipein[1] = -1;
if (m_provide) {
m_provide->newData();
if (inputstring->empty()) {
close(e.pipein[1]);
e.pipein[1] = -1;
} else {
input = inputstring->data();
inputlen = inputstring->length();
nwritten = 0;
}
} else {
// cerr << "Closing output" << endl;
close(e.pipein[1]);
e.pipein[1] = -1;
}
}
}
if (e.pipeout[0] > 0 && FD_ISSET(e.pipeout[0], &readfds)) {
@ -300,6 +312,9 @@ int ExecCmd::doexec(const string &cmd, const list<string>& args,
return -1;
}
////////////////////////////////////////////////////////////////////
#else // TEST
#include <stdio.h>
#include <string>
@ -324,34 +339,101 @@ public:
}
};
int main(int argc, const char **argv)
/**
 * Test input provider: feeds the command with the contents of /etc/group,
 * one line per newData() call.
 *
 * The object owns the FILE stream it opens and closes it on destruction.
 * Copying is therefore disabled: a copy would share the same FILE pointer
 * and the stream would be closed twice.
 */
class MEPv : public ExecCmdProvide {
public:
    FILE *m_fp;       // source file, null if the open failed
    string *m_input;  // buffer which newData() fills for the command

    MEPv(string *i)
        : m_fp(fopen("/etc/group", "r")), m_input(i)
    {
    }
    ~MEPv() {
        if (m_fp)
            fclose(m_fp);
    }

    /**
     * Store the next line of the file in the input buffer. The buffer is
     * left empty at end of file, on read error, or if the file could not
     * be opened, which signals the end of input.
     */
    void newData() {
        char line[1024];
        // sizeof keeps the read limit tied to the actual buffer size
        if (m_fp && fgets(line, sizeof(line), m_fp)) {
            m_input->assign((const char *)line);
        } else {
            m_input->erase();
        }
    }

private:
    // Not copyable (declared, never defined): the stream has a single owner
    MEPv(const MEPv &);
    MEPv &operator=(const MEPv &);
};
// Program name used in the usage message. Set from argv[0] at the start
// of main().
static char *thisprog;
// Text printed by Usage() after the "<program>: usage:" header line.
static char usage [] =
"execmd cmd [arg1 arg2 ...]\n"
" \n\n"
;
// Print the usage message on stderr and terminate the program with exit
// status 1. Does not return.
static void Usage(void)
{
fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
exit(1);
}
// Bit set of the command line options seen, built from the OPT_ values
// below by the option parsing loop in main().
static int op_flags;
// NOTE(review): OPT_MOINS ("moins" is French for "minus") is not set by
// the option parsing visible here -- presumably reserved for a lone "-".
#define OPT_MOINS 0x1
// Set by the 's' option letter.
#define OPT_s 0x2
// Set by the 'b' option letter, which also takes an integer argument.
#define OPT_b 0x4
int main(int argc, char **argv)
{
int count = 10;
thisprog = argv[0];
argc--; argv++;
while (argc > 0 && **argv == '-') {
(*argv)++;
if (!(**argv))
/* Case of a lone "-" argument, as in "adb - core" */
Usage();
while (**argv)
switch (*(*argv)++) {
case 's': op_flags |= OPT_s; break;
case 'b': op_flags |= OPT_b; if (argc < 2) Usage();
if ((sscanf(*(++argv), "%d", &count)) != 1)
Usage();
argc--;
goto b1;
default: Usage(); break;
}
b1: argc--; argv++;
}
if (argc < 1)
Usage();
string cmd = *argv++; argc--;
list<string> l;
while (argc > 0) {
l.push_back(*argv++); argc--;
}
DebugLog::getdbl()->setloglevel(DEBDEB1);
DebugLog::setfilename("stderr");
if (argc < 2) {
cerr << "Usage: execmd cmd arg1 arg2 ..." << endl;
exit(1);
}
const string cmd = argv[1];
list<string> l;
for (int i = 2; i < argc; i++) {
l.push_back(argv[i]);
}
ExecCmd mexec;
MEAdv adv;
adv.cmd = &mexec;
mexec.setAdvise(&adv);
mexec.setTimeout(500);
mexec.setStderr("/tmp/trexecStderr");
string input, output;
input = data;
string *ip = 0;
ip = &input;
mexec.putenv("TESTVARIABLE1=TESTVALUE1");
mexec.putenv("TESTVARIABLE2=TESTVALUE2");
mexec.putenv("TESTVARIABLE3=TESTVALUE3");
string input, output;
// input = data;
string *ip = 0;
ip = &input;
MEPv pv(&input);
mexec.setProvide(&pv);
int status = -1;
try {
status = mexec.doexec(cmd, l, ip, &output);
@ -360,7 +442,7 @@ int main(int argc, const char **argv)
}
fprintf(stderr, "Status: 0x%x\n", status);
cout << "Output:" << output << endl;
cout << "Output:[" << output << "]" << endl;
exit (status >> 8);
}
#endif // TEST

View File

@ -16,7 +16,7 @@
*/
#ifndef _EXECMD_H_INCLUDED_
#define _EXECMD_H_INCLUDED_
/* @(#$Id: execmd.h,v 1.9 2006-10-09 16:37:08 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: execmd.h,v 1.10 2006-10-11 16:09:45 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
@ -32,9 +32,28 @@ class ExecCmdAdvise {
virtual void newData(int cnt) = 0;
};
/**
 * Callback function object used by ExecCmd to request more input data
 * for the command.
 *
 * newData() takes no parameters and returns nothing: the implementation
 * must store the next chunk of data in the same string which was passed
 * as the input parameter to doexec(), replacing its previous contents.
 * Leaving that string empty signifies end of input (eof).
 */
class ExecCmdProvide {
public:
// Virtual so that implementations are properly destroyed through a
// pointer to this base class.
virtual ~ExecCmdProvide() {}
// Called by doexec() each time the current contents of the input string
// have been entirely written to the command.
virtual void newData() = 0;
};
/**
* Execute command possibly taking both input and output (will do
* asynchronous io as appropriate for things to work).
*
* Input to the command can be provided either once in a parameter to doexec
* or provided in chunks by setting a callback which will be called to
* request new data. In this case, the 'input' parameter to doexec may be
* empty (but not null)
*
* Output from the command is normally returned in a single string, but a
* callback can be set to be called whenever new data arrives, in which case
* it is permissible to consume the data and erase the string.
*
*/
class ExecCmd {
public:
@ -67,10 +86,11 @@ class ExecCmd {
void putenv(const string &envassign);
/**
* Set function object to call whenever new data is available or on
* select timeout.
* Set function objects to call whenever new data is available or on
* select timeout / whenever new data is needed to send.
*/
void setAdvise(ExecCmdAdvise *adv) {m_advise = adv;}
void setProvide(ExecCmdProvide *p) {m_provide = p;}
/**
* Set select timeout in milliseconds. The default is 1 S.
@ -93,11 +113,14 @@ class ExecCmd {
*/
void setCancel() {m_cancelRequest = true;}
ExecCmd() : m_advise(0), m_cancelRequest(false), m_timeoutMs(1000) {}
ExecCmd()
: m_advise(0), m_provide(0), m_cancelRequest(false), m_timeoutMs(1000)
{}
private:
list<string> m_env;
ExecCmdAdvise *m_advise;
ExecCmdProvide *m_provide;
bool m_cancelRequest;
int m_timeoutMs;
string m_stderrFile;