Merge branch 'kopostag'

This commit is contained in:
Jean-Francois Dockes 2020-03-26 14:03:17 +01:00
commit 9b3a5fac12
18 changed files with 5527 additions and 1275 deletions

View File

@ -82,6 +82,7 @@ common/rclinit.h \
common/syngroups.cpp \ common/syngroups.cpp \
common/syngroups.h \ common/syngroups.h \
common/textsplit.cpp \ common/textsplit.cpp \
common/textsplitko.cpp \
common/textsplit.h \ common/textsplit.h \
common/unacpp.cpp \ common/unacpp.cpp \
common/unacpp.h \ common/unacpp.h \
@ -210,6 +211,8 @@ utils/circache.cpp \
utils/circache.h \ utils/circache.h \
utils/closefrom.cpp \ utils/closefrom.cpp \
utils/closefrom.h \ utils/closefrom.h \
utils/cmdtalk.cpp \
utils/cmdtalk.h \
utils/conftree.cpp \ utils/conftree.cpp \
utils/conftree.h \ utils/conftree.h \
utils/copyfile.cpp \ utils/copyfile.cpp \
@ -645,8 +648,10 @@ filterdir = $(pkgdatadir)/filters
dist_filter_DATA = \ dist_filter_DATA = \
desktop/hotrecoll.py \ desktop/hotrecoll.py \
filters/abiword.xsl \ filters/abiword.xsl \
filters/cmdtalk.py \
filters/fb2.xsl \ filters/fb2.xsl \
filters/gnumeric.xsl \ filters/gnumeric.xsl \
filters/kosplitter.py \
filters/msodump.zip \ filters/msodump.zip \
filters/okular-note.xsl \ filters/okular-note.xsl \
filters/opendoc-body.xsl \ filters/opendoc-body.xsl \
@ -724,7 +729,7 @@ python/recoll/recoll/rclconfig.py
install-data-hook: install-data-hook:
(cd $(DESTDIR)/$(filterdir); \ (cd $(DESTDIR)/$(filterdir); \
chmod a+x rcl* ppt-dump.py xls-dump.py xlsxmltocsv.py hotrecoll.py; \ chmod a+x rcl* ppt-dump.py xls-dump.py xlsxmltocsv.py hotrecoll.py; \
chmod a+x recoll-we-move-files.py ../examples/rclmon.sh; \ chmod a+x recoll-we-move-files.py ../examples/rclmon.sh kosplitter.py; \
chmod 0644 msodump.zip recollepub.zip rclexecm.py rcllatinstops.zip \ chmod 0644 msodump.zip recollepub.zip rclexecm.py rcllatinstops.zip \
rclconfig.py conftree.py rclmidi.py rclexec1.py rcluncomp.py rclxslt.py) rclconfig.py conftree.py rclmidi.py rclexec1.py rcluncomp.py rclxslt.py)

View File

@ -44,8 +44,10 @@
// ngrams // ngrams
#undef KATAKANA_AS_WORDS #undef KATAKANA_AS_WORDS
// Same for Korean syllabic, and same problem, not used. // Same for Korean syllabic, and same problem. However we have a
#undef HANGUL_AS_WORDS // runtime option to use an external text analyser for hangul, so this
// is defined at compile time.
#define HANGUL_AS_WORDS
using namespace std; using namespace std;
@ -289,6 +291,7 @@ bool TextSplit::o_noNumbers{false};
bool TextSplit::o_deHyphenate{false}; bool TextSplit::o_deHyphenate{false};
int TextSplit::o_maxWordLength{40}; int TextSplit::o_maxWordLength{40};
static const int o_CJKMaxNgramLen{5}; static const int o_CJKMaxNgramLen{5};
bool o_exthangultagger{false};
void TextSplit::staticConfInit(RclConfig *config) void TextSplit::staticConfInit(RclConfig *config)
{ {
@ -323,6 +326,13 @@ void TextSplit::staticConfInit(RclConfig *config)
charclasses[int('\\')] = SPACE; charclasses[int('\\')] = SPACE;
} }
} }
string kotagger;
config->getConfParam("hangultagger", kotagger);
if (!kotagger.empty()) {
o_exthangultagger = true;
koStaticConfInit(config, kotagger);
}
} }
// Final term checkpoint: do some checking (the kind which is simpler // Final term checkpoint: do some checking (the kind which is simpler
@ -612,7 +622,7 @@ bool TextSplit::text_to_words(const string &in)
#if defined(KATAKANA_AS_WORDS) || defined(HANGUL_AS_WORDS) #if defined(KATAKANA_AS_WORDS) || defined(HANGUL_AS_WORDS)
int prev_csc = -1; int prev_csc = -1;
#endif #endif
for (; !it.eof(); it++) { for (; !it.eof() && !it.error(); it++) {
unsigned int c = *it; unsigned int c = *it;
nonalnumcnt++; nonalnumcnt++;
@ -625,30 +635,40 @@ bool TextSplit::text_to_words(const string &in)
if (UNICODE_IS_KATAKANA(c)) { if (UNICODE_IS_KATAKANA(c)) {
csc = CSC_KATAKANA; csc = CSC_KATAKANA;
} else if (UNICODE_IS_HANGUL(c)) { } else if (UNICODE_IS_HANGUL(c)) {
csc = CSC_HANGUL; if (o_exthangultagger) {
csc = CSC_HANGUL;
} else {
csc = CSC_CJK;
}
} else if (UNICODE_IS_CJK(c)) { } else if (UNICODE_IS_CJK(c)) {
csc = CSC_CJK; csc = CSC_CJK;
} else { } else {
csc = CSC_OTHER; csc = CSC_OTHER;
} }
if (o_processCJK && csc == CSC_CJK) { if (o_processCJK && (csc == CSC_CJK || csc == CSC_HANGUL)) {
// CJK character hit. // CJK character hit. Hangul processing may be special.
// Do like at EOF with the current non-cjk data. // Do like at EOF with the current non-cjk data.
if (m_wordLen || m_span.length()) { if (m_wordLen || m_span.length()) {
if (!doemit(true, it.getBpos())) if (!doemit(true, it.getBpos()))
return false; return false;
} }
// Hand off situation to the appropriate routine.
// Hand off situation to the cjk routine. if (csc == CSC_HANGUL) {
if (!cjk_to_words(&it, &c)) { if (!ko_to_words(&it, &c)) {
LOGERR("Textsplit: scan error in cjk handler\n"); LOGERR("Textsplit: scan error in korean handler\n");
return false; return false;
}
} else {
if (!cjk_to_words(&it, &c)) {
LOGERR("Textsplit: scan error in cjk handler\n");
return false;
}
} }
// Check for eof, else c contains the first non-cjk // Check for eof, else c contains the first non-cjk
// character after the cjk sequence, just go on. // character after the cjk sequence, just go on.
if (it.eof()) if (it.eof() || it.error())
break; break;
} }
@ -976,7 +996,7 @@ bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
// Current number of valid offsets; // Current number of valid offsets;
unsigned int nchars = 0; unsigned int nchars = 0;
unsigned int c = 0; unsigned int c = 0;
for (; !it.eof(); it++) { for (; !it.eof() && !it.error(); it++) {
c = *it; c = *it;
if (c == ' ' || c == '\t' || c == '\n') { if (c == ' ' || c == '\t' || c == '\n') {
continue; continue;
@ -1077,7 +1097,7 @@ int TextSplit::countWords(const string& s, TextSplit::Flags flgs)
bool TextSplit::hasVisibleWhite(const string &in) bool TextSplit::hasVisibleWhite(const string &in)
{ {
Utf8Iter it(in); Utf8Iter it(in);
for (; !it.eof(); it++) { for (; !it.eof() && !it.error(); it++) {
unsigned int c = (unsigned char)*it; unsigned int c = (unsigned char)*it;
if (c == (unsigned int)-1) { if (c == (unsigned int)-1) {
LOGERR("hasVisibleWhite: error while scanning UTF-8 string\n"); LOGERR("hasVisibleWhite: error while scanning UTF-8 string\n");
@ -1097,7 +1117,7 @@ template <class T> bool u8stringToStrings(const string &s, T &tokens)
tokens.clear(); tokens.clear();
enum states {SPACE, TOKEN, INQUOTE, ESCAPE}; enum states {SPACE, TOKEN, INQUOTE, ESCAPE};
states state = SPACE; states state = SPACE;
for (; !it.eof(); it++) { for (; !it.eof() && !it.error(); it++) {
unsigned int c = *it; unsigned int c = *it;
if (visiblewhite.find(c) != visiblewhite.end()) if (visiblewhite.find(c) != visiblewhite.end())
c = ' '; c = ' ';

View File

@ -54,6 +54,7 @@ public:
/** Call at program initialization to read non default values from the /** Call at program initialization to read non default values from the
configuration */ configuration */
static void staticConfInit(RclConfig *config); static void staticConfInit(RclConfig *config);
static void koStaticConfInit(RclConfig *config, const std::string& tagger);
/** Split text, emit words and positions. */ /** Split text, emit words and positions. */
virtual bool text_to_words(const std::string &in); virtual bool text_to_words(const std::string &in);
@ -199,6 +200,9 @@ private:
// This processes cjk text: // This processes cjk text:
bool cjk_to_words(Utf8Iter *it, unsigned int *cp); bool cjk_to_words(Utf8Iter *it, unsigned int *cp);
// Experimental Korean splitter. This uses an external Python tokenizer
bool ko_to_words(Utf8Iter *it, unsigned int *cp);
bool emitterm(bool isspan, std::string &term, int pos, size_t bs,size_t be); bool emitterm(bool isspan, std::string &term, int pos, size_t bs,size_t be);
bool doemit(bool spanerase, size_t bp); bool doemit(bool spanerase, size_t bp);
void discardspan(); void discardspan();

214
src/common/textsplitko.cpp Normal file
View File

@ -0,0 +1,214 @@
/* Copyright (C) 2020 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
// Specialized Korean text splitter using konlpy running in a Python
// subprocess. konlpy can use several different backends. We support
// Okt (Twitter) and Mecab at this point. Unfortunately the different
// backends have different POS TAG names, so that things are not
// completely transparent when using another (need to translate the tag
// names in the Python program).
#include "autoconfig.h"
#include <iostream>
#include <string>
#include <cstring>
#include <unordered_set>
#include <mutex>
#include "textsplit.h"
#include "log.h"
//#define UTF8ITER_CHECK
#include "utf8iter.h"
#include "smallut.h"
#include "rclconfig.h"
#include "cmdtalk.h"
using namespace std;
// Separator char used in words and tags lists.
static const string sepchars("\t");
// Handle on the helper subprocess. Created and (periodically) recreated by
// initCmd(); stays null until the first successful start.
static CmdTalk *o_talker;
// Set by initCmd() once starting the helper has failed, so that later calls
// return immediately instead of retrying.
static bool o_starterror{false};
// Path of the kosplitter.py helper, as resolved by koStaticConfInit().
static string o_cmdpath;
// Held by ko_to_words() for the whole duration of a call.
// NOTE(review): this has external linkage, unlike its neighbours - confirm
// whether another translation unit references it before making it static.
std::mutex o_mutex;
// Tagger name sent to the helper with each request. Defaults to "Okt";
// koStaticConfInit() overrides it when given a name it recognises.
static string o_taggername{"Okt"};
// The Python/Java splitter is leaking memory. We restart it from time to time
// restartcount accumulates the number of input bytes sent by ko_to_words();
// initCmd() restarts the helper once it exceeds restartthreshold.
static uint64_t restartcount;
static uint64_t restartthreshold = 5 * 1000 * 1000;
// One-time configuration of the Korean splitter: resolve the path of the
// kosplitter.py helper through the configuration object and record which
// tagger to request. A name that is not one of the supported ones is
// reported and the current tagger name is left untouched.
void TextSplit::koStaticConfInit(RclConfig *config, const string& tagger)
{
    o_cmdpath = config->findFilter("kosplitter.py");

    const bool supported =
        tagger == "Okt" || tagger == "Mecab" || tagger == "Komoran";
    if (!supported) {
        LOGERR("TextSplit::koStaticConfInit: unknown tagger [" << tagger <<
               "], using Okt\n");
        return;
    }
    o_taggername = tagger;
}
// Start the Python subprocess if needed.
//
// Returns true when o_talker points to a started helper. Returns false when
// a previous start attempt failed (no retry is attempted), when no helper
// path was configured, or when starting the command fails now.
//
// An existing helper is discarded and a fresh one is started once
// restartcount has grown past restartthreshold.
//
// The function reads and writes the file-level state without locking by
// itself; ko_to_words() calls it while holding o_mutex.
static bool initCmd()
{
    if (o_starterror) {
        // No use retrying
        return false;
    }

    if (o_talker) {
        if (restartcount <= restartthreshold) {
            // Current helper is still good to use.
            return true;
        }
        // Too much data went through this instance: drop it and fall
        // through to start a new one.
        delete o_talker;
        o_talker = nullptr;
        restartcount = 0;
    }

    if (o_cmdpath.empty()) {
        return false;
    }

    // Plain new reports failure by throwing, it never yields a null
    // pointer, so there is nothing to test here.
    // NOTE(review): 300 is presumably a timeout value - confirm in cmdtalk.h
    o_talker = new CmdTalk(300);
    if (!o_talker->startCmd(o_cmdpath)) {
        delete o_talker;
        o_talker = nullptr;
        o_starterror = true;
        return false;
    }
    return true;
}
// Process a stretch of Korean text with the external tagger.
//
// On entry *itp is positioned on the first character of the stretch. The
// characters are accumulated until a non-Hangul alphabetic character (or the
// end of input, or an iterator error) is met, the accumulated text is sent
// to the helper subprocess, and the returned words whose tag is Noun, Verb,
// Adjective or Adverb are emitted through takeword().
//
// On success, *cp receives the last character value read from the iterator
// and the iterator is left where the scan stopped. Returns false if the
// helper can't be started or fails, if its answer lacks the "text" or "tags"
// fields, or if takeword() returns false.
bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
{
    // The helper is shared: keep the lock for the whole call.
    std::unique_lock<std::mutex> mylock(o_mutex);
    initCmd();
    if (nullptr == o_talker) {
        return false;
    }

    LOGDEB1("k_to_words: m_wordpos " << m_wordpos << "\n");
    Utf8Iter &it = *itp;
    unsigned int c = 0;

    unordered_map<string, string> args;
    args.insert(pair<string,string>{"data", string()});
    // Only one entry at this point: this is the "data" value, which we
    // fill in place. References to unordered_map elements stay valid
    // across the insertion below.
    string& inputdata{args.begin()->second};
    // We send the tagger name every time but it's only used the first
    // one: can't change it after init. We could avoid sending it
    // every time, but I don't think that the performance hit is
    // significant
    args.insert(pair<string,string>{"tagger", o_taggername});

    // Walk the Korean characters section and send the text to the
    // analyser
    string::size_type orgbytepos = it.getBpos();
    for (; !it.eof() && !it.error(); it++) {
        c = *it;
        // c is a full Unicode code point. isalpha() is only defined for
        // values representable as unsigned char (or EOF), so only consult
        // it in that range; anything above is treated as non-alphabetic.
        if (!isHANGUL(c) && c < 256 && isalpha(static_cast<int>(c))) {
            // Done with Korean stretch, process and go back to main routine
            //std::cerr << "Broke on char " << (std::string)it << endl;
            break;
        } else {
            it.appendchartostring(inputdata);
        }
    }
    LOGDEB1("TextSplit::k_to_words: sending out " << inputdata.size() <<
            " bytes " << inputdata << endl);

    // Account for the data volume: used by initCmd() to decide when to
    // restart the helper.
    restartcount += inputdata.size();
    unordered_map<string,string> result;
    if (!o_talker->talk(args, result)) {
        LOGERR("Python splitter for Korean failed for [" << inputdata << "]\n");
        return false;
    }

    auto resit = result.find("text");
    if (resit == result.end()) {
        LOGERR("No text in Python splitter for Korean\n");
        return false;
    }
    string& outtext = resit->second;
    vector<string> words;
    stringToTokens(outtext, words, sepchars);

    resit = result.find("tags");
    if (resit == result.end()) {
        LOGERR("No tags in Python splitter for Korean\n");
        return false;
    }
    string& outtags = resit->second;
    vector<string> tags;
    stringToTokens(outtags, tags, sepchars);

    // Words and tags are matched by index. If the two lists do not have
    // the same length, only walk the common part so that we never index
    // past the end of the shorter one.
    if (tags.size() != words.size()) {
        LOGERR("Python splitter for Korean: " << words.size() <<
               " words but " << tags.size() << " tags\n");
    }
    const vector<string>::size_type nterms =
        tags.size() < words.size() ? tags.size() : words.size();

    // This is the position in the whole text, not the local fragment,
    // which is bytepos-orgbytepos
    string::size_type bytepos(orgbytepos);
    for (vector<string>::size_type i = 0; i < nterms; i++) {
        // The POS tagger strips characters from the input (e.g. multiple
        // spaces, sometimes new lines, possibly other stuff). This
        // means that we can't easily reconstruct the byte position
        // from the concatenated terms. The output seems to be always
        // shorter than the input, so we try to look ahead for the
        // term. Can't be too sure that this works though, depending
        // on exactly what transformation may have been applied from
        // the original input to the term.
        string word = words[i];
        trimstring(word);
        string::size_type newpos = bytepos - orgbytepos;
        newpos = inputdata.find(word, newpos);
        if (newpos != string::npos) {
            bytepos = orgbytepos + newpos;
        }
        LOGDEB1("WORD OPOS " << bytepos-orgbytepos <<
                " FOUND POS " << newpos << endl);
        // Only index the terms from these grammatical categories.
        if (tags[i] == "Noun" || tags[i] == "Verb" ||
            tags[i] == "Adjective" || tags[i] == "Adverb") {
            if (!takeword(
                    word, m_wordpos++, bytepos, bytepos + words[i].size())) {
                return false;
            }
        }
        LOGDEB1("WORD [" << words[i] << "] size " << words[i].size() <<
                " TAG " << tags[i] << endl);
        bytepos += words[i].size();
    }

#if DO_CHECK_THINGS
    int sizediff = inputdata.size() - (bytepos - orgbytepos);
    if (sizediff < 0)
        sizediff = -sizediff;
    if (sizediff > 1) {
        LOGERR("ORIGINAL TEXT SIZE: " << inputdata.size() <<
               " FINAL BYTE POS " << bytepos - orgbytepos <<
               " TEXT [" << inputdata << "]\n");
    }
#endif

    // Reset state, saving term position, and return the found non-cjk
    // Unicode character value. The current input byte offset is kept
    // in the utf8Iter
    int pos = m_wordpos;
    clearsplitstate();
    m_spanpos = m_wordpos = pos;
    *cp = c;
    return true;
}

236
src/filters/cmdtalk.py Normal file
View File

@ -0,0 +1,236 @@
#################################
# Copyright (C) 2016 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
########################################################
# Command communication module and utilities. See commands in cmdtalk.h
#
# All data is binary. This is important for Python3
# All parameter names are converted to and processed as str/unicode
from __future__ import print_function
import sys
import os
import tempfile
import shutil
import getopt
import traceback
# True when running under Python 3 or later. Use the numeric version tuple:
# comparing the sys.version string with '3' is a lexicographic comparison and
# would give the wrong answer for a major version with two digits.
PY3 = sys.version_info[0] >= 3

if PY3:
    def makebytes(data):
        """Return data as bytes: bytes are returned unchanged, anything
        else is expected to be a str and is encoded as UTF-8."""
        if isinstance(data, bytes):
            return data
        else:
            return data.encode("UTF-8")
else:
    def makebytes(data):
        """Return data as a byte string: unicode objects are encoded as
        UTF-8, anything else is returned unchanged."""
        if isinstance(data, unicode):
            return data.encode("UTF-8")
        else:
            return data
############################################
# CmdTalk implements the
# communication protocol with the master process. It calls an external
# method to use the args and produce return data.
class CmdTalk:
    """Protocol engine for talking to the master process.

    A message is a sequence of parameters, each made of a "name: size" line
    followed by size bytes of data, and is terminated by an empty line.
    Messages are read from infile, handed to a processor object, and the
    dictionary returned by the processor is sent back on outfile in the
    same format.
    """

    def __init__(self, outfile=sys.stdout, infile=sys.stdin, exitfunc=None):
        """outfile/infile: the streams used for the dialog.
        exitfunc: optional callable, called with the exit value just
        before this object terminates the process."""
        try:
            self.myname = os.path.basename(sys.argv[0])
        except Exception:
            self.myname = "???"

        self.outfile = outfile
        self.infile = infile
        self.exitfunc = exitfunc
        self.fields = {}

        if sys.platform == "win32":
            # Binary mode on both streams: all data is exchanged as bytes
            import msvcrt
            msvcrt.setmode(self.outfile.fileno(), os.O_BINARY)
            msvcrt.setmode(self.infile.fileno(), os.O_BINARY)

        # Set debugfile to a file name to have the log appended there
        # instead of going to stderr.
        self.debugfile = None
        if self.debugfile:
            self.errfout = open(self.debugfile, "a")
        else:
            self.errfout = sys.stderr

    def log(self, s, doexit = 0, exitvalue = 1):
        """Print s to the log stream. If doexit is true, call the exit
        function (if any) and terminate the process with exitvalue."""
        print("CMDTALK: %s: %s" % (self.myname, s), file=self.errfout)
        if doexit:
            if self.exitfunc:
                self.exitfunc(exitvalue)
            sys.exit(exitvalue)

    def breakwrite(self, outfile, data):
        """Write data to outfile, in small pieces on Windows."""
        if sys.platform != "win32":
            outfile.write(data)
        else:
            # On windows, writing big chunks can fail with a "not enough space"
            # error. Seems a combined windows/python bug, depending on versions.
            # See https://bugs.python.org/issue11395
            # In any case, just break it up
            total = len(data)
            bs = 4*1024
            offset = 0
            while total > 0:
                if total < bs:
                    tow = total
                else:
                    tow = bs
                #self.log("Total %d Writing %d to stdout: %s" % (total,tow,data[offset:offset+tow]))
                outfile.write(data[offset:offset+tow])
                offset += tow
                total -= tow

    def readparam(self):
        """Read a single parameter from the input: a line with the
        parameter name and data size, followed by the data.

        Returns a (name, data) pair. The name is a str/unicode. With
        Python 3 the data is decoded from UTF-8 and returned as str, with
        Python 2 it is returned as read. An empty line (end of message)
        yields ('', b''). End of file on input, a malformed header line or
        a short read terminate the process.
        """
        if PY3:
            inf = self.infile.buffer
        else:
            inf = self.infile
        s = inf.readline()
        if s == b'':
            # EOF: our master is gone or done with us.
            if self.exitfunc:
                self.exitfunc(0)
            sys.exit(0)

        s = s.rstrip(b'\n')
        if s == b'':
            return ('', b'')

        l = s.split()
        if len(l) != 2:
            self.log("bad line: [%r]" % (s,), 1, 1)

        try:
            # UnicodeDecodeError is a ValueError too: a bad name or a bad
            # size both end up in the same place.
            paramname = l[0].decode('ASCII').rstrip(':')
            paramsize = int(l[1])
        except ValueError:
            self.log("bad name or size in line: [%r]" % (s,), 1, 1)

        if paramsize > 0:
            paramdata = inf.read(paramsize)
            if len(paramdata) != paramsize:
                self.log("Bad read: wanted %d, got %d" %
                         (paramsize, len(paramdata)), 1, 1)
        else:
            paramdata = b''
        if PY3:
            paramdata = paramdata.decode('utf-8')

        #self.log("paramname [%s] paramsize %d value [%s]" %
        #         (paramname, paramsize, paramdata))
        return (paramname, paramdata)

    def senditem(self, nm, data):
        """Send one named item: "name: size" line, then the data bytes."""
        data = makebytes(data)
        # With Python 3 the bytes must go to the underlying binary buffer
        out = self.outfile.buffer if PY3 else self.outfile
        out.write(makebytes("%s: %d\n" % (nm, len(data))))
        self.breakwrite(out, data)

    def answer(self, outfields):
        """Send all the items from the outfields dictionary, followed by
        the empty line which marks the end of the message."""
        for nm,value in outfields.items():
            #self.log("Senditem: [%s] -> [%s]" % (nm, value))
            self.senditem(nm, value)

        # End of message
        print(file=self.outfile)
        self.outfile.flush()
        #self.log("done writing data")

    def processmessage(self, processor, params):
        """Call processor.process() with the input parameters and send
        back the result."""
        # In normal usage we try to recover from processor errors, but
        # we sometimes want to see the real stack trace when testing
        safeexec = True
        if safeexec:
            try:
                outfields = processor.process(params)
            except Exception as err:
                self.log("processmessage: processor raised: [%s]" % err)
                # Same destination as the log message just above
                traceback.print_exc(file=self.errfout)
                outfields = {}
                outfields["cmdtalkstatus"] = "1"
                outfields["cmdtalkerrstr"] = str(err)
        else:
            outfields = processor.process(params)

        self.answer(outfields)

    def mainloop(self, processor):
        """Loop on messages from our master. Does not return: the process
        exits from readparam() when the input is closed."""
        while True:
            #self.log("waiting for command")

            params = dict()

            # Read at most 10 parameters (normally 1 or 2), stop at empty line
            # End of message is signalled by empty paramname
            for i in range(10):
                paramname, paramdata = self.readparam()
                if paramname == "":
                    break
                params[paramname] = paramdata

            # Got message, act on it
            self.processmessage(processor, params)
# Common main routine for testing: either run the normal protocol
# engine or a local loop. This means that you can call
# cmdtalk.main(proto,processor) instead of proto.mainloop(processor)
# from your module, and get the benefits of command line testing
def main(proto, processor):
    """Run the protocol engine, or a single processor call for testing.

    Without command line arguments, this hands control to
    proto.mainloop(processor). Otherwise the arguments are taken as
    "name value" pairs, passed to processor.process() as a dictionary, and
    each result item is written to standard output as a "name->" line
    followed by the value.
    """
    if len(sys.argv) == 1:
        proto.mainloop(processor)
        # mainloop does not return. Just in case
        sys.exit(1)

    # Not running the main loop: run one processor call for debugging
    cmdargs = sys.argv[1:]
    if not cmdargs or len(cmdargs) % 2:
        print("Usage: cmdtalk.py pname pvalue [pname pvalue...]",
              file=sys.stderr)
        sys.exit(1)

    # Even positions are the names, odd ones the values
    params = dict(zip(cmdargs[0::2], cmdargs[1::2]))
    result = processor.process(params)

    if PY3:
        out = sys.stdout.buffer
    else:
        out = sys.stdout
    for name, value in result.items():
        payload = makebytes(value)
        proto.breakwrite(out, makebytes("%s->" % name + '\n'))
        proto.breakwrite(out, payload)
        out.write(b'\n')

90
src/filters/kosplitter.py Executable file
View File

@ -0,0 +1,90 @@
#!/usr/bin/python3
#################################
# Copyright (C) 2020 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
########################################################
#
# Interface to the konlpy Korean text analyser: we receive text from
# our parent process and have it segmented by the analyser, then
# return the results. The analyser startup is very expensive (several
# seconds), which is why we can't just execute it from the main
# process.
#
import sys
import cmdtalk
from konlpy.tag import Okt,Mecab,Komoran
class Processor(object):
    """Message processor: segments the received text with a konlpy tagger
    and returns the words and their tags."""

    def __init__(self, proto):
        self.proto = proto
        # Exactly one of these becomes True once the tagger is created.
        self.tagsOkt = False
        self.tagsMecab = False
        self.tagsKomoran = False

    def _init_tagger(self, taggername):
        """Create the tagger object for the given name and remember which
        tag naming scheme it uses. Raises an Exception for an unknown
        name."""
        if taggername == "Okt":
            self.tagger = Okt()
            self.tagsOkt = True
        elif taggername == "Mecab":
            self.tagger = Mecab()
            self.tagsMecab = True
        elif taggername == "Komoran":
            self.tagger = Komoran()
            self.tagsKomoran = True
        else:
            raise Exception("Bad tagger name " + taggername)

    def _translate_tag(self, tag):
        """Return the tag name to send back for a tag from the tagger.
        Okt tags are passed through. For Mecab and Komoran the tags that we
        recognise are translated to Noun, Verb, Adjective or Adverb, and the
        other ones are passed through."""
        if self.tagsOkt:
            return tag
        if self.tagsMecab or self.tagsKomoran:
            # startswith() rather than indexing: also works for an empty tag
            if tag.startswith("N"):
                return "Noun"
            prefix = tag[0:2]
            if prefix == "VV":
                return "Verb"
            if prefix == "VA":
                return "Adjective"
            if tag == "MAG":
                return "Adverb"
        return tag

    def process(self, params):
        """Segment params['data'].

        The tagger is created on the first call, from params['tagger'].
        Returns a dictionary with two fields, 'text' and 'tags', each a
        list of values where every value is followed by a tab character,
        or a dictionary with a single 'error' field if a needed parameter
        is missing.
        """
        if 'data' not in params:
            return {'error':'No data field in parameters'}
        if not (self.tagsOkt or self.tagsMecab or self.tagsKomoran):
            if 'tagger' not in params:
                return {'error':'No "tagger" field in parameters'}
            self._init_tagger(params['tagger'])

        pos = self.tagger.pos(params['data'])
        #proto.log("%s" % pos)
        words = []
        tags = []
        for e in pos:
            # The tab is our separator: it can't be part of a word
            words.append(e[0].replace('\t', ' ') + "\t")
            tags.append(self._translate_tag(e[1]) + "\t")
        return {'text': "".join(words), 'tags': "".join(tags)}
# Script entry point: create the protocol engine with its default streams
# and exit function, create our processor, and hand both to cmdtalk.main().
# These statements run unconditionally when the file is executed.
proto = cmdtalk.CmdTalk()
processor = Processor(proto)
cmdtalk.main(proto, processor)

View File

@ -36,20 +36,6 @@ from hwp5.xmlmodel import Hwp5File as xml_Hwp5File
from hwp5.utils import cached_property from hwp5.utils import cached_property
# This was duplicated from hwp5 hwp5text.py and I don't really
# understand what it does...
RESOURCE_PATH_XSL_TEXT = 'xsl/plaintext.xsl'
class TextTransform(BaseTransform):
@property
def transform_hwp5_to_text(self):
transform_xhwp5 = self.transform_xhwp5_to_text
return self.make_transform_hwp5(transform_xhwp5)
@cached_property
def transform_xhwp5_to_text(self):
resource_path = RESOURCE_PATH_XSL_TEXT
return self.make_xsl_transform(resource_path)
# Associate HTML meta names and hwp summaryinfo values # Associate HTML meta names and hwp summaryinfo values
def metafields(summaryinfo): def metafields(summaryinfo):
yield(('Description', summaryinfo.subject + " " + yield(('Description', summaryinfo.subject + " " +

View File

@ -158,7 +158,15 @@ void rwSettings(bool writing)
"/Recoll/prefs/reslist/collapseDuplicates", Bool, false); "/Recoll/prefs/reslist/collapseDuplicates", Bool, false);
SETTING_RW(prefs.showResultsAsTable, SETTING_RW(prefs.showResultsAsTable,
"/Recoll/prefs/showResultsAsTable", Bool, false); "/Recoll/prefs/showResultsAsTable", Bool, false);
SETTING_RW(prefs.maxhltextmbs, "/Recoll/prefs/preview/maxhltextmbs", Int, 3);
SETTING_RW(prefs.maxhltextkbs, "/Recoll/prefs/preview/maxhltextkbs", Int,
3000);
// Compat: if maxhltextkbs is not set but old maxhltextmbs is set use it
if (!writing && !settings.contains("/Recoll/prefs/preview/maxhltextkbs") &&
settings.contains("/Recoll/prefs/preview/maxhltextmbs")) {
prefs.maxhltextkbs = settings.value(
"/Recoll/prefs/preview/maxhltextmbs").toInt() * 1024;
}
SETTING_RW(prefs.previewPlainPre, SETTING_RW(prefs.previewPlainPre,
"/Recoll/prefs/preview/plainPre", Int, PrefsPack::PP_PREWRAP); "/Recoll/prefs/preview/plainPre", Int, PrefsPack::PP_PREWRAP);

View File

@ -20,6 +20,7 @@
#include <string> #include <string>
#include <list> #include <list>
#include <vector> #include <vector>
#include <set>
#include <qstring.h> #include <qstring.h>
#include <qstringlist.h> #include <qstringlist.h>
@ -46,7 +47,7 @@ class PrefsPack {
int filterCtlStyle; int filterCtlStyle;
int respagesize{8}; int respagesize{8};
int historysize{0}; int historysize{0};
int maxhltextmbs; int maxhltextkbs;
QString reslistfontfamily; QString reslistfontfamily;
// Not saved in prefs for now. Computed from qt defaults and used to // Not saved in prefs for now. Computed from qt defaults and used to
// set main character color for webkit/textbrowser reslist and // set main character color for webkit/textbrowser reslist and
@ -154,6 +155,11 @@ class PrefsPack {
std::string stemlang(); std::string stemlang();
// MIME types for which we prefer to use stored text from preview
// rather than extracting the possibly nicer HTML because the
// extractor is very slow. This is compiled in and there is no UI
// for now.
std::set<std::string> preferStoredTextMimes{"application/x-hwp"};
}; };
/** Global preferences record */ /** Global preferences record */

BIN
src/qtgui/i18n/recoll_ko.qm Normal file

Binary file not shown.

3294
src/qtgui/i18n/recoll_ko.ts Normal file

File diff suppressed because it is too large Load Diff

View File

@ -574,6 +574,90 @@ void Preview::emitWordSelect(QString word)
emit(wordSelect(word)); emit(wordSelect(word));
} }
// Show a warning dialog after a document load failure. The text depends on
// the failure cause, and mentions the upcoming attempt to use the stored
// text when canGetRawText is set.
void Preview::displayLoadError(
    FileInterner::ErrorPossibleCause explain, bool canGetRawText)
{
    // Note that we can't easily check for a readable file
    // because it's possible that only a region is locked
    // (e.g. on Windows for an ost file the first block is
    // readable even if Outlook is running).
    QString text;
    switch (explain) {
    case FileInterner::FetchMissing:
        text = tr("Error loading the document: file missing.");
        break;
    case FileInterner::FetchPerm:
        text = tr("Error loading the document: no permission.");
        break;
    case FileInterner::FetchNoBackend:
        text = tr("Error loading: backend not configured.");
        break;
    case FileInterner::InternfileOther:
#ifdef _WIN32
        text = tr("Error loading the document: "
                  "other handler error<br>"
                  "Maybe the application is locking the file ?");
#else
        text = tr("Error loading the document: other handler error.");
#endif
        break;
    }

    if (canGetRawText) {
        text += tr("<br>Attempting to display from stored text.");
    }

    QMessageBox::warning(0, "Recoll", text);
}
// Run the document load thread and wait for it to finish, then report any
// failure to the user.
// Returns true if the thread ended with a zero status. Returns false if the
// operation was cancelled (no message in this case) or failed (after showing
// a warning dialog). canGetRawText is only passed on to displayLoadError().
bool Preview::runLoadThread(LoadThread& lthr, QTimer& tT, QEventLoop& loop,
QProgressDialog& progress, bool canGetRawText)
{
lthr.start();
// Wait for the thread: on each pass, restart the timer with a 1000 value
// and run the local event loop.
// NOTE(review): what makes loop.exec() return (timer expiry, thread
// finished signal) is set up by the caller and not visible here - confirm.
for (int i = 0;;i++) {
tT.start(1000);
loop.exec();
if (lthr.isFinished())
break;
// The user used the progress dialog to cancel: set the global cancel
// flag, but keep waiting for the thread to actually finish.
if (progress.wasCanceled()) {
CancelCheck::instance().setCancel();
}
// Only show the progress dialog on the second pass through the loop, so
// that it does not appear when the load finishes before that.
if (i == 1)
progress.show();
}
LOGDEB("loadDocInCurrentTab: after file load: cancel " <<
CancelCheck::instance().cancelState() << " status " << lthr.status <<
" text length " << lthr.fdoc.text.length() << "\n");
// Success
if (lthr.status == 0) {
return true;
}
// Failure because of a cancel request: nothing to explain.
if (CancelCheck::instance().cancelState())
return false;
QString explain;
if (!lthr.missing.empty()) {
// The thread reported a missing helper program: name it in the message
explain = QString::fromUtf8("<br>") +
tr("Missing helper program: ") +
QString::fromLocal8Bit(lthr.missing.c_str());
QMessageBox::warning(0, "Recoll",
tr("Can't turn doc into internal "
"representation for ") +
lthr.fdoc.mimetype.c_str() + explain);
} else {
if (progress.wasCanceled()) {
QMessageBox::warning(0, "Recoll", tr("Canceled"));
} else {
// Other failure: reset the progress dialog before showing the message
// corresponding to the cause recorded by the thread.
progress.reset();
displayLoadError(lthr.explain, canGetRawText);
}
}
return false;
}
/* /*
Code for loading a file into an editor window. The operations that Code for loading a file into an editor window. The operations that
we call have no provision to indicate progression, and it would be we call have no provision to indicate progression, and it would be
@ -628,92 +712,41 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Load and convert document // Load and convert document
// idoc came out of the index data (main text and some fields missing). // - idoc came out of the index data (main text and some fields missing).
// fdoc is the complete one what we are going to extract from storage. // - fdoc is the complete one what we are going to extract from storage.
//
// If the preference to use the stored text is set, we still
// create the LoadThread object for convenience (using its fdoc
// field), but don't start it.
LoadThread lthr(theconfig, idoc, prefs.previewHtml, this); LoadThread lthr(theconfig, idoc, prefs.previewHtml, this);
connect(&lthr, SIGNAL(finished()), &loop, SLOT(quit())); connect(&lthr, SIGNAL(finished()), &loop, SLOT(quit()));
lthr.start(); bool canGetRawText = rcldb && rcldb->storesDocText();
for (int i = 0;;i++) { auto it = prefs.preferStoredTextMimes.find(idoc.mimetype);
tT.start(1000); bool preferStoredText = (it != prefs.preferStoredTextMimes.end());
loop.exec(); bool loadok{false};
if (lthr.isFinished())
break; if (!preferStoredText || !canGetRawText) {
if (progress.wasCanceled()) { // Try load from actual document
CancelCheck::instance().setCancel(); loadok = runLoadThread(lthr, tT, loop, progress, canGetRawText);
}
if (i == 1)
progress.show();
} }
LOGDEB("loadDocInCurrentTab: after file load: cancel " << if (!loadok && canGetRawText) {
CancelCheck::instance().cancelState() << " status " << lthr.status << // Preferring/able to use stored text or extern load failed
" text length " << lthr.fdoc.text.length() << "\n"); lthr.fdoc = idoc;
loadok = rcldb->getDocRawText(lthr.fdoc);
if (!loadok) {
QMessageBox::warning(0,"Recoll",tr("Could not fetch stored text"));
}
}
if (CancelCheck::instance().cancelState()) if (!loadok) {
// Everything failed.
progress.close();
return false; return false;
if (lthr.status != 0) {
bool canGetRawText = rcldb && rcldb->storesDocText();
QString explain;
if (!lthr.missing.empty()) {
explain = QString::fromUtf8("<br>") +
tr("Missing helper program: ") +
QString::fromLocal8Bit(lthr.missing.c_str());
QMessageBox::warning(0, "Recoll",
tr("Can't turn doc into internal "
"representation for ") +
lthr.fdoc.mimetype.c_str() + explain);
} else {
if (progress.wasCanceled()) {
QMessageBox::warning(0, "Recoll", tr("Canceled"));
} else {
progress.reset();
// Note that we can't easily check for a readable file
// because it's possible that only a region is locked
// (e.g. on Windows for an ost file the first block is
// readable even if Outlook is running).
QString msg;
switch (lthr.explain) {
case FileInterner::FetchMissing:
msg = tr("Error loading the document: file missing.");
break;
case FileInterner::FetchPerm:
msg = tr("Error loading the document: no permission.");
break;
case FileInterner::FetchNoBackend:
msg =
tr("Error loading: backend not configured.");
break;
case FileInterner::InternfileOther:
#ifdef _WIN32
msg = tr("Error loading the document: "
"other handler error<br>"
"Maybe the application is locking the file ?");
#else
msg = tr("Error loading the document: other handler error.");
#endif
break;
}
if (canGetRawText) {
msg += tr("<br>Attempting to display from stored text.");
}
QMessageBox::warning(0, "Recoll", msg);
}
}
if (canGetRawText) {
lthr.fdoc = idoc;
if (!rcldb->getDocRawText(lthr.fdoc)) {
QMessageBox::warning(0, "Recoll",
tr("Could not fetch stored text"));
progress.close();
return false;
}
} else {
progress.close();
}
} }
// Reset config just in case. // Reset config just in case.
theconfig->setKeyDir(""); theconfig->setKeyDir("");
@ -722,8 +755,8 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
// We don't do the highlighting for very big texts: too long. We // We don't do the highlighting for very big texts: too long. We
// should at least do special char escaping, in case a '&' or '<' // should at least do special char escaping, in case a '&' or '<'
// somehow slipped through previous processing. // somehow slipped through previous processing.
bool highlightTerms = lthr.fdoc.text.length() < bool highlightTerms = int(lthr.fdoc.text.length()) <
(unsigned long)prefs.maxhltextmbs * 1024 * 1024; prefs.maxhltextkbs * 1024;
// Final text is produced in chunks so that we can display the top // Final text is produced in chunks so that we can display the top
// while still inserting at bottom // while still inserting at bottom
@ -752,7 +785,6 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
QStringList qrichlst; QStringList qrichlst;
editor->m_plaintorich->set_activatelinks(prefs.previewActiveLinks); editor->m_plaintorich->set_activatelinks(prefs.previewActiveLinks);
#if 1
if (highlightTerms) { if (highlightTerms) {
progress.setLabelText(tr("Creating preview text")); progress.setLabelText(tr("Creating preview text"));
qApp->processEvents(); qApp->processEvents();
@ -815,17 +847,6 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
} }
} }
} }
#else // For testing qtextedit bugs...
highlightTerms = true;
const char *textlist[] =
{
"Du plain text avec un\n <termtag>termtag</termtag> fin de ligne:",
"texte apres le tag\n",
};
const int listl = sizeof(textlist) / sizeof(char*);
for (int i = 0 ; i < listl ; i++)
qrichlst.push_back(QString::fromUtf8(textlist[i]));
#endif
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////

View File

@ -44,9 +44,11 @@
#include "rcldb.h" #include "rcldb.h"
#include "plaintorich.h" #include "plaintorich.h"
#include "rclmain_w.h" #include "rclmain_w.h"
#include "internfile.h"
#include "ui_preview.h" #include "ui_preview.h"
class QTabWidget; class QTabWidget;
class QLabel; class QLabel;
class QPushButton; class QPushButton;
@ -55,6 +57,10 @@ class Preview;
class PlainToRichQtPreview; class PlainToRichQtPreview;
class QUrl; class QUrl;
class RclMain; class RclMain;
class LoadThread;
class QTimer;
class QEventLoop;
class QProgressDialog;
class PreviewTextEdit : public PREVIEW_PARENTCLASS { class PreviewTextEdit : public PREVIEW_PARENTCLASS {
Q_OBJECT; Q_OBJECT;
@ -185,6 +191,10 @@ private:
virtual PreviewTextEdit *currentEditor(); virtual PreviewTextEdit *currentEditor();
virtual PreviewTextEdit *addEditorTab(); virtual PreviewTextEdit *addEditorTab();
virtual bool loadDocInCurrentTab(const Rcl::Doc& idoc, int dnm); virtual bool loadDocInCurrentTab(const Rcl::Doc& idoc, int dnm);
void displayLoadError(
FileInterner::ErrorPossibleCause explain, bool canGetRawText);
bool runLoadThread(LoadThread& lthr, QTimer& tT, QEventLoop& loop,
QProgressDialog& progress, bool canGetRawText);
}; };
#endif /* _PREVIEW_W_H_INCLUDED_ */ #endif /* _PREVIEW_W_H_INCLUDED_ */

View File

@ -168,7 +168,7 @@ i18n/recoll_zh_CN.ts \
i18n/recoll_fr.ts \ i18n/recoll_fr.ts \
i18n/recoll_xx.ts \ i18n/recoll_xx.ts \
i18n/recoll_cs.ts \ i18n/recoll_cs.ts \
i18n/recoll_kr.ts \ i18n/recoll_ko.ts \
i18n/recoll_el.ts \ i18n/recoll_el.ts \
i18n/recoll_tr.ts i18n/recoll_tr.ts

File diff suppressed because it is too large Load Diff

View File

@ -112,7 +112,7 @@ void UIPrefsDialog::setFromPrefs()
pageLenSB->setValue(prefs.respagesize); pageLenSB->setValue(prefs.respagesize);
maxHistSizeSB->setValue(prefs.historysize); maxHistSizeSB->setValue(prefs.historysize);
collapseDupsCB->setChecked(prefs.collapseDuplicates); collapseDupsCB->setChecked(prefs.collapseDuplicates);
maxHLTSB->setValue(prefs.maxhltextmbs); maxHLTSB->setValue(prefs.maxhltextkbs);
if (prefs.ssearchTypSav) { if (prefs.ssearchTypSav) {
ssearchTypCMB->setCurrentIndex(4); ssearchTypCMB->setCurrentIndex(4);
@ -304,7 +304,7 @@ void UIPrefsDialog::accept()
prefs.respagesize = pageLenSB->value(); prefs.respagesize = pageLenSB->value();
prefs.historysize = maxHistSizeSB->value(); prefs.historysize = maxHistSizeSB->value();
prefs.collapseDuplicates = collapseDupsCB->isChecked(); prefs.collapseDuplicates = collapseDupsCB->isChecked();
prefs.maxhltextmbs = maxHLTSB->value(); prefs.maxhltextkbs = maxHLTSB->value();
prefs.qtermstyle = qtermStyleLE->text(); prefs.qtermstyle = qtermStyleLE->text();
prefs.abssep = abssepLE->text(); prefs.abssep = abssepLE->text();

243
src/utils/cmdtalk.cpp Normal file
View File

@ -0,0 +1,243 @@
/* Copyright (C) 2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "cmdtalk.h"
#include <stdio.h>
#include <iostream>
#include <sstream>
#include <mutex>
#include "smallut.h"
#include "execmd.h"
#ifdef MDU_INCLUDE_LOG
#include MDU_INCLUDE_LOG
#else
#include "log.h"
#endif
using namespace std;
// Tag exception thrown by Canceler::newData() when the configured delay
// has elapsed, and caught in CmdTalk::Internal::readDataElement().
class TimeoutExcept {};
// Timeout watchdog registered with the ExecCmd object through setAdvise().
// reset() records a start time; newData() then throws TimeoutExcept as soon
// as more than m_timeosecs seconds have passed since that start time.
class Canceler : public ExecCmdAdvise {
public:
    Canceler(int tmsecs)
        : m_timeosecs(tmsecs) {}

    // (Re)arm the watchdog: the delay is counted from the current time.
    void reset() {
        m_starttime = time(0);
    }

    // Advise callback (presumably invoked by ExecCmd while it waits for
    // data from the child -- confirm against ExecCmdAdvise).
    // The argument is not used.
    virtual void newData(int cnt) {
        // A zero start time means that reset() was never called: the
        // watchdog is not armed yet.
        if (m_starttime == 0) {
            return;
        }
        const time_t elapsed = time(0) - m_starttime;
        if (elapsed > m_timeosecs) {
            throw TimeoutExcept();
        }
    }

    // Maximum delay in seconds
    int m_timeosecs;
    // Time of the last reset() call, 0 if none yet
    time_t m_starttime{0};
};
// Private implementation part of CmdTalk: owns the command object and
// implements the message exchange.
class CmdTalk::Internal {
public:
    // Command object. Null until CmdTalk::startCmd() creates it, deleted
    // by the destructor.
    ExecCmd *cmd{nullptr};
    // Timeout watchdog, given to the command object as advise callback.
    Canceler m_cancel;
    // Held for the whole duration of a talk() call.
    std::mutex mmutex;

    Internal(int timeosecs)
        : m_cancel(timeosecs) {}

    ~Internal() {
        delete cmd;
    }

    // Read one "name: len\ndata" element from the command output.
    bool readDataElement(string& name, string &data);

    // Send a request message (arg0 first if its name is not empty, then
    // args), and collect the answer elements into rep.
    bool talk(const pair<string, string>& arg0,
              const unordered_map<string, string>& args,
              unordered_map<string, string>& rep);
};
// Create the private implementation object. timeosecs is handed to its
// timeout watchdog. No command is started here: see startCmd().
CmdTalk::CmdTalk(int timeosecs)
    : m(new Internal(timeosecs))
{
}
// Delete the private implementation object allocated by the constructor
// (its own destructor deletes the command object).
CmdTalk::~CmdTalk()
{
delete m;
}
bool CmdTalk::startCmd(const string& cmdname,
const vector<string>& args,
const vector<string>& env,
const vector<string>& path)
{
LOGDEB("CmdTalk::startCmd\n");
delete m->cmd;
m->cmd = new ExecCmd;
m->cmd->setAdvise(&m->m_cancel);
for (const auto& it : env) {
m->cmd->putenv(it);
}
string acmdname(cmdname);
if (!path.empty()) {
string colonpath;
for (const auto& it: path) {
colonpath += it + ":";
}
if (!colonpath.empty()) {
colonpath.erase(colonpath.size()-1);
}
LOGDEB("CmdTalk::startCmd: PATH: [" << colonpath << "]\n");
ExecCmd::which(cmdname, acmdname, colonpath.c_str());
}
if (m->cmd->startExec(acmdname, args, 1, 1) < 0) {
return false;
}
return true;
}
// Messages are made of data elements. Each element is like:
// name: len\ndata
// An empty line signals the end of the message, so the whole thing
// would look like:
// Name1: Len1\nData1Name2: Len2\nData2\n
//
// Read one data element from the command output.
// @param name set to the first token of the header line (any ':' is
//   still attached). Left empty when the end of message line was read.
// @param data set to the element data, empty if the length is not
//   positive or at end of message.
// @return false on read error, timeout, or malformed header line. true if
//   an element or the end of message was read.
bool CmdTalk::Internal::readDataElement(string& name, string &data)
{
    // Always start from empty outputs, so that an empty name reliably
    // means "end of message" even if the caller reuses its strings.
    name.clear();
    data.clear();

    string ibuf;
    // The timeout delay is counted from here.
    m_cancel.reset();
    try {
        // Read name and length
        if (cmd->getline(ibuf) <= 0) {
            LOGERR("CmdTalk: getline error\n");
            return false;
        }
    } catch (const TimeoutExcept&) {
        LOGINF("CmdTalk:readDataElement: fatal timeout (" <<
               m_cancel.m_timeosecs << " S)\n");
        return false;
    }

    LOGDEB1("CmdTalk:rde: line [" << ibuf << "]\n");

    // Empty line (end of message) ?
    if (!ibuf.compare("\n")) {
        LOGDEB1("CmdTalk: Got empty line\n");
        return true;
    }

    // We're expecting something like Name: len\n
    vector<string> tokens;
    stringToTokens(ibuf, tokens);
    if (tokens.size() != 2) {
        LOGERR("CmdTalk: bad line in filter output: [" << ibuf << "]\n");
        return false;
    }
    name = tokens[0];
    const string& slen = tokens[1];
    int len = 0;
    if (sscanf(slen.c_str(), "%d", &len) != 1) {
        LOGERR("CmdTalk: bad line in filter output: [" << ibuf << "]\n");
        return false;
    }

    // Read element data. Nothing is read for a zero or negative length.
    if (len > 0 && cmd->receive(data, len) != len) {
        LOGERR("CmdTalk: expected " << len << " bytes of data, got " <<
               data.length() << "\n");
        return false;
    }
    LOGDEB1("CmdTalk:rde: got: name [" << name << "] len " << len <<
            " value [" <<
            (data.size() > 100 ? (data.substr(0, 100) + " ...") : data) <<
            "]\n");
    return true;
}
// Perform one request/answer exchange with the command.
// @param arg0 element sent first, skipped if its name (first) is empty.
// @param args other elements to send, as name -> data.
// @param rep answer elements are stored here as name -> data. The map is
//   not cleared first.
// @return false if there is no running command, on send or read error
//   (the child is then killed with zapChild()), or if the answer holds a
//   "cmdtalkstatus" element. true otherwise.
bool CmdTalk::Internal::talk(const pair<string, string>& arg0,
                             const unordered_map<string, string>& args,
                             unordered_map<string, string>& rep)
{
    // One exchange at a time
    std::unique_lock<std::mutex> lock(mmutex);

    // cmd stays null until startCmd() has been called: check it before
    // use instead of crashing.
    if (!cmd || cmd->getChildPid() <= 0) {
        LOGERR("CmdTalk::talk: no process\n");
        return false;
    }

    // Build the whole message: each element is "name: len\ndata", an
    // empty line ends the message.
    ostringstream obuf;
    if (!arg0.first.empty()) {
        obuf << arg0.first << ": " << arg0.second.size() << "\n" << arg0.second;
    }
    for (const auto& it : args) {
        obuf << it.first << ": " << it.second.size() << "\n" << it.second;
    }
    obuf << "\n";

    if (cmd->send(obuf.str()) < 0) {
        cmd->zapChild();
        LOGERR("CmdTalk: send error\n");
        return false;
    }

    // Read answer (multiple elements)
    LOGDEB1("CmdTalk: reading answer\n");
    for (;;) {
        string name, data;
        if (!readDataElement(name, data)) {
            cmd->zapChild();
            return false;
        }
        // An empty name means that the end of message line was read
        if (name.empty()) {
            break;
        }
        // Get rid of the ':' which comes with the name
        trimstring(name, ":");
        LOGDEB1("CmdTalk: got [" << name << "] -> [" << data << "]\n");
        rep[name] = data;
    }

    // The presence of a "cmdtalkstatus" element is reported as failure
    return rep.find("cmdtalkstatus") == rep.end();
}
// Tell if we have a command object with a positive child process id.
bool CmdTalk::running()
{
    if (!m || !m->cmd) {
        return false;
    }
    return m->cmd->getChildPid() > 0;
}
bool CmdTalk::talk(const unordered_map<string, string>& args,
unordered_map<string, string>& rep)
{
return m->talk({"",""}, args, rep);
}
bool CmdTalk::callproc(
const string& proc,
const unordered_map<std::string, std::string>& args,
unordered_map<std::string, std::string>& rep)
{
return m->talk({"cmdtalk:proc", proc}, args, rep);
}

109
src/utils/cmdtalk.h Normal file
View File

@ -0,0 +1,109 @@
/* Copyright (C) 2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef _CMDTALK_H_INCLUDED_
#define _CMDTALK_H_INCLUDED_
/**
* Execute a command and exchange messages with it.
*
* A simple stream protocol is used for the dialog. HTTP or some kind
* of full-blown RPC could have been used, but there was also good
* reason to keep it simple (yet powerful), given the limited context
* of dialog through a pipe.
*
* The data is exchanged in TLV fashion, in a way that should be
* usable in most script languages. The basic unit of data has one line
* with a data type and a count (both ASCII), followed by the data. A
* 'message' is made of one or several units or tags and ends with one empty
* line.
*
* Example:(the message begins before 'Filename' and has 'Filename' and
* 'Ipath' tags):
*
Filename: 24
/my/home/mail/somefolderIpath: 2
22
<Message ends here: because of the empty line after '22'
*
* Example answer, with 'Mimetype' and 'Data' tags
*
Mimetype: 10
text/plainData: 10
0123456789
<Message ends here because of empty line
*
* This format is both extensible and reasonably easy to parse.
* While it's more fitted for python or perl on the script side, it
* should even be sort of usable from the shell (e.g.: use dd to read
* the counted data). Most alternatives would need data encoding in
* some cases.
*
* Higher level dialog:
* The C++ program is the master and sends request messages to the script.
* Both sides of the communication should be prepared to receive and discard
* unknown tags.
*/
#include <string>
#include <vector>
#include <unordered_map>
// Start a command and exchange request/answer messages with it.
class CmdTalk {
public:
    // @param timeosecs timeout value, in seconds.
    CmdTalk(int timeosecs);
    virtual ~CmdTalk();

    // Objects of this class can't be copied.
    CmdTalk(const CmdTalk&) = delete;
    CmdTalk &operator=(const CmdTalk &) = delete;

    // Start the command.
    // @param cmdname command name or path.
    // @param args command line arguments.
    // @param env each entry should be of the form name=value. They
    //   augment the subprocess environment.
    // @param path replaces the PATH variable when looking for the command.
    // @return false if the command could not be started.
    //
    // Note that cmdtalk.py:main() method is a test routine which
    // expects data pairs on the command line. If actual parameters
    // need to be passed, it can't be used by the processor.
    virtual bool startCmd(const std::string& cmdname,
                          const std::vector<std::string>& args =
                          std::vector<std::string>(),
                          const std::vector<std::string>& env =
                          std::vector<std::string>(),
                          const std::vector<std::string>& path =
                          std::vector<std::string>()
        );

    // Is there a command with a live child process ?
    virtual bool running();

    // Single exchange: send and receive data.
    // @param args elements to send, as name -> data.
    // @param rep receives the elements of the answer, as name -> data.
    virtual bool talk(const std::unordered_map<std::string, std::string>& args,
                      std::unordered_map<std::string, std::string>& rep);

    // Specialized version with special argument used by dispatcher to call
    // designated method.
    // @param proc name of the method to call on the other side.
    virtual bool callproc(
        const std::string& proc,
        const std::unordered_map<std::string, std::string>& args,
        std::unordered_map<std::string, std::string>& rep);

private:
    // Implementation details are kept out of the header.
    class Internal;
    Internal *m{nullptr};
};
#endif /* _CMDTALK_H_INCLUDED_ */