Merge branch 'kopostag'

2020-03-26 14:03:17 +01:00 · 2020-03-26 14:03:17 +01:00 · 9b3a5fac12
commit 9b3a5fac12
parent f755505e98 1afc606718
18 changed files with 5527 additions and 1275 deletions
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -82,6 +82,7 @@ common/rclinit.h \
 common/syngroups.cpp \
 common/syngroups.h \
 common/textsplit.cpp \
 common/textsplitko.cpp \
 common/textsplit.h \
 common/unacpp.cpp \
 common/unacpp.h \
@ -210,6 +211,8 @@ utils/circache.cpp \
 utils/circache.h \
 utils/closefrom.cpp \
 utils/closefrom.h \
 utils/cmdtalk.cpp \
 utils/cmdtalk.h \
 utils/conftree.cpp \
 utils/conftree.h \
 utils/copyfile.cpp \
@ -645,8 +648,10 @@ filterdir = $(pkgdatadir)/filters
 dist_filter_DATA = \
 desktop/hotrecoll.py \
 filters/abiword.xsl \
 filters/cmdtalk.py \
 filters/fb2.xsl \
 filters/gnumeric.xsl \
 filters/kosplitter.py \
 filters/msodump.zip \
 filters/okular-note.xsl \
 filters/opendoc-body.xsl \
@ -724,7 +729,7 @@ python/recoll/recoll/rclconfig.py
 install-data-hook: 
 	(cd $(DESTDIR)/$(filterdir); \
 	chmod a+x rcl* ppt-dump.py xls-dump.py xlsxmltocsv.py hotrecoll.py; \
-	chmod a+x recoll-we-move-files.py ../examples/rclmon.sh; \
+	chmod a+x recoll-we-move-files.py ../examples/rclmon.sh kosplitter.py; \
 	chmod 0644 msodump.zip recollepub.zip rclexecm.py rcllatinstops.zip \
        rclconfig.py conftree.py rclmidi.py rclexec1.py rcluncomp.py rclxslt.py)
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@ -44,8 +44,10 @@
 // ngrams
 #undef KATAKANA_AS_WORDS
-// Same for Korean syllabic, and same problem, not used.
+// Same for Korean syllabic, and same problem. However we have a
-#undef HANGUL_AS_WORDS
+// runtime option to use an external text analyser for hangul, so this
 // is defined at compile time.
 #define HANGUL_AS_WORDS
 using namespace std;
@ -289,6 +291,7 @@ bool          TextSplit::o_noNumbers{false};
 bool          TextSplit::o_deHyphenate{false};
 int           TextSplit::o_maxWordLength{40};
 static const int o_CJKMaxNgramLen{5};
 bool o_exthangultagger{false};
 void TextSplit::staticConfInit(RclConfig *config)
 {
@ -323,6 +326,13 @@ void TextSplit::staticConfInit(RclConfig *config)
            charclasses[int('\\')] = SPACE;
        }
    }
    string kotagger;
    config->getConfParam("hangultagger", kotagger);
    if (!kotagger.empty()) {
        o_exthangultagger = true;
        koStaticConfInit(config, kotagger);
    }
 }
 // Final term checkpoint: do some checking (the kind which is simpler
@ -612,7 +622,7 @@ bool TextSplit::text_to_words(const string &in)
 #if defined(KATAKANA_AS_WORDS) || defined(HANGUL_AS_WORDS)
    int prev_csc = -1;
 #endif
-    for (; !it.eof(); it++) {
+    for (; !it.eof() && !it.error(); it++) {
        unsigned int c = *it;
        nonalnumcnt++;
@ -625,30 +635,40 @@ bool TextSplit::text_to_words(const string &in)
        if (UNICODE_IS_KATAKANA(c)) {
            csc = CSC_KATAKANA;
        } else if (UNICODE_IS_HANGUL(c)) {
-            csc = CSC_HANGUL;
+            if (o_exthangultagger) {
                csc = CSC_HANGUL;
            } else {
                csc = CSC_CJK;
            }
        } else if (UNICODE_IS_CJK(c)) {
            csc = CSC_CJK;
        } else {
            csc = CSC_OTHER;
        }
-        if (o_processCJK && csc == CSC_CJK) {
+        if (o_processCJK && (csc == CSC_CJK || csc == CSC_HANGUL)) {
-            // CJK character hit. 
+            // CJK character hit. Hangul processing may be special.
            // Do like at EOF with the current non-cjk data.
            if (m_wordLen || m_span.length()) {
                if (!doemit(true, it.getBpos()))
                    return false;
            }
-
+            // Hand off situation to the appropriate routine.
-            // Hand off situation to the cjk routine.
+            if (csc == CSC_HANGUL) {
-            if (!cjk_to_words(&it, &c)) {
+                if (!ko_to_words(&it, &c)) {
-                LOGERR("Textsplit: scan error in cjk handler\n");
+                    LOGERR("Textsplit: scan error in korean handler\n");
-                return false;
+                    return false;
                }
            } else {
                if (!cjk_to_words(&it, &c)) {
                    LOGERR("Textsplit: scan error in cjk handler\n");
                    return false;
                }
            }
            // Check for eof, else c contains the first non-cjk
            // character after the cjk sequence, just go on.
-            if (it.eof())
+            if (it.eof() || it.error())
                break;
        }
@ -976,7 +996,7 @@ bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
    // Current number of valid offsets;
    unsigned int nchars = 0;
    unsigned int c = 0;
-    for (; !it.eof(); it++) {
+    for (; !it.eof() && !it.error(); it++) {
        c = *it;
        if (c == ' ' || c == '\t' || c == '\n') {
            continue;
@ -1077,7 +1097,7 @@ int TextSplit::countWords(const string& s, TextSplit::Flags flgs)
 bool TextSplit::hasVisibleWhite(const string &in)
 {
    Utf8Iter it(in);
-    for (; !it.eof(); it++) {
+    for (; !it.eof() && !it.error(); it++) {
        unsigned int c = (unsigned char)*it;
        if (c == (unsigned int)-1) {
            LOGERR("hasVisibleWhite: error while scanning UTF-8 string\n");
@ -1097,7 +1117,7 @@ template <class T> bool u8stringToStrings(const string &s, T &tokens)
    tokens.clear();
    enum states {SPACE, TOKEN, INQUOTE, ESCAPE};
    states state = SPACE;
-    for (; !it.eof(); it++) {
+    for (; !it.eof() && !it.error(); it++) {
        unsigned int c = *it;
        if (visiblewhite.find(c) != visiblewhite.end()) 
            c = ' ';
--- a/src/common/textsplit.h
+++ b/src/common/textsplit.h
@ -54,6 +54,7 @@ public:
    /** Call at program initialization to read non default values from the 
        configuration */
    static void staticConfInit(RclConfig *config);
    static void koStaticConfInit(RclConfig *config, const std::string& tagger);
    /** Split text, emit words and positions. */
    virtual bool text_to_words(const std::string &in);
@ -199,6 +200,9 @@ private:
    // This processes cjk text:
    bool cjk_to_words(Utf8Iter *it, unsigned int *cp);
    // Experimental Korean splitter. This uses an external Python tokenizer
    bool ko_to_words(Utf8Iter *it, unsigned int *cp);
    bool emitterm(bool isspan, std::string &term, int pos, size_t bs,size_t be);
    bool doemit(bool spanerase, size_t bp);
    void discardspan();
--- a/src/common/textsplitko.cpp
+++ b/src/common/textsplitko.cpp
@ -0,0 +1,214 @@
 /* Copyright (C) 2020 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
 // Specialized Korean text splitter using konlpy running in a Python
 // subprocess. konlpy can use several different backends. We accept
 // Okt (Twitter), Mecab and Komoran at this point. Unfortunately the
 // different backends have different POS tag names, so that things are
 // not completely transparent when switching from one to another (the
 // tag names need to be translated in the Python program).
 #include "autoconfig.h"
 #include <iostream>
 #include <string>
 #include <cstring>
 #include <unordered_set>
 #include <mutex>
 #include "textsplit.h"
 #include "log.h"
 //#define UTF8ITER_CHECK
 #include "utf8iter.h"
 #include "smallut.h"
 #include "rclconfig.h"
 #include "cmdtalk.h"
 using namespace std;
 // File-scope state for the Korean splitter. All of it is used from
 // ko_to_words(), which holds o_mutex for its whole duration.
 //
 // Separator char used in words and tags lists.
 static const string sepchars("\t");
 // Communication object for the splitter subprocess. Created by initCmd(),
 // and reset to nullptr when the command can't be started or is being
 // restarted.
 static CmdTalk *o_talker;
 // Set by initCmd() when starting the command failed: further calls then
 // fail immediately instead of retrying.
 static bool o_starterror{false};
 // Path of the kosplitter.py script, as found by findFilter() in
 // koStaticConfInit(). initCmd() fails if it is empty.
 static string o_cmdpath;
 // Locked at the top of ko_to_words().
 // NOTE(review): this is the only variable here without internal linkage.
 std::mutex o_mutex;
 // Value sent as the "tagger" parameter with each request. Stays "Okt"
 // unless koStaticConfInit() is given another known name.
 static string o_taggername{"Okt"};
 // The Python/Java splitter is leaking memory. We restart it from time to time
 // restartcount accumulates the sizes (bytes) of the text sent to the
 // subprocess. Once it exceeds restartthreshold, initCmd() deletes the
 // talker object, zeroes the count and starts a new command.
 static uint64_t restartcount;
 static uint64_t restartthreshold = 5 * 1000 * 1000;
 // Record the configuration for the external Korean splitter: the
 // location of the kosplitter.py script and the name of the tagger to
 // request from it. An unrecognized tagger name is reported and the
 // current tagger name (initially "Okt") is left unchanged.
 void TextSplit::koStaticConfInit(RclConfig *config, const string& tagger)
 {
    o_cmdpath = config->findFilter("kosplitter.py");
    const bool knowntagger =
        tagger == "Okt" || tagger == "Mecab" || tagger == "Komoran";
    if (!knowntagger) {
        LOGERR("TextSplit::koStaticConfInit: unknown tagger [" << tagger <<
               "], using Okt\n");
        return;
    }
    o_taggername = tagger;
 }
 // Make sure that the Python splitter subprocess is available, starting
 // or restarting it if needed.
 //
 // - After a failed start, all further calls fail immediately.
 // - An existing talker is reused until more than restartthreshold bytes
 //   have been accounted in restartcount; it is then deleted and a new
 //   command is started.
 //
 // Returns true if o_talker is usable on return, false otherwise (in
 // which case o_talker is nullptr unless it was never touched).
 // The caller is expected to hold o_mutex (ko_to_words() does).
 static bool initCmd()
 {
    if (o_starterror) {
        // No use retrying
        return false;
    }
    if (o_talker) {
        if (restartcount <= restartthreshold) {
            return true;
        }
        // Enough data went through the current subprocess: drop it and
        // fall through to start a fresh one.
        delete o_talker;
        o_talker = nullptr;
        restartcount = 0;
    }
    if (o_cmdpath.empty()) {
        return false;
    }
    // A plain new expression reports failure by throwing, it never
    // yields nullptr, so there is nothing to test here.
    o_talker = new CmdTalk(300);
    if (!o_talker->startCmd(o_cmdpath)) {
        delete o_talker;
        o_talker = nullptr;
        o_starterror = true;
        return false;
    }
    return true;
 }
 // Korean text processing: collect the stretch of text beginning at the
 // current iterator position, have it segmented and tagged by the Python
 // subprocess, then emit the words tagged Noun, Verb, Adjective or
 // Adverb through takeword().
 //
 // @param itp iterator on the input text. On return it is positioned on
 //   the character which ended the stretch, or at eof/error.
 // @param cp on success, receives the value of the last character read
 //   from the iterator.
 // @return false if the subprocess is not available, if the exchange
 //   with it failed or lacked the "text" or "tags" field, or if
 //   takeword() returned false. True otherwise.
 bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
 {
    std::unique_lock<std::mutex> mylock(o_mutex);
    initCmd();
    if (nullptr == o_talker) {
        return false;
    }
    LOGDEB1("k_to_words: m_wordpos " << m_wordpos << "\n");
    Utf8Iter &it = *itp;
    unsigned int c = 0;
    unordered_map<string, string> args;
    args.insert(pair<string,string>{"data", string()});
    // The map is empty before the above insert, so begin() is the "data"
    // entry. The reference stays valid across the next insert.
    string& inputdata{args.begin()->second};
    // We send the tagger name every time but it's only used the first
    // one: can't change it after init. We could avoid sending it
    // every time, but I don't think that the performance hit is
    // significant
    args.insert(pair<string,string>{"tagger", o_taggername});
    // Walk the Korean characters section and send the text to the
    // analyser
    string::size_type orgbytepos = it.getBpos();
    for (; !it.eof() && !it.error(); it++) {
        c = *it;
        // isalpha() is only defined for values which fit in an unsigned
        // char (or EOF): c is a full Unicode code point, so check the
        // range before calling it.
        if (!isHANGUL(c) && c < 256 && isalpha(static_cast<int>(c))) {
            // Done with Korean stretch, process and go back to main routine
            //std::cerr << "Broke on char " << (std::string)it << endl;
            break;
        } else {
            it.appendchartostring(inputdata);
        }
    }
    LOGDEB1("TextSplit::k_to_words: sending out " << inputdata.size() <<
            " bytes " << inputdata << endl);
    // Account for the data sent: initCmd() uses this to decide when to
    // restart the subprocess.
    restartcount += inputdata.size();
    unordered_map<string,string> result;
    if (!o_talker->talk(args, result)) {
        LOGERR("Python splitter for Korean failed for [" << inputdata << "]\n");
        return false;
    }
    auto resit = result.find("text");
    if (resit == result.end()) {
        LOGERR("No text in Python splitter for Korean\n");
        return false;
    }        
    string& outtext = resit->second;
    vector<string> words;
    stringToTokens(outtext, words, sepchars);
    resit = result.find("tags");
    if (resit == result.end()) {
        LOGERR("No tags in Python splitter for Korean\n");
        return false;
    }        
    string& outtags = resit->second;
    vector<string> tags;
    stringToTokens(outtags, tags, sepchars);
    // The two lists are tokenized independently: nothing guarantees that
    // they have the same length. Never index tags beyond its end.
    if (tags.size() != words.size()) {
        LOGERR("Python splitter for Korean: got " << words.size() <<
               " words but " << tags.size() << " tags\n");
    }
    // Stand-in tag for words which have none: matches no indexed category.
    const string notag;
    // This is the position in the whole text, not the local fragment,
    // which is bytepos-orgbytepos
    string::size_type bytepos(orgbytepos);
    for (unsigned int i = 0; i < words.size(); i++) {
        // The POS tagger strips characters from the input (e.g. multiple
        // spaces, sometimes new lines, possibly other stuff). This
        // means that we can't easily reconstruct the byte position
        // from the concatenated terms. The output seems to be always
        // shorter than the input, so we try to look ahead for the
        // term. Can't be too sure that this works though, depending
        // on exactly what transformation may have been applied from
        // the original input to the term.
        string word = words[i];
        trimstring(word);
        const string& tag = i < tags.size() ? tags[i] : notag;
        string::size_type newpos = bytepos - orgbytepos;
        newpos = inputdata.find(word, newpos);
        if (newpos != string::npos) {
            bytepos = orgbytepos + newpos;
        }
        LOGDEB1("WORD OPOS " << bytepos-orgbytepos <<
                " FOUND POS " << newpos << endl);
        if (tag == "Noun" || tag == "Verb" ||
            tag == "Adjective" || tag == "Adverb") {
            if (!takeword(
                    word, m_wordpos++, bytepos, bytepos + words[i].size())) {
                return false;
            }
        }
        LOGDEB1("WORD [" << words[i] << "] size " << words[i].size() <<
               " TAG " << tag << endl);
        bytepos += words[i].size();
    }
 #if DO_CHECK_THINGS
    int sizediff = inputdata.size() - (bytepos - orgbytepos);
    if (sizediff < 0)
        sizediff = -sizediff;
    if (sizediff > 1) {
        LOGERR("ORIGINAL TEXT SIZE: " << inputdata.size() <<
               " FINAL BYTE POS " << bytepos - orgbytepos <<
               " TEXT [" << inputdata << "]\n");
    }
 #endif
    // Reset state, saving term position, and return the found non-cjk
    // Unicode character value. The current input byte offset is kept
    // in the utf8Iter
    int pos = m_wordpos;
    clearsplitstate();
    m_spanpos = m_wordpos = pos;
    *cp = c;
    return true;
 }
--- a/src/filters/cmdtalk.py
+++ b/src/filters/cmdtalk.py
@ -0,0 +1,236 @@
 #################################
 # Copyright (C) 2016 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
 #   the Free Software Foundation; either version 2 of the License, or
 #   (at your option) any later version.
 #
 #   This program is distributed in the hope that it will be useful,
 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #   GNU General Public License for more details.
 #
 #   You should have received a copy of the GNU General Public License
 #   along with this program; if not, write to the
 #   Free Software Foundation, Inc.,
 #   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 ########################################################
 # Command communication module and utilities. See commands in cmdtalk.h
 #
 # All data is binary. This is important for Python3
 # All parameter names are converted to and processed as str/unicode
 from __future__ import print_function
 import sys
 import os
 import tempfile
 import shutil
 import getopt
 import traceback
 # True when running under Python 3. Selects which makebytes() gets defined.
 PY3 = sys.version > '3'
 if PY3:
    def makebytes(data):
        # bytes go through untouched, anything else is expected to be a
        # str and gets UTF-8 encoded.
        return data if isinstance(data, bytes) else data.encode("UTF-8")
 else:
    def makebytes(data):
        # Python 2: only unicode objects need encoding, str is already
        # a byte string.
        return data.encode("UTF-8") if isinstance(data, unicode) else data
 ############################################
 # CmdTalk implements the
 # communication protocol with the master process. It calls an external
 # method to use the args and produce return data.
 class CmdTalk:
    """Command side of the communication protocol with the master process.

    A message is a sequence of parameters followed by an empty line. Each
    parameter is a "name: size" line followed by size bytes of data.
    mainloop() reads messages from infile, hands the parameters to a
    processor object and writes the returned fields to outfile using
    the same format.

    outfile, infile: the communication channels (default: stdout, stdin).
    exitfunc: if set, called with the exit value before the process exits.
    """
    def __init__(self, outfile=sys.stdout, infile=sys.stdin, exitfunc=None):
        try:
            self.myname = os.path.basename(sys.argv[0])
        except Exception:
            self.myname = "???"
        self.outfile = outfile
        self.infile = infile
        self.exitfunc = exitfunc
        self.fields = {}
        if sys.platform == "win32":
            import msvcrt
            msvcrt.setmode(self.outfile.fileno(), os.O_BINARY)
            msvcrt.setmode(self.infile.fileno(), os.O_BINARY)
        # Debugging aid: set to a file name to have log() append to that
        # file instead of writing to stderr.
        self.debugfile = None
        if self.debugfile:
            self.errfout = open(self.debugfile, "a")
        else:
            self.errfout = sys.stderr
    @staticmethod
    def _printable(line):
        # Input lines are bytes under Python 3: decode them before
        # formatting, else the message would show up as "b'...'".
        if PY3 and isinstance(line, bytes):
            return line.decode('utf-8', 'replace')
        return line
    def log(self, s, doexit = 0, exitvalue = 1):
        """Write a message to the error output. If doexit is set, call
        exitfunc (if any) and exit the process with exitvalue."""
        print("CMDTALK: %s: %s" % (self.myname, s), file=self.errfout)
        if doexit:
            if self.exitfunc:
                self.exitfunc(exitvalue)
            sys.exit(exitvalue)
    def breakwrite(self, outfile, data):
        """Write data to outfile, in 4 kB pieces on Windows."""
        if sys.platform != "win32":
            outfile.write(data)
        else:
            # On windows, writing big chunks can fail with a "not enough space"
            # error. Seems a combined windows/python bug, depending on versions.
            # See https://bugs.python.org/issue11395
            # In any case, just break it up
            bs = 4*1024
            for offset in range(0, len(data), bs):
                outfile.write(data[offset:offset+bs])
    def readparam(self):
        """Read a single parameter from the process input: a line with
        the parameter name and data size, followed by the data.

        Returns (name, data). The name is str/unicode. The data is
        decoded from UTF-8 to str under Python 3 and left as a byte
        string under Python 2. ('', b'') is returned for the empty line
        which ends a message. Exits the process on end of input (status
        0) or on a malformed line or short read (status 1)."""
        if PY3:
            inf = self.infile.buffer
        else:
            inf = self.infile
        s = inf.readline()
        if s == b'':
            # End of input
            if self.exitfunc:
                self.exitfunc(0)
            sys.exit(0)
        s = s.rstrip(b'\n')
        if s == b'':
            return ('', b'')
        l = s.split()
        if len(l) != 2:
            self.log("bad line: [%s]" % self._printable(s), 1, 1)
        try:
            paramname = l[0].decode('ASCII').rstrip(':')
            paramsize = int(l[1])
        except ValueError:
            # Non-ASCII name or non-numeric size (UnicodeDecodeError is
            # a ValueError): same treatment as other malformed lines.
            self.log("bad line: [%s]" % self._printable(s), 1, 1)
        if paramsize > 0:
            paramdata = inf.read(paramsize)
            if len(paramdata) != paramsize:
                self.log("Bad read: wanted %d, got %d" %
                      (paramsize, len(paramdata)), 1, 1)
        else:
            paramdata = b''
        if PY3:
            paramdata = paramdata.decode('utf-8')
        #self.log("paramname [%s] paramsize %d value [%s]" %
        #          (paramname, paramsize, paramdata))
        return (paramname, paramdata)
    # Write one output item: "name: size" line, then the data bytes.
    # Under Python 3 the bytes go to the binary layer of the output file.
    if PY3:
        def senditem(self, nm, data):
            data = makebytes(data)
            l = len(data)
            self.outfile.buffer.write(makebytes("%s: %d\n" % (nm, l)))
            self.breakwrite(self.outfile.buffer, data)
    else:
        def senditem(self, nm, data):
            data = makebytes(data)
            l = len(data)
            self.outfile.write(makebytes("%s: %d\n" % (nm, l)))
            self.breakwrite(self.outfile, data)
    def answer(self, outfields):
        """Send all the fields in the outfields dict, then the empty
        line which ends the message, and flush the output."""
        for nm,value in outfields.items():
            #self.log("Senditem: [%s] -> [%s]" % (nm, value))
            self.senditem(nm, value)
        # End of message
        print(file=self.outfile)
        self.outfile.flush()
        #self.log("done writing data")
    def processmessage(self, processor, params):
        """Call processor.process(params) and send the result. If the
        processor raises, the error is logged and an answer holding
        "cmdtalkstatus" and "cmdtalkerrstr" is sent instead."""
        # In normal usage we try to recover from processor errors, but
        # we sometimes want to see the real stack trace when testing
        safeexec = True
        if safeexec:
            try:
                outfields = processor.process(params)
            except Exception as err:
                self.log("processmessage: processor raised: [%s]" % err)
                traceback.print_exc()
                outfields = {}
                outfields["cmdtalkstatus"] = "1"
                outfields["cmdtalkerrstr"] = str(err)
        else:
            outfields = processor.process(params)
        self.answer(outfields)
    def mainloop(self, processor):
        """Loop forever on messages from our master. Only returns by
        exiting the process (see readparam())."""
        while 1:
            #self.log("waiting for command")
            params = dict()
            # Read at most 10 parameters (normally 1 or 2), stop at empty line
            # End of message is signalled by empty paramname
            for i in range(10):
                paramname, paramdata = self.readparam()
                if paramname == "":
                    break
                params[paramname] = paramdata
            # Got message, act on it
            self.processmessage(processor, params)
 # Common main routine for testing: either run the normal protocol
 # engine or a local loop. This means that you can call
 # cmdtalk.main(proto,processor) instead of proto.mainloop(processor)
 # from your module, and get the benefits of command line testing
 def main(proto, processor):
    """Run the protocol engine, or a single processor call for testing.

    Without command line arguments, this runs proto.mainloop(processor).
    Else the arguments are taken as "pname pvalue" pairs, the processor
    is called once with them, and each result field is printed to
    stdout as a "name->" line followed by the value."""
    if len(sys.argv) == 1:
        proto.mainloop(processor)
        # mainloop does not return. Just in case
        sys.exit(1)
    # Not running the main loop: run one processor call for debugging
    cmdargs = sys.argv[1:]
    if not cmdargs or len(cmdargs) % 2:
        print("Usage: cmdtalk.py pname pvalue [pname pvalue...]",
              file=sys.stderr)
        sys.exit(1)
    # Even positions are the names, odd ones the matching values.
    params = dict(zip(cmdargs[0::2], cmdargs[1::2]))
    result = processor.process(params)
    out = sys.stdout.buffer if PY3 else sys.stdout
    for name, value in result.items():
        # Convert the value before writing anything for this field.
        payload = makebytes(value)
        proto.breakwrite(out, makebytes("%s->\n" % name))
        proto.breakwrite(out, payload)
        out.write(b'\n')
--- a/src/filters/kosplitter.py
+++ b/src/filters/kosplitter.py
@ -0,0 +1,90 @@
 #!/usr/bin/python3
 #################################
 # Copyright (C) 2020 J.F.Dockes
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
 #   the Free Software Foundation; either version 2 of the License, or
 #   (at your option) any later version.
 #
 #   This program is distributed in the hope that it will be useful,
 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #   GNU General Public License for more details.
 #
 #   You should have received a copy of the GNU General Public License
 #   along with this program; if not, write to the
 #   Free Software Foundation, Inc.,
 #   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 ########################################################
 #
 # Interface to the konlpy Korean text analyser: we receive text from
 # our parent process and have it segmented by the analyser, then
 # return the results. The analyser startup is very expensive (several
 # seconds), which is why we can't just execute it from the main
 # process.
 #
 import sys
 import cmdtalk
 from konlpy.tag import Okt,Mecab,Komoran
 class Processor(object):
    """Message processor for the Korean splitter: runs the konlpy tagger
    on the received text and returns the words and their tags.

    The tagger object is created on the first request, according to the
    'tagger' parameter, and kept for all the following ones."""
    def __init__(self, proto):
        self.proto = proto
        # At most one of these gets set, once the matching tagger has
        # been successfully created.
        self.tagsOkt = False
        self.tagsMecab = False
        self.tagsKomoran = False
    def _init_tagger(self, taggername):
        # Create the tagger object. The flag is only set after the
        # constructor returned, so that a failure leaves us uninitialized.
        if taggername == "Okt":
            self.tagger = Okt()
            self.tagsOkt = True
        elif taggername == "Mecab":
            self.tagger = Mecab()
            self.tagsMecab = True
        elif taggername == "Komoran":
            self.tagger = Komoran()
            self.tagsKomoran = True
        else:
            raise Exception("Bad tagger name " + taggername)
    def process(self, params):
        """Tag params['data'].

        Returns {'text': ..., 'tags': ...}: the words and the tags in
        matching order, each element followed by a tab. Tabs inside
        words are replaced by spaces. For Mecab and Komoran, the tags
        for nouns, verbs, adjectives and general adverbs are translated
        to the names used by Okt, the other ones are passed unchanged.
        Returns {'error': message} if a needed parameter is missing."""
        if 'data' not in params:
            return {'error':'No data field in parameters'}
        if not (self.tagsOkt or self.tagsMecab or self.tagsKomoran):
            if 'tagger' not in params:
                return {'error':'No "tagger" field in parameters'}
            self._init_tagger(params['tagger'])
        pos = self.tagger.pos(params['data'])
        #proto.log("%s" % pos)
        translate = (not self.tagsOkt) and (self.tagsMecab or self.tagsKomoran)
        words = []
        tags = []
        for e in pos:
            words.append(e[0].replace('\t', ' '))
            tag = e[1]
            if translate:
                # startswith() instead of indexing: an empty tag must
                # not raise.
                if tag.startswith("N"):
                    tag = "Noun"
                elif tag.startswith("VV"):
                    tag = "Verb"
                elif tag.startswith("VA"):
                    tag = "Adjective"
                elif tag == "MAG":
                    tag = "Adverb"
            tags.append(tag)
        # Every element is followed by the separator, including the last.
        return {'text': "".join(w + "\t" for w in words),
                'tags': "".join(t + "\t" for t in tags)}
 # Script body: create the protocol object (on stdin/stdout by default)
 # and the processor, then hand both to cmdtalk.main(), which runs the
 # message loop when the script is started without arguments, or a
 # single processor call built from the command line otherwise.
 proto = cmdtalk.CmdTalk()
 processor = Processor(proto)
 cmdtalk.main(proto, processor)
--- a/src/filters/rclhwp.py
+++ b/src/filters/rclhwp.py
@ -36,20 +36,6 @@ from hwp5.xmlmodel import Hwp5File as xml_Hwp5File
 from hwp5.utils import cached_property
 # This was duplicated from hwp5 hwp5text.py and I don't really
 # understand what it does...
 RESOURCE_PATH_XSL_TEXT = 'xsl/plaintext.xsl'
 # Transform built on the XSL stylesheet named by RESOURCE_PATH_XSL_TEXT.
 # make_transform_hwp5() and make_xsl_transform() are not defined here:
 # presumably inherited from BaseTransform -- TODO confirm against hwp5.
 class TextTransform(BaseTransform):
    # Property returning the result of make_transform_hwp5() applied to
    # the XSL transform below. Presumably the hwp5 document to text
    # conversion, going by the names -- TODO confirm.
    @property
    def transform_hwp5_to_text(self):
        transform_xhwp5 = self.transform_xhwp5_to_text
        return self.make_transform_hwp5(transform_xhwp5)
    # The XSL transform created by make_xsl_transform() from the
    # stylesheet path, wrapped in hwp5's cached_property.
    @cached_property
    def transform_xhwp5_to_text(self):
        resource_path = RESOURCE_PATH_XSL_TEXT
        return self.make_xsl_transform(resource_path)
 # Associate HTML meta names and hwp summaryinfo values
 def metafields(summaryinfo):
    yield(('Description', summaryinfo.subject + " " +
--- a/src/qtgui/guiutils.cpp
+++ b/src/qtgui/guiutils.cpp
@ -158,7 +158,15 @@ void rwSettings(bool writing)
               "/Recoll/prefs/reslist/collapseDuplicates", Bool, false);
    SETTING_RW(prefs.showResultsAsTable, 
               "/Recoll/prefs/showResultsAsTable", Bool, false);
-    SETTING_RW(prefs.maxhltextmbs, "/Recoll/prefs/preview/maxhltextmbs", Int, 3);
+
    SETTING_RW(prefs.maxhltextkbs, "/Recoll/prefs/preview/maxhltextkbs", Int,
               3000);
    // Compat: if maxhltextkbs is not set but old maxhltextmbs is set use it
    if (!writing && !settings.contains("/Recoll/prefs/preview/maxhltextkbs") &&
        settings.contains("/Recoll/prefs/preview/maxhltextmbs")) {
        prefs.maxhltextkbs = settings.value(
            "/Recoll/prefs/preview/maxhltextmbs").toInt() * 1024;
    }
    SETTING_RW(prefs.previewPlainPre, 
               "/Recoll/prefs/preview/plainPre", Int, PrefsPack::PP_PREWRAP);
--- a/src/qtgui/guiutils.h
+++ b/src/qtgui/guiutils.h
@ -20,6 +20,7 @@
 #include <string>
 #include <list>
 #include <vector>
 #include <set>
 #include <qstring.h>
 #include <qstringlist.h>
@ -46,7 +47,7 @@ class PrefsPack {
    int filterCtlStyle;
    int respagesize{8};
    int historysize{0};
-    int maxhltextmbs;
+    int maxhltextkbs;
    QString reslistfontfamily;
    // Not saved in prefs for now. Computed from qt defaults and used to
    // set main character color for webkit/textbrowser reslist and
@ -154,6 +155,11 @@ class PrefsPack {
    std::string stemlang();
    // MIME types for which we prefer to use stored text from preview
    // rather than extracting the possibly nicer HTML because the
    // extractor is very slow. This is compiled in and there is no UI
    // for now.
    std::set<std::string> preferStoredTextMimes{"application/x-hwp"};
 };
 /** Global preferences record */
--- a/src/qtgui/i18n/recoll_ko.qm
+++ b/src/qtgui/i18n/recoll_ko.qm
--- a/src/qtgui/i18n/recoll_ko.ts
+++ b/src/qtgui/i18n/recoll_ko.ts
--- a/src/qtgui/preview_w.cpp
+++ b/src/qtgui/preview_w.cpp
@ -574,6 +574,90 @@ void Preview::emitWordSelect(QString word)
    emit(wordSelect(word));
 }
 // Show a warning dialog explaining why loading the document failed.
 // @param explain the cause reported for the failure, selects the message.
 // @param canGetRawText if set, a line is added to tell the user that we
 //   are going to try and display the stored text.
 void Preview::displayLoadError(
    FileInterner::ErrorPossibleCause explain, bool canGetRawText)
 {
    // Note that we can't easily check for a readable file
    // because it's possible that only a region is locked
    // (e.g. on Windows for an ost file the first block is
    // readable even if Outlook is running).
    QString text;
    if (explain == FileInterner::FetchMissing) {
        text = tr("Error loading the document: file missing.");
    } else if (explain == FileInterner::FetchPerm) {
        text = tr("Error loading the document: no permission.");
    } else if (explain == FileInterner::FetchNoBackend) {
        text =
            tr("Error loading: backend not configured.");
    } else if (explain == FileInterner::InternfileOther) {
 #ifdef _WIN32
        text = tr("Error loading the document: "
                  "other handler error<br>"
                  "Maybe the application is locking the file ?");
 #else
        text = tr("Error loading the document: other handler error.");
 #endif
    }
    // Any other cause value leaves the main message empty.
    if (canGetRawText) {
        text.append(tr("<br>Attempting to display from stored text."));
    }
    QMessageBox::warning(nullptr, "Recoll", text);
 }
 // Start the load thread and wait for it to finish, running the event
 // loop in one-second slices. Cancellation is requested from the
 // progress dialog, which gets shown on the second slice.
 // @return true if the thread status is 0. Else false: unless the load
 //   was cancelled through CancelCheck, a warning dialog is displayed
 //   first (missing helper, cancel, or load error, with canGetRawText
 //   passed on to displayLoadError()).
 bool Preview::runLoadThread(LoadThread& lthr, QTimer& tT, QEventLoop& loop,
                             QProgressDialog& progress, bool canGetRawText)
 {
    lthr.start();
    int slices = 0;
    while (true) {
        tT.start(1000); 
        loop.exec();
        if (lthr.isFinished()) {
            break;
        }
        if (progress.wasCanceled()) {
            CancelCheck::instance().setCancel();
        }
        if (slices == 1) {
            progress.show();
        }
        ++slices;
    }
    LOGDEB("loadDocInCurrentTab: after file load: cancel " <<
           CancelCheck::instance().cancelState() << " status " << lthr.status <<
           " text length " << lthr.fdoc.text.length() << "\n");
    if (lthr.status == 0) {
        return true;
    }
    if (CancelCheck::instance().cancelState()) {
        return false;
    }
    if (!lthr.missing.empty()) {
        // The conversion needs an external program which was not found
        const QString explain = QString::fromUtf8("<br>") +
            tr("Missing helper program: ") +
            QString::fromLocal8Bit(lthr.missing.c_str());
        QMessageBox::warning(0, "Recoll",
                             tr("Can't turn doc into internal "
                                "representation for ") +
                             lthr.fdoc.mimetype.c_str() + explain);
        return false;
    }
    if (progress.wasCanceled()) {
        QMessageBox::warning(0, "Recoll", tr("Canceled"));
        return false;
    }
    progress.reset();
    displayLoadError(lthr.explain, canGetRawText);
    return false;
 }
 /*
  Code for loading a file into an editor window. The operations that
  we call have no provision to indicate progression, and it would be
@ -628,92 +712,41 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
    ////////////////////////////////////////////////////////////////////////
    // Load and convert document 
-    // idoc came out of the index data (main text and some fields missing). 
+    //  - idoc came out of the index data (main text and some fields missing).
-    // fdoc is the complete one what we are going to extract from storage.
+    //  - fdoc is the complete one what we are going to extract from storage.
    // 
    // If the preference to use the stored text is set, we still
    // create the LoadThread object for convenience (using its fdoc
    // field), but don't start it.
    LoadThread lthr(theconfig, idoc, prefs.previewHtml, this);
    connect(&lthr, SIGNAL(finished()), &loop, SLOT(quit()));
-    lthr.start();
+    bool canGetRawText = rcldb && rcldb->storesDocText();
-    for (int i = 0;;i++) {
+    auto it = prefs.preferStoredTextMimes.find(idoc.mimetype);
-        tT.start(1000); 
+    bool preferStoredText = (it != prefs.preferStoredTextMimes.end());
-        loop.exec();
+    bool loadok{false};
-        if (lthr.isFinished())
+
-            break;
+    if (!preferStoredText || !canGetRawText) {
-        if (progress.wasCanceled()) {
+        // Try load from actual document
-            CancelCheck::instance().setCancel();
+        loadok = runLoadThread(lthr, tT, loop, progress, canGetRawText);
        }
        if (i == 1)
            progress.show();
    }
-    LOGDEB("loadDocInCurrentTab: after file load: cancel " <<
+    if (!loadok && canGetRawText) {
-           CancelCheck::instance().cancelState() << " status " << lthr.status <<
+        // Preferring/able to use stored text or extern load failed
-           " text length " << lthr.fdoc.text.length() << "\n");
+        lthr.fdoc = idoc;
        loadok = rcldb->getDocRawText(lthr.fdoc);
        if (!loadok) {
            QMessageBox::warning(0,"Recoll",tr("Could not fetch stored text"));
        }
    }
-    if (CancelCheck::instance().cancelState())
+    if (!loadok) {
        // Everything failed.
        progress.close();
        return false;
    if (lthr.status != 0) {
        bool canGetRawText = rcldb && rcldb->storesDocText();
        QString explain;
        if (!lthr.missing.empty()) {
            explain = QString::fromUtf8("<br>") +
                tr("Missing helper program: ") +
                QString::fromLocal8Bit(lthr.missing.c_str());
            QMessageBox::warning(0, "Recoll",
                                 tr("Can't turn doc into internal "
                                    "representation for ") +
                                 lthr.fdoc.mimetype.c_str() + explain);
        } else {
            if (progress.wasCanceled()) {
                QMessageBox::warning(0, "Recoll", tr("Canceled"));
            } else {
                progress.reset();
                // Note that we can't easily check for a readable file
                // because it's possible that only a region is locked
                // (e.g. on Windows for an ost file the first block is
                // readable even if Outlook is running).
                QString msg;
                switch (lthr.explain) {
                case FileInterner::FetchMissing:
                    msg = tr("Error loading the document: file missing.");
                    break;
                case FileInterner::FetchPerm:
                    msg = tr("Error loading the document: no permission.");
                    break;
                case FileInterner::FetchNoBackend:
                    msg =
                        tr("Error loading: backend not configured.");
                    break;
                case FileInterner::InternfileOther:
 #ifdef _WIN32
                    msg = tr("Error loading the document: "
                             "other handler error<br>"
                             "Maybe the application is locking the file ?");
 #else
                    msg = tr("Error loading the document: other handler error.");
 #endif
                    break;
                }
                if (canGetRawText) {
                    msg += tr("<br>Attempting to display from stored text.");
                }
                QMessageBox::warning(0, "Recoll", msg);
            }
        }
        if (canGetRawText) {
            lthr.fdoc = idoc;
            if (!rcldb->getDocRawText(lthr.fdoc)) {
                QMessageBox::warning(0, "Recoll",
                                     tr("Could not fetch stored text"));
                progress.close();
                return false;
            }
        } else {
            progress.close();
        }
    }
    // Reset config just in case.
    theconfig->setKeyDir("");
@ -722,8 +755,8 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
    // We don't do the highlighting for very big texts: too long. We
    // should at least do special char escaping, in case a '&' or '<'
    // somehow slipped through previous processing.
-    bool highlightTerms = lthr.fdoc.text.length() < 
+    bool highlightTerms = int(lthr.fdoc.text.length()) < 
-        (unsigned long)prefs.maxhltextmbs * 1024 * 1024;
+        prefs.maxhltextkbs * 1024;
    // Final text is produced in chunks so that we can display the top
    // while still inserting at bottom
@ -752,7 +785,6 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
    QStringList qrichlst;
    editor->m_plaintorich->set_activatelinks(prefs.previewActiveLinks);
 #if 1
    if (highlightTerms) {
        progress.setLabelText(tr("Creating preview text"));
        qApp->processEvents();
@ -815,17 +847,6 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
            }
        }
    }
 #else // For testing qtextedit bugs...
    highlightTerms = true;
    const char *textlist[] =
        {
            "Du plain text avec un\n <termtag>termtag</termtag> fin de ligne:",
            "texte apres le tag\n",
        };
    const int listl = sizeof(textlist) / sizeof(char*);
    for (int i = 0 ; i < listl ; i++)
        qrichlst.push_back(QString::fromUtf8(textlist[i]));
 #endif
    ///////////////////////////////////////////////////////////
--- a/src/qtgui/preview_w.h
+++ b/src/qtgui/preview_w.h
@ -44,9 +44,11 @@
 #include "rcldb.h"
 #include "plaintorich.h"
 #include "rclmain_w.h"
 #include "internfile.h"
 #include "ui_preview.h"
 class QTabWidget;
 class QLabel;
 class QPushButton;
@ -55,6 +57,10 @@ class Preview;
 class PlainToRichQtPreview;
 class QUrl;
 class RclMain;
 class LoadThread;
 class QTimer;
 class QEventLoop;
 class QProgressDialog;
 class PreviewTextEdit : public PREVIEW_PARENTCLASS {
    Q_OBJECT;
@ -185,6 +191,10 @@ private:
    virtual PreviewTextEdit *currentEditor();
    virtual PreviewTextEdit *addEditorTab();
    virtual bool loadDocInCurrentTab(const Rcl::Doc& idoc, int dnm);
    void displayLoadError(
        FileInterner::ErrorPossibleCause explain, bool canGetRawText);
    bool runLoadThread(LoadThread& lthr, QTimer& tT, QEventLoop& loop,
                       QProgressDialog& progress, bool canGetRawText);
 };
 #endif /* _PREVIEW_W_H_INCLUDED_ */
--- a/src/qtgui/recoll.pro.in
+++ b/src/qtgui/recoll.pro.in
@ -168,7 +168,7 @@ i18n/recoll_zh_CN.ts \
 i18n/recoll_fr.ts \
 i18n/recoll_xx.ts \
 i18n/recoll_cs.ts \
-i18n/recoll_kr.ts \
+i18n/recoll_ko.ts \
 i18n/recoll_el.ts \
 i18n/recoll_tr.ts
--- a/src/qtgui/uiprefs.ui
+++ b/src/qtgui/uiprefs.ui
--- a/src/qtgui/uiprefs_w.cpp
+++ b/src/qtgui/uiprefs_w.cpp
@ -112,7 +112,7 @@ void UIPrefsDialog::setFromPrefs()
    pageLenSB->setValue(prefs.respagesize);
    maxHistSizeSB->setValue(prefs.historysize);
    collapseDupsCB->setChecked(prefs.collapseDuplicates);
-    maxHLTSB->setValue(prefs.maxhltextmbs);
+    maxHLTSB->setValue(prefs.maxhltextkbs);
    if (prefs.ssearchTypSav) {
        ssearchTypCMB->setCurrentIndex(4);
@ -304,7 +304,7 @@ void UIPrefsDialog::accept()
    prefs.respagesize = pageLenSB->value();
    prefs.historysize = maxHistSizeSB->value();
    prefs.collapseDuplicates = collapseDupsCB->isChecked();
-    prefs.maxhltextmbs = maxHLTSB->value();
+    prefs.maxhltextkbs = maxHLTSB->value();
    prefs.qtermstyle = qtermStyleLE->text();
    prefs.abssep = abssepLE->text();
--- a/src/utils/cmdtalk.cpp
+++ b/src/utils/cmdtalk.cpp
@ -0,0 +1,243 @@
 /* Copyright (C) 2016 J.F.Dockes 
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published by
 *   the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
 #include "cmdtalk.h"
 #include <stdio.h>
 #include <iostream>
 #include <sstream>
 #include <mutex>
 #include "smallut.h"
 #include "execmd.h"
 #ifdef MDU_INCLUDE_LOG
 #include MDU_INCLUDE_LOG
 #else
 #include "log.h"
 #endif
 using namespace std;
 // Thrown by Canceler::newData() when the allowed delay is exceeded.
 class TimeoutExcept {};

 // ExecCmdAdvise implementation used to abort an exchange which takes
 // too long: once armed by reset(), newData() throws TimeoutExcept if
 // more than m_timeosecs seconds have elapsed.
 class Canceler : public ExecCmdAdvise {
 public:
    // @param tmsecs maximum delay, compared against time() differences,
    //   so expressed in seconds.
    Canceler(int tmsecs)
        : m_timeosecs(tmsecs)
    {
    }

    // Advise callback. The count argument is not used, we only check the
    // clock.
    virtual void newData(int /*cnt*/)
    {
        // Not armed yet: never time out.
        if (!m_starttime) {
            return;
        }
        const time_t elapsed = time(0) - m_starttime;
        if (elapsed > m_timeosecs) {
            throw TimeoutExcept();
        }
    }

    // (Re)arm the timeout: the delay is counted from now.
    void reset()
    {
        m_starttime = time(0);
    }

    // Allowed delay in seconds.
    int m_timeosecs;
    // Time of the last reset(), 0 while not armed.
    time_t m_starttime{0};
 };
 // Private implementation data and methods for CmdTalk.
 class CmdTalk::Internal {
 public:
    // @param timeosecs delay handed to the timeout checker.
    Internal(int timeosecs)
        : m_cancel(timeosecs)
    {
    }

    // We own the command object (allocated by CmdTalk::startCmd()).
    ~Internal()
    {
        delete cmd;
    }

    // Read one "name: len\ndata" element from the command output.
    bool readDataElement(string& name, string &data);

    // Send a request message and read the answer into rep.
    bool talk(const pair<string, string>& arg0,
              const unordered_map<string, string>& args,
              unordered_map<string, string>& rep);

    // Command executor. Null until startCmd() has been called.
    ExecCmd *cmd{nullptr};
    // Timeout checker, set as the command's advise object by startCmd().
    Canceler m_cancel;
    // Held for the duration of talk().
    std::mutex mmutex;
 };
 // Allocate the private implementation object. No command is started
 // here, see startCmd().
 CmdTalk::CmdTalk(int timeosecs)
    : m(new Internal(timeosecs))
 {
 }

 // Release the private implementation (which deletes the command object).
 CmdTalk::~CmdTalk()
 {
    delete m;
 }
 // (Re)start the command which we are going to talk to.
 //
 // @param cmdname command name or path.
 // @param args command line arguments.
 // @param env "name=value" entries, each passed to ExecCmd::putenv().
 // @param path if not empty, list of directories, joined with ':' and
 //   handed to ExecCmd::which() to locate the command.
 // @return false if ExecCmd::startExec() reported an error (negative
 //   return), else true.
 bool CmdTalk::startCmd(const string& cmdname,
                       const vector<string>& args,
                       const vector<string>& env,
                       const vector<string>& path)
 {
    LOGDEB("CmdTalk::startCmd\n");

    // Get rid of a possible previous instance and set up a fresh one.
    delete m->cmd;
    m->cmd = new ExecCmd;
    m->cmd->setAdvise(&m->m_cancel);
    for (const auto& envassign : env) {
        m->cmd->putenv(envassign);
    }

    // Command to execute: as given, unless a search path was specified.
    string acmdname(cmdname);
    if (!path.empty()) {
        // Build the colon-separated directory list.
        string colonpath;
        for (auto dirit = path.begin(); dirit != path.end(); ++dirit) {
            if (dirit != path.begin()) {
                colonpath += ":";
            }
            colonpath += *dirit;
        }
        LOGDEB("CmdTalk::startCmd: PATH: [" << colonpath << "]\n");
        ExecCmd::which(cmdname, acmdname, colonpath.c_str());
    }

    return m->cmd->startExec(acmdname, args, 1, 1) >= 0;
 }
 // Messages are made of data elements. Each element is like:
 // name: len\ndata
 // An empty line signals the end of the message, so the whole thing
 // would look like:
 // Name1: Len1\nData1Name2: Len2\nData2\n
 //
 // Read one data element from the command output.
 //
 // @param[out] name element name as found on the header line (the
 //   trailing ':' is not removed here). Left empty when the line read
 //   was the empty line which ends a message.
 // @param[out] data element data (len bytes), empty if len is not
 //   positive.
 // @return false on read error, timeout, or malformed header line. true
 //   if an element or the end-of-message line was read.
 bool CmdTalk::Internal::readDataElement(string& name, string &data)
 {
    // Always start from empty outputs: the caller detects the end of the
    // message by testing for an empty name, so this must not depend on
    // what the strings held on entry.
    name.clear();
    data.clear();

    string ibuf;
    // Arm the timeout for this element.
    m_cancel.reset();
    try {
        // Read name and length
        if (cmd->getline(ibuf) <= 0) {
            LOGERR("CmdTalk: getline error\n");
            return false;
        }
    } catch (const TimeoutExcept&) {
        LOGINF("CmdTalk:readDataElement: fatal timeout (" <<
               m_cancel.m_timeosecs << " S)\n");
        return false;
    }
    LOGDEB1("CmdTalk:rde: line [" << ibuf << "]\n");

    // Empty line (end of message) ?
    if (!ibuf.compare("\n")) {
        LOGDEB1("CmdTalk: Got empty line\n");
        return true;
    }

    // We're expecting something like Name: len\n
    vector<string> tokens;
    stringToTokens(ibuf, tokens);
    if (tokens.size() != 2) {
        LOGERR("CmdTalk: bad line in filter output: [" << ibuf << "]\n");
        return false;
    }
    name = tokens[0];
    const string& slen = tokens[1];
    int len;
    if (sscanf(slen.c_str(), "%d", &len) != 1) {
        LOGERR("CmdTalk: bad line in filter output: [" << ibuf << "]\n");
        return false;
    }

    // Read element data. A short read is an error.
    if (len > 0 && cmd->receive(data, len) != len) {
        LOGERR("CmdTalk: expected " << len << " bytes of data, got " <<
               data.length() << "\n");
        return false;
    }
    LOGDEB1("CmdTalk:rde: got: name [" << name << "] len " << len <<"value ["<<
            (data.size() > 100 ? (data.substr(0, 100) + " ...") : data)<< endl);
    return true;
 }
 // Perform one exchange with the command: send a request message, then
 // read the answer elements until the end-of-message line.
 //
 // @param arg0 optional first element (name, value), sent before the
 //   others. Skipped if its name is empty.
 // @param args the other request elements (name -> value).
 // @param[out] rep answer elements (name -> value) are stored here.
 //   Existing entries are kept, or overwritten if the name matches.
 // @return false if there is no running command, if sending or reading
 //   fails (the child is then zapped), or if the answer holds a
 //   "cmdtalkstatus" element. true otherwise.
 bool CmdTalk::Internal::talk(const pair<string, string>& arg0,
                             const unordered_map<string, string>& args,
                             unordered_map<string, string>& rep)
 {
    // One exchange at a time.
    std::unique_lock<std::mutex> lock(mmutex);

    // cmd stays null until startCmd() has been called: check it before
    // asking for the child pid.
    if (nullptr == cmd || cmd->getChildPid() <= 0) {
        LOGERR("CmdTalk::talk: no process\n");
        return false;
    }

    // Build the request: each element is "name: len\ndata", and an empty
    // line ends the message.
    ostringstream obuf;
    if (!arg0.first.empty()) {
        obuf << arg0.first << ": " << arg0.second.size() << "\n" << arg0.second;
    }
    for (const auto& it : args) {
        obuf << it.first << ": " << it.second.size() << "\n" << it.second;
    }
    obuf << "\n";

    if (cmd->send(obuf.str()) < 0) {
        cmd->zapChild();
        LOGERR("CmdTalk: send error\n");
        return false;
    }

    // Read answer (multiple elements)
    LOGDEB1("CmdTalk: reading answer\n");
    for (;;) {
        string name, data;
        if (!readDataElement(name, data)) {
            cmd->zapChild();
            return false;
        }
        // An empty name means that we got the end-of-message line.
        if (name.empty()) {
            break;
        }
        // The name token comes with its ':' separator.
        trimstring(name, ":");
        LOGDEB1("CmdTalk: got [" << name << "] -> [" << data << "]\n");
        rep[name] = data;
    }

    // The presence of a "cmdtalkstatus" element in the answer is
    // reported as a failure.
    return rep.find("cmdtalkstatus") == rep.end();
 }
 // Tell if we have a command object with a positive child process id.
 bool CmdTalk::running()
 {
    if (!m || !m->cmd) {
        return false;
    }
    return m->cmd->getChildPid() > 0;
 }
 bool CmdTalk::talk(const unordered_map<string, string>& args,
                   unordered_map<string, string>& rep)
 {
    return m->talk({"",""}, args, rep);
 }
 bool CmdTalk::callproc(
    const string& proc,
    const unordered_map<std::string, std::string>& args,
    unordered_map<std::string, std::string>& rep)
 {
    return m->talk({"cmdtalk:proc", proc}, args, rep);
 }
--- a/src/utils/cmdtalk.h
+++ b/src/utils/cmdtalk.h
@ -0,0 +1,109 @@
 /* Copyright (C) 2016 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published by
 *   the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
 #ifndef _CMDTALK_H_INCLUDED_
 #define _CMDTALK_H_INCLUDED_
 /** 
 * Execute a command and exchange messages with it.
 *
 * A simple stream protocol is used for the dialog. HTTP or some kind
 * of full-blown RPC could have been used, but there was also good
 * reason to keep it simple (yet powerful), given the limited context
 * of dialog through a pipe.
 *
 * The data is exchanged in TLV fashion, in a way that should be
 * usable in most script languages. The basic unit of data has one line 
 * with a data type and a count (both ASCII), followed by the data. A
 * 'message' is made of one or several units or tags and ends with one empty
 * line. 
 * 
 * Example:(the message begins before 'Filename' and has 'Filename' and 
 * 'Ipath' tags):
 * 
 Filename: 24
 /my/home/mail/somefolderIpath: 2
 22
 <Message ends here: because of the empty line after '22'
 * 
 * Example answer, with 'Mimetype' and 'Data' tags
 * 
 Mimetype: 10
 text/plainData: 10
 0123456789
 <Message ends here because of empty line
 *        
 * This format is both extensible and reasonably easy to parse. 
 * While it's more fitted for python or perl on the script side, it
 * should even be sort of usable from the shell (e.g.: use dd to read
 * the counted data). Most alternatives would need data encoding in
 * some cases.
 *
 * Higher level dialog:
 * The C++ program is the master and sends request messages to the script. 
 * Both sides of the communication should be prepared to receive and discard 
 * unknown tags.
 */
 #include <string>
 #include <vector>
 #include <unordered_map>
 class CmdTalk {
 public:
    // @param timeosecs timeout value, in seconds, for the exchanges with
    //   the command.
    CmdTalk(int timeosecs);
    virtual ~CmdTalk();

    // Not copyable.
    CmdTalk(const CmdTalk&) = delete;
    CmdTalk &operator=(const CmdTalk &) = delete;

    // Start the command.
    // @param cmdname command name.
    // @param args command line arguments.
    // @param env each entry should be of the form name=value. They
    //   augment the subprocess environment.
    // @param path replaces the PATH variable when looking for the command.
    //
    // Note that cmdtalk.py:main() method is a test routine which
    // expects data pairs on the command line. If actual parameters
    // need to be passed, it can't be used by the processor.
    virtual bool startCmd(
        const std::string& cmdname,
        const std::vector<std::string>& args = std::vector<std::string>(),
        const std::vector<std::string>& env = std::vector<std::string>(),
        const std::vector<std::string>& path = std::vector<std::string>());

    // Tell if the command was started and has a child process.
    virtual bool running();

    // Single exchange: send and receive data.
    // @param args elements to send (name -> value).
    // @param rep receives the answer elements (name -> value).
    virtual bool talk(
        const std::unordered_map<std::string, std::string>& args,
        std::unordered_map<std::string, std::string>& rep);

    // Specialized version with special argument used by dispatcher to call
    // designated method
    // @param proc name of the method to call.
    virtual bool callproc(
        const std::string& proc,
        const std::unordered_map<std::string, std::string>& args,
        std::unordered_map<std::string, std::string>& rep);

 private:
    // Private implementation, defined in cmdtalk.cpp.
    class Internal;
    Internal *m{nullptr};
 };
 #endif /* _CMDTALK_H_INCLUDED_ */