diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp
index bb61696e..59bbfbbe 100644
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@@ -44,8 +44,10 @@
 // ngrams
 #undef KATAKANA_AS_WORDS
 
-// Same for Korean syllabic, and same problem, not used.
-#undef HANGUL_AS_WORDS
+// Same for Korean syllabic, and same problem. However we have a
+// runtime option to use an external text analyser for hangul, so this
+// is defined at compile time.
+#define HANGUL_AS_WORDS
 
 using namespace std;
 
@@ -246,7 +248,6 @@ static inline int whatcc(unsigned int c, char *asciirep = nullptr)
 #define UNICODE_IS_KATAKANA(p) false
 #endif
 
-#define HANGUL_AS_WORDS
 #ifdef HANGUL_AS_WORDS
 #define UNICODE_IS_HANGUL(p) (                 \
         ((p) >= 0x1100 && (p) <= 0x11FF) ||    \
@@ -290,6 +291,7 @@ bool          TextSplit::o_noNumbers{false};
 bool          TextSplit::o_deHyphenate{false};
 int           TextSplit::o_maxWordLength{40};
 static const int o_CJKMaxNgramLen{5};
+bool o_exthangultagger{false};
 
 void TextSplit::staticConfInit(RclConfig *config)
 {
@@ -324,7 +326,13 @@ void TextSplit::staticConfInit(RclConfig *config)
             charclasses[int('\\')] = SPACE;
         }
     }
-    koStaticConfInit(config);
+
+    string kotagger;
+    config->getConfParam("hangultagger", kotagger);
+    if (!kotagger.empty()) {
+        o_exthangultagger = true;
+        koStaticConfInit(config, kotagger);
+    }
 }
 
 // Final term checkpoint: do some checking (the kind which is simpler
@@ -627,7 +635,11 @@ bool TextSplit::text_to_words(const string &in)
         if (UNICODE_IS_KATAKANA(c)) {
             csc = CSC_KATAKANA;
         } else if (UNICODE_IS_HANGUL(c)) {
-            csc = CSC_HANGUL;
+            if (o_exthangultagger) {
+                csc = CSC_HANGUL;
+            } else {
+                csc = CSC_CJK;
+            }
         } else if (UNICODE_IS_CJK(c)) {
             csc = CSC_CJK;
         } else {
@@ -635,15 +647,13 @@ bool TextSplit::text_to_words(const string &in)
         }
 
         if (o_processCJK && (csc == CSC_CJK || csc == CSC_HANGUL)) {
-            // CJK character hit. Hangul processing may be special or
-            // not depending on how we were built.
+            // CJK character hit. Hangul processing may be special.
 
             // Do like at EOF with the current non-cjk data.
             if (m_wordLen || m_span.length()) {
                 if (!doemit(true, it.getBpos()))
                     return false;
             }
-
             // Hand off situation to the appropriate routine.
             if (csc == CSC_HANGUL) {
                 if (!ko_to_words(&it, &c)) {
diff --git a/src/common/textsplit.h b/src/common/textsplit.h
index 8f8f19d3..3cf7adf3 100644
--- a/src/common/textsplit.h
+++ b/src/common/textsplit.h
@@ -54,7 +54,7 @@ public:
     /** Call at program initialization to read non default values from the 
         configuration */
     static void staticConfInit(RclConfig *config);
-    static void koStaticConfInit(RclConfig *config);
+    static void koStaticConfInit(RclConfig *config, const std::string& tagger);
     
     /** Split text, emit words and positions. */
     virtual bool text_to_words(const std::string &in);
diff --git a/src/common/textsplitko.cpp b/src/common/textsplitko.cpp
index e4c624b4..7d26e0a6 100644
--- a/src/common/textsplitko.cpp
+++ b/src/common/textsplitko.cpp
@@ -15,6 +15,13 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+// Specialized Korean text splitter using konlpy running in a Python
+// subprocess. konlpy can use several different backends. We support
+// Okt (Twitter) and Mecab at this point. Unfortunately the different
+// backends have different POS TAG names, so that things are not
+// completely transparent when switching backends (the tag names need
+// to be translated in the Python program).
+
 #include "autoconfig.h"
 
 #include <iostream>
@@ -33,16 +40,27 @@
 
 using namespace std;
 
+// Separator char used in words and tags lists.
+static const string sepchars("\t");
+
 static CmdTalk *o_talker;
 static bool o_starterror{false};
 static string o_cmdpath;
 std::mutex o_mutex;
+static string o_taggername{"Okt"};
 
-void TextSplit::koStaticConfInit(RclConfig *config)
+void TextSplit::koStaticConfInit(RclConfig *config, const string& tagger)
 {
     o_cmdpath = config->findFilter("kosplitter.py");
+    if (tagger == "Okt" || tagger == "Mecab") { // The supported konlpy backends
+        o_taggername = tagger;
+    } else {
+        LOGERR("TextSplit::koStaticConfInit: unknown tagger [" << tagger <<
+               "], using Okt\n");
+    }
 }
 
+// Start the Python subprocess
 static bool initCmd()
 {
     if (o_starterror) {
@@ -68,8 +86,6 @@ static bool initCmd()
     return true;
 }
 
-static const string sepchars("\t");
-
 bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
 {
     std::unique_lock<std::mutex> mylock(o_mutex);
@@ -78,18 +94,28 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
             return false;
         }
     }
+
     LOGDEB1("k_to_words: m_wordpos " << m_wordpos << "\n");
     Utf8Iter &it = *itp;
     unsigned int c = 0;
+
     unordered_map<string, string> args;
+
     args.insert(pair<string,string>{"data", string()});
     string& inputdata{args.begin()->second};
-    string::size_type orgbytepos = it.getBpos();
+
+    // We send the tagger name every time but it's only used the first
+    // time: it can't be changed after init. We could avoid sending it
+    // every time, but I don't think that the performance hit is
+    // significant.
+    args.insert(pair<string,string>{"tagger", o_taggername});
     
-    // Gather all Korean characters and send the text to the analyser
+    // Walk the Korean characters section and send the text to the
+    // analyser. Only ASCII is passed to isspace()/ispunct(): UB otherwise.
+    string::size_type orgbytepos = it.getBpos();
     for (; !it.eof(); it++) {
         c = *it;
-        if (!isHANGUL(c) && !(isascii(c) && (isspace(c) || ispunct(c)))) {
+        if (!isHANGUL(c) && !(c < 0x80 && (isspace(c) || ispunct(c)))) {
             // Done with Korean stretch, process and go back to main routine
             //std::cerr << "Broke on char " << int(c) << endl;
             break;
@@ -97,10 +123,6 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
             it.appendchartostring(inputdata);
         }
     }
-    // Need to convert white text spans to single space otherwise the
-    // byte offsets will be wrong
-    
-    string::size_type textsize = inputdata.size();
     LOGDEB1("TextSplit::k_to_words: sending out " << inputdata.size() <<
             " bytes " << inputdata << endl);
     unordered_map<string,string> result;
@@ -161,11 +183,11 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
     }
 
 #if DO_CHECK_THINGS
-    int sizediff = textsize - (bytepos - orgbytepos);
+    int sizediff = inputdata.size() - (bytepos - orgbytepos);
     if (sizediff < 0)
         sizediff = -sizediff;
     if (sizediff > 1) {
-        LOGERR("ORIGINAL TEXT SIZE: " << textsize <<
+        LOGERR("ORIGINAL TEXT SIZE: " << inputdata.size() <<
                " FINAL BYTE POS " << bytepos - orgbytepos <<
                " TEXT [" << inputdata << "]\n");
     }
diff --git a/src/filters/kosplitter.py b/src/filters/kosplitter.py
index 4037acb5..c586cfff 100755
--- a/src/filters/kosplitter.py
+++ b/src/filters/kosplitter.py
@@ -28,17 +28,32 @@
 import sys
 import cmdtalk
 
-from konlpy.tag import Okt,Kkma
+from konlpy.tag import Okt,Mecab
 
 class Processor(object):
     def __init__(self, proto):
         self.proto = proto
-        self.tagger = Okt()
-        #self.tagger = Kkma()
+        self.tagsOkt = False
+        self.tagsMecab = False
 
+    def _init_tagger(self, taggername):
+        """Create the konlpy tagger named taggername and flag its tag set.
+
+        Raises Exception for any name other than "Okt" or "Mecab"."""
+        backends = {"Okt": Okt, "Mecab": Mecab}
+        if taggername not in backends:
+            raise Exception("Bad tagger name " + taggername)
+        self.tagger = backends[taggername]()
+        setattr(self, "tags" + taggername, True)
+        
     def process(self, params):
         if 'data' not in params:
             return {'error':'No data field in parameters'}
+        if not (self.tagsOkt or self.tagsMecab):
+            if 'tagger' not in params:
+                return {'error':'No "tagger" field in parameters'}
+            self._init_tagger(params['tagger']);
+                              
         pos = self.tagger.pos(params['data'])
         #proto.log("%s" % pos)
         text = ""
@@ -47,10 +62,25 @@ class Processor(object):
             word = e[0]
             word = word.replace('\t', ' ')
             text += word + "\t"
-            tags += e[1] + "\t"
+            tag = e[1]
+            if self.tagsOkt:
+                pass
+            elif self.tagsMecab:
+                tb = tag[0:2]
+                if tb[0] == "N":
+                    tag = "Noun"
+                elif tb == "VV":
+                    tag = "Verb"
+                elif tb == "VA":
+                    tag = "Adjective"
+                elif tag == "MAG":
+                    tag = "Adverb"
+            else:
+                pass
+            tags += tag + "\t"
         return {'text': text, 'tags': tags}
 
+
 proto = cmdtalk.CmdTalk()
 processor = Processor(proto)
 cmdtalk.main(proto, processor)
-
diff --git a/src/utils/cmdtalk.h b/src/utils/cmdtalk.h
index b7a55cb1..9f937b97 100644
--- a/src/utils/cmdtalk.h
+++ b/src/utils/cmdtalk.h
@@ -74,6 +74,10 @@ class CmdTalk {
     // @param env each entry should be of the form name=value. They
     //   augment the subprocess environnement.
     // @param path replaces the PATH variable when looking for the command.
+    // 
+    // Note that cmdtalk.py:main() method is a test routine which
+    // expects data pairs on the command line. If actual parameters
+    // need to be passed, it can't be used by the processor.
     virtual bool startCmd(const std::string& cmdname,
 			  const std::vector<std::string>& args =
 			  std::vector<std::string>(),