From 00eb803f5d2eb835856684e4eb63197730c092f5 Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes
Date: Fri, 5 Jul 2019 17:57:00 +0200
Subject: [PATCH] Do not process hangul as words, but as ngrams. Same issues as with Katakana: word separation too hard

---
 src/common/textsplit.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp
index f018348a..c13eff70 100644
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@@ -44,8 +44,8 @@
 // ngrams
 #undef KATAKANA_AS_WORDS
 
-// Same for Korean syllabic
-#define HANGUL_AS_WORDS
+// Same for Korean syllabic, and same problem, not used.
+#undef HANGUL_AS_WORDS
 
 using namespace std;
 