Korean splitter: only break a Korean stretch on a non-Korean alphabetic character (i.e. numbers and punctuation no longer end the stretch)
This commit is contained in:
parent
023bdc055e
commit
97e89c408a
@@ -124,9 +124,9 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
|
|||||||
string::size_type orgbytepos = it.getBpos();
|
string::size_type orgbytepos = it.getBpos();
|
||||||
for (; !it.eof(); it++) {
|
for (; !it.eof(); it++) {
|
||||||
c = *it;
|
c = *it;
|
||||||
if (!isHANGUL(c) && !(isspace(c) || ispunct(c))) {
|
if (!isHANGUL(c) && isalpha(c)) {
|
||||||
// Done with Korean stretch, process and go back to main routine
|
// Done with Korean stretch, process and go back to main routine
|
||||||
//std::cerr << "Broke on char " << int(c) << endl;
|
std::cerr << "Broke on char " << (std::string)it << endl;
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
it.appendchartostring(inputdata);
|
it.appendchartostring(inputdata);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user