korean: reactivate option to generate both noun,jx and noun+jx

This commit is contained in:
Jean-Francois Dockes 2020-05-19 09:23:03 +02:00
parent 73f2836317
commit ea2db676ed

View File

@@ -218,10 +218,9 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
" bytepos " << bytepos << " word from text: " << " bytepos " << bytepos << " word from text: " <<
inputdata.substr(bytepos, word.size()) << endl); inputdata.substr(bytepos, word.size()) << endl);
bool isNoun = (tags[i] == "Noun"); bool isNoun = (tags[i] == "Noun");
#if 0
// When Noun followed by JX, emit both Noun and Noun+JX at the // When Noun followed by JX, emit both Noun and Noun+JX at the
// same pos Experimental, it seems that this is sometimes // same pos. This is because the compound term may actually
// problematic, so turned off for now. // mean something else, if it's a phonetic transcription.
if (isNoun) { if (isNoun) {
lastNoun = word; lastNoun = word;
lastNounWordPos = m_wordpos; lastNounWordPos = m_wordpos;
@@ -235,7 +234,6 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
} }
lastNoun.clear(); lastNoun.clear();
} }
#endif
// 11/05/2020 For now index everything until more precise // 11/05/2020 For now index everything until more precise
// verification of what should be pruned // verification of what should be pruned
if (true || (isNoun || tags[i] == "Verb" || if (true || (isNoun || tags[i] == "Verb" ||