korean: reactivate option to generate both noun,jx and noun+jx
This commit is contained in:
parent
73f2836317
commit
ea2db676ed
@ -218,10 +218,9 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
|
|||||||
" bytepos " << bytepos << " word from text: " <<
|
" bytepos " << bytepos << " word from text: " <<
|
||||||
inputdata.substr(bytepos, word.size()) << endl);
|
inputdata.substr(bytepos, word.size()) << endl);
|
||||||
bool isNoun = (tags[i] == "Noun");
|
bool isNoun = (tags[i] == "Noun");
|
||||||
#if 0
|
|
||||||
// When Noun followed by JX, emit both Noun and Noun+JX at the
|
// When Noun followed by JX, emit both Noun and Noun+JX at the
|
||||||
// same pos Experimental, it seems that this is sometimes
|
// same pos. This is because the compound term may actually
|
||||||
// problematic, so turned off for now.
|
// mean something else, if it's a phonetic transcription.
|
||||||
if (isNoun) {
|
if (isNoun) {
|
||||||
lastNoun = word;
|
lastNoun = word;
|
||||||
lastNounWordPos = m_wordpos;
|
lastNounWordPos = m_wordpos;
|
||||||
@ -229,13 +228,12 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
|
|||||||
} else {
|
} else {
|
||||||
if (tags[i] == "JX" && !lastNoun.empty()) {
|
if (tags[i] == "JX" && !lastNoun.empty()) {
|
||||||
if (!takeword(lastNoun+word, lastNounWordPos, lastNounBytePos,
|
if (!takeword(lastNoun+word, lastNounWordPos, lastNounBytePos,
|
||||||
lastNounBytePos + word.size())) {
|
lastNounBytePos + word.size())) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
lastNoun.clear();
|
lastNoun.clear();
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
// 11/05/2020 For now index everything until more precise
|
// 11/05/2020 For now index everything until more precise
|
||||||
// verification of what should be pruned
|
// verification of what should be pruned
|
||||||
if (true || (isNoun || tags[i] == "Verb" ||
|
if (true || (isNoun || tags[i] == "Verb" ||
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user