Make the maximum number of words in a span a parameter (it was hard-coded to 6, which remains the default value)

This commit is contained in:
Jean-Francois Dockes 2022-06-01 09:37:51 +02:00
parent e87d7f0683
commit a6a2abd251
2 changed files with 6 additions and 2 deletions

View File

@ -81,6 +81,8 @@ unsigned int TextSplit::o_CJKNgramLen{2};
bool TextSplit::o_noNumbers{false};
bool TextSplit::o_deHyphenate{false};
int TextSplit::o_maxWordLength{40};
int TextSplit::o_maxWordsInSpan{6};
static const int o_CJKMaxNgramLen{5};
bool o_exthangultagger{false};
@ -90,6 +92,7 @@ static char underscoreatend = '_';
void TextSplit::staticConfInit(RclConfig *config)
{
config->getConfParam("maxtermlength", &o_maxWordLength);
config->getConfParam("maxwordsinspan", &o_maxWordsInSpan);
bool bvalue{false};
if (config->getConfParam("nocjk", &bvalue) && bvalue == true) {
@ -505,7 +508,7 @@ bool TextSplit::words_from_span(size_t bp)
inline bool TextSplit::doemit(bool spanerase, size_t _bp)
{
int bp = int(_bp);
LOGDEB2("TextSplit::doemit: sper " << spanerase << " bp " << bp <<
LOGERR("TextSplit::doemit: sper " << spanerase << " bp " << bp <<
" spp " << m_spanpos << " spanwords " << m_words_in_span.size() <<
" wS " << m_wordStart << " wL " << m_wordLen << " inn " <<
m_inNumber << " span [" << m_span << "]\n");
@ -513,7 +516,7 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
if (m_wordLen) {
// We have a current word. Remember it
if (m_words_in_span.size() >= 6) {
if (int(m_words_in_span.size()) >= o_maxWordsInSpan) {
// Limit max span word count
spanerase = true;
}

View File

@ -162,6 +162,7 @@ private:
static bool o_deHyphenate; // false
static unsigned int o_CJKNgramLen; // 2
static int o_maxWordLength; // 40
static int o_maxWordsInSpan; // 6
Flags m_flags;