diff --git a/src/common/textsplitko.cpp b/src/common/textsplitko.cpp index 9721b8ee..f3257b6d 100644 --- a/src/common/textsplitko.cpp +++ b/src/common/textsplitko.cpp @@ -49,6 +49,10 @@ static string o_cmdpath; std::mutex o_mutex; static string o_taggername{"Okt"}; +// The Python/Java splitter is leaking memory. We restart it from time to time +static uint64_t restartcount; +static uint64_t restartthreshold = 5 * 1000 * 1000; + void TextSplit::koStaticConfInit(RclConfig *config, const string& tagger) { o_cmdpath = config->findFilter("kosplitter.py"); @@ -68,7 +72,13 @@ static bool initCmd() return false; } if (o_talker) { - return true; + if (restartcount > restartthreshold) { + delete o_talker; + o_talker = nullptr; + restartcount = 0; + } else { + return true; + } } if (o_cmdpath.empty()) { return false; @@ -89,10 +99,9 @@ static bool initCmd() bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp) { std::unique_lock mylock(o_mutex); + initCmd(); if (nullptr == o_talker) { - if (!initCmd()) { - return false; - } + return false; } LOGDEB1("k_to_words: m_wordpos " << m_wordpos << "\n"); @@ -125,6 +134,7 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp) } LOGDEB1("TextSplit::k_to_words: sending out " << inputdata.size() << " bytes " << inputdata << endl); + restartcount += inputdata.size(); unordered_map result; if (!o_talker->talk(args, result)) { LOGERR("Python splitter for Korean failed\n"); diff --git a/src/filters/rclhwp.py b/src/filters/rclhwp.py index 973e486a..40f6ae13 100755 --- a/src/filters/rclhwp.py +++ b/src/filters/rclhwp.py @@ -36,20 +36,6 @@ from hwp5.xmlmodel import Hwp5File as xml_Hwp5File from hwp5.utils import cached_property -# This was duplicated from hwp5 hwp5text.py and I don't really -# understand what it does... -RESOURCE_PATH_XSL_TEXT = 'xsl/plaintext.xsl' -class TextTransform(BaseTransform): - @property - def transform_hwp5_to_text(self): - transform_xhwp5 = self.transform_xhwp5_to_text - return self.make_transform_hwp5(transform_xhwp5) - @cached_property - def transform_xhwp5_to_text(self): - resource_path = RESOURCE_PATH_XSL_TEXT - return self.make_xsl_transform(resource_path) - - # Associate HTML meta names and hwp summaryinfo values def metafields(summaryinfo): yield(('Description', summaryinfo.subject + " " +