Korean splitter: restart the Python/Java splitter from time to time because it leaks memory
This commit is contained in:
parent
a323472876
commit
207bfec93e
@ -49,6 +49,10 @@ static string o_cmdpath;
|
|||||||
std::mutex o_mutex;
|
std::mutex o_mutex;
|
||||||
static string o_taggername{"Okt"};
|
static string o_taggername{"Okt"};
|
||||||
|
|
||||||
|
// The Python/Java splitter leaks memory, so we restart it once the number of bytes sent to it (restartcount) exceeds restartthreshold.
|
||||||
|
static uint64_t restartcount;
|
||||||
|
static uint64_t restartthreshold = 5 * 1000 * 1000;
|
||||||
|
|
||||||
void TextSplit::koStaticConfInit(RclConfig *config, const string& tagger)
|
void TextSplit::koStaticConfInit(RclConfig *config, const string& tagger)
|
||||||
{
|
{
|
||||||
o_cmdpath = config->findFilter("kosplitter.py");
|
o_cmdpath = config->findFilter("kosplitter.py");
|
||||||
@ -68,7 +72,13 @@ static bool initCmd()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (o_talker) {
|
if (o_talker) {
|
||||||
return true;
|
if (restartcount > restartthreshold) {
|
||||||
|
delete o_talker;
|
||||||
|
o_talker = nullptr;
|
||||||
|
restartcount = 0;
|
||||||
|
} else {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (o_cmdpath.empty()) {
|
if (o_cmdpath.empty()) {
|
||||||
return false;
|
return false;
|
||||||
@ -89,10 +99,9 @@ static bool initCmd()
|
|||||||
bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
|
bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
|
||||||
{
|
{
|
||||||
std::unique_lock<std::mutex> mylock(o_mutex);
|
std::unique_lock<std::mutex> mylock(o_mutex);
|
||||||
|
initCmd();
|
||||||
if (nullptr == o_talker) {
|
if (nullptr == o_talker) {
|
||||||
if (!initCmd()) {
|
return false;
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
LOGDEB1("k_to_words: m_wordpos " << m_wordpos << "\n");
|
LOGDEB1("k_to_words: m_wordpos " << m_wordpos << "\n");
|
||||||
@ -125,6 +134,7 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
|
|||||||
}
|
}
|
||||||
LOGDEB1("TextSplit::k_to_words: sending out " << inputdata.size() <<
|
LOGDEB1("TextSplit::k_to_words: sending out " << inputdata.size() <<
|
||||||
" bytes " << inputdata << endl);
|
" bytes " << inputdata << endl);
|
||||||
|
restartcount += inputdata.size();
|
||||||
unordered_map<string,string> result;
|
unordered_map<string,string> result;
|
||||||
if (!o_talker->talk(args, result)) {
|
if (!o_talker->talk(args, result)) {
|
||||||
LOGERR("Python splitter for Korean failed\n");
|
LOGERR("Python splitter for Korean failed\n");
|
||||||
|
|||||||
@ -36,20 +36,6 @@ from hwp5.xmlmodel import Hwp5File as xml_Hwp5File
|
|||||||
from hwp5.utils import cached_property
|
from hwp5.utils import cached_property
|
||||||
|
|
||||||
|
|
||||||
# This was duplicated from hwp5 hwp5text.py and I don't really
|
|
||||||
# understand what it does...
|
|
||||||
RESOURCE_PATH_XSL_TEXT = 'xsl/plaintext.xsl'
|
|
||||||
class TextTransform(BaseTransform):
|
|
||||||
@property
|
|
||||||
def transform_hwp5_to_text(self):
|
|
||||||
transform_xhwp5 = self.transform_xhwp5_to_text
|
|
||||||
return self.make_transform_hwp5(transform_xhwp5)
|
|
||||||
@cached_property
|
|
||||||
def transform_xhwp5_to_text(self):
|
|
||||||
resource_path = RESOURCE_PATH_XSL_TEXT
|
|
||||||
return self.make_xsl_transform(resource_path)
|
|
||||||
|
|
||||||
|
|
||||||
# Associate HTML meta names and hwp summaryinfo values
|
# Associate HTML meta names and hwp summaryinfo values
|
||||||
def metafields(summaryinfo):
|
def metafields(summaryinfo):
|
||||||
yield(('Description', summaryinfo.subject + " " +
|
yield(('Description', summaryinfo.subject + " " +
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user