Korean splitter: add inactive option to split on whitespace before calling the tagger
This commit is contained in:
parent
2c44b805cf
commit
73f2836317
@ -35,7 +35,7 @@ try:
|
|||||||
import mecab
|
import mecab
|
||||||
usingkonlpy = False
|
usingkonlpy = False
|
||||||
except:
|
except:
|
||||||
from konlpy.tag import Okt,Mecab,Komoran
|
import konlpy.tag
|
||||||
usingkonlpy = True
|
usingkonlpy = True
|
||||||
|
|
||||||
class Processor(object):
|
class Processor(object):
|
||||||
@ -51,19 +51,19 @@ class Processor(object):
|
|||||||
from konlpy.tag import Okt,Mecab,Komoran
|
from konlpy.tag import Okt,Mecab,Komoran
|
||||||
usingkonlpy = True
|
usingkonlpy = True
|
||||||
if taggername == "Okt":
|
if taggername == "Okt":
|
||||||
self.tagger = Okt()
|
self.tagger = konlpy.tag.Okt()
|
||||||
self.tagsOkt = True
|
self.tagsOkt = True
|
||||||
elif taggername == "Mecab":
|
elif taggername == "Mecab":
|
||||||
if usingkonlpy:
|
if usingkonlpy:
|
||||||
# Use Mecab(dicpath="c:/some/path/mecab-ko-dic") for a
|
# Use Mecab(dicpath="c:/some/path/mecab-ko-dic") for a
|
||||||
# non-default location. (?? mecab uses rcfile and dicdir not
|
# non-default location. (?? mecab uses rcfile and dicdir not
|
||||||
# dicpath)
|
# dicpath)
|
||||||
self.tagger = Mecab()
|
self.tagger = konlpy.tag.Mecab()
|
||||||
else:
|
else:
|
||||||
self.tagger = mecab.MeCab()
|
self.tagger = mecab.MeCab()
|
||||||
self.tagsMecab = True
|
self.tagsMecab = True
|
||||||
elif taggername == "Komoran":
|
elif taggername == "Komoran":
|
||||||
self.tagger = Komoran()
|
self.tagger = konlpy.tag.Komoran()
|
||||||
self.tagsKomoran = True
|
self.tagsKomoran = True
|
||||||
else:
|
else:
|
||||||
raise Exception("Bad tagger name " + taggername)
|
raise Exception("Bad tagger name " + taggername)
|
||||||
@ -76,8 +76,16 @@ class Processor(object):
|
|||||||
return {'error':'No "tagger" field in parameters'}
|
return {'error':'No "tagger" field in parameters'}
|
||||||
self._init_tagger(params['tagger']);
|
self._init_tagger(params['tagger']);
|
||||||
|
|
||||||
pos = self.tagger.pos(params['data'])
|
spliteojeol = False
|
||||||
#proto.log("%s" % pos)
|
if spliteojeol:
|
||||||
|
data = params['data'].split()
|
||||||
|
pos = []
|
||||||
|
for d in data:
|
||||||
|
pos += self.tagger.pos(d)
|
||||||
|
else:
|
||||||
|
pos = self.tagger.pos(params['data'])
|
||||||
|
|
||||||
|
#proto.log("POS: %s" % pos)
|
||||||
text = ""
|
text = ""
|
||||||
tags = ""
|
tags = ""
|
||||||
for e in pos:
|
for e in pos:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user