Fix Korean

This commit is contained in:
Ines Montani 2019-09-29 17:10:56 +02:00
parent 159b72ed4c
commit 75514b5970

View File

@ -46,7 +46,7 @@ class KoreanTokenizer(DummyTokenizer):
     def __call__(self, text):
         dtokens = list(self.detailed_tokens(text))
-        surfaces = [dt.surface for dt in dtokens]
+        surfaces = [dt["surface"] for dt in dtokens]
         doc = Doc(self.vocab, words=surfaces, spaces=list(check_spaces(text, surfaces)))
         for token, dtoken in zip(doc, dtokens):
             first_tag, sep, eomi_tags = dtoken["tag"].partition("+")