Fix Thai tokenizer bug (#3693)

Fix tokenization with PyThaiNLP: call `word_tokenize(text)` without the explicit "newmm" argument.
This commit is contained in:
Wannaphong Phatthiyaphaibun 2019-05-10 05:21:34 -07:00 committed by Ines Montani
parent 2663f4133c
commit 5a14a13f64

View File

@ -28,7 +28,7 @@ class ThaiTokenizer(DummyTokenizer):
self.vocab = nlp.vocab if nlp is not None else cls.create_vocab(nlp) self.vocab = nlp.vocab if nlp is not None else cls.create_vocab(nlp)
def __call__(self, text): def __call__(self, text):
words = list(self.word_tokenize(text, "newmm")) words = list(self.word_tokenize(text))
spaces = [False] * len(words) spaces = [False] * len(words)
return Doc(self.vocab, words=words, spaces=spaces) return Doc(self.vocab, words=words, spaces=spaces)