* Create tokenizer via default_tokenizer function

This commit is contained in:
Matthew Honnibal 2016-04-24 19:11:10 +02:00
parent 478a8d1829
commit 9bfe20cac9

View File

@ -135,6 +135,10 @@ class Language(object):
else: else:
return None return None
@classmethod
def default_tokenizer(cls, package, vocab):
    """Build the default tokenizer for this language class.

    Delegates to ``Tokenizer.from_package`` with the given ``package`` and
    ``vocab`` and returns whatever that call produces.
    """
    tokenizer = Tokenizer.from_package(package, vocab)
    return tokenizer
def __init__(self, def __init__(self,
data_dir=None, data_dir=None,
vocab=None, vocab=None,
@ -178,7 +182,7 @@ class Language(object):
vocab = self.default_vocab(package, vectors_package=vectors_package) vocab = self.default_vocab(package, vectors_package=vectors_package)
self.vocab = vocab self.vocab = vocab
if tokenizer in (None, True): if tokenizer in (None, True):
tokenizer = Tokenizer.from_package(package, self.vocab) tokenizer = self.default_tokenizer(package, self.vocab)
self.tokenizer = tokenizer self.tokenizer = tokenizer
if tagger in (None, True): if tagger in (None, True):
tagger = Tagger.from_package(package, self.vocab) tagger = Tagger.from_package(package, self.vocab)