2016-09-24 16:42:01 +03:00
|
|
|
import pathlib
|
|
|
|
|
|
|
|
from .util import set_lang_class, get_lang_class
|
2016-01-15 20:01:02 +03:00
|
|
|
|
2016-04-12 17:31:16 +03:00
|
|
|
from . import en
|
|
|
|
from . import de
|
2016-04-24 19:45:16 +03:00
|
|
|
from . import zh
|
2016-01-15 20:01:02 +03:00
|
|
|
|
2016-03-24 13:19:43 +03:00
|
|
|
|
2016-09-24 23:09:21 +03:00
|
|
|
# Python 2/3 compatibility shim: `basestring` exists as a builtin only on
# Python 2. On Python 3 the bare reference raises NameError, and the name is
# aliased to `str` so the isinstance(..., basestring) check in this module
# works on both major versions.
try:
    basestring
except NameError:
    basestring = str
|
2016-09-24 23:09:21 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
2016-04-12 17:31:16 +03:00
|
|
|
# Register every bundled language class under the code stored in its own
# `lang` attribute. Order matches the original sequence: English, German,
# Chinese.
for _language in (en.English, de.German, zh.Chinese):
    set_lang_class(_language.lang, _language)
# Do not leave the loop variable behind as a module attribute.
del _language
|
2016-03-25 20:54:45 +03:00
|
|
|
|
|
|
|
|
2016-09-26 12:07:46 +03:00
|
|
|
def blank(name, vocab=None, tokenizer=None, parser=None, tagger=None, entity=None,
          matcher=None, serializer=None, vectors=None, pipeline=None):
    """Instantiate the language class registered for `name` without a data path.

    `name` is split with `util.split_data_name`; only the language part is
    used to look up the class via `get_lang_class`. Unlike `load`, no data
    directory is resolved here.

    All remaining arguments default to None and are forwarded unchanged to
    the language class constructor as keyword arguments.

    Returns the newly constructed language object.
    """
    # NOTE(review): `util` is never imported by name in the visible code. It
    # resolves only if this module is the package __init__, where
    # `from .util import ...` binds the submodule as a package attribute.
    # Confirm.
    target_name, _ = util.split_data_name(name)
    cls = get_lang_class(target_name)
    # The original passed an undefined name `path` as the first positional
    # argument, which raised NameError on every call. This function resolves
    # no data directory, so None is passed instead.
    # NOTE(review): confirm the constructor treats a None path as
    # "no data directory".
    return cls(
        None,
        vectors=vectors,
        vocab=vocab,
        tokenizer=tokenizer,
        tagger=tagger,
        parser=parser,
        entity=entity,
        matcher=matcher,
        pipeline=pipeline,
        serializer=serializer)
|
|
|
|
|
|
|
|
|
2016-09-24 21:26:17 +03:00
|
|
|
def load(name, vocab=True, tokenizer=True, parser=True, tagger=True, entity=True,
         matcher=True, serializer=True, vectors=True, pipeline=True, via=None):
    """Instantiate the language class registered for `name` from a data path.

    `name` is split into a language name and a version with
    `util.split_data_name`. The data path is then chosen by
    `util.match_best_version`, searching under `via`, or under
    `util.get_data_path()` when `via` is None.

    If `vectors` is a string, it is resolved to a path the same way, with no
    version constraint. Any other value is forwarded untouched.

    All component arguments default to True and are passed through to the
    language class constructor as keyword arguments.

    Returns the newly constructed language object.
    """
    data_root = util.get_data_path() if via is None else via

    lang_name, version = util.split_data_name(name)
    model_path = util.match_best_version(lang_name, version, data_root)

    # A string here names a vectors package; swap it for the resolved path.
    if isinstance(vectors, basestring):
        vectors = util.match_best_version(vectors, None, data_root)

    language_cls = get_lang_class(lang_name)
    components = dict(
        vectors=vectors,
        vocab=vocab,
        tokenizer=tokenizer,
        tagger=tagger,
        parser=parser,
        entity=entity,
        matcher=matcher,
        pipeline=pipeline,
        serializer=serializer,
    )
    return language_cls(model_path, **components)
|