spaCy/spacy/__init__.py

48 lines
1.5 KiB
Python
Raw Normal View History

import pathlib
from .util import set_lang_class, get_lang_class
from .about import __version__
from . import en
from . import de
from . import zh
2016-11-02 22:05:14 +03:00
from . import es
from . import it
2016-11-02 22:48:29 +03:00
from . import fr
2016-11-02 22:05:14 +03:00
from . import pt
2016-09-24 23:09:21 +03:00
# Python 2/3 compatibility: the name `basestring` is not defined on
# Python 3, so fall back to `str` when looking it up raises NameError.
try:
    basestring
except NameError:
    basestring = str


# Register each bundled language class under its own `lang` attribute.
# The tuple order matches the order in which the classes are registered.
for _lang_cls in (
    en.English,
    de.German,
    es.Spanish,
    pt.Portuguese,
    fr.French,
    it.Italian,
    zh.Chinese,
):
    set_lang_class(_lang_cls.lang, _lang_cls)
# Do not leave the loop variable behind in the package namespace.
del _lang_cls
2016-03-25 20:54:45 +03:00
2016-10-18 20:23:31 +03:00
def load(name, **overrides):
    """Instantiate the language class registered for a data pack name.

    name: Data pack name. It is split by `util.split_data_name` into a
        target name and a target version.
    **overrides: Keyword arguments forwarded to the language class.
        Three keys are also inspected here:
        path -- directory searched for data packs; defaults to
            `util.get_data_path()`.
        vectors -- name of a vectors data pack, consulted only when the
            target name is 'en' and 'add_vectors' is not supplied.
        add_vectors -- when supplied, no vector loader is installed here.

    Returns the object produced by calling the language class with `path`
    set to the result of `util.match_best_version` for the target.

    Raises IOError if an explicitly requested `vectors` pack cannot be
    matched under the data path.
    """
    # NOTE(review): `util` is not imported by name in the visible import
    # lines; presumably it resolves through the package attribute that is
    # bound when `.util` is imported -- confirm, or import it explicitly.
    target_name, target_version = util.split_data_name(name)
    data_path = overrides.get('path', util.get_data_path())
    if target_name == 'en' and 'add_vectors' not in overrides:
        if 'vectors' in overrides:
            vec_path = util.match_best_version(
                overrides['vectors'], None, data_path)
            # An explicitly requested vectors pack must exist.
            if vec_path is None:
                raise IOError(
                    'Could not load data pack %s from %s' % (overrides['vectors'], data_path))
        else:
            # The default vectors pack is optional: when it cannot be
            # matched, no loader is installed.
            vec_path = util.match_best_version(
                'en_glove_cc_300_1m_vectors', None, data_path)
        if vec_path is not None:
            vec_path = vec_path / 'vocab' / 'vec.bin'
            overrides['add_vectors'] = lambda vocab: vocab.load_vectors_from_bin_loc(vec_path)
    path = util.match_best_version(target_name, target_version, data_path)
    cls = get_lang_class(target_name)
    # Store the resolved path in `overrides` rather than passing `path=`
    # next to `**overrides`: a caller-supplied 'path' would otherwise raise
    # "TypeError: got multiple values for keyword argument 'path'".
    overrides['path'] = path
    return cls(**overrides)