Update spacy.load() and fix path checks

This commit is contained in:
ines 2017-04-16 20:35:00 +02:00
parent d3759dfb32
commit 7670c745b6

View File

@@ -1,39 +1,38 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
from pathlib import Path from . import util
from .util import set_lang_class, get_lang_class, parse_package_meta
from .deprecated import resolve_model_name from .deprecated import resolve_model_name
from .cli import info from .cli import info
from . import en, de, zh, es, it, hu, fr, pt, nl, sv, fi, bn, he from . import en, de, zh, es, it, hu, fr, pt, nl, sv, fi, bn, he
# Language classes shipped with the package. Each one is handed to
# util.set_lang_class() below, keyed by the class's own `lang` attribute.
_languages = (
    en.English,
    de.German,
    es.Spanish,
    pt.Portuguese,
    fr.French,
    it.Italian,
    hu.Hungarian,
    zh.Chinese,
    nl.Dutch,
    sv.Swedish,
    fi.Finnish,
    bn.Bengali,
    he.Hebrew,
)

# Registration happens at import time, in the order listed above.
for _lang in _languages:
    util.set_lang_class(_lang.lang, _lang)
def load(name, **overrides):
    """Instantiate a language class, with model data when it can be found.

    name: Model name; it is passed through resolve_model_name() to locate
        the model directory, and is also used as the language ID when no
        package meta with a 'lang' entry is available.
    **overrides: Keyword arguments forwarded to the language class.
        'path' is treated specially: if it is missing, None, False or True,
        the model is looked up in the default data directory; any other
        value is used as the model directory itself. 'meta' and 'path' are
        always (re)set before the class is called.

    Returns the object produced by calling the class that
    util.get_lang_class() returns for the resolved language.
    """
    if overrides.get('path') in (None, False, True):
        # No usable path supplied: resolve the model inside the data dir.
        data_path = util.get_data_path()
        model_name = resolve_model_name(name)
        model_path = data_path / model_name
        if not model_path.exists():
            # Fall back to a model-less instance rather than failing.
            model_path = None
            util.print_msg(
                "Only loading the '{}' tokenizer.".format(name),
                title="Warning: no model found for '{}'".format(name))
    else:
        model_path = util.ensure_path(overrides['path'])
        data_path = model_path.parent
        # Previously `model_name` was never assigned on this branch, so the
        # parse_package_meta() call below raised UnboundLocalError whenever
        # an explicit path was given. Using the directory's own name keeps
        # `data_path / model_name` equal to the model directory, as in the
        # default branch.
        # NOTE(review): assumes parse_package_meta() looks for the meta file
        # under data_path / model_name -- confirm against util.
        model_name = model_path.name
    meta = util.parse_package_meta(data_path, model_name, require=False)
    # Prefer the language declared in the package meta; otherwise treat
    # `name` itself as the language ID.
    lang = meta['lang'] if meta and 'lang' in meta else name
    cls = util.get_lang_class(lang)
    overrides['meta'] = meta
    overrides['path'] = model_path
    return cls(**overrides)