From 7670c745b6370fafd29ccedcdcba875156ccd576 Mon Sep 17 00:00:00 2001
From: ines
Date: Sun, 16 Apr 2017 20:35:00 +0200
Subject: [PATCH] Update spacy.load() and fix path checks

---
Review notes (this section is commentary, not part of the commit message):

* The patch text had been collapsed onto two physical lines; the line
  structure is restored here. Indentation of the Python lines was
  reconstructed from the syntax, so continuation-line alignment may differ
  cosmetically from the original commit.
* Amended in review: when a 'path' override is passed, load() took the
  `else` branch, which never assigned `model_name`, and then called
  util.parse_package_meta(data_path, model_name, ...), raising
  UnboundLocalError. The `else` branch now sets `model_name` from the
  override path. This assumes util.ensure_path() returns a pathlib-style
  path object (the existing code already reads `.parent` from it).
* Because of that amendment the hunk header and diffstat counts differ from
  the original commit (two added lines). The commit id in the "From" line
  and the post-image hash in the "index" line still refer to the original,
  unamended commit.

 spacy/__init__.py | 49 +++++++++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/spacy/__init__.py b/spacy/__init__.py
index bc668121f..06e9374ea 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -1,39 +1,40 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-from pathlib import Path
-
-from .util import set_lang_class, get_lang_class, parse_package_meta
+from . import util
 from .deprecated import resolve_model_name
 from .cli import info
 
 from . import en, de, zh, es, it, hu, fr, pt, nl, sv, fi, bn, he
 
 
-set_lang_class(en.English.lang, en.English)
-set_lang_class(de.German.lang, de.German)
-set_lang_class(es.Spanish.lang, es.Spanish)
-set_lang_class(pt.Portuguese.lang, pt.Portuguese)
-set_lang_class(fr.French.lang, fr.French)
-set_lang_class(it.Italian.lang, it.Italian)
-set_lang_class(hu.Hungarian.lang, hu.Hungarian)
-set_lang_class(zh.Chinese.lang, zh.Chinese)
-set_lang_class(nl.Dutch.lang, nl.Dutch)
-set_lang_class(sv.Swedish.lang, sv.Swedish)
-set_lang_class(fi.Finnish.lang, fi.Finnish)
-set_lang_class(bn.Bengali.lang, bn.Bengali)
-set_lang_class(he.Hebrew.lang, he.Hebrew)
+_languages = (en.English, de.German, es.Spanish, pt.Portuguese, fr.French,
+              it.Italian, hu.Hungarian, zh.Chinese, nl.Dutch, sv.Swedish,
+              fi.Finnish, bn.Bengali, he.Hebrew)
+
+
+for _lang in _languages:
+    util.set_lang_class(_lang.lang, _lang)
 
 
 def load(name, **overrides):
-    data_path = overrides.get('path', util.get_data_path())
-    model_name = resolve_model_name(name)
-    meta = parse_package_meta(data_path, model_name, require=False)
+    if overrides.get('path') in (None, False, True):
+        data_path = util.get_data_path()
+        model_name = resolve_model_name(name)
+        model_path = data_path / model_name
+        if not model_path.exists():
+            model_path = None
+            util.print_msg(
+                "Only loading the '{}' tokenizer.".format(name),
+                title="Warning: no model found for '{}'".format(name))
+    else:
+        model_path = util.ensure_path(overrides['path'])
+        data_path = model_path.parent
+        # Keep data_path / model_name == model_path, as in the default branch.
+        model_name = model_path.name
+    meta = util.parse_package_meta(data_path, model_name, require=False)
     lang = meta['lang'] if meta and 'lang' in meta else name
-    cls = get_lang_class(lang)
+    cls = util.get_lang_class(lang)
     overrides['meta'] = meta
-    model_path = Path(data_path / model_name)
-    if model_path.exists():
-        overrides['path'] = model_path
-
+    overrides['path'] = model_path
     return cls(**overrides)