diff --git a/spacy/__init__.py b/spacy/__init__.py index 57d02d95f..556027a42 100644 --- a/spacy/__init__.py +++ b/spacy/__init__.py @@ -4,4 +4,4 @@ from .en import English def load(name, via=None): package = util.get_package_by_name(name, via=via) - return English(package) + return English(package=package) diff --git a/spacy/language.py b/spacy/language.py index 24e716265..8f3eb646d 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -140,7 +140,7 @@ class Language(object): def default_vocab(cls, package, get_lex_attr=None): if get_lex_attr is None: get_lex_attr = cls.default_lex_attrs() - return Vocab.load(package, get_lex_attr=get_lex_attr) + return Vocab.from_package(package, get_lex_attr=get_lex_attr) @classmethod def default_parser(cls, package, vocab): @@ -164,7 +164,8 @@ class Language(object): entity=None, matcher=None, serializer=None, - load_vectors=True): + load_vectors=True, + package=None): """ a model can be specified: @@ -182,30 +183,29 @@ class Language(object): 4) by package name with a relocated package base - spacy.load('en_default', via='/my/package/root') - spacy.load('en_default==1.0.0', via='/my/package/root') - - 5) by package object - - spacy.en.English(package) """ if data_dir is not None and via is None: warn("Use of data_dir is deprecated, use via instead.", DeprecationWarning) via = data_dir - if via is None: - package = util.get_package_by_name() - else: - package = util.get_package(via) + if package is None: + if via is None: + package = util.get_package_by_name() + else: + package = util.get_package(via) if load_vectors is not True: warn("load_vectors is deprecated", DeprecationWarning) + if vocab in (None, True): - vocab = Vocab.load(package, get_lex_attr=self.default_lex_attrs()) + vocab = self.default_vocab(package) self.vocab = vocab if tokenizer in (None, True): - tokenizer = Tokenizer.load(package, self.vocab) + tokenizer = Tokenizer.from_package(package, self.vocab) self.tokenizer = tokenizer if tagger in (None, True): - tagger = Tagger.load(package, self.vocab) + tagger = Tagger.from_package(package, self.vocab) self.tagger = tagger if entity in (None, True): entity = self.default_entity(package, self.vocab) @@ -214,7 +214,7 @@ class Language(object): parser = self.default_parser(package, self.vocab) self.parser = parser if matcher in (None, True): - matcher = Matcher.load(package, self.vocab) + matcher = Matcher.from_package(package, self.vocab) self.matcher = matcher def __reduce__(self): diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py index 5082da253..a05ca49c0 100644 --- a/spacy/lemmatizer.py +++ b/spacy/lemmatizer.py @@ -14,7 +14,10 @@ from .util import get_package class Lemmatizer(object): @classmethod def load(cls, via): - pkg = get_package(via) + return cls.from_package(get_package(via)) + + @classmethod + def from_package(cls, pkg): index = {} exc = {} for pos in ['adj', 'noun', 'verb']: diff --git a/spacy/matcher.pyx b/spacy/matcher.pyx index df71e8f98..098d6cd5d 100644 --- a/spacy/matcher.pyx +++ b/spacy/matcher.pyx @@ -171,7 +171,10 @@ cdef class Matcher: @classmethod def load(cls, via, Vocab vocab): - package = get_package(via) + return cls.from_package(get_package(via), vocab=vocab) + + @classmethod + def from_package(cls, package, Vocab vocab): patterns = package.load_json(('vocab', 'gazetteer.json')) return cls(vocab, patterns) diff --git a/spacy/tagger.pyx b/spacy/tagger.pyx index 7d7b82d90..0d4252d6c 100644 --- a/spacy/tagger.pyx +++ b/spacy/tagger.pyx @@ -149,7 +149,10 @@ cdef class Tagger: @classmethod def load(cls, via, vocab): - pkg = get_package(via) + return cls.from_package(get_package(via), vocab=vocab) + + @classmethod + def from_package(cls, pkg, vocab): # TODO: templates.json deprecated? not present in latest package templates = cls.default_templates() # templates = package.load_utf8(json.load, diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index 9f195f784..fbb54c248 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -43,8 +43,11 @@ cdef class Tokenizer: @classmethod def load(cls, via, Vocab vocab): - pkg = get_package(via) - rules, prefix_re, suffix_re, infix_re = read_lang_data(pkg) + return cls.from_package(get_package(via), vocab=vocab) + + @classmethod + def from_package(cls, package, Vocab vocab): + rules, prefix_re, suffix_re, infix_re = read_lang_data(package) prefix_re = re.compile(prefix_re) suffix_re = re.compile(suffix_re) infix_re = re.compile(infix_re) diff --git a/spacy/util.py b/spacy/util.py index 5083fa37a..24b683f0a 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -4,9 +4,9 @@ import json import re import os.path +import six import sputnik from sputnik.dir_package import DirPackage -from sputnik.package_stub import PackageStub from sputnik.package_list import (PackageNotFoundException, CompatiblePackageNotFoundException) @@ -15,8 +15,8 @@ from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE def get_package(via=None): - if isinstance(via, PackageStub): - return via + if not isinstance(via, six.string_types): + raise RuntimeError('via must be a string') return DirPackage(via) diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index f9771d5f7..3e7dbf38d 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -49,10 +49,13 @@ cdef class Vocab: ''' @classmethod def load(cls, via, get_lex_attr=None): - package = get_package(via) + return cls.from_package(get_package(via), get_lex_attr=get_lex_attr) + + @classmethod + def from_package(cls, package, get_lex_attr=None): tag_map = package.load_json(('vocab', 'tag_map.json'), default={}) - lemmatizer = Lemmatizer.load(package) + lemmatizer = Lemmatizer.from_package(package) serializer_freqs = package.load_json(('vocab', 'serializer.json'), default={})