* Fix use of mock Package object

This commit is contained in:
Matthew Honnibal 2015-12-31 04:13:15 +01:00
parent 029136a007
commit eaf2ad59f1
6 changed files with 39 additions and 31 deletions

View File

@ -186,7 +186,7 @@ class Language(object):
if load_vectors is not True:
warn("load_vectors is deprecated", DeprecationWarning)
if vocab in (None, True):
vocab = self.default_vocab(package)
vocab = Vocab.load(package, get_lex_attr=self.default_lex_attrs())
self.vocab = vocab
if tokenizer in (None, True):
tokenizer = Tokenizer.load(package, self.vocab)

View File

@ -22,8 +22,7 @@ class Lemmatizer(object):
index[pos] = read_index(file_) if file_ is not None else set()
with pkg.open(('wordnet', '%s.exc' % pos), default=None) as file_:
exc[pos] = read_exc(file_) if file_ is not None else {}
with pkg.open(('vocab', 'lemma_rules.json'), default=None) as file_:
rules = json.load(file_) if file_ is not None else {}
rules = pkg.load_json(('vocab', 'lemma_rules.json'), default={})
return cls(index, exc, rules)
def __init__(self, index, exceptions, rules):

View File

@ -172,9 +172,7 @@ cdef class Matcher:
@classmethod
def load(cls, pkg_or_str_or_file, Vocab vocab):
package = Package.create_or_return(pkg_or_str_or_file)
with package.open(('vocab', 'serializer.json'), default=None) as file_:
patterns = json.load(file_) if file_ is not None else {}
patterns = package.load_json(('vocab', 'gazetteer.json'))
return cls(vocab, patterns)
def __init__(self, vocab, patterns):

View File

@ -25,14 +25,16 @@ def lemmatizer(package):
def test_read_index(package):
index = package.load_utf8(read_index, 'wordnet', 'index.noun')
with package.open(('wordnet', 'index.noun')) as file_:
index = read_index(file_)
assert 'man' in index
assert 'plantes' not in index
assert 'plant' in index
def test_read_exc(package):
exc = package.load_utf8(read_exc, 'wordnet', 'verb.exc')
with package.open(('wordnet', 'verb.exc')) as file_:
exc = read_exc(file_)
assert exc['was'] == ('be',)

View File

@ -9,8 +9,8 @@ import types
from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
def local_path(subdir):
    # NOTE: `subdir` is ignored — the result always resolves to the fixed
    # 'data' directory sitting next to this module.
    return os.path.abspath(os.path.join(os.path.dirname(__file__), 'data'))
def local_path(*dirs):
return os.path.abspath(os.path.join(os.path.dirname(__file__), *dirs))
class Package(object):
@ -18,10 +18,10 @@ class Package(object):
def create_or_return(cls, me_or_arg):
    """Return *me_or_arg* unchanged when it is already an instance of *cls*;
    otherwise treat it as a constructor argument and wrap it: ``cls(me_or_arg)``.
    """
    return me_or_arg if isinstance(me_or_arg, cls) else cls(me_or_arg)
def __init__(self, data_path=None):
def __init__(self, data_path=None, model='en_default-1.0.3'):
if data_path is None:
data_path = local_path('data')
self.name = None
data_path = local_path('data', model)
self.model = model
self.data_path = data_path
self._root = self.data_path
@ -37,18 +37,22 @@ class Package(object):
def dir_path(self, *path_parts, **kwargs):
    """Join *path_parts* onto the package root directory.

    ``**kwargs`` is accepted for signature compatibility but is not consulted.
    """
    return os.path.join(self._root, *path_parts)
def load_utf8(self, func, *path_parts, **kwargs):
    """Open the package file at *path_parts* as UTF-8 text and return
    ``func(file)``.

    Only the ``require`` keyword is consulted (default True); when it is
    falsy the file is never opened and None is returned — note that in that
    case the file's existence is not checked at all.
    """
    if kwargs.get('require', True):
        with io.open(self.file_path(os.path.join(*path_parts)),
                mode='r', encoding='utf8') as f:
            return func(f)
    else:
        return None
def load_json(self, path_parts, default=None):
    """Parse and return the JSON file at *path_parts* inside the package.

    When the file is missing, *default* controls the outcome:
      * an Exception subclass  -> raise ``default(file_path)``;
      * an Exception instance  -> raise it as-is;
      * anything else          -> return *default* unchanged.
    """
    if not self.has_file(*path_parts):
        if _is_error_class(default):
            raise default(self.file_path(*path_parts))
        elif isinstance(default, Exception):
            raise default
        else:
            return default
    with io.open(self.file_path(os.path.join(*path_parts)),
            mode='r', encoding='utf8') as file_:
        return json.load(file_)
@contextmanager
def open(self, path_parts, default=IOError):
def open(self, path_parts, mode='r', encoding='utf8', default=IOError):
if not self.has_file(*path_parts):
if isinstance(default, types.TypeType) and issubclass(default, Exception):
if _is_error_class(default):
raise default(self.file_path(*path_parts))
elif isinstance(default, Exception):
raise default
@ -57,12 +61,16 @@ class Package(object):
else:
# Enter
file_ = io.open(self.file_path(os.path.join(*path_parts)),
mode='r', encoding='utf8')
mode=mode, encoding='utf8')
yield file_
# Exit
file_.close()
def _is_error_class(e):
return isinstance(e, types.TypeType) and issubclass(e, Exception)
def get_package(name=None, data_path=None):
    """Return a Package rooted at *data_path*.

    *name* is part of the public signature but is not consulted here.
    """
    return Package(data_path=data_path)
@ -92,10 +100,13 @@ def utf8open(loc, mode='r'):
def read_lang_data(package):
tokenization = package.load_utf8(json.load, 'tokenizer', 'specials.json')
prefix = package.load_utf8(read_prefix, 'tokenizer', 'prefix.txt')
suffix = package.load_utf8(read_suffix, 'tokenizer', 'suffix.txt')
infix = package.load_utf8(read_infix, 'tokenizer', 'infix.txt')
tokenization = package.load_json(('tokenizer', 'specials.json'))
with package.open(('tokenizer', 'prefix.txt'), default=None) as file_:
prefix = read_prefix(file_) if file_ is not None else None
with package.open(('tokenizer', 'suffix.txt'), default=None) as file_:
suffix = read_suffix(file_) if file_ is not None else None
with package.open(('tokenizer', 'infix.txt'), default=None) as file_:
infix = read_infix(file_) if file_ is not None else None
return tokenization, prefix, suffix, infix

View File

@ -50,13 +50,11 @@ cdef class Vocab:
@classmethod
def load(cls, pkg_or_str_or_file, get_lex_attr=None):
package = Package.create_or_return(pkg_or_str_or_file)
with package.open(('vocab', 'tag_map.json'), default=None) as file_:
tag_map = json.load(file_) if file_ is not None else {}
tag_map = package.load_json(('vocab', 'tag_map.json'), default={})
lemmatizer = Lemmatizer.load(package)
with package.open(('vocab', 'serializer.json'), default=None) as file_:
serializer_freqs = json.load(file_) if file_ is not None else {}
serializer_freqs = package.load_json(('vocab', 'serializer.json'), default={})
cdef Vocab self = cls(get_lex_attr=get_lex_attr, tag_map=tag_map,
lemmatizer=lemmatizer, serializer_freqs=serializer_freqs)