mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
* Fix use of mock Package object
This commit is contained in:
parent
029136a007
commit
eaf2ad59f1
|
@@ -186,7 +186,7 @@ class Language(object):
|
|||
if load_vectors is not True:
|
||||
warn("load_vectors is deprecated", DeprecationWarning)
|
||||
if vocab in (None, True):
|
||||
vocab = self.default_vocab(package)
|
||||
vocab = Vocab.load(package, get_lex_attr=self.default_lex_attrs())
|
||||
self.vocab = vocab
|
||||
if tokenizer in (None, True):
|
||||
tokenizer = Tokenizer.load(package, self.vocab)
|
||||
|
|
|
@@ -22,8 +22,7 @@ class Lemmatizer(object):
|
|||
index[pos] = read_index(file_) if file_ is not None else set()
|
||||
with pkg.open(('wordnet', '%s.exc' % pos), default=None) as file_:
|
||||
exc[pos] = read_exc(file_) if file_ is not None else {}
|
||||
with pkg.open(('vocab', 'lemma_rules.json'), default=None) as file_:
|
||||
rules = json.load(file_) if file_ is not None else {}
|
||||
rules = pkg.load_json(('vocab', 'lemma_rules.json'), default={})
|
||||
return cls(index, exc, rules)
|
||||
|
||||
def __init__(self, index, exceptions, rules):
|
||||
|
|
|
@@ -172,9 +172,7 @@ cdef class Matcher:
|
|||
@classmethod
|
||||
def load(cls, pkg_or_str_or_file, Vocab vocab):
|
||||
package = Package.create_or_return(pkg_or_str_or_file)
|
||||
|
||||
with package.open(('vocab', 'serializer.json'), default=None) as file_:
|
||||
patterns = json.load(file_) if file_ is not None else {}
|
||||
patterns = package.load_json(('vocab', 'gazetteer.json'))
|
||||
return cls(vocab, patterns)
|
||||
|
||||
def __init__(self, vocab, patterns):
|
||||
|
|
|
@@ -25,14 +25,16 @@ def lemmatizer(package):
|
|||
|
||||
|
||||
def test_read_index(package):
|
||||
index = package.load_utf8(read_index, 'wordnet', 'index.noun')
|
||||
with package.open(('wordnet', 'index.noun')) as file_:
|
||||
index = read_index(file_)
|
||||
assert 'man' in index
|
||||
assert 'plantes' not in index
|
||||
assert 'plant' in index
|
||||
|
||||
|
||||
def test_read_exc(package):
|
||||
exc = package.load_utf8(read_exc, 'wordnet', 'verb.exc')
|
||||
with package.open(('wordnet', 'verb.exc')) as file_:
|
||||
exc = read_exc(file_)
|
||||
assert exc['was'] == ('be',)
|
||||
|
||||
|
||||
|
|
|
@@ -9,8 +9,8 @@ import types
|
|||
from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
|
||||
|
||||
|
||||
def local_path(subdir):
|
||||
return os.path.abspath(os.path.join(os.path.dirname(__file__), 'data'))
|
||||
def local_path(*dirs):
    """Return the absolute path formed by joining *dirs* onto this module's directory."""
    base = os.path.dirname(__file__)
    return os.path.abspath(os.path.join(base, *dirs))
|
||||
|
||||
|
||||
class Package(object):
|
||||
|
@@ -18,10 +18,10 @@ class Package(object):
|
|||
def create_or_return(cls, me_or_arg):
    """Return *me_or_arg* unchanged when it is already an instance of *cls*;
    otherwise construct a new *cls* from it.
    """
    if isinstance(me_or_arg, cls):
        return me_or_arg
    return cls(me_or_arg)
|
||||
|
||||
def __init__(self, data_path=None):
|
||||
def __init__(self, data_path=None, model='en_default-1.0.3'):
|
||||
if data_path is None:
|
||||
data_path = local_path('data')
|
||||
self.name = None
|
||||
data_path = local_path('data', model)
|
||||
self.model = model
|
||||
self.data_path = data_path
|
||||
self._root = self.data_path
|
||||
|
||||
|
@@ -37,18 +37,22 @@ class Package(object):
|
|||
def dir_path(self, *path_parts, **kwargs):
|
||||
return os.path.join(self._root, *path_parts)
|
||||
|
||||
def load_utf8(self, func, *path_parts, **kwargs):
|
||||
if kwargs.get('require', True):
|
||||
with io.open(self.file_path(os.path.join(*path_parts)),
|
||||
mode='r', encoding='utf8') as f:
|
||||
return func(f)
|
||||
else:
|
||||
return None
|
||||
def load_json(self, path_parts, default=None):
|
||||
if not self.has_file(*path_parts):
|
||||
if _is_error_class(default):
|
||||
raise default(self.file_path(*path_parts))
|
||||
elif isinstance(default, Exception):
|
||||
raise default
|
||||
else:
|
||||
return default
|
||||
with io.open(self.file_path(os.path.join(*path_parts)),
|
||||
mode='r', encoding='utf8') as file_:
|
||||
return json.load(file_)
|
||||
|
||||
@contextmanager
|
||||
def open(self, path_parts, default=IOError):
|
||||
def open(self, path_parts, mode='r', encoding='utf8', default=IOError):
|
||||
if not self.has_file(*path_parts):
|
||||
if isinstance(default, types.TypeType) and issubclass(default, Exception):
|
||||
if _is_error_class(default):
|
||||
raise default(self.file_path(*path_parts))
|
||||
elif isinstance(default, Exception):
|
||||
raise default
|
||||
|
@ -57,12 +61,16 @@ class Package(object):
|
|||
else:
|
||||
# Enter
|
||||
file_ = io.open(self.file_path(os.path.join(*path_parts)),
|
||||
mode='r', encoding='utf8')
|
||||
mode=mode, encoding='utf8')
|
||||
yield file_
|
||||
# Exit
|
||||
file_.close()
|
||||
|
||||
|
||||
def _is_error_class(e):
|
||||
return isinstance(e, types.TypeType) and issubclass(e, Exception)
|
||||
|
||||
|
||||
def get_package(name=None, data_path=None):
    """Return a ``Package`` rooted at *data_path*.

    NOTE(review): the *name* argument is accepted but currently unused —
    confirm that callers do not expect it to select a model/package.
    """
    return Package(data_path)
|
||||
|
||||
|
@@ -92,10 +100,13 @@ def utf8open(loc, mode='r'):
|
|||
|
||||
|
||||
def read_lang_data(package):
|
||||
tokenization = package.load_utf8(json.load, 'tokenizer', 'specials.json')
|
||||
prefix = package.load_utf8(read_prefix, 'tokenizer', 'prefix.txt')
|
||||
suffix = package.load_utf8(read_suffix, 'tokenizer', 'suffix.txt')
|
||||
infix = package.load_utf8(read_infix, 'tokenizer', 'infix.txt')
|
||||
tokenization = package.load_json(('tokenizer', 'specials.json'))
|
||||
with package.open(('tokenizer', 'prefix.txt'), default=None) as file_:
|
||||
prefix = read_prefix(file_) if file_ is not None else None
|
||||
with package.open(('tokenizer', 'suffix.txt'), default=None) as file_:
|
||||
suffix = read_suffix(file_) if file_ is not None else None
|
||||
with package.open(('tokenizer', 'infix.txt'), default=None) as file_:
|
||||
infix = read_infix(file_) if file_ is not None else None
|
||||
return tokenization, prefix, suffix, infix
|
||||
|
||||
|
||||
|
|
|
@@ -50,13 +50,11 @@ cdef class Vocab:
|
|||
@classmethod
|
||||
def load(cls, pkg_or_str_or_file, get_lex_attr=None):
|
||||
package = Package.create_or_return(pkg_or_str_or_file)
|
||||
with package.open(('vocab', 'tag_map.json'), default=None) as file_:
|
||||
tag_map = json.load(file_) if file_ is not None else {}
|
||||
tag_map = package.load_json(('vocab', 'tag_map.json'), default={})
|
||||
|
||||
lemmatizer = Lemmatizer.load(package)
|
||||
|
||||
with package.open(('vocab', 'serializer.json'), default=None) as file_:
|
||||
serializer_freqs = json.load(file_) if file_ is not None else {}
|
||||
serializer_freqs = package.load_json(('vocab', 'serializer.json'), default={})
|
||||
|
||||
cdef Vocab self = cls(get_lex_attr=get_lex_attr, tag_map=tag_map,
|
||||
lemmatizer=lemmatizer, serializer_freqs=serializer_freqs)
|
||||
|
|
Loading…
Reference in New Issue
Block a user