mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
untangle data_path/via
This commit is contained in:
parent
6d1a3af343
commit
235f094534
|
@ -6,6 +6,8 @@ import shutil
|
||||||
|
|
||||||
import plac
|
import plac
|
||||||
import sputnik
|
import sputnik
|
||||||
|
from sputnik.package_list import (PackageNotFoundException,
|
||||||
|
CompatiblePackageNotFoundException)
|
||||||
|
|
||||||
from .. import about
|
from .. import about
|
||||||
|
|
||||||
|
@ -22,28 +24,21 @@ def migrate(path):
|
||||||
os.unlink(os.path.join(path, filename))
|
os.unlink(os.path.join(path, filename))
|
||||||
|
|
||||||
|
|
||||||
def link(package, path):
|
|
||||||
if os.path.exists(path):
|
|
||||||
if os.path.isdir(path):
|
|
||||||
shutil.rmtree(path)
|
|
||||||
else:
|
|
||||||
os.unlink(path)
|
|
||||||
|
|
||||||
if not hasattr(os, 'symlink'): # not supported by win+py27
|
|
||||||
shutil.copytree(package.dir_path('data'), path)
|
|
||||||
else:
|
|
||||||
os.symlink(package.dir_path('data'), path)
|
|
||||||
|
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
force=("Force overwrite", "flag", "f", bool),
|
force=("Force overwrite", "flag", "f", bool),
|
||||||
)
|
)
|
||||||
def main(data_size='all', force=False):
|
def main(data_size='all', force=False):
|
||||||
path = os.path.dirname(os.path.abspath(__file__))
|
|
||||||
|
|
||||||
if force:
|
if force:
|
||||||
sputnik.purge(about.__name__, about.__version__)
|
sputnik.purge(about.__name__, about.__version__)
|
||||||
|
|
||||||
|
try:
|
||||||
|
sputnik.package(about.__name__, about.__version__, about.__default_model__)
|
||||||
|
print("Model already installed. Please run 'python -m "
|
||||||
|
"spacy.en.download --force' to reinstall.", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
except PackageNotFoundException, CompatiblePackageNotFoundException:
|
||||||
|
pass
|
||||||
|
|
||||||
package = sputnik.install(about.__name__, about.__version__, about.__default_model__)
|
package = sputnik.install(about.__name__, about.__version__, about.__default_model__)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -54,7 +49,7 @@ def main(data_size='all', force=False):
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# FIXME clean up old-style packages
|
# FIXME clean up old-style packages
|
||||||
migrate(path)
|
migrate(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
print("Model successfully installed.", file=sys.stderr)
|
print("Model successfully installed.", file=sys.stderr)
|
||||||
|
|
||||||
|
|
|
@ -155,7 +155,6 @@ class Language(object):
|
||||||
return Parser.from_dir(data_dir, vocab.strings, BiluoPushDown)
|
return Parser.from_dir(data_dir, vocab.strings, BiluoPushDown)
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
via=None,
|
|
||||||
data_dir=None,
|
data_dir=None,
|
||||||
vocab=None,
|
vocab=None,
|
||||||
tokenizer=None,
|
tokenizer=None,
|
||||||
|
@ -172,9 +171,9 @@ class Language(object):
|
||||||
1) by calling a Language subclass
|
1) by calling a Language subclass
|
||||||
- spacy.en.English()
|
- spacy.en.English()
|
||||||
|
|
||||||
2) by calling a Language subclass with via (previously: data_dir)
|
2) by calling a Language subclass with data_dir
|
||||||
- spacy.en.English('my/model/root')
|
- spacy.en.English('my/model/root')
|
||||||
- spacy.en.English(via='my/model/root')
|
- spacy.en.English(data_dir='my/model/root')
|
||||||
|
|
||||||
3) by package name
|
3) by package name
|
||||||
- spacy.load('en_default')
|
- spacy.load('en_default')
|
||||||
|
@ -185,15 +184,11 @@ class Language(object):
|
||||||
- spacy.load('en_default==1.0.0', via='/my/package/root')
|
- spacy.load('en_default==1.0.0', via='/my/package/root')
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if data_dir is not None and via is None:
|
|
||||||
warn("Use of data_dir is deprecated, use via instead.", DeprecationWarning)
|
|
||||||
via = data_dir
|
|
||||||
|
|
||||||
if package is None:
|
if package is None:
|
||||||
if via is None:
|
if data_dir is None:
|
||||||
package = util.get_package_by_name()
|
package = util.get_package_by_name()
|
||||||
else:
|
else:
|
||||||
package = util.get_package(via)
|
package = util.get_package(data_dir)
|
||||||
|
|
||||||
if load_vectors is not True:
|
if load_vectors is not True:
|
||||||
warn("load_vectors is deprecated", DeprecationWarning)
|
warn("load_vectors is deprecated", DeprecationWarning)
|
||||||
|
|
|
@ -170,8 +170,8 @@ cdef class Matcher:
|
||||||
cdef object _patterns
|
cdef object _patterns
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def load(cls, via, Vocab vocab):
|
def load(cls, data_dir, Vocab vocab):
|
||||||
return cls.from_package(get_package(via), vocab=vocab)
|
return cls.from_package(get_package(data_dir), vocab=vocab)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_package(cls, package, Vocab vocab):
|
def from_package(cls, package, Vocab vocab):
|
||||||
|
|
|
@ -148,8 +148,8 @@ cdef class Tagger:
|
||||||
return cls(vocab, model)
|
return cls(vocab, model)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def load(cls, via, vocab):
|
def load(cls, data_dir, vocab):
|
||||||
return cls.from_package(get_package(via), vocab=vocab)
|
return cls.from_package(get_package(data_dir), vocab=vocab)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_package(cls, pkg, vocab):
|
def from_package(cls, pkg, vocab):
|
||||||
|
|
|
@ -7,11 +7,11 @@ import os
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def EN():
|
def EN():
|
||||||
if os.environ.get('SPACY_DATA'):
|
if os.environ.get('SPACY_DATA'):
|
||||||
data_path = os.environ.get('SPACY_DATA')
|
data_dir = os.environ.get('SPACY_DATA')
|
||||||
else:
|
else:
|
||||||
data_path = None
|
data_dir = None
|
||||||
print("Load EN from %s" % data_path)
|
print("Load EN from %s" % data_dir)
|
||||||
return English(data_dir=data_path)
|
return English(data_dir=data_dir)
|
||||||
|
|
||||||
|
|
||||||
def pytest_addoption(parser):
|
def pytest_addoption(parser):
|
||||||
|
|
|
@ -13,6 +13,7 @@ from spacy.tokenizer import Tokenizer
|
||||||
from os import path
|
from os import path
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from spacy import util
|
||||||
from spacy.attrs import ORTH, SPACY, TAG, DEP, HEAD
|
from spacy.attrs import ORTH, SPACY, TAG, DEP, HEAD
|
||||||
from spacy.serialize.packer import Packer
|
from spacy.serialize.packer import Packer
|
||||||
|
|
||||||
|
@ -21,11 +22,13 @@ from spacy.serialize.bits import BitArray
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def vocab():
|
def vocab():
|
||||||
if os.environ.get('SPACY_DATA'):
|
data_dir = os.environ.get('SPACY_DATA')
|
||||||
data_path = os.environ.get('SPACY_DATA')
|
if data_dir is None:
|
||||||
|
package = util.get_package_by_name()
|
||||||
else:
|
else:
|
||||||
data_path = None
|
package = util.get_package(data_dir)
|
||||||
vocab = English.default_vocab(package=data_path)
|
|
||||||
|
vocab = English.default_vocab(package=package)
|
||||||
lex = vocab['dog']
|
lex = vocab['dog']
|
||||||
assert vocab[vocab.strings['dog']].orth_ == 'dog'
|
assert vocab[vocab.strings['dog']].orth_ == 'dog'
|
||||||
lex = vocab['the']
|
lex = vocab['the']
|
||||||
|
|
|
@ -5,23 +5,23 @@ import io
|
||||||
import pickle
|
import pickle
|
||||||
|
|
||||||
from spacy.lemmatizer import Lemmatizer, read_index, read_exc
|
from spacy.lemmatizer import Lemmatizer, read_index, read_exc
|
||||||
from spacy.util import get_package
|
from spacy import util
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def package():
|
def package():
|
||||||
if os.environ.get('SPACY_DATA'):
|
data_dir = os.environ.get('SPACY_DATA')
|
||||||
data_path = os.environ.get('SPACY_DATA')
|
if data_dir is None:
|
||||||
|
return util.get_package_by_name()
|
||||||
else:
|
else:
|
||||||
data_path = None
|
return util.get_package(data_dir)
|
||||||
return get_package(data_path=data_path)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def lemmatizer(package):
|
def lemmatizer(package):
|
||||||
return Lemmatizer.load(package)
|
return Lemmatizer.from_package(package)
|
||||||
|
|
||||||
|
|
||||||
def test_read_index(package):
|
def test_read_index(package):
|
||||||
|
|
|
@ -7,10 +7,10 @@ import os
|
||||||
def nlp():
|
def nlp():
|
||||||
from spacy.en import English
|
from spacy.en import English
|
||||||
if os.environ.get('SPACY_DATA'):
|
if os.environ.get('SPACY_DATA'):
|
||||||
data_path = os.environ.get('SPACY_DATA')
|
data_dir = os.environ.get('SPACY_DATA')
|
||||||
else:
|
else:
|
||||||
data_path = None
|
data_dir = None
|
||||||
return English(data_dir=data_path)
|
return English(data_dir=data_dir)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
|
|
|
@ -11,13 +11,13 @@ def token(doc):
|
||||||
|
|
||||||
def test_load_resources_and_process_text():
|
def test_load_resources_and_process_text():
|
||||||
if os.environ.get('SPACY_DATA'):
|
if os.environ.get('SPACY_DATA'):
|
||||||
data_path = os.environ.get('SPACY_DATA')
|
data_dir = os.environ.get('SPACY_DATA')
|
||||||
else:
|
else:
|
||||||
data_path = None
|
data_dir = None
|
||||||
print("Load EN from %s" % data_path)
|
print("Load EN from %s" % data_dir)
|
||||||
|
|
||||||
from spacy.en import English
|
from spacy.en import English
|
||||||
nlp = English(data_dir=data_path)
|
nlp = English(data_dir=data_dir)
|
||||||
doc = nlp('Hello, world. Here are two sentences.')
|
doc = nlp('Hello, world. Here are two sentences.')
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -42,8 +42,8 @@ cdef class Tokenizer:
|
||||||
return (self.__class__, args, None, None)
|
return (self.__class__, args, None, None)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def load(cls, via, Vocab vocab):
|
def load(cls, data_dir, Vocab vocab):
|
||||||
return cls.from_package(get_package(via), vocab=vocab)
|
return cls.from_package(get_package(data_dir), vocab=vocab)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_package(cls, package, Vocab vocab):
|
def from_package(cls, package, Vocab vocab):
|
||||||
|
|
|
@ -14,10 +14,10 @@ from . import about
|
||||||
from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
|
from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
|
||||||
|
|
||||||
|
|
||||||
def get_package():
|
def get_package(data_dir):
|
||||||
if not isinstance(via, six.string_types):
|
if not isinstance(data_dir, six.string_types):
|
||||||
raise RuntimeError('via must be a string')
|
raise RuntimeError('data_dir must be a string')
|
||||||
return DirPackage(via)
|
return DirPackage(data_dir)
|
||||||
|
|
||||||
|
|
||||||
def get_package_by_name(name=None, via=None):
|
def get_package_by_name(name=None, via=None):
|
||||||
|
|
|
@ -48,8 +48,8 @@ cdef class Vocab:
|
||||||
'''A map container for a language's LexemeC structs.
|
'''A map container for a language's LexemeC structs.
|
||||||
'''
|
'''
|
||||||
@classmethod
|
@classmethod
|
||||||
def load(cls, via, get_lex_attr=None):
|
def load(cls, data_dir, get_lex_attr=None):
|
||||||
return cls.from_package(get_package(via), get_lex_attr=get_lex_attr)
|
return cls.from_package(get_package(data_dir), get_lex_attr=get_lex_attr)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_package(cls, package, get_lex_attr=None):
|
def from_package(cls, package, get_lex_attr=None):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user