mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Merge changes to __init__.py
This commit is contained in:
commit
6420f86f02
|
@ -1,4 +1,3 @@
|
||||||
recursive-include include *.h
|
recursive-include include *.h
|
||||||
include buildbot.json
|
|
||||||
include LICENSE
|
include LICENSE
|
||||||
include README.rst
|
include README.rst
|
||||||
|
|
|
@ -1,25 +0,0 @@
|
||||||
{
|
|
||||||
"build": {
|
|
||||||
"sdist": [
|
|
||||||
"pip install -r requirements.txt",
|
|
||||||
"pip install \"numpy<1.8\"",
|
|
||||||
"python setup.py sdist"
|
|
||||||
],
|
|
||||||
"install": [
|
|
||||||
"pip install -v source.tar.gz"
|
|
||||||
],
|
|
||||||
"wheel": [
|
|
||||||
"python untar.py source.tar.gz .",
|
|
||||||
"python setup.py bdist_wheel",
|
|
||||||
"python cpdist.py dist"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"test": {
|
|
||||||
"after": ["install", "wheel"],
|
|
||||||
"run": [
|
|
||||||
"python -m spacy.en.download --force"
|
|
||||||
],
|
|
||||||
"package": "spacy",
|
|
||||||
"args": "--tb=native -x --models --vectors --slow"
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals, print_function
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from .util import set_lang_class, get_lang_class, parse_package_meta
|
from .util import set_lang_class, get_lang_class, parse_package_meta
|
||||||
|
from .deprecated import resolve_model_name
|
||||||
|
|
||||||
from . import en
|
from . import en
|
||||||
from . import de
|
from . import de
|
||||||
|
@ -35,13 +36,15 @@ set_lang_class(bn.Bengali.lang, bn.Bengali)
|
||||||
|
|
||||||
def load(name, **overrides):
|
def load(name, **overrides):
|
||||||
data_path = overrides.get('path', util.get_data_path())
|
data_path = overrides.get('path', util.get_data_path())
|
||||||
meta = parse_package_meta(data_path, name, require=False)
|
model_name = resolve_model_name(name)
|
||||||
|
meta = parse_package_meta(data_path, model_name, require=False)
|
||||||
lang = meta['lang'] if meta and 'lang' in meta else name
|
lang = meta['lang'] if meta and 'lang' in meta else name
|
||||||
cls = get_lang_class(lang)
|
cls = get_lang_class(lang)
|
||||||
overrides['meta'] = meta
|
overrides['meta'] = meta
|
||||||
model_path = Path(data_path) / name
|
model_path = Path(data_path / model_name)
|
||||||
if model_path.exists():
|
if model_path.exists():
|
||||||
overrides['path'] = model_path
|
overrides['path'] = model_path
|
||||||
|
|
||||||
return cls(**overrides)
|
return cls(**overrides)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -12,5 +12,5 @@ __license__ = 'MIT'
|
||||||
|
|
||||||
__docs__ = 'https://spacy.io/docs/usage'
|
__docs__ = 'https://spacy.io/docs/usage'
|
||||||
__download_url__ = 'https://github.com/explosion/spacy-models/releases/download'
|
__download_url__ = 'https://github.com/explosion/spacy-models/releases/download'
|
||||||
__compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json?token=ANAt54fi5zcUtnwGhMLw2klWwcAyHkZGks5Y0nw1wA%3D%3D'
|
__compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json'
|
||||||
__shortcuts__ = {'en': 'en_core_web_sm', 'de': 'de_core_web_md', 'vectors': 'en_vectors_glove_md'}
|
__shortcuts__ = {'en': 'en_core_web_sm', 'de': 'de_core_web_md', 'vectors': 'en_vectors_glove_md'}
|
||||||
|
|
|
@ -2,6 +2,7 @@ from pathlib import Path
|
||||||
from . import about
|
from . import about
|
||||||
from . import util
|
from . import util
|
||||||
from .download import download
|
from .download import download
|
||||||
|
from .link import link
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -86,6 +87,35 @@ def fix_glove_vectors_loading(overrides):
|
||||||
return overrides
|
return overrides
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_model_name(name):
|
||||||
|
"""If spaCy is loaded with 'de', check if symlink already exists. If
|
||||||
|
not, user have upgraded from older version and have old models installed.
|
||||||
|
Check if old model directory exists and if so, return that instead and create
|
||||||
|
shortcut link. If English model is found and no shortcut exists, raise error
|
||||||
|
and tell user to install new model.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if name == 'en' or name == 'de':
|
||||||
|
versions = ['1.0.0', '1.1.0']
|
||||||
|
data_path = Path(util.get_data_path())
|
||||||
|
model_path = data_path / name
|
||||||
|
v_model_paths = [data_path / Path(name + '-' + v) for v in versions]
|
||||||
|
|
||||||
|
if not model_path.exists(): # no shortcut found
|
||||||
|
for v_path in v_model_paths:
|
||||||
|
if v_path.exists(): # versioned model directory found
|
||||||
|
if name == 'de':
|
||||||
|
link(v_path, name)
|
||||||
|
return name
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
"Found English model at {p}. This model is not "
|
||||||
|
"compatible with the current version. See "
|
||||||
|
"https://spacy.io/docs/usage/models to download the "
|
||||||
|
"new model.".format(p=v_path))
|
||||||
|
return name
|
||||||
|
|
||||||
|
|
||||||
class ModelDownload():
|
class ModelDownload():
|
||||||
"""Replace download modules within en and de with deprecation warning and
|
"""Replace download modules within en and de with deprecation warning and
|
||||||
download default language model (using shortcut). Use classmethods to allow
|
download default language model (using shortcut). Use classmethods to allow
|
||||||
|
|
|
@ -28,6 +28,7 @@ def download(model=None, direct=False):
|
||||||
download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version))
|
download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version))
|
||||||
link(model_name, model, force=True)
|
link(model_name, model, force=True)
|
||||||
|
|
||||||
|
|
||||||
def get_compatibility():
|
def get_compatibility():
|
||||||
version = about.__version__
|
version = about.__version__
|
||||||
r = requests.get(about.__compatibility__)
|
r = requests.get(about.__compatibility__)
|
||||||
|
|
|
@ -5,6 +5,7 @@ from ..download import download, get_compatibility, get_version, check_error_dep
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
def test_download_fetch_compatibility():
|
def test_download_fetch_compatibility():
|
||||||
compatibility = get_compatibility()
|
compatibility = get_compatibility()
|
||||||
assert type(compatibility) == dict
|
assert type(compatibility) == dict
|
||||||
|
|
|
@ -12,10 +12,10 @@
|
||||||
"COMPANY_URL": "https://explosion.ai",
|
"COMPANY_URL": "https://explosion.ai",
|
||||||
"DEMOS_URL": "https://demos.explosion.ai",
|
"DEMOS_URL": "https://demos.explosion.ai",
|
||||||
|
|
||||||
"SPACY_VERSION": "1.6",
|
"SPACY_VERSION": "1.7",
|
||||||
"LATEST_NEWS": {
|
"LATEST_NEWS": {
|
||||||
"url": "https://explosion.ai/blog/deep-learning-formula-nlp",
|
"url": "/docs/usage/models",
|
||||||
"title": "The new deep learning formula for state-of-the-art NLP models"
|
"title": "Downloading and installing models as packages"
|
||||||
},
|
},
|
||||||
|
|
||||||
"SOCIAL": {
|
"SOCIAL": {
|
||||||
|
@ -54,8 +54,8 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"V_CSS": "1.15",
|
"V_CSS": "1.2",
|
||||||
"V_JS": "1.1",
|
"V_JS": "1.2",
|
||||||
"DEFAULT_SYNTAX": "python",
|
"DEFAULT_SYNTAX": "python",
|
||||||
"ANALYTICS": "UA-58931649-1",
|
"ANALYTICS": "UA-58931649-1",
|
||||||
"MAILCHIMP": {
|
"MAILCHIMP": {
|
||||||
|
|
|
@ -67,6 +67,17 @@ mixin aside-code(label, language)
|
||||||
block
|
block
|
||||||
|
|
||||||
|
|
||||||
|
//- Infobox
|
||||||
|
label - [string] infobox title (optional or false for no title)
|
||||||
|
|
||||||
|
mixin infobox(label)
|
||||||
|
aside.o-box.o-block.u-text-small
|
||||||
|
if label
|
||||||
|
h3.u-text-label.u-color-theme=label
|
||||||
|
|
||||||
|
block
|
||||||
|
|
||||||
|
|
||||||
//- Link button
|
//- Link button
|
||||||
url - [string] link href
|
url - [string] link href
|
||||||
trusted - [boolean] if not set / false, rel="noopener nofollow" is added
|
trusted - [boolean] if not set / false, rel="noopener nofollow" is added
|
||||||
|
|
|
@ -22,7 +22,10 @@ p
|
||||||
|
|
||||||
//- Links
|
//- Links
|
||||||
|
|
||||||
main p a, main table a, main > *:not(footer) li a, .c-aside a
|
main p a,
|
||||||
|
main table a,
|
||||||
|
main > *:not(footer) li a,
|
||||||
|
main aside a
|
||||||
@extend .u-link
|
@extend .u-link
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -62,6 +62,15 @@
|
||||||
border: 1px solid $color-subtle
|
border: 1px solid $color-subtle
|
||||||
padding: 3rem 2.5%
|
padding: 3rem 2.5%
|
||||||
|
|
||||||
|
|
||||||
|
//- Box
|
||||||
|
|
||||||
|
.o-box
|
||||||
|
background: $color-theme-light
|
||||||
|
padding: 2rem
|
||||||
|
border: 1px solid darken($color-theme-light, 5)
|
||||||
|
|
||||||
|
|
||||||
//- Icons
|
//- Icons
|
||||||
|
|
||||||
.o-icon
|
.o-icon
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
display: inline-block
|
display: inline-block
|
||||||
font-weight: bold
|
font-weight: bold
|
||||||
padding: 0.75em 1em
|
padding: 0.75em 1em
|
||||||
|
margin-bottom: 1px
|
||||||
border: 2px solid
|
border: 2px solid
|
||||||
border-radius: 2px
|
border-radius: 2px
|
||||||
text-align: center
|
text-align: center
|
||||||
|
|
|
@ -34,6 +34,7 @@ $color-dark: lighten($color-front, 20) !default
|
||||||
|
|
||||||
$color-theme: map-get($colors, $theme)
|
$color-theme: map-get($colors, $theme)
|
||||||
$color-theme-dark: darken(map-get($colors, $theme), 5)
|
$color-theme-dark: darken(map-get($colors, $theme), 5)
|
||||||
|
$color-theme-light: saturate(lighten(map-get($colors, $theme), 35), 15)
|
||||||
|
|
||||||
$color-subtle: #ddd !default
|
$color-subtle: #ddd !default
|
||||||
$color-subtle-light: #f6f6f6 !default
|
$color-subtle-light: #f6f6f6 !default
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
"sidebar": {
|
"sidebar": {
|
||||||
"Get started": {
|
"Get started": {
|
||||||
"Installation": "./",
|
"Installation": "./",
|
||||||
|
"Models": "models",
|
||||||
"Lightning tour": "lightning-tour",
|
"Lightning tour": "lightning-tour",
|
||||||
"Resources": "resources"
|
"Resources": "resources"
|
||||||
},
|
},
|
||||||
|
@ -28,6 +29,11 @@
|
||||||
|
|
||||||
"index": {
|
"index": {
|
||||||
"title": "Install spaCy",
|
"title": "Install spaCy",
|
||||||
|
"next": "models"
|
||||||
|
},
|
||||||
|
|
||||||
|
"models": {
|
||||||
|
"title": "Models",
|
||||||
"next": "lightning-tour"
|
"next": "lightning-tour"
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,16 @@ p
|
||||||
| #[a(href="#source-ubuntu") Ubuntu], #[a(href="#source-osx") macOS/OS X]
|
| #[a(href="#source-ubuntu") Ubuntu], #[a(href="#source-osx") macOS/OS X]
|
||||||
| and #[a(href="#source-windows") Windows] for details.
|
| and #[a(href="#source-windows") Windows] for details.
|
||||||
|
|
||||||
|
+aside("Download models")
|
||||||
|
| After installation you need to download a language model. For more info
|
||||||
|
| and available models, see the #[+a("/docs/usage/models") docs on models].
|
||||||
|
|
||||||
|
+code.o-no-block.
|
||||||
|
python -m spacy.download en
|
||||||
|
|
||||||
|
>>> import spacy
|
||||||
|
>>> nlp = spacy.load('en')
|
||||||
|
|
||||||
+h(2, "pip") pip
|
+h(2, "pip") pip
|
||||||
|
|
||||||
p Using pip, spaCy releases are currently only available as source packages.
|
p Using pip, spaCy releases are currently only available as source packages.
|
||||||
|
@ -43,64 +53,6 @@ p
|
||||||
| #[+a("https://github.com/conda-forge/spacy-feedstock") this repository].
|
| #[+a("https://github.com/conda-forge/spacy-feedstock") this repository].
|
||||||
| Improvements and pull requests to the recipe and setup are always appreciated.
|
| Improvements and pull requests to the recipe and setup are always appreciated.
|
||||||
|
|
||||||
+h(2, "models") Download models
|
|
||||||
|
|
||||||
p
|
|
||||||
| After installation you need to download a language model. Models for
|
|
||||||
| English (#[code en]) and German (#[code de]) are available.
|
|
||||||
|
|
||||||
+code(false, "bash").
|
|
||||||
python -m spacy.en.download all
|
|
||||||
python -m spacy.de.download all
|
|
||||||
|
|
||||||
+aside-code("Examples", "bash").
|
|
||||||
# Install English tagger, parser and NER
|
|
||||||
python -m spacy.en.download parser
|
|
||||||
|
|
||||||
# Install English GloVe vectors
|
|
||||||
python -m spacy.en.download glove
|
|
||||||
|
|
||||||
# Upgrade/overwrite existing data
|
|
||||||
python -m spacy.en.download --force
|
|
||||||
|
|
||||||
# Check whether the model was successfully installed
|
|
||||||
python -c "import spacy; spacy.load('en'); print('OK')"
|
|
||||||
|
|
||||||
p
|
|
||||||
| The download command fetches about 1 GB of data which it
|
|
||||||
| installs within the #[code spacy] package directory.
|
|
||||||
|
|
||||||
+h(3, "custom-location") Download model to custom location
|
|
||||||
|
|
||||||
p
|
|
||||||
| You can specify where #[code spacy.en.download] and
|
|
||||||
| #[code spacy.de.download] download the language model to using the
|
|
||||||
| #[code --data-path] or #[code -d] argument:
|
|
||||||
|
|
||||||
+code(false, "bash").
|
|
||||||
python -m spacy.en.download all --data-path /some/dir
|
|
||||||
|
|
||||||
p
|
|
||||||
| If you choose to download to a custom location, you will need to tell
|
|
||||||
| spaCy where to load the model from in order to use it. You can do this
|
|
||||||
| either by calling #[code spacy.util.set_data_path()] before calling
|
|
||||||
| #[code spacy.load()], or by passing a #[code path] argument to the
|
|
||||||
| #[code spacy.en.English] or #[code spacy.de.German] constructors.
|
|
||||||
|
|
||||||
+h(3, "models-manual") Download models manually
|
|
||||||
|
|
||||||
p
|
|
||||||
| As of v1.6, the models and word vectors are also available as direct
|
|
||||||
| downloads from GitHub, attached to the #[+a(gh("spaCy") + "/releases") releases] as #[code .tar.gz] archives.
|
|
||||||
|
|
||||||
p
|
|
||||||
| To install the models manually, first find the default data path. You can
|
|
||||||
| use #[code spacy.util.get_data_path()] to find the directory where spaCy
|
|
||||||
| will look for its models, or change the default data path with
|
|
||||||
| #[code spacy.util.set_data_path()]. Then simply unpack the archive and
|
|
||||||
| place the contained folder in that directory. You can now load the models
|
|
||||||
| via #[code spacy.load()].
|
|
||||||
|
|
||||||
+h(2, "source") Compile from source
|
+h(2, "source") Compile from source
|
||||||
|
|
||||||
p
|
p
|
||||||
|
|
|
@ -6,6 +6,15 @@ p
|
||||||
| The following examples and code snippets give you an overview of spaCy's
|
| The following examples and code snippets give you an overview of spaCy's
|
||||||
| functionality and its usage.
|
| functionality and its usage.
|
||||||
|
|
||||||
|
+h(2, "models") Install and load models
|
||||||
|
|
||||||
|
+code(false, "bash").
|
||||||
|
python -m spacy.download en
|
||||||
|
|
||||||
|
+code.
|
||||||
|
import spacy
|
||||||
|
nlp = spacy.load('en')
|
||||||
|
|
||||||
+h(2, "examples-resources") Load resources and process text
|
+h(2, "examples-resources") Load resources and process text
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
|
|
285
website/docs/usage/models.jade
Normal file
285
website/docs/usage/models.jade
Normal file
|
@ -0,0 +1,285 @@
|
||||||
|
//- 💫 DOCS > USAGE > MODELS
|
||||||
|
|
||||||
|
include ../../_includes/_mixins
|
||||||
|
|
||||||
|
p
|
||||||
|
| As of v1.7.0, models for spaCy can be installed as #[strong Python packages].
|
||||||
|
| This means that they're a component of your application, just like any
|
||||||
|
| other module. They're versioned and can be defined as a dependency in your
|
||||||
|
| #[code requirements.txt]. Models can be installed from a download URL or
|
||||||
|
| a local directory, manually or via #[+a("https://pypi.python.org/pypi/pip") pip].
|
||||||
|
| Their data can be located anywhere on your file system. To make a model
|
||||||
|
| available to spaCy, all you need to do is create a "shortcut link", an
|
||||||
|
| internal alias that tells spaCy where to find the data files for a specific
|
||||||
|
| model name.
|
||||||
|
|
||||||
|
+infobox("Important note")
|
||||||
|
| Due to improvements in the English lemmatizer in v1.7.0, you need to download the
|
||||||
|
| new English model. The German model is still compatible and will be
|
||||||
|
| recognised and linked automatically.
|
||||||
|
|
||||||
|
+aside-code("Quickstart").
|
||||||
|
# Install spaCy and download English model
|
||||||
|
pip install spacy
|
||||||
|
python -m spacy.download en
|
||||||
|
|
||||||
|
# Usage in Python
|
||||||
|
import spacy
|
||||||
|
nlp = spacy.load('en')
|
||||||
|
doc = nlp(u'This is a sentence.')
|
||||||
|
|
||||||
|
+h(2, "available") Available models
|
||||||
|
|
||||||
|
+table(["Name", "Size", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code en_core_web_sm]
|
||||||
|
+cell 50 MB
|
||||||
|
+cell Vocab, syntax, entities, word vectors #[+tag default]
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code en_core_web_md]
|
||||||
|
+cell 1 GB
|
||||||
|
+cell Vocab, syntax, entities, word vectors
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code en_depent_web_md]
|
||||||
|
+cell 328 MB
|
||||||
|
+cell Vocab, syntax, entities
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code en_vectors_glove_md]
|
||||||
|
+cell 727 MB
|
||||||
|
+cell
|
||||||
|
| #[+a("http://nlp.stanford.edu/projects/glove/") GloVe] Common
|
||||||
|
| Crawl vectors
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code de_core_news_md]
|
||||||
|
+cell 645 MB
|
||||||
|
+cell Vocab, syntax, entities, word vectors #[+tag default]
|
||||||
|
|
||||||
|
p
|
||||||
|
| Models are now available as #[code .tar.gz] archives #[+a(gh("spacy-models")) from GitHub],
|
||||||
|
| attached to individual releases. They can be downloaded and loaded manually,
|
||||||
|
| or using spaCy's #[code download] and #[code link] commands. All models
|
||||||
|
| follow the naming convention of #[code [language]_[type]_[genre]_[size]].
|
||||||
|
|
||||||
|
+button(gh("spacy-models") + "/releases", true, "primary") View models
|
||||||
|
|
||||||
|
+h(2, "download") Downloading models
|
||||||
|
|
||||||
|
+aside("Downloading models in spaCy < v1.7")
|
||||||
|
| In older versions of spaCy, you can still use the old download commands.
|
||||||
|
| This will download and install the models into the #[code spacy/data]
|
||||||
|
| directory.
|
||||||
|
|
||||||
|
+code.o-no-block.
|
||||||
|
python -m spacy.en.download all
|
||||||
|
python -m spacy.de.download all
|
||||||
|
python -m spacy.en.download glove
|
||||||
|
|
||||||
|
| The old models are also #[+a(gh("spacy") + "/tree/v1.6.0") attached to the v1.6.0 release].
|
||||||
|
| To download and install them manually, unpack the archive, drop the
|
||||||
|
| contained directory into #[code spacy/data] and load the model via
|
||||||
|
| #[code spacy.load('en')] or #[code spacy.load('de')].
|
||||||
|
|
||||||
|
p
|
||||||
|
| The easiest way to download a model is via spaCy's #[code download]
|
||||||
|
| command. It takes care of finding the best-matching model compatible with
|
||||||
|
| your spaCy installation.
|
||||||
|
|
||||||
|
+code(false, "bash").
|
||||||
|
# out-of-the-box: download best-matching default model
|
||||||
|
python -m spacy.download en
|
||||||
|
python -m spacy.download de
|
||||||
|
|
||||||
|
# download best-matching version of specific model for your spaCy installation
|
||||||
|
python -m spacy.download en_core_web_md
|
||||||
|
|
||||||
|
# download exact model version (doesn't create shortcut link)
|
||||||
|
python -m spacy.download en_core_web_md-1.2.0 --direct
|
||||||
|
|
||||||
|
p
|
||||||
|
| The download command will #[+a("#download-pip") install the model] via
|
||||||
|
| pip, place the package in your #[code site-packages] directory and create
|
||||||
|
| a #[+a("#usage") shortcut link] that lets you load the model by name. The
|
||||||
|
| shortcut link will be the same as the model name used in
|
||||||
|
| #[code spacy.download].
|
||||||
|
|
||||||
|
+code(false, "bash").
|
||||||
|
pip install spacy
|
||||||
|
python -m spacy.download en
|
||||||
|
|
||||||
|
+code.
|
||||||
|
import spacy
|
||||||
|
nlp = spacy.load('en')
|
||||||
|
doc = nlp(u'This is a sentence.')
|
||||||
|
|
||||||
|
+h(3, "download-pip") Installation via pip
|
||||||
|
|
||||||
|
p
|
||||||
|
| To download a model directly using #[+a("https://pypi.python.org/pypi/pip") pip],
|
||||||
|
| simply point #[code pip install] to the URL or local path of the archive
|
||||||
|
| file. To find the direct link to a model, head over to the
|
||||||
|
| #[+a(gh("spacy-models") + "/releases") model releases], right click on the archive
|
||||||
|
| link and copy it to your clipboard.
|
||||||
|
|
||||||
|
+code(false, "bash").
|
||||||
|
# with external URL
|
||||||
|
pip install #{gh("spacy-models")}/releases/download/en_core_web_md-1.2.0/en_core_web_md-1.2.0.tar.gz
|
||||||
|
|
||||||
|
# with local file
|
||||||
|
pip install /Users/you/en_core_web_md-1.2.0.tar.gz
|
||||||
|
|
||||||
|
p
|
||||||
|
| By default, this will install the model into your #[code site-packages]
|
||||||
|
| directory. You can then create a #[+a("#usage") shortcut link] for your
|
||||||
|
| model to load it via #[code spacy.load()], or #[+a("#usage-import") import it]
|
||||||
|
| as a Python module.
|
||||||
|
|
||||||
|
+h(3, "download-manual") Manual download and installation
|
||||||
|
|
||||||
|
p
|
||||||
|
| In some cases, you might prefer downloading the data manually, for
|
||||||
|
| example to place it into a custom directory. You can download the model
|
||||||
|
| via your browser from the #[+a(gh("spacy-models")) latest releases], or configure
|
||||||
|
| your own download script using the URL of the archive file. The archive
|
||||||
|
| consists of a model directory that contains another directory with the
|
||||||
|
| model data.
|
||||||
|
|
||||||
|
+code("Directory structure", "yaml").
|
||||||
|
└── en_core_web_md-1.2.0.tar.gz # downloaded archive
|
||||||
|
├── meta.json # model meta data
|
||||||
|
├── setup.py # setup file for pip installation
|
||||||
|
└── en_core_web_md # model directory
|
||||||
|
├── __init__.py # init for pip installation
|
||||||
|
├── meta.json # model meta data
|
||||||
|
└── en_core_web_md-1.2.0 # model data
|
||||||
|
|
||||||
|
p
|
||||||
|
| You can place the model data directory anywhere on your local file system.
|
||||||
|
| To use it with spaCy, simply assign it a name by creating a
|
||||||
|
| #[+a("#usage") shortcut link] for the data directory.
|
||||||
|
|
||||||
|
+h(2, "usage") Using models with spaCy
|
||||||
|
|
||||||
|
p
|
||||||
|
| While previous versions of spaCy required you to maintain a data directory
|
||||||
|
| containing the models for each installation, you can now choose how and
|
||||||
|
| where you want to keep your data files. To load the models conveniently
|
||||||
|
| from within spaCy, you can use the #[code spacy.link] command to create a
|
||||||
|
| symlink. This lets you set up custom shortcut links for models so you can
|
||||||
|
| load them by name.
|
||||||
|
|
||||||
|
+code(false, "bash").
|
||||||
|
python -m spacy.link [package name or path] [shortcut] [--force]
|
||||||
|
|
||||||
|
p
|
||||||
|
| The first argument is the package name (if the model was installed via
|
||||||
|
| pip), or a local path to the data directory. The second argument is
|
||||||
|
| the internal name you want to use for the model. Setting the #[code --force]
|
||||||
|
| flag will overwrite any existing links.
|
||||||
|
|
||||||
|
+code("Examples", "bash").
|
||||||
|
# set up shortcut link to load installed package as "en_default"
|
||||||
|
python -m spacy.link en_core_web_md en_default
|
||||||
|
|
||||||
|
# set up shortcut link to load local model as "my_amazing_model"
|
||||||
|
python -m spacy.link /Users/you/model my_amazing_model
|
||||||
|
|
||||||
|
+h(3, "usage-loading") Loading models
|
||||||
|
|
||||||
|
p
|
||||||
|
| To load a model, use #[code spacy.load()] with the model's shortcut link.
|
||||||
|
|
||||||
|
+code.
|
||||||
|
import spacy
|
||||||
|
nlp = spacy.load('en_default')
|
||||||
|
doc = nlp(u'This is a sentence.')
|
||||||
|
|
||||||
|
p
|
||||||
|
| You can also use the #[code info()] method to print a model's meta data
|
||||||
|
| before loading it. Each #[code Language] object returned by #[code spacy.load()]
|
||||||
|
| also exposes the model's meta data as the attribute #[code meta]:
|
||||||
|
|
||||||
|
+code.
|
||||||
|
import spacy
|
||||||
|
spacy.info('en_default')
|
||||||
|
# JSON-formatted model meta data
|
||||||
|
|
||||||
|
nlp = spacy.load('en_default')
|
||||||
|
print(nlp.meta['version'])
|
||||||
|
# 1.2.0
|
||||||
|
|
||||||
|
+h(3, "usage-import") Importing models as modules
|
||||||
|
|
||||||
|
p
|
||||||
|
| If you've installed a model via pip, you can also #[code import] it
|
||||||
|
| directly and then call its #[code load()] method with no arguments:
|
||||||
|
|
||||||
|
+code.
|
||||||
|
import spacy
|
||||||
|
import en_core_web_md
|
||||||
|
|
||||||
|
nlp = en_core_web_md.load()
|
||||||
|
doc = nlp(u'This is a sentence.')
|
||||||
|
|
||||||
|
+h(2, "own-models") Using your own models
|
||||||
|
|
||||||
|
p
|
||||||
|
| If you've trained your own model, for example for
|
||||||
|
| #[+a("/docs/usage/adding-languages") additional languages], you can
|
||||||
|
| create a shortcut link for it by pointing #[code spacy.link] to the
|
||||||
|
| model's data directory. To allow your model to be downloaded and
|
||||||
|
| installed via pip, you'll also need to generate a package for it.
|
||||||
|
|
||||||
|
+infobox("Important note")
|
||||||
|
| The model packages are #[strong not suitable] for the public
|
||||||
|
| #[+a("https://pypi.python.org") pypi.python.org] directory, which is not
|
||||||
|
| designed for binary data and files over 50 MB. However, if your company
|
||||||
|
| is running an internal installation of pypi, publishing your models on
|
||||||
|
| there can be a convenient solution to share them with your team.
|
||||||
|
|
||||||
|
p The model directory should look like this:
|
||||||
|
|
||||||
|
+code("Directory structure", "yaml").
|
||||||
|
└── /
|
||||||
|
├── MANIFEST.in # to include meta.json
|
||||||
|
├── meta.json # model meta data
|
||||||
|
├── setup.py # setup file for pip installation
|
||||||
|
└── en_core_web_md # model directory
|
||||||
|
├── __init__.py # init for pip installation
|
||||||
|
└── en_core_web_md-1.2.0 # model data
|
||||||
|
|
||||||
|
p
|
||||||
|
| You can find templates for all files in our
|
||||||
|
| #[+a(gh("spacy-dev-resources", "templates/model")) spaCy dev resources].
|
||||||
|
| Unless you want to customise installation and loading, the only file
|
||||||
|
| you'll need to modify is #[code meta.json], which includes the model's
|
||||||
|
| meta data. It will later be copied into the package and data directory.
|
||||||
|
|
||||||
|
+code("meta.json", "json").
|
||||||
|
{
|
||||||
|
"name": "core_web_md",
|
||||||
|
"lang": "en",
|
||||||
|
"version": "1.2.0",
|
||||||
|
"spacy_version": "1.7.0",
|
||||||
|
"description": "English model for spaCy",
|
||||||
|
"author": "Explosion AI",
|
||||||
|
"email": "contact@explosion.ai",
|
||||||
|
"license": "MIT"
|
||||||
|
}
|
||||||
|
|
||||||
|
p
|
||||||
|
| Keep in mind that the directories need to be named according to the
|
||||||
|
| naming conventions. The #[code lang] setting is also used to create the
|
||||||
|
| respective #[code Language] class in spaCy, which will later be returned
|
||||||
|
| by the model's #[code load()] method.
|
||||||
|
|
||||||
|
p
|
||||||
|
| To generate the package, run the following command from within the
|
||||||
|
| directory. This will create a #[code .tar.gz] archive in a directory
|
||||||
|
| #[code /dist].
|
||||||
|
|
||||||
|
+code(false, "bash").
|
||||||
|
python setup.py sdist
|
|
@ -7,6 +7,13 @@ p Many of the associated tools and resources that we're developing alongside spa
|
||||||
+h(2, "developer") Developer tools
|
+h(2, "developer") Developer tools
|
||||||
|
|
||||||
+table(["Name", "Description"])
|
+table(["Name", "Description"])
|
||||||
|
+row
|
||||||
|
+cell
|
||||||
|
+src(gh("spacy-models")) spaCy Models
|
||||||
|
|
||||||
|
+cell
|
||||||
|
| Model releases for spaCy.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell
|
+cell
|
||||||
+src(gh("spacy-dev-resources")) spaCy Dev Resources
|
+src(gh("spacy-dev-resources")) spaCy Dev Resources
|
||||||
|
@ -55,7 +62,7 @@ p Many of the associated tools and resources that we're developing alongside spa
|
||||||
+src(gh("thinc")) Thinc
|
+src(gh("thinc")) Thinc
|
||||||
|
|
||||||
+cell
|
+cell
|
||||||
| Super sparse multi-class machine learning with Cython.
|
| spaCy's Machine Learning library for NLP in Python.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell
|
+cell
|
||||||
|
|
|
@ -66,7 +66,7 @@ include _includes/_mixins
|
||||||
+grid
|
+grid
|
||||||
+grid-col("two-thirds")
|
+grid-col("two-thirds")
|
||||||
+terminal("lightning_tour.py").
|
+terminal("lightning_tour.py").
|
||||||
# Install: pip install spacy && python -m spacy.en.download
|
# Install: pip install spacy && python -m spacy.download en
|
||||||
import spacy
|
import spacy
|
||||||
|
|
||||||
# Load English tokenizer, tagger, parser, NER and word vectors
|
# Load English tokenizer, tagger, parser, NER and word vectors
|
||||||
|
|
Loading…
Reference in New Issue
Block a user