Merge changes to __init__.py

This commit is contained in:
Matthew Honnibal 2017-03-17 19:51:45 +01:00
commit 6420f86f02
19 changed files with 388 additions and 95 deletions

View File

@ -1,4 +1,3 @@
recursive-include include *.h recursive-include include *.h
include buildbot.json
include LICENSE include LICENSE
include README.rst include README.rst

View File

@ -1,25 +0,0 @@
{
"build": {
"sdist": [
"pip install -r requirements.txt",
"pip install \"numpy<1.8\"",
"python setup.py sdist"
],
"install": [
"pip install -v source.tar.gz"
],
"wheel": [
"python untar.py source.tar.gz .",
"python setup.py bdist_wheel",
"python cpdist.py dist"
]
},
"test": {
"after": ["install", "wheel"],
"run": [
"python -m spacy.en.download --force"
],
"package": "spacy",
"args": "--tb=native -x --models --vectors --slow"
}
}

View File

@ -4,6 +4,7 @@ from __future__ import unicode_literals, print_function
import json import json
from pathlib import Path from pathlib import Path
from .util import set_lang_class, get_lang_class, parse_package_meta from .util import set_lang_class, get_lang_class, parse_package_meta
from .deprecated import resolve_model_name
from . import en from . import en
from . import de from . import de
@ -35,13 +36,15 @@ set_lang_class(bn.Bengali.lang, bn.Bengali)
def load(name, **overrides): def load(name, **overrides):
data_path = overrides.get('path', util.get_data_path()) data_path = overrides.get('path', util.get_data_path())
meta = parse_package_meta(data_path, name, require=False) model_name = resolve_model_name(name)
meta = parse_package_meta(data_path, model_name, require=False)
lang = meta['lang'] if meta and 'lang' in meta else name lang = meta['lang'] if meta and 'lang' in meta else name
cls = get_lang_class(lang) cls = get_lang_class(lang)
overrides['meta'] = meta overrides['meta'] = meta
model_path = Path(data_path) / name model_path = Path(data_path / model_name)
if model_path.exists(): if model_path.exists():
overrides['path'] = model_path overrides['path'] = model_path
return cls(**overrides) return cls(**overrides)

View File

@ -12,5 +12,5 @@ __license__ = 'MIT'
__docs__ = 'https://spacy.io/docs/usage' __docs__ = 'https://spacy.io/docs/usage'
__download_url__ = 'https://github.com/explosion/spacy-models/releases/download' __download_url__ = 'https://github.com/explosion/spacy-models/releases/download'
__compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json?token=ANAt54fi5zcUtnwGhMLw2klWwcAyHkZGks5Y0nw1wA%3D%3D' __compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json'
__shortcuts__ = {'en': 'en_core_web_sm', 'de': 'de_core_web_md', 'vectors': 'en_vectors_glove_md'} __shortcuts__ = {'en': 'en_core_web_sm', 'de': 'de_core_web_md', 'vectors': 'en_vectors_glove_md'}

View File

@ -2,6 +2,7 @@ from pathlib import Path
from . import about from . import about
from . import util from . import util
from .download import download from .download import download
from .link import link
try: try:
@ -86,6 +87,35 @@ def fix_glove_vectors_loading(overrides):
return overrides return overrides
def resolve_model_name(name):
    """Resolve the legacy shortcut names 'en' and 'de' against the data path.

    Any other name is returned untouched. For 'en' and 'de', nothing happens
    if a path with that name already exists in the data directory (the
    shortcut is present). Otherwise the user has probably upgraded from an
    older version and may still have an old, versioned model directory
    ('<name>-1.0.0' or '<name>-1.1.0') installed:

    * German: the first versioned directory found is passed to `link()` so
      that it becomes available under the shortcut name.
    * English: the old model cannot be used, so an error tells the user to
      download the new model.

    name (unicode): Model name or shortcut passed to `spacy.load()`.
    RETURNS (unicode): The name, unchanged.
    RAISES (ValueError): If an old English model directory is found and no
        'en' shortcut exists.
    """
    if name not in ('en', 'de'):
        return name
    data_dir = Path(util.get_data_path())
    if (data_dir / name).exists():
        # Shortcut already exists, nothing to migrate.
        return name
    for version in ('1.0.0', '1.1.0'):
        legacy_dir = data_dir / (name + '-' + version)
        if not legacy_dir.exists():
            continue
        # A versioned model directory from an older installation was found.
        if name != 'de':
            raise ValueError(
                "Found English model at {p}. This model is not "
                "compatible with the current version. See "
                "https://spacy.io/docs/usage/models to download the "
                "new model.".format(p=legacy_dir))
        link(legacy_dir, name)
        return name
    return name
class ModelDownload(): class ModelDownload():
"""Replace download modules within en and de with deprecation warning and """Replace download modules within en and de with deprecation warning and
download default language model (using shortcut). Use classmethods to allow download default language model (using shortcut). Use classmethods to allow

View File

@ -28,6 +28,7 @@ def download(model=None, direct=False):
download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version)) download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version))
link(model_name, model, force=True) link(model_name, model, force=True)
def get_compatibility(): def get_compatibility():
version = about.__version__ version = about.__version__
r = requests.get(about.__compatibility__) r = requests.get(about.__compatibility__)

View File

@ -5,6 +5,7 @@ from ..download import download, get_compatibility, get_version, check_error_dep
import pytest import pytest
@pytest.mark.slow
def test_download_fetch_compatibility(): def test_download_fetch_compatibility():
compatibility = get_compatibility() compatibility = get_compatibility()
assert type(compatibility) == dict assert type(compatibility) == dict

View File

@ -12,10 +12,10 @@
"COMPANY_URL": "https://explosion.ai", "COMPANY_URL": "https://explosion.ai",
"DEMOS_URL": "https://demos.explosion.ai", "DEMOS_URL": "https://demos.explosion.ai",
"SPACY_VERSION": "1.6", "SPACY_VERSION": "1.7",
"LATEST_NEWS": { "LATEST_NEWS": {
"url": "https://explosion.ai/blog/deep-learning-formula-nlp", "url": "/docs/usage/models",
"title": "The new deep learning formula for state-of-the-art NLP models" "title": "Downloading and installing models as packages"
}, },
"SOCIAL": { "SOCIAL": {
@ -54,8 +54,8 @@
} }
}, },
"V_CSS": "1.15", "V_CSS": "1.2",
"V_JS": "1.1", "V_JS": "1.2",
"DEFAULT_SYNTAX": "python", "DEFAULT_SYNTAX": "python",
"ANALYTICS": "UA-58931649-1", "ANALYTICS": "UA-58931649-1",
"MAILCHIMP": { "MAILCHIMP": {

View File

@ -67,6 +67,17 @@ mixin aside-code(label, language)
block block
//- Infobox
label - [string] infobox title (optional or false for no title)
mixin infobox(label)
aside.o-box.o-block.u-text-small
if label
h3.u-text-label.u-color-theme=label
block
//- Link button //- Link button
url - [string] link href url - [string] link href
trusted - [boolean] if not set / false, rel="noopener nofollow" is added trusted - [boolean] if not set / false, rel="noopener nofollow" is added

View File

@ -22,7 +22,10 @@ p
//- Links //- Links
main p a, main table a, main > *:not(footer) li a, .c-aside a main p a,
main table a,
main > *:not(footer) li a,
main aside a
@extend .u-link @extend .u-link

View File

@ -62,6 +62,15 @@
border: 1px solid $color-subtle border: 1px solid $color-subtle
padding: 3rem 2.5% padding: 3rem 2.5%
//- Box
.o-box
background: $color-theme-light
padding: 2rem
border: 1px solid darken($color-theme-light, 5)
//- Icons //- Icons
.o-icon .o-icon

View File

@ -4,6 +4,7 @@
display: inline-block display: inline-block
font-weight: bold font-weight: bold
padding: 0.75em 1em padding: 0.75em 1em
margin-bottom: 1px
border: 2px solid border: 2px solid
border-radius: 2px border-radius: 2px
text-align: center text-align: center

View File

@ -34,6 +34,7 @@ $color-dark: lighten($color-front, 20) !default
$color-theme: map-get($colors, $theme) $color-theme: map-get($colors, $theme)
$color-theme-dark: darken(map-get($colors, $theme), 5) $color-theme-dark: darken(map-get($colors, $theme), 5)
$color-theme-light: saturate(lighten(map-get($colors, $theme), 35), 15)
$color-subtle: #ddd !default $color-subtle: #ddd !default
$color-subtle-light: #f6f6f6 !default $color-subtle-light: #f6f6f6 !default

View File

@ -2,6 +2,7 @@
"sidebar": { "sidebar": {
"Get started": { "Get started": {
"Installation": "./", "Installation": "./",
"Models": "models",
"Lightning tour": "lightning-tour", "Lightning tour": "lightning-tour",
"Resources": "resources" "Resources": "resources"
}, },
@ -28,6 +29,11 @@
"index": { "index": {
"title": "Install spaCy", "title": "Install spaCy",
"next": "models"
},
"models": {
"title": "Models",
"next": "lightning-tour" "next": "lightning-tour"
}, },

View File

@ -12,6 +12,16 @@ p
| #[a(href="#source-ubuntu") Ubuntu], #[a(href="#source-osx") macOS/OS X] | #[a(href="#source-ubuntu") Ubuntu], #[a(href="#source-osx") macOS/OS X]
| and #[a(href="#source-windows") Windows] for details. | and #[a(href="#source-windows") Windows] for details.
+aside("Download models")
| After installation you need to download a language model. For more info
| and available models, see the #[+a("/docs/usage/models") docs on models].
+code.o-no-block.
python -m spacy.download en
&gt;&gt;&gt; import spacy
&gt;&gt;&gt; nlp = spacy.load('en')
+h(2, "pip") pip +h(2, "pip") pip
p Using pip, spaCy releases are currently only available as source packages. p Using pip, spaCy releases are currently only available as source packages.
@ -43,64 +53,6 @@ p
| #[+a("https://github.com/conda-forge/spacy-feedstock") this repository]. | #[+a("https://github.com/conda-forge/spacy-feedstock") this repository].
| Improvements and pull requests to the recipe and setup are always appreciated. | Improvements and pull requests to the recipe and setup are always appreciated.
+h(2, "models") Download models
p
| After installation you need to download a language model. Models for
| English (#[code en]) and German (#[code de]) are available.
+code(false, "bash").
python -m spacy.en.download all
python -m spacy.de.download all
+aside-code("Examples", "bash").
# Install English tagger, parser and NER
python -m spacy.en.download parser
# Install English GloVe vectors
python -m spacy.en.download glove
# Upgrade/overwrite existing data
python -m spacy.en.download --force
# Check whether the model was successfully installed
python -c "import spacy; spacy.load('en'); print('OK')"
p
| The download command fetches about 1 GB of data which it
| installs within the #[code spacy] package directory.
+h(3, "custom-location") Download model to custom location
p
| You can specify where #[code spacy.en.download] and
| #[code spacy.de.download] download the language model to using the
| #[code --data-path] or #[code -d] argument:
+code(false, "bash").
python -m spacy.en.download all --data-path /some/dir
p
| If you choose to download to a custom location, you will need to tell
| spaCy where to load the model from in order to use it. You can do this
| either by calling #[code spacy.util.set_data_path()] before calling
| #[code spacy.load()], or by passing a #[code path] argument to the
| #[code spacy.en.English] or #[code spacy.de.German] constructors.
+h(3, "models-manual") Download models manually
p
| As of v1.6, the models and word vectors are also available as direct
| downloads from GitHub, attached to the #[+a(gh("spaCy") + "/releases") releases] as #[code .tar.gz] archives.
p
| To install the models manually, first find the default data path. You can
| use #[code spacy.util.get_data_path()] to find the directory where spaCy
| will look for its models, or change the default data path with
| #[code spacy.util.set_data_path()]. Then simply unpack the archive and
| place the contained folder in that directory. You can now load the models
| via #[code spacy.load()].
+h(2, "source") Compile from source +h(2, "source") Compile from source
p p

View File

@ -6,6 +6,15 @@ p
| The following examples and code snippets give you an overview of spaCy's | The following examples and code snippets give you an overview of spaCy's
| functionality and its usage. | functionality and its usage.
+h(2, "models") Install and load models
+code(false, "bash").
python -m spacy.download en
+code.
import spacy
nlp = spacy.load('en')
+h(2, "examples-resources") Load resources and process text +h(2, "examples-resources") Load resources and process text
+code. +code.

View File

@ -0,0 +1,285 @@
//- 💫 DOCS > USAGE > MODELS
include ../../_includes/_mixins
p
| As of v1.7.0, models for spaCy can be installed as #[strong Python packages].
| This means that they're a component of your application, just like any
| other module. They're versioned and can be defined as a dependency in your
| #[code requirements.txt]. Models can be installed from a download URL or
| a local directory, manually or via #[+a("https://pypi.python.org/pypi/pip") pip].
| Their data can be located anywhere on your file system. To make a model
| available to spaCy, all you need to do is create a "shortcut link", an
| internal alias that tells spaCy where to find the data files for a specific
| model name.
+infobox("Important note")
| Due to improvements in the English lemmatizer in v1.7.0, you need to download the
| new English model. The German model is still compatible and will be
| recognised and linked automatically.
+aside-code("Quickstart").
# Install spaCy and download English model
pip install spacy
python -m spacy.download en
# Usage in Python
import spacy
nlp = spacy.load('en')
doc = nlp(u'This is a sentence.')
+h(2, "available") Available models
+table(["Name", "Size", "Description"])
+row
+cell #[code en_core_web_sm]
+cell 50 MB
+cell Vocab, syntax, entities, word vectors #[+tag default]
+row
+cell #[code en_core_web_md]
+cell 1 GB
+cell Vocab, syntax, entities, word vectors
+row
+cell #[code en_depent_web_md]
+cell 328 MB
+cell Vocab, syntax, entities
+row
+cell #[code en_vectors_glove_md]
+cell 727 MB
+cell
| #[+a("http://nlp.stanford.edu/projects/glove/") GloVe] Common
| Crawl vectors
+row
+cell #[code de_core_news_md]
+cell 645 MB
+cell Vocab, syntax, entities, word vectors #[+tag default]
p
| Models are now available as #[code .tar.gz] archives #[+a(gh("spacy-models")) from GitHub],
| attached to individual releases. They can be downloaded and loaded manually,
| or using spaCy's #[code download] and #[code link] commands. All models
| follow the naming convention of #[code [language]_[type]_[genre]_[size]].
+button(gh("spacy-models") + "/releases", true, "primary") View models
+h(2, "download") Downloading models
+aside("Downloading models in spaCy < v1.7")
| In older versions of spaCy, you can still use the old download commands.
| This will download and install the models into the #[code spacy/data]
| directory.
+code.o-no-block.
python -m spacy.en.download all
python -m spacy.de.download all
python -m spacy.en.download glove
| The old models are also #[+a(gh("spacy") + "/tree/v1.6.0") attached to the v1.6.0 release].
| To download and install them manually, unpack the archive, drop the
| contained directory into #[code spacy/data] and load the model via
| #[code spacy.load('en')] or #[code spacy.load('de')].
p
| The easiest way to download a model is via spaCy's #[code download]
| command. It takes care of finding the best-matching model compatible with
| your spaCy installation.
+code(false, "bash").
# out-of-the-box: download best-matching default model
python -m spacy.download en
python -m spacy.download de
# download best-matching version of specific model for your spaCy installation
python -m spacy.download en_core_web_md
# download exact model version (doesn't create shortcut link)
python -m spacy.download en_core_web_md-1.2.0 --direct
p
| The download command will #[+a("#download-pip") install the model] via
| pip, place the package in your #[code site-packages] directory and create
| a #[+a("#usage") shortcut link] that lets you load the model by name. The
| shortcut link will be the same as the model name used in
| #[code spacy.download].
+code(false, "bash").
pip install spacy
python -m spacy.download en
+code.
import spacy
nlp = spacy.load('en')
doc = nlp(u'This is a sentence.')
+h(3, "download-pip") Installation via pip
p
| To download a model directly using #[+a("https://pypi.python.org/pypi/pip") pip],
| simply point #[code pip install] to the URL or local path of the archive
| file. To find the direct link to a model, head over to the
| #[+a(gh("spacy-models") + "/releases") model releases], right click on the archive
| link and copy it to your clipboard.
+code(false, "bash").
# with external URL
pip install #{gh("spacy-models")}/releases/download/en_core_web_md-1.2.0/en_core_web_md-1.2.0.tar.gz
# with local file
pip install /Users/you/en_core_web_md-1.2.0.tar.gz
p
| By default, this will install the model into your #[code site-packages]
| directory. You can then create a #[+a("#usage") shortcut link] for your
| model to load it via #[code spacy.load()], or #[+a("#usage-import") import it]
| as a Python module.
+h(3, "download-manual") Manual download and installation
p
| In some cases, you might prefer downloading the data manually, for
| example to place it into a custom directory. You can download the model
| via your browser from the #[+a(gh("spacy-models")) latest releases], or configure
| your own download script using the URL of the archive file. The archive
| consists of a model directory that contains another directory with the
| model data.
+code("Directory structure", "yaml").
└── en_core_web_md-1.2.0.tar.gz # downloaded archive
├── meta.json # model meta data
├── setup.py # setup file for pip installation
└── en_core_web_md # model directory
├── __init__.py # init for pip installation
├── meta.json # model meta data
└── en_core_web_md-1.2.0 # model data
p
| You can place the model data directory anywhere on your local file system.
| To use it with spaCy, simply assign it a name by creating a
| #[+a("#usage") shortcut link] for the data directory.
+h(2, "usage") Using models with spaCy
p
| While previous versions of spaCy required you to maintain a data directory
| containing the models for each installation, you can now choose how and
| where you want to keep your data files. To load the models conveniently
| from within spaCy, you can use the #[code spacy.link] command to create a
| symlink. This lets you set up custom shortcut links for models so you can
| load them by name.
+code(false, "bash").
python -m spacy.link [package name or path] [shortcut] [--force]
p
| The first argument is the package name (if the model was installed via
| pip), or a local path to the data directory. The second argument is
| the internal name you want to use for the model. Setting the #[code --force]
| flag will overwrite any existing links.
+code("Examples", "bash").
# set up shortcut link to load installed package as "en_default"
python -m spacy.link en_core_web_md en_default
# set up shortcut link to load local model as "my_amazing_model"
python -m spacy.link /Users/you/model my_amazing_model
+h(3, "usage-loading") Loading models
p
| To load a model, use #[code spacy.load()] with the model's shortcut link.
+code.
import spacy
nlp = spacy.load('en_default')
doc = nlp(u'This is a sentence.')
p
| You can also use the #[code info()] method to print a model's meta data
| before loading it. Each #[code Language] object returned by #[code spacy.load()]
| also exposes the model's meta data as the attribute #[code meta]:
+code.
import spacy
spacy.info('en_default')
# JSON-formatted model meta data
nlp = spacy.load('en_default')
print(nlp.meta['version'])
# 1.2.0
+h(3, "usage-import") Importing models as modules
p
| If you've installed a model via pip, you can also #[code import] it
| directly and then call its #[code load()] method with no arguments:
+code.
import spacy
import en_core_web_md
nlp = en_core_web_md.load()
doc = nlp(u'This is a sentence.')
+h(2, "own-models") Using your own models
p
| If you've trained your own model, for example for
| #[+a("/docs/usage/adding-languages") additional languages], you can
| create a shortcut link for it by pointing #[code spacy.link] to the
| model's data directory. To allow your model to be downloaded and
| installed via pip, you'll also need to generate a package for it.
+infobox("Important note")
| The model packages are #[strong not suitable] for the public
| #[+a("https://pypi.python.org") pypi.python.org] directory, which is not
| designed for binary data and files over 50 MB. However, if your company
| is running an internal installation of pypi, publishing your models on
| there can be a convenient solution to share them with your team.
p The model directory should look like this:
+code("Directory structure", "yaml").
└── /
├── MANIFEST.in # to include meta.json
├── meta.json # model meta data
├── setup.py # setup file for pip installation
└── en_core_web_md # model directory
├── __init__.py # init for pip installation
└── en_core_web_md-1.2.0 # model data
p
| You can find templates for all files in our
| #[+a(gh("spacy-dev-resources", "templates/model")) spaCy dev resources].
| Unless you want to customise installation and loading, the only file
| you'll need to modify is #[code meta.json], which includes the model's
| meta data. It will later be copied into the package and data directory.
+code("meta.json", "json").
{
"name": "core_web_md",
"lang": "en",
"version": "1.2.0",
"spacy_version": "1.7.0",
"description": "English model for spaCy",
"author": "Explosion AI",
"email": "contact@explosion.ai",
"license": "MIT"
}
p
| Keep in mind that the directories need to be named according to the
| naming conventions. The #[code lang] setting is also used to create the
| respective #[code Language] class in spaCy, which will later be returned
| by the model's #[code load()] method.
p
| To generate the package, run the following command from within the
| directory. This will create a #[code .tar.gz] archive in a directory
| #[code /dist].
+code(false, "bash").
python setup.py sdist

View File

@ -7,6 +7,13 @@ p Many of the associated tools and resources that we're developing alongside spa
+h(2, "developer") Developer tools +h(2, "developer") Developer tools
+table(["Name", "Description"]) +table(["Name", "Description"])
+row
+cell
+src(gh("spacy-models")) spaCy Models
+cell
| Model releases for spaCy.
+row +row
+cell +cell
+src(gh("spacy-dev-resources")) spaCy Dev Resources +src(gh("spacy-dev-resources")) spaCy Dev Resources
@ -55,7 +62,7 @@ p Many of the associated tools and resources that we're developing alongside spa
+src(gh("thinc")) Thinc +src(gh("thinc")) Thinc
+cell +cell
| Super sparse multi-class machine learning with Cython. | spaCy's Machine Learning library for NLP in Python.
+row +row
+cell +cell

View File

@ -66,7 +66,7 @@ include _includes/_mixins
+grid +grid
+grid-col("two-thirds") +grid-col("two-thirds")
+terminal("lightning_tour.py"). +terminal("lightning_tour.py").
# Install: pip install spacy && python -m spacy.en.download # Install: pip install spacy && python -m spacy.download en
import spacy import spacy
# Load English tokenizer, tagger, parser, NER and word vectors # Load English tokenizer, tagger, parser, NER and word vectors