mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge changes to __init__.py
This commit is contained in:
		
						commit
						6420f86f02
					
				| 
						 | 
				
			
			@ -1,4 +1,3 @@
 | 
			
		|||
recursive-include include *.h
 | 
			
		||||
include buildbot.json
 | 
			
		||||
include LICENSE
 | 
			
		||||
include README.rst
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,25 +0,0 @@
 | 
			
		|||
{
 | 
			
		||||
    "build": {
 | 
			
		||||
        "sdist": [
 | 
			
		||||
            "pip install -r requirements.txt",
 | 
			
		||||
            "pip install \"numpy<1.8\"",
 | 
			
		||||
            "python setup.py sdist"
 | 
			
		||||
        ],
 | 
			
		||||
        "install": [
 | 
			
		||||
            "pip install -v source.tar.gz"
 | 
			
		||||
        ],
 | 
			
		||||
        "wheel": [
 | 
			
		||||
            "python untar.py source.tar.gz .",
 | 
			
		||||
            "python setup.py bdist_wheel",
 | 
			
		||||
            "python cpdist.py dist"
 | 
			
		||||
        ]
 | 
			
		||||
    },
 | 
			
		||||
    "test": {
 | 
			
		||||
        "after": ["install", "wheel"],
 | 
			
		||||
        "run": [
 | 
			
		||||
            "python -m spacy.en.download --force"
 | 
			
		||||
        ],
 | 
			
		||||
        "package": "spacy",
 | 
			
		||||
        "args": "--tb=native -x --models --vectors --slow"
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -4,6 +4,7 @@ from __future__ import unicode_literals, print_function
 | 
			
		|||
import json
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from .util import set_lang_class, get_lang_class, parse_package_meta
 | 
			
		||||
from .deprecated import resolve_model_name
 | 
			
		||||
 | 
			
		||||
from . import en
 | 
			
		||||
from . import de
 | 
			
		||||
| 
						 | 
				
			
			@ -35,13 +36,15 @@ set_lang_class(bn.Bengali.lang, bn.Bengali)
 | 
			
		|||
 | 
			
		||||
def load(name, **overrides):
 | 
			
		||||
    data_path = overrides.get('path', util.get_data_path())
 | 
			
		||||
    meta = parse_package_meta(data_path, name, require=False)
 | 
			
		||||
    model_name = resolve_model_name(name)
 | 
			
		||||
    meta = parse_package_meta(data_path, model_name, require=False)
 | 
			
		||||
    lang = meta['lang'] if meta and 'lang' in meta else name
 | 
			
		||||
    cls = get_lang_class(lang)
 | 
			
		||||
    overrides['meta'] = meta
 | 
			
		||||
    model_path = Path(data_path) / name
 | 
			
		||||
    model_path = Path(data_path / model_name)
 | 
			
		||||
    if model_path.exists():
 | 
			
		||||
        overrides['path'] = model_path
 | 
			
		||||
 | 
			
		||||
    return cls(**overrides)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -12,5 +12,5 @@ __license__ = 'MIT'
 | 
			
		|||
 | 
			
		||||
__docs__ = 'https://spacy.io/docs/usage'
 | 
			
		||||
__download_url__ = 'https://github.com/explosion/spacy-models/releases/download'
 | 
			
		||||
__compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json?token=ANAt54fi5zcUtnwGhMLw2klWwcAyHkZGks5Y0nw1wA%3D%3D'
 | 
			
		||||
__compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json'
 | 
			
		||||
__shortcuts__ = {'en': 'en_core_web_sm', 'de': 'de_core_web_md', 'vectors': 'en_vectors_glove_md'}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,6 +2,7 @@ from pathlib import Path
 | 
			
		|||
from . import about
 | 
			
		||||
from . import util
 | 
			
		||||
from .download import download
 | 
			
		||||
from .link import link
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
| 
						 | 
				
			
			@ -86,6 +87,35 @@ def fix_glove_vectors_loading(overrides):
 | 
			
		|||
    return overrides
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def resolve_model_name(name):
 | 
			
		||||
    """If spaCy is loaded with 'de', check if symlink already exists. If
 | 
			
		||||
    not, user have upgraded from older version and have old models installed.
 | 
			
		||||
    Check if old model directory exists and if so, return that instead and create
 | 
			
		||||
    shortcut link. If English model is found and no shortcut exists, raise error
 | 
			
		||||
    and tell user to install new model.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    if name == 'en' or name == 'de':
 | 
			
		||||
        versions = ['1.0.0', '1.1.0']
 | 
			
		||||
        data_path = Path(util.get_data_path())
 | 
			
		||||
        model_path = data_path / name
 | 
			
		||||
        v_model_paths = [data_path / Path(name + '-' + v) for v in versions]
 | 
			
		||||
 | 
			
		||||
        if not model_path.exists(): # no shortcut found
 | 
			
		||||
            for v_path in v_model_paths:
 | 
			
		||||
                if v_path.exists(): # versioned model directory found
 | 
			
		||||
                    if name == 'de':
 | 
			
		||||
                        link(v_path, name)
 | 
			
		||||
                        return name
 | 
			
		||||
                    else:
 | 
			
		||||
                        raise ValueError(
 | 
			
		||||
                            "Found English model at {p}. This model is not "
 | 
			
		||||
                            "compatible with the current version. See "
 | 
			
		||||
                            "https://spacy.io/docs/usage/models to download the "
 | 
			
		||||
                            "new model.".format(p=v_path))
 | 
			
		||||
    return name
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ModelDownload():
 | 
			
		||||
    """Replace download modules within en and de with deprecation warning and
 | 
			
		||||
    download default language model (using shortcut). Use classmethods to allow
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -28,6 +28,7 @@ def download(model=None, direct=False):
 | 
			
		|||
        download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version))
 | 
			
		||||
        link(model_name, model, force=True)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_compatibility():
 | 
			
		||||
    version = about.__version__
 | 
			
		||||
    r = requests.get(about.__compatibility__)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,6 +5,7 @@ from ..download import download, get_compatibility, get_version, check_error_dep
 | 
			
		|||
import pytest
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.slow
 | 
			
		||||
def test_download_fetch_compatibility():
 | 
			
		||||
    compatibility = get_compatibility()
 | 
			
		||||
    assert type(compatibility) == dict
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -12,10 +12,10 @@
 | 
			
		|||
        "COMPANY_URL": "https://explosion.ai",
 | 
			
		||||
        "DEMOS_URL": "https://demos.explosion.ai",
 | 
			
		||||
 | 
			
		||||
        "SPACY_VERSION": "1.6",
 | 
			
		||||
        "SPACY_VERSION": "1.7",
 | 
			
		||||
        "LATEST_NEWS": {
 | 
			
		||||
            "url": "https://explosion.ai/blog/deep-learning-formula-nlp",
 | 
			
		||||
            "title": "The new deep learning formula for state-of-the-art NLP models"
 | 
			
		||||
            "url": "/docs/usage/models",
 | 
			
		||||
            "title": "Downloading and installing models as packages"
 | 
			
		||||
        },
 | 
			
		||||
 | 
			
		||||
        "SOCIAL": {
 | 
			
		||||
| 
						 | 
				
			
			@ -54,8 +54,8 @@
 | 
			
		|||
            }
 | 
			
		||||
        },
 | 
			
		||||
 | 
			
		||||
        "V_CSS": "1.15",
 | 
			
		||||
        "V_JS": "1.1",
 | 
			
		||||
        "V_CSS": "1.2",
 | 
			
		||||
        "V_JS": "1.2",
 | 
			
		||||
        "DEFAULT_SYNTAX": "python",
 | 
			
		||||
        "ANALYTICS": "UA-58931649-1",
 | 
			
		||||
        "MAILCHIMP": {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -67,6 +67,17 @@ mixin aside-code(label, language)
 | 
			
		|||
            block
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//- Infobox
 | 
			
		||||
    label - [string] infobox title (optional or false for no title)
 | 
			
		||||
 | 
			
		||||
mixin infobox(label)
 | 
			
		||||
    aside.o-box.o-block.u-text-small
 | 
			
		||||
        if label
 | 
			
		||||
            h3.u-text-label.u-color-theme=label
 | 
			
		||||
 | 
			
		||||
        block
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//- Link button
 | 
			
		||||
    url      - [string] link href
 | 
			
		||||
    trusted  - [boolean] if not set / false, rel="noopener nofollow" is added
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -22,7 +22,10 @@ p
 | 
			
		|||
 | 
			
		||||
//- Links
 | 
			
		||||
 | 
			
		||||
main p a, main table a, main > *:not(footer) li a, .c-aside a
 | 
			
		||||
main p a,
 | 
			
		||||
main table a,
 | 
			
		||||
main > *:not(footer) li a,
 | 
			
		||||
main aside a
 | 
			
		||||
    @extend .u-link
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -62,6 +62,15 @@
 | 
			
		|||
    border: 1px solid $color-subtle
 | 
			
		||||
    padding: 3rem 2.5%
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//- Box
 | 
			
		||||
 | 
			
		||||
.o-box
 | 
			
		||||
    background: $color-theme-light
 | 
			
		||||
    padding: 2rem
 | 
			
		||||
    border: 1px solid darken($color-theme-light, 5)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//- Icons
 | 
			
		||||
 | 
			
		||||
.o-icon
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,6 +4,7 @@
 | 
			
		|||
    display: inline-block
 | 
			
		||||
    font-weight: bold
 | 
			
		||||
    padding: 0.75em 1em
 | 
			
		||||
    margin-bottom: 1px
 | 
			
		||||
    border: 2px solid
 | 
			
		||||
    border-radius: 2px
 | 
			
		||||
    text-align: center
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -34,6 +34,7 @@ $color-dark: lighten($color-front, 20) !default
 | 
			
		|||
 | 
			
		||||
$color-theme: map-get($colors, $theme)
 | 
			
		||||
$color-theme-dark: darken(map-get($colors, $theme), 5)
 | 
			
		||||
$color-theme-light: saturate(lighten(map-get($colors, $theme), 35), 15)
 | 
			
		||||
 | 
			
		||||
$color-subtle: #ddd !default
 | 
			
		||||
$color-subtle-light: #f6f6f6 !default
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,6 +2,7 @@
 | 
			
		|||
    "sidebar": {
 | 
			
		||||
        "Get started": {
 | 
			
		||||
            "Installation": "./",
 | 
			
		||||
            "Models": "models",
 | 
			
		||||
            "Lightning tour": "lightning-tour",
 | 
			
		||||
            "Resources": "resources"
 | 
			
		||||
        },
 | 
			
		||||
| 
						 | 
				
			
			@ -28,6 +29,11 @@
 | 
			
		|||
 | 
			
		||||
    "index": {
 | 
			
		||||
        "title": "Install spaCy",
 | 
			
		||||
        "next": "models"
 | 
			
		||||
    },
 | 
			
		||||
 | 
			
		||||
    "models": {
 | 
			
		||||
        "title": "Models",
 | 
			
		||||
        "next": "lightning-tour"
 | 
			
		||||
    },
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -12,6 +12,16 @@ p
 | 
			
		|||
    |  #[a(href="#source-ubuntu") Ubuntu], #[a(href="#source-osx") macOS/OS X]
 | 
			
		||||
    |  and #[a(href="#source-windows") Windows] for details.
 | 
			
		||||
 | 
			
		||||
+aside("Download models")
 | 
			
		||||
    |  After installation you need to download a language model. For more info
 | 
			
		||||
    |  and available models, see the #[+a("/docs/usage/models") docs on models].
 | 
			
		||||
 | 
			
		||||
    +code.o-no-block.
 | 
			
		||||
        python -m spacy.download en
 | 
			
		||||
 | 
			
		||||
        >>> import spacy
 | 
			
		||||
        >>> nlp = spacy.load('en')
 | 
			
		||||
 | 
			
		||||
+h(2, "pip") pip
 | 
			
		||||
 | 
			
		||||
p Using pip, spaCy releases are currently only available as source packages.
 | 
			
		||||
| 
						 | 
				
			
			@ -43,64 +53,6 @@ p
 | 
			
		|||
    |  #[+a("https://github.com/conda-forge/spacy-feedstock") this repository].
 | 
			
		||||
    |  Improvements and pull requests to the recipe and setup are always appreciated.
 | 
			
		||||
 | 
			
		||||
+h(2, "models") Download models
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  After installation you need to download a language model. Models for
 | 
			
		||||
    |  English (#[code en]) and German (#[code de]) are available.
 | 
			
		||||
 | 
			
		||||
+code(false, "bash").
 | 
			
		||||
    python -m spacy.en.download all
 | 
			
		||||
    python -m spacy.de.download all
 | 
			
		||||
 | 
			
		||||
+aside-code("Examples", "bash").
 | 
			
		||||
    # Install English tagger, parser and NER
 | 
			
		||||
    python -m spacy.en.download parser
 | 
			
		||||
 | 
			
		||||
    # Install English GloVe vectors
 | 
			
		||||
    python -m spacy.en.download glove
 | 
			
		||||
 | 
			
		||||
    # Upgrade/overwrite existing data
 | 
			
		||||
    python -m spacy.en.download --force
 | 
			
		||||
 | 
			
		||||
    # Check whether the model was successfully installed
 | 
			
		||||
    python -c "import spacy; spacy.load('en'); print('OK')"
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  The download command fetches about 1 GB of data which it
 | 
			
		||||
    |  installs within the #[code spacy] package directory.
 | 
			
		||||
 | 
			
		||||
+h(3, "custom-location") Download model to custom location
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |   You can specify where #[code spacy.en.download] and
 | 
			
		||||
    |  #[code spacy.de.download] download the language model to using the
 | 
			
		||||
    |  #[code --data-path] or #[code -d] argument:
 | 
			
		||||
 | 
			
		||||
+code(false, "bash").
 | 
			
		||||
    python -m spacy.en.download all --data-path /some/dir
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  If you choose to download to a custom location, you will need to tell
 | 
			
		||||
    |  spaCy where to load the model from in order to use it. You can do this
 | 
			
		||||
    |  either by calling #[code spacy.util.set_data_path()] before calling
 | 
			
		||||
    |  #[code spacy.load()], or by passing a #[code path] argument to the
 | 
			
		||||
    |  #[code spacy.en.English] or #[code spacy.de.German] constructors.
 | 
			
		||||
 | 
			
		||||
+h(3, "models-manual") Download models manually
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  As of v1.6, the models and word vectors are also available as direct
 | 
			
		||||
    |  downloads from GitHub, attached to the #[+a(gh("spaCy") + "/releases") releases] as #[code .tar.gz] archives.
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  To install the models manually, first find the default data path. You can
 | 
			
		||||
    |  use #[code spacy.util.get_data_path()] to find the directory where spaCy
 | 
			
		||||
    |  will look for its models, or change the default data path with
 | 
			
		||||
    |  #[code spacy.util.set_data_path()]. Then simply unpack the archive and
 | 
			
		||||
    |  place the contained folder in that directory. You can now load the models
 | 
			
		||||
    |  via #[code spacy.load()].
 | 
			
		||||
 | 
			
		||||
+h(2, "source") Compile from source
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -6,6 +6,15 @@ p
 | 
			
		|||
    |  The following examples and code snippets give you an overview of spaCy's
 | 
			
		||||
    |  functionality and its usage.
 | 
			
		||||
 | 
			
		||||
+h(2, "models") Install and load models
 | 
			
		||||
 | 
			
		||||
+code(false, "bash").
 | 
			
		||||
    python -m spacy.download en
 | 
			
		||||
 | 
			
		||||
+code.
 | 
			
		||||
    import spacy
 | 
			
		||||
    nlp = spacy.load('en')
 | 
			
		||||
 | 
			
		||||
+h(2, "examples-resources") Load resources and process text
 | 
			
		||||
 | 
			
		||||
+code.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										285
									
								
								website/docs/usage/models.jade
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										285
									
								
								website/docs/usage/models.jade
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,285 @@
 | 
			
		|||
//- 💫 DOCS > USAGE > MODELS
 | 
			
		||||
 | 
			
		||||
include ../../_includes/_mixins
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  As of v1.7.0, models for spaCy can be installed as #[strong Python packages].
 | 
			
		||||
    |  This means that they're a component of your application, just like any
 | 
			
		||||
    |  other module. They're versioned and can be defined as a dependency in your
 | 
			
		||||
    |  #[code requirements.txt]. Models can be installed from a download URL or
 | 
			
		||||
    |  a local directory, manually or via #[+a("https://pypi.python.org/pypi/pip") pip].
 | 
			
		||||
    |  Their data can be located anywhere on your file system. To make a model
 | 
			
		||||
    |  available to spaCy, all you need to do is create a "shortcut link", an
 | 
			
		||||
    |  internal alias that tells spaCy where to find the data files for a specific
 | 
			
		||||
    |  model name.
 | 
			
		||||
 | 
			
		||||
+infobox("Important note")
 | 
			
		||||
    |  Due to improvements in the English lemmatizer in v1.7.0, you need to download the
 | 
			
		||||
    |  new English model. The German model is still compatible and will be
 | 
			
		||||
    |  recognised and linked automatically.
 | 
			
		||||
 | 
			
		||||
+aside-code("Quickstart").
 | 
			
		||||
    # Install spaCy and download English model
 | 
			
		||||
    pip install spacy
 | 
			
		||||
    python -m spacy.download en
 | 
			
		||||
 | 
			
		||||
    # Usage in Python
 | 
			
		||||
    import spacy
 | 
			
		||||
    nlp = spacy.load('en')
 | 
			
		||||
    doc = nlp(u'This is a sentence.')
 | 
			
		||||
 | 
			
		||||
+h(2, "available") Available models
 | 
			
		||||
 | 
			
		||||
+table(["Name", "Size", "Description"])
 | 
			
		||||
    +row
 | 
			
		||||
        +cell #[code en_core_web_sm]
 | 
			
		||||
        +cell 50 MB
 | 
			
		||||
        +cell Vocab, syntax, entities, word vectors #[+tag default]
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell #[code en_core_web_md]
 | 
			
		||||
        +cell 1 GB
 | 
			
		||||
        +cell Vocab, syntax, entities, word vectors
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell #[code en_depent_web_md]
 | 
			
		||||
        +cell 328 MB
 | 
			
		||||
        +cell Vocab, syntax, entities
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell #[code en_vectors_glove_md]
 | 
			
		||||
        +cell 727 MB
 | 
			
		||||
        +cell
 | 
			
		||||
            |  #[+a("http://nlp.stanford.edu/projects/glove/") GloVe] Common
 | 
			
		||||
            |  Crawl vectors
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell #[code de_core_news_md]
 | 
			
		||||
        +cell 645 MB
 | 
			
		||||
        +cell Vocab, syntax, entities, word vectors #[+tag default]
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  Models are now available as #[code .tar.gz] archives #[+a(gh("spacy-models")) from GitHub],
 | 
			
		||||
    |  attached to individual releases. They can be downloaded and loaded manually,
 | 
			
		||||
    |  or using spaCy's #[code download] and #[code link] commands. All models
 | 
			
		||||
    |  follow the naming convention of #[code [language]_[type]_[genre]_[size]].
 | 
			
		||||
 | 
			
		||||
+button(gh("spacy-models") + "/releases", true, "primary") View models
 | 
			
		||||
 | 
			
		||||
+h(2, "download") Downloading models
 | 
			
		||||
 | 
			
		||||
+aside("Downloading models in spaCy < v1.7")
 | 
			
		||||
    |  In older versions of spaCy, you can still use the old download commands.
 | 
			
		||||
    |  This will download and install the models into the #[code spacy/data]
 | 
			
		||||
    |  directory.
 | 
			
		||||
 | 
			
		||||
    +code.o-no-block.
 | 
			
		||||
        python -m spacy.en.download all
 | 
			
		||||
        python -m spacy.de.download all
 | 
			
		||||
        python -m spacy.en.download glove
 | 
			
		||||
 | 
			
		||||
    |  The old models are also #[+a(gh("spacy") + "/tree/v1.6.0") attached to the v1.6.0 release].
 | 
			
		||||
    |  To download and install them manually, unpack the archive, drop the
 | 
			
		||||
    |  contained directory into #[code spacy/data] and load the model via
 | 
			
		||||
    |  #[code spacy.load('en')] or #[code spacy.load('de')].
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  The easiest way to download a model is via spaCy's #[code download]
 | 
			
		||||
    |  command. It takes care of finding the best-matching model compatible with
 | 
			
		||||
    |  your spaCy installation.
 | 
			
		||||
 | 
			
		||||
+code(false, "bash").
 | 
			
		||||
    # out-of-the-box: download best-matching default model
 | 
			
		||||
    python -m spacy.download en
 | 
			
		||||
    python -m spacy.download de
 | 
			
		||||
 | 
			
		||||
    # download best-matching version of specific model for your spaCy installation
 | 
			
		||||
    python -m spacy.download en_core_web_md
 | 
			
		||||
 | 
			
		||||
    # download exact model version (doesn't create shortcut link)
 | 
			
		||||
    python -m spacy.download en_core_web_md-1.2.0 --direct
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  The download command will #[+a("#download-pip") install the model] via
 | 
			
		||||
    |  pip, place the package in your #[code site-packages] directory and create
 | 
			
		||||
    |  a #[+a("#usage") shortcut link] that lets you load the model by name. The
 | 
			
		||||
    |  shortcut link will be the same as the model name used in
 | 
			
		||||
    |  #[code spacy.download].
 | 
			
		||||
 | 
			
		||||
+code(false, "bash").
 | 
			
		||||
    pip install spacy
 | 
			
		||||
    python -m spacy.download en
 | 
			
		||||
 | 
			
		||||
+code.
 | 
			
		||||
    import spacy
 | 
			
		||||
    nlp = spacy.load('en')
 | 
			
		||||
    doc = nlp(u'This is a sentence.')
 | 
			
		||||
 | 
			
		||||
+h(3, "download-pip") Installation via pip
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    | To download a model directly using #[+a("https://pypi.python.org/pypi/pip") pip],
 | 
			
		||||
    |  simply point #[code pip install] to the URL or local path of the archive
 | 
			
		||||
    |  file. To find the direct link to a model, head over to the
 | 
			
		||||
    |  #[+a(gh("spacy-models") + "/releases") model releases], right click on the archive
 | 
			
		||||
    |  link and copy it to your clipboard.
 | 
			
		||||
 | 
			
		||||
+code(false, "bash").
 | 
			
		||||
    # with external URL
 | 
			
		||||
    pip install #{gh("spacy-models")}/releases/download/en_core_web_md-1.2.0/en_core_web_md-1.2.0.tar.gz
 | 
			
		||||
 | 
			
		||||
    # with local file
 | 
			
		||||
    pip install /Users/you/en_core_web_md-1.2.0.tar.gz
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  By default, this will install the model into your #[code site-packages]
 | 
			
		||||
    |  directory. You can then create a #[+a("#usage") shortcut link] for your
 | 
			
		||||
    |  model to load it via #[code spacy.load()], or #[+a("#usage-import") import it]
 | 
			
		||||
    |  as a Python module.
 | 
			
		||||
 | 
			
		||||
+h(3, "download-manual") Manual download and installation
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  In some cases, you might prefer downloading the data manually, for
 | 
			
		||||
    |  example to place it into a custom directory. You can download the model
 | 
			
		||||
    |  via your browser from the #[+a(gh("spacy-models")) latest releases], or configure
 | 
			
		||||
    |  your own download script using the URL of the archive file. The archive
 | 
			
		||||
    |  consists of a model directory that contains another directory with the
 | 
			
		||||
    |  model data.
 | 
			
		||||
 | 
			
		||||
+code("Directory structure", "yaml").
 | 
			
		||||
    └── en_core_web_md-1.2.0.tar.gz       # downloaded archive
 | 
			
		||||
        ├── meta.json                     # model meta data
 | 
			
		||||
        ├── setup.py                      # setup file for pip installation
 | 
			
		||||
        └── en_core_web_md                # model directory
 | 
			
		||||
            ├── __init__.py               # init for pip installation
 | 
			
		||||
            ├── meta.json                 # model meta data
 | 
			
		||||
            └── en_core_web_md-1.2.0      # model data
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  You can place the model data directory anywhere on your local file system.
 | 
			
		||||
    |  To use it with spaCy, simply assign it a name by creating a
 | 
			
		||||
    |  #[+a("#usage") shortcut link] for the data directory.
 | 
			
		||||
 | 
			
		||||
+h(2, "usage") Using models with spaCy
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  While previous versions of spaCy required you to maintain a data directory
 | 
			
		||||
    |  containing the models for each installation, you can now choose how and
 | 
			
		||||
    |  where you want to keep your data files. To load the models conveniently
 | 
			
		||||
    |  from within spaCy, you can use the #[code spacy.link] command to create a
 | 
			
		||||
    |  symlink. This lets you set up custom shortcut links for models so you can
 | 
			
		||||
    |  load them by name.
 | 
			
		||||
 | 
			
		||||
+code(false, "bash").
 | 
			
		||||
    python -m spacy.link [package name or path] [shortcut] [--force]
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  The first argument is the package name (if the model was installed via
 | 
			
		||||
    |  pip), or a local path to the data directory. The second argument is
 | 
			
		||||
    |  the internal name you want to use for the model. Setting the #[code --force]
 | 
			
		||||
    |  flag will overwrite any existing links.
 | 
			
		||||
 | 
			
		||||
+code("Examples", "bash").
 | 
			
		||||
    # set up shortcut link to load installed package as "en_default"
 | 
			
		||||
    python -m spacy.link en_core_web_md en_default
 | 
			
		||||
 | 
			
		||||
    # set up shortcut link to load local model as "my_amazing_model"
 | 
			
		||||
    python -m spacy.link /Users/you/model my_amazing_model
 | 
			
		||||
 | 
			
		||||
+h(3, "usage-loading") Loading models
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  To load a model, use #[code spacy.load()] with the model's shortcut link.
 | 
			
		||||
 | 
			
		||||
+code.
 | 
			
		||||
    import spacy
 | 
			
		||||
    nlp = spacy.load('en_default')
 | 
			
		||||
    doc = nlp(u'This is a sentence.')
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  You can also use the #[code info()] method to print a model's meta data
 | 
			
		||||
    |  before loading it. Each #[code Language] object returned by #[code spacy.load()]
 | 
			
		||||
    |  also exposes the model's meta data as the attribute #[code meta]:
 | 
			
		||||
 | 
			
		||||
+code.
 | 
			
		||||
    import spacy
 | 
			
		||||
    spacy.info('en_default')
 | 
			
		||||
    # JSON-formatted model meta data
 | 
			
		||||
 | 
			
		||||
    nlp = spacy.load('en_default')
 | 
			
		||||
    print(nlp.meta['version'])
 | 
			
		||||
    # 1.2.0
 | 
			
		||||
 | 
			
		||||
+h(3, "usage-import") Importing models as modules
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  If you've installed a model via pip, you can also #[code import] it
 | 
			
		||||
    |  directly and then call its #[code load()] method with no arguments:
 | 
			
		||||
 | 
			
		||||
+code.
 | 
			
		||||
    import spacy
 | 
			
		||||
    import en_core_web_md
 | 
			
		||||
 | 
			
		||||
    nlp = en_core_web_md.load()
 | 
			
		||||
    doc = nlp(u'This is a sentence.')
 | 
			
		||||
 | 
			
		||||
+h(2, "own-models") Using your own models
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  If you've trained your own model, for example for
 | 
			
		||||
    |  #[+a("/docs/usage/adding-languages") additional languages], you can
 | 
			
		||||
    |  create a shortcut link for it by pointing #[code spacy.link] to the
 | 
			
		||||
    |  model's data directory. To allow your model to be downloaded and
 | 
			
		||||
    |  installed via pip, you'll also need to generate a package for it.
 | 
			
		||||
 | 
			
		||||
+infobox("Important note")
 | 
			
		||||
    |  The model packages are #[strong not suitable] for the public
 | 
			
		||||
    |  #[+a("https://pypi.python.org") pypi.python.org] directory, which is not
 | 
			
		||||
    |  designed for binary data and files over 50 MB. However, if your company
 | 
			
		||||
    |  is running an internal installation of pypi, publishing your models on
 | 
			
		||||
    |  there can be a convenient solution to share them with your team.
 | 
			
		||||
 | 
			
		||||
p The model directory should look like this:
 | 
			
		||||
 | 
			
		||||
+code("Directory structure", "yaml").
 | 
			
		||||
    └── /
 | 
			
		||||
        ├── MANIFEST.in                   # to include meta.json
 | 
			
		||||
        ├── meta.json                     # model meta data
 | 
			
		||||
        ├── setup.py                      # setup file for pip installation
 | 
			
		||||
        └── en_core_web_md                # model directory
 | 
			
		||||
            ├── __init__.py               # init for pip installation
 | 
			
		||||
            └── en_core_web_md-1.2.0      # model data
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  You can find templates for all files in our
 | 
			
		||||
    |  #[+a(gh("spacy-dev-resources", "templates/model")) spaCy dev resources].
 | 
			
		||||
    |  Unless you want to customise installation and loading, the only file
 | 
			
		||||
    |  you'll need to modify is #[code meta.json], which includes the model's
 | 
			
		||||
    |  meta data. It will later be copied into the package and data directory.
 | 
			
		||||
 | 
			
		||||
+code("meta.json", "json").
 | 
			
		||||
    {
 | 
			
		||||
        "name": "core_web_md",
 | 
			
		||||
        "lang": "en",
 | 
			
		||||
        "version": "1.2.0",
 | 
			
		||||
        "spacy_version": "1.7.0",
 | 
			
		||||
        "description": "English model for spaCy",
 | 
			
		||||
        "author": "Explosion AI",
 | 
			
		||||
        "email": "contact@explosion.ai",
 | 
			
		||||
        "license": "MIT"
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  Keep in mind that the directories need to be named according to the
 | 
			
		||||
    |  naming conventions. The #[code lang] setting is also used to create the
 | 
			
		||||
    |  respective #[code Language] class in spaCy, which will later be returned
 | 
			
		||||
    |  by the model's #[code load()] method.
 | 
			
		||||
 | 
			
		||||
p
 | 
			
		||||
    |  To generate the package, run the following command from within the
 | 
			
		||||
    |  directory. This will create a #[code .tar.gz] archive in a directory
 | 
			
		||||
    |  #[code /dist].
 | 
			
		||||
 | 
			
		||||
+code(false, "bash").
 | 
			
		||||
    python setup.py sdist
 | 
			
		||||
| 
						 | 
				
			
			@ -7,6 +7,13 @@ p Many of the associated tools and resources that we're developing alongside spa
 | 
			
		|||
+h(2, "developer") Developer tools
 | 
			
		||||
 | 
			
		||||
+table(["Name", "Description"])
 | 
			
		||||
    +row
 | 
			
		||||
        +cell
 | 
			
		||||
            +src(gh("spacy-models")) spaCy Models
 | 
			
		||||
 | 
			
		||||
        +cell
 | 
			
		||||
            |  Model releases for spaCy.
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell
 | 
			
		||||
            +src(gh("spacy-dev-resources")) spaCy Dev Resources
 | 
			
		||||
| 
						 | 
				
			
			@ -55,7 +62,7 @@ p Many of the associated tools and resources that we're developing alongside spa
 | 
			
		|||
            +src(gh("thinc")) Thinc
 | 
			
		||||
 | 
			
		||||
        +cell
 | 
			
		||||
            |  Super sparse multi-class machine learning with Cython.
 | 
			
		||||
            |  spaCy's Machine Learning library for NLP in Python.
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -66,7 +66,7 @@ include _includes/_mixins
 | 
			
		|||
    +grid
 | 
			
		||||
        +grid-col("two-thirds")
 | 
			
		||||
            +terminal("lightning_tour.py").
 | 
			
		||||
                # Install: pip install spacy && python -m spacy.en.download
 | 
			
		||||
                # Install: pip install spacy && python -m spacy.download en
 | 
			
		||||
                import spacy
 | 
			
		||||
 | 
			
		||||
                # Load English tokenizer, tagger, parser, NER and word vectors
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user