Merge changes to __init__.py

2025-08-02 03:10:22 +03:00 · 2017-03-17 19:51:45 +01:00 · 2017-03-17 19:51:45 +01:00 · 6420f86f02
commit 6420f86f02
parent d013aba7b5 3926ffdb70
19 changed files with 388 additions and 95 deletions
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -1,4 +1,3 @@
 recursive-include include *.h
-include buildbot.json
 include LICENSE
 include README.rst
--- a/buildbot.json
+++ b/buildbot.json
@ -1,25 +0,0 @@
-{
-    "build": {
-        "sdist": [
-            "pip install -r requirements.txt",
-            "pip install \"numpy<1.8\"",
-            "python setup.py sdist"
-        ],
-        "install": [
-            "pip install -v source.tar.gz"
-        ],
-        "wheel": [
-            "python untar.py source.tar.gz .",
-            "python setup.py bdist_wheel",
-            "python cpdist.py dist"
-        ]
-    },
-    "test": {
-        "after": ["install", "wheel"],
-        "run": [
-            "python -m spacy.en.download --force"
-        ],
-        "package": "spacy",
-        "args": "--tb=native -x --models --vectors --slow"
-    }
-}
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@ -4,6 +4,7 @@ from __future__ import unicode_literals, print_function
 import json
 from pathlib import Path
 from .util import set_lang_class, get_lang_class, parse_package_meta
+from .deprecated import resolve_model_name

 from . import en
 from . import de
@ -35,13 +36,15 @@ set_lang_class(bn.Bengali.lang, bn.Bengali)

 def load(name, **overrides):
    data_path = overrides.get('path', util.get_data_path())
-    meta = parse_package_meta(data_path, name, require=False)
+    model_name = resolve_model_name(name)
+    meta = parse_package_meta(data_path, model_name, require=False)
    lang = meta['lang'] if meta and 'lang' in meta else name
    cls = get_lang_class(lang)
    overrides['meta'] = meta
-    model_path = Path(data_path) / name
+    model_path = Path(data_path) / model_name  # wrap first: a 'path' override may be a plain str
    if model_path.exists():
        overrides['path'] = model_path
+
    return cls(**overrides)


--- a/spacy/about.py
+++ b/spacy/about.py
@ -12,5 +12,5 @@ __license__ = 'MIT'

 __docs__ = 'https://spacy.io/docs/usage'
 __download_url__ = 'https://github.com/explosion/spacy-models/releases/download'
-__compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json?token=ANAt54fi5zcUtnwGhMLw2klWwcAyHkZGks5Y0nw1wA%3D%3D'
+__compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json'
 __shortcuts__ = {'en': 'en_core_web_sm', 'de': 'de_core_web_md', 'vectors': 'en_vectors_glove_md'}
--- a/spacy/deprecated.py
+++ b/spacy/deprecated.py
@ -2,6 +2,7 @@ from pathlib import Path
 from . import about
 from . import util
 from .download import download
+from .link import link


 try:
@ -86,6 +87,35 @@ def fix_glove_vectors_loading(overrides):
    return overrides


+def resolve_model_name(name):
+    """If spaCy is loaded with 'en' or 'de', check if a shortcut link already
+    exists. If not, the user may have upgraded from an older version and still
+    have old models installed. If an old German model directory is found, create
+    a shortcut link to it and return the name. If an old English model is found,
+    raise a ValueError telling the user to install the new model.
+    """
+
+    if name == 'en' or name == 'de':
+        versions = ['1.0.0', '1.1.0']  # old model versions to look for
+        data_path = Path(util.get_data_path())
+        model_path = data_path / name
+        v_model_paths = [data_path / Path(name + '-' + v) for v in versions]  # e.g. <data>/de-1.0.0
+
+        if not model_path.exists(): # no shortcut found
+            for v_path in v_model_paths:
+                if v_path.exists(): # versioned model directory found
+                    if name == 'de':
+                        link(v_path, name)
+                        return name
+                    else:
+                        raise ValueError(
+                            "Found English model at {p}. This model is not "
+                            "compatible with the current version. See "
+                            "https://spacy.io/docs/usage/models to download the "
+                            "new model.".format(p=v_path))
+    return name
+
+
 class ModelDownload():
    """Replace download modules within en and de with deprecation warning and
    download default language model (using shortcut). Use classmethods to allow
--- a/spacy/download.py
+++ b/spacy/download.py
@ -28,6 +28,7 @@ def download(model=None, direct=False):
        download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version))
        link(model_name, model, force=True)

+
 def get_compatibility():
    version = about.__version__
    r = requests.get(about.__compatibility__)
--- a/spacy/tests/test_download.py
+++ b/spacy/tests/test_download.py
@ -5,6 +5,7 @@ from ..download import download, get_compatibility, get_version, check_error_dep
 import pytest


+@pytest.mark.slow
 def test_download_fetch_compatibility():
    compatibility = get_compatibility()
    assert type(compatibility) == dict
--- a/website/_harp.json
+++ b/website/_harp.json
@ -12,10 +12,10 @@
        "COMPANY_URL": "https://explosion.ai",
        "DEMOS_URL": "https://demos.explosion.ai",

-        "SPACY_VERSION": "1.6",
+        "SPACY_VERSION": "1.7",
        "LATEST_NEWS": {
-            "url": "https://explosion.ai/blog/deep-learning-formula-nlp",
-            "title": "The new deep learning formula for state-of-the-art NLP models"
+            "url": "/docs/usage/models",
+            "title": "Downloading and installing models as packages"
        },

        "SOCIAL": {
@ -54,8 +54,8 @@
            }
        },

-        "V_CSS": "1.15",
-        "V_JS": "1.1",
+        "V_CSS": "1.2",
+        "V_JS": "1.2",
        "DEFAULT_SYNTAX": "python",
        "ANALYTICS": "UA-58931649-1",
        "MAILCHIMP": {
--- a/website/_includes/_mixins.jade
+++ b/website/_includes/_mixins.jade
@ -67,6 +67,17 @@ mixin aside-code(label, language)
            block


+//- Infobox
+    label - [string] infobox title (optional or false for no title)
+
+mixin infobox(label)
+    aside.o-box.o-block.u-text-small
+        if label
+            h3.u-text-label.u-color-theme=label
+
+        block
+
+
 //- Link button
    url      - [string] link href
    trusted  - [boolean] if not set / false, rel="noopener nofollow" is added
--- a/website/assets/css/_base/_layout.sass
+++ b/website/assets/css/_base/_layout.sass
@ -22,7 +22,10 @@ p

 //- Links

-main p a, main table a, main > *:not(footer) li a, .c-aside a
+main p a,
+main table a,
+main > *:not(footer) li a,
+main aside a
    @extend .u-link


--- a/website/assets/css/_base/_objects.sass
+++ b/website/assets/css/_base/_objects.sass
@ -62,6 +62,15 @@
    border: 1px solid $color-subtle
    padding: 3rem 2.5%

+
+//- Box
+
+.o-box
+    background: $color-theme-light
+    padding: 2rem
+    border: 1px solid darken($color-theme-light, 5)
+
+
 //- Icons

 .o-icon
--- a/website/assets/css/_components/_buttons.sass
+++ b/website/assets/css/_components/_buttons.sass
@ -4,6 +4,7 @@
    display: inline-block
    font-weight: bold
    padding: 0.75em 1em
+    margin-bottom: 1px
    border: 2px solid
    border-radius: 2px
    text-align: center
--- a/website/assets/css/_variables.sass
+++ b/website/assets/css/_variables.sass
@ -34,6 +34,7 @@ $color-dark: lighten($color-front, 20) !default

 $color-theme: map-get($colors, $theme)
 $color-theme-dark: darken(map-get($colors, $theme), 5)
+$color-theme-light: saturate(lighten(map-get($colors, $theme), 35), 15)

 $color-subtle: #ddd !default
 $color-subtle-light: #f6f6f6 !default
--- a/website/docs/usage/_data.json
+++ b/website/docs/usage/_data.json
@ -2,6 +2,7 @@
    "sidebar": {
        "Get started": {
            "Installation": "./",
+            "Models": "models",
            "Lightning tour": "lightning-tour",
            "Resources": "resources"
        },
@ -28,6 +29,11 @@

    "index": {
        "title": "Install spaCy",
+        "next": "models"
+    },
+
+    "models": {
+        "title": "Models",
        "next": "lightning-tour"
    },

--- a/website/docs/usage/index.jade
+++ b/website/docs/usage/index.jade
@ -12,6 +12,16 @@ p
    |  #[a(href="#source-ubuntu") Ubuntu], #[a(href="#source-osx") macOS/OS X]
    |  and #[a(href="#source-windows") Windows] for details.

+aside("Download models")
+    |  After installation you need to download a language model. For more info
+    |  and available models, see the #[+a("/docs/usage/models") docs on models].
+
+    +code.o-no-block.
+        python -m spacy.download en
+
+        &gt;&gt;&gt; import spacy
+        &gt;&gt;&gt; nlp = spacy.load('en')
+
 +h(2, "pip") pip

 p Using pip, spaCy releases are currently only available as source packages.
@ -43,64 +53,6 @@ p
    |  #[+a("https://github.com/conda-forge/spacy-feedstock") this repository].
    |  Improvements and pull requests to the recipe and setup are always appreciated.

-+h(2, "models") Download models
-
-p
-    |  After installation you need to download a language model. Models for
-    |  English (#[code en]) and German (#[code de]) are available.
-
-+code(false, "bash").
-    python -m spacy.en.download all
-    python -m spacy.de.download all
-
-+aside-code("Examples", "bash").
-    # Install English tagger, parser and NER
-    python -m spacy.en.download parser
-
-    # Install English GloVe vectors
-    python -m spacy.en.download glove
-
-    # Upgrade/overwrite existing data
-    python -m spacy.en.download --force
-
-    # Check whether the model was successfully installed
-    python -c "import spacy; spacy.load('en'); print('OK')"
-
-p
-    |  The download command fetches about 1 GB of data which it
-    |  installs within the #[code spacy] package directory.
-
-+h(3, "custom-location") Download model to custom location
-
-p
-    |   You can specify where #[code spacy.en.download] and
-    |  #[code spacy.de.download] download the language model to using the
-    |  #[code --data-path] or #[code -d] argument:
-
-+code(false, "bash").
-    python -m spacy.en.download all --data-path /some/dir
-
-p
-    |  If you choose to download to a custom location, you will need to tell
-    |  spaCy where to load the model from in order to use it. You can do this
-    |  either by calling #[code spacy.util.set_data_path()] before calling
-    |  #[code spacy.load()], or by passing a #[code path] argument to the
-    |  #[code spacy.en.English] or #[code spacy.de.German] constructors.
-
-+h(3, "models-manual") Download models manually
-
-p
-    |  As of v1.6, the models and word vectors are also available as direct
-    |  downloads from GitHub, attached to the #[+a(gh("spaCy") + "/releases") releases] as #[code .tar.gz] archives.
-
-p
-    |  To install the models manually, first find the default data path. You can
-    |  use #[code spacy.util.get_data_path()] to find the directory where spaCy
-    |  will look for its models, or change the default data path with
-    |  #[code spacy.util.set_data_path()]. Then simply unpack the archive and
-    |  place the contained folder in that directory. You can now load the models
-    |  via #[code spacy.load()].
-
 +h(2, "source") Compile from source

 p
--- a/website/docs/usage/lightning-tour.jade
+++ b/website/docs/usage/lightning-tour.jade
@ -6,6 +6,15 @@ p
    |  The following examples and code snippets give you an overview of spaCy's
    |  functionality and its usage.

+h(2, "models") Install and load models
+
+code(false, "bash").
+    python -m spacy.download en
+
+code.
+    import spacy
+    nlp = spacy.load('en')
+
 +h(2, "examples-resources") Load resources and process text

 +code.
--- a/website/docs/usage/models.jade
+++ b/website/docs/usage/models.jade
@ -0,0 +1,285 @@
+//- 💫 DOCS > USAGE > MODELS
+
+include ../../_includes/_mixins
+
+p
+    |  As of v1.7.0, models for spaCy can be installed as #[strong Python packages].
+    |  This means that they're a component of your application, just like any
+    |  other module. They're versioned and can be defined as a dependency in your
+    |  #[code requirements.txt]. Models can be installed from a download URL or
+    |  a local directory, manually or via #[+a("https://pypi.python.org/pypi/pip") pip].
+    |  Their data can be located anywhere on your file system. To make a model
+    |  available to spaCy, all you need to do is create a "shortcut link", an
+    |  internal alias that tells spaCy where to find the data files for a specific
+    |  model name.
+
+infobox("Important note")
+    |  Due to improvements in the English lemmatizer in v1.7.0, you need to download the
+    |  new English model. The German model is still compatible and will be
+    |  recognised and linked automatically.
+
+aside-code("Quickstart").
+    # Install spaCy and download English model
+    pip install spacy
+    python -m spacy.download en
+
+    # Usage in Python
+    import spacy
+    nlp = spacy.load('en')
+    doc = nlp(u'This is a sentence.')
+
+h(2, "available") Available models
+
+table(["Name", "Size", "Description"])
+    +row
+        +cell #[code en_core_web_sm]
+        +cell 50 MB
+        +cell Vocab, syntax, entities, word vectors #[+tag default]
+
+    +row
+        +cell #[code en_core_web_md]
+        +cell 1 GB
+        +cell Vocab, syntax, entities, word vectors
+
+    +row
+        +cell #[code en_depent_web_md]
+        +cell 328 MB
+        +cell Vocab, syntax, entities
+
+    +row
+        +cell #[code en_vectors_glove_md]
+        +cell 727 MB
+        +cell
+            |  #[+a("http://nlp.stanford.edu/projects/glove/") GloVe] Common
+            |  Crawl vectors
+
+    +row
+        +cell #[code de_core_news_md]
+        +cell 645 MB
+        +cell Vocab, syntax, entities, word vectors #[+tag default]
+
+p
+    |  Models are now available as #[code .tar.gz] archives #[+a(gh("spacy-models")) from GitHub],
+    |  attached to individual releases. They can be downloaded and loaded manually,
+    |  or using spaCy's #[code download] and #[code link] commands. All models
+    |  follow the naming convention of #[code [language]_[type]_[genre]_[size]].
+
+button(gh("spacy-models") + "/releases", true, "primary") View models
+
+h(2, "download") Downloading models
+
+aside("Downloading models in spaCy < v1.7")
+    |  In older versions of spaCy, you can still use the old download commands.
+    |  This will download and install the models into the #[code spacy/data]
+    |  directory.
+
+    +code.o-no-block.
+        python -m spacy.en.download all
+        python -m spacy.de.download all
+        python -m spacy.en.download glove
+
+    |  The old models are also #[+a(gh("spacy") + "/tree/v1.6.0") attached to the v1.6.0 release].
+    |  To download and install them manually, unpack the archive, drop the
+    |  contained directory into #[code spacy/data] and load the model via
+    |  #[code spacy.load('en')] or #[code spacy.load('de')].
+
+p
+    |  The easiest way to download a model is via spaCy's #[code download]
+    |  command. It takes care of finding the best-matching model compatible with
+    |  your spaCy installation.
+
+code(false, "bash").
+    # out-of-the-box: download best-matching default model
+    python -m spacy.download en
+    python -m spacy.download de
+
+    # download best-matching version of specific model for your spaCy installation
+    python -m spacy.download en_core_web_md
+
+    # download exact model version (doesn't create shortcut link)
+    python -m spacy.download en_core_web_md-1.2.0 --direct
+
+p
+    |  The download command will #[+a("#download-pip") install the model] via
+    |  pip, place the package in your #[code site-packages] directory and create
+    |  a #[+a("#usage") shortcut link] that lets you load the model by name. The
+    |  shortcut link will be the same as the model name used in
+    |  #[code spacy.download].
+
+code(false, "bash").
+    pip install spacy
+    python -m spacy.download en
+
+code.
+    import spacy
+    nlp = spacy.load('en')
+    doc = nlp(u'This is a sentence.')
+
+h(3, "download-pip") Installation via pip
+
+p
+    | To download a model directly using #[+a("https://pypi.python.org/pypi/pip") pip],
+    |  simply point #[code pip install] to the URL or local path of the archive
+    |  file. To find the direct link to a model, head over to the
+    |  #[+a(gh("spacy-models") + "/releases") model releases], right click on the archive
+    |  link and copy it to your clipboard.
+
+code(false, "bash").
+    # with external URL
+    pip install #{gh("spacy-models")}/releases/download/en_core_web_md-1.2.0/en_core_web_md-1.2.0.tar.gz
+
+    # with local file
+    pip install /Users/you/en_core_web_md-1.2.0.tar.gz
+
+p
+    |  By default, this will install the model into your #[code site-packages]
+    |  directory. You can then create a #[+a("#usage") shortcut link] for your
+    |  model to load it via #[code spacy.load()], or #[+a("#usage-import") import it]
+    |  as a Python module.
+
+h(3, "download-manual") Manual download and installation
+
+p
+    |  In some cases, you might prefer downloading the data manually, for
+    |  example to place it into a custom directory. You can download the model
+    |  via your browser from the #[+a(gh("spacy-models")) latest releases], or configure
+    |  your own download script using the URL of the archive file. The archive
+    |  consists of a model directory that contains another directory with the
+    |  model data.
+
+code("Directory structure", "yaml").
+    └── en_core_web_md-1.2.0.tar.gz       # downloaded archive
+        ├── meta.json                     # model meta data
+        ├── setup.py                      # setup file for pip installation
+        └── en_core_web_md                # model directory
+            ├── __init__.py               # init for pip installation
+            ├── meta.json                 # model meta data
+            └── en_core_web_md-1.2.0      # model data
+
+p
+    |  You can place the model data directory anywhere on your local file system.
+    |  To use it with spaCy, simply assign it a name by creating a
+    |  #[+a("#usage") shortcut link] for the data directory.
+
+h(2, "usage") Using models with spaCy
+
+p
+    |  While previous versions of spaCy required you to maintain a data directory
+    |  containing the models for each installation, you can now choose how and
+    |  where you want to keep your data files. To load the models conveniently
+    |  from within spaCy, you can use the #[code spacy.link] command to create a
+    |  symlink. This lets you set up custom shortcut links for models so you can
+    |  load them by name.
+
+code(false, "bash").
+    python -m spacy.link [package name or path] [shortcut] [--force]
+
+p
+    |  The first argument is the package name (if the model was installed via
+    |  pip), or a local path to the data directory. The second argument is
+    |  the internal name you want to use for the model. Setting the #[code --force]
+    |  flag will overwrite any existing links.
+
+code("Examples", "bash").
+    # set up shortcut link to load installed package as "en_default"
+    python -m spacy.link en_core_web_md en_default
+
+    # set up shortcut link to load local model as "my_amazing_model"
+    python -m spacy.link /Users/you/model my_amazing_model
+
+h(3, "usage-loading") Loading models
+
+p
+    |  To load a model, use #[code spacy.load()] with the model's shortcut link.
+
+code.
+    import spacy
+    nlp = spacy.load('en_default')
+    doc = nlp(u'This is a sentence.')
+
+p
+    |  You can also use the #[code info()] method to print a model's meta data
+    |  before loading it. Each #[code Language] object returned by #[code spacy.load()]
+    |  also exposes the model's meta data as the attribute #[code meta]:
+
+code.
+    import spacy
+    spacy.info('en_default')
+    # JSON-formatted model meta data
+
+    nlp = spacy.load('en_default')
+    print(nlp.meta['version'])
+    # 1.2.0
+
+h(3, "usage-import") Importing models as modules
+
+p
+    |  If you've installed a model via pip, you can also #[code import] it
+    |  directly and then call its #[code load()] method with no arguments:
+
+code.
+    import spacy
+    import en_core_web_md
+
+    nlp = en_core_web_md.load()
+    doc = nlp(u'This is a sentence.')
+
+h(2, "own-models") Using your own models
+
+p
+    |  If you've trained your own model, for example for
+    |  #[+a("/docs/usage/adding-languages") additional languages], you can
+    |  create a shortcut link for it by pointing #[code spacy.link] to the
+    |  model's data directory. To allow your model to be downloaded and
+    |  installed via pip, you'll also need to generate a package for it.
+
+infobox("Important note")
+    |  The model packages are #[strong not suitable] for the public
+    |  #[+a("https://pypi.python.org") pypi.python.org] directory, which is not
+    |  designed for binary data and files over 50 MB. However, if your company
+    |  is running an internal installation of PyPI, publishing your models on
+    |  there can be a convenient solution to share them with your team.
+
+p The model directory should look like this:
+
+code("Directory structure", "yaml").
+    └── /
+        ├── MANIFEST.in                   # to include meta.json
+        ├── meta.json                     # model meta data
+        ├── setup.py                      # setup file for pip installation
+        └── en_core_web_md                # model directory
+            ├── __init__.py               # init for pip installation
+            └── en_core_web_md-1.2.0      # model data
+
+p
+    |  You can find templates for all files in our
+    |  #[+a(gh("spacy-dev-resources", "templates/model")) spaCy dev resources].
+    |  Unless you want to customise installation and loading, the only file
+    |  you'll need to modify is #[code meta.json], which includes the model's
+    |  meta data. It will later be copied into the package and data directory.
+
+code("meta.json", "json").
+    {
+        "name": "core_web_md",
+        "lang": "en",
+        "version": "1.2.0",
+        "spacy_version": "1.7.0",
+        "description": "English model for spaCy",
+        "author": "Explosion AI",
+        "email": "contact@explosion.ai",
+        "license": "MIT"
+    }
+
+p
+    |  Keep in mind that the directories need to be named according to the
+    |  naming conventions. The #[code lang] setting is also used to create the
+    |  respective #[code Language] class in spaCy, which will later be returned
+    |  by the model's #[code load()] method.
+
+p
+    |  To generate the package, run the following command from within the
+    |  directory. This will create a #[code .tar.gz] archive in a directory
+    |  #[code /dist].
+
+code(false, "bash").
+    python setup.py sdist
--- a/website/docs/usage/resources.jade
+++ b/website/docs/usage/resources.jade
@ -7,6 +7,13 @@ p Many of the associated tools and resources that we're developing alongside spa
 +h(2, "developer") Developer tools

 +table(["Name", "Description"])
+    +row
+        +cell
+            +src(gh("spacy-models")) spaCy Models
+
+        +cell
+            |  Model releases for spaCy.
+
    +row
        +cell
            +src(gh("spacy-dev-resources")) spaCy Dev Resources
@ -55,7 +62,7 @@ p Many of the associated tools and resources that we're developing alongside spa
            +src(gh("thinc")) Thinc

        +cell
-            |  Super sparse multi-class machine learning with Cython.
+            |  spaCy's Machine Learning library for NLP in Python.

    +row
        +cell
--- a/website/index.jade
+++ b/website/index.jade
@ -66,7 +66,7 @@ include _includes/_mixins
    +grid
        +grid-col("two-thirds")
            +terminal("lightning_tour.py").
-                # Install: pip install spacy && python -m spacy.en.download
+                # Install: pip install spacy && python -m spacy.download en
                import spacy

                # Load English tokenizer, tagger, parser, NER and word vectors