From dd302e0faaf8caa420c146f0ecdcacbda37dd4a0 Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 21:52:19 +0100 Subject: [PATCH 01/27] Add bottom margin hack to prevent overlaps --- website/assets/css/_components/_buttons.sass | 1 + 1 file changed, 1 insertion(+) diff --git a/website/assets/css/_components/_buttons.sass b/website/assets/css/_components/_buttons.sass index 647723380..f753e15bf 100644 --- a/website/assets/css/_components/_buttons.sass +++ b/website/assets/css/_components/_buttons.sass @@ -4,6 +4,7 @@ display: inline-block font-weight: bold padding: 0.75em 1em + margin-bottom: 1px border: 2px solid border-radius: 2px text-align: center From d406f1e9e6b1378a1743a852f81320b62c5945be Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 21:52:45 +0100 Subject: [PATCH 02/27] Fix formatting and add link styling to all asides --- website/assets/css/_base/_layout.sass | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/website/assets/css/_base/_layout.sass b/website/assets/css/_base/_layout.sass index bb5e82e62..3f680bdc2 100644 --- a/website/assets/css/_base/_layout.sass +++ b/website/assets/css/_base/_layout.sass @@ -22,7 +22,10 @@ p //- Links -main p a, main table a, main > *:not(footer) li a, .c-aside a +main p a, +main table a, +main > *:not(footer) li a, +main aside a @extend .u-link From b7722b1bff3c8a49782c3b95fa7ff1cfdc8984cb Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 21:52:52 +0100 Subject: [PATCH 03/27] Add box component --- website/assets/css/_base/_objects.sass | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/website/assets/css/_base/_objects.sass b/website/assets/css/_base/_objects.sass index 1be4b17d5..abd5453f4 100644 --- a/website/assets/css/_base/_objects.sass +++ b/website/assets/css/_base/_objects.sass @@ -62,6 +62,15 @@ border: 1px solid $color-subtle padding: 3rem 2.5% + +//- Box + +.o-box + background: $color-theme-light + padding: 2rem + border: 1px solid 
darken($color-theme-light, 5) + + //- Icons .o-icon From 3ad2179360f008e88e345a20ab7f228323910e6a Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 21:53:00 +0100 Subject: [PATCH 04/27] Add lighter version of theme colour --- website/assets/css/_variables.sass | 1 + 1 file changed, 1 insertion(+) diff --git a/website/assets/css/_variables.sass b/website/assets/css/_variables.sass index 9029161e2..bfef915be 100644 --- a/website/assets/css/_variables.sass +++ b/website/assets/css/_variables.sass @@ -34,6 +34,7 @@ $color-dark: lighten($color-front, 20) !default $color-theme: map-get($colors, $theme) $color-theme-dark: darken(map-get($colors, $theme), 5) +$color-theme-light: saturate(lighten(map-get($colors, $theme), 35), 15) $color-subtle: #ddd !default $color-subtle-light: #f6f6f6 !default From 3e3f20e68b2bcf4931074665a92d1404245f2911 Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 21:53:15 +0100 Subject: [PATCH 05/27] Add infobox mixin --- website/_includes/_mixins.jade | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/website/_includes/_mixins.jade b/website/_includes/_mixins.jade index 8a42024c1..ba5a9297e 100644 --- a/website/_includes/_mixins.jade +++ b/website/_includes/_mixins.jade @@ -67,6 +67,17 @@ mixin aside-code(label, language) block +//- Infobox + label - [string] infobox title (optional or false for no title) + +mixin infobox(label) + aside.o-box.o-block.u-text-small + if label + h3.u-text-label.u-color-theme=label + + block + + //- Link button url - [string] link href trusted - [boolean] if not set / false, rel="noopener nofollow" is added From ec75c781b95c061e134846693c9a5e33032c8bdf Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 21:53:31 +0100 Subject: [PATCH 06/27] Add docs page for models --- website/docs/usage/_data.json | 6 + website/docs/usage/models.jade | 274 +++++++++++++++++++++++++++++++++ 2 files changed, 280 insertions(+) create mode 100644 website/docs/usage/models.jade diff --git 
a/website/docs/usage/_data.json b/website/docs/usage/_data.json index 4621ec8c2..436d14abe 100644 --- a/website/docs/usage/_data.json +++ b/website/docs/usage/_data.json @@ -2,6 +2,7 @@ "sidebar": { "Get started": { "Installation": "./", + "Models": "models", "Lightning tour": "lightning-tour", "Resources": "resources" }, @@ -28,6 +29,11 @@ "index": { "title": "Install spaCy", + "next": "models" + }, + + "models": { + "title": "Models", "next": "lightning-tour" }, diff --git a/website/docs/usage/models.jade b/website/docs/usage/models.jade new file mode 100644 index 000000000..ebe55a87d --- /dev/null +++ b/website/docs/usage/models.jade @@ -0,0 +1,274 @@ +//- 💫 DOCS > USAGE > MODELS + +include ../../_includes/_mixins + +p + | As of v1.7.0, models for spaCy can be installed as #[strong Python packages]. + | This means that they're a component of your application, just like any + | other module. They're versioned and can be defined as a dependency in your + | #[code requirements.txt]. Models can be installed from a download URL or + | a local directory, manually or via #[+a("https://pypi.python.org/pypi/pip") pip]. + | Their data can be located anywhere on your file system. To make a model + | available to spaCy, all you need to do is create a "shortcut link", an + | internal alias that tells spaCy where to find the data files for a specific + | model name. + ++aside-code("Quickstart"). 
+ # Install spaCy and download English model + pip install spacy + python -m spacy.download en + + # Usage in Python + import spacy + nlp = spacy.load('en') + doc = nlp(u'This is a sentence.') + ++h(2, "available") Available models + ++table(["Name", "Size", "Description"]) + +row + +cell #[code en_core_web_md] + +cell 340 MB + +cell Vocab, syntax, entities, word vectors + + +row + +cell #[code en_core_web_sm] + +cell 52 MB + +cell Vocab, syntax, entities, word vectors #[+tag default] + + +row + +cell #[code en_vectors_glove_md] + +cell 693 MB + +cell + | #[+a("http://nlp.stanford.edu/projects/glove/") GloVe] Common + | Crawl vectors + + +row + +cell #[code de_core_news_md] + +cell 637 MB + +cell Vocab, syntax, entities, word vectors #[+tag default] + +p + | Models are now available as #[code .tar.gz] archives #[+a(MODELS_URL) from GitHub], + | attached to individual releases. They can be downloaded and loaded manually, + | or using spaCy's #[code download] and #[code link] commands. All models + | follow the naming convention of #[code [language]_[type]_[genre]_[size]]. + ++button(MODELS_URL + "/releases", true, "primary") View models + ++h(2, "download") Downloading models + ++aside("Downloading models in spaCy < v1.7") + | In older versions of spaCy, you can still use the old download commands. + | This will download and install the models into the #[code spacy/data] + | directory. + + +code.o-no-block. + python -m spacy.en.download all + python -m spacy.de.download all + python -m spacy.en.download glove + + | The old models are now available as Python packages, meaning that you can + | also choose to #[+a("#usage-import") import them] as modules. + +p + | The easiest way to download a model is via spaCy's #[code download] + | command. It takes care of finding the best-matching model compatible with + | your spaCy installation. + ++code(false, "bash"). 
+ # out-of-the-box: download best-matching default model + python -m spacy.download en + python -m spacy.download de + + # download best-matching version of specific model for your spaCy installation + python -m spacy.download en_core_web_md + + # download exact model version (doesn't create shortcut link) + python -m spacy.download en_core_web_md-1.2.0 --direct + +p + | The download command will #[+a("#download-pip") install the model] via + | pip, place the package in your #[code site-packages] directory and create + | a #[+a("#usage") shortcut link] that lets you load the model by name. The + | shortcut link will be the same as the model name used in + | #[code spacy.download]. + ++code(false, "bash"). + pip install spacy + python -m spacy.download en + ++code. + import spacy + nlp = spacy.load('en') + doc = nlp(u'This is a sentence.') + ++h(3, "download-pip") Installation via pip + +p + | To download a model directly using #[+a("https://pypi.python.org/pypi/pip") pip], + | simply point #[code pip install] to the URL or local path of the archive + | file. To find the direct link to a model, head over to the + | #[+a(MODELS_URL + "/releases") model releases], right click on the archive + | link and copy it to your clipboard. + ++code(false, "bash"). + # with external URL + pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_md-1.2.0/en_core_web_md-1.2.0.tar.gz + + # with local file + pip install /Users/you/en_core_web_md-1.2.0.tar.gz + +p + | By default, this will install the model into your #[code site-packages] + | directory. You can then create a #[+a("#usage") shortcut link] for your + | model to load it via #[code spacy.load()], or #[+a("#usage-import") import it] + | as a Python module. + ++h(3, "download-manual") Manual download and installation + +p + | In some cases, you might prefer downloading the data manually, for + | example to place it into a custom directory.
You can download the model + | via your browser from the #[+a(MODELS_URL) latest releases], or configure + | your own download script using the URL of the archive file. The archive + | consists of a model directory that contains another directory with the + | model data. + ++code("Directory structure", "yaml"). + └── en_core_web_md-1.2.0.tar.gz # downloaded archive + ├── meta.json # model meta data + ├── setup.py # setup file for pip installation + └── en_core_web_md # model directory + ├── __init__.py # init for pip installation + ├── meta.json # model meta data + └── en_core_web_md-1.2.0 # model data + +p + | You can place the model data directory anywhere on your local file system. + | To use it with spaCy, simply create a #[+a("#usage") shortcut link] for + | the directory and assign it a name. + ++h(2, "usage") Using models with spaCy + +p + | While previous versions of spaCy required you to maintain a data directory + | containing the models for each installation, you can now choose how and + | where you want to keep your data files. To load the models conveniently + | from within spaCy, you can use the #[code spacy.link] command to create a + | symlink. This lets you set up custom shortcut links for models so you can + | load them by name. + ++code(false, "bash"). + python -m spacy.link [package name or path] [shortcut] [--force] + +p + | The first argument is the package name (if the model was installed via + | pip), or a local path to the data directory. The second argument is + | the internal name you want to use for the model. Setting the #[code --force] + | flag will overwrite any existing links. + ++code("Examples", "bash").
+ # Create link en_default for model en_core_web_md + python -m spacy.link en_core_web_md en_default + + # Create link my_amazing_model for model data in local directory + python -m spacy.link /Users/you/model my_amazing_model + ++h(3, "usage-loading") Loading models + +p To load a model, use #[code spacy.load()] with the model's shortcut link: + ++code. + import spacy + nlp = spacy.load('en_default') + doc = nlp(u'This is a sentence.') + +p + | You can also use the #[code info()] method to print a model's meta data + | before loading it. Each #[code Language] object returned by #[code spacy.load()] + | also exposes the model's meta data as the property #[code meta]: + ++code. + import spacy + spacy.info('en_default') + # JSON-formatted model meta data + + nlp = spacy.load('en_default') + print(nlp.meta['version']) + # 1.2.0 + ++h(3, "usage-import") Importing models as modules + +p + | If you've installed a model via pip, you can also #[code import] it + | directly and then call its #[code load()] method with no arguments: + ++code. + import spacy + import en_core_web_md + + nlp = en_core_web_md.load() + doc = nlp(u'This is a sentence.') + +p This should also work for older models in previous versions of spaCy. + ++h(2, "own-models") Using your own models + +p + | If you've trained your own model, for example for + | #[+a("/docs/usage/adding-languages") additional languages], you can + | create a shortcut link for it by pointing #[code spacy.link] to the + | model's data directory. To allow your model to be downloaded and + | installed via pip, you'll also need to generate a package for it. + ++infobox("Important note") + | The model packages are #[strong not suitable] for the public + | #[+a("https://pypi.python.org") pypi.python.org] directory, which is not + | designed for binary data and files over 50 MB.
However, if your company + | is running an internal installation of pypi, publishing your models on + | there can be a convenient solution to share them with your team. + +p The model directory should look like this: + ++code("Directory structure", "yaml"). + └── / + ├── MANIFEST.in # to include meta.json + ├── meta.json # model meta data + ├── setup.py # setup file for pip installation + └── en_core_web_md # model directory + ├── __init__.py # init for pip installation + └── en_core_web_md-1.2.0 # model data + +p + | You can find templates for all files in our + | #[+a(gh("spacy-dev-resources", "templates/model")) spaCy dev resources]. + | Unless you want to customise installation and loading, the only file + | you'll need to modify is #[code meta.json], which includes the model's + | meta data. It will later be copied into the package and data directory. + ++code("meta.json", "json"). + { + "name": "core_web_md", + "lang": "en", + "version": "1.2.0", + "spacy_version": "1.7.0", + "description": "English model for spaCy", + "author": "Explosion AI", + "email": "contact@explosion.ai", + "license": "MIT" + } + +p + | Keep in mind that the directories need to be named according to the + | naming conventions. The #[code lang] setting is also used to create the + | respective #[code Language] class in spaCy, which will later be returned + | by the model's #[code load()] method. + +p + | To generate the package, run the following command from within the + | directory. This will create a #[code .tar.gz] archive in a directory + | #[code /dist]. + ++code(false, "bash").
+ python setup.py sdist From 807139ae614c7f016d2f5b173aadc327aceaf7fa Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 21:53:44 +0100 Subject: [PATCH 07/27] Update installation docs and add models quickstart aside --- website/docs/usage/index.jade | 68 ++++++----------------------------- 1 file changed, 10 insertions(+), 58 deletions(-) diff --git a/website/docs/usage/index.jade b/website/docs/usage/index.jade index 479635e4b..b894bedde 100644 --- a/website/docs/usage/index.jade +++ b/website/docs/usage/index.jade @@ -12,6 +12,16 @@ p | #[a(href="#source-ubuntu") Ubuntu], #[a(href="#source-osx") macOS/OS X] | and #[a(href="#source-windows") Windows] for details. ++aside("Download models") + | After installation you need to download a language model. For more info + | and available models, see the #[+a("/docs/usage/models") docs on models]. + + +code.o-no-block. + python -m spacy.download en + + >>> import spacy + >>> nlp = spacy.load('en') + +h(2, "pip") pip p Using pip, spaCy releases are currently only available as source packages. @@ -43,64 +53,6 @@ p | #[+a("https://github.com/conda-forge/spacy-feedstock") this repository]. | Improvements and pull requests to the recipe and setup are always appreciated. -+h(2, "models") Download models - -p - | After installation you need to download a language model. Models for - | English (#[code en]) and German (#[code de]) are available. - -+code(false, "bash"). - python -m spacy.en.download all - python -m spacy.de.download all - -+aside-code("Examples", "bash"). 
- # Install English tagger, parser and NER - python -m spacy.en.download parser - - # Install English GloVe vectors - python -m spacy.en.download glove - - # Upgrade/overwrite existing data - python -m spacy.en.download --force - - # Check whether the model was successfully installed - python -c "import spacy; spacy.load('en'); print('OK')" - -p - | The download command fetches about 1 GB of data which it - | installs within the #[code spacy] package directory. - -+h(3, "custom-location") Download model to custom location - -p - | You can specify where #[code spacy.en.download] and - | #[code spacy.de.download] download the language model to using the - | #[code --data-path] or #[code -d] argument: - -+code(false, "bash"). - python -m spacy.en.download all --data-path /some/dir - -p - | If you choose to download to a custom location, you will need to tell - | spaCy where to load the model from in order to use it. You can do this - | either by calling #[code spacy.util.set_data_path()] before calling - | #[code spacy.load()], or by passing a #[code path] argument to the - | #[code spacy.en.English] or #[code spacy.de.German] constructors. - -+h(3, "models-manual") Download models manually - -p - | As of v1.6, the models and word vectors are also available as direct - | downloads from GitHub, attached to the #[+a(gh("spaCy") + "/releases") releases] as #[code .tar.gz] archives. - -p - | To install the models manually, first find the default data path. You can - | use #[code spacy.util.get_data_path()] to find the directory where spaCy - | will look for its models, or change the default data path with - | #[code spacy.util.set_data_path()]. Then simply unpack the archive and - | place the contained folder in that directory. You can now load the models - | via #[code spacy.load()]. 
- +h(2, "source") Compile from source p From 0f4a876a831cd99ab214b07931752cd677c7c55e Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 21:53:54 +0100 Subject: [PATCH 08/27] Add MODELS_URL variable --- website/_harp.json | 1 + 1 file changed, 1 insertion(+) diff --git a/website/_harp.json b/website/_harp.json index 9548cadcf..03b4a8dd5 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -11,6 +11,7 @@ "COMPANY": "Explosion AI", "COMPANY_URL": "https://explosion.ai", "DEMOS_URL": "https://demos.explosion.ai", + "MODELS_URL": "https://github.com/spacy-models", "SPACY_VERSION": "1.6", "LATEST_NEWS": { From 01288807ba76b1cc32140ee86662c4d343f5888e Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 21:53:59 +0100 Subject: [PATCH 09/27] Update spaCy version --- website/_harp.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/_harp.json b/website/_harp.json index 03b4a8dd5..b119fb2bd 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -13,7 +13,7 @@ "DEMOS_URL": "https://demos.explosion.ai", "MODELS_URL": "https://github.com/spacy-models", - "SPACY_VERSION": "1.6", + "SPACY_VERSION": "1.7", "LATEST_NEWS": { "url": "https://explosion.ai/blog/deep-learning-formula-nlp", "title": "The new deep learning formula for state-of-the-art NLP models" From 0b5c664b04f38d6624cefeb1210acf375c195a39 Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 21:59:26 +0100 Subject: [PATCH 10/27] Update resources --- website/docs/usage/resources.jade | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/website/docs/usage/resources.jade b/website/docs/usage/resources.jade index 754b951c7..56e92a1e7 100644 --- a/website/docs/usage/resources.jade +++ b/website/docs/usage/resources.jade @@ -7,6 +7,13 @@ p Many of the associated tools and resources that we're developing alongside spa +h(2, "developer") Developer tools +table(["Name", "Description"]) + +row + +cell + +src(gh("spacy-models")) spaCy Models + + +cell 
+ | Model releases for spaCy. + +row +cell +src(gh("spacy-dev-resources")) spaCy Dev Resources @@ -55,7 +62,7 @@ p Many of the associated tools and resources that we're developing alongside spa +src(gh("thinc")) Thinc +cell - | Super sparse multi-class machine learning with Cython. + | spaCy's Machine Learning library for NLP in Python. +row +cell From 4d9161476a90f8148876ee4278b9d4dca5fb211e Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 22:09:36 +0100 Subject: [PATCH 11/27] Remove redundant MODELS_URL --- website/_harp.json | 1 - 1 file changed, 1 deletion(-) diff --git a/website/_harp.json b/website/_harp.json index b119fb2bd..8de3f8574 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -11,7 +11,6 @@ "COMPANY": "Explosion AI", "COMPANY_URL": "https://explosion.ai", "DEMOS_URL": "https://demos.explosion.ai", - "MODELS_URL": "https://github.com/spacy-models", "SPACY_VERSION": "1.7", "LATEST_NEWS": { From 08b0fb62ccfbd0a1e23d8912283a28689767e49f Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 22:09:43 +0100 Subject: [PATCH 12/27] Update models docs --- website/docs/usage/models.jade | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/website/docs/usage/models.jade b/website/docs/usage/models.jade index ebe55a87d..0cd5bcc1d 100644 --- a/website/docs/usage/models.jade +++ b/website/docs/usage/models.jade @@ -49,12 +49,12 @@ p +cell Vocab, syntax, entities, word vectors #[+tag default] p - | Models are now available as #[code .tar.gz] archives #[+a(MODELS_URL) from GitHub], + | Models are now available as #[code .tar.gz] archives #[+a(gh("spacy-models")) from GitHub], | attached to individual releases. They can be downloaded and loaded manually, | or using spaCy's #[code download] and #[code link] commands. All models | follow the naming convention of #[code [language]_[type]_[genre]_[size]]. 
-+button(MODELS_URL + "/releases", true, "primary") View models ++button(gh("spacy-models") + "/releases", true, "primary") View models +h(2, "download") Downloading models @@ -109,12 +109,12 @@ p | To download a model directly using #[+a("https://pypi.python.org/pypi/pip") pip], | simply point #[code pip install] to the URL or local path of the archive | file. To find the direct link to a model, head over to the - | #[+a(MODELS_URL + "/releases") model releases], right click on the archive + | #[+a(gh("spacy-models") + "/releases") model releases], right click on the archive | link and copy it to your clipboard. +code(false, "bash"). # with external URL - pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_md-1.2.0/en_core_web_md-1.2.0.tar.gz + pip install #{gh("spacy-models")}/releases/download/en_core_web_md-1.2.0/en_core_web_md-1.2.0.tar.gz # with local file pip install /Users/you/en_core_web_md-1.2.0.tar.gz @@ -130,7 +130,7 @@ p p | In some cases, you might prefer downloading the data manually, for | example to place it into a custom directory. You can download the model - | via your browser from the #[+a(MODELS_URL) latest releases], or configure + | via your browser from the #[+a(gh("spacy-models")) latest releases], or configure | your own download script using the URL of the archive file. The archive | consists of a model directory that contains another directory with the | model data. @@ -169,10 +169,10 @@ p | flag will overwrite any existing links. +code("Examples", "bash"). 
- # Create link en_default for model en_core_web_md + # set up shortcut link to load package en_core_web as "en_default" python -m spacy.link en_core_web_md en_default - # Create link my_amazing_model for model data in local directory + # set up shortcut link to load local model as "my_amazing_model" python -m spacy.link /Users/you/model my_amazing_model +h(3, "usage-loading") Loading models From f4df9463f2d86cdd94cf78b1683db32fada51d63 Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 22:21:46 +0100 Subject: [PATCH 13/27] Fix wording --- website/docs/usage/models.jade | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/usage/models.jade b/website/docs/usage/models.jade index 0cd5bcc1d..29bf68389 100644 --- a/website/docs/usage/models.jade +++ b/website/docs/usage/models.jade @@ -187,7 +187,7 @@ p To load a model, use #[code spacy.load()] with the model's shortcut link: p | You can also use the #[code info()] method to print a model's meta data | before loading it. Each #[code Language] object returned by #[code spacy.load()] - | also exposes the model's meta data as the property #[code meta]: + | also exposes the model's meta data as the attribute #[code meta]: +code. 
import spacy From fd609618252ffe493fc54c86ba969169b814b453 Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 23:23:26 +0100 Subject: [PATCH 14/27] Fix spacing --- spacy/download.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spacy/download.py b/spacy/download.py index 1f8f701ff..f7ece25e9 100644 --- a/spacy/download.py +++ b/spacy/download.py @@ -28,6 +28,7 @@ def download(model=None, direct=False): download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version)) link(model_name, model, force=True) + def get_compatibility(): version = about.__version__ r = requests.get(about.__compatibility__) From e461fafd145e0fce7682b1d5eef3fdc16961b154 Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 16 Mar 2017 23:23:35 +0100 Subject: [PATCH 15/27] Update example --- website/docs/usage/models.jade | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/usage/models.jade b/website/docs/usage/models.jade index 29bf68389..fda9c76a5 100644 --- a/website/docs/usage/models.jade +++ b/website/docs/usage/models.jade @@ -169,7 +169,7 @@ p | flag will overwrite any existing links. +code("Examples", "bash"). 
- # set up shortcut link to load package en_core_web as "en_default" + # set up shortcut link to load installed package as "en_default" python -m spacy.link en_core_web_md en_default # set up shortcut link to load local model as "my_amazing_model" From 8af4b9e4dfd6e8e273be5613c7dde017ae2a3354 Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 17 Mar 2017 12:43:03 +0100 Subject: [PATCH 16/27] Fix compatibility.json link --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index 8e21ab316..949ff7c76 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -12,5 +12,5 @@ __license__ = 'MIT' __docs__ = 'https://spacy.io/docs/usage' __download_url__ = 'https://github.com/explosion/spacy-models/releases/download' -__compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json?token=ANAt54fi5zcUtnwGhMLw2klWwcAyHkZGks5Y0nw1wA%3D%3D' +__compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json' __shortcuts__ = {'en': 'en_core_web_sm', 'de': 'de_core_web_md', 'vectors': 'en_vectors_glove_md'} From 279b1d19657db73e64ed221043d0d97acc5a3d4e Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 17 Mar 2017 12:43:08 +0100 Subject: [PATCH 17/27] Update version --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index 949ff7c76..698e4e9ce 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -3,7 +3,7 @@ # https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py __title__ = 'spacy' -__version__ = '1.6.0' +__version__ = '1.7.0' __summary__ = 'Industrial-strength Natural Language Processing (NLP) with Python and Cython' __uri__ = 'https://spacy.io' __author__ = 'Matthew Honnibal' From 441383d3806a143850a4fe055b2bfbcdbddf079e Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 17 Mar 2017 12:52:55 +0100 Subject: [PATCH 18/27] Bump version --- website/_harp.json | 4 ++-- 1 file changed, 
2 insertions(+), 2 deletions(-) diff --git a/website/_harp.json b/website/_harp.json index 8de3f8574..18ed0709d 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -54,8 +54,8 @@ } }, - "V_CSS": "1.15", - "V_JS": "1.1", + "V_CSS": "1.2", + "V_JS": "1.2", "DEFAULT_SYNTAX": "python", "ANALYTICS": "UA-58931649-1", "MAILCHIMP": { From 881db170f708bd6ead04850ef5e50e1724f557b7 Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 17 Mar 2017 13:10:47 +0100 Subject: [PATCH 19/27] Update latest news --- website/_harp.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/_harp.json b/website/_harp.json index 18ed0709d..d26851804 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -14,8 +14,8 @@ "SPACY_VERSION": "1.7", "LATEST_NEWS": { - "url": "https://explosion.ai/blog/deep-learning-formula-nlp", - "title": "The new deep learning formula for state-of-the-art NLP models" + "url": "/docs/usage/models", + "title": "Downloading and installing models as packages" }, "SOCIAL": { From 7f25f64acc1b9e0c3fa0cf24a72a4496e86559a4 Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 17 Mar 2017 13:11:00 +0100 Subject: [PATCH 20/27] Update lightning tour --- website/docs/usage/lightning-tour.jade | 9 +++++++++ website/index.jade | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/website/docs/usage/lightning-tour.jade b/website/docs/usage/lightning-tour.jade index cb08bc045..ba0adaa6e 100644 --- a/website/docs/usage/lightning-tour.jade +++ b/website/docs/usage/lightning-tour.jade @@ -6,6 +6,15 @@ p | The following examples and code snippets give you an overview of spaCy's | functionality and its usage. ++h(2, "models") Install and load models + ++code(false, "bash"). + python -m spacy.download en + ++code. + import spacy + nlp = spacy.load('en') + +h(2, "examples-resources") Load resources and process text +code. 
diff --git a/website/index.jade b/website/index.jade index 9d53432fc..1a5cd0826 100644 --- a/website/index.jade +++ b/website/index.jade @@ -66,7 +66,7 @@ include _includes/_mixins +grid +grid-col("two-thirds") +terminal("lightning_tour.py"). - # Install: pip install spacy && python -m spacy.en.download + # Install: pip install spacy && python -m spacy.download en import spacy # Load English tokenizer, tagger, parser, NER and word vectors From 0e533ad0cc42431a57758b577cf96783ee4b7484 Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 17 Mar 2017 13:11:36 +0100 Subject: [PATCH 21/27] Mark compatibility table test as slow (temporary) Prevent Travis from running test test until models repo is published --- spacy/tests/test_download.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spacy/tests/test_download.py b/spacy/tests/test_download.py index 8d67364ea..728cacc41 100644 --- a/spacy/tests/test_download.py +++ b/spacy/tests/test_download.py @@ -5,6 +5,7 @@ from ..download import download, get_compatibility, get_version, check_error_dep import pytest +@pytest.mark.slow def test_download_fetch_compatibility(): compatibility = get_compatibility() assert type(compatibility) == dict From 7f8c2ef3c10ef58ada00dcab30ea7f4a3c2ec10e Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 17 Mar 2017 14:35:10 +0100 Subject: [PATCH 22/27] Remove buildbot.json for now --- MANIFEST.in | 1 - buildbot.json | 25 ------------------------- 2 files changed, 26 deletions(-) delete mode 100644 buildbot.json diff --git a/MANIFEST.in b/MANIFEST.in index e15d9de6d..697748835 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,3 @@ recursive-include include *.h -include buildbot.json include LICENSE include README.rst diff --git a/buildbot.json b/buildbot.json deleted file mode 100644 index 6dc8aa286..000000000 --- a/buildbot.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "build": { - "sdist": [ - "pip install -r requirements.txt", - "pip install \"numpy<1.8\"", - "python setup.py sdist" - ], - "install": 
[ - "pip install -v source.tar.gz" - ], - "wheel": [ - "python untar.py source.tar.gz .", - "python setup.py bdist_wheel", - "python cpdist.py dist" - ] - }, - "test": { - "after": ["install", "wheel"], - "run": [ - "python -m spacy.en.download --force" - ], - "package": "spacy", - "args": "--tb=native -x --models --vectors --slow" - } -} From b322f3152165a107ded77d46c777af6d4959d527 Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 17 Mar 2017 16:09:56 +0100 Subject: [PATCH 23/27] Update models docs --- website/docs/usage/models.jade | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/website/docs/usage/models.jade b/website/docs/usage/models.jade index fda9c76a5..fe7d643d8 100644 --- a/website/docs/usage/models.jade +++ b/website/docs/usage/models.jade @@ -26,19 +26,24 @@ p +h(2, "available") Available models +table(["Name", "Size", "Description"]) - +row - +cell #[code en_core_web_md] - +cell 340 MB - +cell Vocab, syntax, entities, word vectors - +row +cell #[code en_core_web_sm] - +cell 52 MB + +cell 50 MB +cell Vocab, syntax, entities, word vectors #[+tag default] + +row + +cell #[code en_core_web_md] + +cell 1 GB + +cell Vocab, syntax, entities, word vectors + + +row + +cell #[code en_depent_web_md] + +cell 328 MB + +cell Vocab, syntax, entities + +row +cell #[code en_vectors_glove_md] - +cell 693 MB + +cell 727 MB +cell | #[+a("http://nlp.stanford.edu/projects/glove/") GloVe] Common | Crawl vectors @@ -146,8 +151,8 @@ p p | You can place the model data directory anywhere on your local file system. - | To use it with spaCy, simply create a #[+a("#usage") shortcut link] for - | the directory and assign it a name. + | To use it with spaCy, simply assign it a name by creating a + | #[+a("#usage") shortcut link] for the data directory. 
+h(2, "usage") Using models with spaCy From 76c0ea6cc678bbc580b2fe42bc9ca0a1465c42be Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 17 Mar 2017 17:01:16 +0100 Subject: [PATCH 24/27] Update models docs --- website/docs/usage/models.jade | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/website/docs/usage/models.jade b/website/docs/usage/models.jade index fe7d643d8..22e49573e 100644 --- a/website/docs/usage/models.jade +++ b/website/docs/usage/models.jade @@ -50,7 +50,7 @@ p +row +cell #[code de_core_news_md] - +cell 637 MB + +cell 1 GB +cell Vocab, syntax, entities, word vectors #[+tag default] p @@ -73,8 +73,10 @@ p python -m spacy.de.download all python -m spacy.en.download glove - | The old models are now available as Python packages, meaning that you can - | also choose to #[+a("#usage-import") import them] as modules. + | The old models are also #[+a(gh("spacy") + "/tree/v1.6.0") attached to the v1.6.0 release]. + | To download and install them manually, unpack the archive, drop the + | contained directory into #[code spacy/data] and load the model via + | #[code spacy.load('en')] or #[code spacy.load('de')]. p | The easiest way to download a model is via spaCy's #[code download] @@ -216,8 +218,6 @@ p nlp = en_core_web_md.load() doc = nlp(u'This is a sentence.') -p This should also work for older models in previous versions of spaCy. 
- +h(2, "own-models") Using your own models p From aedefef49dddae29ba9775f389ec7150749ac85f Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 17 Mar 2017 18:47:05 +0100 Subject: [PATCH 25/27] Add function to resolve model names and link them --- spacy/__init__.py | 6 ++++-- spacy/deprecated.py | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/spacy/__init__.py b/spacy/__init__.py index ca5a39f05..571e95ecf 100644 --- a/spacy/__init__.py +++ b/spacy/__init__.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals, print_function import json from pathlib import Path from .util import set_lang_class, get_lang_class, parse_package_meta +from .deprecated import resolve_model_name from . import en from . import de @@ -35,11 +36,12 @@ set_lang_class(bn.Bengali.lang, bn.Bengali) def load(name, **overrides): data_path = overrides.get('path', util.get_data_path()) - meta = parse_package_meta(data_path, name) + model_name = resolve_model_name(name) + meta = parse_package_meta(data_path, model_name) lang = meta['lang'] if meta and 'lang' in meta else 'en' cls = get_lang_class(lang) overrides['meta'] = meta - overrides['path'] = Path(data_path / name) + overrides['path'] = Path(data_path / model_name) return cls(**overrides) diff --git a/spacy/deprecated.py b/spacy/deprecated.py index 72327c584..2576c7f9a 100644 --- a/spacy/deprecated.py +++ b/spacy/deprecated.py @@ -2,6 +2,7 @@ from pathlib import Path from . import about from . import util from .download import download +from .link import link try: @@ -86,6 +87,26 @@ def fix_glove_vectors_loading(overrides): return overrides +def resolve_model_name(name): + """If spaCy is loaded with 'en' or 'de', check if symlink already exists. If + not, user have upgraded from older version and have old models installed. + Check if old model directory exists and if so, return that instead and create + shortcut link. 
+ """ + + if name == 'en' or name == 'de': + versions = ['1.0.0', '1.1.0'] + data_path = Path(util.get_data_path()) + model_path = data_path / name + v_model_paths = [data_path / Path(name + '-' + v) for v in versions] + if not model_path.exists(): + for v_path in v_model_paths: + if v_path.exists(): + link(v_path, name) + return name + return name + + class ModelDownload(): """Replace download modules within en and de with deprecation warning and download default language model (using shortcut). Use classmethods to allow From e01fbacf8123ee265209f47db312818850ea8102 Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 17 Mar 2017 19:26:28 +0100 Subject: [PATCH 26/27] Update resolve_model_name --- spacy/deprecated.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/spacy/deprecated.py b/spacy/deprecated.py index 2576c7f9a..30be24942 100644 --- a/spacy/deprecated.py +++ b/spacy/deprecated.py @@ -88,10 +88,11 @@ def fix_glove_vectors_loading(overrides): def resolve_model_name(name): - """If spaCy is loaded with 'en' or 'de', check if symlink already exists. If + """If spaCy is loaded with 'de', check if symlink already exists. If not, user have upgraded from older version and have old models installed. Check if old model directory exists and if so, return that instead and create - shortcut link. + shortcut link. If English model is found and no shortcut exists, raise error + and tell user to install new model. 
""" if name == 'en' or name == 'de': @@ -99,11 +100,19 @@ def resolve_model_name(name): data_path = Path(util.get_data_path()) model_path = data_path / name v_model_paths = [data_path / Path(name + '-' + v) for v in versions] - if not model_path.exists(): + + if not model_path.exists(): # no shortcut found for v_path in v_model_paths: - if v_path.exists(): - link(v_path, name) - return name + if v_path.exists(): # versioned model directory found + if name == 'de': + link(v_path, name) + return name + else: + raise ValueError( + "Found English model at {p}. This model is not " + "compatible with the current version. See " + "https://spacy.io/docs/usage/models to download the " + "new model.".format(p=v_path)) return name From 3926ffdb7061b24c882c7b880e25bbbfb8e7f27b Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 17 Mar 2017 19:26:37 +0100 Subject: [PATCH 27/27] Update models docs --- website/docs/usage/models.jade | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/website/docs/usage/models.jade b/website/docs/usage/models.jade index 22e49573e..3b1bb5f7d 100644 --- a/website/docs/usage/models.jade +++ b/website/docs/usage/models.jade @@ -13,6 +13,11 @@ p | internal alias that tells spaCy where to find the data files for a specific | model name. ++infobox("Important note") + | Due to improvements in the English lemmatizer in v1.7.0, you need to download the + | new English model. The German model is still compatible and will be + | recognised and linked automatically. + +aside-code("Quickstart"). # Install spaCy and download English model pip install spacy @@ -50,7 +55,7 @@ p +row +cell #[code de_core_news_md] - +cell 1 GB + +cell 645 MB +cell Vocab, syntax, entities, word vectors #[+tag default] p @@ -184,7 +189,8 @@ p +h(3, "usage-loading") Loading models -p To load a model, use #[code spacy.load()] with the model's shortcut link: +p + | To load a model, use #[code spacy.load()] with the model's shortcut link. +code. import spacy