Update morphology branch from develop

2025-11-10 12:58:01 +03:00 · 2019-03-07 00:14:43 +01:00 · 2019-03-07 00:14:43 +01:00 · 3993f41cc4
commit 3993f41cc4
parent b9ef8ac616 e9babd9973
1059 changed files with 1603104 additions and 429912 deletions
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -1,50 +1,21 @@
 environment:
-
  matrix:
-
-    # For Python versions available on Appveyor, see
-    # http://www.appveyor.com/docs/installed-software#python
-
-    - PYTHON: "C:\\Python27-x64"
-    #- PYTHON: "C:\\Python34"
-    #- PYTHON: "C:\\Python35"
-    #- DISTUTILS_USE_SDK: "1"
-    #- PYTHON: "C:\\Python34-x64"
-    #- DISTUTILS_USE_SDK: "1"
-    #- PYTHON: "C:\\Python35-x64"
+    - PYTHON: "C:\\Python35-x64"
    - PYTHON: "C:\\Python36-x64"
-
+    - PYTHON: "C:\\Python37-x64"
 install:
  # We need wheel installed to build wheels
  - "%PYTHON%\\python.exe -m pip install wheel"
  - "%PYTHON%\\python.exe -m pip install cython"
  - "%PYTHON%\\python.exe -m pip install -r requirements.txt"
  - "%PYTHON%\\python.exe -m pip install -e ."
-
 build: off
-
 test_script:
-  # Put your test command here.
-  # If you don't need to build C extensions on 64-bit Python 3.4,
-  # you can remove "build.cmd" from the front of the command, as it's
-  # only needed to support those cases.
-  # Note that you must use the environment variable %PYTHON% to refer to
-  # the interpreter you're using - Appveyor does not do anything special
-  # to put the Python version you want to use on PATH.
  - "%PYTHON%\\python.exe -m pytest spacy/ --no-print-logs"
-
 after_test:
-  # This step builds your wheels.
-  # Again, you only need build.cmd if you're building C extensions for
-  # 64-bit Python 3.4. And you need to use %PYTHON% to get the correct
-  # interpreter
  - "%PYTHON%\\python.exe setup.py bdist_wheel"
-
 artifacts:
-  # bdist_wheel puts your built wheel in the dist directory
  - path: dist\*
-
-#on_success:
-#  You can use this step to upload your artifacts to a public website.
-#  See Appveyor's documentation for more details. Or you can simply
-#  access your wheels from the Appveyor "artifacts" tab for your build.
+branches:
+  except:
+    - spacy.io
--- a/.flake8
+++ b/.flake8
@@ -0,0 +1,14 @@
+[flake8]
+ignore = E203, E266, E501, E731, W503
+max-line-length = 80
+select = B,C,E,F,W,T4,B9
+exclude =
+    .env,
+    .git,
+    __pycache__,
+    lemmatizer.py,
+    lookup.py,
+    _tokenizer_exceptions_list.py,
+    spacy/lang/fr/lemmatizer,
+    spacy/lang/nb/lemmatizer,
+    spacy/__init__.py
--- a/.github/CONTRIBUTOR_AGREEMENT.md
+++ b/.github/CONTRIBUTOR_AGREEMENT.md
@@ -5,7 +5,7 @@ This spaCy Contributor Agreement (**"SCA"**) is based on the
 The SCA applies to any contribution that you make to any product or project
 managed by us (the **"project"**), and sets out the intellectual property rights
 you grant to us in the contributed materials. The term **"us"** shall mean
-[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+[ExplosionAI GmbH](https://explosion.ai/legal). The term
 **"you"** shall mean the person or entity identified below.

 If you agree to be bound by these terms, fill in the information requested
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -1,7 +1,7 @@
 <!--- Please provide a summary in the title and describe your issue here.
 Is this a bug or feature request? If a bug, include all the steps that led to the issue.

-If you're looking for help with your code, consider posting a question on StackOverflow instead:
+If you're looking for help with your code, consider posting a question on Stack Overflow instead:
 http://stackoverflow.com/questions/tagged/spacy -->


--- a/.github/ISSUE_TEMPLATE/05_other.md
+++ b/.github/ISSUE_TEMPLATE/05_other.md
@@ -1,11 +1,11 @@
 ---
 name: "\U0001F4AC Anything else?"
 about: For general usage questions or help with your code, please consider
-  posting on StackOverflow instead.
+  posting on Stack Overflow instead.

 ---

-<!-- Describe your issue here. Please keep in mind that the GitHub issue tracker is mostly intended for reports related to the spaCy code base and source, and for bugs and feature requests. If you're looking for help with your code, consider posting a question on StackOverflow instead: http://stackoverflow.com/questions/tagged/spacy -->
+<!-- Describe your issue here. Please keep in mind that the GitHub issue tracker is mostly intended for reports related to the spaCy code base and source, and for bugs and feature requests. If you're looking for help with your code, consider posting a question on Stack Overflow instead: http://stackoverflow.com/questions/tagged/spacy -->

 ## Your Environment
 <!-- Include details of your environment. If you're using spaCy 1.7+, you can also type `python -m spacy info --markdown` and copy-paste the result here.-->
--- a/.github/contributors/ALSchwalm.md
+++ b/.github/contributors/ALSchwalm.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                    |
+|------------------------------- | ------------------------ |
+| Name                           | Adam Schwalm             |
+| Company name (if applicable)   | Star Lab                 |
+| Title or role (if applicable)  | Software Engineer        |
+| Date                           | 2018-11-28               |
+| GitHub username                | ALSchwalm                |
+| Website (optional)             | https://alschwalm.com    |
--- a/.github/contributors/BramVanroy.md
+++ b/.github/contributors/BramVanroy.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [x] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                 |
+|------------------------------- | ----------------------|
+| Name                           | Bram Vanroy           |
+| Company name (if applicable)   |                       |
+| Title or role (if applicable)  |                       |
+| Date                           | October 19, 2018      |
+| GitHub username                | BramVanroy            |
+| Website (optional)             | https://bramvanroy.be |
--- a/.github/contributors/Brixjohn.md
+++ b/.github/contributors/Brixjohn.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [ ] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [X] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Brixter John Lumabi  |
+| Company name (if applicable)   | Stratpoint           |
+| Title or role (if applicable)  | Software Developer   |
+| Date                           | 18 December 2018     |
+| GitHub username                | Brixjohn             |
+| Website (optional)             |                      |
--- a/.github/contributors/Cinnamy.md
+++ b/.github/contributors/Cinnamy.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Marina Lysyuk        |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 13.10.2018           |
+| GitHub username                | Cinnamy              |
+| Website (optional)             |                      |
--- a/.github/contributors/DeNeutoy.md
+++ b/.github/contributors/DeNeutoy.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           |Mark Neumann                      |
+| Company name (if applicable)   |Allen Institute for AI                      |
+| Title or role (if applicable)  |Research Engineer                      |
+| Date                           | 13/01/2019                      |
+| GitHub username                |@Deneutoy                      |
+| Website (optional)             |markneumann.xyz                      |
--- a/.github/contributors/DoomCoder.md
+++ b/.github/contributors/DoomCoder.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Piotr Książek        |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 22.11.2018           |
+| GitHub username                | DoomCoder            |
+| Website (optional)             |                      |
--- a/.github/contributors/Gizzio.md
+++ b/.github/contributors/Gizzio.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [X] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           |  Stanisław Giziński  |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           |  21.11.2018          |
+| GitHub username                |  Gizzio              |
+| Website (optional)             |                      |
--- a/.github/contributors/JKhakpour.md
+++ b/.github/contributors/JKhakpour.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [ ] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Ja'far Khakpour      |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 2018-09-24           |
+| GitHub username                | JKhakpour            |
+| Website (optional)             |                      |
--- a/.github/contributors/Loghijiaha.md
+++ b/.github/contributors/Loghijiaha.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [ x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ x] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Loghi Perinpanayagam |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |   Student            |
+| Date                           |   13 Jan, 2019       |
+| GitHub username                |   loghijiaha         |
+| Website (optional)             |                      |
--- a/.github/contributors/MateuszOlko.md
+++ b/.github/contributors/MateuszOlko.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Mateusz Olko         |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 22.11.2018           |
+| GitHub username                | MateuszOlko          |
+| Website (optional)             |                      |
--- a/.github/contributors/PolyglotOpenstreetmap.md
+++ b/.github/contributors/PolyglotOpenstreetmap.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Jo                   |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 2018-01-26           |
+| GitHub username                | PolyglotOpenstreetmap|
+| Website (optional)             |                      |
--- a/.github/contributors/adrianeboyd.md
+++ b/.github/contributors/adrianeboyd.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Adriane Boyd         |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 28 January 2019      |
+| GitHub username                | adrianeboyd          |
+| Website (optional)             |                      |
--- a/.github/contributors/akki2825.md
+++ b/.github/contributors/akki2825.md
@ -0,0 +1,87 @@
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Akhilesh K R         |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 2019-02-12           |
+| GitHub username                | akki2825             |
+| Website (optional)             |                      |
--- a/.github/contributors/alvaroabascar.md
+++ b/.github/contributors/alvaroabascar.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           |     Álvaro Abella    |
+| Company name (if applicable)   |         IOMED        |
+| Title or role (if applicable)  |          CSO         |
+| Date                           |       21/12/2018     |
+| GitHub username                |      alvaroabascar   |
+| Website (optional)             |                      |
--- a/.github/contributors/alvations.md
+++ b/.github/contributors/alvations.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [ ] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           |  Liling              |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           |  04 Jan 2019         |
+| GitHub username                |  alvations           |
+| Website (optional)             |                      |
--- a/.github/contributors/amperinet.md
+++ b/.github/contributors/amperinet.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                   |
+|------------------------------- | ----------------------- |
+| Name                           | Amandine Périnet        |
+| Company name (if applicable)   | 365Talents              |
+| Title or role (if applicable)  | Data Science Researcher |
+| Date                           | 28/01/2019              |
+| GitHub username                | amperinet               |
+| Website (optional)             |                         |
--- a/.github/contributors/aniruddha-adhikary.md
+++ b/.github/contributors/aniruddha-adhikary.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Aniruddha Adhikary   |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 2018-09-05           |
+| GitHub username                | aniruddha-adhikary   |
+| Website (optional)             | https://adhikary.net |
--- a/.github/contributors/aongko.md
+++ b/.github/contributors/aongko.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [ ] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [x] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Andrew Ongko         |
+| Company name (if applicable)   | Kurio                |
+| Title or role (if applicable)  | Senior Data Science  |
+| Date                           | Sep 10, 2018         |
+| GitHub username                | aongko               |
+| Website (optional)             |                      |
--- a/.github/contributors/aryaprabhudesai.md
+++ b/.github/contributors/aryaprabhudesai.md
@ -0,0 +1,54 @@
+spaCy contributor agreement
+This spaCy Contributor Agreement ("SCA") is based on the Oracle Contributor Agreement. The SCA applies to any contribution that you make to any product or project managed by us (the "project"), and sets out the intellectual property rights you grant to us in the contributed materials. The term "us" shall mean ExplosionAI UG (haftungsbeschränkt). The term "you" shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested below and include the filled-in version with your first pull request, under the folder .github/contributors/. The name of the file should be your GitHub username, with the extension .md. For example, the user example_user would create the file .github/contributors/example_user.md.
+
+Read this agreement carefully before signing. These terms and conditions constitute a binding legal agreement.
+
+Contributor Agreement
+The term "contribution" or "contributed materials" means any source code, object code, patch, tool, sample, graphic, specification, manual, documentation, or any other material posted or submitted by you to the project.
+
+With respect to any worldwide copyrights, or copyright applications and registrations, in your contribution:
+
+you hereby assign to us joint ownership, and to the extent that such assignment is or becomes invalid, ineffective or unenforceable, you hereby grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, royalty-free, unrestricted license to exercise all rights under those copyrights. This includes, at our option, the right to sublicense these same rights to third parties through multiple levels of sublicensees or other licensing arrangements;
+
+you agree that each of us can do all things in relation to your contribution as if each of us were the sole owners, and if one of us makes a derivative work of your contribution, the one who makes the derivative work (or has it made) will be the sole owner of that derivative work;
+
+you agree that you will not assert any moral rights in your contribution against us, our licensees or transferees;
+
+you agree that we may register a copyright in your contribution and exercise all ownership rights associated with it; and
+
+you agree that neither of us has any duty to consult with, obtain the consent of, pay or render an accounting to the other for any use or distribution of your contribution.
+
+With respect to any patents you own, or that you can license without payment to any third party, you hereby grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, royalty-free license to:
+
+make, have made, use, sell, offer to sell, import, and otherwise transfer your contribution in whole or in part, alone or in combination with or included in any product, work or materials arising out of the project to which your contribution was submitted, and
+
+at our option, to sublicense these same rights to third parties through multiple levels of sublicensees or other licensing arrangements.
+
+Except as set out above, you keep all right, title, and interest in your contribution. The rights that you grant to us under these terms are effective on the date you first submitted a contribution to us, even if your submission took place before the date you sign these terms.
+
+You covenant, represent, warrant and agree that:
+
+Each contribution that you submit is and shall be an original work of authorship and you can legally grant the rights set out in this SCA;
+
+to the best of your knowledge, each contribution will not violate any third party's copyrights, trademarks, patents, or other intellectual property rights; and
+
+each contribution shall be in compliance with U.S. export control laws and other applicable export and import laws. You agree to notify us if you become aware of any circumstance which would make any of the foregoing representations inaccurate in any respect. We may publicly disclose your participation in the project, including the fact that you have signed the SCA.
+
+This SCA is governed by the laws of the State of California and applicable U.S. Federal law. Any choice of law rules will not apply.
+
+Please place an “x” on one of the applicable statements below. Please do NOT mark both statements:
+
+ [X] I am signing on behalf of myself as an individual and no other person or entity, including my employer, has or will have rights with respect to my contributions.
+
+ [ ] I am signing on behalf of my employer or a legal entity and I have the actual authority to contractually bind that entity.
+
+Contributor Details
+Field	Entry
+Name	Arya Prabhudesai
+Company name (if applicable)	-
+Title or role (if applicable)	-
+Date	2018-08-17
+GitHub username	aryaprabhudesai
+Website (optional)	-
--- a/.github/contributors/beatesi.md
+++ b/.github/contributors/beatesi.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [ ] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [x] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Beate Sildnes        |
+| Company name (if applicable)   | NAV                  |
+| Title or role (if applicable)  | Data Scientist       |
+| Date                           | 04.12.2018           |
+| GitHub username                | beatesi              |
+| Website (optional)             |                      |
--- a/.github/contributors/boena.md
+++ b/.github/contributors/boena.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Björn Lennartsson                     |
+| Company name (if applicable)   | Uptrail AB                     |
+| Title or role (if applicable)  | CTO                     |
+| Date                           | 2019-01-15                     |
+| GitHub username                | boena                     |
+| Website (optional)             | www.uptrail.com                     |
--- a/.github/contributors/charlax.md
+++ b/.github/contributors/charlax.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [ ] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [x] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Charles-Axel Dein                     |
+| Company name (if applicable)   | Skrib                     |
+| Title or role (if applicable)  | CEO                     |
+| Date                           | 27/09/2018                     |
+| GitHub username                | charlax                     |
+| Website (optional)             | www.dein.fr                     |
--- a/.github/contributors/chezou.md
+++ b/.github/contributors/chezou.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Aki Ariga            |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 07/12/2018            |
+| GitHub username                | chezou            |
+| Website (optional)             | chezo.uno             |
--- a/.github/contributors/cicorias.md
+++ b/.github/contributors/cicorias.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [X] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                       |
+|------------------------------- | --------------------------- |
+| Name                           | Shawn Cicoria               |
+| Company name (if applicable)   | Microsoft                   |
+| Title or role (if applicable)  | Principal Software Engineer |
+| Date                           | November 20, 2018           |
+| GitHub username                | cicorias                    |
+| Website (optional)             | www.cicoria.com             |
--- a/.github/contributors/clippered.md
+++ b/.github/contributors/clippered.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Kenneth Cruz         |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 2018-12-07           |
+| GitHub username                | clippered            |
+| Website (optional)             |                      |
--- a/.github/contributors/darindf.md
+++ b/.github/contributors/darindf.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your 
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                              |
+|------------------------------- | --------------------               |
+| Name                           | Darin DeForest                     |
+| Company name (if applicable)   | Ipro Tech                          |
+| Title or role (if applicable)  | Senior Software Engineer           |
+| Date                           | 2018-09-26                         |
+| GitHub username                | darindf                            |
+| Website (optional)             |                                    |
--- a/.github/contributors/filipecaixeta.md
+++ b/.github/contributors/filipecaixeta.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your 
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Filipe Caixeta       |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 09.12.2018           |
+| GitHub username                | filipecaixeta        |
+| Website (optional)             | filipecaixeta.com.br |
--- a/.github/contributors/foufaster.md
+++ b/.github/contributors/foufaster.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           |Anès Foufa            |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |NLP developer         |
+| Date                           |21/01/2019            |
+| GitHub username                |foufaster             |
+| Website (optional)             |                      |
--- a/.github/contributors/frascuchon.md
+++ b/.github/contributors/frascuchon.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Francisco Aranda     |
+| Company name (if applicable)   | recognai             |
+| Title or role (if applicable)  |                      |
+| Date                           |                      |
+| GitHub username                | frascuchon           |
+| Website (optional)             | https://recogn.ai    |
--- a/.github/contributors/free-variation.md
+++ b/.github/contributors/free-variation.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [ ] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           |  John Stewart        |
+| Company name (if applicable)   |  Amplify             |
+| Title or role (if applicable)  |  SVP Research        |
+| Date                           |  14/09/2018          |
+| GitHub username                |  free-variation      |
+| Website (optional)             |                      |
--- a/.github/contributors/gavrieltal.md
+++ b/.github/contributors/gavrieltal.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Gavriel Loria        |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | Nov 29, 2018         |
+| GitHub username                | gavrieltal           |
+| Website (optional)             |                      |
--- a/.github/contributors/grivaz.md
+++ b/.github/contributors/grivaz.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | C. Grivaz            |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 08.22.2018           |
+| GitHub username                | grivaz               |
+| Website (optional)             |                      |
--- a/.github/contributors/jacopofar.md
+++ b/.github/contributors/jacopofar.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [X] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           |   Jacopo Farina      |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           |  2018-10-12          |
+| GitHub username                |  jacopofar           |
+| Website (optional)             |  jacopofarina.eu     |
--- a/.github/contributors/jarib.md
+++ b/.github/contributors/jarib.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Jari Bakken          |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 2018-12-21           |
+| GitHub username                | jarib                |
+| Website (optional)             |                      |
--- a/.github/contributors/juliamakogon.md
+++ b/.github/contributors/juliamakogon.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [ ] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Julia Makogon        |
+| Company name (if applicable)   | Semantrum            |
+| Title or role (if applicable)  |                      |
+| Date                           | 07.02.2019           |
+| GitHub username                | juliamakogon         |
+| Website (optional)             |                      |
--- a/.github/contributors/kbulygin.md
+++ b/.github/contributors/kbulygin.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Kirill Bulygin       |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 2018-12-18           |
+| GitHub username                | kbulygin             |
+| Website (optional)             |                      |
--- a/.github/contributors/keshan.md
+++ b/.github/contributors/keshan.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your 
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Keshan Sodimana      |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | Sep 21, 2018         |
+| GitHub username                | keshan               |
+| Website (optional)             |                      |
--- a/.github/contributors/kowaalczyk.md
+++ b/.github/contributors/kowaalczyk.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Krzysztof Kowalczyk  |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 22.11.2018           |
+| GitHub username                | kowaalczyk           |
+| Website (optional)             | kowaalczyk.pl        |
--- a/.github/contributors/lauraBaakman.md
+++ b/.github/contributors/lauraBaakman.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Laura Baakman        |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | February 7, 2019     |
+| GitHub username                | lauraBaakman         |
+| Website (optional)             |                      |
--- a/.github/contributors/mbkupfer.md
+++ b/.github/contributors/mbkupfer.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           |  Maxim Kupfer        |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           |  Sep 6, 2018         |
+| GitHub username                |  mbkupfer            |
+| Website (optional)             |                      |
--- a/.github/contributors/mikelibg.md
+++ b/.github/contributors/mikelibg.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your 
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                    |
+|------------------------------- | ------------------------ |
+| Name                           | Michael Liberman         |
+| Company name (if applicable)   |                          |
+| Title or role (if applicable)  |                          |
+| Date                           | 2018-11-08               |
+| GitHub username                | mikelibg                 |
+| Website (optional)             |                          |
--- a/.github/contributors/moreymat.md
+++ b/.github/contributors/moreymat.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [ ] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [x] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Mathieu Morey        |
+| Company name (if applicable)   | Datactivist          |
+| Title or role (if applicable)  | Researcher           |
+| Date                           | 2019-01-07           |
+| GitHub username                | moreymat             |
+| Website (optional)             |                      |
--- a/.github/contributors/mpuig.md
+++ b/.github/contributors/mpuig.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Marc Puig            |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 2018-11-17           |
+| GitHub username                | mpuig                |
+| Website (optional)             |                      |
--- a/.github/contributors/njsmith.md
+++ b/.github/contributors/njsmith.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Nathaniel J. Smith   |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 2018-08-26           |
+| GitHub username                | njsmith              |
+| Website (optional)             | https://vorpus.org   |
--- a/.github/contributors/ozcankasal.md
+++ b/.github/contributors/ozcankasal.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Ozcan Kasal          |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | December 21, 2018    |
+| GitHub username                | ozcankasal           |
+| Website (optional)             |                      |
--- a/.github/contributors/phojnacki.md
+++ b/.github/contributors/phojnacki.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                                 |
+|------------------------------- | ------------------------------------- |
+| Name                           | Przemysław Hojnacki                   |
+| Company name (if applicable)   |                                       |
+| Title or role (if applicable)  |                                       |
+| Date                           | 12/09/2018                            |
+| GitHub username                | phojnacki                             |
+| Website (optional)             | https://about.me/przemyslaw.hojnacki  |
--- a/.github/contributors/pzelasko.md
+++ b/.github/contributors/pzelasko.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your 
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Piotr Żelasko        |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 04-09-2018           |
+| GitHub username                | pzelasko             |
+| Website (optional)             |                      |
--- a/.github/contributors/retnuh.md
+++ b/.github/contributors/retnuh.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1.  The term "contribution" or "contributed materials" means any source code,
+    object code, patch, tool, sample, graphic, specification, manual,
+    documentation, or any other material posted or submitted by you to the project.
+
+2.  With respect to any worldwide copyrights, or copyright applications and
+    registrations, in your contribution:
+
+        * you hereby assign to us joint ownership, and to the extent that such
+        assignment is or becomes invalid, ineffective or unenforceable, you hereby
+        grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+        royalty-free, unrestricted license to exercise all rights under those
+        copyrights. This includes, at our option, the right to sublicense these same
+        rights to third parties through multiple levels of sublicensees or other
+        licensing arrangements;
+
+        * you agree that each of us can do all things in relation to your
+        contribution as if each of us were the sole owners, and if one of us makes
+        a derivative work of your contribution, the one who makes the derivative
+        work (or has it made) will be the sole owner of that derivative work;
+
+        * you agree that you will not assert any moral rights in your contribution
+        against us, our licensees or transferees;
+
+        * you agree that we may register a copyright in your contribution and
+        exercise all ownership rights associated with it; and
+
+        * you agree that neither of us has any duty to consult with, obtain the
+        consent of, pay or render an accounting to the other for any use or
+        distribution of your contribution.
+
+3.  With respect to any patents you own, or that you can license without payment
+    to any third party, you hereby grant to us a perpetual, irrevocable,
+    non-exclusive, worldwide, no-charge, royalty-free license to:
+
+        * make, have made, use, sell, offer to sell, import, and otherwise transfer
+        your contribution in whole or in part, alone or in combination with or
+        included in any product, work or materials arising out of the project to
+        which your contribution was submitted, and
+
+        * at our option, to sublicense these same rights to third parties through
+        multiple levels of sublicensees or other licensing arrangements.
+
+4.  Except as set out above, you keep all right, title, and interest in your
+    contribution. The rights that you grant to us under these terms are effective
+    on the date you first submitted a contribution to us, even if your submission
+    took place before the date you sign these terms.
+
+5.  You covenant, represent, warrant and agree that:
+
+    - Each contribution that you submit is and shall be an original work of
+      authorship and you can legally grant the rights set out in this SCA;
+
+    - to the best of your knowledge, each contribution will not violate any
+      third party's copyrights, trademarks, patents, or other intellectual
+      property rights; and
+
+    - each contribution shall be in compliance with U.S. export control laws and
+      other applicable export and import laws. You agree to notify us if you
+      become aware of any circumstance which would make any of the foregoing
+      representations inaccurate in any respect. We may publicly disclose your
+      participation in the project, including the fact that you have signed the SCA.
+
+6.  This SCA is governed by the laws of the State of California and applicable
+    U.S. Federal law. Any choice of law rules will not apply.
+
+7.  Please place an “x” on one of the applicable statements below. Please do NOT
+    mark both statements:
+
+        * [x] I am signing on behalf of myself as an individual and no other person
+        or entity, including my employer, has or will have rights with respect to my
+        contributions.
+
+        * [ ] I am signing on behalf of my employer or a legal entity and I have the
+        actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                         | Entry        |
+| ----------------------------- | ------------ |
+| Name                          | Hunter Kelly |
+| Company name (if applicable)  |              |
+| Title or role (if applicable) |              |
+| Date                          | 2019-01-10   |
+| GitHub username               | retnuh       |
+| Website (optional)            |              |
--- a/.github/contributors/roshni-b.md
+++ b/.github/contributors/roshni-b.md
@ -0,0 +1,107 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your 
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Roshni Biswas        |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 02-17-2019           |
+| GitHub username                | roshni-b             |
+| Website (optional)             |                      |
+
--- a/.github/contributors/sainathadapa.md
+++ b/.github/contributors/sainathadapa.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Sainath Adapa        |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 2018-09-06           |
+| GitHub username                | sainathadapa         |
+| Website (optional)             |                      |
--- a/.github/contributors/svlandeg.md
+++ b/.github/contributors/svlandeg.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Sofie Van Landeghem  |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 29 Nov 2018          |
+| GitHub username                | svlandeg             |
+| Website (optional)             |                      |
--- a/.github/contributors/tyburam.md
+++ b/.github/contributors/tyburam.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [ ] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Mateusz Tybura       |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 08.09.2018           |
+| GitHub username                | tyburam              |
+| Website (optional)             |                      |
--- a/.github/contributors/willprice.md
+++ b/.github/contributors/willprice.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                 |
+|------------------------------- | --------------------- |
+| Name                           | Will Price            |
+| Company name (if applicable)   | N/A                   |
+| Title or role (if applicable)  | N/A                   |
+| Date                           | 26/12/2018            |
+| GitHub username                | willprice             |
+| Website (optional)             | https://willprice.org |
--- a/.github/contributors/wxv.md
+++ b/.github/contributors/wxv.md
@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+    * you hereby assign to us joint ownership, and to the extent that such
+    assignment is or becomes invalid, ineffective or unenforceable, you hereby
+    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+    royalty-free, unrestricted license to exercise all rights under those
+    copyrights. This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+    * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+    * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+    * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+    * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+    * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+    * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+    * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+    * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+    * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+    * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Jason Xu             |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 2018-11-29           |
+| GitHub username                | wxv                  |
+| Website (optional)             |                      |
--- a/.gitignore
+++ b/.gitignore
@ -5,9 +5,15 @@ corpora/
 keys/

 # Website
+website/.cache/
+website/public/
+website/node_modules
+website/.npm
+website/logs
+*.log
+npm-debug.log*
 website/www/
 website/_deploy.sh
-website/.gitignore

 # Cython / C extensions
 cythonize.json
--- a/.travis.yml
+++ b/.travis.yml
@ -1,26 +1,20 @@
 language: python
-
 sudo: false
+cache: pip
 dist: trusty
 group: edge
-
 python:
   - "2.7"
   - "3.5"
   - "3.6"
-
 os:
  - linux
-
 env:
  - VIA=compile
  - VIA=flake8
-  #- VIA=pypi_nightly
-
 install:
  - "./travis.sh"
  - pip install flake8
-
 script:
  - "cat /proc/cpuinfo | grep flags | head -n 1"
  - "pip install pytest pytest-timeout"
@ -28,10 +22,10 @@ script:
  - if [[ "${VIA}" == "flake8" ]]; then flake8 . --count --exclude=spacy/compat.py,spacy/lang --select=E901,E999,F821,F822,F823 --show-source --statistics; fi
  - if [[ "${VIA}" == "pypi_nightly" ]]; then python -m pytest --tb=native --models --en `python -c "import os.path; import spacy; print(os.path.abspath(os.path.dirname(spacy.__file__)))"`; fi
  - if [[ "${VIA}" == "sdist" ]]; then python -m pytest --tb=native `python -c "import os.path; import spacy; print(os.path.abspath(os.path.dirname(spacy.__file__)))"`; fi
-
+branches:
+  except:
+    - spacy.io
 notifications:
  slack:
    secure: F8GvqnweSdzImuLL64TpfG0i5rYl89liyr9tmFVsHl4c0DNiDuGhZivUz0M1broS8svE3OPOllLfQbACG/4KxD890qfF9MoHzvRDlp7U+RtwMV/YAkYn8MGWjPIbRbX0HpGdY7O2Rc9Qy4Kk0T8ZgiqXYIqAz2Eva9/9BlSmsJQ=
  email: false
-
-cache: pip
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -26,7 +26,7 @@ also check the [troubleshooting guide](https://spacy.io/usage/#troubleshooting)
 to see if your problem is already listed there.

 If you're looking for help with your code, consider posting a question on
-[StackOverflow](http://stackoverflow.com/questions/tagged/spacy) instead. If you
+[Stack Overflow](http://stackoverflow.com/questions/tagged/spacy) instead. If you
 tag it `spacy` and `python`, more people will see it and hopefully be able to
 help. Please understand that we won't be able to provide individual support via
 email. We also believe that help is much more valuable if it's **shared publicly**,
@ -55,7 +55,7 @@ even format them as Markdown to copy-paste into GitHub issues:
 `python -m spacy info --markdown`.

 * **Checking the model compatibility:** If you're having problems with a
-[statistical model](https://spacy.io/models), it may be because to the
+[statistical model](https://spacy.io/models), it may be because the
 model is incompatible with your spaCy installation. In spaCy v2.0+, you can check
 this on the command line by running `python -m spacy validate`.

@ -186,13 +186,99 @@ sure your test passes and reference the issue in your commit message.
 ## Code conventions

 Code should loosely follow [pep8](https://www.python.org/dev/peps/pep-0008/).
-Regular line length is **80 characters**, with some tolerance for lines up to
-90 characters if the alternative would be worse — for instance, if your list
-comprehension comes to 82 characters, it's better not to split it over two lines.
-You can also use a linter like [`flake8`](https://pypi.python.org/pypi/flake8)
-or [`frosted`](https://pypi.python.org/pypi/frosted) – just keep in mind that
-it won't work very well for `.pyx` files and will complain about Cython syntax
-like `<int*>` or `cimport`.
+As of `v2.1.0`, spaCy uses [`black`](https://github.com/ambv/black) for code
+formatting and [`flake8`](http://flake8.pycqa.org/en/latest/) for linting its
+Python modules. If you've built spaCy from source, you'll already have both
+tools installed.
+
+**⚠️ Note that formatting and linting is currently only possible for Python
+modules in `.py` files, not Cython modules in `.pyx` and `.pxd` files.**
+
+### Code formatting
+
+[`black`](https://github.com/ambv/black) is an opinionated Python code
+formatter, optimised to produce readable code and small diffs. You can run
+`black` from the command-line, or via your code editor. For example, if you're
+using [Visual Studio Code](https://code.visualstudio.com/), you can add the
+following to your `settings.json` to use `black` for formatting and auto-format
+your files on save:
+
+```json
+{
+    "python.formatting.provider": "black",
+    "[python]": {
+        "editor.formatOnSave": true
+    }
+}
+```
+
+[See here](https://github.com/ambv/black#editor-integration) for the full
+list of available editor integrations.
+
+#### Disabling formatting
+
+There are a few cases where auto-formatting doesn't improve readability – for
+example, in some of the language data files like the `tag_map.py`, or in
+the tests that construct `Doc` objects from lists of words and other labels.
+Wrapping a block in `# fmt: off` and `# fmt: on` lets you disable formatting
+for that particular code. Here's an example:
+
+```python
+# fmt: off
+text = "I look forward to using Thingamajig.  I've been told it will make my life easier..."
+heads = [1, 0, -1, -2, -1, -1, -5, -1, 3, 2, 1, 0, 2, 1, -3, 1, 1, -3, -7]
+deps = ["nsubj", "ROOT", "advmod", "prep", "pcomp", "dobj", "punct", "",
+        "nsubjpass", "aux", "auxpass", "ROOT", "nsubj", "aux", "ccomp",
+        "poss", "nsubj", "ccomp", "punct"]
+# fmt: on
+```
+
+### Code linting
+
+[`flake8`](http://flake8.pycqa.org/en/latest/) is a tool for enforcing code
+style. It scans one or more files and outputs errors and warnings. This feedback
+can help you stick to general standards and conventions, and can be very useful
+for spotting potential mistakes and inconsistencies in your code. The most
+important things to watch out for are syntax errors and undefined names, but you
+also want to keep an eye on unused declared variables or repeated
+(i.e. overwritten) dictionary keys. If your code was formatted with `black`
+(see above), you shouldn't see any formatting-related warnings.
+
+The [`.flake8`](.flake8) config defines the configuration we use for this
+codebase. For example, we're not super strict about the line length, and we're
+excluding very large files like lemmatization and tokenizer exception tables.
+
+Ideally, running the following command from within the repo directory should
+not return any errors or warnings:
+
+```bash
+flake8 spacy
+```
+
+#### Disabling linting
+
+Sometimes, you explicitly want to write code that's not compatible with our
+rules. For example, a module's `__init__.py` might import a function so other
+modules can import it from there, but `flake8` will complain about an unused
+import. And although it's generally discouraged, there might be cases where it
+makes sense to use a bare `except`.
+
+To ignore a given line, you can add a comment like `# noqa: F401`, specifying
+the code of the error or warning we want to ignore. It's also possible to
+ignore several comma-separated codes at once, e.g. `# noqa: E731,E123`. Here
+are some examples:
+
+```python
+# The imported class isn't used in this file, but imported here, so it can be
+# imported *from* here by another module.
+from .submodule import SomeClass  # noqa: F401
+
+try:
+    do_something()
+except:  # noqa: E722
+    # This bare except is justified, for some specific reason
+    do_something_else()
+```

 ### Python conventions

@ -206,10 +292,9 @@ for example to show more specific error messages, you can use the `is_config()`
 helper function.

 ```python
-from .compat import unicode_, json_dumps, is_config
+from .compat import unicode_, is_config

 compatible_unicode = unicode_('hello world')
-compatible_json = json_dumps({'key': 'value'})
 if is_config(windows=True, python2=True):
    print("You are using Python 2 on Windows.")
 ```
@ -235,7 +320,7 @@ of other types these names. For instance, don't name a text string `doc` — you
 should usually call this `text`. Two general code style preferences further help
 with naming. First, **lean away from introducing temporary variables**, as these
 clutter your namespace. This is one reason why comprehension expressions are
-often preferred. Second, **keep your functions shortish**, so that can work in a
+often preferred. Second, **keep your functions shortish**, so they can work in a
 smaller scope. Of course, this is a question of trade-offs.

 ### Cython conventions
@ -353,7 +438,7 @@ avoid unnecessary imports.
 Extensive tests that take a long time should be marked with `@pytest.mark.slow`.
 Tests that require the model to be loaded should be marked with
 `@pytest.mark.models`. Loading the models is expensive and not necessary if
-you're not actually testing the model performance. If all you needs ia a `Doc`
+you're not actually testing the model performance. If all you need is a `Doc`
 object with annotations like heads, POS tags or the dependency parse, you can
 use the `get_doc()` utility function to construct it manually.

--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@ -1,83 +0,0 @@
-# 👥 Contributors
-
-This is a list of everyone who has made significant contributions to spaCy, in alphabetical order. Thanks a lot for the great work!
-
-* Adam Bittlingmayer, [@bittlingmayer](https://github.com/bittlingmayer)
-* Alexey Kim, [@yuukos](https://github.com/yuukos)
-* Alexis Eidelman, [@AlexisEidelman](https://github.com/AlexisEidelman)
-* Ali Zarezade, [@azarezade](https://github.com/azarezade)
-* Andreas Grivas, [@andreasgrv](https://github.com/andreasgrv)
-* Andrew Poliakov, [@pavlin99th](https://github.com/pavlin99th)
-* Aniruddha Adhikary, [@aniruddha-adhikary](https://github.com/aniruddha-adhikary)
-* Anto Binish Kaspar, [@binishkaspar](https://github.com/binishkaspar)
-* Avadh Patel, [@avadhpatel](https://github.com/avadhpatel)
-* Ben Eyal, [@beneyal](https://github.com/beneyal)
-* Bhargav Srinivasa, [@bhargavvader](https://github.com/bhargavvader)
-* Bruno P. Kinoshita, [@kinow](https://github.com/kinow)
-* Canbey Bilgili, [@cbilgili](https://github.com/cbilgili)
-* Chris DuBois, [@chrisdubois](https://github.com/chrisdubois)
-* Christoph Schwienheer, [@chssch](https://github.com/chssch)
-* Dafne van Kuppevelt, [@dafnevk](https://github.com/dafnevk)
-* Daniel Rapp, [@rappdw](https://github.com/rappdw)
-* Daniel Vila Suero, [@dvsrepo](https://github.com/dvsrepo)
-* Dmytro Sadovnychyi, [@sadovnychyi](https://github.com/sadovnychyi)
-* Eric Zhao, [@ericzhao28](https://github.com/ericzhao28)
-* Francisco Aranda, [@frascuchon](https://github.com/frascuchon)
-* Greg Baker, [@solresol](https://github.com/solresol)
-* Greg Dubbin, [@GregDubbin](https://github.com/GregDubbin)
-* Grégory Howard, [@Gregory-Howard](https://github.com/Gregory-Howard)
-* György Orosz, [@oroszgy](https://github.com/oroszgy)
-* Henning Peters, [@henningpeters](https://github.com/henningpeters)
-* Iddo Berger, [@iddoberger](https://github.com/iddoberger)
-* Ines Montani, [@ines](https://github.com/ines)
-* J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading)
-* Janneke van der Zwaan, [@jvdzwaan](https://github.com/jvdzwaan)
-* Jim Geovedi, [@geovedi](https://github.com/geovedi)
-* Jim Regan, [@jimregan](https://github.com/jimregan)
-* Jeffrey Gerard, [@IamJeffG](https://github.com/IamJeffG)
-* Jordan Suchow, [@suchow](https://github.com/suchow)
-* Josh Reeter, [@jreeter](https://github.com/jreeter)
-* Juan Miguel Cejuela, [@juanmirocks](https://github.com/juanmirocks)
-* Kendrick Tan, [@kendricktan](https://github.com/kendricktan)
-* Kyle P. Johnson, [@kylepjohnson](https://github.com/kylepjohnson)
-* Leif Uwe Vogelsang, [@luvogels](https://github.com/luvogels)
-* Liling Tan, [@alvations](https://github.com/alvations)
-* Magnus Burton, [@magnusburton](https://github.com/magnusburton)
-* Mark Amery, [@ExplodingCabbage](https://github.com/ExplodingCabbage)
-* Matthew Honnibal, [@honnibal](https://github.com/honnibal)
-* Maxim Samsonov, [@maxirmx](https://github.com/maxirmx)
-* Michael Wallin, [@wallinm1](https://github.com/wallinm1)
-* Miguel Almeida, [@mamoit](https://github.com/mamoit)
-* Motoki Wu, [@tokestermw](https://github.com/tokestermw)
-* Ole Henrik Skogstrøm, [@ohenrik](https://github.com/ohenrik)
-* Oleg Zd, [@olegzd](https://github.com/olegzd)
-* Orhan Bilgin, [@melanuria](https://github.com/melanuria)
-* Orion Montoya, [@mdcclv](https://github.com/mdcclv)
-* Paul O'Leary McCann, [@polm](https://github.com/polm)
-* Pokey Rule, [@pokey](https://github.com/pokey)
-* Ramanan Balakrishnan, [@ramananbalakrishnan](https://github.com/ramananbalakrishnan)
-* Raphaël Bournhonesque, [@raphael0202](https://github.com/raphael0202)
-* Rob van Nieuwpoort, [@RvanNieuwpoort](https://github.com/RvanNieuwpoort)
-* Roman Domrachev, [@ligser](https://github.com/ligser)
-* Roman Inflianskas, [@rominf](https://github.com/rominf)
-* Sam Bozek, [@sambozek](https://github.com/sambozek)
-* Sasho Savkov, [@savkov](https://github.com/savkov)
-* Shuvanon Razik, [@shuvanon](https://github.com/shuvanon)
-* Søren Lind Kristiansen, [@sorenlind](https://github.com/sorenlind)
-* Swier, [@swierh](https://github.com/swierh)
-* Thomas Tanon, [@Tpt](https://github.com/Tpt)
-* Thomas Opsomer, [@thomasopsomer](https://github.com/thomasopsomer)
-* Tiago Rodrigues, [@TiagoMRodrigues](https://github.com/TiagoMRodrigues)
-* Vadim Mazaev, [@GreenRiverRUS](https://github.com/GreenRiverRUS)
-* Vimos Tan, [@Vimos](https://github.com/Vimos)
-* Vsevolod Solovyov, [@vsolovyov](https://github.com/vsolovyov)
-* Wah Loon Keng, [@kengz](https://github.com/kengz)
-* Wannaphong Phatthiyaphaibun, [@wannaphongcom](https://github.com/wannaphongcom)
-* Willem van Hage, [@wrvhage](https://github.com/wrvhage)
-* Wolfgang Seeker, [@wbwseeker](https://github.com/wbwseeker)
-* Yam, [@hscspring](https://github.com/hscspring)
-* Yanhao Yang, [@YanhaoYang](https://github.com/YanhaoYang)
-* Yasuaki Uechi, [@uetchy](https://github.com/uetchy)
-* Yu-chun Huang, [@galaxyh](https://github.com/galaxyh)
-* Yubing Dong, [@tomtung](https://github.com/tomtung)
-* Yuval Pinter, [@yuvalpinter](https://github.com/yuvalpinter)
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 The MIT License (MIT)

-Copyright (C) 2016 ExplosionAI UG (haftungsbeschränkt), 2016 spaCy GmbH, 2015 Matthew Honnibal
+Copyright (C) 2016-2019 ExplosionAI GmbH, 2016 spaCy GmbH, 2015 Matthew Honnibal

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -1,4 +1,5 @@
 recursive-include include *.h
 include LICENSE
-include README.rst
+include README.md
+include pyproject.toml
 include bin/spacy
--- a/4
+++ b/4
@ -5,11 +5,11 @@ dist/spacy.pex : spacy/*.py* spacy/*/*.py*
 	python3.6 -m venv env3.6
 	source env3.6/bin/activate
 	env3.6/bin/pip install wheel
-	env3.6/bin/pip install -r requirements.txt --no-cache-dir --no-binary :all:
+	env3.6/bin/pip install -r requirements.txt --no-cache-dir 
 	env3.6/bin/python setup.py build_ext --inplace
 	env3.6/bin/python setup.py sdist
 	env3.6/bin/python setup.py bdist_wheel
-	env3.6/bin/python -m pip install pex
+	env3.6/bin/python -m pip install pex==1.5.3
 	env3.6/bin/pex pytest dist/*.whl -e spacy -o dist/spacy-$(sha).pex
 	cp dist/spacy-$(sha).pex dist/spacy.pex
 	chmod a+rx dist/spacy.pex
--- a/README.md
+++ b/README.md
@ -0,0 +1,284 @@
+<a href="https://explosion.ai"><img src="https://explosion.ai/assets/img/logo.svg" width="125" height="125" align="right" /></a>
+
+# spaCy: Industrial-strength NLP
+
+spaCy is a library for advanced Natural Language Processing in Python and
+Cython. It's built on the very latest research, and was designed from day one
+to be used in real products. spaCy comes with
+[pre-trained statistical models](https://spacy.io/models) and word vectors, and
+currently supports tokenization for **45+ languages**. It features the
+**fastest syntactic parser** in the world, convolutional
+**neural network models** for tagging, parsing and **named entity recognition**
+and easy **deep learning** integration. It's commercial open-source software,
+released under the MIT license.
+
+💫 **Version 2.1 out now!** [Check out the release notes here.](https://github.com/explosion/spaCy/releases)
+
+[![Travis Build Status](https://img.shields.io/travis/explosion/spaCy/master.svg?style=flat-square&logo=travis)](https://travis-ci.org/explosion/spaCy)
+[![Appveyor Build Status](https://img.shields.io/appveyor/ci/explosion/spaCy/master.svg?style=flat-square&logo=appveyor)](https://ci.appveyor.com/project/explosion/spaCy)
+[![Current Release Version](https://img.shields.io/github/release/explosion/spacy.svg?style=flat-square)](https://github.com/explosion/spaCy/releases)
+[![pypi Version](https://img.shields.io/pypi/v/spacy.svg?style=flat-square)](https://pypi.python.org/pypi/spacy)
+[![conda Version](https://img.shields.io/conda/vn/conda-forge/spacy.svg?style=flat-square)](https://anaconda.org/conda-forge/spacy)
+[![Python wheels](https://img.shields.io/badge/wheels-%E2%9C%93-4c1.svg?longCache=true&style=flat-square&logo=python&logoColor=white)](https://github.com/explosion/wheelwright/releases)
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?style=flat-square)](https://github.com/ambv/black)
+[![spaCy on Twitter](https://img.shields.io/twitter/follow/spacy_io.svg?style=social&label=Follow)](https://twitter.com/spacy_io)
+
+## 📖 Documentation
+
+| Documentation   |                                                                |
+| --------------- | -------------------------------------------------------------- |
+| [spaCy 101]     | New to spaCy? Here's everything you need to know!              |
+| [Usage Guides]  | How to use spaCy and its features.                             |
+| [New in v2.1]   | New features, backwards incompatibilities and migration guide. |
+| [API Reference] | The detailed reference for spaCy's API.                        |
+| [Models]        | Download statistical language models for spaCy.                |
+| [Universe]      | Libraries, extensions, demos, books and courses.               |
+| [Changelog]     | Changes and version history.                                   |
+| [Contribute]    | How to contribute to the spaCy project and code base.          |
+
+[spacy 101]: https://spacy.io/usage/spacy-101
+[new in v2.1]: https://spacy.io/usage/v2-1
+[usage guides]: https://spacy.io/usage/
+[api reference]: https://spacy.io/api/
+[models]: https://spacy.io/models
+[universe]: https://spacy.io/universe
+[changelog]: https://spacy.io/usage/#changelog
+[contribute]: https://github.com/explosion/spaCy/blob/master/CONTRIBUTING.md
+
+## 💬 Where to ask questions
+
+The spaCy project is maintained by [@honnibal](https://github.com/honnibal)
+and [@ines](https://github.com/ines). Please understand that we won't be able
+to provide individual support via email. We also believe that help is much more
+valuable if it's shared publicly, so that more people can benefit from it.
+
+| Type                     | Platforms                                              |
+| ------------------------ | ------------------------------------------------------ |
+| 🚨 **Bug Reports**       | [GitHub Issue Tracker]                                 |
+| 🎁 **Feature Requests**  | [GitHub Issue Tracker]                                 |
+| 👩‍💻 **Usage Questions**   | [Stack Overflow] · [Gitter Chat] · [Reddit User Group] |
+| 🗯 **General Discussion** | [Gitter Chat] · [Reddit User Group]                    |
+
+[github issue tracker]: https://github.com/explosion/spaCy/issues
+[stack overflow]: http://stackoverflow.com/questions/tagged/spacy
+[gitter chat]: https://gitter.im/explosion/spaCy
+[reddit user group]: https://www.reddit.com/r/spacynlp
+
+## Features
+
+-   **Fastest syntactic parser** in the world
+-   **Named entity** recognition
+-   Non-destructive **tokenization**
+-   Support for **45+ languages**
+-   Pre-trained [statistical models](https://spacy.io/models) and word vectors
+-   Easy **deep learning** integration
+-   Part-of-speech tagging
+-   Labelled dependency parsing
+-   Syntax-driven sentence segmentation
+-   Built-in **visualizers** for syntax and NER
+-   Convenient string-to-hash mapping
+-   Export to numpy data arrays
+-   Efficient binary serialization
+-   Easy **model packaging** and deployment
+-   State-of-the-art speed
+-   Robust, rigorously evaluated accuracy
+
+📖 **For more details, see the
+[facts, figures and benchmarks](https://spacy.io/usage/facts-figures).**
+
+## Install spaCy
+
+For detailed installation instructions, see the
+[documentation](https://spacy.io/usage).
+
+-   **Operating system**: macOS / OS X · Linux · Windows (Cygwin, MinGW, Visual Studio)
+-   **Python version**: Python 2.7, 3.4+ (only 64 bit)
+-   **Package managers**: [pip] · [conda] (via `conda-forge`)
+
+[pip]: https://pypi.python.org/pypi/spacy
+[conda]: https://anaconda.org/conda-forge/spacy
+
+### pip
+
+Using pip, spaCy releases are available as source packages and binary wheels
+(as of `v2.0.13`).
+
+```bash
+pip install spacy
+```
+
+When using pip it is generally recommended to install packages in a virtual
+environment to avoid modifying system state:
+
+```bash
+python -m venv .env
+source .env/bin/activate
+pip install spacy
+```
+
+### conda
+
+Thanks to our great community, we've finally re-added conda support. You can now
+install spaCy via `conda-forge`:
+
+```bash
+conda config --add channels conda-forge
+conda install spacy
+```
+
+For the feedstock including the build recipe and configuration,
+check out [this repository](https://github.com/conda-forge/spacy-feedstock).
+Improvements and pull requests to the recipe and setup are always appreciated.
+
+### Updating spaCy
+
+Some updates to spaCy may require downloading new statistical models. If you're
+running spaCy v2.0 or higher, you can use the `validate` command to check if
+your installed models are compatible and if not, print details on how to update
+them:
+
+```bash
+pip install -U spacy
+python -m spacy validate
+```
+
+If you've trained your own models, keep in mind that your training and runtime
+inputs must match. After updating spaCy, we recommend **retraining your models**
+with the new version.
+
+📖 **For details on upgrading from spaCy 1.x to spaCy 2.x, see the
+[migration guide](https://spacy.io/usage/v2#migrating).**
+
+## Download models
+
+As of v1.7.0, models for spaCy can be installed as **Python packages**.
+This means that they're a component of your application, just like any
+other module. Models can be installed using spaCy's `download` command,
+or manually by pointing pip to a path or URL.
+
+| Documentation          |                                                               |
+| ---------------------- | ------------------------------------------------------------- |
+| [Available Models]     | Detailed model descriptions, accuracy figures and benchmarks. |
+| [Models Documentation] | Detailed usage instructions.                                  |
+
+[available models]: https://spacy.io/models
+[models documentation]: https://spacy.io/usage/models
+
+```bash
+# out-of-the-box: download best-matching default model
+python -m spacy download en
+
+# download best-matching version of specific model for your spaCy installation
+python -m spacy download en_core_web_lg
+
+# pip install .tar.gz archive from path or URL
+pip install /Users/you/en_core_web_sm-2.0.0.tar.gz
+```
+
+### Loading and using models
+
+To load a model, use `spacy.load()` with the model's shortcut link:
+
+```python
+import spacy
+nlp = spacy.load('en')
+doc = nlp(u'This is a sentence.')
+```
+
+If you've installed a model via pip, you can also `import` it directly and
+then call its `load()` method:
+
+```python
+import spacy
+import en_core_web_sm
+
+nlp = en_core_web_sm.load()
+doc = nlp(u'This is a sentence.')
+```
+
+📖 **For more info and examples, check out the
+[models documentation](https://spacy.io/usage/models).**
+
+### Support for older versions
+
+If you're using an older version (`v1.6.0` or below), you can still download
+and install the old models from within spaCy using `python -m spacy.en.download all`
+or `python -m spacy.de.download all`. The `.tar.gz` archives are also
+[attached to the v1.6.0 release](https://github.com/explosion/spaCy/tree/v1.6.0).
+To download and install the models manually, unpack the archive, drop the
+contained directory into `spacy/data` and load the model via `spacy.load('en')`
+or `spacy.load('de')`.
+
+## Compile from source
+
+The other way to install spaCy is to clone its
+[GitHub repository](https://github.com/explosion/spaCy) and build it from
+source. That is the common way if you want to make changes to the code base.
+You'll need to make sure that you have a development environment consisting of a
+Python distribution including header files, a compiler,
+[pip](https://pip.pypa.io/en/latest/installing/),
+[virtualenv](https://virtualenv.pypa.io/) and [git](https://git-scm.com)
+installed. The compiler part is the trickiest. How to do that depends on your
+system. See notes on Ubuntu, OS X and Windows for details.
+
+```bash
+# make sure you are using the latest pip
+python -m pip install -U pip
+git clone https://github.com/explosion/spaCy
+cd spaCy
+
+python -m venv .env
+source .env/bin/activate
+export PYTHONPATH=`pwd`
+pip install -r requirements.txt
+python setup.py build_ext --inplace
+```
+
+Compared to regular install via pip, [requirements.txt](requirements.txt)
+additionally installs developer dependencies such as Cython. For more details
+and instructions, see the documentation on
+[compiling spaCy from source](https://spacy.io/usage/#source) and the
+[quickstart widget](https://spacy.io/usage/#section-quickstart) to get
+the right commands for your platform and Python version.
+
+### Ubuntu
+
+Install system-level dependencies via `apt-get`:
+
+```bash
+sudo apt-get install build-essential python-dev git
+```
+
+### macOS / OS X
+
+Install a recent version of [Xcode](https://developer.apple.com/xcode/),
+including the so-called "Command Line Tools". macOS and OS X ship with Python
+and git preinstalled.
+
+### Windows
+
+Install a version of the [Visual C++ Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/) or
+[Visual Studio Express](https://www.visualstudio.com/vs/visual-studio-express/)
+that matches the version that was used to compile your Python
+interpreter. For official distributions these are VS 2008 (Python 2.7),
+VS 2010 (Python 3.4) and VS 2015 (Python 3.5).
+
+## Run tests
+
+spaCy comes with an [extensive test suite](spacy/tests). In order to run the
+tests, you'll usually want to clone the repository and build spaCy from source.
+This will also install the required development dependencies and test utilities
+defined in the `requirements.txt`.
+
+Alternatively, you can find out where spaCy is installed and run `pytest` on
+that directory. Don't forget to also install the test utilities via spaCy's
+`requirements.txt`:
+
+```bash
+python -c "import os; import spacy; print(os.path.dirname(spacy.__file__))"
+pip install -r path/to/requirements.txt
+python -m pytest <spacy-directory>
+```
+
+See [the documentation](https://spacy.io/usage/#tests) for more details and
+examples.
--- a/README.rst
+++ b/README.rst
@ -1,332 +0,0 @@
-spaCy: Industrial-strength NLP
-******************************
-
-spaCy is a library for advanced Natural Language Processing in Python and Cython.
-It's built on the very latest research, and was designed from day one to be
-used in real products. spaCy comes with
-`pre-trained statistical models <https://spacy.io/models>`_ and word
-vectors, and currently supports tokenization for **20+ languages**. It features
-the **fastest syntactic parser** in the world, convolutional **neural network models**
-for tagging, parsing and **named entity recognition** and easy **deep learning**
-integration. It's commercial open-source software, released under the MIT license.
-
-💫 **Version 2.0 out now!** `Check out the new features here. <https://spacy.io/usage/v2>`_
-
-.. image:: https://img.shields.io/travis/explosion/spaCy/master.svg?style=flat-square&logo=travis
-    :target: https://travis-ci.org/explosion/spaCy
-    :alt: Build Status
-
-.. image:: https://img.shields.io/appveyor/ci/explosion/spaCy/master.svg?style=flat-square&logo=appveyor
-    :target: https://ci.appveyor.com/project/explosion/spaCy
-    :alt: Appveyor Build Status
-
-.. image:: https://img.shields.io/github/release/explosion/spacy.svg?style=flat-square
-    :target: https://github.com/explosion/spaCy/releases
-    :alt: Current Release Version
-
-.. image:: https://img.shields.io/pypi/v/spacy.svg?style=flat-square
-    :target: https://pypi.python.org/pypi/spacy
-    :alt: pypi Version
-
-.. image:: https://img.shields.io/conda/vn/conda-forge/spacy.svg?style=flat-square
-    :target: https://anaconda.org/conda-forge/spacy
-    :alt: conda Version
-
-.. image:: https://img.shields.io/badge/chat-join%20%E2%86%92-09a3d5.svg?style=flat-square&logo=gitter-white
-    :target: https://gitter.im/explosion/spaCy
-    :alt: spaCy on Gitter
-
-.. image:: https://img.shields.io/twitter/follow/spacy_io.svg?style=social&label=Follow
-    :target: https://twitter.com/spacy_io
-    :alt: spaCy on Twitter
-
-📖 Documentation
-================
-
-===================  ===
-`spaCy 101`_         New to spaCy? Here's everything you need to know!
-`Usage Guides`_      How to use spaCy and its features.
-`New in v2.0`_       New features, backwards incompatibilities and migration guide.
-`API Reference`_     The detailed reference for spaCy's API.
-`Models`_            Download statistical language models for spaCy.
-`Universe`_          Libraries, extensions, demos, books and courses.
-`Changelog`_         Changes and version history.
-`Contribute`_        How to contribute to the spaCy project and code base.
-===================  ===
-
-.. _spaCy 101: https://spacy.io/usage/spacy-101
-.. _New in v2.0: https://spacy.io/usage/v2#migrating
-.. _Usage Guides: https://spacy.io/usage/
-.. _API Reference: https://spacy.io/api/
-.. _Models: https://spacy.io/models
-.. _Universe: https://spacy.io/universe
-.. _Changelog: https://spacy.io/usage/#changelog
-.. _Contribute: https://github.com/explosion/spaCy/blob/master/CONTRIBUTING.md
-
-💬 Where to ask questions
-==========================
-
-The spaCy project is maintained by `@honnibal <https://github.com/honnibal>`_
-and `@ines <https://github.com/ines>`_. Please understand that we won't be able
-to provide individual support via email. We also believe that help is much more
-valuable if it's shared publicly, so that more people can benefit from it.
-
-====================== ===
-**Bug Reports**        `GitHub Issue Tracker`_
-**Usage Questions**    `StackOverflow`_, `Gitter Chat`_, `Reddit User Group`_
-**General Discussion** `Gitter Chat`_, `Reddit User Group`_
-====================== ===
-
-.. _GitHub Issue Tracker: https://github.com/explosion/spaCy/issues
-.. _StackOverflow: http://stackoverflow.com/questions/tagged/spacy
-.. _Gitter Chat: https://gitter.im/explosion/spaCy
-.. _Reddit User Group: https://www.reddit.com/r/spacynlp
-
-Features
-========
-
-* **Fastest syntactic parser** in the world
-* **Named entity** recognition
-* Non-destructive **tokenization**
-* Support for **20+ languages**
-* Pre-trained `statistical models <https://spacy.io/models>`_ and word vectors
-* Easy **deep learning** integration
-* Part-of-speech tagging
-* Labelled dependency parsing
-* Syntax-driven sentence segmentation
-* Built in **visualizers** for syntax and NER
-* Convenient string-to-hash mapping
-* Export to numpy data arrays
-* Efficient binary serialization
-* Easy **model packaging** and deployment
-* State-of-the-art speed
-* Robust, rigorously evaluated accuracy
-
-📖  **For more details, see the** `facts, figures and benchmarks <https://spacy.io/usage/facts-figures>`_.
-
-Install spaCy
-=============
-
-For detailed installation instructions, see
-the `documentation <https://spacy.io/usage>`_.
-
-==================== ===
-**Operating system** macOS / OS X, Linux, Windows (Cygwin, MinGW, Visual Studio)
-**Python version**   CPython 2.7, 3.4+. Only 64 bit.
-**Package managers** `pip`_ (source packages only), `conda`_ (via ``conda-forge``)
-==================== ===
-
-.. _pip: https://pypi.python.org/pypi/spacy
-.. _conda: https://anaconda.org/conda-forge/spacy
-
-pip
---
-
-Using pip, spaCy releases are currently only available as source packages.
-
-.. code:: bash
-
-    pip install spacy
-
-When using pip it is generally recommended to install packages in a virtual
-environment to avoid modifying system state:
-
-.. code:: bash
-
-    python -m venv .env
-    source .env/bin/activate
-    pip install spacy
-
-conda
-----
-
-Thanks to our great community, we've finally re-added conda support. You can now
-install spaCy via ``conda-forge``:
-
-.. code:: bash
-
-    conda config --add channels conda-forge
-    conda install spacy
-
-For the feedstock including the build recipe and configuration,
-check out `this repository <https://github.com/conda-forge/spacy-feedstock>`_.
-Improvements and pull requests to the recipe and setup are always appreciated.
-
-Updating spaCy
--------------
-
-Some updates to spaCy may require downloading new statistical models. If you're
-running spaCy v2.0 or higher, you can use the ``validate`` command to check if
-your installed models are compatible and if not, print details on how to update
-them:
-
-.. code:: bash
-
-    pip install -U spacy
-    python -m spacy validate
-
-If you've trained your own models, keep in mind that your training and runtime
-inputs must match. After updating spaCy, we recommend **retraining your models**
-with the new version.
-
-📖  **For details on upgrading from spaCy 1.x to spaCy 2.x, see the**
-`migration guide <https://spacy.io/usage/v2#migrating>`_.
-
-Download models
-===============
-
-As of v1.7.0, models for spaCy can be installed as **Python packages**.
-This means that they're a component of your application, just like any
-other module. Models can be installed using spaCy's ``download`` command,
-or manually by pointing pip to a path or URL.
-
-======================= ===
-`Available Models`_     Detailed model descriptions, accuracy figures and benchmarks.
-`Models Documentation`_ Detailed usage instructions.
-======================= ===
-
-.. _Available Models: https://spacy.io/models
-.. _Models Documentation: https://spacy.io/docs/usage/models
-
-.. code:: bash
-
-    # out-of-the-box: download best-matching default model
-    python -m spacy download en
-
-    # download best-matching version of specific model for your spaCy installation
-    python -m spacy download en_core_web_lg
-
-    # pip install .tar.gz archive from path or URL
-    pip install /Users/you/en_core_web_sm-2.0.0.tar.gz
-
-If you have SSL certification problems, SSL customization options are described in the help:
-
-    # help for the download command
-    python -m spacy download --help
-
-Loading and using models
------------------------
-
-To load a model, use ``spacy.load()`` with the model's shortcut link:
-
-.. code:: python
-
-    import spacy
-    nlp = spacy.load('en')
-    doc = nlp(u'This is a sentence.')
-
-If you've installed a model via pip, you can also ``import`` it directly and
-then call its ``load()`` method:
-
-.. code:: python
-
-    import spacy
-    import en_core_web_sm
-
-    nlp = en_core_web_sm.load()
-    doc = nlp(u'This is a sentence.')
-
-📖 **For more info and examples, check out the**
-`models documentation <https://spacy.io/docs/usage/models>`_.
-
-Support for older versions
--------------------------
-
-If you're using an older version (``v1.6.0`` or below), you can still download
-and install the old models from within spaCy using ``python -m spacy.en.download all``
-or ``python -m spacy.de.download all``. The ``.tar.gz`` archives are also
-`attached to the v1.6.0 release <https://github.com/explosion/spaCy/tree/v1.6.0>`_.
-To download and install the models manually, unpack the archive, drop the
-contained directory into ``spacy/data`` and load the model via ``spacy.load('en')``
-or ``spacy.load('de')``.
-
-Compile from source
-===================
-
-The other way to install spaCy is to clone its
-`GitHub repository <https://github.com/explosion/spaCy>`_ and build it from
-source. That is the common way if you want to make changes to the code base.
-You'll need to make sure that you have a development environment consisting of a
-Python distribution including header files, a compiler,
-`pip <https://pip.pypa.io/en/latest/installing/>`__, `virtualenv <https://virtualenv.pypa.io/>`_
-and `git <https://git-scm.com>`_ installed. The compiler part is the trickiest.
-How to do that depends on your system. See notes on Ubuntu, OS X and Windows for
-details.
-
-.. code:: bash
-
-    # make sure you are using the latest pip
-    python -m pip install -U pip
-    git clone https://github.com/explosion/spaCy
-    cd spaCy
-
-    python -m venv .env
-    source .env/bin/activate
-    export PYTHONPATH=`pwd`
-    pip install -r requirements.txt
-    python setup.py build_ext --inplace
-
-Compared to regular install via pip, `requirements.txt <requirements.txt>`_
-additionally installs developer dependencies such as Cython. For more details
-and instructions, see the documentation on
-`compiling spaCy from source <https://spacy.io/usage/#source>`_ and the
-`quickstart widget <https://spacy.io/usage/#section-quickstart>`_ to get
-the right commands for your platform and Python version.
-
-Instead of the above verbose commands, you can also use the following
-`Fabric <http://www.fabfile.org/>`_ commands. All commands assume that your
-virtual environment is located in a directory ``.env``. If you're using a
-different directory, you can change it via the environment variable ``VENV_DIR``,
-for example ``VENV_DIR=".custom-env" fab clean make``.
-
-============= ===
-``fab env``   Create virtual environment and delete previous one, if it exists.
-``fab make``  Compile the source.
-``fab clean`` Remove compiled objects, including the generated C++.
-``fab test``  Run basic tests, aborting after first failure.
-============= ===
-
-Ubuntu
------
-
-Install system-level dependencies via ``apt-get``:
-
-.. code:: bash
-
-    sudo apt-get install build-essential python-dev git
-
-macOS / OS X
------------
-
-Install a recent version of `XCode <https://developer.apple.com/xcode/>`_,
-including the so-called "Command Line Tools". macOS and OS X ship with Python
-and git preinstalled.
-
-Windows
-------
-
-Install a version of `Visual Studio Express <https://www.visualstudio.com/vs/visual-studio-express/>`_
-or higher that matches the version that was used to compile your Python
-interpreter. For official distributions these are VS 2008 (Python 2.7),
-VS 2010 (Python 3.4) and VS 2015 (Python 3.5).
-
-Run tests
-=========
-
-spaCy comes with an `extensive test suite <spacy/tests>`_.  In order to run the
-tests, you'll usually want to clone the repository and build spaCy from source.
-This will also install the required development dependencies and test utilities
-defined in the ``requirements.txt``.
-
-Alternatively, you can find out where spaCy is installed and run ``pytest`` on
-that directory. Don't forget to also install the test utilities via spaCy's
-``requirements.txt``:
-
-.. code:: bash
-
-    python -c "import os; import spacy; print(os.path.dirname(spacy.__file__))"
-    pip install -r path/to/requirements.txt
-    python -m pytest <spacy-directory>
-
-See `the documentation <https://spacy.io/usage/#tests>`_ for more details and
-examples.
--- a/bin/cythonize.py
+++ b/bin/cythonize.py
@ -35,41 +35,49 @@ import subprocess
 import argparse


-HASH_FILE = 'cythonize.json'
+HASH_FILE = "cythonize.json"


-def process_pyx(fromfile, tofile):
-    print('Processing %s' % fromfile)
+def process_pyx(fromfile, tofile, language_level="-2"):
+    print("Processing %s" % fromfile)
    try:
        from Cython.Compiler.Version import version as cython_version
        from distutils.version import LooseVersion
-        if LooseVersion(cython_version) < LooseVersion('0.19'):
-            raise Exception('Require Cython >= 0.19')
+
+        if LooseVersion(cython_version) < LooseVersion("0.19"):
+            raise Exception("Require Cython >= 0.19")

    except ImportError:
        pass

-    flags = ['--fast-fail']
-    if tofile.endswith('.cpp'):
-        flags += ['--cplus']
+    flags = ["--fast-fail", language_level]
+    if tofile.endswith(".cpp"):
+        flags += ["--cplus"]

    try:
        try:
-            r = subprocess.call(['cython'] + flags + ['-o', tofile, fromfile],
-                                env=os.environ) # See Issue #791
+            r = subprocess.call(
+                ["cython"] + flags + ["-o", tofile, fromfile], env=os.environ
+            )  # See Issue #791
            if r != 0:
-                raise Exception('Cython failed')
+                raise Exception("Cython failed")
        except OSError:
            # There are ways of installing Cython that don't result in a cython
            # executable on the path, see gh-2397.
-            r = subprocess.call([sys.executable, '-c',
-                                'import sys; from Cython.Compiler.Main import '
-                                'setuptools_main as main; sys.exit(main())'] + flags +
-                                ['-o', tofile, fromfile])
+            r = subprocess.call(
+                [
+                    sys.executable,
+                    "-c",
+                    "import sys; from Cython.Compiler.Main import "
+                    "setuptools_main as main; sys.exit(main())",
+                ]
+                + flags
+                + ["-o", tofile, fromfile]
+            )
            if r != 0:
-                raise Exception('Cython failed')
+                raise Exception("Cython failed")
    except OSError:
-        raise OSError('Cython needs to be installed')
+        raise OSError("Cython needs to be installed")


 def preserve_cwd(path, func, *args):
@ -89,12 +97,12 @@ def load_hashes(filename):


 def save_hashes(hash_db, filename):
-    with open(filename, 'w') as f:
+    with open(filename, "w") as f:
        f.write(json.dumps(hash_db))


 def get_hash(path):
-    return hashlib.md5(open(path, 'rb').read()).hexdigest()
+    return hashlib.md5(open(path, "rb").read()).hexdigest()


 def hash_changed(base, path, db):
@ -109,25 +117,27 @@ def hash_add(base, path, db):

 def process(base, filename, db):
    root, ext = os.path.splitext(filename)
-    if ext in ['.pyx', '.cpp']:
-        if hash_changed(base, filename, db) or not os.path.isfile(os.path.join(base, root + '.cpp')):
-            preserve_cwd(base, process_pyx, root + '.pyx', root + '.cpp')
-            hash_add(base, root + '.cpp', db)
-            hash_add(base, root + '.pyx', db)
+    if ext in [".pyx", ".cpp"]:
+        if hash_changed(base, filename, db) or not os.path.isfile(
+            os.path.join(base, root + ".cpp")
+        ):
+            preserve_cwd(base, process_pyx, root + ".pyx", root + ".cpp")
+            hash_add(base, root + ".cpp", db)
+            hash_add(base, root + ".pyx", db)


 def check_changes(root, db):
    res = False
    new_db = {}

-    setup_filename = 'setup.py'
-    hash_add('.', setup_filename, new_db)
-    if hash_changed('.', setup_filename, db):
+    setup_filename = "setup.py"
+    hash_add(".", setup_filename, new_db)
+    if hash_changed(".", setup_filename, db):
        res = True

    for base, _, files in os.walk(root):
        for filename in files:
-            if filename.endswith('.pxd'):
+            if filename.endswith(".pxd"):
                hash_add(base, filename, new_db)
                if hash_changed(base, filename, db):
                    res = True
@ -150,8 +160,10 @@ def run(root):
        save_hashes(db, HASH_FILE)


-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Cythonize pyx files into C++ files as needed')
-    parser.add_argument('root', help='root directory')
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Cythonize pyx files into C++ files as needed"
+    )
+    parser.add_argument("root", help="root directory")
    args = parser.parse_args()
    run(args.root)
--- a/bin/load_reddit.py
+++ b/bin/load_reddit.py
@ -0,0 +1,97 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+import bz2
+import re
+import srsly
+import sys
+import random
+import datetime
+import plac
+from pathlib import Path
+
+_unset = object()
+
+
+class Reddit(object):
+    """Stream cleaned comments from Reddit."""
+
+    pre_format_re = re.compile(r"^[`*~]")
+    post_format_re = re.compile(r"[`*~]$")
+    url_re = re.compile(r"\[([^]]+)\]\(%%URL\)")
+    link_re = re.compile(r"\[([^]]+)\]\(https?://[^\)]+\)")
+
+    def __init__(self, file_path, meta_keys={"subreddit": "section"}):
+        """
+        file_path (unicode / Path): Path to archive or directory of archives.
+        meta_keys (dict): Metadata keys included in the Reddit corpus, mapped
+            to their display names in Prodigy meta.
+        RETURNS (Reddit): The Reddit loader.
+        """
+        self.meta = meta_keys
+        file_path = Path(file_path)
+        if not file_path.exists():
+            raise IOError("Can't find file path: {}".format(file_path))
+        if not file_path.is_dir():
+            self.files = [file_path]
+        else:
+            self.files = list(file_path.iterdir())
+
+    def __iter__(self):
+        for file_path in self.iter_files():
+            with bz2.open(str(file_path)) as f:
+                for line in f:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    comment = srsly.json_loads(line)
+                    if self.is_valid(comment):
+                        text = self.strip_tags(comment["body"])
+                        yield {"text": text}
+
+    def get_meta(self, item):
+        return {name: item.get(key, "n/a") for key, name in self.meta.items()}
+
+    def iter_files(self):
+        for file_path in self.files:
+            yield file_path
+
+    def strip_tags(self, text):
+        text = self.link_re.sub(r"\1", text)
+        text = text.replace("&gt;", ">").replace("&lt;", "<")
+        text = self.pre_format_re.sub("", text)
+        text = self.post_format_re.sub("", text)
+        text = re.sub(r"\s+", " ", text)
+        return text.strip()
+
+    def is_valid(self, comment):
+        return (
+            comment["body"] is not None
+            and comment["body"] != "[deleted]"
+            and comment["body"] != "[removed]"
+        )
+
+
+def main(path):
+    reddit = Reddit(path)
+    for comment in reddit:
+        print(srsly.json_dumps(comment))
+
+
+if __name__ == "__main__":
+    import socket
+
+    try:
+        BrokenPipeError
+    except NameError:
+        BrokenPipeError = socket.error
+    try:
+        plac.call(main)
+    except BrokenPipeError:
+        import os, sys
+
+        # Python flushes standard streams on exit; redirect remaining output
+        # to devnull to avoid another BrokenPipeError at shutdown
+        devnull = os.open(os.devnull, os.O_WRONLY)
+        os.dup2(devnull, sys.stdout.fileno())
+        sys.exit(1)  # Python exits with error code 1 on EPIPE
--- a/bin/push-tag.sh
+++ b/bin/push-tag.sh
@ -7,9 +7,12 @@ git diff-index --quiet HEAD

 git checkout $1
 git pull origin $1
+
 version=$(grep "__version__ = " spacy/about.py)
 version=${version/__version__ = }
 version=${version/\'/}
 version=${version/\'/}
+version=${version/\"/}
+version=${version/\"/}
 git tag "v$version"
 git push origin --tags
--- a/examples/deep_learning_keras.py
+++ b/examples/deep_learning_keras.py
@ -1,5 +1,12 @@
 """
-This example shows how to use an LSTM sentiment classification model trained using Keras in spaCy. spaCy splits the document into sentences, and each sentence is classified using the LSTM. The scores for the sentences are then aggregated to give the document score. This kind of hierarchical model is quite difficult in "pure" Keras or Tensorflow, but it's very effective. The Keras example on this dataset performs quite poorly, because it cuts off the documents so that they're a fixed size. This hurts review accuracy a lot, because people often summarise their rating in the final sentence
+This example shows how to use an LSTM sentiment classification model trained
+using Keras in spaCy. spaCy splits the document into sentences, and each
+sentence is classified using the LSTM. The scores for the sentences are then
+aggregated to give the document score. This kind of hierarchical model is quite
+difficult in "pure" Keras or Tensorflow, but it's very effective. The Keras
+example on this dataset performs quite poorly, because it cuts off the documents
+so that they're a fixed size. This hurts review accuracy a lot, because people
+often summarise their rating in the final sentence.

 Prerequisites:
 spacy download en_vectors_web_lg
@ -25,9 +32,9 @@ import spacy
 class SentimentAnalyser(object):
    @classmethod
    def load(cls, path, nlp, max_length=100):
-        with (path / 'config.json').open() as file_:
+        with (path / "config.json").open() as file_:
            model = model_from_json(file_.read())
-        with (path / 'model').open('rb') as file_:
+        with (path / "model").open("rb") as file_:
            lstm_weights = pickle.load(file_)
        embeddings = get_embeddings(nlp.vocab)
        model.set_weights([embeddings] + lstm_weights)
@ -69,12 +76,12 @@ def get_labelled_sentences(docs, doc_labels):
        for sent in doc.sents:
            sentences.append(sent)
            labels.append(y)
-    return sentences, numpy.asarray(labels, dtype='int32')
+    return sentences, numpy.asarray(labels, dtype="int32")


 def get_features(docs, max_length):
    docs = list(docs)
-    Xs = numpy.zeros((len(docs), max_length), dtype='int32')
+    Xs = numpy.zeros((len(docs), max_length), dtype="int32")
    for i, doc in enumerate(docs):
        j = 0
        for token in doc:
@ -89,14 +96,25 @@ def get_features(docs, max_length):
    return Xs


-def train(train_texts, train_labels, dev_texts, dev_labels,
-          lstm_shape, lstm_settings, lstm_optimizer, batch_size=100,
-          nb_epoch=5, by_sentence=True):
+def train(
+    train_texts,
+    train_labels,
+    dev_texts,
+    dev_labels,
+    lstm_shape,
+    lstm_settings,
+    lstm_optimizer,
+    batch_size=100,
+    nb_epoch=5,
+    by_sentence=True,
+):
+
    print("Loading spaCy")
-    nlp = spacy.load('en_vectors_web_lg')
-    nlp.add_pipe(nlp.create_pipe('sentencizer'))
+    nlp = spacy.load("en_vectors_web_lg")
+    nlp.add_pipe(nlp.create_pipe("sentencizer"))
    embeddings = get_embeddings(nlp.vocab)
    model = compile_lstm(embeddings, lstm_shape, lstm_settings)
+
    print("Parsing texts...")
    train_docs = list(nlp.pipe(train_texts))
    dev_docs = list(nlp.pipe(dev_texts))
@ -104,10 +122,15 @@ def train(train_texts, train_labels, dev_texts, dev_labels,
        train_docs, train_labels = get_labelled_sentences(train_docs, train_labels)
        dev_docs, dev_labels = get_labelled_sentences(dev_docs, dev_labels)

-    train_X = get_features(train_docs, lstm_shape['max_length'])
-    dev_X = get_features(dev_docs, lstm_shape['max_length'])
-    model.fit(train_X, train_labels, validation_data=(dev_X, dev_labels),
-              nb_epoch=nb_epoch, batch_size=batch_size)
+    train_X = get_features(train_docs, lstm_shape["max_length"])
+    dev_X = get_features(dev_docs, lstm_shape["max_length"])
+    model.fit(
+        train_X,
+        train_labels,
+        validation_data=(dev_X, dev_labels),
+        epochs=nb_epoch,
+        batch_size=batch_size,
+    )
    return model


@ -117,19 +140,28 @@ def compile_lstm(embeddings, shape, settings):
        Embedding(
            embeddings.shape[0],
            embeddings.shape[1],
-            input_length=shape['max_length'],
+            input_length=shape["max_length"],
            trainable=False,
            weights=[embeddings],
-            mask_zero=True
+            mask_zero=True,
        )
    )
-    model.add(TimeDistributed(Dense(shape['nr_hidden'], use_bias=False)))
-    model.add(Bidirectional(LSTM(shape['nr_hidden'],
-                                 recurrent_dropout=settings['dropout'],
-                                 dropout=settings['dropout'])))
-    model.add(Dense(shape['nr_class'], activation='sigmoid'))
-    model.compile(optimizer=Adam(lr=settings['lr']), loss='binary_crossentropy',
-		  metrics=['accuracy'])
+    model.add(TimeDistributed(Dense(shape["nr_hidden"], use_bias=False)))
+    model.add(
+        Bidirectional(
+            LSTM(
+                shape["nr_hidden"],
+                recurrent_dropout=settings["dropout"],
+                dropout=settings["dropout"],
+            )
+        )
+    )
+    model.add(Dense(shape["nr_class"], activation="sigmoid"))
+    model.compile(
+        optimizer=Adam(lr=settings["lr"]),
+        loss="binary_crossentropy",
+        metrics=["accuracy"],
+    )
    return model


@ -138,15 +170,9 @@ def get_embeddings(vocab):


 def evaluate(model_dir, texts, labels, max_length=100):
-    def create_pipeline(nlp):
-        '''
-        This could be a lambda, but named functions are easier to read in Python.
-        '''
-        return [nlp.tagger, nlp.parser, SentimentAnalyser.load(model_dir, nlp,
-                                                               max_length=max_length)]
-
-    nlp = spacy.load('en')
-    nlp.pipeline = create_pipeline(nlp)
+    nlp = spacy.load("en_vectors_web_lg")
+    nlp.add_pipe(nlp.create_pipe("sentencizer"))
+    nlp.add_pipe(SentimentAnalyser.load(model_dir, nlp, max_length=max_length))

    correct = 0
    i = 0
@ -158,7 +184,7 @@ def evaluate(model_dir, texts, labels, max_length=100):

 def read_data(data_dir, limit=0):
    examples = []
-    for subdir, label in (('pos', 1), ('neg', 0)):
+    for subdir, label in (("pos", 1), ("neg", 0)):
        for filename in (data_dir / subdir).iterdir():
            with filename.open() as file_:
                text = file_.read()
@ -180,13 +206,21 @@ def read_data(data_dir, limit=0):
    learn_rate=("Learn rate", "option", "e", float),
    nb_epoch=("Number of training epochs", "option", "i", int),
    batch_size=("Size of minibatches for training LSTM", "option", "b", int),
-    nr_examples=("Limit to N examples", "option", "n", int)
+    nr_examples=("Limit to N examples", "option", "n", int),
 )
-def main(model_dir=None, train_dir=None, dev_dir=None,
+def main(
+    model_dir=None,
+    train_dir=None,
+    dev_dir=None,
    is_runtime=False,
-         nr_hidden=64, max_length=100, # Shape
-         dropout=0.5, learn_rate=0.001, # General NN config
-         nb_epoch=5, batch_size=100, nr_examples=-1):  # Training params
+    nr_hidden=64,
+    max_length=100,  # Shape
+    dropout=0.5,
+    learn_rate=0.001,  # General NN config
+    nb_epoch=5,
+    batch_size=256,
+    nr_examples=-1,
+):  # Training params
    if model_dir is not None:
        model_dir = pathlib.Path(model_dir)
    if train_dir is None or dev_dir is None:
@ -208,20 +242,26 @@ def main(model_dir=None, train_dir=None, dev_dir=None,
            dev_texts, dev_labels = zip(*imdb_data[1])
        else:
            dev_texts, dev_labels = read_data(dev_dir, imdb_data, limit=nr_examples)
-        train_labels = numpy.asarray(train_labels, dtype='int32')
-        dev_labels = numpy.asarray(dev_labels, dtype='int32')
-        lstm = train(train_texts, train_labels, dev_texts, dev_labels,
-                     {'nr_hidden': nr_hidden, 'max_length': max_length, 'nr_class': 1},
-                     {'dropout': dropout, 'lr': learn_rate},
+        train_labels = numpy.asarray(train_labels, dtype="int32")
+        dev_labels = numpy.asarray(dev_labels, dtype="int32")
+        lstm = train(
+            train_texts,
+            train_labels,
+            dev_texts,
+            dev_labels,
+            {"nr_hidden": nr_hidden, "max_length": max_length, "nr_class": 1},
+            {"dropout": dropout, "lr": learn_rate},
            {},
-                     nb_epoch=nb_epoch, batch_size=batch_size)
+            nb_epoch=nb_epoch,
+            batch_size=batch_size,
+        )
        weights = lstm.get_weights()
        if model_dir is not None:
-            with (model_dir / 'model').open('wb') as file_:
+            with (model_dir / "model").open("wb") as file_:
                pickle.dump(weights[1:], file_)
-            with (model_dir / 'config.json').open('wb') as file_:
+            with (model_dir / "config.json").open("w") as file_:
                file_.write(lstm.to_json())


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)
--- a/examples/information_extraction/entity_relations.py
+++ b/examples/information_extraction/entity_relations.py
@ -15,14 +15,15 @@ import spacy


 TEXTS = [
-    'Net income was $9.4 million compared to the prior year of $2.7 million.',
-    'Revenue exceeded twelve billion dollars, with a loss of $1b.',
+    "Net income was $9.4 million compared to the prior year of $2.7 million.",
+    "Revenue exceeded twelve billion dollars, with a loss of $1b.",
 ]


@plac.annotations(
-    model=("Model to load (needs parser and NER)", "positional", None, str))
-def main(model='en_core_web_sm'):
+    model=("Model to load (needs parser and NER)", "positional", None, str)
+)
+def main(model="en_core_web_sm"):
    nlp = spacy.load(model)
    print("Loaded model '%s'" % model)
    print("Processing %d texts" % len(TEXTS))
@ -31,7 +32,7 @@ def main(model='en_core_web_sm'):
        doc = nlp(text)
        relations = extract_currency_relations(doc)
        for r1, r2 in relations:
-            print('{:<10}\t{}\t{}'.format(r1.text, r2.ent_type_, r2.text))
+            print("{:<10}\t{}\t{}".format(r1.text, r2.ent_type_, r2.text))


 def extract_currency_relations(doc):
@ -41,18 +42,18 @@ def extract_currency_relations(doc):
        span.merge()

    relations = []
-    for money in filter(lambda w: w.ent_type_ == 'MONEY', doc):
-        if money.dep_ in ('attr', 'dobj'):
-            subject = [w for w in money.head.lefts if w.dep_ == 'nsubj']
+    for money in filter(lambda w: w.ent_type_ == "MONEY", doc):
+        if money.dep_ in ("attr", "dobj"):
+            subject = [w for w in money.head.lefts if w.dep_ == "nsubj"]
            if subject:
                subject = subject[0]
                relations.append((subject, money))
-        elif money.dep_ == 'pobj' and money.head.dep_ == 'prep':
+        elif money.dep_ == "pobj" and money.head.dep_ == "prep":
            relations.append((money.head.head, money))
    return relations


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)

    # Expected output:
--- a/examples/information_extraction/parse_subtrees.py
+++ b/examples/information_extraction/parse_subtrees.py
@ -24,37 +24,39 @@ import plac
 import spacy


-@plac.annotations(
-    model=("Model to load", "positional", None, str))
-def main(model='en_core_web_sm'):
+@plac.annotations(model=("Model to load", "positional", None, str))
+def main(model="en_core_web_sm"):
    nlp = spacy.load(model)
    print("Loaded model '%s'" % model)

-    doc = nlp("displaCy uses CSS and JavaScript to show you how computers "
-               "understand language")
+    doc = nlp(
+        "displaCy uses CSS and JavaScript to show you how computers "
+        "understand language"
+    )

    # The easiest way is to find the head of the subtree you want, and then use
    # the `.subtree`, `.children`, `.lefts` and `.rights` iterators. `.subtree`
    # is the one that does what you're asking for most directly:
    for word in doc:
-        if word.dep_ in ('xcomp', 'ccomp'):
-            print(''.join(w.text_with_ws for w in word.subtree))
+        if word.dep_ in ("xcomp", "ccomp"):
+            print("".join(w.text_with_ws for w in word.subtree))

    # It'd probably be better for `word.subtree` to return a `Span` object
    # instead of a generator over the tokens. If you want the `Span` you can
    # get it via the `.right_edge` and `.left_edge` properties. The `Span`
    # object is nice because you can easily get a vector, merge it, etc.
    for word in doc:
-        if word.dep_ in ('xcomp', 'ccomp'):
+        if word.dep_ in ("xcomp", "ccomp"):
            subtree_span = doc[word.left_edge.i : word.right_edge.i + 1]
-            print(subtree_span.text, '|', subtree_span.root.text)
+            print(subtree_span.text, "|", subtree_span.root.text)

    # You might also want to select a head, and then select a start and end
    # position by walking along its children. You could then take the
    # `.left_edge` and `.right_edge` of those tokens, and use it to calculate
    # a span.

-if __name__ == '__main__':
+
+if __name__ == "__main__":
    plac.call(main)

    # Expected output:
--- a/examples/information_extraction/phrase_matcher.py
+++ b/examples/information_extraction/phrase_matcher.py
@ -45,7 +45,7 @@ from __future__ import print_function, unicode_literals, division
 from bz2 import BZ2File
 import time
 import plac
-import ujson
+import json

 from spacy.matcher import PhraseMatcher
 import spacy
@ -55,15 +55,15 @@ import spacy
    patterns_loc=("Path to gazetteer", "positional", None, str),
    text_loc=("Path to Reddit corpus file", "positional", None, str),
    n=("Number of texts to read", "option", "n", int),
-    lang=("Language class to initialise", "option", "l", str))
-def main(patterns_loc, text_loc, n=10000, lang='en'):
-    nlp = spacy.blank('en')
+    lang=("Language class to initialise", "option", "l", str),
+)
+def main(patterns_loc, text_loc, n=10000, lang="en"):
+    nlp = spacy.blank(lang)
    nlp.vocab.lex_attr_getters = {}
    phrases = read_gazetteer(nlp.tokenizer, patterns_loc)
    count = 0
    t1 = time.time()
-    for ent_id, text in get_matches(nlp.tokenizer, phrases,
-                                    read_text(text_loc, n=n)):
+    for ent_id, text in get_matches(nlp.tokenizer, phrases, read_text(text_loc, n=n)):
        count += 1
    t2 = time.time()
    print("%d docs in %.3f s. %d matches" % (n, (t2 - t1), count))
@ -71,8 +71,8 @@ def main(patterns_loc, text_loc, n=10000, lang='en'):

 def read_gazetteer(tokenizer, loc, n=-1):
    for i, line in enumerate(open(loc)):
-        data = ujson.loads(line.strip())
-        phrase = tokenizer(data['text'])
+        data = json.loads(line.strip())
+        phrase = tokenizer(data["text"])
        for w in phrase:
            _ = tokenizer.vocab[w.text]
        if len(phrase) >= 2:
@ -82,15 +82,15 @@ def read_gazetteer(tokenizer, loc, n=-1):
 def read_text(bz2_loc, n=10000):
    with BZ2File(bz2_loc) as file_:
        for i, line in enumerate(file_):
-            data = ujson.loads(line)
-            yield data['body']
+            data = json.loads(line)
+            yield data["body"]
            if i >= n:
                break


 def get_matches(tokenizer, phrases, texts, max_length=6):
    matcher = PhraseMatcher(tokenizer.vocab, max_length=max_length)
-    matcher.add('Phrase', None, *phrases)
+    matcher.add("Phrase", None, *phrases)
    for text in texts:
        doc = tokenizer(text)
        for w in doc:
@ -100,10 +100,11 @@ def get_matches(tokenizer, phrases, texts, max_length=6):
            yield (ent_id, doc[start:end].text)


-if __name__ == '__main__':
+if __name__ == "__main__":
    if False:
        import cProfile
        import pstats
+
        cProfile.runctx("plac.call(main)", globals(), locals(), "Profile.prof")
        s = pstats.Stats("Profile.prof")
        s.strip_dirs().sort_stats("time").print_stats()
--- a/examples/keras_parikh_entailment/README.md
+++ b/examples/keras_parikh_entailment/README.md
@ -2,11 +2,7 @@

 # A decomposable attention model for Natural Language Inference
 **by Matthew Honnibal, [@honnibal](https://github.com/honnibal)**
-
-> ⚠️ **IMPORTANT NOTE:** This example is currently only compatible with spaCy
-> v1.x. We're working on porting the example over to Keras v2.x and spaCy v2.x.
-> See [#1445](https://github.com/explosion/spaCy/issues/1445) for details –
-> contributions welcome!
+**Updated for spaCy 2.0+ and Keras 2.2.2+ by John Stewart, [@free-variation](https://github.com/free-variation)**

 This directory contains an implementation of the entailment prediction model described
 by [Parikh et al. (2016)](https://arxiv.org/pdf/1606.01933.pdf). The model is notable
@ -21,19 +17,25 @@ hook is installed to customise the `.similarity()` method of spaCy's `Doc`
 and `Span` objects:

 ```python
-def demo(model_dir):
-    nlp = spacy.load('en', path=model_dir,
-            create_pipeline=create_similarity_pipeline)
-    doc1 = nlp(u'Worst fries ever! Greasy and horrible...')
-    doc2 = nlp(u'The milkshakes are good. The fries are bad.')
-    print(doc1.similarity(doc2))
-    sent1a, sent1b = doc1.sents
-    print(sent1a.similarity(sent1b))
-    print(sent1a.similarity(doc2))
-    print(sent1b.similarity(doc2))
+def demo(shape):
+    nlp = spacy.load('en_vectors_web_lg')
+    nlp.add_pipe(KerasSimilarityShim.load(nlp.path / 'similarity', nlp, shape[0]))
+
+    doc1 = nlp(u'The king of France is bald.')
+    doc2 = nlp(u'France has no king.')
+
+    print("Sentence 1:", doc1)
+    print("Sentence 2:", doc2)
+
+    entailment_type, confidence = doc1.similarity(doc2)
+    print("Entailment type:", entailment_type, "(Confidence:", confidence, ")")
 ```

+Which gives the output `Entailment type: contradiction (Confidence: 0.60604566)`, showing that
+the system has definite opinions about Bertrand Russell's [famous conundrum](https://users.drew.edu/jlenz/br-on-denoting.html)!
+
 I'm working on a blog post to explain Parikh et al.'s model in more detail.
+A [notebook](https://github.com/free-variation/spaCy/blob/master/examples/notebooks/Decompositional%20Attention.ipynb) is available that briefly explains this implementation.
 I think it is a very interesting example of the attention mechanism, which
 I didn't understand very well before working through this paper. There are
 lots of ways to extend the model.
@ -43,7 +45,7 @@ lots of ways to extend the model.
 | File | Description |
 | --- | --- |
 | `__main__.py` | The script that will be executed. Defines the CLI, the data reading, etc — all the boring stuff. |
-| `spacy_hook.py` | Provides a class `SimilarityShim` that lets you use an arbitrary function to customize spaCy's `doc.similarity()` method. Instead of the default average-of-vectors algorithm, when you call `doc1.similarity(doc2)`, you'll get the result of `your_model(doc1, doc2)`. |
+| `spacy_hook.py` | Provides a class `KerasSimilarityShim` that lets you use an arbitrary function to customize spaCy's `doc.similarity()` method. Instead of the default average-of-vectors algorithm, when you call `doc1.similarity(doc2)`, you'll get the result of `your_model(doc1, doc2)`. |
 | `keras_decomposable_attention.py` | Defines the neural network model. |

 ## Setting up
@ -52,17 +54,13 @@ First, install [Keras](https://keras.io/), [spaCy](https://spacy.io) and the spa
 English models (about 1GB of data):

 ```bash
-pip install https://github.com/fchollet/keras/archive/1.2.2.zip
+pip install keras
 pip install spacy
-python -m spacy.en.download
+python -m spacy download en_vectors_web_lg
 ```

-⚠️ **Important:** In order for the example to run, you'll need to install Keras from
-the 1.2.2 release (and not via `pip install keras`). For more info on this, see
-[#727](https://github.com/explosion/spaCy/issues/727).
-
-You'll also want to get Keras working on your GPU. This will depend on your
-set up, so you're mostly on your own for this step. If you're using AWS, try the
+You'll also want to get Keras working on your GPU, and you will need a backend, such as TensorFlow or Theano.
+This will depend on your setup, so you're mostly on your own for this step. If you're using AWS, try the
 [NVidia AMI](https://aws.amazon.com/marketplace/pp/B00FYCDDTE). It made things pretty easy.

 Once you've installed the dependencies, you can run a small preliminary test of
@ -80,22 +78,35 @@ Finally, download the [Stanford Natural Language Inference corpus](http://nlp.st
 ## Running the example

 You can run the `keras_parikh_entailment/` directory as a script, which executes the file
-[`keras_parikh_entailment/__main__.py`](__main__.py). The first thing you'll want to do is train the model:
+[`keras_parikh_entailment/__main__.py`](__main__.py). If you run the script without arguments,
+the usage is shown. Running it with `-h` explains the command-line arguments.
+
+The first thing you'll want to do is train the model:

 ```bash
-python keras_parikh_entailment/ train <train_directory> <dev_directory>
+python keras_parikh_entailment/ train -t <path to SNLI train JSON> -s <path to SNLI dev JSON>
 ```

 Training takes about 300 epochs for full accuracy, and I haven't rerun the full
 experiment since refactoring things to publish this example — please let me
-know if I've broken something. You should get to at least 85% on the development data.
+know if I've broken something. You should get to at least 85% on the development data even after 10-15 epochs.

 The other two modes demonstrate run-time usage. I never like relying on the accuracy printed
 by `.fit()` methods. I never really feel confident until I've run a new process that loads
 the model and starts making predictions, without access to the gold labels. I've therefore
-included an `evaluate` mode. Finally, there's also a little demo, which mostly exists to show
+included an `evaluate` mode. 
+
+```bash
+python keras_parikh_entailment/ evaluate -s <path to SNLI dev or test JSON>
+```
+
+Finally, there's also a little demo, which mostly exists to show
 you how run-time usage will eventually look.

+```bash
+python keras_parikh_entailment/ demo
+```
+
 ## Getting updates

 We should have the blog post explaining the model ready before the end of the week. To get
--- a/examples/keras_parikh_entailment/main.py
+++ b/examples/keras_parikh_entailment/main.py
@ -1,139 +1,207 @@
-from __future__ import division, unicode_literals, print_function
-import spacy
-
+import numpy as np
+import json
+from keras.utils import to_categorical
 import plac
-from pathlib import Path
-import ujson as json
-import numpy
-from keras.utils.np_utils import to_categorical
-
-from spacy_hook import get_embeddings, get_word_ids
-from spacy_hook import create_similarity_pipeline
+import sys

 from keras_decomposable_attention import build_model
+from spacy_hook import get_embeddings, KerasSimilarityShim

 try:
    import cPickle as pickle
 except ImportError:
    import pickle

+import spacy
+
+# workaround for keras/tensorflow bug
+# see https://github.com/tensorflow/tensorflow/issues/3388
+import os
+import importlib
+from keras import backend as K
+
+
+def set_keras_backend(backend):
+    if K.backend() != backend:
+        os.environ["KERAS_BACKEND"] = backend
+        importlib.reload(K)
+        assert K.backend() == backend
+    if backend == "tensorflow":
+        K.get_session().close()
+        cfg = K.tf.ConfigProto()
+        cfg.gpu_options.allow_growth = True
+        K.set_session(K.tf.Session(config=cfg))
+        K.clear_session()
+
+
+set_keras_backend("tensorflow")
+

 def train(train_loc, dev_loc, shape, settings):
    train_texts1, train_texts2, train_labels = read_snli(train_loc)
    dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)

    print("Loading spaCy")
-    nlp = spacy.load('en')
+    nlp = spacy.load("en_vectors_web_lg")
    assert nlp.path is not None
+    print("Processing texts...")
+    train_X = create_dataset(nlp, train_texts1, train_texts2, 100, shape[0])
+    dev_X = create_dataset(nlp, dev_texts1, dev_texts2, 100, shape[0])
+
    print("Compiling network")
    model = build_model(get_embeddings(nlp.vocab), shape, settings)
-    print("Processing texts...")
-    Xs = []
-    for texts in (train_texts1, train_texts2, dev_texts1, dev_texts2):
-        Xs.append(get_word_ids(list(nlp.pipe(texts, n_threads=20, batch_size=20000)),
-                         max_length=shape[0],
-                         rnn_encode=settings['gru_encode'],
-                         tree_truncate=settings['tree_truncate']))
-    train_X1, train_X2, dev_X1, dev_X2 = Xs
+
    print(settings)
    model.fit(
-        [train_X1, train_X2],
+        train_X,
        train_labels,
-        validation_data=([dev_X1, dev_X2], dev_labels),
-        nb_epoch=settings['nr_epoch'],
-        batch_size=settings['batch_size'])
-    if not (nlp.path / 'similarity').exists():
-        (nlp.path / 'similarity').mkdir()
-    print("Saving to", nlp.path / 'similarity')
+        validation_data=(dev_X, dev_labels),
+        epochs=settings["nr_epoch"],
+        batch_size=settings["batch_size"],
+    )
+    if not (nlp.path / "similarity").exists():
+        (nlp.path / "similarity").mkdir()
+    print("Saving to", nlp.path / "similarity")
    weights = model.get_weights()
-    with (nlp.path / 'similarity' / 'model').open('wb') as file_:
-        pickle.dump(weights[1:], file_)
-    with (nlp.path / 'similarity' / 'config.json').open('wb') as file_:
+    # remove the embedding matrix.  We can reconstruct it.
+    del weights[1]
+    with (nlp.path / "similarity" / "model").open("wb") as file_:
+        pickle.dump(weights, file_)
+    with (nlp.path / "similarity" / "config.json").open("w") as file_:
        file_.write(model.to_json())


-def evaluate(dev_loc):
+def evaluate(dev_loc, shape):
    dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)
-    nlp = spacy.load('en',
-            create_pipeline=create_similarity_pipeline)
-    total = 0.
-    correct = 0.
+    nlp = spacy.load("en_vectors_web_lg")
+    nlp.add_pipe(KerasSimilarityShim.load(nlp.path / "similarity", nlp, shape[0]))
+    total = 0.0
+    correct = 0.0
    for text1, text2, label in zip(dev_texts1, dev_texts2, dev_labels):
        doc1 = nlp(text1)
        doc2 = nlp(text2)
-        sim = doc1.similarity(doc2)
-        if sim.argmax() == label.argmax():
+        sim, _ = doc1.similarity(doc2)
+        if sim == KerasSimilarityShim.entailment_types[label.argmax()]:
            correct += 1
        total += 1
    return correct, total


-def demo():
-    nlp = spacy.load('en',
-            create_pipeline=create_similarity_pipeline)
-    doc1 = nlp(u'What were the best crime fiction books in 2016?')
-    doc2 = nlp(
-        u'What should I read that was published last year? I like crime stories.')
-    print(doc1)
-    print(doc2)
-    print("Similarity", doc1.similarity(doc2))
+def demo(shape):
+    nlp = spacy.load("en_vectors_web_lg")
+    nlp.add_pipe(KerasSimilarityShim.load(nlp.path / "similarity", nlp, shape[0]))
+
+    doc1 = nlp(u"The king of France is bald.")
+    doc2 = nlp(u"France has no king.")
+
+    print("Sentence 1:", doc1)
+    print("Sentence 2:", doc2)
+
+    entailment_type, confidence = doc1.similarity(doc2)
+    print("Entailment type:", entailment_type, "(Confidence:", confidence, ")")
+
+
+LABELS = {"entailment": 0, "contradiction": 1, "neutral": 2}


-LABELS = {'entailment': 0, 'contradiction': 1, 'neutral': 2}
 def read_snli(path):
    texts1 = []
    texts2 = []
    labels = []
-    with path.open() as file_:
+    with open(path, "r") as file_:
        for line in file_:
            eg = json.loads(line)
-            label = eg['gold_label']
-            if label == '-':
+            label = eg["gold_label"]
+            if label == "-":  # per Parikh, ignore - SNLI entries
                continue
-            texts1.append(eg['sentence1'])
-            texts2.append(eg['sentence2'])
+            texts1.append(eg["sentence1"])
+            texts2.append(eg["sentence2"])
            labels.append(LABELS[label])
-    return texts1, texts2, to_categorical(numpy.asarray(labels, dtype='int32'))
+    return texts1, texts2, to_categorical(np.asarray(labels, dtype="int32"))
+
+
+def create_dataset(nlp, texts, hypotheses, num_unk, max_length):
+    sents = texts + hypotheses
+    sents_as_ids = []
+    for sent in sents:
+        doc = nlp(sent)
+        word_ids = []
+        for i, token in enumerate(doc):
+            # skip odd spaces from tokenizer
+            if token.has_vector and token.vector_norm == 0:
+                continue
+
+            if i > max_length:
+                break
+
+            if token.has_vector:
+                word_ids.append(token.rank + num_unk + 1)
+            else:
+                # if we don't have a vector, pick an OOV entry
+                word_ids.append(token.rank % num_unk + 1)
+
+        # there must be a simpler way of generating padded arrays from lists...
+        word_id_vec = np.zeros((max_length), dtype="int")
+        clipped_len = min(max_length, len(word_ids))
+        word_id_vec[:clipped_len] = word_ids[:clipped_len]
+        sents_as_ids.append(word_id_vec)
+
+    return [np.array(sents_as_ids[: len(texts)]), np.array(sents_as_ids[len(texts) :])]


@plac.annotations(
    mode=("Mode to execute", "positional", None, str, ["train", "evaluate", "demo"]),
-    train_loc=("Path to training data", "positional", None, Path),
-    dev_loc=("Path to development data", "positional", None, Path),
+    train_loc=("Path to training data", "option", "t", str),
+    dev_loc=("Path to development or test data", "option", "s", str),
    max_length=("Length to truncate sentences", "option", "L", int),
    nr_hidden=("Number of hidden units", "option", "H", int),
    dropout=("Dropout level", "option", "d", float),
-    learn_rate=("Learning rate", "option", "e", float),
+    learn_rate=("Learning rate", "option", "r", float),
    batch_size=("Batch size for neural network training", "option", "b", int),
-    nr_epoch=("Number of training epochs", "option", "i", int),
-    tree_truncate=("Truncate sentences by tree distance", "flag", "T", bool),
-    gru_encode=("Encode sentences with bidirectional GRU", "flag", "E", bool),
+    nr_epoch=("Number of training epochs", "option", "e", int),
+    entail_dir=(
+        "Direction of entailment",
+        "option",
+        "D",
+        str,
+        ["both", "left", "right"],
+    ),
 )
-def main(mode, train_loc, dev_loc,
-        tree_truncate=False,
-        gru_encode=False,
-        max_length=100,
-        nr_hidden=100,
+def main(
+    mode,
+    train_loc,
+    dev_loc,
+    max_length=50,
+    nr_hidden=200,
    dropout=0.2,
    learn_rate=0.001,
-        batch_size=100,
-        nr_epoch=5):
+    batch_size=1024,
+    nr_epoch=10,
+    entail_dir="both",
+):
    shape = (max_length, nr_hidden, 3)
    settings = {
-        'lr': learn_rate,
-        'dropout': dropout,
-        'batch_size': batch_size,
-        'nr_epoch': nr_epoch,
-        'tree_truncate': tree_truncate,
-        'gru_encode': gru_encode
+        "lr": learn_rate,
+        "dropout": dropout,
+        "batch_size": batch_size,
+        "nr_epoch": nr_epoch,
+        "entail_dir": entail_dir,
    }
-    if mode == 'train':
-        train(train_loc, dev_loc, shape, settings)
-    elif mode == 'evaluate':
-        correct, total = evaluate(dev_loc)
-        print(correct, '/', total, correct / total)
-    else:
-        demo()

-if __name__ == '__main__':
+    if mode == "train":
+        if train_loc == None or dev_loc == None:
+            print("Train mode requires paths to training and development data sets.")
+            sys.exit(1)
+        train(train_loc, dev_loc, shape, settings)
+    elif mode == "evaluate":
+        if dev_loc == None:
+            print("Evaluate mode requires paths to test data set.")
+            sys.exit(1)
+        correct, total = evaluate(dev_loc, shape)
+        print(correct, "/", total, correct / total)
+    else:
+        demo(shape)
+
+
+if __name__ == "__main__":
    plac.call(main)
--- a/examples/keras_parikh_entailment/keras_decomposable_attention.py
+++ b/examples/keras_parikh_entailment/keras_decomposable_attention.py
@ -1,259 +1,144 @@
-# Semantic similarity with decomposable attention (using spaCy and Keras)
-# Practical state-of-the-art text similarity with spaCy and Keras
-import numpy
+# Semantic entailment/similarity with decomposable attention (using spaCy and Keras)
+# Practical state-of-the-art textual entailment with spaCy and Keras

-from keras.layers import InputSpec, Layer, Input, Dense, merge
-from keras.layers import Lambda, Activation, Dropout, Embedding, TimeDistributed
-from keras.layers import Bidirectional, GRU, LSTM
-from keras.layers.noise import GaussianNoise
-from keras.layers.advanced_activations import ELU
-import keras.backend as K
-from keras.models import Sequential, Model, model_from_json
-from keras.regularizers import l2
-from keras.optimizers import Adam
-from keras.layers.normalization import BatchNormalization
-from keras.layers.pooling import GlobalAveragePooling1D, GlobalMaxPooling1D
-from keras.layers import Merge
+import numpy as np
+from keras import layers, Model, models, optimizers
+from keras import backend as K


 def build_model(vectors, shape, settings):
-    '''Compile the model.'''
    max_length, nr_hidden, nr_class = shape
-    # Declare inputs.
-    ids1 = Input(shape=(max_length,), dtype='int32', name='words1')
-    ids2 = Input(shape=(max_length,), dtype='int32', name='words2')

-    # Construct operations, which we'll chain together.
-    embed = _StaticEmbedding(vectors, max_length, nr_hidden, dropout=0.2, nr_tune=5000)
-    if settings['gru_encode']:
-        encode = _BiRNNEncoding(max_length, nr_hidden, dropout=settings['dropout'])
-    attend = _Attention(max_length, nr_hidden, dropout=settings['dropout'])
-    align = _SoftAlignment(max_length, nr_hidden)
-    compare = _Comparison(max_length, nr_hidden, dropout=settings['dropout'])
-    entail = _Entailment(nr_hidden, nr_class, dropout=settings['dropout'])
+    input1 = layers.Input(shape=(max_length,), dtype="int32", name="words1")
+    input2 = layers.Input(shape=(max_length,), dtype="int32", name="words2")

-    # Declare the model as a computational graph.
-    sent1 = embed(ids1) # Shape: (i, n)
-    sent2 = embed(ids2) # Shape: (j, n)
+    # embeddings (projected)
+    embed = create_embedding(vectors, max_length, nr_hidden)

-    if settings['gru_encode']:
-        sent1 = encode(sent1)
-        sent2 = encode(sent2)
+    a = embed(input1)
+    b = embed(input2)

-    attention = attend(sent1, sent2)  # Shape: (i, j)
+    # step 1: attend
+    F = create_feedforward(nr_hidden)
+    att_weights = layers.dot([F(a), F(b)], axes=-1)

-    align1 = align(sent2, attention)
-    align2 = align(sent1, attention, transpose=True)
+    G = create_feedforward(nr_hidden)

-    feats1 = compare(sent1, align1)
-    feats2 = compare(sent2, align2)
+    if settings["entail_dir"] == "both":
+        norm_weights_a = layers.Lambda(normalizer(1))(att_weights)
+        norm_weights_b = layers.Lambda(normalizer(2))(att_weights)
+        alpha = layers.dot([norm_weights_a, a], axes=1)
+        beta = layers.dot([norm_weights_b, b], axes=1)

-    scores = entail(feats1, feats2)
+        # step 2: compare
+        comp1 = layers.concatenate([a, beta])
+        comp2 = layers.concatenate([b, alpha])
+        v1 = layers.TimeDistributed(G)(comp1)
+        v2 = layers.TimeDistributed(G)(comp2)

-    # Now that we have the input/output, we can construct the Model object...
-    model = Model(input=[ids1, ids2], output=[scores])
+        # step 3: aggregate
+        v1_sum = layers.Lambda(sum_word)(v1)
+        v2_sum = layers.Lambda(sum_word)(v2)
+        concat = layers.concatenate([v1_sum, v2_sum])
+
+    elif settings["entail_dir"] == "left":
+        norm_weights_a = layers.Lambda(normalizer(1))(att_weights)
+        alpha = layers.dot([norm_weights_a, a], axes=1)
+        comp2 = layers.concatenate([b, alpha])
+        v2 = layers.TimeDistributed(G)(comp2)
+        v2_sum = layers.Lambda(sum_word)(v2)
+        concat = v2_sum
+
+    else:
+        norm_weights_b = layers.Lambda(normalizer(2))(att_weights)
+        beta = layers.dot([norm_weights_b, b], axes=1)
+        comp1 = layers.concatenate([a, beta])
+        v1 = layers.TimeDistributed(G)(comp1)
+        v1_sum = layers.Lambda(sum_word)(v1)
+        concat = v1_sum
+
+    H = create_feedforward(nr_hidden)
+    out = H(concat)
+    out = layers.Dense(nr_class, activation="softmax")(out)
+
+    model = Model([input1, input2], out)

-    # ...Compile it...
    model.compile(
-        optimizer=Adam(lr=settings['lr']),
-        loss='categorical_crossentropy',
-        metrics=['accuracy'])
-    # ...And return it for training.
+        optimizer=optimizers.Adam(lr=settings["lr"]),
+        loss="categorical_crossentropy",
+        metrics=["accuracy"],
+    )
+
    return model


-class _StaticEmbedding(object):
-    def __init__(self, vectors, max_length, nr_out, nr_tune=1000, dropout=0.0):
-        self.nr_out = nr_out
-        self.max_length = max_length
-        self.embed = Embedding(
+def create_embedding(vectors, max_length, projected_dim):
+    return models.Sequential(
+        [
+            layers.Embedding(
                vectors.shape[0],
                vectors.shape[1],
                input_length=max_length,
                weights=[vectors],
-                        name='embed',
-                        trainable=False)
-        self.tune = Embedding(
-                        nr_tune,
-                        nr_out,
-                        input_length=max_length,
-                        weights=None,
-                        name='tune',
-                        trainable=True,
-                        dropout=dropout)
-        self.mod_ids = Lambda(lambda sent: sent % (nr_tune-1)+1,
-                              output_shape=(self.max_length,))
-
-        self.project = TimeDistributed(
-                            Dense(
-                                nr_out,
-                                activation=None,
-                                bias=False,
-                                name='project'))
-
-    def __call__(self, sentence):
-        def get_output_shape(shapes):
-            print(shapes)
-            return shapes[0]
-        mod_sent = self.mod_ids(sentence)
-        tuning = self.tune(mod_sent)
-        #tuning = merge([tuning, mod_sent],
-        #    mode=lambda AB: AB[0] * (K.clip(K.cast(AB[1], 'float32'), 0, 1)),
-        #    output_shape=(self.max_length, self.nr_out))
-        pretrained = self.project(self.embed(sentence))
-        vectors = merge([pretrained, tuning], mode='sum')
-        return vectors
+                trainable=False,
+            ),
+            layers.TimeDistributed(
+                layers.Dense(projected_dim, activation=None, use_bias=False)
+            ),
+        ]
+    )


-class _BiRNNEncoding(object):
-    def __init__(self, max_length, nr_out, dropout=0.0):
-        self.model = Sequential()
-        self.model.add(Bidirectional(LSTM(nr_out, return_sequences=True,
-                                         dropout_W=dropout, dropout_U=dropout),
-                                         input_shape=(max_length, nr_out)))
-        self.model.add(TimeDistributed(Dense(nr_out, activation='relu', init='he_normal')))
-        self.model.add(TimeDistributed(Dropout(0.2)))
-
-    def __call__(self, sentence):
-        return self.model(sentence)
+def create_feedforward(num_units=200, activation="relu", dropout_rate=0.2):
+    return models.Sequential(
+        [
+            layers.Dense(num_units, activation=activation),
+            layers.Dropout(dropout_rate),
+            layers.Dense(num_units, activation=activation),
+            layers.Dropout(dropout_rate),
+        ]
+    )


-class _Attention(object):
-    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=0.0, activation='relu'):
-        self.max_length = max_length
-        self.model = Sequential()
-        self.model.add(Dropout(dropout, input_shape=(nr_hidden,)))
-        self.model.add(
-            Dense(nr_hidden, name='attend1',
-                init='he_normal', W_regularizer=l2(L2),
-                input_shape=(nr_hidden,), activation='relu'))
-        self.model.add(Dropout(dropout))
-        self.model.add(Dense(nr_hidden, name='attend2',
-            init='he_normal', W_regularizer=l2(L2), activation='relu'))
-        self.model = TimeDistributed(self.model)
+def normalizer(axis):
+    def _normalize(att_weights):
+        exp_weights = K.exp(att_weights)
+        sum_weights = K.sum(exp_weights, axis=axis, keepdims=True)
+        return exp_weights / sum_weights

-    def __call__(self, sent1, sent2):
-        def _outer(AB):
-            att_ji = K.batch_dot(AB[1], K.permute_dimensions(AB[0], (0, 2, 1)))
-            return K.permute_dimensions(att_ji,(0, 2, 1))
-        return merge(
-                [self.model(sent1), self.model(sent2)],
-                mode=_outer,
-                output_shape=(self.max_length, self.max_length))
+    return _normalize


-class _SoftAlignment(object):
-    def __init__(self, max_length, nr_hidden):
-        self.max_length = max_length
-        self.nr_hidden = nr_hidden
-
-    def __call__(self, sentence, attention, transpose=False):
-        def _normalize_attention(attmat):
-            att = attmat[0]
-            mat = attmat[1]
-            if transpose:
-                att = K.permute_dimensions(att,(0, 2, 1))
-            # 3d softmax
-            e = K.exp(att - K.max(att, axis=-1, keepdims=True))
-            s = K.sum(e, axis=-1, keepdims=True)
-            sm_att = e / s
-            return K.batch_dot(sm_att, mat)
-        return merge([attention, sentence], mode=_normalize_attention,
-                      output_shape=(self.max_length, self.nr_hidden)) # Shape: (i, n)
-
-
-class _Comparison(object):
-    def __init__(self, words, nr_hidden, L2=0.0, dropout=0.0):
-        self.words = words
-        self.model = Sequential()
-        self.model.add(Dropout(dropout, input_shape=(nr_hidden*2,)))
-        self.model.add(Dense(nr_hidden, name='compare1',
-            init='he_normal', W_regularizer=l2(L2)))
-        self.model.add(Activation('relu'))
-        self.model.add(Dropout(dropout))
-        self.model.add(Dense(nr_hidden, name='compare2',
-                        W_regularizer=l2(L2), init='he_normal'))
-        self.model.add(Activation('relu'))
-        self.model = TimeDistributed(self.model)
-
-    def __call__(self, sent, align, **kwargs):
-        result = self.model(merge([sent, align], mode='concat')) # Shape: (i, n)
-        avged = GlobalAveragePooling1D()(result, mask=self.words)
-        maxed = GlobalMaxPooling1D()(result, mask=self.words)
-        merged = merge([avged, maxed])
-        result = BatchNormalization()(merged)
-        return result
-
-
-class _Entailment(object):
-    def __init__(self, nr_hidden, nr_out, dropout=0.0, L2=0.0):
-        self.model = Sequential()
-        self.model.add(Dropout(dropout, input_shape=(nr_hidden*2,)))
-        self.model.add(Dense(nr_hidden, name='entail1',
-            init='he_normal', W_regularizer=l2(L2)))
-        self.model.add(Activation('relu'))
-        self.model.add(Dropout(dropout))
-        self.model.add(Dense(nr_hidden, name='entail2',
-            init='he_normal', W_regularizer=l2(L2)))
-        self.model.add(Activation('relu'))
-        self.model.add(Dense(nr_out, name='entail_out', activation='softmax',
-                        W_regularizer=l2(L2), init='zero'))
-
-    def __call__(self, feats1, feats2):
-        features = merge([feats1, feats2], mode='concat')
-        return self.model(features)
-
-
-class _GlobalSumPooling1D(Layer):
-    '''Global sum pooling operation for temporal data.
-
-    # Input shape
-        3D tensor with shape: `(samples, steps, features)`.
-
-    # Output shape
-        2D tensor with shape: `(samples, features)`.
-    '''
-    def __init__(self, **kwargs):
-        super(_GlobalSumPooling1D, self).__init__(**kwargs)
-        self.input_spec = [InputSpec(ndim=3)]
-
-    def get_output_shape_for(self, input_shape):
-        return (input_shape[0], input_shape[2])
-
-    def call(self, x, mask=None):
-        if mask is not None:
-            return K.sum(x * K.clip(mask, 0, 1), axis=1)
-        else:
+def sum_word(x):
    return K.sum(x, axis=1)


 def test_build_model():
-    vectors = numpy.ndarray((100, 8), dtype='float32')
+    vectors = np.ndarray((100, 8), dtype="float32")
    shape = (10, 16, 3)
-    settings = {'lr': 0.001, 'dropout': 0.2, 'gru_encode':True}
+    settings = {"lr": 0.001, "dropout": 0.2, "gru_encode": True, "entail_dir": "both"}
    model = build_model(vectors, shape, settings)


 def test_fit_model():
-
    def _generate_X(nr_example, length, nr_vector):
-        X1 = numpy.ndarray((nr_example, length), dtype='int32')
+        X1 = np.ndarray((nr_example, length), dtype="int32")
        X1 *= X1 < nr_vector
        X1 *= 0 <= X1
-        X2 = numpy.ndarray((nr_example, length), dtype='int32')
+        X2 = np.ndarray((nr_example, length), dtype="int32")
        X2 *= X2 < nr_vector
        X2 *= 0 <= X2
        return [X1, X2]

    def _generate_Y(nr_example, nr_class):
-        ys = numpy.zeros((nr_example, nr_class), dtype='int32')
+        ys = np.zeros((nr_example, nr_class), dtype="int32")
        for i in range(nr_example):
            ys[i, i % nr_class] = 1
        return ys

-    vectors = numpy.ndarray((100, 8), dtype='float32')
+    vectors = np.ndarray((100, 8), dtype="float32")
    shape = (10, 16, 3)
-    settings = {'lr': 0.001, 'dropout': 0.2, 'gru_encode':True}
+    settings = {"lr": 0.001, "dropout": 0.2, "gru_encode": True, "entail_dir": "both"}
    model = build_model(vectors, shape, settings)

    train_X = _generate_X(20, shape[0], vectors.shape[0])
@ -261,8 +146,7 @@ def test_fit_model():
    dev_X = _generate_X(15, shape[0], vectors.shape[0])
    dev_Y = _generate_Y(15, shape[2])

-    model.fit(train_X, train_Y, validation_data=(dev_X, dev_Y), nb_epoch=5,
-              batch_size=4)
+    model.fit(train_X, train_Y, validation_data=(dev_X, dev_Y), epochs=5, batch_size=4)


 __all__ = [build_model]
--- a/examples/keras_parikh_entailment/spacy_hook.py
+++ b/examples/keras_parikh_entailment/spacy_hook.py
@ -1,8 +1,5 @@
+import numpy as np
 from keras.models import model_from_json
-import numpy
-import numpy.random
-import json
-from spacy.tokens.span import Span

 try:
    import cPickle as pickle
@ -11,16 +8,23 @@ except ImportError:


 class KerasSimilarityShim(object):
+    entailment_types = ["entailment", "contradiction", "neutral"]
+
    @classmethod
-    def load(cls, path, nlp, get_features=None, max_length=100):
+    def load(cls, path, nlp, max_length=100, get_features=None):
+        
        if get_features is None:
            get_features = get_word_ids
+            
        with (path / 'config.json').open() as file_:
            model = model_from_json(file_.read())
        with (path / 'model').open('rb') as file_:
            weights = pickle.load(file_)
+            
        embeddings = get_embeddings(nlp.vocab)
-        model.set_weights([embeddings] + weights)
+        weights.insert(1, embeddings)
+        model.set_weights(weights)
+
        return cls(model, get_features=get_features, max_length=max_length)

    def __init__(self, model, get_features=None, max_length=100):
@ -32,58 +36,42 @@ class KerasSimilarityShim(object):
        doc.user_hooks['similarity'] = self.predict
        doc.user_span_hooks['similarity'] = self.predict

+        return doc
+
    def predict(self, doc1, doc2):
-        x1 = self.get_features([doc1], max_length=self.max_length, tree_truncate=True)
-        x2 = self.get_features([doc2], max_length=self.max_length, tree_truncate=True)
+        x1 = self.get_features([doc1], max_length=self.max_length)
+        x2 = self.get_features([doc2], max_length=self.max_length)
        scores = self.model.predict([x1, x2])
-        return scores[0]
+
+        return self.entailment_types[scores.argmax()], scores.max()


 def get_embeddings(vocab, nr_unk=100):
-    nr_vector = max(lex.rank for lex in vocab) + 1
-    vectors = numpy.zeros((nr_vector+nr_unk+2, vocab.vectors_length), dtype='float32')
+    # the extra +1 is for a zero vector representing sentence-final padding
+    num_vectors = max(lex.rank for lex in vocab) + 2 
+    
+    # create random vectors for OOV tokens
+    oov = np.random.normal(size=(nr_unk, vocab.vectors_length))
+    oov = oov / oov.sum(axis=1, keepdims=True)
+    
+    vectors = np.zeros((num_vectors + nr_unk, vocab.vectors_length), dtype='float32')
+    vectors[1:(nr_unk + 1), ] = oov
    for lex in vocab:
-        if lex.has_vector:
-            vectors[lex.rank+1] = lex.vector / lex.vector_norm
+        if lex.has_vector and lex.vector_norm > 0:
+            vectors[nr_unk + lex.rank + 1] = lex.vector / lex.vector_norm 
+
    return vectors


-def get_word_ids(docs, rnn_encode=False, tree_truncate=False, max_length=100, nr_unk=100):
-    Xs = numpy.zeros((len(docs), max_length), dtype='int32')
+def get_word_ids(docs, max_length=100, nr_unk=100):
+    Xs = np.zeros((len(docs), max_length), dtype='int32')
+    
    for i, doc in enumerate(docs):
-        if tree_truncate:
-            if isinstance(doc, Span):
-                queue = [doc.root]
-            else:
-                queue = [sent.root for sent in doc.sents]
-        else:
-            queue = list(doc)
-        words = []
-        while len(words) <= max_length and queue:
-            word = queue.pop(0)
-            if rnn_encode or (not word.is_punct and not word.is_space):
-                words.append(word)
-            if tree_truncate:
-                queue.extend(list(word.lefts))
-                queue.extend(list(word.rights))
-        words.sort()
-        for j, token in enumerate(words):
-            if token.has_vector:
-                Xs[i, j] = token.rank+1
-            else:
-                Xs[i, j] = (token.shape % (nr_unk-1))+2
-            j += 1
-            if j >= max_length:
+        for j, token in enumerate(doc):
+            if j == max_length:
                break
+            if token.has_vector:
+                Xs[i, j] = token.rank + nr_unk + 1
            else:
-            Xs[i, len(words)] = 1
+                Xs[i, j] = token.rank % nr_unk + 1
    return Xs
-
-
-def create_similarity_pipeline(nlp, max_length=100):
-    return [
-        nlp.tagger,
-        nlp.entity,
-        nlp.parser,
-        KerasSimilarityShim.load(nlp.path / 'similarity', nlp, max_length)
-    ]
--- a/examples/notebooks/Decompositional
+++ b/examples/notebooks/Decompositional
@ -0,0 +1,955 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Natural language inference using spaCy and Keras"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Introduction"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This notebook details an implementation of the natural language inference model presented in [(Parikh et al, 2016)](https://arxiv.org/abs/1606.01933).  The model is notable for the small number of parameters *and hyperparameters* it specifies, while still yielding good performance."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Constructing the dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import spacy\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We only need the GloVe vectors from spaCy, not a full NLP pipeline."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nlp = spacy.load('en_vectors_web_lg')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Function to load the SNLI dataset.  The categories are converted to one-hot representation.  The function comes from an example in spaCy."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/jds/tensorflow-gpu/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
+      "  from ._conv import register_converters as _register_converters\n",
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import json\n",
+    "from keras.utils import to_categorical\n",
+    "\n",
+    "LABELS = {'entailment': 0, 'contradiction': 1, 'neutral': 2}\n",
+    "def read_snli(path):\n",
+    "    texts1 = []\n",
+    "    texts2 = []\n",
+    "    labels = []\n",
+    "    with open(path, 'r') as file_:\n",
+    "        for line in file_:\n",
+    "            eg = json.loads(line)\n",
+    "            label = eg['gold_label']\n",
+    "            if label == '-':  # per Parikh, ignore - SNLI entries\n",
+    "                continue\n",
+    "            texts1.append(eg['sentence1'])\n",
+    "            texts2.append(eg['sentence2'])\n",
+    "            labels.append(LABELS[label])\n",
+    "    return texts1, texts2, to_categorical(np.asarray(labels, dtype='int32'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Because Keras can do the train/test split for us, we'll load *all* SNLI triples from one file."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "texts,hypotheses,labels = read_snli('snli/snli_1.0_train.jsonl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_dataset(nlp, texts, hypotheses, num_oov, max_length, norm_vectors = True):\n",
+    "    sents = texts + hypotheses\n",
+    "    \n",
+    "    # the extra +1 is for a zero vector representing NULL for padding\n",
+    "    num_vectors = max(lex.rank for lex in nlp.vocab) + 2 \n",
+    "    \n",
+    "    # create random vectors for OOV tokens\n",
+    "    oov = np.random.normal(size=(num_oov, nlp.vocab.vectors_length))\n",
+    "    oov = oov / oov.sum(axis=1, keepdims=True)\n",
+    "    \n",
+    "    vectors = np.zeros((num_vectors + num_oov, nlp.vocab.vectors_length), dtype='float32')\n",
+    "    vectors[num_vectors:, ] = oov\n",
+    "    for lex in nlp.vocab:\n",
+    "        if lex.has_vector and lex.vector_norm > 0:\n",
+    "            vectors[lex.rank + 1] = lex.vector / lex.vector_norm if norm_vectors == True else lex.vector\n",
+    "            \n",
+    "    sents_as_ids = []\n",
+    "    for sent in sents:\n",
+    "        doc = nlp(sent)\n",
+    "        word_ids = []\n",
+    "        \n",
+    "        for i, token in enumerate(doc):\n",
+    "            # skip odd spaces from tokenizer\n",
+    "            if token.has_vector and token.vector_norm == 0:\n",
+    "                continue\n",
+    "                \n",
+    "            if i > max_length:\n",
+    "                break\n",
+    "                \n",
+    "            if token.has_vector:\n",
+    "                word_ids.append(token.rank + 1)\n",
+    "            else:\n",
+    "                # if we don't have a vector, pick an OOV entry\n",
+    "                word_ids.append(token.rank % num_oov + num_vectors) \n",
+    "                \n",
+    "        # there must be a simpler way of generating padded arrays from lists...\n",
+    "        word_id_vec = np.zeros((max_length), dtype='int')\n",
+    "        clipped_len = min(max_length, len(word_ids))\n",
+    "        word_id_vec[:clipped_len] = word_ids[:clipped_len]\n",
+    "        sents_as_ids.append(word_id_vec)\n",
+    "        \n",
+    "        \n",
+    "    return vectors, np.array(sents_as_ids[:len(texts)]), np.array(sents_as_ids[len(texts):])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sem_vectors, text_vectors, hypothesis_vectors = create_dataset(nlp, texts, hypotheses, 100, 50, True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "texts_test,hypotheses_test,labels_test = read_snli('snli/snli_1.0_test.jsonl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "_, text_vectors_test, hypothesis_vectors_test = create_dataset(nlp, texts_test, hypotheses_test, 100, 50, True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We use spaCy to tokenize the sentences and return, when available, a semantic vector for each token.  \n",
+    "\n",
+    "OOV terms (tokens for which no semantic vector is available) are assigned to one of a set of randomly-generated OOV vectors, per (Parikh et al, 2016).\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that we will clip sentences to 50 words maximum."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from keras import layers, Model, models\n",
+    "from keras import backend as K"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Building the model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The embedding layer copies the 300-dimensional GloVe vectors into GPU memory.  Per (Parikh et al, 2016), the vectors, which are not adapted during training, are projected down to lower-dimensional vectors using a trained projection matrix."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_embedding(vectors, max_length, projected_dim):\n",
+    "    return models.Sequential([\n",
+    "        layers.Embedding(\n",
+    "            vectors.shape[0],\n",
+    "            vectors.shape[1],\n",
+    "            input_length=max_length,\n",
+    "            weights=[vectors],\n",
+    "            trainable=False),\n",
+    "        \n",
+    "        layers.TimeDistributed(\n",
+    "            layers.Dense(projected_dim,\n",
+    "                         activation=None,\n",
+    "                         use_bias=False))\n",
+    "    ])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The Parikh model makes use of three feedforward blocks that construct nonlinear combinations of their input.  Each block contains two ReLU layers and two dropout layers."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_feedforward(num_units=200, activation='relu', dropout_rate=0.2):\n",
+    "    return models.Sequential([\n",
+    "        layers.Dense(num_units, activation=activation),\n",
+    "        layers.Dropout(dropout_rate),\n",
+    "        layers.Dense(num_units, activation=activation),\n",
+    "        layers.Dropout(dropout_rate)\n",
+    "    ])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The basic idea of the (Parikh et al, 2016) model is to:\n",
+    "\n",
+    "1.  *Align*: Construct an alignment of subphrases in the text and hypothesis using an attention-like mechanism, called \"decompositional\" because the layer is applied to each of the two sentences individually rather than to their product.  The dot product of the nonlinear transformations of the inputs is then normalized vertically and horizontally to yield a pair of \"soft\" alignment structures, from text->hypothesis and hypothesis->text.  Concretely, for each word in one sentence, a multinomial distribution is computed over the words of the other sentence, by learning a multinomial logistic with softmax target.\n",
+    "2.  *Compare*: Each word is now compared to its aligned phrase using a function modeled as a two-layer feedforward ReLU network.  The output is a high-dimensional representation of the strength of association between word and aligned phrase.\n",
+    "3.  *Aggregate*: The comparison vectors are summed, separately, for the text and the hypothesis.  The result is two vectors: one that describes the degree of association of the text to the hypothesis, and the second, of the hypothesis to the text.\n",
+    "4.  Finally, these two vectors are processed by a dense layer followed by a softmax classifier, as usual.\n",
+    "\n",
+    "Note that because in entailment the truth conditions of the consequent must be a subset of those of the antecedent, it is not obvious that we need both vectors in step (3).  Entailment is not symmetric.  It may be enough to just use the hypothesis->text vector.  We will explore this possibility later."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We need a couple of little functions for Lambda layers to normalize and aggregate weights:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def normalizer(axis):\n",
+    "    def _normalize(att_weights):\n",
+    "        exp_weights = K.exp(att_weights)\n",
+    "        sum_weights = K.sum(exp_weights, axis=axis, keepdims=True)\n",
+    "        return exp_weights/sum_weights\n",
+    "    return _normalize\n",
+    "\n",
+    "def sum_word(x):\n",
+    "    return K.sum(x, axis=1)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def build_model(vectors, max_length, num_hidden, num_classes, projected_dim, entail_dir='both'):\n",
+    "    input1 = layers.Input(shape=(max_length,), dtype='int32', name='words1')\n",
+    "    input2 = layers.Input(shape=(max_length,), dtype='int32', name='words2')\n",
+    "    \n",
+    "    # embeddings (projected)\n",
+    "    embed = create_embedding(vectors, max_length, projected_dim)\n",
+    "   \n",
+    "    a = embed(input1)\n",
+    "    b = embed(input2)\n",
+    "    \n",
+    "    # step 1: attend\n",
+    "    F = create_feedforward(num_hidden)\n",
+    "    att_weights = layers.dot([F(a), F(b)], axes=-1)\n",
+    "    \n",
+    "    G = create_feedforward(num_hidden)\n",
+    "    \n",
+    "    if entail_dir == 'both':\n",
+    "        norm_weights_a = layers.Lambda(normalizer(1))(att_weights)\n",
+    "        norm_weights_b = layers.Lambda(normalizer(2))(att_weights)\n",
+    "        alpha = layers.dot([norm_weights_a, a], axes=1)\n",
+    "        beta  = layers.dot([norm_weights_b, b], axes=1)\n",
+    "\n",
+    "        # step 2: compare\n",
+    "        comp1 = layers.concatenate([a, beta])\n",
+    "        comp2 = layers.concatenate([b, alpha])\n",
+    "        v1 = layers.TimeDistributed(G)(comp1)\n",
+    "        v2 = layers.TimeDistributed(G)(comp2)\n",
+    "\n",
+    "        # step 3: aggregate\n",
+    "        v1_sum = layers.Lambda(sum_word)(v1)\n",
+    "        v2_sum = layers.Lambda(sum_word)(v2)\n",
+    "        concat = layers.concatenate([v1_sum, v2_sum])\n",
+    "    elif entail_dir == 'left':\n",
+    "        norm_weights_a = layers.Lambda(normalizer(1))(att_weights)\n",
+    "        alpha = layers.dot([norm_weights_a, a], axes=1)\n",
+    "        comp2 = layers.concatenate([b, alpha])\n",
+    "        v2 = layers.TimeDistributed(G)(comp2)\n",
+    "        v2_sum = layers.Lambda(sum_word)(v2)\n",
+    "        concat = v2_sum\n",
+    "    else:\n",
+    "        norm_weights_b = layers.Lambda(normalizer(2))(att_weights)\n",
+    "        beta  = layers.dot([norm_weights_b, b], axes=1)\n",
+    "        comp1 = layers.concatenate([a, beta])\n",
+    "        v1 = layers.TimeDistributed(G)(comp1)\n",
+    "        v1_sum = layers.Lambda(sum_word)(v1)\n",
+    "        concat = v1_sum\n",
+    "    \n",
+    "    H = create_feedforward(num_hidden)\n",
+    "    out = H(concat)\n",
+    "    out = layers.Dense(num_classes, activation='softmax')(out)\n",
+    "    \n",
+    "    model = Model([input1, input2], out)\n",
+    "    \n",
+    "    model.compile(optimizer='adam',\n",
+    "                  loss='categorical_crossentropy',\n",
+    "                  metrics=['accuracy'])\n",
+    "    return model\n",
+    "    \n",
+    "    \n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "__________________________________________________________________________________________________\n",
+      "Layer (type)                    Output Shape         Param #     Connected to                     \n",
+      "==================================================================================================\n",
+      "words1 (InputLayer)             (None, 50)           0                                            \n",
+      "__________________________________________________________________________________________________\n",
+      "words2 (InputLayer)             (None, 50)           0                                            \n",
+      "__________________________________________________________________________________________________\n",
+      "sequential_1 (Sequential)       (None, 50, 200)      321381600   words1[0][0]                     \n",
+      "                                                                 words2[0][0]                     \n",
+      "__________________________________________________________________________________________________\n",
+      "sequential_2 (Sequential)       (None, 50, 200)      80400       sequential_1[1][0]               \n",
+      "                                                                 sequential_1[2][0]               \n",
+      "__________________________________________________________________________________________________\n",
+      "dot_1 (Dot)                     (None, 50, 50)       0           sequential_2[1][0]               \n",
+      "                                                                 sequential_2[2][0]               \n",
+      "__________________________________________________________________________________________________\n",
+      "lambda_2 (Lambda)               (None, 50, 50)       0           dot_1[0][0]                      \n",
+      "__________________________________________________________________________________________________\n",
+      "lambda_1 (Lambda)               (None, 50, 50)       0           dot_1[0][0]                      \n",
+      "__________________________________________________________________________________________________\n",
+      "dot_3 (Dot)                     (None, 50, 200)      0           lambda_2[0][0]                   \n",
+      "                                                                 sequential_1[2][0]               \n",
+      "__________________________________________________________________________________________________\n",
+      "dot_2 (Dot)                     (None, 50, 200)      0           lambda_1[0][0]                   \n",
+      "                                                                 sequential_1[1][0]               \n",
+      "__________________________________________________________________________________________________\n",
+      "concatenate_1 (Concatenate)     (None, 50, 400)      0           sequential_1[1][0]               \n",
+      "                                                                 dot_3[0][0]                      \n",
+      "__________________________________________________________________________________________________\n",
+      "concatenate_2 (Concatenate)     (None, 50, 400)      0           sequential_1[2][0]               \n",
+      "                                                                 dot_2[0][0]                      \n",
+      "__________________________________________________________________________________________________\n",
+      "time_distributed_2 (TimeDistrib (None, 50, 200)      120400      concatenate_1[0][0]              \n",
+      "__________________________________________________________________________________________________\n",
+      "time_distributed_3 (TimeDistrib (None, 50, 200)      120400      concatenate_2[0][0]              \n",
+      "__________________________________________________________________________________________________\n",
+      "lambda_3 (Lambda)               (None, 200)          0           time_distributed_2[0][0]         \n",
+      "__________________________________________________________________________________________________\n",
+      "lambda_4 (Lambda)               (None, 200)          0           time_distributed_3[0][0]         \n",
+      "__________________________________________________________________________________________________\n",
+      "concatenate_3 (Concatenate)     (None, 400)          0           lambda_3[0][0]                   \n",
+      "                                                                 lambda_4[0][0]                   \n",
+      "__________________________________________________________________________________________________\n",
+      "sequential_4 (Sequential)       (None, 200)          120400      concatenate_3[0][0]              \n",
+      "__________________________________________________________________________________________________\n",
+      "dense_8 (Dense)                 (None, 3)            603         sequential_4[1][0]               \n",
+      "==================================================================================================\n",
+      "Total params: 321,703,403\n",
+      "Trainable params: 381,803\n",
+      "Non-trainable params: 321,321,600\n",
+      "__________________________________________________________________________________________________\n"
+     ]
+    }
+   ],
+   "source": [
+    "K.clear_session()\n",
+    "m = build_model(sem_vectors, 50, 200, 3, 200)\n",
+    "m.summary()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The number of trainable parameters, ~381k, is the number given by Parikh et al, so we're on the right track."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Training the model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Parikh et al use tiny batches of 4, training for 50MM batches, which amounts to around 500 epochs.  Here we'll use large batches to better use the GPU, and train for fewer epochs -- for purposes of this experiment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Train on 549367 samples, validate on 9824 samples\n",
+      "Epoch 1/50\n",
+      "549367/549367 [==============================] - 34s 62us/step - loss: 0.7599 - acc: 0.6617 - val_loss: 0.5396 - val_acc: 0.7861\n",
+      "Epoch 2/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.5611 - acc: 0.7763 - val_loss: 0.4892 - val_acc: 0.8085\n",
+      "Epoch 3/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.5212 - acc: 0.7948 - val_loss: 0.4574 - val_acc: 0.8261\n",
+      "Epoch 4/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4986 - acc: 0.8045 - val_loss: 0.4410 - val_acc: 0.8274\n",
+      "Epoch 5/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4819 - acc: 0.8114 - val_loss: 0.4224 - val_acc: 0.8383\n",
+      "Epoch 6/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4714 - acc: 0.8166 - val_loss: 0.4200 - val_acc: 0.8379\n",
+      "Epoch 7/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4633 - acc: 0.8203 - val_loss: 0.4098 - val_acc: 0.8457\n",
+      "Epoch 8/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4558 - acc: 0.8232 - val_loss: 0.4114 - val_acc: 0.8415\n",
+      "Epoch 9/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4508 - acc: 0.8250 - val_loss: 0.4062 - val_acc: 0.8477\n",
+      "Epoch 10/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4433 - acc: 0.8286 - val_loss: 0.3982 - val_acc: 0.8486\n",
+      "Epoch 11/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4388 - acc: 0.8307 - val_loss: 0.3953 - val_acc: 0.8497\n",
+      "Epoch 12/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4351 - acc: 0.8321 - val_loss: 0.3973 - val_acc: 0.8522\n",
+      "Epoch 13/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4309 - acc: 0.8342 - val_loss: 0.3939 - val_acc: 0.8539\n",
+      "Epoch 14/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4269 - acc: 0.8355 - val_loss: 0.3932 - val_acc: 0.8517\n",
+      "Epoch 15/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4247 - acc: 0.8369 - val_loss: 0.3938 - val_acc: 0.8515\n",
+      "Epoch 16/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4208 - acc: 0.8379 - val_loss: 0.3936 - val_acc: 0.8504\n",
+      "Epoch 17/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4194 - acc: 0.8390 - val_loss: 0.3885 - val_acc: 0.8560\n",
+      "Epoch 18/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4162 - acc: 0.8402 - val_loss: 0.3874 - val_acc: 0.8561\n",
+      "Epoch 19/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4140 - acc: 0.8409 - val_loss: 0.3889 - val_acc: 0.8545\n",
+      "Epoch 20/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4114 - acc: 0.8426 - val_loss: 0.3864 - val_acc: 0.8583\n",
+      "Epoch 21/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4092 - acc: 0.8430 - val_loss: 0.3870 - val_acc: 0.8561\n",
+      "Epoch 22/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4062 - acc: 0.8442 - val_loss: 0.3852 - val_acc: 0.8577\n",
+      "Epoch 23/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4050 - acc: 0.8450 - val_loss: 0.3850 - val_acc: 0.8578\n",
+      "Epoch 24/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4035 - acc: 0.8455 - val_loss: 0.3825 - val_acc: 0.8555\n",
+      "Epoch 25/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.4018 - acc: 0.8460 - val_loss: 0.3837 - val_acc: 0.8573\n",
+      "Epoch 26/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3989 - acc: 0.8476 - val_loss: 0.3843 - val_acc: 0.8599\n",
+      "Epoch 27/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3979 - acc: 0.8481 - val_loss: 0.3841 - val_acc: 0.8589\n",
+      "Epoch 28/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3967 - acc: 0.8484 - val_loss: 0.3811 - val_acc: 0.8575\n",
+      "Epoch 29/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3956 - acc: 0.8492 - val_loss: 0.3829 - val_acc: 0.8589\n",
+      "Epoch 30/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3938 - acc: 0.8499 - val_loss: 0.3859 - val_acc: 0.8562\n",
+      "Epoch 31/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3925 - acc: 0.8500 - val_loss: 0.3798 - val_acc: 0.8587\n",
+      "Epoch 32/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3906 - acc: 0.8509 - val_loss: 0.3834 - val_acc: 0.8569\n",
+      "Epoch 33/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3893 - acc: 0.8511 - val_loss: 0.3806 - val_acc: 0.8588\n",
+      "Epoch 34/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3885 - acc: 0.8515 - val_loss: 0.3828 - val_acc: 0.8603\n",
+      "Epoch 35/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3879 - acc: 0.8520 - val_loss: 0.3800 - val_acc: 0.8594\n",
+      "Epoch 36/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3860 - acc: 0.8530 - val_loss: 0.3796 - val_acc: 0.8577\n",
+      "Epoch 37/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3856 - acc: 0.8532 - val_loss: 0.3857 - val_acc: 0.8591\n",
+      "Epoch 38/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3838 - acc: 0.8535 - val_loss: 0.3835 - val_acc: 0.8603\n",
+      "Epoch 39/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3830 - acc: 0.8543 - val_loss: 0.3830 - val_acc: 0.8599\n",
+      "Epoch 40/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3818 - acc: 0.8548 - val_loss: 0.3832 - val_acc: 0.8559\n",
+      "Epoch 41/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3806 - acc: 0.8551 - val_loss: 0.3845 - val_acc: 0.8553\n",
+      "Epoch 42/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3803 - acc: 0.8550 - val_loss: 0.3789 - val_acc: 0.8617\n",
+      "Epoch 43/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3791 - acc: 0.8556 - val_loss: 0.3835 - val_acc: 0.8580\n",
+      "Epoch 44/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3778 - acc: 0.8565 - val_loss: 0.3799 - val_acc: 0.8580\n",
+      "Epoch 45/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3766 - acc: 0.8571 - val_loss: 0.3790 - val_acc: 0.8625\n",
+      "Epoch 46/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3770 - acc: 0.8569 - val_loss: 0.3820 - val_acc: 0.8590\n",
+      "Epoch 47/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3761 - acc: 0.8573 - val_loss: 0.3831 - val_acc: 0.8581\n",
+      "Epoch 48/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3739 - acc: 0.8579 - val_loss: 0.3828 - val_acc: 0.8599\n",
+      "Epoch 49/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3738 - acc: 0.8577 - val_loss: 0.3785 - val_acc: 0.8590\n",
+      "Epoch 50/50\n",
+      "549367/549367 [==============================] - 33s 60us/step - loss: 0.3726 - acc: 0.8580 - val_loss: 0.3820 - val_acc: 0.8585\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<keras.callbacks.History at 0x7f5c9f49c438>"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m.fit([text_vectors, hypothesis_vectors], labels, batch_size=1024, epochs=50,validation_data=([text_vectors_test, hypothesis_vectors_test], labels_test))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The result is broadly in the region reported by Parikh et al: ~86 vs 86.3%.  The small difference might be accounted for by differences in `max_length` (here set at 50), by differences in the training regime, and by the fact that here we use Keras' built-in validation splitting rather than the SNLI test set."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Experiment: the asymmetric model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "It was suggested earlier that, based on the semantics of entailment, the vector representing the strength of association from the hypothesis to the text is all that is needed for classifying the entailment.\n",
+    "\n",
+    "The following model removes consideration of the complementary vector (text to hypothesis) from the computation.  This will decrease the parameter count slightly, because the final dense layers will be smaller, and speed up the forward pass when predicting, because fewer calculations will be needed."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "__________________________________________________________________________________________________\n",
+      "Layer (type)                    Output Shape         Param #     Connected to                     \n",
+      "==================================================================================================\n",
+      "words2 (InputLayer)             (None, 50)           0                                            \n",
+      "__________________________________________________________________________________________________\n",
+      "words1 (InputLayer)             (None, 50)           0                                            \n",
+      "__________________________________________________________________________________________________\n",
+      "sequential_5 (Sequential)       (None, 50, 200)      321381600   words1[0][0]                     \n",
+      "                                                                 words2[0][0]                     \n",
+      "__________________________________________________________________________________________________\n",
+      "sequential_6 (Sequential)       (None, 50, 200)      80400       sequential_5[1][0]               \n",
+      "                                                                 sequential_5[2][0]               \n",
+      "__________________________________________________________________________________________________\n",
+      "dot_4 (Dot)                     (None, 50, 50)       0           sequential_6[1][0]               \n",
+      "                                                                 sequential_6[2][0]               \n",
+      "__________________________________________________________________________________________________\n",
+      "lambda_5 (Lambda)               (None, 50, 50)       0           dot_4[0][0]                      \n",
+      "__________________________________________________________________________________________________\n",
+      "dot_5 (Dot)                     (None, 50, 200)      0           lambda_5[0][0]                   \n",
+      "                                                                 sequential_5[1][0]               \n",
+      "__________________________________________________________________________________________________\n",
+      "concatenate_4 (Concatenate)     (None, 50, 400)      0           sequential_5[2][0]               \n",
+      "                                                                 dot_5[0][0]                      \n",
+      "__________________________________________________________________________________________________\n",
+      "time_distributed_5 (TimeDistrib (None, 50, 200)      120400      concatenate_4[0][0]              \n",
+      "__________________________________________________________________________________________________\n",
+      "lambda_6 (Lambda)               (None, 200)          0           time_distributed_5[0][0]         \n",
+      "__________________________________________________________________________________________________\n",
+      "sequential_8 (Sequential)       (None, 200)          80400       lambda_6[0][0]                   \n",
+      "__________________________________________________________________________________________________\n",
+      "dense_16 (Dense)                (None, 3)            603         sequential_8[1][0]               \n",
+      "==================================================================================================\n",
+      "Total params: 321,663,403\n",
+      "Trainable params: 341,803\n",
+      "Non-trainable params: 321,321,600\n",
+      "__________________________________________________________________________________________________\n"
+     ]
+    }
+   ],
+   "source": [
+    "m1 = build_model(sem_vectors, 50, 200, 3, 200, 'left')\n",
+    "m1.summary()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The parameter count has indeed decreased by 40,000, corresponding to the 200x200 smaller H function."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Train on 549367 samples, validate on 9824 samples\n",
+      "Epoch 1/50\n",
+      "549367/549367 [==============================] - 25s 46us/step - loss: 0.7331 - acc: 0.6770 - val_loss: 0.5257 - val_acc: 0.7936\n",
+      "Epoch 2/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.5518 - acc: 0.7799 - val_loss: 0.4717 - val_acc: 0.8159\n",
+      "Epoch 3/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.5147 - acc: 0.7967 - val_loss: 0.4449 - val_acc: 0.8278\n",
+      "Epoch 4/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4948 - acc: 0.8060 - val_loss: 0.4326 - val_acc: 0.8344\n",
+      "Epoch 5/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4814 - acc: 0.8122 - val_loss: 0.4247 - val_acc: 0.8359\n",
+      "Epoch 6/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4712 - acc: 0.8162 - val_loss: 0.4143 - val_acc: 0.8430\n",
+      "Epoch 7/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4635 - acc: 0.8205 - val_loss: 0.4172 - val_acc: 0.8401\n",
+      "Epoch 8/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4570 - acc: 0.8223 - val_loss: 0.4106 - val_acc: 0.8422\n",
+      "Epoch 9/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4505 - acc: 0.8259 - val_loss: 0.4043 - val_acc: 0.8451\n",
+      "Epoch 10/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4459 - acc: 0.8280 - val_loss: 0.4050 - val_acc: 0.8467\n",
+      "Epoch 11/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4405 - acc: 0.8300 - val_loss: 0.3975 - val_acc: 0.8481\n",
+      "Epoch 12/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4360 - acc: 0.8324 - val_loss: 0.4026 - val_acc: 0.8496\n",
+      "Epoch 13/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4327 - acc: 0.8334 - val_loss: 0.4024 - val_acc: 0.8471\n",
+      "Epoch 14/50\n",
+      "549367/549367 [==============================] - 24s 45us/step - loss: 0.4293 - acc: 0.8350 - val_loss: 0.3955 - val_acc: 0.8496\n",
+      "Epoch 15/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4263 - acc: 0.8369 - val_loss: 0.3980 - val_acc: 0.8490\n",
+      "Epoch 16/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4236 - acc: 0.8377 - val_loss: 0.3958 - val_acc: 0.8496\n",
+      "Epoch 17/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4213 - acc: 0.8384 - val_loss: 0.3954 - val_acc: 0.8496\n",
+      "Epoch 18/50\n",
+      "549367/549367 [==============================] - 24s 45us/step - loss: 0.4187 - acc: 0.8394 - val_loss: 0.3929 - val_acc: 0.8514\n",
+      "Epoch 19/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4157 - acc: 0.8409 - val_loss: 0.3939 - val_acc: 0.8507\n",
+      "Epoch 20/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4135 - acc: 0.8417 - val_loss: 0.3953 - val_acc: 0.8522\n",
+      "Epoch 21/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4122 - acc: 0.8424 - val_loss: 0.3974 - val_acc: 0.8506\n",
+      "Epoch 22/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4099 - acc: 0.8435 - val_loss: 0.3918 - val_acc: 0.8522\n",
+      "Epoch 23/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4075 - acc: 0.8443 - val_loss: 0.3901 - val_acc: 0.8513\n",
+      "Epoch 24/50\n",
+      "549367/549367 [==============================] - 24s 44us/step - loss: 0.4067 - acc: 0.8447 - val_loss: 0.3885 - val_acc: 0.8543\n",
+      "Epoch 25/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4047 - acc: 0.8454 - val_loss: 0.3846 - val_acc: 0.8531\n",
+      "Epoch 26/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.4031 - acc: 0.8461 - val_loss: 0.3864 - val_acc: 0.8562\n",
+      "Epoch 27/50\n",
+      "549367/549367 [==============================] - 24s 45us/step - loss: 0.4020 - acc: 0.8467 - val_loss: 0.3874 - val_acc: 0.8546\n",
+      "Epoch 28/50\n",
+      "549367/549367 [==============================] - 24s 45us/step - loss: 0.4001 - acc: 0.8473 - val_loss: 0.3848 - val_acc: 0.8534\n",
+      "Epoch 29/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3991 - acc: 0.8479 - val_loss: 0.3865 - val_acc: 0.8562\n",
+      "Epoch 30/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3976 - acc: 0.8484 - val_loss: 0.3833 - val_acc: 0.8574\n",
+      "Epoch 31/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3961 - acc: 0.8487 - val_loss: 0.3846 - val_acc: 0.8585\n",
+      "Epoch 32/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3942 - acc: 0.8498 - val_loss: 0.3805 - val_acc: 0.8573\n",
+      "Epoch 33/50\n",
+      "549367/549367 [==============================] - 24s 44us/step - loss: 0.3935 - acc: 0.8503 - val_loss: 0.3856 - val_acc: 0.8579\n",
+      "Epoch 34/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3923 - acc: 0.8507 - val_loss: 0.3829 - val_acc: 0.8560\n",
+      "Epoch 35/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3920 - acc: 0.8508 - val_loss: 0.3864 - val_acc: 0.8575\n",
+      "Epoch 36/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3907 - acc: 0.8516 - val_loss: 0.3873 - val_acc: 0.8563\n",
+      "Epoch 37/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3891 - acc: 0.8519 - val_loss: 0.3850 - val_acc: 0.8570\n",
+      "Epoch 38/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3872 - acc: 0.8522 - val_loss: 0.3815 - val_acc: 0.8591\n",
+      "Epoch 39/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3887 - acc: 0.8520 - val_loss: 0.3829 - val_acc: 0.8590\n",
+      "Epoch 40/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3868 - acc: 0.8531 - val_loss: 0.3807 - val_acc: 0.8600\n",
+      "Epoch 41/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3859 - acc: 0.8537 - val_loss: 0.3832 - val_acc: 0.8574\n",
+      "Epoch 42/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3849 - acc: 0.8537 - val_loss: 0.3850 - val_acc: 0.8576\n",
+      "Epoch 43/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3834 - acc: 0.8541 - val_loss: 0.3825 - val_acc: 0.8563\n",
+      "Epoch 44/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3829 - acc: 0.8548 - val_loss: 0.3844 - val_acc: 0.8540\n",
+      "Epoch 45/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3816 - acc: 0.8552 - val_loss: 0.3841 - val_acc: 0.8559\n",
+      "Epoch 46/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3816 - acc: 0.8549 - val_loss: 0.3880 - val_acc: 0.8567\n",
+      "Epoch 47/50\n",
+      "549367/549367 [==============================] - 24s 45us/step - loss: 0.3799 - acc: 0.8559 - val_loss: 0.3767 - val_acc: 0.8635\n",
+      "Epoch 48/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3800 - acc: 0.8560 - val_loss: 0.3786 - val_acc: 0.8563\n",
+      "Epoch 49/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3781 - acc: 0.8563 - val_loss: 0.3812 - val_acc: 0.8596\n",
+      "Epoch 50/50\n",
+      "549367/549367 [==============================] - 25s 45us/step - loss: 0.3788 - acc: 0.8560 - val_loss: 0.3782 - val_acc: 0.8601\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<keras.callbacks.History at 0x7f5ca1bf3e48>"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m1.fit([text_vectors, hypothesis_vectors], labels, batch_size=1024, epochs=50,validation_data=([text_vectors_test, hypothesis_vectors_test], labels_test))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This model performs the same as the slightly more complex model that evaluates alignments in both directions.  Note also that processing time is improved, from about 60 down to about 45 microseconds per step. \n",
+    "\n",
+    "Let's now look at an asymmetric model that evaluates text to hypothesis comparisons.  The prediction is that such a model will correctly classify a decent proportion of the exemplars, but not as accurately as the previous two.\n",
+    "\n",
+    "We'll just use 10 epochs for expediency."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "__________________________________________________________________________________________________\n",
+      "Layer (type)                    Output Shape         Param #     Connected to                     \n",
+      "==================================================================================================\n",
+      "words1 (InputLayer)             (None, 50)           0                                            \n",
+      "__________________________________________________________________________________________________\n",
+      "words2 (InputLayer)             (None, 50)           0                                            \n",
+      "__________________________________________________________________________________________________\n",
+      "sequential_13 (Sequential)      (None, 50, 200)      321381600   words1[0][0]                     \n",
+      "                                                                 words2[0][0]                     \n",
+      "__________________________________________________________________________________________________\n",
+      "sequential_14 (Sequential)      (None, 50, 200)      80400       sequential_13[1][0]              \n",
+      "                                                                 sequential_13[2][0]              \n",
+      "__________________________________________________________________________________________________\n",
+      "dot_8 (Dot)                     (None, 50, 50)       0           sequential_14[1][0]              \n",
+      "                                                                 sequential_14[2][0]              \n",
+      "__________________________________________________________________________________________________\n",
+      "lambda_9 (Lambda)               (None, 50, 50)       0           dot_8[0][0]                      \n",
+      "__________________________________________________________________________________________________\n",
+      "dot_9 (Dot)                     (None, 50, 200)      0           lambda_9[0][0]                   \n",
+      "                                                                 sequential_13[2][0]              \n",
+      "__________________________________________________________________________________________________\n",
+      "concatenate_6 (Concatenate)     (None, 50, 400)      0           sequential_13[1][0]              \n",
+      "                                                                 dot_9[0][0]                      \n",
+      "__________________________________________________________________________________________________\n",
+      "time_distributed_9 (TimeDistrib (None, 50, 200)      120400      concatenate_6[0][0]              \n",
+      "__________________________________________________________________________________________________\n",
+      "lambda_10 (Lambda)              (None, 200)          0           time_distributed_9[0][0]         \n",
+      "__________________________________________________________________________________________________\n",
+      "sequential_16 (Sequential)      (None, 200)          80400       lambda_10[0][0]                  \n",
+      "__________________________________________________________________________________________________\n",
+      "dense_32 (Dense)                (None, 3)            603         sequential_16[1][0]              \n",
+      "==================================================================================================\n",
+      "Total params: 321,663,403\n",
+      "Trainable params: 341,803\n",
+      "Non-trainable params: 321,321,600\n",
+      "__________________________________________________________________________________________________\n"
+     ]
+    }
+   ],
+   "source": [
+    "m2 = build_model(sem_vectors, 50, 200, 3, 200, 'right')\n",
+    "m2.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Train on 455226 samples, validate on 113807 samples\n",
+      "Epoch 1/10\n",
+      "455226/455226 [==============================] - 22s 49us/step - loss: 0.8920 - acc: 0.5771 - val_loss: 0.8001 - val_acc: 0.6435\n",
+      "Epoch 2/10\n",
+      "455226/455226 [==============================] - 22s 47us/step - loss: 0.7808 - acc: 0.6553 - val_loss: 0.7267 - val_acc: 0.6855\n",
+      "Epoch 3/10\n",
+      "455226/455226 [==============================] - 22s 47us/step - loss: 0.7329 - acc: 0.6825 - val_loss: 0.6966 - val_acc: 0.7006\n",
+      "Epoch 4/10\n",
+      "455226/455226 [==============================] - 22s 47us/step - loss: 0.7055 - acc: 0.6978 - val_loss: 0.6713 - val_acc: 0.7150\n",
+      "Epoch 5/10\n",
+      "455226/455226 [==============================] - 22s 47us/step - loss: 0.6862 - acc: 0.7081 - val_loss: 0.6533 - val_acc: 0.7253\n",
+      "Epoch 6/10\n",
+      "455226/455226 [==============================] - 21s 47us/step - loss: 0.6694 - acc: 0.7179 - val_loss: 0.6472 - val_acc: 0.7277\n",
+      "Epoch 7/10\n",
+      "455226/455226 [==============================] - 22s 47us/step - loss: 0.6555 - acc: 0.7252 - val_loss: 0.6338 - val_acc: 0.7347\n",
+      "Epoch 8/10\n",
+      "455226/455226 [==============================] - 22s 48us/step - loss: 0.6434 - acc: 0.7310 - val_loss: 0.6246 - val_acc: 0.7385\n",
+      "Epoch 9/10\n",
+      "455226/455226 [==============================] - 22s 47us/step - loss: 0.6325 - acc: 0.7367 - val_loss: 0.6164 - val_acc: 0.7424\n",
+      "Epoch 10/10\n",
+      "455226/455226 [==============================] - 22s 47us/step - loss: 0.6216 - acc: 0.7426 - val_loss: 0.6082 - val_acc: 0.7478\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<keras.callbacks.History at 0x7fa6850cf080>"
+      ]
+     },
+     "execution_count": 97,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m2.fit([text_vectors, hypothesis_vectors], labels, batch_size=1024, epochs=10,validation_split=.2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Comparing this fit to the validation accuracy of the previous two models after 10 epochs, we observe that its accuracy is roughly 10 percentage points lower.\n",
+    "\n",
+    "It is reassuring that the neural modeling here reproduces what we know from the semantics of natural language!"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/examples/pipeline/custom_attr_methods.py
+++ b/examples/pipeline/custom_attr_methods.py
@ -19,39 +19,40 @@ from pathlib import Path


@plac.annotations(
-    output_dir=("Output directory for saved HTML", "positional", None, Path))
+    output_dir=("Output directory for saved HTML", "positional", None, Path)
+)
 def main(output_dir=None):
    nlp = English()  # start off with blank English class

-    Doc.set_extension('overlap', method=overlap_tokens)
-    doc1 = nlp(u"Peach emoji is where it has always been.")
-    doc2 = nlp(u"Peach is the superior emoji.")
+    Doc.set_extension("overlap", method=overlap_tokens)
+    doc1 = nlp("Peach emoji is where it has always been.")
+    doc2 = nlp("Peach is the superior emoji.")
    print("Text 1:", doc1.text)
    print("Text 2:", doc2.text)
    print("Overlapping tokens:", doc1._.overlap(doc2))

-    Doc.set_extension('to_html', method=to_html)
-    doc = nlp(u"This is a sentence about Apple.")
+    Doc.set_extension("to_html", method=to_html)
+    doc = nlp("This is a sentence about Apple.")
    # add entity manually for demo purposes, to make it work without a model
-    doc.ents = [Span(doc, 5, 6, label=nlp.vocab.strings['ORG'])]
+    doc.ents = [Span(doc, 5, 6, label=nlp.vocab.strings["ORG"])]
    print("Text:", doc.text)
-    doc._.to_html(output=output_dir, style='ent')
+    doc._.to_html(output=output_dir, style="ent")


-def to_html(doc, output='/tmp', style='dep'):
+def to_html(doc, output="/tmp", style="dep"):
    """Doc method extension for saving the current state as a displaCy
    visualization.
    """
    # generate filename from first six non-punct tokens
-    file_name = '-'.join([w.text for w in doc[:6] if not w.is_punct]) + '.html'
+    file_name = "-".join([w.text for w in doc[:6] if not w.is_punct]) + ".html"
    html = displacy.render(doc, style=style, page=True)  # render markup
    if output is not None:
        output_path = Path(output)
        if not output_path.exists():
            output_path.mkdir()
        output_file = Path(output) / file_name
-        output_file.open('w', encoding='utf-8').write(html)  # save to file
-        print('Saved HTML to {}'.format(output_file))
+        output_file.open("w", encoding="utf-8").write(html)  # save to file
+        print("Saved HTML to {}".format(output_file))
    else:
        print(html)

@ -67,7 +68,7 @@ def overlap_tokens(doc, other_doc):
    return overlap


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)

    # Expected output:
--- a/examples/pipeline/custom_component_countries_api.py
+++ b/examples/pipeline/custom_component_countries_api.py
@ -26,14 +26,18 @@ def main():
    nlp = English()
    rest_countries = RESTCountriesComponent(nlp)  # initialise component
    nlp.add_pipe(rest_countries)  # add it to the pipeline
-    doc = nlp(u"Some text about Colombia and the Czech Republic")
-    print('Pipeline', nlp.pipe_names)  # pipeline contains component name
-    print('Doc has countries', doc._.has_country)  # Doc contains countries
+    doc = nlp("Some text about Colombia and the Czech Republic")
+    print("Pipeline", nlp.pipe_names)  # pipeline contains component name
+    print("Doc has countries", doc._.has_country)  # Doc contains countries
    for token in doc:
        if token._.is_country:
-            print(token.text, token._.country_capital, token._.country_latlng,
-                token._.country_flag)  # country data
-    print('Entities', [(e.text, e.label_) for e in doc.ents])  # entities
+            print(
+                token.text,
+                token._.country_capital,
+                token._.country_latlng,
+                token._.country_flag,
+            )  # country data
+    print("Entities", [(e.text, e.label_) for e in doc.ents])  # entities


 class RESTCountriesComponent(object):
@ -41,42 +45,42 @@ class RESTCountriesComponent(object):
    the REST Countries API, merges country names into one token, assigns entity
    labels and sets attributes on country tokens.
    """
-    name = 'rest_countries' # component name, will show up in the pipeline

-    def __init__(self, nlp, label='GPE'):
+    name = "rest_countries"  # component name, will show up in the pipeline
+
+    def __init__(self, nlp, label="GPE"):
        """Initialise the pipeline component. The shared nlp instance is used
        to initialise the matcher with the shared vocab, get the label ID and
        generate Doc objects as phrase match patterns.
        """
        # Make request once on initialisation and store the data
-        r = requests.get('https://restcountries.eu/rest/v2/all')
+        r = requests.get("https://restcountries.eu/rest/v2/all")
        r.raise_for_status()  # make sure requests raises an error if it fails
        countries = r.json()

        # Convert API response to dict keyed by country name for easy lookup
        # This could also be extended using the alternative and foreign language
        # names provided by the API
-        self.countries = {c['name']: c for c in countries}
+        self.countries = {c["name"]: c for c in countries}
        self.label = nlp.vocab.strings[label]  # get entity label ID

        # Set up the PhraseMatcher with Doc patterns for each country name
        patterns = [nlp(c) for c in self.countries.keys()]
        self.matcher = PhraseMatcher(nlp.vocab)
-        self.matcher.add('COUNTRIES', None, *patterns)
+        self.matcher.add("COUNTRIES", None, *patterns)

        # Register attribute on the Token. We'll be overwriting this based on
        # the matches, so we're only setting a default value, not a getter.
        # If no default value is set, it defaults to None.
-        Token.set_extension('is_country', default=False)
-        Token.set_extension('country_capital', default=False)
-        Token.set_extension('country_latlng', default=False)
-        Token.set_extension('country_flag', default=False)
+        Token.set_extension("is_country", default=False)
+        Token.set_extension("country_capital", default=False)
+        Token.set_extension("country_latlng", default=False)
+        Token.set_extension("country_flag", default=False)

        # Register attributes on Doc and Span via a getter that checks if one of
        # the contained tokens is set to is_country == True.
-        Doc.set_extension('has_country', getter=self.has_country)
-        Span.set_extension('has_country', getter=self.has_country)
-
+        Doc.set_extension("has_country", getter=self.has_country)
+        Span.set_extension("has_country", getter=self.has_country)

    def __call__(self, doc):
        """Apply the pipeline component on a Doc object and modify it if matches
@ -93,10 +97,10 @@ class RESTCountriesComponent(object):
            # Can be extended with other data returned by the API, like
            # currencies, country code, flag, calling code etc.
            for token in entity:
-                token._.set('is_country', True)
-                token._.set('country_capital', self.countries[entity.text]['capital'])
-                token._.set('country_latlng', self.countries[entity.text]['latlng'])
-                token._.set('country_flag', self.countries[entity.text]['flag'])
+                token._.set("is_country", True)
+                token._.set("country_capital", self.countries[entity.text]["capital"])
+                token._.set("country_latlng", self.countries[entity.text]["latlng"])
+                token._.set("country_flag", self.countries[entity.text]["flag"])
            # Overwrite doc.ents and add entity – be careful not to replace!
            doc.ents = list(doc.ents) + [entity]
        for span in spans:
@ -111,10 +115,10 @@ class RESTCountriesComponent(object):
        is a country. Since the getter is only called when we access the
        attribute, we can refer to the Token's 'is_country' attribute here,
        which is already set in the processing step."""
-        return any([t._.get('is_country') for t in tokens])
+        return any([t._.get("is_country") for t in tokens])


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)

    # Expected output:
--- a/examples/pipeline/custom_component_entities.py
+++ b/examples/pipeline/custom_component_entities.py
@ -20,23 +20,24 @@ from spacy.tokens import Doc, Span, Token

@plac.annotations(
    text=("Text to process", "positional", None, str),
-    companies=("Names of technology companies", "positional", None, str))
+    companies=("Names of technology companies", "positional", None, str),
+)
 def main(text="Alphabet Inc. is the company behind Google.", *companies):
    # For simplicity, we start off with only the blank English Language class
    # and no model or pre-defined pipeline loaded.
    nlp = English()
    if not companies:  # set default companies if none are set via args
-        companies = ['Alphabet Inc.', 'Google', 'Netflix', 'Apple']  # etc.
+        companies = ["Alphabet Inc.", "Google", "Netflix", "Apple"]  # etc.
    component = TechCompanyRecognizer(nlp, companies)  # initialise component
    nlp.add_pipe(component, last=True)  # add last to the pipeline

    doc = nlp(text)
-    print('Pipeline', nlp.pipe_names)  # pipeline contains component name
-    print('Tokens', [t.text for t in doc])  # company names from the list are merged
-    print('Doc has_tech_org', doc._.has_tech_org)  # Doc contains tech orgs
-    print('Token 0 is_tech_org', doc[0]._.is_tech_org)  # "Alphabet Inc." is a tech org
-    print('Token 1 is_tech_org', doc[1]._.is_tech_org)  # "is" is not
-    print('Entities', [(e.text, e.label_) for e in doc.ents])  # all orgs are entities
+    print("Pipeline", nlp.pipe_names)  # pipeline contains component name
+    print("Tokens", [t.text for t in doc])  # company names from the list are merged
+    print("Doc has_tech_org", doc._.has_tech_org)  # Doc contains tech orgs
+    print("Token 0 is_tech_org", doc[0]._.is_tech_org)  # "Alphabet Inc." is a tech org
+    print("Token 1 is_tech_org", doc[1]._.is_tech_org)  # "is" is not
+    print("Entities", [(e.text, e.label_) for e in doc.ents])  # all orgs are entities


 class TechCompanyRecognizer(object):
@ -45,9 +46,10 @@ class TechCompanyRecognizer(object):
    labelled as ORG and their spans are merged into one token. Additionally,
    ._.has_tech_org and ._.is_tech_org is set on the Doc/Span and Token
    respectively."""
-    name = 'tech_companies'  # component name, will show up in the pipeline

-    def __init__(self, nlp, companies=tuple(), label='ORG'):
+    name = "tech_companies"  # component name, will show up in the pipeline
+
+    def __init__(self, nlp, companies=tuple(), label="ORG"):
        """Initialise the pipeline component. The shared nlp instance is used
        to initialise the matcher with the shared vocab, get the label ID and
        generate Doc objects as phrase match patterns.
@ -58,16 +60,16 @@ class TechCompanyRecognizer(object):
        # so even if the list of companies is long, it's very efficient
        patterns = [nlp(org) for org in companies]
        self.matcher = PhraseMatcher(nlp.vocab)
-        self.matcher.add('TECH_ORGS', None, *patterns)
+        self.matcher.add("TECH_ORGS", None, *patterns)

        # Register attribute on the Token. We'll be overwriting this based on
        # the matches, so we're only setting a default value, not a getter.
-        Token.set_extension('is_tech_org', default=False)
+        Token.set_extension("is_tech_org", default=False)

        # Register attributes on Doc and Span via a getter that checks if one of
        # the contained tokens is set to is_tech_org == True.
-        Doc.set_extension('has_tech_org', getter=self.has_tech_org)
-        Span.set_extension('has_tech_org', getter=self.has_tech_org)
+        Doc.set_extension("has_tech_org", getter=self.has_tech_org)
+        Span.set_extension("has_tech_org", getter=self.has_tech_org)

    def __call__(self, doc):
        """Apply the pipeline component on a Doc object and modify it if matches
@ -82,7 +84,7 @@ class TechCompanyRecognizer(object):
            spans.append(entity)
            # Set custom attribute on each token of the entity
            for token in entity:
-                token._.set('is_tech_org', True)
+                token._.set("is_tech_org", True)
            # Overwrite doc.ents and add entity – be careful not to replace!
            doc.ents = list(doc.ents) + [entity]
        for span in spans:
@ -97,10 +99,10 @@ class TechCompanyRecognizer(object):
        is a tech org. Since the getter is only called when we access the
        attribute, we can refer to the Token's 'is_tech_org' attribute here,
        which is already set in the processing step."""
-        return any([t._.get('is_tech_org') for t in tokens])
+        return any([t._.get("is_tech_org") for t in tokens])


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)

    # Expected output:
--- a/examples/pipeline/custom_sentence_segmentation.py
+++ b/examples/pipeline/custom_sentence_segmentation.py
@ -1,4 +1,4 @@
-'''Example of adding a pipeline component to prohibit sentence boundaries
+"""Example of adding a pipeline component to prohibit sentence boundaries
 before certain tokens.

 What we do is write to the token.is_sent_start attribute, which
@ -10,16 +10,18 @@ should also improve the parse quality.
 The specific example here is drawn from https://github.com/explosion/spaCy/issues/2627
 Other versions of the model may not make the original mistake, so the specific
 example might not be apt for future versions.
-'''
+"""
 import plac
 import spacy

+
 def prevent_sentence_boundaries(doc):
    for token in doc:
        if not can_be_sentence_start(token):
            token.is_sent_start = False
    return doc

+
 def can_be_sentence_start(token):
    if token.i == 0:
        return True
@ -32,17 +34,18 @@ def can_be_sentence_start(token):
    else:
        return False

+
 def main():
-    nlp = spacy.load('en_core_web_lg')
+    nlp = spacy.load("en_core_web_lg")
    raw_text = "Been here and I'm loving it."
    doc = nlp(raw_text)
    sentences = [sent.string.strip() for sent in doc.sents]
    print(sentences)
-    nlp.add_pipe(prevent_sentence_boundaries, before='parser')
+    nlp.add_pipe(prevent_sentence_boundaries, before="parser")
    doc = nlp(raw_text)
    sentences = [sent.string.strip() for sent in doc.sents]
    print(sentences)


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)
--- a/examples/pipeline/fix_space_entities.py
+++ b/examples/pipeline/fix_space_entities.py
@ -0,0 +1,30 @@
+"""Demonstrate adding a rule-based component that forces some tokens to not
+be entities, before the NER tagger is applied. This is used to hotfix the issue
+in https://github.com/explosion/spaCy/issues/2870 , present as of spaCy v2.0.16.
+"""
+import spacy
+from spacy.attrs import ENT_IOB
+
+
+def fix_space_tags(doc):
+    """Force the 'O' entity tag onto whitespace tokens and return the Doc."""
+    iob_values = doc.to_array([ENT_IOB])
+    # ENT_IOB codes: 0 is None, so I is 1 and O is 2
+    for position in (i for i, tok in enumerate(doc) if tok.is_space):
+        iob_values[position] = 2
+    doc.from_array([ENT_IOB], iob_values.reshape((len(doc), 1)))
+    return doc
+
+
+def main():
+    """Print the entities found in a sample text before and after adding the fix."""
+    nlp = spacy.load("en_core_web_sm")
+    sample = u"""This is some crazy test where I dont need an Apple                Watch to make things bug"""
+    print("Before", nlp(sample).ents)
+    # Insert the fix ahead of "ner" so it runs before the entity recognizer.
+    nlp.add_pipe(fix_space_tags, name="fix-ner", before="ner")
+    print("After", nlp(sample).ents)
+
+
+if __name__ == "__main__":
+    main()
--- a/examples/pipeline/multi_processing.py
+++ b/examples/pipeline/multi_processing.py
@ -9,12 +9,14 @@ built-in dataset loader.
 Compatible with: spaCy v2.0.0+
 """
 from __future__ import print_function, unicode_literals
-from toolz import partition_all
+
 from pathlib import Path
 from joblib import Parallel, delayed
+from functools import partial
 import thinc.extra.datasets
 import plac
 import spacy
+from spacy.util import minibatch


@plac.annotations(
@ -22,9 +24,9 @@ import spacy
    model=("Model name (needs tagger)", "positional", None, str),
    n_jobs=("Number of workers", "option", "n", int),
    batch_size=("Batch-size for each process", "option", "b", int),
-    limit=("Limit of entries from the dataset", "option", "l", int))
-def main(output_dir, model='en_core_web_sm', n_jobs=4, batch_size=1000,
-         limit=10000):
+    limit=("Limit of entries from the dataset", "option", "l", int),
+)
+def main(output_dir, model="en_core_web_sm", n_jobs=4, batch_size=1000, limit=10000):
    nlp = spacy.load(model)  # load spaCy model
    print("Loaded model '%s'" % model)
    if not output_dir.exists():
@ -34,45 +36,47 @@ def main(output_dir, model='en_core_web_sm', n_jobs=4, batch_size=1000,
    data, _ = thinc.extra.datasets.imdb()
    texts, _ = zip(*data[-limit:])
    print("Processing texts...")
-    partitions = partition_all(batch_size, texts)
-    executor = Parallel(n_jobs=n_jobs)
-    do = delayed(transform_texts)
-    tasks = (do(nlp, i, batch, output_dir)
-             for i, batch in enumerate(partitions))
+    partitions = minibatch(texts, size=batch_size)
+    executor = Parallel(n_jobs=n_jobs, backend="multiprocessing", prefer="processes")
+    do = delayed(partial(transform_texts, nlp))
+    tasks = (do(i, batch, output_dir) for i, batch in enumerate(partitions))
    executor(tasks)


 def transform_texts(nlp, batch_id, texts, output_dir):
    print(nlp.pipe_names)
-    out_path = Path(output_dir) / ('%d.txt' % batch_id)
+    out_path = Path(output_dir) / ("%d.txt" % batch_id)
    if out_path.exists():  # return None in case same batch is called again
        return None
-    print('Processing batch', batch_id)
-    with out_path.open('w', encoding='utf8') as f:
+    print("Processing batch", batch_id)
+    with out_path.open("w", encoding="utf8") as f:
        for doc in nlp.pipe(texts):
-            f.write(' '.join(represent_word(w) for w in doc if not w.is_space))
-            f.write('\n')
-    print('Saved {} texts to {}.txt'.format(len(texts), batch_id))
+            f.write(" ".join(represent_word(w) for w in doc if not w.is_space))
+            f.write("\n")
+    print("Saved {} texts to {}.txt".format(len(texts), batch_id))


 def represent_word(word):
    text = word.text
    # True-case, i.e. try to normalize sentence-initial capitals.
    # Only do this if the lower-cased form is more probable.
-    if text.istitle() and is_sent_begin(word) \
-       and word.prob < word.doc.vocab[text.lower()].prob:
+    if (
+        text.istitle()
+        and is_sent_begin(word)
+        and word.prob < word.doc.vocab[text.lower()].prob
+    ):
        text = text.lower()
-    return text + '|' + word.tag_
+    return text + "|" + word.tag_


 def is_sent_begin(word):
    if word.i == 0:
        return True
-    elif word.i >= 2 and word.nbor(-1).text in ('.', '!', '?', '...'):
+    elif word.i >= 2 and word.nbor(-1).text in (".", "!", "?", "..."):
        return True
    else:
        return False


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)
--- a/examples/training/conllu.py
+++ b/examples/training/conllu.py
@ -1,6 +1,6 @@
-'''Train for CONLL 2017 UD treebank evaluation. Takes .conllu files, writes
+"""Train for CONLL 2017 UD treebank evaluation. Takes .conllu files, writes
 .conllu format for development data, allowing the official scorer to be used.
-'''
+"""
 from __future__ import unicode_literals
 import plac
 import tqdm
@ -22,7 +22,6 @@ from spacy.matcher import Matcher
 import itertools
 import random
 import numpy.random
-import cytoolz

 import conll17_ud_eval

@ -35,6 +34,7 @@ spacy.lang.ja.Japanese.Defaults.use_janome = False
 random.seed(0)
 numpy.random.seed(0)

+
 def minibatch_by_words(items, size=5000):
    random.shuffle(items)
    if isinstance(size, int):
@ -59,21 +59,31 @@ def minibatch_by_words(items, size=5000):
        else:
            break

+
 ################
 # Data reading #
 ################

-space_re = re.compile('\s+')
+space_re = re.compile(r"\s+")  # raw string: "\s" is an invalid escape in a plain literal
+
+
 def split_text(text):
-    return [space_re.sub(' ', par.strip()) for par in text.split('\n\n')]
+    return [space_re.sub(" ", par.strip()) for par in text.split("\n\n")]


-def read_data(nlp, conllu_file, text_file, raw_text=True, oracle_segments=False,
-              max_doc_length=None, limit=None):
-    '''Read the CONLLU format into (Doc, GoldParse) tuples. If raw_text=True,
+def read_data(
+    nlp,
+    conllu_file,
+    text_file,
+    raw_text=True,
+    oracle_segments=False,
+    max_doc_length=None,
+    limit=None,
+):
+    """Read the CONLLU format into (Doc, GoldParse) tuples. If raw_text=True,
    include Doc objects created using nlp.make_doc and then aligned against
    the gold-standard sequences. If oracle_segments=True, include Doc objects
-    created from the gold-standard segments. At least one must be True.'''
+    created from the gold-standard segments. At least one must be True."""
    if not raw_text and not oracle_segments:
        raise ValueError("At least one of raw_text or oracle_segments must be True")
    paragraphs = split_text(text_file.read())
@ -87,22 +97,21 @@ def read_data(nlp, conllu_file, text_file, raw_text=True, oracle_segments=False,
        for cs in cd:
            sent = defaultdict(list)
            for id_, word, lemma, pos, tag, morph, head, dep, _, space_after in cs:
-                if '.' in id_:
+                if "." in id_:
                    continue
-                if '-' in id_:
+                if "-" in id_:
                    continue
-                id_ = int(id_)-1
-                head = int(head)-1 if head != '0' else id_
-                sent['words'].append(word)
-                sent['tags'].append(tag)
-                sent['heads'].append(head)
-                sent['deps'].append('ROOT' if dep == 'root' else dep)
-                sent['spaces'].append(space_after == '_')
-            sent['entities'] = ['-'] * len(sent['words'])
-            sent['heads'], sent['deps'] = projectivize(sent['heads'],
-                                                       sent['deps'])
+                id_ = int(id_) - 1
+                head = int(head) - 1 if head != "0" else id_
+                sent["words"].append(word)
+                sent["tags"].append(tag)
+                sent["heads"].append(head)
+                sent["deps"].append("ROOT" if dep == "root" else dep)
+                sent["spaces"].append(space_after == "_")
+            sent["entities"] = ["-"] * len(sent["words"])
+            sent["heads"], sent["deps"] = projectivize(sent["heads"], sent["deps"])
            if oracle_segments:
-                docs.append(Doc(nlp.vocab, words=sent['words'], spaces=sent['spaces']))
+                docs.append(Doc(nlp.vocab, words=sent["words"], spaces=sent["spaces"]))
                golds.append(GoldParse(docs[-1], **sent))

            sent_annots.append(sent)
@ -128,18 +137,18 @@ def read_conllu(file_):
    sent = []
    doc = []
    for line in file_:
-        if line.startswith('# newdoc'):
+        if line.startswith("# newdoc"):
            if doc:
                docs.append(doc)
            doc = []
-        elif line.startswith('#'):
+        elif line.startswith("#"):
            continue
        elif not line.strip():
            if sent:
                doc.append(sent)
            sent = []
        else:
-            sent.append(list(line.strip().split('\t')))
+            sent.append(list(line.strip().split("\t")))
            if len(sent[-1]) != 10:
                print(repr(line))
                raise ValueError
@ -154,25 +163,29 @@ def _make_gold(nlp, text, sent_annots):
    # Flatten the conll annotations, and adjust the head indices
    flat = defaultdict(list)
    for sent in sent_annots:
-        flat['heads'].extend(len(flat['words'])+head for head in sent['heads'])
-        for field in ['words', 'tags', 'deps', 'entities', 'spaces']:
+        flat["heads"].extend(len(flat["words"]) + head for head in sent["heads"])
+        for field in ["words", "tags", "deps", "entities", "spaces"]:
            flat[field].extend(sent[field])
    # Construct text if necessary
-    assert len(flat['words']) == len(flat['spaces'])
+    assert len(flat["words"]) == len(flat["spaces"])
    if text is None:
-        text = ''.join(word+' '*space for word, space in zip(flat['words'], flat['spaces'])) 
+        text = "".join(
+            word + " " * space for word, space in zip(flat["words"], flat["spaces"])
+        )
    doc = nlp.make_doc(text)
-    flat.pop('spaces')
+    flat.pop("spaces")
    gold = GoldParse(doc, **flat)
    return doc, gold

+
 #############################
 # Data transforms for spaCy #
 #############################

+
 def golds_to_gold_tuples(docs, golds):
-    '''Get out the annoying 'tuples' format used by begin_training, given the
-    GoldParse objects.'''
+    """Get out the annoying 'tuples' format used by begin_training, given the
+    GoldParse objects."""
    tuples = []
    for doc, gold in zip(docs, golds):
        text = doc.text
@ -186,15 +199,16 @@ def golds_to_gold_tuples(docs, golds):
 # Evaluation #
 ##############

+
 def evaluate(nlp, text_loc, gold_loc, sys_loc, limit=None):
-    with text_loc.open('r', encoding='utf8') as text_file:
+    with text_loc.open("r", encoding="utf8") as text_file:
        texts = split_text(text_file.read())
        docs = list(nlp.pipe(texts))
-    with sys_loc.open('w', encoding='utf8') as out_file:
+    with sys_loc.open("w", encoding="utf8") as out_file:
        write_conllu(docs, out_file)
-    with gold_loc.open('r', encoding='utf8') as gold_file:
+    with gold_loc.open("r", encoding="utf8") as gold_file:
        gold_ud = conll17_ud_eval.load_conllu(gold_file)
-        with sys_loc.open('r', encoding='utf8') as sys_file:
+        with sys_loc.open("r", encoding="utf8") as sys_file:
            sys_ud = conll17_ud_eval.load_conllu(sys_file)
        scores = conll17_ud_eval.evaluate(gold_ud, sys_ud)
    return scores
@ -202,10 +216,10 @@ def evaluate(nlp, text_loc, gold_loc, sys_loc, limit=None):

 def write_conllu(docs, file_):
    merger = Matcher(docs[0].vocab)
-    merger.add('SUBTOK', None, [{'DEP': 'subtok', 'op': '+'}])
+    merger.add("SUBTOK", None, [{"DEP": "subtok", "op": "+"}])
    for i, doc in enumerate(docs):
        matches = merger(doc)
-        spans = [doc[start:end+1] for _, start, end in matches]
+        spans = [doc[start : end + 1] for _, start, end in matches]
        offsets = [(span.start_char, span.end_char) for span in spans]
        for start_char, end_char in offsets:
            doc.merge(start_char, end_char)
@ -214,58 +228,73 @@ def write_conllu(docs, file_):
            file_.write("# sent_id = {i}.{j}\n".format(i=i, j=j))
            file_.write("# text = {text}\n".format(text=sent.text))
            for k, token in enumerate(sent):
-                file_.write(token._.get_conllu_lines(k) + '\n')
-            file_.write('\n')
+                file_.write(token._.get_conllu_lines(k) + "\n")
+            file_.write("\n")


 def print_progress(itn, losses, ud_scores):
    fields = {
-        'dep_loss': losses.get('parser', 0.0),
-        'tag_loss': losses.get('tagger', 0.0),
-        'words': ud_scores['Words'].f1 * 100,
-        'sents': ud_scores['Sentences'].f1 * 100,
-        'tags': ud_scores['XPOS'].f1 * 100,
-        'uas': ud_scores['UAS'].f1 * 100,
-        'las': ud_scores['LAS'].f1 * 100,
+        "dep_loss": losses.get("parser", 0.0),
+        "tag_loss": losses.get("tagger", 0.0),
+        "words": ud_scores["Words"].f1 * 100,
+        "sents": ud_scores["Sentences"].f1 * 100,
+        "tags": ud_scores["XPOS"].f1 * 100,
+        "uas": ud_scores["UAS"].f1 * 100,
+        "las": ud_scores["LAS"].f1 * 100,
    }
-    header = ['Epoch', 'Loss', 'LAS', 'UAS', 'TAG', 'SENT', 'WORD']
+    header = ["Epoch", "Loss", "LAS", "UAS", "TAG", "SENT", "WORD"]
    if itn == 0:
-        print('\t'.join(header))
-    tpl = '\t'.join((
-        '{:d}',
-        '{dep_loss:.1f}',
-        '{las:.1f}',
-        '{uas:.1f}',
-        '{tags:.1f}',
-        '{sents:.1f}',
-        '{words:.1f}',
-    ))
+        print("\t".join(header))
+    tpl = "\t".join(
+        (
+            "{:d}",
+            "{dep_loss:.1f}",
+            "{las:.1f}",
+            "{uas:.1f}",
+            "{tags:.1f}",
+            "{sents:.1f}",
+            "{words:.1f}",
+        )
+    )
    print(tpl.format(itn, **fields))

-#def get_sent_conllu(sent, sent_id):
+
+# def get_sent_conllu(sent, sent_id):
 #    lines = ["# sent_id = {sent_id}".format(sent_id=sent_id)]

+
 def get_token_conllu(token, i):
    if token._.begins_fused:
        n = 1
        while token.nbor(n)._.inside_fused:
            n += 1
-        id_ = '%d-%d' % (i, i+n)
-        lines = [id_, token.text, '_', '_', '_', '_', '_', '_', '_', '_']
+        id_ = "%d-%d" % (i, i + n)
+        lines = [id_, token.text, "_", "_", "_", "_", "_", "_", "_", "_"]
    else:
        lines = []
    if token.head.i == token.i:
        head = 0
    else:
        head = i + (token.head.i - token.i) + 1
-    fields = [str(i+1), token.text, token.lemma_, token.pos_, token.tag_, '_',
-              str(head), token.dep_.lower(), '_', '_']
-    lines.append('\t'.join(fields))
-    return '\n'.join(lines)
+    fields = [
+        str(i + 1),
+        token.text,
+        token.lemma_,
+        token.pos_,
+        token.tag_,
+        "_",
+        str(head),
+        token.dep_.lower(),
+        "_",
+        "_",
+    ]
+    lines.append("\t".join(fields))
+    return "\n".join(lines)

-Token.set_extension('get_conllu_lines', method=get_token_conllu)
-Token.set_extension('begins_fused', default=False)
-Token.set_extension('inside_fused', default=False)
+
+Token.set_extension("get_conllu_lines", method=get_token_conllu)
+Token.set_extension("begins_fused", default=False)
+Token.set_extension("inside_fused", default=False)


 ##################
@ -274,31 +303,32 @@ Token.set_extension('inside_fused', default=False)


 def load_nlp(corpus, config):
-    lang = corpus.split('_')[0]
+    lang = corpus.split("_")[0]
    nlp = spacy.blank(lang)
    if config.vectors:
-        nlp.vocab.from_disk(config.vectors / 'vocab')
+        nlp.vocab.from_disk(config.vectors / "vocab")
    return nlp

+
 def initialize_pipeline(nlp, docs, golds, config):
-    nlp.add_pipe(nlp.create_pipe('parser'))
+    nlp.add_pipe(nlp.create_pipe("parser"))
    if config.multitask_tag:
-        nlp.parser.add_multitask_objective('tag')
+        nlp.parser.add_multitask_objective("tag")
    if config.multitask_sent:
-        nlp.parser.add_multitask_objective('sent_start')
-    nlp.parser.moves.add_action(2, 'subtok')
-    nlp.add_pipe(nlp.create_pipe('tagger'))
+        nlp.parser.add_multitask_objective("sent_start")
+    nlp.parser.moves.add_action(2, "subtok")
+    nlp.add_pipe(nlp.create_pipe("tagger"))
    for gold in golds:
        for tag in gold.tags:
            if tag is not None:
                nlp.tagger.add_label(tag)
    # Replace labels that didn't make the frequency cutoff
    actions = set(nlp.parser.labels)
-    label_set = set([act.split('-')[1] for act in actions if '-' in act])
+    label_set = set([act.split("-")[1] for act in actions if "-" in act])
    for gold in golds:
        for i, label in enumerate(gold.labels):
            if label is not None and label not in label_set:
-                gold.labels[i] = label.split('||')[0]
+                gold.labels[i] = label.split("||")[0]
    return nlp.begin_training(lambda: golds_to_gold_tuples(docs, golds))


@ -306,6 +336,7 @@ def initialize_pipeline(nlp, docs, golds, config):
 # Command line helpers #
 ########################

+
@attr.s
 class Config(object):
    vectors = attr.ib(default=None)
@ -318,7 +349,7 @@ class Config(object):

    @classmethod
    def load(cls, loc):
-        with Path(loc).open('r', encoding='utf8') as file_:
+        with Path(loc).open("r", encoding="utf8") as file_:
            cfg = json.load(file_)
        return cls(**cfg)

@ -331,32 +362,36 @@ class Dataset(object):
        self.text = None
        for file_path in self.path.iterdir():
            name = file_path.parts[-1]
-            if section in name and name.endswith('conllu'):
+            if section in name and name.endswith("conllu"):
                self.conllu = file_path
-            elif section in name and name.endswith('txt'):
+            elif section in name and name.endswith("txt"):
                self.text = file_path
        if self.conllu is None:
            msg = "Could not find .txt file in {path} for {section}"
            raise IOError(msg.format(section=section, path=path))
        if self.text is None:
            msg = "Could not find .txt file in {path} for {section}"
-        self.lang = self.conllu.parts[-1].split('-')[0].split('_')[0]
+        self.lang = self.conllu.parts[-1].split("-")[0].split("_")[0]


 class TreebankPaths(object):
    def __init__(self, ud_path, treebank, **cfg):
-        self.train = Dataset(ud_path / treebank, 'train')
-        self.dev = Dataset(ud_path / treebank, 'dev')
+        self.train = Dataset(ud_path / treebank, "train")
+        self.dev = Dataset(ud_path / treebank, "dev")
        self.lang = self.train.lang


@plac.annotations(
    ud_dir=("Path to Universal Dependencies corpus", "positional", None, Path),
-    corpus=("UD corpus to train and evaluate on, e.g. en, es_ancora, etc",
-            "positional", None, str),
+    corpus=(
+        "UD corpus to train and evaluate on, e.g. en, es_ancora, etc",
+        "positional",
+        None,
+        str,
+    ),
    parses_dir=("Directory to write the development parses", "positional", None, Path),
    config=("Path to json formatted config file", "positional", None, Config.load),
-    limit=("Size limit", "option", "n", int)
+    limit=("Size limit", "option", "n", int),
 )
 def main(ud_dir, parses_dir, config, corpus, limit=0):
    paths = TreebankPaths(ud_dir, corpus)
@ -365,8 +400,13 @@ def main(ud_dir, parses_dir, config, corpus, limit=0):
    print("Train and evaluate", corpus, "using lang", paths.lang)
    nlp = load_nlp(paths.lang, config)

-    docs, golds = read_data(nlp, paths.train.conllu.open(), paths.train.text.open(),
-                            max_doc_length=config.max_doc_length, limit=limit)
+    docs, golds = read_data(
+        nlp,
+        paths.train.conllu.open(),
+        paths.train.text.open(),
+        max_doc_length=config.max_doc_length,
+        limit=limit,
+    )

    optimizer = initialize_pipeline(nlp, docs, golds, config)

@ -379,14 +419,19 @@ def main(ud_dir, parses_dir, config, corpus, limit=0):
            for batch in batches:
                batch_docs, batch_gold = zip(*batch)
                pbar.update(sum(len(doc) for doc in batch_docs))
-                nlp.update(batch_docs, batch_gold, sgd=optimizer,
-                           drop=config.dropout, losses=losses)
+                nlp.update(
+                    batch_docs,
+                    batch_gold,
+                    sgd=optimizer,
+                    drop=config.dropout,
+                    losses=losses,
+                )

-        out_path = parses_dir / corpus / 'epoch-{i}.conllu'.format(i=i)
+        out_path = parses_dir / corpus / "epoch-{i}.conllu".format(i=i)
        with nlp.use_params(optimizer.averages):
            scores = evaluate(nlp, paths.dev.text, paths.dev.conllu, out_path)
            print_progress(i, losses, scores)


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)
--- a/examples/training/ner_multitask_objective.py
+++ b/examples/training/ner_multitask_objective.py
@ -1,4 +1,4 @@
-'''This example shows how to add a multi-task objective that is trained
+"""This example shows how to add a multi-task objective that is trained
 alongside the entity recognizer. This is an alternative to adding features
 to the model.

@ -19,7 +19,7 @@ The specific example here is not necessarily a good idea --- but it shows
 how an arbitrary objective function for some word can be used.

 Developed and tested for spaCy 2.0.6
-'''
+"""
 import random
 import plac
 import spacy
@ -30,30 +30,29 @@ random.seed(0)

 PWD = os.path.dirname(__file__)

-TRAIN_DATA = list(read_json_file(os.path.join(PWD, 'training-data.json')))
-
+TRAIN_DATA = list(read_json_file(os.path.join(PWD, "training-data.json")))


 def get_position_label(i, words, tags, heads, labels, ents):
-    '''Return labels indicating the position of the word in the document.
-    '''
+    """Return labels indicating the position of the word in the document.
+    """
    if len(words) < 20:
-        return 'short-doc'
+        return "short-doc"
    elif i == 0:
-        return 'first-word'
+        return "first-word"
    elif i < 10:
-        return 'early-word'
+        return "early-word"
    elif i < 20:
-        return 'mid-word'
-    elif i == len(words)-1:
-        return 'last-word'
+        return "mid-word"
+    elif i == len(words) - 1:
+        return "last-word"
    else:
-        return 'late-word'
+        return "late-word"


 def main(n_iter=10):
-    nlp = spacy.blank('en')
-    ner = nlp.create_pipe('ner')
+    nlp = spacy.blank("en")
+    ner = nlp.create_pipe("ner")
    ner.add_multitask_objective(get_position_label)
    nlp.add_pipe(ner)

@ -71,15 +70,16 @@ def main(n_iter=10):
                [gold],  # batch of annotations
                drop=0.2,  # dropout - make it harder to memorise data
                sgd=optimizer,  # callable to update weights
-                losses=losses)
-        print(losses.get('nn_labeller', 0.0), losses['ner'])
+                losses=losses,
+            )
+        print(losses.get("nn_labeller", 0.0), losses["ner"])

    # test the trained model
    for text, _ in TRAIN_DATA:
        doc = nlp(text)
-        print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
-        print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])
+        print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
+        print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)
--- a/examples/training/pretrain_textcat.py
+++ b/examples/training/pretrain_textcat.py
@ -0,0 +1,216 @@
+"""This script is experimental.
+
+Try pre-training the CNN component of the text categorizer using a cheap
+language modelling-like objective. Specifically, we load pre-trained vectors
+(from something like word2vec, GloVe, FastText etc), and use the CNN to
+predict the tokens' pre-trained vectors. This isn't as easy as it sounds:
+we're not merely doing compression here, because heavy dropout is applied,
+including over the input words. This means the model must often (50% of the time)
+use the context in order to predict the word.
+
+To evaluate the technique, we're pre-training with the 50k texts from the IMDB
+corpus, and then training with only 100 labels. Note that it's a bit dirty to
+pre-train with the development data, but also not *so* terrible: we're not using
+the development labels, after all --- only the unlabelled text.
+"""
+import plac
+import random
+import spacy
+import thinc.extra.datasets
+from spacy.util import minibatch, use_gpu, compounding
+import tqdm
+from spacy._ml import Tok2Vec
+from spacy.pipeline import TextCategorizer
+import numpy
+
+
+def load_texts(limit=0):
+    """Return shuffled IMDB texts: `limit` train texts, or all train + dev texts."""
+    train, dev = thinc.extra.datasets.imdb()
+    train_texts, _ = zip(*train)  # labels are not needed for pre-training
+    dev_texts, _ = zip(*dev)  # was zip(*train), which duplicated the train texts
+    train_texts = list(train_texts)
+    dev_texts = list(dev_texts)
+    random.shuffle(train_texts)
+    random.shuffle(dev_texts)
+    if limit >= 1:
+        return train_texts[:limit]
+    return train_texts + dev_texts
+
+
+def load_textcat_data(limit=0):
+    """Load data from the IMDB dataset."""
+    # Keep the last `limit` shuffled train examples; eval data is imdb()'s second split
+    train_data, eval_data = thinc.extra.datasets.imdb()
+    random.shuffle(train_data)
+    train_data = train_data[-limit:]
+    texts, labels = zip(*train_data)
+    eval_texts, eval_labels = zip(*eval_data)
+    cats = [{"POSITIVE": bool(y), "NEGATIVE": not bool(y)} for y in labels]
+    eval_cats = [{"POSITIVE": bool(y), "NEGATIVE": not bool(y)} for y in eval_labels]
+    return (texts, cats), (eval_texts, eval_cats)
+
+
+def prefer_gpu():
+    used = spacy.util.use_gpu(0)
+    if used is None:
+        return False
+    else:
+        import cupy.random
+
+        cupy.random.seed(0)
+        return True
+
+
+def build_textcat_model(tok2vec, nr_class, width):
+    """Chain tok2vec -> flatten -> mean pooling -> softmax over nr_class outputs."""
+    from thinc.v2v import Model, Softmax
+    from thinc.api import flatten_add_lengths, chain
+    from thinc.t2v import Pooling, mean_pool
+
+    with Model.define_operators({">>": chain}):
+        model = (
+            tok2vec
+            >> flatten_add_lengths
+            >> Pooling(mean_pool)
+            >> Softmax(nr_class, width)
+        )
+    # expose the encoder so callers can reach it as `model.tok2vec`
+    model.tok2vec = tok2vec
+    return model
+
+
+def block_gradients(model):
+    from thinc.api import wrap
+
+    def forward(X, drop=0.0):
+        Y, _ = model.begin_update(X, drop=drop)
+        return Y, None
+
+    return wrap(forward, model)
+
+
+def create_pipeline(width, embed_size, vectors_model):
+    print("Load vectors")
+    nlp = spacy.load(vectors_model)
+    print("Start training")
+    textcat = TextCategorizer(
+        nlp.vocab,
+        labels=["POSITIVE", "NEGATIVE"],
+        model=build_textcat_model(
+            Tok2Vec(width=width, embed_size=embed_size), 2, width
+        ),
+    )
+
+    nlp.add_pipe(textcat)
+    return nlp
+
+
+def train_tensorizer(nlp, texts, dropout, n_iter):  # returns the optimizer it used
+    tensorizer = nlp.create_pipe("tensorizer")
+    nlp.add_pipe(tensorizer)
+    optimizer = nlp.begin_training()
+    for _ in range(n_iter):  # one pass over all texts per iteration
+        losses = {}  # fresh dict for each pass
+        for batch in minibatch(tqdm.tqdm(texts)):  # index was unused and shadowed `i`
+            docs = [nlp.make_doc(text) for text in batch]
+            tensorizer.update(docs, None, losses=losses, sgd=optimizer, drop=dropout)
+        print(losses)
+    return optimizer
+
+
+def train_textcat(nlp, n_texts, n_iter=10):
+    textcat = nlp.get_pipe("textcat")
+    tok2vec_weights = textcat.model.tok2vec.to_bytes()
+    (train_texts, train_cats), (dev_texts, dev_cats) = load_textcat_data(limit=n_texts)
+    print(
+        "Using {} examples ({} training, {} evaluation)".format(
+            n_texts, len(train_texts), len(dev_texts)
+        )
+    )
+    train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))
+
+    # get names of other pipes to disable them during training
+    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
+    with nlp.disable_pipes(*other_pipes):  # only train textcat
+        optimizer = nlp.begin_training()
+        textcat.model.tok2vec.from_bytes(tok2vec_weights)
+        print("Training the model...")
+        print("{:^5}\t{:^5}\t{:^5}\t{:^5}".format("LOSS", "P", "R", "F"))
+        for i in range(n_iter):
+            losses = {"textcat": 0.0}
+            # batch up the examples using spaCy's minibatch
+            batches = minibatch(tqdm.tqdm(train_data), size=2)
+            for batch in batches:
+                texts, annotations = zip(*batch)
+                nlp.update(texts, annotations, sgd=optimizer, drop=0.2, losses=losses)
+            with textcat.model.use_params(optimizer.averages):
+                # evaluate on the dev data returned by load_textcat_data()
+                scores = evaluate_textcat(nlp.tokenizer, textcat, dev_texts, dev_cats)
+            print(
+                "{0:.3f}\t{1:.3f}\t{2:.3f}\t{3:.3f}".format(  # print a simple table
+                    losses["textcat"],
+                    scores["textcat_p"],
+                    scores["textcat_r"],
+                    scores["textcat_f"],
+                )
+            )
+
+
+def evaluate_textcat(tokenizer, textcat, texts, cats):
+    docs = (tokenizer(text) for text in texts)
+    tp = 1e-8
+    fp = 1e-8
+    tn = 1e-8
+    fn = 1e-8
+    for i, doc in enumerate(textcat.pipe(docs)):
+        gold = cats[i]
+        for label, score in doc.cats.items():
+            if label not in gold:
+                continue
+            if score >= 0.5 and gold[label] >= 0.5:
+                tp += 1.0
+            elif score >= 0.5 and gold[label] < 0.5:
+                fp += 1.0
+            elif score < 0.5 and gold[label] < 0.5:
+                tn += 1
+            elif score < 0.5 and gold[label] >= 0.5:
+                fn += 1
+    precision = tp / (tp + fp)
+    recall = tp / (tp + fn)
+    f_score = 2 * (precision * recall) / (precision + recall)
+    return {"textcat_p": precision, "textcat_r": recall, "textcat_f": f_score}
+
+
+@plac.annotations(
+    width=("Width of CNN layers", "positional", None, int),
+    embed_size=("Embedding rows", "positional", None, int),
+    pretrain_iters=("Number of iterations to pretrain", "option", "pn", int),
+    train_iters=("Number of iterations to train", "option", "tn", int),
+    train_examples=("Number of labelled examples", "option", "eg", int),
+    vectors_model=("Name or path to vectors model to learn from"),
+)
+def main(
+    width,
+    embed_size,
+    vectors_model,
+    pretrain_iters=30,
+    train_iters=30,
+    train_examples=1000,
+):
+    """Train a tensorizer on unlabelled IMDB text, then train the text categorizer."""
+    random.seed(0)  # fixed seeds so shuffling is repeatable between runs
+    numpy.random.seed(0)
+    print("Using GPU?", prefer_gpu())  # no local, so spacy.util.use_gpu isn't shadowed
+
+    nlp = create_pipeline(width, embed_size, vectors_model)
+    print("Load data")
+    texts = load_texts(limit=0)
+    print("Train tensorizer")
+    train_tensorizer(nlp, texts, dropout=0.2, n_iter=pretrain_iters)  # result unused
+    print("Train textcat")
+    train_textcat(nlp, train_examples, n_iter=train_iters)
+
+
+if __name__ == "__main__":
+    plac.call(main)
--- a/examples/training/rehearsal.py
+++ b/examples/training/rehearsal.py
@ -0,0 +1,94 @@
+"""Prevent catastrophic forgetting with rehearsal updates."""
+import plac
+import random
+import srsly
+import spacy
+from spacy.gold import GoldParse
+from spacy.util import minibatch, compounding
+
+
+LABEL = "ANIMAL"
+TRAIN_DATA = [
+    (
+        "Horses are too tall and they pretend to care about your feelings",
+        {"entities": [(0, 6, "ANIMAL")]},
+    ),
+    ("Do they bite?", {"entities": []}),
+    (
+        "horses are too tall and they pretend to care about your feelings",
+        {"entities": [(0, 6, "ANIMAL")]},
+    ),
+    ("horses pretend to care about your feelings", {"entities": [(0, 6, "ANIMAL")]}),
+    (
+        "they pretend to care about your feelings, those horses",
+        {"entities": [(48, 54, "ANIMAL")]},
+    ),
+    ("horses?", {"entities": [(0, 6, "ANIMAL")]}),
+]
+
+
+def read_raw_data(nlp, jsonl_loc):
+    for json_obj in srsly.read_jsonl(jsonl_loc):
+        if json_obj["text"].strip():
+            doc = nlp.make_doc(json_obj["text"])
+            yield doc
+
+
+def read_gold_data(nlp, gold_loc):
+    docs = []
+    golds = []
+    for json_obj in srsly.read_jsonl(gold_loc):
+        doc = nlp.make_doc(json_obj["text"])
+        ents = [(ent["start"], ent["end"], ent["label"]) for ent in json_obj["spans"]]
+        gold = GoldParse(doc, entities=ents)
+        docs.append(doc)
+        golds.append(gold)
+    return list(zip(docs, golds))
+
+
+def main(model_name, unlabelled_loc):
+    """Add LABEL to a loaded model's NER and train it, rehearsing on raw text."""
+    n_iter = 10
+    dropout = 0.2
+    batch_size = 4
+    nlp = spacy.load(model_name)
+    nlp.get_pipe("ner").add_label(LABEL)
+    raw_docs = list(read_raw_data(nlp, unlabelled_loc))
+    optimizer = nlp.resume_training()
+    # Avoid use of Adam when resuming training. I don't understand this well
+    # yet, but I'm getting weird results from Adam. Try commenting out the
+    # nlp.update(), and using Adam -- you'll find the models drift apart.
+    # I guess Adam is losing precision, introducing gradient noise?
+    optimizer.alpha = 0.1
+    optimizer.b1 = 0.0
+    optimizer.b2 = 0.0
+
+    # get names of other pipes to disable them during training
+    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
+    sizes = compounding(1.0, 4.0, 1.001)
+    with nlp.disable_pipes(*other_pipes):
+        for itn in range(n_iter):
+            random.shuffle(TRAIN_DATA)
+            random.shuffle(raw_docs)
+            losses = {}
+            r_losses = {}
+            # batch up the examples using spaCy's minibatch
+            raw_batches = minibatch(raw_docs, size=batch_size)
+            for batch in minibatch(TRAIN_DATA, size=sizes):
+                texts, golds = zip(*batch)
+                nlp.update(texts, golds, sgd=optimizer, drop=dropout, losses=losses)
+                raw_batch = next(raw_batches, None)  # None once the raw docs run out
+                if raw_batch:  # skip rehearsal rather than crash with StopIteration
+                    nlp.rehearse(list(raw_batch), sgd=optimizer, losses=r_losses)
+            print("Losses", losses)
+            print("R. Losses", r_losses)
+    print(nlp.get_pipe("ner").model.unseen_classes)
+    test_text = "Do you like horses?"
+    doc = nlp(test_text)
+    print("Entities in '%s'" % test_text)
+    for ent in doc.ents:
+        print(ent.label_, ent.text)
+
+
+if __name__ == "__main__":
+    plac.call(main)
--- a/examples/training/train_intent_parser.py
+++ b/examples/training/train_intent_parser.py
@ -21,77 +21,121 @@ from __future__ import unicode_literals, print_function

 import plac
 import random
-import spacy
 from pathlib import Path
+import spacy
+from spacy.util import minibatch, compounding


 # training data: texts, heads and dependency labels
 # for no relation, we simply chose an arbitrary dependency label, e.g. '-'
 TRAIN_DATA = [
-    ("find a cafe with great wifi", {
-        'heads': [0, 2, 0, 5, 5, 2],  # index of token head
-        'deps': ['ROOT', '-', 'PLACE', '-', 'QUALITY', 'ATTRIBUTE']
-    }),
-    ("find a hotel near the beach", {
-        'heads': [0, 2, 0, 5, 5, 2],
-        'deps': ['ROOT', '-', 'PLACE', 'QUALITY', '-', 'ATTRIBUTE']
-    }),
-    ("find me the closest gym that's open late", {
-        'heads': [0, 0, 4, 4, 0, 6, 4, 6, 6],
-        'deps': ['ROOT', '-', '-', 'QUALITY', 'PLACE', '-', '-', 'ATTRIBUTE', 'TIME']
-    }),
-    ("show me the cheapest store that sells flowers", {
-        'heads': [0, 0, 4, 4, 0, 4, 4, 4],  # attach "flowers" to store!
-        'deps': ['ROOT', '-', '-', 'QUALITY', 'PLACE', '-', '-', 'PRODUCT']
-    }),
-    ("find a nice restaurant in london", {
-        'heads': [0, 3, 3, 0, 3, 3],
-        'deps': ['ROOT', '-', 'QUALITY', 'PLACE', '-', 'LOCATION']
-    }),
-    ("show me the coolest hostel in berlin", {
-        'heads': [0, 0, 4, 4, 0, 4, 4],
-        'deps': ['ROOT', '-', '-', 'QUALITY', 'PLACE', '-', 'LOCATION']
-    }),
-    ("find a good italian restaurant near work", {
-        'heads': [0, 4, 4, 4, 0, 4, 5],
-        'deps': ['ROOT', '-', 'QUALITY', 'ATTRIBUTE', 'PLACE', 'ATTRIBUTE', 'LOCATION']
-    })
+    (
+        "find a cafe with great wifi",
+        {
+            "heads": [0, 2, 0, 5, 5, 2],  # index of token head
+            "deps": ["ROOT", "-", "PLACE", "-", "QUALITY", "ATTRIBUTE"],
+        },
+    ),
+    (
+        "find a hotel near the beach",
+        {
+            "heads": [0, 2, 0, 5, 5, 2],
+            "deps": ["ROOT", "-", "PLACE", "QUALITY", "-", "ATTRIBUTE"],
+        },
+    ),
+    (
+        "find me the closest gym that's open late",
+        {
+            "heads": [0, 0, 4, 4, 0, 6, 4, 6, 6],
+            "deps": [
+                "ROOT",
+                "-",
+                "-",
+                "QUALITY",
+                "PLACE",
+                "-",
+                "-",
+                "ATTRIBUTE",
+                "TIME",
+            ],
+        },
+    ),
+    (
+        "show me the cheapest store that sells flowers",
+        {
+            "heads": [0, 0, 4, 4, 0, 4, 4, 4],  # attach "flowers" to store!
+            "deps": ["ROOT", "-", "-", "QUALITY", "PLACE", "-", "-", "PRODUCT"],
+        },
+    ),
+    (
+        "find a nice restaurant in london",
+        {
+            "heads": [0, 3, 3, 0, 3, 3],
+            "deps": ["ROOT", "-", "QUALITY", "PLACE", "-", "LOCATION"],
+        },
+    ),
+    (
+        "show me the coolest hostel in berlin",
+        {
+            "heads": [0, 0, 4, 4, 0, 4, 4],
+            "deps": ["ROOT", "-", "-", "QUALITY", "PLACE", "-", "LOCATION"],
+        },
+    ),
+    (
+        "find a good italian restaurant near work",
+        {
+            "heads": [0, 4, 4, 4, 0, 4, 5],
+            "deps": [
+                "ROOT",
+                "-",
+                "QUALITY",
+                "ATTRIBUTE",
+                "PLACE",
+                "ATTRIBUTE",
+                "LOCATION",
+            ],
+        },
+    ),
 ]


@plac.annotations(
    model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
    output_dir=("Optional output directory", "option", "o", Path),
-    n_iter=("Number of training iterations", "option", "n", int))
-def main(model=None, output_dir=None, n_iter=5):
+    n_iter=("Number of training iterations", "option", "n", int),
+)
+def main(model=None, output_dir=None, n_iter=15):
    """Load the model, set up the pipeline and train the parser."""
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
-        nlp = spacy.blank('en')  # create blank Language class
+        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")

    # We'll use the built-in dependency parser class, but we want to create a
    # fresh instance – just in case.
-    if 'parser' in nlp.pipe_names:
-        nlp.remove_pipe('parser')
-    parser = nlp.create_pipe('parser')
+    if "parser" in nlp.pipe_names:
+        nlp.remove_pipe("parser")
+    parser = nlp.create_pipe("parser")
    nlp.add_pipe(parser, first=True)

    for text, annotations in TRAIN_DATA:
-        for dep in annotations.get('deps', []):
+        for dep in annotations.get("deps", []):
            parser.add_label(dep)

-    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'parser']
+    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
    with nlp.disable_pipes(*other_pipes):  # only train parser
        optimizer = nlp.begin_training()
        for itn in range(n_iter):
            random.shuffle(TRAIN_DATA)
            losses = {}
-            for text, annotations in TRAIN_DATA:
-                nlp.update([text], [annotations], sgd=optimizer, losses=losses)
-            print(losses)
+            # batch up the examples using spaCy's minibatch
+            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
+            for batch in batches:
+                texts, annotations = zip(*batch)
+                nlp.update(texts, annotations, sgd=optimizer, losses=losses)
+            print("Losses", losses)

    # test the trained model
    test_model(nlp)
@ -111,16 +155,18 @@ def main(model=None, output_dir=None, n_iter=5):


 def test_model(nlp):
-    texts = ["find a hotel with good wifi",
+    texts = [
+        "find a hotel with good wifi",
        "find me the cheapest gym near work",
-             "show me the best hotel in berlin"]
+        "show me the best hotel in berlin",
+    ]
    docs = nlp.pipe(texts)
    for doc in docs:
        print(doc.text)
-        print([(t.text, t.dep_, t.head.text) for t in doc if t.dep_ != '-'])
+        print([(t.text, t.dep_, t.head.text) for t in doc if t.dep_ != "-"])


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)

    # Expected output:
@ -135,7 +181,8 @@ if __name__ == '__main__':
    # [
    #   ('find', 'ROOT', 'find'),
    #   ('cheapest', 'QUALITY', 'gym'),
-    #   ('gym', 'PLACE', 'find')
+    #   ('gym', 'PLACE', 'find'),
+    #   ('near', 'ATTRIBUTE', 'gym'),
    #   ('work', 'LOCATION', 'near')
    # ]
    # show me the best hotel in berlin
--- a/examples/training/train_ner.py
+++ b/examples/training/train_ner.py
@ -15,67 +15,71 @@ import plac
 import random
 from pathlib import Path
 import spacy
+from spacy.util import minibatch, compounding


 # training data
 TRAIN_DATA = [
-    ('Who is Shaka Khan?', {
-        'entities': [(7, 17, 'PERSON')]
-    }),
-    ('I like London and Berlin.', {
-        'entities': [(7, 13, 'LOC'), (18, 24, 'LOC')]
-    })
+    ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
+    ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
 ]


@plac.annotations(
    model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
    output_dir=("Optional output directory", "option", "o", Path),
-    n_iter=("Number of training iterations", "option", "n", int))
+    n_iter=("Number of training iterations", "option", "n", int),
+)
 def main(model=None, output_dir=None, n_iter=100):
    """Load the model, set up the pipeline and train the entity recognizer."""
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
-        nlp = spacy.blank('en')  # create blank Language class
+        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")

    # create the built-in pipeline components and add them to the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
-    if 'ner' not in nlp.pipe_names:
-        ner = nlp.create_pipe('ner')
+    if "ner" not in nlp.pipe_names:
+        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner, last=True)
    # otherwise, get it so we can add labels
    else:
-        ner = nlp.get_pipe('ner')
+        ner = nlp.get_pipe("ner")

    # add labels
    for _, annotations in TRAIN_DATA:
-        for ent in annotations.get('entities'):
+        for ent in annotations.get("entities"):
            ner.add_label(ent[2])

    # get names of other pipes to disable them during training
-    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
+    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
    with nlp.disable_pipes(*other_pipes):  # only train NER
-        optimizer = nlp.begin_training()
+        # reset and initialize the weights randomly – but only if we're
+        # training a new model
+        if model is None:
+            nlp.begin_training()
        for itn in range(n_iter):
            random.shuffle(TRAIN_DATA)
            losses = {}
-            for text, annotations in TRAIN_DATA:
+            # batch up the examples using spaCy's minibatch
+            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
+            for batch in batches:
+                texts, annotations = zip(*batch)
                nlp.update(
-                    [text],  # batch of texts
-                    [annotations],  # batch of annotations
+                    texts,  # batch of texts
+                    annotations,  # batch of annotations
                    drop=0.5,  # dropout - make it harder to memorise data
-                    sgd=optimizer,  # callable to update weights
-                    losses=losses)
-            print(losses)
+                    losses=losses,
+                )
+            print("Losses", losses)

    # test the trained model
    for text, _ in TRAIN_DATA:
        doc = nlp(text)
-        print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
-        print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])
+        print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
+        print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])

    # save model to output directory
    if output_dir is not None:
@ -90,11 +94,11 @@ def main(model=None, output_dir=None, n_iter=100):
        nlp2 = spacy.load(output_dir)
        for text, _ in TRAIN_DATA:
            doc = nlp2(text)
-            print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
-            print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])
+            print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
+            print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)

    # Expected output:
--- a/examples/training/train_new_entity_type.py
+++ b/examples/training/train_new_entity_type.py
@ -31,10 +31,11 @@ import plac
 import random
 from pathlib import Path
 import spacy
+from spacy.util import minibatch, compounding


 # new entity label
-LABEL = 'ANIMAL'
+LABEL = "ANIMAL"

 # training data
 # Note: If you're using an existing model, make sure to mix in examples of
@ -42,29 +43,21 @@ LABEL = 'ANIMAL'
 # model might learn the new type, but "forget" what it previously knew.
 # https://explosion.ai/blog/pseudo-rehearsal-catastrophic-forgetting
 TRAIN_DATA = [
-    ("Horses are too tall and they pretend to care about your feelings", {
-        'entities': [(0, 6, 'ANIMAL')]
-    }),
-
-    ("Do they bite?", {
-        'entities': []
-    }),
-
-    ("horses are too tall and they pretend to care about your feelings", {
-        'entities': [(0, 6, 'ANIMAL')]
-    }),
-
-    ("horses pretend to care about your feelings", {
-        'entities': [(0, 6, 'ANIMAL')]
-    }),
-
-    ("they pretend to care about your feelings, those horses", {
-        'entities': [(48, 54, 'ANIMAL')]
-    }),
-
-    ("horses?", {
-        'entities': [(0, 6, 'ANIMAL')]
-    })
+    (
+        "Horses are too tall and they pretend to care about your feelings",
+        {"entities": [(0, 6, LABEL)]},
+    ),
+    ("Do they bite?", {"entities": []}),
+    (
+        "horses are too tall and they pretend to care about your feelings",
+        {"entities": [(0, 6, LABEL)]},
+    ),
+    ("horses pretend to care about your feelings", {"entities": [(0, 6, LABEL)]}),
+    (
+        "they pretend to care about your feelings, those horses",
+        {"entities": [(48, 54, LABEL)]},
+    ),
+    ("horses?", {"entities": [(0, 6, LABEL)]}),
 ]


@ -72,45 +65,50 @@ TRAIN_DATA = [
    model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
    new_model_name=("New model name for model meta.", "option", "nm", str),
    output_dir=("Optional output directory", "option", "o", Path),
-    n_iter=("Number of training iterations", "option", "n", int))
-def main(model=None, new_model_name='animal', output_dir=None, n_iter=20):
+    n_iter=("Number of training iterations", "option", "n", int),
+)
+def main(model=None, new_model_name="animal", output_dir=None, n_iter=30):
    """Set up the pipeline and entity recognizer, and train the new entity."""
+    random.seed(0)
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
-        nlp = spacy.blank('en')  # create blank Language class
+        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")
    # Add entity recognizer to model if it's not in the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
-    if 'ner' not in nlp.pipe_names:
-        ner = nlp.create_pipe('ner')
+    if "ner" not in nlp.pipe_names:
+        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner)
    # otherwise, get it, so we can add labels to it
    else:
-        ner = nlp.get_pipe('ner')
+        ner = nlp.get_pipe("ner")

    ner.add_label(LABEL)  # add new entity label to entity recognizer
+    # Adding extraneous labels shouldn't mess anything up
+    ner.add_label('VEGETABLE')
    if model is None:
        optimizer = nlp.begin_training()
    else:
-        # Note that 'begin_training' initializes the models, so it'll zero out
-        # existing entity types.
-        optimizer = nlp.entity.create_optimizer()
-
+        optimizer = nlp.resume_training()
+    move_names = list(ner.move_names)
    # get names of other pipes to disable them during training
-    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
+    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
    with nlp.disable_pipes(*other_pipes):  # only train NER
+        sizes = compounding(1.0, 4.0, 1.001)
+        # batch up the examples using spaCy's minibatch
        for itn in range(n_iter):
            random.shuffle(TRAIN_DATA)
+            batches = minibatch(TRAIN_DATA, size=sizes)
            losses = {}
-            for text, annotations in TRAIN_DATA:
-                nlp.update([text], [annotations], sgd=optimizer, drop=0.35,
-                           losses=losses)
-            print(losses)
+            for batch in batches:
+                texts, annotations = zip(*batch)
+                nlp.update(texts, annotations, sgd=optimizer, drop=0.35, losses=losses)
+            print("Losses", losses)

    # test the trained model
-    test_text = 'Do you like horses?'
+    test_text = "Do you like horses?"
    doc = nlp(test_text)
    print("Entities in '%s'" % test_text)
    for ent in doc.ents:
@ -121,17 +119,19 @@ def main(model=None, new_model_name='animal', output_dir=None, n_iter=20):
        output_dir = Path(output_dir)
        if not output_dir.exists():
            output_dir.mkdir()
-        nlp.meta['name'] = new_model_name  # rename model
+        nlp.meta["name"] = new_model_name  # rename model
        nlp.to_disk(output_dir)
        print("Saved model to", output_dir)

        # test the saved model
        print("Loading from", output_dir)
        nlp2 = spacy.load(output_dir)
+        # Check the classes have loaded back consistently
+        assert nlp2.get_pipe('ner').move_names == move_names
        doc2 = nlp2(test_text)
        for ent in doc2.ents:
            print(ent.label_, ent.text)


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)
--- a/examples/training/train_parser.py
+++ b/examples/training/train_parser.py
@ -13,63 +13,74 @@ import plac
 import random
 from pathlib import Path
 import spacy
+from spacy.util import minibatch, compounding


 # training data
 TRAIN_DATA = [
-    ("They trade mortgage-backed securities.", {
-        'heads': [1, 1, 4, 4, 5, 1, 1],
-        'deps': ['nsubj', 'ROOT', 'compound', 'punct', 'nmod', 'dobj', 'punct']
-    }),
-    ("I like London and Berlin.", {
-        'heads': [1, 1, 1, 2, 2, 1],
-        'deps': ['nsubj', 'ROOT', 'dobj', 'cc', 'conj', 'punct']
-    })
+    (
+        "They trade mortgage-backed securities.",
+        {
+            "heads": [1, 1, 4, 4, 5, 1, 1],
+            "deps": ["nsubj", "ROOT", "compound", "punct", "nmod", "dobj", "punct"],
+        },
+    ),
+    (
+        "I like London and Berlin.",
+        {
+            "heads": [1, 1, 1, 2, 2, 1],
+            "deps": ["nsubj", "ROOT", "dobj", "cc", "conj", "punct"],
+        },
+    ),
 ]


@plac.annotations(
    model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
    output_dir=("Optional output directory", "option", "o", Path),
-    n_iter=("Number of training iterations", "option", "n", int))
+    n_iter=("Number of training iterations", "option", "n", int),
+)
 def main(model=None, output_dir=None, n_iter=10):
    """Load the model, set up the pipeline and train the parser."""
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
-        nlp = spacy.blank('en')  # create blank Language class
+        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")

    # add the parser to the pipeline if it doesn't exist
    # nlp.create_pipe works for built-ins that are registered with spaCy
-    if 'parser' not in nlp.pipe_names:
-        parser = nlp.create_pipe('parser')
+    if "parser" not in nlp.pipe_names:
+        parser = nlp.create_pipe("parser")
        nlp.add_pipe(parser, first=True)
    # otherwise, get it, so we can add labels to it
    else:
-        parser = nlp.get_pipe('parser')
+        parser = nlp.get_pipe("parser")

    # add labels to the parser
    for _, annotations in TRAIN_DATA:
-        for dep in annotations.get('deps', []):
+        for dep in annotations.get("deps", []):
            parser.add_label(dep)

    # get names of other pipes to disable them during training
-    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'parser']
+    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
    with nlp.disable_pipes(*other_pipes):  # only train parser
        optimizer = nlp.begin_training()
        for itn in range(n_iter):
            random.shuffle(TRAIN_DATA)
            losses = {}
-            for text, annotations in TRAIN_DATA:
-                nlp.update([text], [annotations], sgd=optimizer, losses=losses)
-            print(losses)
+            # batch up the examples using spaCy's minibatch
+            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
+            for batch in batches:
+                texts, annotations = zip(*batch)
+                nlp.update(texts, annotations, sgd=optimizer, losses=losses)
+            print("Losses", losses)

    # test the trained model
    test_text = "I like securities."
    doc = nlp(test_text)
-    print('Dependencies', [(t.text, t.dep_, t.head.text) for t in doc])
+    print("Dependencies", [(t.text, t.dep_, t.head.text) for t in doc])

    # save model to output directory
    if output_dir is not None:
@ -83,10 +94,10 @@ def main(model=None, output_dir=None, n_iter=10):
        print("Loading from", output_dir)
        nlp2 = spacy.load(output_dir)
        doc = nlp2(test_text)
-        print('Dependencies', [(t.text, t.dep_, t.head.text) for t in doc])
+        print("Dependencies", [(t.text, t.dep_, t.head.text) for t in doc])


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)

    # expected result:
--- a/examples/training/train_tagger.py
+++ b/examples/training/train_tagger.py
@ -16,6 +16,7 @@ import plac
 import random
 from pathlib import Path
 import spacy
+from spacy.util import minibatch, compounding


 # You need to define a mapping from your data's part-of-speech tag names to the
@ -24,28 +25,25 @@ import spacy
 # http://universaldependencies.github.io/docs/u/pos/index.html
 # You may also specify morphological features for your tags, from the universal
 # scheme.
-TAG_MAP = {
-    'N': {'pos': 'NOUN'},
-    'V': {'pos': 'VERB'},
-    'J': {'pos': 'ADJ'}
-}
+TAG_MAP = {"N": {"pos": "NOUN"}, "V": {"pos": "VERB"}, "J": {"pos": "ADJ"}}

 # Usually you'll read this in, of course. Data formats vary. Ensure your
 # strings are unicode and that the number of tags assigned matches spaCy's
 # tokenization. If not, you can always add a 'words' key to the annotations
 # that specifies the gold-standard tokenization, e.g.:
-# ("Eatblueham", {'words': ['Eat', 'blue', 'ham'] 'tags': ['V', 'J', 'N']})
+# ("Eatblueham", {'words': ['Eat', 'blue', 'ham'], 'tags': ['V', 'J', 'N']})
 TRAIN_DATA = [
-    ("I like green eggs", {'tags': ['N', 'V', 'J', 'N']}),
-    ("Eat blue ham", {'tags': ['V', 'J', 'N']})
+    ("I like green eggs", {"tags": ["N", "V", "J", "N"]}),
+    ("Eat blue ham", {"tags": ["V", "J", "N"]}),
 ]


@plac.annotations(
    lang=("ISO Code of language to use", "option", "l", str),
    output_dir=("Optional output directory", "option", "o", Path),
-    n_iter=("Number of training iterations", "option", "n", int))
-def main(lang='en', output_dir=None, n_iter=25):
+    n_iter=("Number of training iterations", "option", "n", int),
+)
+def main(lang="en", output_dir=None, n_iter=25):
    """Create a new model, set up the pipeline and train the tagger. In order to
    train the tagger with a custom tag map, we're creating a new Language
    instance with a custom vocab.
@ -53,7 +51,7 @@ def main(lang='en', output_dir=None, n_iter=25):
    nlp = spacy.blank(lang)
    # add the tagger to the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
-    tagger = nlp.create_pipe('tagger')
+    tagger = nlp.create_pipe("tagger")
    # Add the tags. This needs to be done before you start training.
    for tag, values in TAG_MAP.items():
        tagger.add_label(tag, values)
@ -63,14 +61,17 @@ def main(lang='en', output_dir=None, n_iter=25):
    for i in range(n_iter):
        random.shuffle(TRAIN_DATA)
        losses = {}
-        for text, annotations in TRAIN_DATA:
-            nlp.update([text], [annotations], sgd=optimizer, losses=losses)
-        print(losses)
+        # batch up the examples using spaCy's minibatch
+        batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
+        for batch in batches:
+            texts, annotations = zip(*batch)
+            nlp.update(texts, annotations, sgd=optimizer, losses=losses)
+        print("Losses", losses)

    # test the trained model
    test_text = "I like blue eggs"
    doc = nlp(test_text)
-    print('Tags', [(t.text, t.tag_, t.pos_) for t in doc])
+    print("Tags", [(t.text, t.tag_, t.pos_) for t in doc])

    # save model to output directory
    if output_dir is not None:
@ -84,10 +85,10 @@ def main(lang='en', output_dir=None, n_iter=25):
        print("Loading from", output_dir)
        nlp2 = spacy.load(output_dir)
        doc = nlp2(test_text)
-        print('Tags', [(t.text, t.tag_, t.pos_) for t in doc])
+        print("Tags", [(t.text, t.tag_, t.pos_) for t in doc])


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)

    # Expected output:
--- a/examples/training/train_textcat.py
+++ b/examples/training/train_textcat.py
@ -23,55 +23,70 @@ from spacy.util import minibatch, compounding
    model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
    output_dir=("Optional output directory", "option", "o", Path),
    n_texts=("Number of texts to train from", "option", "t", int),
-    n_iter=("Number of training iterations", "option", "n", int))
+    n_iter=("Number of training iterations", "option", "n", int),
+)
 def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
+    if output_dir is not None:
+        output_dir = Path(output_dir)
+        if not output_dir.exists():
+            output_dir.mkdir()
+
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
-        nlp = spacy.blank('en')  # create blank Language class
+        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")

    # add the text classifier to the pipeline if it doesn't exist
    # nlp.create_pipe works for built-ins that are registered with spaCy
-    if 'textcat' not in nlp.pipe_names:
-        textcat = nlp.create_pipe('textcat')
+    if "textcat" not in nlp.pipe_names:
+        textcat = nlp.create_pipe("textcat", config={
+            "architecture": "simple_cnn",
+            "exclusive_classes": True})
        nlp.add_pipe(textcat, last=True)
    # otherwise, get it, so we can add labels to it
    else:
-        textcat = nlp.get_pipe('textcat')
+        textcat = nlp.get_pipe("textcat")

    # add label to text classifier
-    textcat.add_label('POSITIVE')
+    textcat.add_label("POSITIVE")
+    textcat.add_label("NEGATIVE")

    # load the IMDB dataset
    print("Loading IMDB data...")
    (train_texts, train_cats), (dev_texts, dev_cats) = load_data(limit=n_texts)
-    print("Using {} examples ({} training, {} evaluation)"
-          .format(n_texts, len(train_texts), len(dev_texts)))
-    train_data = list(zip(train_texts,
-                          [{'cats': cats} for cats in train_cats]))
+    print(
+        "Using {} examples ({} training, {} evaluation)".format(
+            n_texts, len(train_texts), len(dev_texts)
+        )
+    )
+    train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))

    # get names of other pipes to disable them during training
-    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'textcat']
+    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
    with nlp.disable_pipes(*other_pipes):  # only train textcat
        optimizer = nlp.begin_training()
        print("Training the model...")
-        print('{:^5}\t{:^5}\t{:^5}\t{:^5}'.format('LOSS', 'P', 'R', 'F'))
+        print("{:^5}\t{:^5}\t{:^5}\t{:^5}".format("LOSS", "P", "R", "F"))
        for i in range(n_iter):
            losses = {}
            # batch up the examples using spaCy's minibatch
-            batches = minibatch(train_data, size=compounding(4., 32., 1.001))
+            batches = minibatch(train_data, size=compounding(4.0, 32.0, 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
-                nlp.update(texts, annotations, sgd=optimizer, drop=0.2,
-                           losses=losses)
+                nlp.update(texts, annotations, sgd=optimizer, drop=0.2, losses=losses)
            with textcat.model.use_params(optimizer.averages):
                # evaluate on the dev data split off in load_data()
                scores = evaluate(nlp.tokenizer, textcat, dev_texts, dev_cats)
-            print('{0:.3f}\t{1:.3f}\t{2:.3f}\t{3:.3f}'  # print a simple table
-                  .format(losses['textcat'], scores['textcat_p'],
-                          scores['textcat_r'], scores['textcat_f']))
+            print(
+                "{0:.3f}\t{1:.3f}\t{2:.3f}\t{3:.3f}".format(  # print a simple table
+                    losses["textcat"],
+                    scores["textcat_p"],
+                    scores["textcat_r"],
+                    scores["textcat_f"],
+                )
+            )

    # test the trained model
    test_text = "This movie sucked"
@ -79,9 +94,7 @@ def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
    print(test_text, doc.cats)

    if output_dir is not None:
-        output_dir = Path(output_dir)
-        if not output_dir.exists():
-            output_dir.mkdir()
+        with nlp.use_params(optimizer.averages):
            nlp.to_disk(output_dir)
        print("Saved model to", output_dir)

@ -99,35 +112,40 @@ def load_data(limit=0, split=0.8):
    random.shuffle(train_data)
    train_data = train_data[-limit:]
    texts, labels = zip(*train_data)
-    cats = [{'POSITIVE': bool(y)} for y in labels]
+    cats = [{"POSITIVE": bool(y), "NEGATIVE": not bool(y)} for y in labels]
    split = int(len(train_data) * split)
    return (texts[:split], cats[:split]), (texts[split:], cats[split:])


 def evaluate(tokenizer, textcat, texts, cats):
    docs = (tokenizer(text) for text in texts)
-    tp = 1e-8  # True positives
+    tp = 0.0  # True positives
    fp = 1e-8  # False positives
    fn = 1e-8  # False negatives
-    tn = 1e-8  # True negatives
+    tn = 0.0  # True negatives
    for i, doc in enumerate(textcat.pipe(docs)):
        gold = cats[i]
        for label, score in doc.cats.items():
            if label not in gold:
                continue
+            if label == "NEGATIVE":
+                continue
            if score >= 0.5 and gold[label] >= 0.5:
-                tp += 1.
+                tp += 1.0
            elif score >= 0.5 and gold[label] < 0.5:
-                fp += 1.
+                fp += 1.0
            elif score < 0.5 and gold[label] < 0.5:
                tn += 1
            elif score < 0.5 and gold[label] >= 0.5:
                fn += 1
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
+    if (precision+recall) == 0:
+        f_score = 0.0
+    else:
        f_score = 2 * (precision * recall) / (precision + recall)
-    return {'textcat_p': precision, 'textcat_r': recall, 'textcat_f': f_score}
+    return {"textcat_p": precision, "textcat_r": recall, "textcat_f": f_score}


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)
--- a/examples/training/training-data.json
+++ b/examples/training/training-data.json
@ -1,6 +1,6 @@
 [
    {
-      "id": "wsj_0200",
+      "id": 42,
      "paragraphs": [
        {
          "raw": "In an Oct. 19 review of \"The Misanthrope\" at Chicago's Goodman Theatre (\"Revitalized Classics Take the Stage in Windy City,\" Leisure & Arts), the role of Celimene, played by Kim Cattrall, was mistakenly attributed to Christina Haag. Ms. Haag plays Elianti.",
--- a/examples/vectors_fast_text.py
+++ b/examples/vectors_fast_text.py
@ -14,8 +14,13 @@ from spacy.language import Language

@plac.annotations(
    vectors_loc=("Path to .vec file", "positional", None, str),
-    lang=("Optional language ID. If not set, blank Language() will be used.",
-          "positional", None, str))
+    lang=(
+        "Optional language ID. If not set, blank Language() will be used.",
+        "positional",
+        None,
+        str,
+    ),
+)
 def main(vectors_loc, lang=None):
    if lang is None:
        nlp = Language()
@ -24,21 +29,21 @@ def main(vectors_loc, lang=None):
        # save the model to disk and load it back later (models always need a
        # "lang" setting). Use 'xx' for blank multi-language class.
        nlp = spacy.blank(lang)
-    with open(vectors_loc, 'rb') as file_:
+    with open(vectors_loc, "rb") as file_:
        header = file_.readline()
        nr_row, nr_dim = header.split()
        nlp.vocab.reset_vectors(width=int(nr_dim))
        for line in file_:
-            line = line.rstrip().decode('utf8')
-            pieces = line.rsplit(' ', int(nr_dim))
+            line = line.rstrip().decode("utf8")
+            pieces = line.rsplit(" ", int(nr_dim))
            word = pieces[0]
-            vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
+            vector = numpy.asarray([float(v) for v in pieces[1:]], dtype="f")
            nlp.vocab.set_vector(word, vector)  # add the vectors to the vocab
    # test the vectors and similarity
-    text = 'class colspan'
+    text = "class colspan"
    doc = nlp(text)
    print(text, doc[0].similarity(doc[1]))


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)
--- a/examples/vectors_tensorboard.py
+++ b/examples/vectors_tensorboard.py
@ -14,26 +14,45 @@ import plac
 import spacy
 import tensorflow as tf
 import tqdm
-from tensorflow.contrib.tensorboard.plugins.projector import visualize_embeddings, ProjectorConfig
+from tensorflow.contrib.tensorboard.plugins.projector import (
+    visualize_embeddings,
+    ProjectorConfig,
+)


@plac.annotations(
    vectors_loc=("Path to spaCy model that contains vectors", "positional", None, str),
-    out_loc=("Path to output folder for tensorboard session data", "positional", None, str),
-    name=("Human readable name for tsv file and vectors tensor", "positional", None, str),
+    out_loc=(
+        "Path to output folder for tensorboard session data",
+        "positional",
+        None,
+        str,
+    ),
+    name=(
+        "Human readable name for tsv file and vectors tensor",
+        "positional",
+        None,
+        str,
+    ),
 )
 def main(vectors_loc, out_loc, name="spaCy_vectors"):
    meta_file = "{}.tsv".format(name)
    out_meta_file = path.join(out_loc, meta_file)

-    print('Loading spaCy vectors model: {}'.format(vectors_loc))
+    print("Loading spaCy vectors model: {}".format(vectors_loc))
    model = spacy.load(vectors_loc)
-    print('Finding lexemes with vectors attached: {}'.format(vectors_loc))
-    strings_stream = tqdm.tqdm(model.vocab.strings, total=len(model.vocab.strings), leave=False)
+    print("Finding lexemes with vectors attached: {}".format(vectors_loc))
+    strings_stream = tqdm.tqdm(
+        model.vocab.strings, total=len(model.vocab.strings), leave=False
+    )
    queries = [w for w in strings_stream if model.vocab.has_vector(w)]
    vector_count = len(queries)

-    print('Building Tensorboard Projector metadata for ({}) vectors: {}'.format(vector_count, out_meta_file))
+    print(
+        "Building Tensorboard Projector metadata for ({}) vectors: {}".format(
+            vector_count, out_meta_file
+        )
+    )

    # Store vector data in a tensorflow variable
    tf_vectors_variable = numpy.zeros((vector_count, model.vocab.vectors.shape[1]))
@ -41,22 +60,26 @@ def main(vectors_loc, out_loc, name="spaCy_vectors"):
    # Write a tab-separated file that contains information about the vectors for visualization
    #
    # Reference: https://www.tensorflow.org/programmers_guide/embedding#metadata
-    with open(out_meta_file, 'wb') as file_metadata:
+    with open(out_meta_file, "wb") as file_metadata:
        # Define columns in the first row
-        file_metadata.write("Text\tFrequency\n".encode('utf-8'))
+        file_metadata.write("Text\tFrequency\n".encode("utf-8"))
        # Write out a row for each vector that we add to the tensorflow variable we created
        vec_index = 0
        for text in tqdm.tqdm(queries, total=len(queries), leave=False):
            # https://github.com/tensorflow/tensorflow/issues/9094
-            text = '<Space>' if text.lstrip() == '' else text
+            text = "<Space>" if text.lstrip() == "" else text
            lex = model.vocab[text]

            # Store vector data and metadata
            tf_vectors_variable[vec_index] = model.vocab.get_vector(text)
-            file_metadata.write("{}\t{}\n".format(text, math.exp(lex.prob) * vector_count).encode('utf-8'))
+            file_metadata.write(
+                "{}\t{}\n".format(text, math.exp(lex.prob) * vector_count).encode(
+                    "utf-8"
+                )
+            )
            vec_index += 1

-    print('Running Tensorflow Session...')
+    print("Running Tensorflow Session...")
    sess = tf.InteractiveSession()
    tf.Variable(tf_vectors_variable, trainable=False, name=name)
    tf.global_variables_initializer().run()
@ -73,10 +96,10 @@ def main(vectors_loc, out_loc, name="spaCy_vectors"):
    visualize_embeddings(writer, config)

    # Save session and print run command to the output
-    print('Saving Tensorboard Session...')
-    saver.save(sess, path.join(out_loc, '{}.ckpt'.format(name)))
-    print('Done. Run `tensorboard --logdir={0}` to view in Tensorboard'.format(out_loc))
+    print("Saving Tensorboard Session...")
+    saver.save(sess, path.join(out_loc, "{}.ckpt".format(name)))
+    print("Done. Run `tensorboard --logdir={0}` to view in Tensorboard".format(out_loc))


-if __name__ == '__main__':
+if __name__ == "__main__":
    plac.call(main)
--- a/fabfile.py
+++ b/fabfile.py
@ -59,6 +59,7 @@ def make():
 def sdist():
    with virtualenv(VENV_DIR) as venv_local:
        with lcd(path.dirname(__file__)):
+            local('python -m pip install -U setuptools')
            local('python setup.py sdist')

 def wheel():
--- a/Show More
+++ b/Show More