mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Update morphology branch from develop
This commit is contained in:
commit
3993f41cc4
|
@ -1,50 +1,21 @@
|
||||||
environment:
|
environment:
|
||||||
|
|
||||||
matrix:
|
matrix:
|
||||||
|
- PYTHON: "C:\\Python35-x64"
|
||||||
# For Python versions available on Appveyor, see
|
|
||||||
# http://www.appveyor.com/docs/installed-software#python
|
|
||||||
|
|
||||||
- PYTHON: "C:\\Python27-x64"
|
|
||||||
#- PYTHON: "C:\\Python34"
|
|
||||||
#- PYTHON: "C:\\Python35"
|
|
||||||
#- DISTUTILS_USE_SDK: "1"
|
|
||||||
#- PYTHON: "C:\\Python34-x64"
|
|
||||||
#- DISTUTILS_USE_SDK: "1"
|
|
||||||
#- PYTHON: "C:\\Python35-x64"
|
|
||||||
- PYTHON: "C:\\Python36-x64"
|
- PYTHON: "C:\\Python36-x64"
|
||||||
|
- PYTHON: "C:\\Python37-x64"
|
||||||
install:
|
install:
|
||||||
# We need wheel installed to build wheels
|
# We need wheel installed to build wheels
|
||||||
- "%PYTHON%\\python.exe -m pip install wheel"
|
- "%PYTHON%\\python.exe -m pip install wheel"
|
||||||
- "%PYTHON%\\python.exe -m pip install cython"
|
- "%PYTHON%\\python.exe -m pip install cython"
|
||||||
- "%PYTHON%\\python.exe -m pip install -r requirements.txt"
|
- "%PYTHON%\\python.exe -m pip install -r requirements.txt"
|
||||||
- "%PYTHON%\\python.exe -m pip install -e ."
|
- "%PYTHON%\\python.exe -m pip install -e ."
|
||||||
|
|
||||||
build: off
|
build: off
|
||||||
|
|
||||||
test_script:
|
test_script:
|
||||||
# Put your test command here.
|
|
||||||
# If you don't need to build C extensions on 64-bit Python 3.4,
|
|
||||||
# you can remove "build.cmd" from the front of the command, as it's
|
|
||||||
# only needed to support those cases.
|
|
||||||
# Note that you must use the environment variable %PYTHON% to refer to
|
|
||||||
# the interpreter you're using - Appveyor does not do anything special
|
|
||||||
# to put the Python version you want to use on PATH.
|
|
||||||
- "%PYTHON%\\python.exe -m pytest spacy/ --no-print-logs"
|
- "%PYTHON%\\python.exe -m pytest spacy/ --no-print-logs"
|
||||||
|
|
||||||
after_test:
|
after_test:
|
||||||
# This step builds your wheels.
|
|
||||||
# Again, you only need build.cmd if you're building C extensions for
|
|
||||||
# 64-bit Python 3.4. And you need to use %PYTHON% to get the correct
|
|
||||||
# interpreter
|
|
||||||
- "%PYTHON%\\python.exe setup.py bdist_wheel"
|
- "%PYTHON%\\python.exe setup.py bdist_wheel"
|
||||||
|
|
||||||
artifacts:
|
artifacts:
|
||||||
# bdist_wheel puts your built wheel in the dist directory
|
|
||||||
- path: dist\*
|
- path: dist\*
|
||||||
|
branches:
|
||||||
#on_success:
|
except:
|
||||||
# You can use this step to upload your artifacts to a public website.
|
- spacy.io
|
||||||
# See Appveyor's documentation for more details. Or you can simply
|
|
||||||
# access your wheels from the Appveyor "artifacts" tab for your build.
|
|
||||||
|
|
14
.flake8
Normal file
14
.flake8
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
[flake8]
|
||||||
|
ignore = E203, E266, E501, E731, W503
|
||||||
|
max-line-length = 80
|
||||||
|
select = B,C,E,F,W,T4,B9
|
||||||
|
exclude =
|
||||||
|
.env,
|
||||||
|
.git,
|
||||||
|
__pycache__,
|
||||||
|
lemmatizer.py,
|
||||||
|
lookup.py,
|
||||||
|
_tokenizer_exceptions_list.py,
|
||||||
|
spacy/lang/fr/lemmatizer,
|
||||||
|
spacy/lang/nb/lemmatizer,
|
||||||
|
spacy/__init__.py
|
2
.github/CONTRIBUTOR_AGREEMENT.md
vendored
2
.github/CONTRIBUTOR_AGREEMENT.md
vendored
|
@ -5,7 +5,7 @@ This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
The SCA applies to any contribution that you make to any product or project
|
The SCA applies to any contribution that you make to any product or project
|
||||||
managed by us (the **"project"**), and sets out the intellectual property rights
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
you grant to us in the contributed materials. The term **"us"** shall mean
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
[ExplosionAI GmbH](https://explosion.ai/legal). The term
|
||||||
**"you"** shall mean the person or entity identified below.
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
If you agree to be bound by these terms, fill in the information requested
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
|
2
.github/ISSUE_TEMPLATE.md
vendored
2
.github/ISSUE_TEMPLATE.md
vendored
|
@ -1,7 +1,7 @@
|
||||||
<!--- Please provide a summary in the title and describe your issue here.
|
<!--- Please provide a summary in the title and describe your issue here.
|
||||||
Is this a bug or feature request? If a bug, include all the steps that led to the issue.
|
Is this a bug or feature request? If a bug, include all the steps that led to the issue.
|
||||||
|
|
||||||
If you're looking for help with your code, consider posting a question on StackOverflow instead:
|
If you're looking for help with your code, consider posting a question on Stack Overflow instead:
|
||||||
http://stackoverflow.com/questions/tagged/spacy -->
|
http://stackoverflow.com/questions/tagged/spacy -->
|
||||||
|
|
||||||
|
|
||||||
|
|
4
.github/ISSUE_TEMPLATE/05_other.md
vendored
4
.github/ISSUE_TEMPLATE/05_other.md
vendored
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
name: "\U0001F4AC Anything else?"
|
name: "\U0001F4AC Anything else?"
|
||||||
about: For general usage questions or help with your code, please consider
|
about: For general usage questions or help with your code, please consider
|
||||||
posting on StackOverflow instead.
|
posting on Stack Overflow instead.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
<!-- Describe your issue here. Please keep in mind that the GitHub issue tracker is mostly intended for reports related to the spaCy code base and source, and for bugs and feature requests. If you're looking for help with your code, consider posting a question on StackOverflow instead: http://stackoverflow.com/questions/tagged/spacy -->
|
<!-- Describe your issue here. Please keep in mind that the GitHub issue tracker is mostly intended for reports related to the spaCy code base and source, and for bugs and feature requests. If you're looking for help with your code, consider posting a question on Stack Overflow instead: http://stackoverflow.com/questions/tagged/spacy -->
|
||||||
|
|
||||||
## Your Environment
|
## Your Environment
|
||||||
<!-- Include details of your environment. If you're using spaCy 1.7+, you can also type `python -m spacy info --markdown` and copy-paste the result here.-->
|
<!-- Include details of your environment. If you're using spaCy 1.7+, you can also type `python -m spacy info --markdown` and copy-paste the result here.-->
|
||||||
|
|
106
.github/contributors/ALSchwalm.md
vendored
Normal file
106
.github/contributors/ALSchwalm.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | ------------------------ |
|
||||||
|
| Name | Adam Schwalm |
|
||||||
|
| Company name (if applicable) | Star Lab |
|
||||||
|
| Title or role (if applicable) | Software Engineer |
|
||||||
|
| Date | 2018-11-28 |
|
||||||
|
| GitHub username | ALSchwalm |
|
||||||
|
| Website (optional) | https://alschwalm.com |
|
106
.github/contributors/BramVanroy.md
vendored
Normal file
106
.github/contributors/BramVanroy.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | ----------------------|
|
||||||
|
| Name | Bram Vanroy |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | October 19, 2018 |
|
||||||
|
| GitHub username | BramVanroy |
|
||||||
|
| Website (optional) | https://bramvanroy.be |
|
106
.github/contributors/Brixjohn.md
vendored
Normal file
106
.github/contributors/Brixjohn.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [X] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Brixter John Lumabi |
|
||||||
|
| Company name (if applicable) | Stratpoint |
|
||||||
|
| Title or role (if applicable) | Software Developer |
|
||||||
|
| Date | 18 December 2018 |
|
||||||
|
| GitHub username | Brixjohn |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/Cinnamy.md
vendored
Normal file
106
.github/contributors/Cinnamy.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Marina Lysyuk |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 13.10.2018 |
|
||||||
|
| GitHub username | Cinnamy |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/DeNeutoy.md
vendored
Normal file
106
.github/contributors/DeNeutoy.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name |Mark Neumann |
|
||||||
|
| Company name (if applicable) |Allen Institute for AI |
|
||||||
|
| Title or role (if applicable) |Research Engineer |
|
||||||
|
| Date | 13/01/2019 |
|
||||||
|
| GitHub username |@Deneutoy |
|
||||||
|
| Website (optional) |markneumann.xyz |
|
106
.github/contributors/DoomCoder.md
vendored
Normal file
106
.github/contributors/DoomCoder.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Piotr Książek |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 22.11.2018 |
|
||||||
|
| GitHub username | DoomCoder |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/Gizzio.md
vendored
Normal file
106
.github/contributors/Gizzio.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [X] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Stanisław Giziński |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 21.11.2018 |
|
||||||
|
| GitHub username | Gizzio |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/JKhakpour.md
vendored
Normal file
106
.github/contributors/JKhakpour.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Ja'far Khakpour |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2018-09-24 |
|
||||||
|
| GitHub username | JKhakpour |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/Loghijiaha.md
vendored
Normal file
106
.github/contributors/Loghijiaha.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [ x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ x] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Loghi Perinpanayagam |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | Student |
|
||||||
|
| Date | 13 Jan, 2019 |
|
||||||
|
| GitHub username | loghijiaha |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/MateuszOlko.md
vendored
Normal file
106
.github/contributors/MateuszOlko.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Mateusz Olko |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 22.11.2018 |
|
||||||
|
| GitHub username | MateuszOlko |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/PolyglotOpenstreetmap.md
vendored
Normal file
106
.github/contributors/PolyglotOpenstreetmap.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Jo |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2018-01-26 |
|
||||||
|
| GitHub username | PolyglotOpenstreetmap |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/adrianeboyd.md
vendored
Normal file
106
.github/contributors/adrianeboyd.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Adriane Boyd |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 28 January 2019 |
|
||||||
|
| GitHub username | adrianeboyd |
|
||||||
|
| Website (optional) | |
|
87
.github/contributors/akki2825.md
vendored
Normal file
87
.github/contributors/akki2825.md
vendored
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Akhilesh K R |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2019-02-12 |
|
||||||
|
| GitHub username | akki2825 |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/alvaroabascar.md
vendored
Normal file
106
.github/contributors/alvaroabascar.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Álvaro Abella |
|
||||||
|
| Company name (if applicable) | IOMED |
|
||||||
|
| Title or role (if applicable) | CSO |
|
||||||
|
| Date | 21/12/2018 |
|
||||||
|
| GitHub username | alvaroabascar |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/alvations.md
vendored
Normal file
106
.github/contributors/alvations.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Liling |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 04 Jan 2019 |
|
||||||
|
| GitHub username | alvations |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/amperinet.md
vendored
Normal file
106
.github/contributors/amperinet.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | ----------------------- |
|
||||||
|
| Name | Amandine Périnet |
|
||||||
|
| Company name (if applicable) | 365Talents |
|
||||||
|
| Title or role (if applicable) | Data Science Researcher |
|
||||||
|
| Date | 28/01/2019 |
|
||||||
|
| GitHub username | amperinet |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/aniruddha-adhikary.md
vendored
Normal file
106
.github/contributors/aniruddha-adhikary.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Aniruddha Adhikary |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2018-09-05 |
|
||||||
|
| GitHub username | aniruddha-adhikary |
|
||||||
|
| Website (optional) | https://adhikary.net |
|
106
.github/contributors/aongko.md
vendored
Normal file
106
.github/contributors/aongko.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Andrew Ongko |
|
||||||
|
| Company name (if applicable) | Kurio |
|
||||||
|
| Title or role (if applicable) | Senior Data Science |
|
||||||
|
| Date | Sep 10, 2018 |
|
||||||
|
| GitHub username | aongko |
|
||||||
|
| Website (optional) | |
|
54
.github/contributors/aryaprabhudesai.md
vendored
Normal file
54
.github/contributors/aryaprabhudesai.md
vendored
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
spaCy contributor agreement
|
||||||
|
This spaCy Contributor Agreement ("SCA") is based on the Oracle Contributor Agreement. The SCA applies to any contribution that you make to any product or project managed by us (the "project"), and sets out the intellectual property rights you grant to us in the contributed materials. The term "us" shall mean ExplosionAI UG (haftungsbeschränkt). The term "you" shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested below and include the filled-in version with your first pull request, under the folder .github/contributors/. The name of the file should be your GitHub username, with the extension .md. For example, the user example_user would create the file .github/contributors/example_user.md.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions constitute a binding legal agreement.
|
||||||
|
|
||||||
|
Contributor Agreement
|
||||||
|
The term "contribution" or "contributed materials" means any source code, object code, patch, tool, sample, graphic, specification, manual, documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
With respect to any worldwide copyrights, or copyright applications and registrations, in your contribution:
|
||||||
|
|
||||||
|
you hereby assign to us joint ownership, and to the extent that such assignment is or becomes invalid, ineffective or unenforceable, you hereby grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, royalty-free, unrestricted license to exercise all rights under those copyrights. This includes, at our option, the right to sublicense these same rights to third parties through multiple levels of sublicensees or other licensing arrangements;
|
||||||
|
|
||||||
|
you agree that each of us can do all things in relation to your contribution as if each of us were the sole owners, and if one of us makes a derivative work of your contribution, the one who makes the derivative work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
you agree that you will not assert any moral rights in your contribution against us, our licensees or transferees;
|
||||||
|
|
||||||
|
you agree that we may register a copyright in your contribution and exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
you agree that neither of us has any duty to consult with, obtain the consent of, pay or render an accounting to the other for any use or distribution of your contribution.
|
||||||
|
|
||||||
|
With respect to any patents you own, or that you can license without payment to any third party, you hereby grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
make, have made, use, sell, offer to sell, import, and otherwise transfer your contribution in whole or in part, alone or in combination with or included in any product, work or materials arising out of the project to which your contribution was submitted, and
|
||||||
|
|
||||||
|
at our option, to sublicense these same rights to third parties through multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
Except as set out above, you keep all right, title, and interest in your contribution. The rights that you grant to us under these terms are effective on the date you first submitted a contribution to us, even if your submission took place before the date you sign these terms.
|
||||||
|
|
||||||
|
You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
Each contribution that you submit is and shall be an original work of authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
to the best of your knowledge, each contribution will not violate any third party's copyrights, trademarks, patents, or other intellectual property rights; and
|
||||||
|
|
||||||
|
each contribution shall be in compliance with U.S. export control laws and other applicable export and import laws. You agree to notify us if you become aware of any circumstance which would make any of the foregoing representations inaccurate in any respect. We may publicly disclose your participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
This SCA is governed by the laws of the State of California and applicable U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
Please place an “x” on one of the applicable statements below. Please do NOT mark both statements:
|
||||||
|
|
||||||
|
[X] I am signing on behalf of myself as an individual and no other person or entity, including my employer, has or will have rights with respect to my contributions.
|
||||||
|
|
||||||
|
I am signing on behalf of my employer or a legal entity and I have the actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
Contributor Details
|
||||||
|
Field Entry
|
||||||
|
Name Arya Prabhudesai
|
||||||
|
Company name (if applicable) -
|
||||||
|
Title or role (if applicable) -
|
||||||
|
Date 2018-08-17
|
||||||
|
GitHub username aryaprabhudesai
|
||||||
|
Website (optional) -
|
106
.github/contributors/beatesi.md
vendored
Normal file
106
.github/contributors/beatesi.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Beate Sildnes |
|
||||||
|
| Company name (if applicable) | NAV |
|
||||||
|
| Title or role (if applicable) | Data Scientist |
|
||||||
|
| Date | 04.12.2018 |
|
||||||
|
| GitHub username | beatesi |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/boena.md
vendored
Normal file
106
.github/contributors/boena.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Björn Lennartsson |
|
||||||
|
| Company name (if applicable) | Uptrail AB |
|
||||||
|
| Title or role (if applicable) | CTO |
|
||||||
|
| Date | 2019-01-15 |
|
||||||
|
| GitHub username | boena |
|
||||||
|
| Website (optional) | www.uptrail.com |
|
106
.github/contributors/charlax.md
vendored
Normal file
106
.github/contributors/charlax.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Charles-Axel Dein |
|
||||||
|
| Company name (if applicable) | Skrib |
|
||||||
|
| Title or role (if applicable) | CEO |
|
||||||
|
| Date | 27/09/2018 |
|
||||||
|
| GitHub username | charlax |
|
||||||
|
| Website (optional) | www.dein.fr |
|
106
.github/contributors/chezou.md
vendored
Normal file
106
.github/contributors/chezou.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Aki Ariga |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 07/12/2018 |
|
||||||
|
| GitHub username | chezou |
|
||||||
|
| Website (optional) | chezo.uno |
|
106
.github/contributors/cicorias.md
vendored
Normal file
106
.github/contributors/cicorias.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [X] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Shawn Cicoria |
|
||||||
|
| Company name (if applicable) | Microsoft |
|
||||||
|
| Title or role (if applicable) | Principal Software Engineer |
|
||||||
|
| Date | November 20, 2018 |
|
||||||
|
| GitHub username | cicorias |
|
||||||
|
| Website (optional) | www.cicoria.com |
|
106
.github/contributors/clippered.md
vendored
Normal file
106
.github/contributors/clippered.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Kenneth Cruz |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2018-12-07 |
|
||||||
|
| GitHub username | clippered |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/darindf.md
vendored
Normal file
106
.github/contributors/darindf.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Darin DeForest |
|
||||||
|
| Company name (if applicable) | Ipro Tech |
|
||||||
|
| Title or role (if applicable) | Senior Software Engineer |
|
||||||
|
| Date | 2018-09-26 |
|
||||||
|
| GitHub username | darindf |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/filipecaixeta.md
vendored
Normal file
106
.github/contributors/filipecaixeta.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Filipe Caixeta |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 09.12.2018 |
|
||||||
|
| GitHub username | filipecaixeta |
|
||||||
|
| Website (optional) | filipecaixeta.com.br |
|
106
.github/contributors/foufaster.md
vendored
Normal file
106
.github/contributors/foufaster.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name |Anès Foufa |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) |NLP developer |
|
||||||
|
| Date |21/01/2019 |
|
||||||
|
| GitHub username |foufaster |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/frascuchon.md
vendored
Normal file
106
.github/contributors/frascuchon.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Francisco Aranda |
|
||||||
|
| Company name (if applicable) | recognai |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | |
|
||||||
|
| GitHub username | frascuchon |
|
||||||
|
| Website (optional) | https://recogn.ai |
|
106
.github/contributors/free-variation.md
vendored
Normal file
106
.github/contributors/free-variation.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | John Stewart |
|
||||||
|
| Company name (if applicable) | Amplify |
|
||||||
|
| Title or role (if applicable) | SVP Research |
|
||||||
|
| Date | 14/09/2018 |
|
||||||
|
| GitHub username | free-variation |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/gavrieltal.md
vendored
Normal file
106
.github/contributors/gavrieltal.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Gavriel Loria |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | Nov 29, 2018 |
|
||||||
|
| GitHub username | gavrieltal |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/grivaz.md
vendored
Normal file
106
.github/contributors/grivaz.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | C. Grivaz |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 08.22.2018 |
|
||||||
|
| GitHub username | grivaz |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/jacopofar.md
vendored
Normal file
106
.github/contributors/jacopofar.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [X] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Jacopo Farina |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2018-10-12 |
|
||||||
|
| GitHub username | jacopofar |
|
||||||
|
| Website (optional) | jacopofarina.eu |
|
106
.github/contributors/jarib.md
vendored
Normal file
106
.github/contributors/jarib.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Jari Bakken |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2018-12-21 |
|
||||||
|
| GitHub username | jarib |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/juliamakogon.md
vendored
Normal file
106
.github/contributors/juliamakogon.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Julia Makogon |
|
||||||
|
| Company name (if applicable) | Semantrum |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 07.02.2019 |
|
||||||
|
| GitHub username | juliamakogon |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/kbulygin.md
vendored
Normal file
106
.github/contributors/kbulygin.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Kirill Bulygin |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2018-12-18 |
|
||||||
|
| GitHub username | kbulygin |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/keshan.md
vendored
Normal file
106
.github/contributors/keshan.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Keshan Sodimana |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | Sep 21, 2018 |
|
||||||
|
| GitHub username | keshan |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/kowaalczyk.md
vendored
Normal file
106
.github/contributors/kowaalczyk.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name |Krzysztof Kowalczyk |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date |22.11.2018 |
|
||||||
|
| GitHub username |kowaalczyk |
|
||||||
|
| Website (optional) |kowaalczyk.pl |
|
106
.github/contributors/lauraBaakman.md
vendored
Normal file
106
.github/contributors/lauraBaakman.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Laura Baakman |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | February 7, 2019 |
|
||||||
|
| GitHub username | lauraBaakman |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/mbkupfer.md
vendored
Normal file
106
.github/contributors/mbkupfer.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Maxim Kupfer |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | Sep 6, 2018 |
|
||||||
|
| GitHub username | mbkupfer |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/mikelibg.md
vendored
Normal file
106
.github/contributors/mikelibg.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | ------------------------ |
|
||||||
|
| Name | Michael Liberman |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2018-11-08 |
|
||||||
|
| GitHub username | mikelibg |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/moreymat.md
vendored
Normal file
106
.github/contributors/moreymat.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Mathieu Morey |
|
||||||
|
| Company name (if applicable) | Datactivist |
|
||||||
|
| Title or role (if applicable) | Researcher |
|
||||||
|
| Date | 2019-01-07 |
|
||||||
|
| GitHub username | moreymat |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/mpuig.md
vendored
Normal file
106
.github/contributors/mpuig.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Marc Puig |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2018-11-17 |
|
||||||
|
| GitHub username | mpuig |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/njsmith.md
vendored
Normal file
106
.github/contributors/njsmith.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Nathaniel J. Smith |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2018-08-26 |
|
||||||
|
| GitHub username | njsmith |
|
||||||
|
| Website (optional) | https://vorpus.org |
|
106
.github/contributors/ozcankasal.md
vendored
Normal file
106
.github/contributors/ozcankasal.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Ozcan Kasal |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | December 21, 2018 |
|
||||||
|
| GitHub username | ozcankasal |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/phojnacki.md
vendored
Normal file
106
.github/contributors/phojnacki.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | ------------------------------------- |
|
||||||
|
| Name | Przemysław Hojnacki |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 12/09/2018 |
|
||||||
|
| GitHub username | phojnacki |
|
||||||
|
| Website (optional) | https://about.me/przemyslaw.hojnacki |
|
106
.github/contributors/pzelasko.md
vendored
Normal file
106
.github/contributors/pzelasko.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Piotr Żelasko |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 04-09-2018 |
|
||||||
|
| GitHub username | pzelasko |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/retnuh.md
vendored
Normal file
106
.github/contributors/retnuh.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
- Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
- to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
- each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
| ----------------------------- | ------------ |
|
||||||
|
| Name | Hunter Kelly |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2019-01-10 |
|
||||||
|
| GitHub username | retnuh |
|
||||||
|
| Website (optional) | |
|
107
.github/contributors/roshni-b.md
vendored
Normal file
107
.github/contributors/roshni-b.md
vendored
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Roshni Biswas |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 02-17-2019 |
|
||||||
|
| GitHub username | roshni-b |
|
||||||
|
| Website (optional) | |
|
||||||
|
|
106
.github/contributors/sainathadapa.md
vendored
Normal file
106
.github/contributors/sainathadapa.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Sainath Adapa |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2018-09-06 |
|
||||||
|
| GitHub username | sainathadapa |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/svlandeg.md
vendored
Normal file
106
.github/contributors/svlandeg.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Sofie Van Landeghem |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 29 Nov 2018 |
|
||||||
|
| GitHub username | svlandeg |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/tyburam.md
vendored
Normal file
106
.github/contributors/tyburam.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Mateusz Tybura |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 08.09.2018 |
|
||||||
|
| GitHub username | tyburam |
|
||||||
|
| Website (optional) | |
|
106
.github/contributors/willprice.md
vendored
Normal file
106
.github/contributors/willprice.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | --------------------- |
|
||||||
|
| Name | Will Price |
|
||||||
|
| Company name (if applicable) | N/A |
|
||||||
|
| Title or role (if applicable) | N/A |
|
||||||
|
| Date | 26/12/2018 |
|
||||||
|
| GitHub username | willprice |
|
||||||
|
| Website (optional) | https://willprice.org |
|
106
.github/contributors/wxv.md
vendored
Normal file
106
.github/contributors/wxv.md
vendored
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# spaCy contributor agreement
|
||||||
|
|
||||||
|
This spaCy Contributor Agreement (**"SCA"**) is based on the
|
||||||
|
[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
|
||||||
|
The SCA applies to any contribution that you make to any product or project
|
||||||
|
managed by us (the **"project"**), and sets out the intellectual property rights
|
||||||
|
you grant to us in the contributed materials. The term **"us"** shall mean
|
||||||
|
[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
|
||||||
|
**"you"** shall mean the person or entity identified below.
|
||||||
|
|
||||||
|
If you agree to be bound by these terms, fill in the information requested
|
||||||
|
below and include the filled-in version with your first pull request, under the
|
||||||
|
folder [`.github/contributors/`](/.github/contributors/). The name of the file
|
||||||
|
should be your GitHub username, with the extension `.md`. For example, the user
|
||||||
|
example_user would create the file `.github/contributors/example_user.md`.
|
||||||
|
|
||||||
|
Read this agreement carefully before signing. These terms and conditions
|
||||||
|
constitute a binding legal agreement.
|
||||||
|
|
||||||
|
## Contributor Agreement
|
||||||
|
|
||||||
|
1. The term "contribution" or "contributed materials" means any source code,
|
||||||
|
object code, patch, tool, sample, graphic, specification, manual,
|
||||||
|
documentation, or any other material posted or submitted by you to the project.
|
||||||
|
|
||||||
|
2. With respect to any worldwide copyrights, or copyright applications and
|
||||||
|
registrations, in your contribution:
|
||||||
|
|
||||||
|
* you hereby assign to us joint ownership, and to the extent that such
|
||||||
|
assignment is or becomes invalid, ineffective or unenforceable, you hereby
|
||||||
|
grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
|
||||||
|
royalty-free, unrestricted license to exercise all rights under those
|
||||||
|
copyrights. This includes, at our option, the right to sublicense these same
|
||||||
|
rights to third parties through multiple levels of sublicensees or other
|
||||||
|
licensing arrangements;
|
||||||
|
|
||||||
|
* you agree that each of us can do all things in relation to your
|
||||||
|
contribution as if each of us were the sole owners, and if one of us makes
|
||||||
|
a derivative work of your contribution, the one who makes the derivative
|
||||||
|
work (or has it made) will be the sole owner of that derivative work;
|
||||||
|
|
||||||
|
* you agree that you will not assert any moral rights in your contribution
|
||||||
|
against us, our licensees or transferees;
|
||||||
|
|
||||||
|
* you agree that we may register a copyright in your contribution and
|
||||||
|
exercise all ownership rights associated with it; and
|
||||||
|
|
||||||
|
* you agree that neither of us has any duty to consult with, obtain the
|
||||||
|
consent of, pay or render an accounting to the other for any use or
|
||||||
|
distribution of your contribution.
|
||||||
|
|
||||||
|
3. With respect to any patents you own, or that you can license without payment
|
||||||
|
to any third party, you hereby grant to us a perpetual, irrevocable,
|
||||||
|
non-exclusive, worldwide, no-charge, royalty-free license to:
|
||||||
|
|
||||||
|
* make, have made, use, sell, offer to sell, import, and otherwise transfer
|
||||||
|
your contribution in whole or in part, alone or in combination with or
|
||||||
|
included in any product, work or materials arising out of the project to
|
||||||
|
which your contribution was submitted, and
|
||||||
|
|
||||||
|
* at our option, to sublicense these same rights to third parties through
|
||||||
|
multiple levels of sublicensees or other licensing arrangements.
|
||||||
|
|
||||||
|
4. Except as set out above, you keep all right, title, and interest in your
|
||||||
|
contribution. The rights that you grant to us under these terms are effective
|
||||||
|
on the date you first submitted a contribution to us, even if your submission
|
||||||
|
took place before the date you sign these terms.
|
||||||
|
|
||||||
|
5. You covenant, represent, warrant and agree that:
|
||||||
|
|
||||||
|
* Each contribution that you submit is and shall be an original work of
|
||||||
|
authorship and you can legally grant the rights set out in this SCA;
|
||||||
|
|
||||||
|
* to the best of your knowledge, each contribution will not violate any
|
||||||
|
third party's copyrights, trademarks, patents, or other intellectual
|
||||||
|
property rights; and
|
||||||
|
|
||||||
|
* each contribution shall be in compliance with U.S. export control laws and
|
||||||
|
other applicable export and import laws. You agree to notify us if you
|
||||||
|
become aware of any circumstance which would make any of the foregoing
|
||||||
|
representations inaccurate in any respect. We may publicly disclose your
|
||||||
|
participation in the project, including the fact that you have signed the SCA.
|
||||||
|
|
||||||
|
6. This SCA is governed by the laws of the State of California and applicable
|
||||||
|
U.S. Federal law. Any choice of law rules will not apply.
|
||||||
|
|
||||||
|
7. Please place an “x” on one of the applicable statements below. Please do NOT
|
||||||
|
mark both statements:
|
||||||
|
|
||||||
|
* [x] I am signing on behalf of myself as an individual and no other person
|
||||||
|
or entity, including my employer, has or will have rights with respect to my
|
||||||
|
contributions.
|
||||||
|
|
||||||
|
* [ ] I am signing on behalf of my employer or a legal entity and I have the
|
||||||
|
actual authority to contractually bind that entity.
|
||||||
|
|
||||||
|
## Contributor Details
|
||||||
|
|
||||||
|
| Field | Entry |
|
||||||
|
|------------------------------- | -------------------- |
|
||||||
|
| Name | Jason Xu |
|
||||||
|
| Company name (if applicable) | |
|
||||||
|
| Title or role (if applicable) | |
|
||||||
|
| Date | 2018-11-29 |
|
||||||
|
| GitHub username | wxv |
|
||||||
|
| Website (optional) | |
|
8
.gitignore
vendored
8
.gitignore
vendored
|
@ -5,9 +5,15 @@ corpora/
|
||||||
keys/
|
keys/
|
||||||
|
|
||||||
# Website
|
# Website
|
||||||
|
website/.cache/
|
||||||
|
website/public/
|
||||||
|
website/node_modules
|
||||||
|
website/.npm
|
||||||
|
website/logs
|
||||||
|
*.log
|
||||||
|
npm-debug.log*
|
||||||
website/www/
|
website/www/
|
||||||
website/_deploy.sh
|
website/_deploy.sh
|
||||||
website/.gitignore
|
|
||||||
|
|
||||||
# Cython / C extensions
|
# Cython / C extensions
|
||||||
cythonize.json
|
cythonize.json
|
||||||
|
|
14
.travis.yml
14
.travis.yml
|
@ -1,26 +1,20 @@
|
||||||
language: python
|
language: python
|
||||||
|
|
||||||
sudo: false
|
sudo: false
|
||||||
|
cache: pip
|
||||||
dist: trusty
|
dist: trusty
|
||||||
group: edge
|
group: edge
|
||||||
|
|
||||||
python:
|
python:
|
||||||
- "2.7"
|
- "2.7"
|
||||||
- "3.5"
|
- "3.5"
|
||||||
- "3.6"
|
- "3.6"
|
||||||
|
|
||||||
os:
|
os:
|
||||||
- linux
|
- linux
|
||||||
|
|
||||||
env:
|
env:
|
||||||
- VIA=compile
|
- VIA=compile
|
||||||
- VIA=flake8
|
- VIA=flake8
|
||||||
#- VIA=pypi_nightly
|
|
||||||
|
|
||||||
install:
|
install:
|
||||||
- "./travis.sh"
|
- "./travis.sh"
|
||||||
- pip install flake8
|
- pip install flake8
|
||||||
|
|
||||||
script:
|
script:
|
||||||
- "cat /proc/cpuinfo | grep flags | head -n 1"
|
- "cat /proc/cpuinfo | grep flags | head -n 1"
|
||||||
- "pip install pytest pytest-timeout"
|
- "pip install pytest pytest-timeout"
|
||||||
|
@ -28,10 +22,10 @@ script:
|
||||||
- if [[ "${VIA}" == "flake8" ]]; then flake8 . --count --exclude=spacy/compat.py,spacy/lang --select=E901,E999,F821,F822,F823 --show-source --statistics; fi
|
- if [[ "${VIA}" == "flake8" ]]; then flake8 . --count --exclude=spacy/compat.py,spacy/lang --select=E901,E999,F821,F822,F823 --show-source --statistics; fi
|
||||||
- if [[ "${VIA}" == "pypi_nightly" ]]; then python -m pytest --tb=native --models --en `python -c "import os.path; import spacy; print(os.path.abspath(os.path.dirname(spacy.__file__)))"`; fi
|
- if [[ "${VIA}" == "pypi_nightly" ]]; then python -m pytest --tb=native --models --en `python -c "import os.path; import spacy; print(os.path.abspath(os.path.dirname(spacy.__file__)))"`; fi
|
||||||
- if [[ "${VIA}" == "sdist" ]]; then python -m pytest --tb=native `python -c "import os.path; import spacy; print(os.path.abspath(os.path.dirname(spacy.__file__)))"`; fi
|
- if [[ "${VIA}" == "sdist" ]]; then python -m pytest --tb=native `python -c "import os.path; import spacy; print(os.path.abspath(os.path.dirname(spacy.__file__)))"`; fi
|
||||||
|
branches:
|
||||||
|
except:
|
||||||
|
- spacy.io
|
||||||
notifications:
|
notifications:
|
||||||
slack:
|
slack:
|
||||||
secure: F8GvqnweSdzImuLL64TpfG0i5rYl89liyr9tmFVsHl4c0DNiDuGhZivUz0M1broS8svE3OPOllLfQbACG/4KxD890qfF9MoHzvRDlp7U+RtwMV/YAkYn8MGWjPIbRbX0HpGdY7O2Rc9Qy4Kk0T8ZgiqXYIqAz2Eva9/9BlSmsJQ=
|
secure: F8GvqnweSdzImuLL64TpfG0i5rYl89liyr9tmFVsHl4c0DNiDuGhZivUz0M1broS8svE3OPOllLfQbACG/4KxD890qfF9MoHzvRDlp7U+RtwMV/YAkYn8MGWjPIbRbX0HpGdY7O2Rc9Qy4Kk0T8ZgiqXYIqAz2Eva9/9BlSmsJQ=
|
||||||
email: false
|
email: false
|
||||||
|
|
||||||
cache: pip
|
|
||||||
|
|
111
CONTRIBUTING.md
111
CONTRIBUTING.md
|
@ -26,7 +26,7 @@ also check the [troubleshooting guide](https://spacy.io/usage/#troubleshooting)
|
||||||
to see if your problem is already listed there.
|
to see if your problem is already listed there.
|
||||||
|
|
||||||
If you're looking for help with your code, consider posting a question on
|
If you're looking for help with your code, consider posting a question on
|
||||||
[StackOverflow](http://stackoverflow.com/questions/tagged/spacy) instead. If you
|
[Stack Overflow](http://stackoverflow.com/questions/tagged/spacy) instead. If you
|
||||||
tag it `spacy` and `python`, more people will see it and hopefully be able to
|
tag it `spacy` and `python`, more people will see it and hopefully be able to
|
||||||
help. Please understand that we won't be able to provide individual support via
|
help. Please understand that we won't be able to provide individual support via
|
||||||
email. We also believe that help is much more valuable if it's **shared publicly**,
|
email. We also believe that help is much more valuable if it's **shared publicly**,
|
||||||
|
@ -55,7 +55,7 @@ even format them as Markdown to copy-paste into GitHub issues:
|
||||||
`python -m spacy info --markdown`.
|
`python -m spacy info --markdown`.
|
||||||
|
|
||||||
* **Checking the model compatibility:** If you're having problems with a
|
* **Checking the model compatibility:** If you're having problems with a
|
||||||
[statistical model](https://spacy.io/models), it may be because to the
|
[statistical model](https://spacy.io/models), it may be because the
|
||||||
model is incompatible with your spaCy installation. In spaCy v2.0+, you can check
|
model is incompatible with your spaCy installation. In spaCy v2.0+, you can check
|
||||||
this on the command line by running `python -m spacy validate`.
|
this on the command line by running `python -m spacy validate`.
|
||||||
|
|
||||||
|
@ -186,13 +186,99 @@ sure your test passes and reference the issue in your commit message.
|
||||||
## Code conventions
|
## Code conventions
|
||||||
|
|
||||||
Code should loosely follow [pep8](https://www.python.org/dev/peps/pep-0008/).
|
Code should loosely follow [pep8](https://www.python.org/dev/peps/pep-0008/).
|
||||||
Regular line length is **80 characters**, with some tolerance for lines up to
|
As of `v2.1.0`, spaCy uses [`black`](https://github.com/ambv/black) for code
|
||||||
90 characters if the alternative would be worse — for instance, if your list
|
formatting and [`flake8`](http://flake8.pycqa.org/en/latest/) for linting its
|
||||||
comprehension comes to 82 characters, it's better not to split it over two lines.
|
Python modules. If you've built spaCy from source, you'll already have both
|
||||||
You can also use a linter like [`flake8`](https://pypi.python.org/pypi/flake8)
|
tools installed.
|
||||||
or [`frosted`](https://pypi.python.org/pypi/frosted) – just keep in mind that
|
|
||||||
it won't work very well for `.pyx` files and will complain about Cython syntax
|
**⚠️ Note that formatting and linting is currently only possible for Python
|
||||||
like `<int*>` or `cimport`.
|
modules in `.py` files, not Cython modules in `.pyx` and `.pxd` files.**
|
||||||
|
|
||||||
|
### Code formatting
|
||||||
|
|
||||||
|
[`black`](https://github.com/ambv/black) is an opinionated Python code
|
||||||
|
formatter, optimised to produce readable code and small diffs. You can run
|
||||||
|
`black` from the command-line, or via your code editor. For example, if you're
|
||||||
|
using [Visual Studio Code](https://code.visualstudio.com/), you can add the
|
||||||
|
following to your `settings.json` to use `black` for formatting and auto-format
|
||||||
|
your files on save:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"python.formatting.provider": "black",
|
||||||
|
"[python]": {
|
||||||
|
"editor.formatOnSave": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
[See here](https://github.com/ambv/black#editor-integration) for the full
|
||||||
|
list of available editor integrations.
|
||||||
|
|
||||||
|
#### Disabling formatting
|
||||||
|
|
||||||
|
There are a few cases where auto-formatting doesn't improve readability – for
|
||||||
|
example, in some of the language data files like the `tag_map.py`, or in
|
||||||
|
the tests that construct `Doc` objects from lists of words and other labels.
|
||||||
|
Wrapping a block in `# fmt: off` and `# fmt: on` lets you disable formatting
|
||||||
|
for that particular code. Here's an example:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# fmt: off
|
||||||
|
text = "I look forward to using Thingamajig. I've been told it will make my life easier..."
|
||||||
|
heads = [1, 0, -1, -2, -1, -1, -5, -1, 3, 2, 1, 0, 2, 1, -3, 1, 1, -3, -7]
|
||||||
|
deps = ["nsubj", "ROOT", "advmod", "prep", "pcomp", "dobj", "punct", "",
|
||||||
|
"nsubjpass", "aux", "auxpass", "ROOT", "nsubj", "aux", "ccomp",
|
||||||
|
"poss", "nsubj", "ccomp", "punct"]
|
||||||
|
# fmt: on
|
||||||
|
```
|
||||||
|
|
||||||
|
### Code linting
|
||||||
|
|
||||||
|
[`flake8`](http://flake8.pycqa.org/en/latest/) is a tool for enforcing code
|
||||||
|
style. It scans one or more files and outputs errors and warnings. This feedback
|
||||||
|
can help you stick to general standards and conventions, and can be very useful
|
||||||
|
for spotting potential mistakes and inconsistencies in your code. The most
|
||||||
|
important things to watch out for are syntax errors and undefined names, but you
|
||||||
|
also want to keep an eye on unused declared variables or repeated
|
||||||
|
(i.e. overwritten) dictionary keys. If your code was formatted with `black`
|
||||||
|
(see above), you shouldn't see any formatting-related warnings.
|
||||||
|
|
||||||
|
The [`.flake8`](.flake8) config defines the configuration we use for this
|
||||||
|
codebase. For example, we're not super strict about the line length, and we're
|
||||||
|
excluding very large files like lemmatization and tokenizer exception tables.
|
||||||
|
|
||||||
|
Ideally, running the following command from within the repo directory should
|
||||||
|
not return any errors or warnings:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
flake8 spacy
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Disabling linting
|
||||||
|
|
||||||
|
Sometimes, you explicitly want to write code that's not compatible with our
|
||||||
|
rules. For example, a module's `__init__.py` might import a function so other
|
||||||
|
modules can import it from there, but `flake8` will complain about an unused
|
||||||
|
import. And although it's generally discouraged, there might be cases where it
|
||||||
|
makes sense to use a bare `except`.
|
||||||
|
|
||||||
|
To ignore a given line, you can add a comment like `# noqa: F401`, specifying
|
||||||
|
the code of the error or warning we want to ignore. It's also possible to
|
||||||
|
ignore several comma-separated codes at once, e.g. `# noqa: E731,E123`. Here
|
||||||
|
are some examples:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# The imported class isn't used in this file, but imported here, so it can be
|
||||||
|
# imported *from* here by another module.
|
||||||
|
from .submodule import SomeClass # noqa: F401
|
||||||
|
|
||||||
|
try:
|
||||||
|
do_something()
|
||||||
|
except: # noqa: E722
|
||||||
|
# This bare except is justified, for some specific reason
|
||||||
|
do_something_else()
|
||||||
|
```
|
||||||
|
|
||||||
### Python conventions
|
### Python conventions
|
||||||
|
|
||||||
|
@ -206,10 +292,9 @@ for example to show more specific error messages, you can use the `is_config()`
|
||||||
helper function.
|
helper function.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from .compat import unicode_, json_dumps, is_config
|
from .compat import unicode_, is_config
|
||||||
|
|
||||||
compatible_unicode = unicode_('hello world')
|
compatible_unicode = unicode_('hello world')
|
||||||
compatible_json = json_dumps({'key': 'value'})
|
|
||||||
if is_config(windows=True, python2=True):
|
if is_config(windows=True, python2=True):
|
||||||
print("You are using Python 2 on Windows.")
|
print("You are using Python 2 on Windows.")
|
||||||
```
|
```
|
||||||
|
@ -235,7 +320,7 @@ of other types these names. For instance, don't name a text string `doc` — you
|
||||||
should usually call this `text`. Two general code style preferences further help
|
should usually call this `text`. Two general code style preferences further help
|
||||||
with naming. First, **lean away from introducing temporary variables**, as these
|
with naming. First, **lean away from introducing temporary variables**, as these
|
||||||
clutter your namespace. This is one reason why comprehension expressions are
|
clutter your namespace. This is one reason why comprehension expressions are
|
||||||
often preferred. Second, **keep your functions shortish**, so that can work in a
|
often preferred. Second, **keep your functions shortish**, so they can work in a
|
||||||
smaller scope. Of course, this is a question of trade-offs.
|
smaller scope. Of course, this is a question of trade-offs.
|
||||||
|
|
||||||
### Cython conventions
|
### Cython conventions
|
||||||
|
@ -353,7 +438,7 @@ avoid unnecessary imports.
|
||||||
Extensive tests that take a long time should be marked with `@pytest.mark.slow`.
|
Extensive tests that take a long time should be marked with `@pytest.mark.slow`.
|
||||||
Tests that require the model to be loaded should be marked with
|
Tests that require the model to be loaded should be marked with
|
||||||
`@pytest.mark.models`. Loading the models is expensive and not necessary if
|
`@pytest.mark.models`. Loading the models is expensive and not necessary if
|
||||||
you're not actually testing the model performance. If all you needs ia a `Doc`
|
you're not actually testing the model performance. If all you need is a `Doc`
|
||||||
object with annotations like heads, POS tags or the dependency parse, you can
|
object with annotations like heads, POS tags or the dependency parse, you can
|
||||||
use the `get_doc()` utility function to construct it manually.
|
use the `get_doc()` utility function to construct it manually.
|
||||||
|
|
||||||
|
|
|
@ -1,83 +0,0 @@
|
||||||
# 👥 Contributors
|
|
||||||
|
|
||||||
This is a list of everyone who has made significant contributions to spaCy, in alphabetical order. Thanks a lot for the great work!
|
|
||||||
|
|
||||||
* Adam Bittlingmayer, [@bittlingmayer](https://github.com/bittlingmayer)
|
|
||||||
* Alexey Kim, [@yuukos](https://github.com/yuukos)
|
|
||||||
* Alexis Eidelman, [@AlexisEidelman](https://github.com/AlexisEidelman)
|
|
||||||
* Ali Zarezade, [@azarezade](https://github.com/azarezade)
|
|
||||||
* Andreas Grivas, [@andreasgrv](https://github.com/andreasgrv)
|
|
||||||
* Andrew Poliakov, [@pavlin99th](https://github.com/pavlin99th)
|
|
||||||
* Aniruddha Adhikary, [@aniruddha-adhikary](https://github.com/aniruddha-adhikary)
|
|
||||||
* Anto Binish Kaspar, [@binishkaspar](https://github.com/binishkaspar)
|
|
||||||
* Avadh Patel, [@avadhpatel](https://github.com/avadhpatel)
|
|
||||||
* Ben Eyal, [@beneyal](https://github.com/beneyal)
|
|
||||||
* Bhargav Srinivasa, [@bhargavvader](https://github.com/bhargavvader)
|
|
||||||
* Bruno P. Kinoshita, [@kinow](https://github.com/kinow)
|
|
||||||
* Canbey Bilgili, [@cbilgili](https://github.com/cbilgili)
|
|
||||||
* Chris DuBois, [@chrisdubois](https://github.com/chrisdubois)
|
|
||||||
* Christoph Schwienheer, [@chssch](https://github.com/chssch)
|
|
||||||
* Dafne van Kuppevelt, [@dafnevk](https://github.com/dafnevk)
|
|
||||||
* Daniel Rapp, [@rappdw](https://github.com/rappdw)
|
|
||||||
* Daniel Vila Suero, [@dvsrepo](https://github.com/dvsrepo)
|
|
||||||
* Dmytro Sadovnychyi, [@sadovnychyi](https://github.com/sadovnychyi)
|
|
||||||
* Eric Zhao, [@ericzhao28](https://github.com/ericzhao28)
|
|
||||||
* Francisco Aranda, [@frascuchon](https://github.com/frascuchon)
|
|
||||||
* Greg Baker, [@solresol](https://github.com/solresol)
|
|
||||||
* Greg Dubbin, [@GregDubbin](https://github.com/GregDubbin)
|
|
||||||
* Grégory Howard, [@Gregory-Howard](https://github.com/Gregory-Howard)
|
|
||||||
* György Orosz, [@oroszgy](https://github.com/oroszgy)
|
|
||||||
* Henning Peters, [@henningpeters](https://github.com/henningpeters)
|
|
||||||
* Iddo Berger, [@iddoberger](https://github.com/iddoberger)
|
|
||||||
* Ines Montani, [@ines](https://github.com/ines)
|
|
||||||
* J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading)
|
|
||||||
* Janneke van der Zwaan, [@jvdzwaan](https://github.com/jvdzwaan)
|
|
||||||
* Jim Geovedi, [@geovedi](https://github.com/geovedi)
|
|
||||||
* Jim Regan, [@jimregan](https://github.com/jimregan)
|
|
||||||
* Jeffrey Gerard, [@IamJeffG](https://github.com/IamJeffG)
|
|
||||||
* Jordan Suchow, [@suchow](https://github.com/suchow)
|
|
||||||
* Josh Reeter, [@jreeter](https://github.com/jreeter)
|
|
||||||
* Juan Miguel Cejuela, [@juanmirocks](https://github.com/juanmirocks)
|
|
||||||
* Kendrick Tan, [@kendricktan](https://github.com/kendricktan)
|
|
||||||
* Kyle P. Johnson, [@kylepjohnson](https://github.com/kylepjohnson)
|
|
||||||
* Leif Uwe Vogelsang, [@luvogels](https://github.com/luvogels)
|
|
||||||
* Liling Tan, [@alvations](https://github.com/alvations)
|
|
||||||
* Magnus Burton, [@magnusburton](https://github.com/magnusburton)
|
|
||||||
* Mark Amery, [@ExplodingCabbage](https://github.com/ExplodingCabbage)
|
|
||||||
* Matthew Honnibal, [@honnibal](https://github.com/honnibal)
|
|
||||||
* Maxim Samsonov, [@maxirmx](https://github.com/maxirmx)
|
|
||||||
* Michael Wallin, [@wallinm1](https://github.com/wallinm1)
|
|
||||||
* Miguel Almeida, [@mamoit](https://github.com/mamoit)
|
|
||||||
* Motoki Wu, [@tokestermw](https://github.com/tokestermw)
|
|
||||||
* Ole Henrik Skogstrøm, [@ohenrik](https://github.com/ohenrik)
|
|
||||||
* Oleg Zd, [@olegzd](https://github.com/olegzd)
|
|
||||||
* Orhan Bilgin, [@melanuria](https://github.com/melanuria)
|
|
||||||
* Orion Montoya, [@mdcclv](https://github.com/mdcclv)
|
|
||||||
* Paul O'Leary McCann, [@polm](https://github.com/polm)
|
|
||||||
* Pokey Rule, [@pokey](https://github.com/pokey)
|
|
||||||
* Ramanan Balakrishnan, [@ramananbalakrishnan](https://github.com/ramananbalakrishnan)
|
|
||||||
* Raphaël Bournhonesque, [@raphael0202](https://github.com/raphael0202)
|
|
||||||
* Rob van Nieuwpoort, [@RvanNieuwpoort](https://github.com/RvanNieuwpoort)
|
|
||||||
* Roman Domrachev, [@ligser](https://github.com/ligser)
|
|
||||||
* Roman Inflianskas, [@rominf](https://github.com/rominf)
|
|
||||||
* Sam Bozek, [@sambozek](https://github.com/sambozek)
|
|
||||||
* Sasho Savkov, [@savkov](https://github.com/savkov)
|
|
||||||
* Shuvanon Razik, [@shuvanon](https://github.com/shuvanon)
|
|
||||||
* Søren Lind Kristiansen, [@sorenlind](https://github.com/sorenlind)
|
|
||||||
* Swier, [@swierh](https://github.com/swierh)
|
|
||||||
* Thomas Tanon, [@Tpt](https://github.com/Tpt)
|
|
||||||
* Thomas Opsomer, [@thomasopsomer](https://github.com/thomasopsomer)
|
|
||||||
* Tiago Rodrigues, [@TiagoMRodrigues](https://github.com/TiagoMRodrigues)
|
|
||||||
* Vadim Mazaev, [@GreenRiverRUS](https://github.com/GreenRiverRUS)
|
|
||||||
* Vimos Tan, [@Vimos](https://github.com/Vimos)
|
|
||||||
* Vsevolod Solovyov, [@vsolovyov](https://github.com/vsolovyov)
|
|
||||||
* Wah Loon Keng, [@kengz](https://github.com/kengz)
|
|
||||||
* Wannaphong Phatthiyaphaibun, [@wannaphongcom](https://github.com/wannaphongcom)
|
|
||||||
* Willem van Hage, [@wrvhage](https://github.com/wrvhage)
|
|
||||||
* Wolfgang Seeker, [@wbwseeker](https://github.com/wbwseeker)
|
|
||||||
* Yam, [@hscspring](https://github.com/hscspring)
|
|
||||||
* Yanhao Yang, [@YanhaoYang](https://github.com/YanhaoYang)
|
|
||||||
* Yasuaki Uechi, [@uetchy](https://github.com/uetchy)
|
|
||||||
* Yu-chun Huang, [@galaxyh](https://github.com/galaxyh)
|
|
||||||
* Yubing Dong, [@tomtung](https://github.com/tomtung)
|
|
||||||
* Yuval Pinter, [@yuvalpinter](https://github.com/yuvalpinter)
|
|
2
LICENSE
2
LICENSE
|
@ -1,6 +1,6 @@
|
||||||
The MIT License (MIT)
|
The MIT License (MIT)
|
||||||
|
|
||||||
Copyright (C) 2016 ExplosionAI UG (haftungsbeschränkt), 2016 spaCy GmbH, 2015 Matthew Honnibal
|
Copyright (C) 2016-2019 ExplosionAI GmbH, 2016 spaCy GmbH, 2015 Matthew Honnibal
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
recursive-include include *.h
|
recursive-include include *.h
|
||||||
include LICENSE
|
include LICENSE
|
||||||
include README.rst
|
include README.md
|
||||||
|
include pyproject.toml
|
||||||
include bin/spacy
|
include bin/spacy
|
||||||
|
|
4
Makefile
4
Makefile
|
@ -5,11 +5,11 @@ dist/spacy.pex : spacy/*.py* spacy/*/*.py*
|
||||||
python3.6 -m venv env3.6
|
python3.6 -m venv env3.6
|
||||||
source env3.6/bin/activate
|
source env3.6/bin/activate
|
||||||
env3.6/bin/pip install wheel
|
env3.6/bin/pip install wheel
|
||||||
env3.6/bin/pip install -r requirements.txt --no-cache-dir --no-binary :all:
|
env3.6/bin/pip install -r requirements.txt --no-cache-dir
|
||||||
env3.6/bin/python setup.py build_ext --inplace
|
env3.6/bin/python setup.py build_ext --inplace
|
||||||
env3.6/bin/python setup.py sdist
|
env3.6/bin/python setup.py sdist
|
||||||
env3.6/bin/python setup.py bdist_wheel
|
env3.6/bin/python setup.py bdist_wheel
|
||||||
env3.6/bin/python -m pip install pex
|
env3.6/bin/python -m pip install pex==1.5.3
|
||||||
env3.6/bin/pex pytest dist/*.whl -e spacy -o dist/spacy-$(sha).pex
|
env3.6/bin/pex pytest dist/*.whl -e spacy -o dist/spacy-$(sha).pex
|
||||||
cp dist/spacy-$(sha).pex dist/spacy.pex
|
cp dist/spacy-$(sha).pex dist/spacy.pex
|
||||||
chmod a+rx dist/spacy.pex
|
chmod a+rx dist/spacy.pex
|
||||||
|
|
284
README.md
Normal file
284
README.md
Normal file
|
@ -0,0 +1,284 @@
|
||||||
|
<a href="https://explosion.ai"><img src="https://explosion.ai/assets/img/logo.svg" width="125" height="125" align="right" /></a>
|
||||||
|
|
||||||
|
# spaCy: Industrial-strength NLP
|
||||||
|
|
||||||
|
spaCy is a library for advanced Natural Language Processing in Python and
|
||||||
|
Cython. It's built on the very latest research, and was designed from day one
|
||||||
|
to be used in real products. spaCy comes with
|
||||||
|
[pre-trained statistical models](https://spacy.io/models) and word vectors, and
|
||||||
|
currently supports tokenization for **45+ languages**. It features the
|
||||||
|
**fastest syntactic parser** in the world, convolutional
|
||||||
|
**neural network models** for tagging, parsing and **named entity recognition**
|
||||||
|
and easy **deep learning** integration. It's commercial open-source software,
|
||||||
|
released under the MIT license.
|
||||||
|
|
||||||
|
💫 **Version 2.1 out now!** [Check out the release notes here.](https://github.com/explosion/spaCy/releases)
|
||||||
|
|
||||||
|
[![Travis Build Status](https://img.shields.io/travis/explosion/spaCy/master.svg?style=flat-square&logo=travis)](https://travis-ci.org/explosion/spaCy)
|
||||||
|
[![Appveyor Build Status](https://img.shields.io/appveyor/ci/explosion/spaCy/master.svg?style=flat-square&logo=appveyor)](https://ci.appveyor.com/project/explosion/spaCy)
|
||||||
|
[![Current Release Version](https://img.shields.io/github/release/explosion/spacy.svg?style=flat-square)](https://github.com/explosion/spaCy/releases)
|
||||||
|
[![pypi Version](https://img.shields.io/pypi/v/spacy.svg?style=flat-square)](https://pypi.python.org/pypi/spacy)
|
||||||
|
[![conda Version](https://img.shields.io/conda/vn/conda-forge/spacy.svg?style=flat-square)](https://anaconda.org/conda-forge/spacy)
|
||||||
|
[![Python wheels](https://img.shields.io/badge/wheels-%E2%9C%93-4c1.svg?longCache=true&style=flat-square&logo=python&logoColor=white)](https://github.com/explosion/wheelwright/releases)
|
||||||
|
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?style=flat-square)](https://github.com/ambv/black)
|
||||||
|
[![spaCy on Twitter](https://img.shields.io/twitter/follow/spacy_io.svg?style=social&label=Follow)](https://twitter.com/spacy_io)
|
||||||
|
|
||||||
|
## 📖 Documentation
|
||||||
|
|
||||||
|
| Documentation | |
|
||||||
|
| --------------- | -------------------------------------------------------------- |
|
||||||
|
| [spaCy 101] | New to spaCy? Here's everything you need to know! |
|
||||||
|
| [Usage Guides] | How to use spaCy and its features. |
|
||||||
|
| [New in v2.1] | New features, backwards incompatibilities and migration guide. |
|
||||||
|
| [API Reference] | The detailed reference for spaCy's API. |
|
||||||
|
| [Models] | Download statistical language models for spaCy. |
|
||||||
|
| [Universe] | Libraries, extensions, demos, books and courses. |
|
||||||
|
| [Changelog] | Changes and version history. |
|
||||||
|
| [Contribute] | How to contribute to the spaCy project and code base. |
|
||||||
|
|
||||||
|
[spacy 101]: https://spacy.io/usage/spacy-101
|
||||||
|
[new in v2.1]: https://spacy.io/usage/v2-1
|
||||||
|
[usage guides]: https://spacy.io/usage/
|
||||||
|
[api reference]: https://spacy.io/api/
|
||||||
|
[models]: https://spacy.io/models
|
||||||
|
[universe]: https://spacy.io/universe
|
||||||
|
[changelog]: https://spacy.io/usage/#changelog
|
||||||
|
[contribute]: https://github.com/explosion/spaCy/blob/master/CONTRIBUTING.md
|
||||||
|
|
||||||
|
## 💬 Where to ask questions
|
||||||
|
|
||||||
|
The spaCy project is maintained by [@honnibal](https://github.com/honnibal)
|
||||||
|
and [@ines](https://github.com/ines). Please understand that we won't be able
|
||||||
|
to provide individual support via email. We also believe that help is much more
|
||||||
|
valuable if it's shared publicly, so that more people can benefit from it.
|
||||||
|
|
||||||
|
| Type | Platforms |
|
||||||
|
| ------------------------ | ------------------------------------------------------ |
|
||||||
|
| 🚨 **Bug Reports** | [GitHub Issue Tracker] |
|
||||||
|
| 🎁 **Feature Requests** | [GitHub Issue Tracker] |
|
||||||
|
| 👩‍💻 **Usage Questions** | [Stack Overflow] · [Gitter Chat] · [Reddit User Group] |
|
||||||
|
| 🗯 **General Discussion** | [Gitter Chat] · [Reddit User Group] |
|
||||||
|
|
||||||
|
[github issue tracker]: https://github.com/explosion/spaCy/issues
|
||||||
|
[stack overflow]: http://stackoverflow.com/questions/tagged/spacy
|
||||||
|
[gitter chat]: https://gitter.im/explosion/spaCy
|
||||||
|
[reddit user group]: https://www.reddit.com/r/spacynlp
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- **Fastest syntactic parser** in the world
|
||||||
|
- **Named entity** recognition
|
||||||
|
- Non-destructive **tokenization**
|
||||||
|
- Support for **45+ languages**
|
||||||
|
- Pre-trained [statistical models](https://spacy.io/models) and word vectors
|
||||||
|
- Easy **deep learning** integration
|
||||||
|
- Part-of-speech tagging
|
||||||
|
- Labelled dependency parsing
|
||||||
|
- Syntax-driven sentence segmentation
|
||||||
|
- Built-in **visualizers** for syntax and NER
|
||||||
|
- Convenient string-to-hash mapping
|
||||||
|
- Export to numpy data arrays
|
||||||
|
- Efficient binary serialization
|
||||||
|
- Easy **model packaging** and deployment
|
||||||
|
- State-of-the-art speed
|
||||||
|
- Robust, rigorously evaluated accuracy
|
||||||
|
|
||||||
|
📖 **For more details, see the
|
||||||
|
[facts, figures and benchmarks](https://spacy.io/usage/facts-figures).**
|
||||||
|
|
||||||
|
## Install spaCy
|
||||||
|
|
||||||
|
For detailed installation instructions, see the
|
||||||
|
[documentation](https://spacy.io/usage).
|
||||||
|
|
||||||
|
- **Operating system**: macOS / OS X · Linux · Windows (Cygwin, MinGW, Visual Studio)
|
||||||
|
- **Python version**: Python 2.7, 3.4+ (only 64 bit)
|
||||||
|
- **Package managers**: [pip] · [conda] (via `conda-forge`)
|
||||||
|
|
||||||
|
[pip]: https://pypi.python.org/pypi/spacy
|
||||||
|
[conda]: https://anaconda.org/conda-forge/spacy
|
||||||
|
|
||||||
|
### pip
|
||||||
|
|
||||||
|
Using pip, spaCy releases are available as source packages and binary wheels
|
||||||
|
(as of `v2.0.13`).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install spacy
|
||||||
|
```
|
||||||
|
|
||||||
|
When using pip it is generally recommended to install packages in a virtual
|
||||||
|
environment to avoid modifying system state:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m venv .env
|
||||||
|
source .env/bin/activate
|
||||||
|
pip install spacy
|
||||||
|
```
|
||||||
|
|
||||||
|
### conda
|
||||||
|
|
||||||
|
Thanks to our great community, we've finally re-added conda support. You can now
|
||||||
|
install spaCy via `conda-forge`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda config --add channels conda-forge
|
||||||
|
conda install spacy
|
||||||
|
```
|
||||||
|
|
||||||
|
For the feedstock including the build recipe and configuration,
|
||||||
|
check out [this repository](https://github.com/conda-forge/spacy-feedstock).
|
||||||
|
Improvements and pull requests to the recipe and setup are always appreciated.
|
||||||
|
|
||||||
|
### Updating spaCy
|
||||||
|
|
||||||
|
Some updates to spaCy may require downloading new statistical models. If you're
|
||||||
|
running spaCy v2.0 or higher, you can use the `validate` command to check if
|
||||||
|
your installed models are compatible and if not, print details on how to update
|
||||||
|
them:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -U spacy
|
||||||
|
python -m spacy validate
|
||||||
|
```
|
||||||
|
|
||||||
|
If you've trained your own models, keep in mind that your training and runtime
|
||||||
|
inputs must match. After updating spaCy, we recommend **retraining your models**
|
||||||
|
with the new version.
|
||||||
|
|
||||||
|
📖 **For details on upgrading from spaCy 1.x to spaCy 2.x, see the
|
||||||
|
[migration guide](https://spacy.io/usage/v2#migrating).**
|
||||||
|
|
||||||
|
## Download models
|
||||||
|
|
||||||
|
As of v1.7.0, models for spaCy can be installed as **Python packages**.
|
||||||
|
This means that they're a component of your application, just like any
|
||||||
|
other module. Models can be installed using spaCy's `download` command,
|
||||||
|
or manually by pointing pip to a path or URL.
|
||||||
|
|
||||||
|
| Documentation | |
|
||||||
|
| ---------------------- | ------------------------------------------------------------- |
|
||||||
|
| [Available Models] | Detailed model descriptions, accuracy figures and benchmarks. |
|
||||||
|
| [Models Documentation] | Detailed usage instructions. |
|
||||||
|
|
||||||
|
[available models]: https://spacy.io/models
|
||||||
|
[models documentation]: https://spacy.io/usage/models
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# out-of-the-box: download best-matching default model
|
||||||
|
python -m spacy download en
|
||||||
|
|
||||||
|
# download best-matching version of specific model for your spaCy installation
|
||||||
|
python -m spacy download en_core_web_lg
|
||||||
|
|
||||||
|
# pip install .tar.gz archive from path or URL
|
||||||
|
pip install /Users/you/en_core_web_sm-2.0.0.tar.gz
|
||||||
|
```
|
||||||
|
|
||||||
|
### Loading and using models
|
||||||
|
|
||||||
|
To load a model, use `spacy.load()` with the model's shortcut link:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import spacy
|
||||||
|
nlp = spacy.load('en')
|
||||||
|
doc = nlp(u'This is a sentence.')
|
||||||
|
```
|
||||||
|
|
||||||
|
If you've installed a model via pip, you can also `import` it directly and
|
||||||
|
then call its `load()` method:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import spacy
|
||||||
|
import en_core_web_sm
|
||||||
|
|
||||||
|
nlp = en_core_web_sm.load()
|
||||||
|
doc = nlp(u'This is a sentence.')
|
||||||
|
```
|
||||||
|
|
||||||
|
📖 **For more info and examples, check out the
|
||||||
|
[models documentation](https://spacy.io/usage/models).**
|
||||||
|
|
||||||
|
### Support for older versions
|
||||||
|
|
||||||
|
If you're using an older version (`v1.6.0` or below), you can still download
|
||||||
|
and install the old models from within spaCy using `python -m spacy.en.download all`
|
||||||
|
or `python -m spacy.de.download all`. The `.tar.gz` archives are also
|
||||||
|
[attached to the v1.6.0 release](https://github.com/explosion/spaCy/tree/v1.6.0).
|
||||||
|
To download and install the models manually, unpack the archive, drop the
|
||||||
|
contained directory into `spacy/data` and load the model via `spacy.load('en')`
|
||||||
|
or `spacy.load('de')`.
|
||||||
|
|
||||||
|
## Compile from source
|
||||||
|
|
||||||
|
The other way to install spaCy is to clone its
|
||||||
|
[GitHub repository](https://github.com/explosion/spaCy) and build it from
|
||||||
|
source. That is the common way if you want to make changes to the code base.
|
||||||
|
You'll need to make sure that you have a development environment consisting of a
|
||||||
|
Python distribution including header files, a compiler,
|
||||||
|
[pip](https://pip.pypa.io/en/latest/installing/),
|
||||||
|
[virtualenv](https://virtualenv.pypa.io/) and [git](https://git-scm.com)
|
||||||
|
installed. The compiler part is the trickiest. How to do that depends on your
|
||||||
|
system. See notes on Ubuntu, OS X and Windows for details.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# make sure you are using the latest pip
|
||||||
|
python -m pip install -U pip
|
||||||
|
git clone https://github.com/explosion/spaCy
|
||||||
|
cd spaCy
|
||||||
|
|
||||||
|
python -m venv .env
|
||||||
|
source .env/bin/activate
|
||||||
|
export PYTHONPATH=`pwd`
|
||||||
|
pip install -r requirements.txt
|
||||||
|
python setup.py build_ext --inplace
|
||||||
|
```
|
||||||
|
|
||||||
|
Compared to regular install via pip, [requirements.txt](requirements.txt)
|
||||||
|
additionally installs developer dependencies such as Cython. For more details
|
||||||
|
and instructions, see the documentation on
|
||||||
|
[compiling spaCy from source](https://spacy.io/usage/#source) and the
|
||||||
|
[quickstart widget](https://spacy.io/usage/#section-quickstart) to get
|
||||||
|
the right commands for your platform and Python version.
|
||||||
|
|
||||||
|
### Ubuntu
|
||||||
|
|
||||||
|
Install system-level dependencies via `apt-get`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo apt-get install build-essential python-dev git
|
||||||
|
```
|
||||||
|
|
||||||
|
### macOS / OS X
|
||||||
|
|
||||||
|
Install a recent version of [Xcode](https://developer.apple.com/xcode/),
|
||||||
|
including the so-called "Command Line Tools". macOS and OS X ship with Python
|
||||||
|
and git preinstalled.
|
||||||
|
|
||||||
|
### Windows
|
||||||
|
|
||||||
|
Install a version of the [Visual C++ Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/) or
|
||||||
|
[Visual Studio Express](https://www.visualstudio.com/vs/visual-studio-express/)
|
||||||
|
that matches the version that was used to compile your Python
|
||||||
|
interpreter. For official distributions these are VS 2008 (Python 2.7),
|
||||||
|
VS 2010 (Python 3.4) and VS 2015 (Python 3.5).
|
||||||
|
|
||||||
|
## Run tests
|
||||||
|
|
||||||
|
spaCy comes with an [extensive test suite](spacy/tests). In order to run the
|
||||||
|
tests, you'll usually want to clone the repository and build spaCy from source.
|
||||||
|
This will also install the required development dependencies and test utilities
|
||||||
|
defined in `requirements.txt`.
|
||||||
|
|
||||||
|
Alternatively, you can find out where spaCy is installed and run `pytest` on
|
||||||
|
that directory. Don't forget to also install the test utilities via spaCy's
|
||||||
|
`requirements.txt`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -c "import os; import spacy; print(os.path.dirname(spacy.__file__))"
|
||||||
|
pip install -r path/to/requirements.txt
|
||||||
|
python -m pytest <spacy-directory>
|
||||||
|
```
|
||||||
|
|
||||||
|
See [the documentation](https://spacy.io/usage/#tests) for more details and
|
||||||
|
examples.
|
332
README.rst
332
README.rst
|
@ -1,332 +0,0 @@
|
||||||
spaCy: Industrial-strength NLP
|
|
||||||
******************************
|
|
||||||
|
|
||||||
spaCy is a library for advanced Natural Language Processing in Python and Cython.
|
|
||||||
It's built on the very latest research, and was designed from day one to be
|
|
||||||
used in real products. spaCy comes with
|
|
||||||
`pre-trained statistical models <https://spacy.io/models>`_ and word
|
|
||||||
vectors, and currently supports tokenization for **20+ languages**. It features
|
|
||||||
the **fastest syntactic parser** in the world, convolutional **neural network models**
|
|
||||||
for tagging, parsing and **named entity recognition** and easy **deep learning**
|
|
||||||
integration. It's commercial open-source software, released under the MIT license.
|
|
||||||
|
|
||||||
💫 **Version 2.0 out now!** `Check out the new features here. <https://spacy.io/usage/v2>`_
|
|
||||||
|
|
||||||
.. image:: https://img.shields.io/travis/explosion/spaCy/master.svg?style=flat-square&logo=travis
|
|
||||||
:target: https://travis-ci.org/explosion/spaCy
|
|
||||||
:alt: Build Status
|
|
||||||
|
|
||||||
.. image:: https://img.shields.io/appveyor/ci/explosion/spaCy/master.svg?style=flat-square&logo=appveyor
|
|
||||||
:target: https://ci.appveyor.com/project/explosion/spaCy
|
|
||||||
:alt: Appveyor Build Status
|
|
||||||
|
|
||||||
.. image:: https://img.shields.io/github/release/explosion/spacy.svg?style=flat-square
|
|
||||||
:target: https://github.com/explosion/spaCy/releases
|
|
||||||
:alt: Current Release Version
|
|
||||||
|
|
||||||
.. image:: https://img.shields.io/pypi/v/spacy.svg?style=flat-square
|
|
||||||
:target: https://pypi.python.org/pypi/spacy
|
|
||||||
:alt: pypi Version
|
|
||||||
|
|
||||||
.. image:: https://img.shields.io/conda/vn/conda-forge/spacy.svg?style=flat-square
|
|
||||||
:target: https://anaconda.org/conda-forge/spacy
|
|
||||||
:alt: conda Version
|
|
||||||
|
|
||||||
.. image:: https://img.shields.io/badge/chat-join%20%E2%86%92-09a3d5.svg?style=flat-square&logo=gitter-white
|
|
||||||
:target: https://gitter.im/explosion/spaCy
|
|
||||||
:alt: spaCy on Gitter
|
|
||||||
|
|
||||||
.. image:: https://img.shields.io/twitter/follow/spacy_io.svg?style=social&label=Follow
|
|
||||||
:target: https://twitter.com/spacy_io
|
|
||||||
:alt: spaCy on Twitter
|
|
||||||
|
|
||||||
📖 Documentation
|
|
||||||
================
|
|
||||||
|
|
||||||
=================== ===
|
|
||||||
`spaCy 101`_ New to spaCy? Here's everything you need to know!
|
|
||||||
`Usage Guides`_ How to use spaCy and its features.
|
|
||||||
`New in v2.0`_ New features, backwards incompatibilities and migration guide.
|
|
||||||
`API Reference`_ The detailed reference for spaCy's API.
|
|
||||||
`Models`_ Download statistical language models for spaCy.
|
|
||||||
`Universe`_ Libraries, extensions, demos, books and courses.
|
|
||||||
`Changelog`_ Changes and version history.
|
|
||||||
`Contribute`_ How to contribute to the spaCy project and code base.
|
|
||||||
=================== ===
|
|
||||||
|
|
||||||
.. _spaCy 101: https://spacy.io/usage/spacy-101
|
|
||||||
.. _New in v2.0: https://spacy.io/usage/v2#migrating
|
|
||||||
.. _Usage Guides: https://spacy.io/usage/
|
|
||||||
.. _API Reference: https://spacy.io/api/
|
|
||||||
.. _Models: https://spacy.io/models
|
|
||||||
.. _Universe: https://spacy.io/universe
|
|
||||||
.. _Changelog: https://spacy.io/usage/#changelog
|
|
||||||
.. _Contribute: https://github.com/explosion/spaCy/blob/master/CONTRIBUTING.md
|
|
||||||
|
|
||||||
💬 Where to ask questions
|
|
||||||
==========================
|
|
||||||
|
|
||||||
The spaCy project is maintained by `@honnibal <https://github.com/honnibal>`_
|
|
||||||
and `@ines <https://github.com/ines>`_. Please understand that we won't be able
|
|
||||||
to provide individual support via email. We also believe that help is much more
|
|
||||||
valuable if it's shared publicly, so that more people can benefit from it.
|
|
||||||
|
|
||||||
====================== ===
|
|
||||||
**Bug Reports** `GitHub Issue Tracker`_
|
|
||||||
**Usage Questions** `StackOverflow`_, `Gitter Chat`_, `Reddit User Group`_
|
|
||||||
**General Discussion** `Gitter Chat`_, `Reddit User Group`_
|
|
||||||
====================== ===
|
|
||||||
|
|
||||||
.. _GitHub Issue Tracker: https://github.com/explosion/spaCy/issues
|
|
||||||
.. _StackOverflow: http://stackoverflow.com/questions/tagged/spacy
|
|
||||||
.. _Gitter Chat: https://gitter.im/explosion/spaCy
|
|
||||||
.. _Reddit User Group: https://www.reddit.com/r/spacynlp
|
|
||||||
|
|
||||||
Features
|
|
||||||
========
|
|
||||||
|
|
||||||
* **Fastest syntactic parser** in the world
|
|
||||||
* **Named entity** recognition
|
|
||||||
* Non-destructive **tokenization**
|
|
||||||
* Support for **20+ languages**
|
|
||||||
* Pre-trained `statistical models <https://spacy.io/models>`_ and word vectors
|
|
||||||
* Easy **deep learning** integration
|
|
||||||
* Part-of-speech tagging
|
|
||||||
* Labelled dependency parsing
|
|
||||||
* Syntax-driven sentence segmentation
|
|
||||||
* Built in **visualizers** for syntax and NER
|
|
||||||
* Convenient string-to-hash mapping
|
|
||||||
* Export to numpy data arrays
|
|
||||||
* Efficient binary serialization
|
|
||||||
* Easy **model packaging** and deployment
|
|
||||||
* State-of-the-art speed
|
|
||||||
* Robust, rigorously evaluated accuracy
|
|
||||||
|
|
||||||
📖 **For more details, see the** `facts, figures and benchmarks <https://spacy.io/usage/facts-figures>`_.
|
|
||||||
|
|
||||||
Install spaCy
|
|
||||||
=============
|
|
||||||
|
|
||||||
For detailed installation instructions, see
|
|
||||||
the `documentation <https://spacy.io/usage>`_.
|
|
||||||
|
|
||||||
==================== ===
|
|
||||||
**Operating system** macOS / OS X, Linux, Windows (Cygwin, MinGW, Visual Studio)
|
|
||||||
**Python version** CPython 2.7, 3.4+. Only 64 bit.
|
|
||||||
**Package managers** `pip`_ (source packages only), `conda`_ (via ``conda-forge``)
|
|
||||||
==================== ===
|
|
||||||
|
|
||||||
.. _pip: https://pypi.python.org/pypi/spacy
|
|
||||||
.. _conda: https://anaconda.org/conda-forge/spacy
|
|
||||||
|
|
||||||
pip
|
|
||||||
---
|
|
||||||
|
|
||||||
Using pip, spaCy releases are currently only available as source packages.
|
|
||||||
|
|
||||||
.. code:: bash
|
|
||||||
|
|
||||||
pip install spacy
|
|
||||||
|
|
||||||
When using pip it is generally recommended to install packages in a virtual
|
|
||||||
environment to avoid modifying system state:
|
|
||||||
|
|
||||||
.. code:: bash
|
|
||||||
|
|
||||||
python -m venv .env
|
|
||||||
source .env/bin/activate
|
|
||||||
pip install spacy
|
|
||||||
|
|
||||||
conda
|
|
||||||
-----
|
|
||||||
|
|
||||||
Thanks to our great community, we've finally re-added conda support. You can now
|
|
||||||
install spaCy via ``conda-forge``:
|
|
||||||
|
|
||||||
.. code:: bash
|
|
||||||
|
|
||||||
conda config --add channels conda-forge
|
|
||||||
conda install spacy
|
|
||||||
|
|
||||||
For the feedstock including the build recipe and configuration,
|
|
||||||
check out `this repository <https://github.com/conda-forge/spacy-feedstock>`_.
|
|
||||||
Improvements and pull requests to the recipe and setup are always appreciated.
|
|
||||||
|
|
||||||
Updating spaCy
|
|
||||||
--------------
|
|
||||||
|
|
||||||
Some updates to spaCy may require downloading new statistical models. If you're
|
|
||||||
running spaCy v2.0 or higher, you can use the ``validate`` command to check if
|
|
||||||
your installed models are compatible and if not, print details on how to update
|
|
||||||
them:
|
|
||||||
|
|
||||||
.. code:: bash
|
|
||||||
|
|
||||||
pip install -U spacy
|
|
||||||
python -m spacy validate
|
|
||||||
|
|
||||||
If you've trained your own models, keep in mind that your training and runtime
|
|
||||||
inputs must match. After updating spaCy, we recommend **retraining your models**
|
|
||||||
with the new version.
|
|
||||||
|
|
||||||
📖 **For details on upgrading from spaCy 1.x to spaCy 2.x, see the**
|
|
||||||
`migration guide <https://spacy.io/usage/v2#migrating>`_.
|
|
||||||
|
|
||||||
Download models
|
|
||||||
===============
|
|
||||||
|
|
||||||
As of v1.7.0, models for spaCy can be installed as **Python packages**.
|
|
||||||
This means that they're a component of your application, just like any
|
|
||||||
other module. Models can be installed using spaCy's ``download`` command,
|
|
||||||
or manually by pointing pip to a path or URL.
|
|
||||||
|
|
||||||
======================= ===
|
|
||||||
`Available Models`_ Detailed model descriptions, accuracy figures and benchmarks.
|
|
||||||
`Models Documentation`_ Detailed usage instructions.
|
|
||||||
======================= ===
|
|
||||||
|
|
||||||
.. _Available Models: https://spacy.io/models
|
|
||||||
.. _Models Documentation: https://spacy.io/docs/usage/models
|
|
||||||
|
|
||||||
.. code:: bash
|
|
||||||
|
|
||||||
# out-of-the-box: download best-matching default model
|
|
||||||
python -m spacy download en
|
|
||||||
|
|
||||||
# download best-matching version of specific model for your spaCy installation
|
|
||||||
python -m spacy download en_core_web_lg
|
|
||||||
|
|
||||||
# pip install .tar.gz archive from path or URL
|
|
||||||
pip install /Users/you/en_core_web_sm-2.0.0.tar.gz
|
|
||||||
|
|
||||||
If you have SSL certification problems, SSL customization options are described in the help:
|
|
||||||
|
|
||||||
# help for the download command
|
|
||||||
python -m spacy download --help
|
|
||||||
|
|
||||||
Loading and using models
|
|
||||||
------------------------
|
|
||||||
|
|
||||||
To load a model, use ``spacy.load()`` with the model's shortcut link:
|
|
||||||
|
|
||||||
.. code:: python
|
|
||||||
|
|
||||||
import spacy
|
|
||||||
nlp = spacy.load('en')
|
|
||||||
doc = nlp(u'This is a sentence.')
|
|
||||||
|
|
||||||
If you've installed a model via pip, you can also ``import`` it directly and
|
|
||||||
then call its ``load()`` method:
|
|
||||||
|
|
||||||
.. code:: python
|
|
||||||
|
|
||||||
import spacy
|
|
||||||
import en_core_web_sm
|
|
||||||
|
|
||||||
nlp = en_core_web_sm.load()
|
|
||||||
doc = nlp(u'This is a sentence.')
|
|
||||||
|
|
||||||
📖 **For more info and examples, check out the**
|
|
||||||
`models documentation <https://spacy.io/docs/usage/models>`_.
|
|
||||||
|
|
||||||
Support for older versions
|
|
||||||
--------------------------
|
|
||||||
|
|
||||||
If you're using an older version (``v1.6.0`` or below), you can still download
|
|
||||||
and install the old models from within spaCy using ``python -m spacy.en.download all``
|
|
||||||
or ``python -m spacy.de.download all``. The ``.tar.gz`` archives are also
|
|
||||||
`attached to the v1.6.0 release <https://github.com/explosion/spaCy/tree/v1.6.0>`_.
|
|
||||||
To download and install the models manually, unpack the archive, drop the
|
|
||||||
contained directory into ``spacy/data`` and load the model via ``spacy.load('en')``
|
|
||||||
or ``spacy.load('de')``.
|
|
||||||
|
|
||||||
Compile from source
|
|
||||||
===================
|
|
||||||
|
|
||||||
The other way to install spaCy is to clone its
|
|
||||||
`GitHub repository <https://github.com/explosion/spaCy>`_ and build it from
|
|
||||||
source. That is the common way if you want to make changes to the code base.
|
|
||||||
You'll need to make sure that you have a development environment consisting of a
|
|
||||||
Python distribution including header files, a compiler,
|
|
||||||
`pip <https://pip.pypa.io/en/latest/installing/>`__, `virtualenv <https://virtualenv.pypa.io/>`_
|
|
||||||
and `git <https://git-scm.com>`_ installed. The compiler part is the trickiest.
|
|
||||||
How to do that depends on your system. See notes on Ubuntu, OS X and Windows for
|
|
||||||
details.
|
|
||||||
|
|
||||||
.. code:: bash
|
|
||||||
|
|
||||||
# make sure you are using the latest pip
|
|
||||||
python -m pip install -U pip
|
|
||||||
git clone https://github.com/explosion/spaCy
|
|
||||||
cd spaCy
|
|
||||||
|
|
||||||
python -m venv .env
|
|
||||||
source .env/bin/activate
|
|
||||||
export PYTHONPATH=`pwd`
|
|
||||||
pip install -r requirements.txt
|
|
||||||
python setup.py build_ext --inplace
|
|
||||||
|
|
||||||
Compared to regular install via pip, `requirements.txt <requirements.txt>`_
|
|
||||||
additionally installs developer dependencies such as Cython. For more details
|
|
||||||
and instructions, see the documentation on
|
|
||||||
`compiling spaCy from source <https://spacy.io/usage/#source>`_ and the
|
|
||||||
`quickstart widget <https://spacy.io/usage/#section-quickstart>`_ to get
|
|
||||||
the right commands for your platform and Python version.
|
|
||||||
|
|
||||||
Instead of the above verbose commands, you can also use the following
|
|
||||||
`Fabric <http://www.fabfile.org/>`_ commands. All commands assume that your
|
|
||||||
virtual environment is located in a directory ``.env``. If you're using a
|
|
||||||
different directory, you can change it via the environment variable ``VENV_DIR``,
|
|
||||||
for example ``VENV_DIR=".custom-env" fab clean make``.
|
|
||||||
|
|
||||||
============= ===
|
|
||||||
``fab env`` Create virtual environment and delete previous one, if it exists.
|
|
||||||
``fab make`` Compile the source.
|
|
||||||
``fab clean`` Remove compiled objects, including the generated C++.
|
|
||||||
``fab test`` Run basic tests, aborting after first failure.
|
|
||||||
============= ===
|
|
||||||
|
|
||||||
Ubuntu
|
|
||||||
------
|
|
||||||
|
|
||||||
Install system-level dependencies via ``apt-get``:
|
|
||||||
|
|
||||||
.. code:: bash
|
|
||||||
|
|
||||||
sudo apt-get install build-essential python-dev git
|
|
||||||
|
|
||||||
macOS / OS X
|
|
||||||
------------
|
|
||||||
|
|
||||||
Install a recent version of `XCode <https://developer.apple.com/xcode/>`_,
|
|
||||||
including the so-called "Command Line Tools". macOS and OS X ship with Python
|
|
||||||
and git preinstalled.
|
|
||||||
|
|
||||||
Windows
|
|
||||||
-------
|
|
||||||
|
|
||||||
Install a version of `Visual Studio Express <https://www.visualstudio.com/vs/visual-studio-express/>`_
|
|
||||||
or higher that matches the version that was used to compile your Python
|
|
||||||
interpreter. For official distributions these are VS 2008 (Python 2.7),
|
|
||||||
VS 2010 (Python 3.4) and VS 2015 (Python 3.5).
|
|
||||||
|
|
||||||
Run tests
|
|
||||||
=========
|
|
||||||
|
|
||||||
spaCy comes with an `extensive test suite <spacy/tests>`_. In order to run the
|
|
||||||
tests, you'll usually want to clone the repository and build spaCy from source.
|
|
||||||
This will also install the required development dependencies and test utilities
|
|
||||||
defined in the ``requirements.txt``.
|
|
||||||
|
|
||||||
Alternatively, you can find out where spaCy is installed and run ``pytest`` on
|
|
||||||
that directory. Don't forget to also install the test utilities via spaCy's
|
|
||||||
``requirements.txt``:
|
|
||||||
|
|
||||||
.. code:: bash
|
|
||||||
|
|
||||||
python -c "import os; import spacy; print(os.path.dirname(spacy.__file__))"
|
|
||||||
pip install -r path/to/requirements.txt
|
|
||||||
python -m pytest <spacy-directory>
|
|
||||||
|
|
||||||
See `the documentation <https://spacy.io/usage/#tests>`_ for more details and
|
|
||||||
examples.
|
|
|
@ -35,41 +35,49 @@ import subprocess
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
HASH_FILE = 'cythonize.json'
|
HASH_FILE = "cythonize.json"
|
||||||
|
|
||||||
|
|
||||||
def process_pyx(fromfile, tofile):
|
def process_pyx(fromfile, tofile, language_level="-2"):
|
||||||
print('Processing %s' % fromfile)
|
print("Processing %s" % fromfile)
|
||||||
try:
|
try:
|
||||||
from Cython.Compiler.Version import version as cython_version
|
from Cython.Compiler.Version import version as cython_version
|
||||||
from distutils.version import LooseVersion
|
from distutils.version import LooseVersion
|
||||||
if LooseVersion(cython_version) < LooseVersion('0.19'):
|
|
||||||
raise Exception('Require Cython >= 0.19')
|
if LooseVersion(cython_version) < LooseVersion("0.19"):
|
||||||
|
raise Exception("Require Cython >= 0.19")
|
||||||
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
flags = ['--fast-fail']
|
flags = ["--fast-fail", language_level]
|
||||||
if tofile.endswith('.cpp'):
|
if tofile.endswith(".cpp"):
|
||||||
flags += ['--cplus']
|
flags += ["--cplus"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
r = subprocess.call(['cython'] + flags + ['-o', tofile, fromfile],
|
r = subprocess.call(
|
||||||
env=os.environ) # See Issue #791
|
["cython"] + flags + ["-o", tofile, fromfile], env=os.environ
|
||||||
|
) # See Issue #791
|
||||||
if r != 0:
|
if r != 0:
|
||||||
raise Exception('Cython failed')
|
raise Exception("Cython failed")
|
||||||
except OSError:
|
except OSError:
|
||||||
# There are ways of installing Cython that don't result in a cython
|
# There are ways of installing Cython that don't result in a cython
|
||||||
# executable on the path, see gh-2397.
|
# executable on the path, see gh-2397.
|
||||||
r = subprocess.call([sys.executable, '-c',
|
r = subprocess.call(
|
||||||
'import sys; from Cython.Compiler.Main import '
|
[
|
||||||
'setuptools_main as main; sys.exit(main())'] + flags +
|
sys.executable,
|
||||||
['-o', tofile, fromfile])
|
"-c",
|
||||||
|
"import sys; from Cython.Compiler.Main import "
|
||||||
|
"setuptools_main as main; sys.exit(main())",
|
||||||
|
]
|
||||||
|
+ flags
|
||||||
|
+ ["-o", tofile, fromfile]
|
||||||
|
)
|
||||||
if r != 0:
|
if r != 0:
|
||||||
raise Exception('Cython failed')
|
raise Exception("Cython failed")
|
||||||
except OSError:
|
except OSError:
|
||||||
raise OSError('Cython needs to be installed')
|
raise OSError("Cython needs to be installed")
|
||||||
|
|
||||||
|
|
||||||
def preserve_cwd(path, func, *args):
|
def preserve_cwd(path, func, *args):
|
||||||
|
@ -89,12 +97,12 @@ def load_hashes(filename):
|
||||||
|
|
||||||
|
|
||||||
def save_hashes(hash_db, filename):
|
def save_hashes(hash_db, filename):
|
||||||
with open(filename, 'w') as f:
|
with open(filename, "w") as f:
|
||||||
f.write(json.dumps(hash_db))
|
f.write(json.dumps(hash_db))
|
||||||
|
|
||||||
|
|
||||||
def get_hash(path):
|
def get_hash(path):
|
||||||
return hashlib.md5(open(path, 'rb').read()).hexdigest()
|
return hashlib.md5(open(path, "rb").read()).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
def hash_changed(base, path, db):
|
def hash_changed(base, path, db):
|
||||||
|
@ -109,25 +117,27 @@ def hash_add(base, path, db):
|
||||||
|
|
||||||
def process(base, filename, db):
|
def process(base, filename, db):
|
||||||
root, ext = os.path.splitext(filename)
|
root, ext = os.path.splitext(filename)
|
||||||
if ext in ['.pyx', '.cpp']:
|
if ext in [".pyx", ".cpp"]:
|
||||||
if hash_changed(base, filename, db) or not os.path.isfile(os.path.join(base, root + '.cpp')):
|
if hash_changed(base, filename, db) or not os.path.isfile(
|
||||||
preserve_cwd(base, process_pyx, root + '.pyx', root + '.cpp')
|
os.path.join(base, root + ".cpp")
|
||||||
hash_add(base, root + '.cpp', db)
|
):
|
||||||
hash_add(base, root + '.pyx', db)
|
preserve_cwd(base, process_pyx, root + ".pyx", root + ".cpp")
|
||||||
|
hash_add(base, root + ".cpp", db)
|
||||||
|
hash_add(base, root + ".pyx", db)
|
||||||
|
|
||||||
|
|
||||||
def check_changes(root, db):
|
def check_changes(root, db):
|
||||||
res = False
|
res = False
|
||||||
new_db = {}
|
new_db = {}
|
||||||
|
|
||||||
setup_filename = 'setup.py'
|
setup_filename = "setup.py"
|
||||||
hash_add('.', setup_filename, new_db)
|
hash_add(".", setup_filename, new_db)
|
||||||
if hash_changed('.', setup_filename, db):
|
if hash_changed(".", setup_filename, db):
|
||||||
res = True
|
res = True
|
||||||
|
|
||||||
for base, _, files in os.walk(root):
|
for base, _, files in os.walk(root):
|
||||||
for filename in files:
|
for filename in files:
|
||||||
if filename.endswith('.pxd'):
|
if filename.endswith(".pxd"):
|
||||||
hash_add(base, filename, new_db)
|
hash_add(base, filename, new_db)
|
||||||
if hash_changed(base, filename, db):
|
if hash_changed(base, filename, db):
|
||||||
res = True
|
res = True
|
||||||
|
@ -150,8 +160,10 @@ def run(root):
|
||||||
save_hashes(db, HASH_FILE)
|
save_hashes(db, HASH_FILE)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description='Cythonize pyx files into C++ files as needed')
|
parser = argparse.ArgumentParser(
|
||||||
parser.add_argument('root', help='root directory')
|
description="Cythonize pyx files into C++ files as needed"
|
||||||
|
)
|
||||||
|
parser.add_argument("root", help="root directory")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
run(args.root)
|
run(args.root)
|
||||||
|
|
97
bin/load_reddit.py
Normal file
97
bin/load_reddit.py
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import bz2
|
||||||
|
import re
|
||||||
|
import srsly
|
||||||
|
import sys
|
||||||
|
import random
|
||||||
|
import datetime
|
||||||
|
import plac
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
_unset = object()
|
||||||
|
|
||||||
|
|
||||||
|
class Reddit(object):
|
||||||
|
"""Stream cleaned comments from Reddit."""
|
||||||
|
|
||||||
|
pre_format_re = re.compile(r"^[`*~]")
|
||||||
|
post_format_re = re.compile(r"[`*~]$")
|
||||||
|
url_re = re.compile(r"\[([^]]+)\]\(%%URL\)")
|
||||||
|
link_re = re.compile(r"\[([^]]+)\]\(https?://[^\)]+\)")
|
||||||
|
|
||||||
|
def __init__(self, file_path, meta_keys={"subreddit": "section"}):
|
||||||
|
"""
|
||||||
|
file_path (unicode / Path): Path to archive or directory of archives.
|
||||||
|
meta_keys (dict): Meta data key included in the Reddit corpus, mapped
|
||||||
|
to display name in Prodigy meta.
|
||||||
|
RETURNS (Reddit): The Reddit loader.
|
||||||
|
"""
|
||||||
|
self.meta = meta_keys
|
||||||
|
file_path = Path(file_path)
|
||||||
|
if not file_path.exists():
|
||||||
|
raise IOError("Can't find file path: {}".format(file_path))
|
||||||
|
if not file_path.is_dir():
|
||||||
|
self.files = [file_path]
|
||||||
|
else:
|
||||||
|
self.files = list(file_path.iterdir())
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
for file_path in self.iter_files():
|
||||||
|
with bz2.open(str(file_path)) as f:
|
||||||
|
for line in f:
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
comment = srsly.json_loads(line)
|
||||||
|
if self.is_valid(comment):
|
||||||
|
text = self.strip_tags(comment["body"])
|
||||||
|
yield {"text": text}
|
||||||
|
|
||||||
|
def get_meta(self, item):
|
||||||
|
return {name: item.get(key, "n/a") for key, name in self.meta.items()}
|
||||||
|
|
||||||
|
def iter_files(self):
|
||||||
|
for file_path in self.files:
|
||||||
|
yield file_path
|
||||||
|
|
||||||
|
def strip_tags(self, text):
|
||||||
|
text = self.link_re.sub(r"\1", text)
|
||||||
|
text = text.replace("&gt;", ">").replace("&lt;", "<")
|
||||||
|
text = self.pre_format_re.sub("", text)
|
||||||
|
text = self.post_format_re.sub("", text)
|
||||||
|
text = re.sub(r"\s+", " ", text)
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
|
def is_valid(self, comment):
|
||||||
|
return (
|
||||||
|
comment["body"] is not None
|
||||||
|
and comment["body"] != "[deleted]"
|
||||||
|
and comment["body"] != "[removed]"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def main(path):
|
||||||
|
reddit = Reddit(path)
|
||||||
|
for comment in reddit:
|
||||||
|
print(srsly.json_dumps(comment))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import socket
|
||||||
|
|
||||||
|
try:
|
||||||
|
BrokenPipeError
|
||||||
|
except NameError:
|
||||||
|
BrokenPipeError = socket.error
|
||||||
|
try:
|
||||||
|
plac.call(main)
|
||||||
|
except BrokenPipeError:
|
||||||
|
import os, sys
|
||||||
|
|
||||||
|
# Python flushes standard streams on exit; redirect remaining output
|
||||||
|
# to devnull to avoid another BrokenPipeError at shutdown
|
||||||
|
devnull = os.open(os.devnull, os.O_WRONLY)
|
||||||
|
os.dup2(devnull, sys.stdout.fileno())
|
||||||
|
sys.exit(1) # Python exits with error code 1 on EPIPE
|
|
@ -7,9 +7,12 @@ git diff-index --quiet HEAD
|
||||||
|
|
||||||
git checkout $1
|
git checkout $1
|
||||||
git pull origin $1
|
git pull origin $1
|
||||||
|
|
||||||
version=$(grep "__version__ = " spacy/about.py)
|
version=$(grep "__version__ = " spacy/about.py)
|
||||||
version=${version/__version__ = }
|
version=${version/__version__ = }
|
||||||
version=${version/\'/}
|
version=${version/\'/}
|
||||||
version=${version/\'/}
|
version=${version/\'/}
|
||||||
|
version=${version/\"/}
|
||||||
|
version=${version/\"/}
|
||||||
git tag "v$version"
|
git tag "v$version"
|
||||||
git push origin --tags
|
git push origin --tags
|
||||||
|
|
|
@ -1,5 +1,12 @@
|
||||||
"""
|
"""
|
||||||
This example shows how to use an LSTM sentiment classification model trained using Keras in spaCy. spaCy splits the document into sentences, and each sentence is classified using the LSTM. The scores for the sentences are then aggregated to give the document score. This kind of hierarchical model is quite difficult in "pure" Keras or Tensorflow, but it's very effective. The Keras example on this dataset performs quite poorly, because it cuts off the documents so that they're a fixed size. This hurts review accuracy a lot, because people often summarise their rating in the final sentence
|
This example shows how to use an LSTM sentiment classification model trained
|
||||||
|
using Keras in spaCy. spaCy splits the document into sentences, and each
|
||||||
|
sentence is classified using the LSTM. The scores for the sentences are then
|
||||||
|
aggregated to give the document score. This kind of hierarchical model is quite
|
||||||
|
difficult in "pure" Keras or Tensorflow, but it's very effective. The Keras
|
||||||
|
example on this dataset performs quite poorly, because it cuts off the documents
|
||||||
|
so that they're a fixed size. This hurts review accuracy a lot, because people
|
||||||
|
often summarise their rating in the final sentence.
|
||||||
|
|
||||||
Prerequisites:
|
Prerequisites:
|
||||||
spacy download en_vectors_web_lg
|
spacy download en_vectors_web_lg
|
||||||
|
@ -25,9 +32,9 @@ import spacy
|
||||||
class SentimentAnalyser(object):
|
class SentimentAnalyser(object):
|
||||||
@classmethod
|
@classmethod
|
||||||
def load(cls, path, nlp, max_length=100):
|
def load(cls, path, nlp, max_length=100):
|
||||||
with (path / 'config.json').open() as file_:
|
with (path / "config.json").open() as file_:
|
||||||
model = model_from_json(file_.read())
|
model = model_from_json(file_.read())
|
||||||
with (path / 'model').open('rb') as file_:
|
with (path / "model").open("rb") as file_:
|
||||||
lstm_weights = pickle.load(file_)
|
lstm_weights = pickle.load(file_)
|
||||||
embeddings = get_embeddings(nlp.vocab)
|
embeddings = get_embeddings(nlp.vocab)
|
||||||
model.set_weights([embeddings] + lstm_weights)
|
model.set_weights([embeddings] + lstm_weights)
|
||||||
|
@ -69,12 +76,12 @@ def get_labelled_sentences(docs, doc_labels):
|
||||||
for sent in doc.sents:
|
for sent in doc.sents:
|
||||||
sentences.append(sent)
|
sentences.append(sent)
|
||||||
labels.append(y)
|
labels.append(y)
|
||||||
return sentences, numpy.asarray(labels, dtype='int32')
|
return sentences, numpy.asarray(labels, dtype="int32")
|
||||||
|
|
||||||
|
|
||||||
def get_features(docs, max_length):
|
def get_features(docs, max_length):
|
||||||
docs = list(docs)
|
docs = list(docs)
|
||||||
Xs = numpy.zeros((len(docs), max_length), dtype='int32')
|
Xs = numpy.zeros((len(docs), max_length), dtype="int32")
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
j = 0
|
j = 0
|
||||||
for token in doc:
|
for token in doc:
|
||||||
|
@ -89,14 +96,25 @@ def get_features(docs, max_length):
|
||||||
return Xs
|
return Xs
|
||||||
|
|
||||||
|
|
||||||
def train(train_texts, train_labels, dev_texts, dev_labels,
|
def train(
|
||||||
lstm_shape, lstm_settings, lstm_optimizer, batch_size=100,
|
train_texts,
|
||||||
nb_epoch=5, by_sentence=True):
|
train_labels,
|
||||||
|
dev_texts,
|
||||||
|
dev_labels,
|
||||||
|
lstm_shape,
|
||||||
|
lstm_settings,
|
||||||
|
lstm_optimizer,
|
||||||
|
batch_size=100,
|
||||||
|
nb_epoch=5,
|
||||||
|
by_sentence=True,
|
||||||
|
):
|
||||||
|
|
||||||
print("Loading spaCy")
|
print("Loading spaCy")
|
||||||
nlp = spacy.load('en_vectors_web_lg')
|
nlp = spacy.load("en_vectors_web_lg")
|
||||||
nlp.add_pipe(nlp.create_pipe('sentencizer'))
|
nlp.add_pipe(nlp.create_pipe("sentencizer"))
|
||||||
embeddings = get_embeddings(nlp.vocab)
|
embeddings = get_embeddings(nlp.vocab)
|
||||||
model = compile_lstm(embeddings, lstm_shape, lstm_settings)
|
model = compile_lstm(embeddings, lstm_shape, lstm_settings)
|
||||||
|
|
||||||
print("Parsing texts...")
|
print("Parsing texts...")
|
||||||
train_docs = list(nlp.pipe(train_texts))
|
train_docs = list(nlp.pipe(train_texts))
|
||||||
dev_docs = list(nlp.pipe(dev_texts))
|
dev_docs = list(nlp.pipe(dev_texts))
|
||||||
|
@ -104,10 +122,15 @@ def train(train_texts, train_labels, dev_texts, dev_labels,
|
||||||
train_docs, train_labels = get_labelled_sentences(train_docs, train_labels)
|
train_docs, train_labels = get_labelled_sentences(train_docs, train_labels)
|
||||||
dev_docs, dev_labels = get_labelled_sentences(dev_docs, dev_labels)
|
dev_docs, dev_labels = get_labelled_sentences(dev_docs, dev_labels)
|
||||||
|
|
||||||
train_X = get_features(train_docs, lstm_shape['max_length'])
|
train_X = get_features(train_docs, lstm_shape["max_length"])
|
||||||
dev_X = get_features(dev_docs, lstm_shape['max_length'])
|
dev_X = get_features(dev_docs, lstm_shape["max_length"])
|
||||||
model.fit(train_X, train_labels, validation_data=(dev_X, dev_labels),
|
model.fit(
|
||||||
nb_epoch=nb_epoch, batch_size=batch_size)
|
train_X,
|
||||||
|
train_labels,
|
||||||
|
validation_data=(dev_X, dev_labels),
|
||||||
|
epochs=nb_epoch,
|
||||||
|
batch_size=batch_size,
|
||||||
|
)
|
||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
|
@ -117,19 +140,28 @@ def compile_lstm(embeddings, shape, settings):
|
||||||
Embedding(
|
Embedding(
|
||||||
embeddings.shape[0],
|
embeddings.shape[0],
|
||||||
embeddings.shape[1],
|
embeddings.shape[1],
|
||||||
input_length=shape['max_length'],
|
input_length=shape["max_length"],
|
||||||
trainable=False,
|
trainable=False,
|
||||||
weights=[embeddings],
|
weights=[embeddings],
|
||||||
mask_zero=True
|
mask_zero=True,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
model.add(TimeDistributed(Dense(shape['nr_hidden'], use_bias=False)))
|
model.add(TimeDistributed(Dense(shape["nr_hidden"], use_bias=False)))
|
||||||
model.add(Bidirectional(LSTM(shape['nr_hidden'],
|
model.add(
|
||||||
recurrent_dropout=settings['dropout'],
|
Bidirectional(
|
||||||
dropout=settings['dropout'])))
|
LSTM(
|
||||||
model.add(Dense(shape['nr_class'], activation='sigmoid'))
|
shape["nr_hidden"],
|
||||||
model.compile(optimizer=Adam(lr=settings['lr']), loss='binary_crossentropy',
|
recurrent_dropout=settings["dropout"],
|
||||||
metrics=['accuracy'])
|
dropout=settings["dropout"],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
model.add(Dense(shape["nr_class"], activation="sigmoid"))
|
||||||
|
model.compile(
|
||||||
|
optimizer=Adam(lr=settings["lr"]),
|
||||||
|
loss="binary_crossentropy",
|
||||||
|
metrics=["accuracy"],
|
||||||
|
)
|
||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
|
@ -138,15 +170,9 @@ def get_embeddings(vocab):
|
||||||
|
|
||||||
|
|
||||||
def evaluate(model_dir, texts, labels, max_length=100):
|
def evaluate(model_dir, texts, labels, max_length=100):
|
||||||
def create_pipeline(nlp):
|
nlp = spacy.load("en_vectors_web_lg")
|
||||||
'''
|
nlp.add_pipe(nlp.create_pipe("sentencizer"))
|
||||||
This could be a lambda, but named functions are easier to read in Python.
|
nlp.add_pipe(SentimentAnalyser.load(model_dir, nlp, max_length=max_length))
|
||||||
'''
|
|
||||||
return [nlp.tagger, nlp.parser, SentimentAnalyser.load(model_dir, nlp,
|
|
||||||
max_length=max_length)]
|
|
||||||
|
|
||||||
nlp = spacy.load('en')
|
|
||||||
nlp.pipeline = create_pipeline(nlp)
|
|
||||||
|
|
||||||
correct = 0
|
correct = 0
|
||||||
i = 0
|
i = 0
|
||||||
|
@ -158,7 +184,7 @@ def evaluate(model_dir, texts, labels, max_length=100):
|
||||||
|
|
||||||
def read_data(data_dir, limit=0):
|
def read_data(data_dir, limit=0):
|
||||||
examples = []
|
examples = []
|
||||||
for subdir, label in (('pos', 1), ('neg', 0)):
|
for subdir, label in (("pos", 1), ("neg", 0)):
|
||||||
for filename in (data_dir / subdir).iterdir():
|
for filename in (data_dir / subdir).iterdir():
|
||||||
with filename.open() as file_:
|
with filename.open() as file_:
|
||||||
text = file_.read()
|
text = file_.read()
|
||||||
|
@ -166,7 +192,7 @@ def read_data(data_dir, limit=0):
|
||||||
random.shuffle(examples)
|
random.shuffle(examples)
|
||||||
if limit >= 1:
|
if limit >= 1:
|
||||||
examples = examples[:limit]
|
examples = examples[:limit]
|
||||||
return zip(*examples) # Unzips into two lists
|
return zip(*examples) # Unzips into two lists
|
||||||
|
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
|
@ -180,13 +206,21 @@ def read_data(data_dir, limit=0):
|
||||||
learn_rate=("Learn rate", "option", "e", float),
|
learn_rate=("Learn rate", "option", "e", float),
|
||||||
nb_epoch=("Number of training epochs", "option", "i", int),
|
nb_epoch=("Number of training epochs", "option", "i", int),
|
||||||
batch_size=("Size of minibatches for training LSTM", "option", "b", int),
|
batch_size=("Size of minibatches for training LSTM", "option", "b", int),
|
||||||
nr_examples=("Limit to N examples", "option", "n", int)
|
nr_examples=("Limit to N examples", "option", "n", int),
|
||||||
)
|
)
|
||||||
def main(model_dir=None, train_dir=None, dev_dir=None,
|
def main(
|
||||||
is_runtime=False,
|
model_dir=None,
|
||||||
nr_hidden=64, max_length=100, # Shape
|
train_dir=None,
|
||||||
dropout=0.5, learn_rate=0.001, # General NN config
|
dev_dir=None,
|
||||||
nb_epoch=5, batch_size=100, nr_examples=-1): # Training params
|
is_runtime=False,
|
||||||
|
nr_hidden=64,
|
||||||
|
max_length=100, # Shape
|
||||||
|
dropout=0.5,
|
||||||
|
learn_rate=0.001, # General NN config
|
||||||
|
nb_epoch=5,
|
||||||
|
batch_size=256,
|
||||||
|
nr_examples=-1,
|
||||||
|
): # Training params
|
||||||
if model_dir is not None:
|
if model_dir is not None:
|
||||||
model_dir = pathlib.Path(model_dir)
|
model_dir = pathlib.Path(model_dir)
|
||||||
if train_dir is None or dev_dir is None:
|
if train_dir is None or dev_dir is None:
|
||||||
|
@ -208,20 +242,26 @@ def main(model_dir=None, train_dir=None, dev_dir=None,
|
||||||
dev_texts, dev_labels = zip(*imdb_data[1])
|
dev_texts, dev_labels = zip(*imdb_data[1])
|
||||||
else:
|
else:
|
||||||
dev_texts, dev_labels = read_data(dev_dir, imdb_data, limit=nr_examples)
|
dev_texts, dev_labels = read_data(dev_dir, imdb_data, limit=nr_examples)
|
||||||
train_labels = numpy.asarray(train_labels, dtype='int32')
|
train_labels = numpy.asarray(train_labels, dtype="int32")
|
||||||
dev_labels = numpy.asarray(dev_labels, dtype='int32')
|
dev_labels = numpy.asarray(dev_labels, dtype="int32")
|
||||||
lstm = train(train_texts, train_labels, dev_texts, dev_labels,
|
lstm = train(
|
||||||
{'nr_hidden': nr_hidden, 'max_length': max_length, 'nr_class': 1},
|
train_texts,
|
||||||
{'dropout': dropout, 'lr': learn_rate},
|
train_labels,
|
||||||
{},
|
dev_texts,
|
||||||
nb_epoch=nb_epoch, batch_size=batch_size)
|
dev_labels,
|
||||||
|
{"nr_hidden": nr_hidden, "max_length": max_length, "nr_class": 1},
|
||||||
|
{"dropout": dropout, "lr": learn_rate},
|
||||||
|
{},
|
||||||
|
nb_epoch=nb_epoch,
|
||||||
|
batch_size=batch_size,
|
||||||
|
)
|
||||||
weights = lstm.get_weights()
|
weights = lstm.get_weights()
|
||||||
if model_dir is not None:
|
if model_dir is not None:
|
||||||
with (model_dir / 'model').open('wb') as file_:
|
with (model_dir / "model").open("wb") as file_:
|
||||||
pickle.dump(weights[1:], file_)
|
pickle.dump(weights[1:], file_)
|
||||||
with (model_dir / 'config.json').open('wb') as file_:
|
with (model_dir / "config.json").open("w") as file_:
|
||||||
file_.write(lstm.to_json())
|
file_.write(lstm.to_json())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
|
@ -15,14 +15,15 @@ import spacy
|
||||||
|
|
||||||
|
|
||||||
TEXTS = [
|
TEXTS = [
|
||||||
'Net income was $9.4 million compared to the prior year of $2.7 million.',
|
"Net income was $9.4 million compared to the prior year of $2.7 million.",
|
||||||
'Revenue exceeded twelve billion dollars, with a loss of $1b.',
|
"Revenue exceeded twelve billion dollars, with a loss of $1b.",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
model=("Model to load (needs parser and NER)", "positional", None, str))
|
model=("Model to load (needs parser and NER)", "positional", None, str)
|
||||||
def main(model='en_core_web_sm'):
|
)
|
||||||
|
def main(model="en_core_web_sm"):
|
||||||
nlp = spacy.load(model)
|
nlp = spacy.load(model)
|
||||||
print("Loaded model '%s'" % model)
|
print("Loaded model '%s'" % model)
|
||||||
print("Processing %d texts" % len(TEXTS))
|
print("Processing %d texts" % len(TEXTS))
|
||||||
|
@ -31,7 +32,7 @@ def main(model='en_core_web_sm'):
|
||||||
doc = nlp(text)
|
doc = nlp(text)
|
||||||
relations = extract_currency_relations(doc)
|
relations = extract_currency_relations(doc)
|
||||||
for r1, r2 in relations:
|
for r1, r2 in relations:
|
||||||
print('{:<10}\t{}\t{}'.format(r1.text, r2.ent_type_, r2.text))
|
print("{:<10}\t{}\t{}".format(r1.text, r2.ent_type_, r2.text))
|
||||||
|
|
||||||
|
|
||||||
def extract_currency_relations(doc):
|
def extract_currency_relations(doc):
|
||||||
|
@ -41,18 +42,18 @@ def extract_currency_relations(doc):
|
||||||
span.merge()
|
span.merge()
|
||||||
|
|
||||||
relations = []
|
relations = []
|
||||||
for money in filter(lambda w: w.ent_type_ == 'MONEY', doc):
|
for money in filter(lambda w: w.ent_type_ == "MONEY", doc):
|
||||||
if money.dep_ in ('attr', 'dobj'):
|
if money.dep_ in ("attr", "dobj"):
|
||||||
subject = [w for w in money.head.lefts if w.dep_ == 'nsubj']
|
subject = [w for w in money.head.lefts if w.dep_ == "nsubj"]
|
||||||
if subject:
|
if subject:
|
||||||
subject = subject[0]
|
subject = subject[0]
|
||||||
relations.append((subject, money))
|
relations.append((subject, money))
|
||||||
elif money.dep_ == 'pobj' and money.head.dep_ == 'prep':
|
elif money.dep_ == "pobj" and money.head.dep_ == "prep":
|
||||||
relations.append((money.head.head, money))
|
relations.append((money.head.head, money))
|
||||||
return relations
|
return relations
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
||||||
# Expected output:
|
# Expected output:
|
||||||
|
|
|
@ -24,37 +24,39 @@ import plac
|
||||||
import spacy
|
import spacy
|
||||||
|
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(model=("Model to load", "positional", None, str))
|
||||||
model=("Model to load", "positional", None, str))
|
def main(model="en_core_web_sm"):
|
||||||
def main(model='en_core_web_sm'):
|
|
||||||
nlp = spacy.load(model)
|
nlp = spacy.load(model)
|
||||||
print("Loaded model '%s'" % model)
|
print("Loaded model '%s'" % model)
|
||||||
|
|
||||||
doc = nlp("displaCy uses CSS and JavaScript to show you how computers "
|
doc = nlp(
|
||||||
"understand language")
|
"displaCy uses CSS and JavaScript to show you how computers "
|
||||||
|
"understand language"
|
||||||
|
)
|
||||||
|
|
||||||
# The easiest way is to find the head of the subtree you want, and then use
|
# The easiest way is to find the head of the subtree you want, and then use
|
||||||
# the `.subtree`, `.children`, `.lefts` and `.rights` iterators. `.subtree`
|
# the `.subtree`, `.children`, `.lefts` and `.rights` iterators. `.subtree`
|
||||||
# is the one that does what you're asking for most directly:
|
# is the one that does what you're asking for most directly:
|
||||||
for word in doc:
|
for word in doc:
|
||||||
if word.dep_ in ('xcomp', 'ccomp'):
|
if word.dep_ in ("xcomp", "ccomp"):
|
||||||
print(''.join(w.text_with_ws for w in word.subtree))
|
print("".join(w.text_with_ws for w in word.subtree))
|
||||||
|
|
||||||
# It'd probably be better for `word.subtree` to return a `Span` object
|
# It'd probably be better for `word.subtree` to return a `Span` object
|
||||||
# instead of a generator over the tokens. If you want the `Span` you can
|
# instead of a generator over the tokens. If you want the `Span` you can
|
||||||
# get it via the `.right_edge` and `.left_edge` properties. The `Span`
|
# get it via the `.right_edge` and `.left_edge` properties. The `Span`
|
||||||
# object is nice because you can easily get a vector, merge it, etc.
|
# object is nice because you can easily get a vector, merge it, etc.
|
||||||
for word in doc:
|
for word in doc:
|
||||||
if word.dep_ in ('xcomp', 'ccomp'):
|
if word.dep_ in ("xcomp", "ccomp"):
|
||||||
subtree_span = doc[word.left_edge.i : word.right_edge.i + 1]
|
subtree_span = doc[word.left_edge.i : word.right_edge.i + 1]
|
||||||
print(subtree_span.text, '|', subtree_span.root.text)
|
print(subtree_span.text, "|", subtree_span.root.text)
|
||||||
|
|
||||||
# You might also want to select a head, and then select a start and end
|
# You might also want to select a head, and then select a start and end
|
||||||
# position by walking along its children. You could then take the
|
# position by walking along its children. You could then take the
|
||||||
# `.left_edge` and `.right_edge` of those tokens, and use it to calculate
|
# `.left_edge` and `.right_edge` of those tokens, and use it to calculate
|
||||||
# a span.
|
# a span.
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
||||||
# Expected output:
|
# Expected output:
|
||||||
|
|
|
@ -45,7 +45,7 @@ from __future__ import print_function, unicode_literals, division
|
||||||
from bz2 import BZ2File
|
from bz2 import BZ2File
|
||||||
import time
|
import time
|
||||||
import plac
|
import plac
|
||||||
import ujson
|
import json
|
||||||
|
|
||||||
from spacy.matcher import PhraseMatcher
|
from spacy.matcher import PhraseMatcher
|
||||||
import spacy
|
import spacy
|
||||||
|
@ -55,15 +55,15 @@ import spacy
|
||||||
patterns_loc=("Path to gazetteer", "positional", None, str),
|
patterns_loc=("Path to gazetteer", "positional", None, str),
|
||||||
text_loc=("Path to Reddit corpus file", "positional", None, str),
|
text_loc=("Path to Reddit corpus file", "positional", None, str),
|
||||||
n=("Number of texts to read", "option", "n", int),
|
n=("Number of texts to read", "option", "n", int),
|
||||||
lang=("Language class to initialise", "option", "l", str))
|
lang=("Language class to initialise", "option", "l", str),
|
||||||
def main(patterns_loc, text_loc, n=10000, lang='en'):
|
)
|
||||||
nlp = spacy.blank('en')
|
def main(patterns_loc, text_loc, n=10000, lang="en"):
|
||||||
|
nlp = spacy.blank(lang)
|
||||||
nlp.vocab.lex_attr_getters = {}
|
nlp.vocab.lex_attr_getters = {}
|
||||||
phrases = read_gazetteer(nlp.tokenizer, patterns_loc)
|
phrases = read_gazetteer(nlp.tokenizer, patterns_loc)
|
||||||
count = 0
|
count = 0
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
for ent_id, text in get_matches(nlp.tokenizer, phrases,
|
for ent_id, text in get_matches(nlp.tokenizer, phrases, read_text(text_loc, n=n)):
|
||||||
read_text(text_loc, n=n)):
|
|
||||||
count += 1
|
count += 1
|
||||||
t2 = time.time()
|
t2 = time.time()
|
||||||
print("%d docs in %.3f s. %d matches" % (n, (t2 - t1), count))
|
print("%d docs in %.3f s. %d matches" % (n, (t2 - t1), count))
|
||||||
|
@ -71,8 +71,8 @@ def main(patterns_loc, text_loc, n=10000, lang='en'):
|
||||||
|
|
||||||
def read_gazetteer(tokenizer, loc, n=-1):
|
def read_gazetteer(tokenizer, loc, n=-1):
|
||||||
for i, line in enumerate(open(loc)):
|
for i, line in enumerate(open(loc)):
|
||||||
data = ujson.loads(line.strip())
|
data = json.loads(line.strip())
|
||||||
phrase = tokenizer(data['text'])
|
phrase = tokenizer(data["text"])
|
||||||
for w in phrase:
|
for w in phrase:
|
||||||
_ = tokenizer.vocab[w.text]
|
_ = tokenizer.vocab[w.text]
|
||||||
if len(phrase) >= 2:
|
if len(phrase) >= 2:
|
||||||
|
@ -82,15 +82,15 @@ def read_gazetteer(tokenizer, loc, n=-1):
|
||||||
def read_text(bz2_loc, n=10000):
|
def read_text(bz2_loc, n=10000):
|
||||||
with BZ2File(bz2_loc) as file_:
|
with BZ2File(bz2_loc) as file_:
|
||||||
for i, line in enumerate(file_):
|
for i, line in enumerate(file_):
|
||||||
data = ujson.loads(line)
|
data = json.loads(line)
|
||||||
yield data['body']
|
yield data["body"]
|
||||||
if i >= n:
|
if i >= n:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
def get_matches(tokenizer, phrases, texts, max_length=6):
|
def get_matches(tokenizer, phrases, texts, max_length=6):
|
||||||
matcher = PhraseMatcher(tokenizer.vocab, max_length=max_length)
|
matcher = PhraseMatcher(tokenizer.vocab, max_length=max_length)
|
||||||
matcher.add('Phrase', None, *phrases)
|
matcher.add("Phrase", None, *phrases)
|
||||||
for text in texts:
|
for text in texts:
|
||||||
doc = tokenizer(text)
|
doc = tokenizer(text)
|
||||||
for w in doc:
|
for w in doc:
|
||||||
|
@ -100,10 +100,11 @@ def get_matches(tokenizer, phrases, texts, max_length=6):
|
||||||
yield (ent_id, doc[start:end].text)
|
yield (ent_id, doc[start:end].text)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
if False:
|
if False:
|
||||||
import cProfile
|
import cProfile
|
||||||
import pstats
|
import pstats
|
||||||
|
|
||||||
cProfile.runctx("plac.call(main)", globals(), locals(), "Profile.prof")
|
cProfile.runctx("plac.call(main)", globals(), locals(), "Profile.prof")
|
||||||
s = pstats.Stats("Profile.prof")
|
s = pstats.Stats("Profile.prof")
|
||||||
s.strip_dirs().sort_stats("time").print_stats()
|
s.strip_dirs().sort_stats("time").print_stats()
|
||||||
|
|
|
@ -2,11 +2,7 @@
|
||||||
|
|
||||||
# A decomposable attention model for Natural Language Inference
|
# A decomposable attention model for Natural Language Inference
|
||||||
**by Matthew Honnibal, [@honnibal](https://github.com/honnibal)**
|
**by Matthew Honnibal, [@honnibal](https://github.com/honnibal)**
|
||||||
|
**Updated for spaCy 2.0+ and Keras 2.2.2+ by John Stewart, [@free-variation](https://github.com/free-variation)**
|
||||||
> ⚠️ **IMPORTANT NOTE:** This example is currently only compatible with spaCy
|
|
||||||
> v1.x. We're working on porting the example over to Keras v2.x and spaCy v2.x.
|
|
||||||
> See [#1445](https://github.com/explosion/spaCy/issues/1445) for details –
|
|
||||||
> contributions welcome!
|
|
||||||
|
|
||||||
This directory contains an implementation of the entailment prediction model described
|
This directory contains an implementation of the entailment prediction model described
|
||||||
by [Parikh et al. (2016)](https://arxiv.org/pdf/1606.01933.pdf). The model is notable
|
by [Parikh et al. (2016)](https://arxiv.org/pdf/1606.01933.pdf). The model is notable
|
||||||
|
@ -21,19 +17,25 @@ hook is installed to customise the `.similarity()` method of spaCy's `Doc`
|
||||||
and `Span` objects:
|
and `Span` objects:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
def demo(model_dir):
|
def demo(shape):
|
||||||
nlp = spacy.load('en', path=model_dir,
|
nlp = spacy.load('en_vectors_web_lg')
|
||||||
create_pipeline=create_similarity_pipeline)
|
nlp.add_pipe(KerasSimilarityShim.load(nlp.path / 'similarity', nlp, shape[0]))
|
||||||
doc1 = nlp(u'Worst fries ever! Greasy and horrible...')
|
|
||||||
doc2 = nlp(u'The milkshakes are good. The fries are bad.')
|
doc1 = nlp(u'The king of France is bald.')
|
||||||
print(doc1.similarity(doc2))
|
doc2 = nlp(u'France has no king.')
|
||||||
sent1a, sent1b = doc1.sents
|
|
||||||
print(sent1a.similarity(sent1b))
|
print("Sentence 1:", doc1)
|
||||||
print(sent1a.similarity(doc2))
|
print("Sentence 2:", doc2)
|
||||||
print(sent1b.similarity(doc2))
|
|
||||||
|
entailment_type, confidence = doc1.similarity(doc2)
|
||||||
|
print("Entailment type:", entailment_type, "(Confidence:", confidence, ")")
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Which gives the output `Entailment type: contradiction (Confidence: 0.60604566)`, showing that
|
||||||
|
the system has definite opinions about Bertrand Russell's [famous conundrum](https://users.drew.edu/jlenz/br-on-denoting.html)!
|
||||||
|
|
||||||
I'm working on a blog post to explain Parikh et al.'s model in more detail.
|
I'm working on a blog post to explain Parikh et al.'s model in more detail.
|
||||||
|
A [notebook](https://github.com/free-variation/spaCy/blob/master/examples/notebooks/Decompositional%20Attention.ipynb) is available that briefly explains this implementation.
|
||||||
I think it is a very interesting example of the attention mechanism, which
|
I think it is a very interesting example of the attention mechanism, which
|
||||||
I didn't understand very well before working through this paper. There are
|
I didn't understand very well before working through this paper. There are
|
||||||
lots of ways to extend the model.
|
lots of ways to extend the model.
|
||||||
|
@ -43,7 +45,7 @@ lots of ways to extend the model.
|
||||||
| File | Description |
|
| File | Description |
|
||||||
| --- | --- |
|
| --- | --- |
|
||||||
| `__main__.py` | The script that will be executed. Defines the CLI, the data reading, etc — all the boring stuff. |
|
| `__main__.py` | The script that will be executed. Defines the CLI, the data reading, etc — all the boring stuff. |
|
||||||
| `spacy_hook.py` | Provides a class `SimilarityShim` that lets you use an arbitrary function to customize spaCy's `doc.similarity()` method. Instead of the default average-of-vectors algorithm, when you call `doc1.similarity(doc2)`, you'll get the result of `your_model(doc1, doc2)`. |
|
| `spacy_hook.py` | Provides a class `KerasSimilarityShim` that lets you use an arbitrary function to customize spaCy's `doc.similarity()` method. Instead of the default average-of-vectors algorithm, when you call `doc1.similarity(doc2)`, you'll get the result of `your_model(doc1, doc2)`. |
|
||||||
| `keras_decomposable_attention.py` | Defines the neural network model. |
|
| `keras_decomposable_attention.py` | Defines the neural network model. |
|
||||||
|
|
||||||
## Setting up
|
## Setting up
|
||||||
|
@ -52,17 +54,13 @@ First, install [Keras](https://keras.io/), [spaCy](https://spacy.io) and the spa
|
||||||
English models (about 1GB of data):
|
English models (about 1GB of data):
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install https://github.com/fchollet/keras/archive/1.2.2.zip
|
pip install keras
|
||||||
pip install spacy
|
pip install spacy
|
||||||
python -m spacy.en.download
|
python -m spacy download en_vectors_web_lg
|
||||||
```
|
```
|
||||||
|
|
||||||
⚠️ **Important:** In order for the example to run, you'll need to install Keras from
|
You'll also want to get Keras working on your GPU, and you will need a backend, such as TensorFlow or Theano.
|
||||||
the 1.2.2 release (and not via `pip install keras`). For more info on this, see
|
This will depend on your setup, so you're mostly on your own for this step. If you're using AWS, try the
|
||||||
[#727](https://github.com/explosion/spaCy/issues/727).
|
|
||||||
|
|
||||||
You'll also want to get Keras working on your GPU. This will depend on your
|
|
||||||
set up, so you're mostly on your own for this step. If you're using AWS, try the
|
|
||||||
[NVidia AMI](https://aws.amazon.com/marketplace/pp/B00FYCDDTE). It made things pretty easy.
|
[NVidia AMI](https://aws.amazon.com/marketplace/pp/B00FYCDDTE). It made things pretty easy.
|
||||||
|
|
||||||
Once you've installed the dependencies, you can run a small preliminary test of
|
Once you've installed the dependencies, you can run a small preliminary test of
|
||||||
|
@ -80,22 +78,35 @@ Finally, download the [Stanford Natural Language Inference corpus](http://nlp.st
|
||||||
## Running the example
|
## Running the example
|
||||||
|
|
||||||
You can run the `keras_parikh_entailment/` directory as a script, which executes the file
|
You can run the `keras_parikh_entailment/` directory as a script, which executes the file
|
||||||
[`keras_parikh_entailment/__main__.py`](__main__.py). The first thing you'll want to do is train the model:
|
[`keras_parikh_entailment/__main__.py`](__main__.py). If you run the script without arguments,
|
||||||
|
the usage is shown. Running it with `-h` explains the command line arguments.
|
||||||
|
|
||||||
|
The first thing you'll want to do is train the model:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python keras_parikh_entailment/ train <train_directory> <dev_directory>
|
python keras_parikh_entailment/ train -t <path to SNLI train JSON> -s <path to SNLI dev JSON>
|
||||||
```
|
```
|
||||||
|
|
||||||
Training takes about 300 epochs for full accuracy, and I haven't rerun the full
|
Training takes about 300 epochs for full accuracy, and I haven't rerun the full
|
||||||
experiment since refactoring things to publish this example — please let me
|
experiment since refactoring things to publish this example — please let me
|
||||||
know if I've broken something. You should get to at least 85% on the development data.
|
know if I've broken something. You should get to at least 85% on the development data even after 10-15 epochs.
|
||||||
|
|
||||||
The other two modes demonstrate run-time usage. I never like relying on the accuracy printed
|
The other two modes demonstrate run-time usage. I never like relying on the accuracy printed
|
||||||
by `.fit()` methods. I never really feel confident until I've run a new process that loads
|
by `.fit()` methods. I never really feel confident until I've run a new process that loads
|
||||||
the model and starts making predictions, without access to the gold labels. I've therefore
|
the model and starts making predictions, without access to the gold labels. I've therefore
|
||||||
included an `evaluate` mode. Finally, there's also a little demo, which mostly exists to show
|
included an `evaluate` mode.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python keras_parikh_entailment/ evaluate -s <path to SNLI dev JSON>
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, there's also a little demo, which mostly exists to show
|
||||||
you how run-time usage will eventually look.
|
you how run-time usage will eventually look.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python keras_parikh_entailment/ demo
|
||||||
|
```
|
||||||
|
|
||||||
## Getting updates
|
## Getting updates
|
||||||
|
|
||||||
We should have the blog post explaining the model ready before the end of the week. To get
|
We should have the blog post explaining the model ready before the end of the week. To get
|
||||||
|
|
|
@ -1,139 +1,207 @@
|
||||||
from __future__ import division, unicode_literals, print_function
|
import numpy as np
|
||||||
import spacy
|
import json
|
||||||
|
from keras.utils import to_categorical
|
||||||
import plac
|
import plac
|
||||||
from pathlib import Path
|
import sys
|
||||||
import ujson as json
|
|
||||||
import numpy
|
|
||||||
from keras.utils.np_utils import to_categorical
|
|
||||||
|
|
||||||
from spacy_hook import get_embeddings, get_word_ids
|
|
||||||
from spacy_hook import create_similarity_pipeline
|
|
||||||
|
|
||||||
from keras_decomposable_attention import build_model
|
from keras_decomposable_attention import build_model
|
||||||
|
from spacy_hook import get_embeddings, KerasSimilarityShim
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import cPickle as pickle
|
import cPickle as pickle
|
||||||
except ImportError:
|
except ImportError:
|
||||||
import pickle
|
import pickle
|
||||||
|
|
||||||
|
import spacy
|
||||||
|
|
||||||
|
# workaround for keras/tensorflow bug
|
||||||
|
# see https://github.com/tensorflow/tensorflow/issues/3388
|
||||||
|
import os
|
||||||
|
import importlib
|
||||||
|
from keras import backend as K
|
||||||
|
|
||||||
|
|
||||||
|
def set_keras_backend(backend):
|
||||||
|
if K.backend() != backend:
|
||||||
|
os.environ["KERAS_BACKEND"] = backend
|
||||||
|
importlib.reload(K)
|
||||||
|
assert K.backend() == backend
|
||||||
|
if backend == "tensorflow":
|
||||||
|
K.get_session().close()
|
||||||
|
cfg = K.tf.ConfigProto()
|
||||||
|
cfg.gpu_options.allow_growth = True
|
||||||
|
K.set_session(K.tf.Session(config=cfg))
|
||||||
|
K.clear_session()
|
||||||
|
|
||||||
|
|
||||||
|
set_keras_backend("tensorflow")
|
||||||
|
|
||||||
|
|
||||||
def train(train_loc, dev_loc, shape, settings):
|
def train(train_loc, dev_loc, shape, settings):
|
||||||
train_texts1, train_texts2, train_labels = read_snli(train_loc)
|
train_texts1, train_texts2, train_labels = read_snli(train_loc)
|
||||||
dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)
|
dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)
|
||||||
|
|
||||||
print("Loading spaCy")
|
print("Loading spaCy")
|
||||||
nlp = spacy.load('en')
|
nlp = spacy.load("en_vectors_web_lg")
|
||||||
assert nlp.path is not None
|
assert nlp.path is not None
|
||||||
|
print("Processing texts...")
|
||||||
|
train_X = create_dataset(nlp, train_texts1, train_texts2, 100, shape[0])
|
||||||
|
dev_X = create_dataset(nlp, dev_texts1, dev_texts2, 100, shape[0])
|
||||||
|
|
||||||
print("Compiling network")
|
print("Compiling network")
|
||||||
model = build_model(get_embeddings(nlp.vocab), shape, settings)
|
model = build_model(get_embeddings(nlp.vocab), shape, settings)
|
||||||
print("Processing texts...")
|
|
||||||
Xs = []
|
|
||||||
for texts in (train_texts1, train_texts2, dev_texts1, dev_texts2):
|
|
||||||
Xs.append(get_word_ids(list(nlp.pipe(texts, n_threads=20, batch_size=20000)),
|
|
||||||
max_length=shape[0],
|
|
||||||
rnn_encode=settings['gru_encode'],
|
|
||||||
tree_truncate=settings['tree_truncate']))
|
|
||||||
train_X1, train_X2, dev_X1, dev_X2 = Xs
|
|
||||||
print(settings)
|
print(settings)
|
||||||
model.fit(
|
model.fit(
|
||||||
[train_X1, train_X2],
|
train_X,
|
||||||
train_labels,
|
train_labels,
|
||||||
validation_data=([dev_X1, dev_X2], dev_labels),
|
validation_data=(dev_X, dev_labels),
|
||||||
nb_epoch=settings['nr_epoch'],
|
epochs=settings["nr_epoch"],
|
||||||
batch_size=settings['batch_size'])
|
batch_size=settings["batch_size"],
|
||||||
if not (nlp.path / 'similarity').exists():
|
)
|
||||||
(nlp.path / 'similarity').mkdir()
|
if not (nlp.path / "similarity").exists():
|
||||||
print("Saving to", nlp.path / 'similarity')
|
(nlp.path / "similarity").mkdir()
|
||||||
|
print("Saving to", nlp.path / "similarity")
|
||||||
weights = model.get_weights()
|
weights = model.get_weights()
|
||||||
with (nlp.path / 'similarity' / 'model').open('wb') as file_:
|
# remove the embedding matrix. We can reconstruct it.
|
||||||
pickle.dump(weights[1:], file_)
|
del weights[1]
|
||||||
with (nlp.path / 'similarity' / 'config.json').open('wb') as file_:
|
with (nlp.path / "similarity" / "model").open("wb") as file_:
|
||||||
|
pickle.dump(weights, file_)
|
||||||
|
with (nlp.path / "similarity" / "config.json").open("w") as file_:
|
||||||
file_.write(model.to_json())
|
file_.write(model.to_json())
|
||||||
|
|
||||||
|
|
||||||
def evaluate(dev_loc):
|
def evaluate(dev_loc, shape):
|
||||||
dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)
|
dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)
|
||||||
nlp = spacy.load('en',
|
nlp = spacy.load("en_vectors_web_lg")
|
||||||
create_pipeline=create_similarity_pipeline)
|
nlp.add_pipe(KerasSimilarityShim.load(nlp.path / "similarity", nlp, shape[0]))
|
||||||
total = 0.
|
total = 0.0
|
||||||
correct = 0.
|
correct = 0.0
|
||||||
for text1, text2, label in zip(dev_texts1, dev_texts2, dev_labels):
|
for text1, text2, label in zip(dev_texts1, dev_texts2, dev_labels):
|
||||||
doc1 = nlp(text1)
|
doc1 = nlp(text1)
|
||||||
doc2 = nlp(text2)
|
doc2 = nlp(text2)
|
||||||
sim = doc1.similarity(doc2)
|
sim, _ = doc1.similarity(doc2)
|
||||||
if sim.argmax() == label.argmax():
|
if sim == KerasSimilarityShim.entailment_types[label.argmax()]:
|
||||||
correct += 1
|
correct += 1
|
||||||
total += 1
|
total += 1
|
||||||
return correct, total
|
return correct, total
|
||||||
|
|
||||||
|
|
||||||
def demo():
|
def demo(shape):
|
||||||
nlp = spacy.load('en',
|
nlp = spacy.load("en_vectors_web_lg")
|
||||||
create_pipeline=create_similarity_pipeline)
|
nlp.add_pipe(KerasSimilarityShim.load(nlp.path / "similarity", nlp, shape[0]))
|
||||||
doc1 = nlp(u'What were the best crime fiction books in 2016?')
|
|
||||||
doc2 = nlp(
|
doc1 = nlp(u"The king of France is bald.")
|
||||||
u'What should I read that was published last year? I like crime stories.')
|
doc2 = nlp(u"France has no king.")
|
||||||
print(doc1)
|
|
||||||
print(doc2)
|
print("Sentence 1:", doc1)
|
||||||
print("Similarity", doc1.similarity(doc2))
|
print("Sentence 2:", doc2)
|
||||||
|
|
||||||
|
entailment_type, confidence = doc1.similarity(doc2)
|
||||||
|
print("Entailment type:", entailment_type, "(Confidence:", confidence, ")")
|
||||||
|
|
||||||
|
|
||||||
|
LABELS = {"entailment": 0, "contradiction": 1, "neutral": 2}
|
||||||
|
|
||||||
|
|
||||||
LABELS = {'entailment': 0, 'contradiction': 1, 'neutral': 2}
|
|
||||||
def read_snli(path):
|
def read_snli(path):
|
||||||
texts1 = []
|
texts1 = []
|
||||||
texts2 = []
|
texts2 = []
|
||||||
labels = []
|
labels = []
|
||||||
with path.open() as file_:
|
with open(path, "r") as file_:
|
||||||
for line in file_:
|
for line in file_:
|
||||||
eg = json.loads(line)
|
eg = json.loads(line)
|
||||||
label = eg['gold_label']
|
label = eg["gold_label"]
|
||||||
if label == '-':
|
if label == "-": # per Parikh, ignore - SNLI entries
|
||||||
continue
|
continue
|
||||||
texts1.append(eg['sentence1'])
|
texts1.append(eg["sentence1"])
|
||||||
texts2.append(eg['sentence2'])
|
texts2.append(eg["sentence2"])
|
||||||
labels.append(LABELS[label])
|
labels.append(LABELS[label])
|
||||||
return texts1, texts2, to_categorical(numpy.asarray(labels, dtype='int32'))
|
return texts1, texts2, to_categorical(np.asarray(labels, dtype="int32"))
|
||||||
|
|
||||||
|
|
||||||
|
def create_dataset(nlp, texts, hypotheses, num_unk, max_length):
|
||||||
|
sents = texts + hypotheses
|
||||||
|
sents_as_ids = []
|
||||||
|
for sent in sents:
|
||||||
|
doc = nlp(sent)
|
||||||
|
word_ids = []
|
||||||
|
for i, token in enumerate(doc):
|
||||||
|
# skip odd spaces from tokenizer
|
||||||
|
if token.has_vector and token.vector_norm == 0:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if i > max_length:
|
||||||
|
break
|
||||||
|
|
||||||
|
if token.has_vector:
|
||||||
|
word_ids.append(token.rank + num_unk + 1)
|
||||||
|
else:
|
||||||
|
# if we don't have a vector, pick an OOV entry
|
||||||
|
word_ids.append(token.rank % num_unk + 1)
|
||||||
|
|
||||||
|
# there must be a simpler way of generating padded arrays from lists...
|
||||||
|
word_id_vec = np.zeros((max_length), dtype="int")
|
||||||
|
clipped_len = min(max_length, len(word_ids))
|
||||||
|
word_id_vec[:clipped_len] = word_ids[:clipped_len]
|
||||||
|
sents_as_ids.append(word_id_vec)
|
||||||
|
|
||||||
|
return [np.array(sents_as_ids[: len(texts)]), np.array(sents_as_ids[len(texts) :])]
|
||||||
|
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
mode=("Mode to execute", "positional", None, str, ["train", "evaluate", "demo"]),
|
mode=("Mode to execute", "positional", None, str, ["train", "evaluate", "demo"]),
|
||||||
train_loc=("Path to training data", "positional", None, Path),
|
train_loc=("Path to training data", "option", "t", str),
|
||||||
dev_loc=("Path to development data", "positional", None, Path),
|
dev_loc=("Path to development or test data", "option", "s", str),
|
||||||
max_length=("Length to truncate sentences", "option", "L", int),
|
max_length=("Length to truncate sentences", "option", "L", int),
|
||||||
nr_hidden=("Number of hidden units", "option", "H", int),
|
nr_hidden=("Number of hidden units", "option", "H", int),
|
||||||
dropout=("Dropout level", "option", "d", float),
|
dropout=("Dropout level", "option", "d", float),
|
||||||
learn_rate=("Learning rate", "option", "e", float),
|
learn_rate=("Learning rate", "option", "r", float),
|
||||||
batch_size=("Batch size for neural network training", "option", "b", int),
|
batch_size=("Batch size for neural network training", "option", "b", int),
|
||||||
nr_epoch=("Number of training epochs", "option", "i", int),
|
nr_epoch=("Number of training epochs", "option", "e", int),
|
||||||
tree_truncate=("Truncate sentences by tree distance", "flag", "T", bool),
|
entail_dir=(
|
||||||
gru_encode=("Encode sentences with bidirectional GRU", "flag", "E", bool),
|
"Direction of entailment",
|
||||||
|
"option",
|
||||||
|
"D",
|
||||||
|
str,
|
||||||
|
["both", "left", "right"],
|
||||||
|
),
|
||||||
)
|
)
|
||||||
def main(mode, train_loc, dev_loc,
|
def main(
|
||||||
tree_truncate=False,
|
mode,
|
||||||
gru_encode=False,
|
train_loc,
|
||||||
max_length=100,
|
dev_loc,
|
||||||
nr_hidden=100,
|
max_length=50,
|
||||||
dropout=0.2,
|
nr_hidden=200,
|
||||||
learn_rate=0.001,
|
dropout=0.2,
|
||||||
batch_size=100,
|
learn_rate=0.001,
|
||||||
nr_epoch=5):
|
batch_size=1024,
|
||||||
|
nr_epoch=10,
|
||||||
|
entail_dir="both",
|
||||||
|
):
|
||||||
shape = (max_length, nr_hidden, 3)
|
shape = (max_length, nr_hidden, 3)
|
||||||
settings = {
|
settings = {
|
||||||
'lr': learn_rate,
|
"lr": learn_rate,
|
||||||
'dropout': dropout,
|
"dropout": dropout,
|
||||||
'batch_size': batch_size,
|
"batch_size": batch_size,
|
||||||
'nr_epoch': nr_epoch,
|
"nr_epoch": nr_epoch,
|
||||||
'tree_truncate': tree_truncate,
|
"entail_dir": entail_dir,
|
||||||
'gru_encode': gru_encode
|
|
||||||
}
|
}
|
||||||
if mode == 'train':
|
|
||||||
train(train_loc, dev_loc, shape, settings)
|
|
||||||
elif mode == 'evaluate':
|
|
||||||
correct, total = evaluate(dev_loc)
|
|
||||||
print(correct, '/', total, correct / total)
|
|
||||||
else:
|
|
||||||
demo()
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if mode == "train":
|
||||||
|
if train_loc == None or dev_loc == None:
|
||||||
|
print("Train mode requires paths to training and development data sets.")
|
||||||
|
sys.exit(1)
|
||||||
|
train(train_loc, dev_loc, shape, settings)
|
||||||
|
elif mode == "evaluate":
|
||||||
|
if dev_loc == None:
|
||||||
|
print("Evaluate mode requires paths to test data set.")
|
||||||
|
sys.exit(1)
|
||||||
|
correct, total = evaluate(dev_loc, shape)
|
||||||
|
print(correct, "/", total, correct / total)
|
||||||
|
else:
|
||||||
|
demo(shape)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
|
@ -1,259 +1,144 @@
|
||||||
# Semantic similarity with decomposable attention (using spaCy and Keras)
|
# Semantic entailment/similarity with decomposable attention (using spaCy and Keras)
|
||||||
# Practical state-of-the-art text similarity with spaCy and Keras
|
# Practical state-of-the-art textual entailment with spaCy and Keras
|
||||||
import numpy
|
|
||||||
|
|
||||||
from keras.layers import InputSpec, Layer, Input, Dense, merge
|
import numpy as np
|
||||||
from keras.layers import Lambda, Activation, Dropout, Embedding, TimeDistributed
|
from keras import layers, Model, models, optimizers
|
||||||
from keras.layers import Bidirectional, GRU, LSTM
|
from keras import backend as K
|
||||||
from keras.layers.noise import GaussianNoise
|
|
||||||
from keras.layers.advanced_activations import ELU
|
|
||||||
import keras.backend as K
|
|
||||||
from keras.models import Sequential, Model, model_from_json
|
|
||||||
from keras.regularizers import l2
|
|
||||||
from keras.optimizers import Adam
|
|
||||||
from keras.layers.normalization import BatchNormalization
|
|
||||||
from keras.layers.pooling import GlobalAveragePooling1D, GlobalMaxPooling1D
|
|
||||||
from keras.layers import Merge
|
|
||||||
|
|
||||||
|
|
||||||
def build_model(vectors, shape, settings):
|
def build_model(vectors, shape, settings):
|
||||||
'''Compile the model.'''
|
|
||||||
max_length, nr_hidden, nr_class = shape
|
max_length, nr_hidden, nr_class = shape
|
||||||
# Declare inputs.
|
|
||||||
ids1 = Input(shape=(max_length,), dtype='int32', name='words1')
|
|
||||||
ids2 = Input(shape=(max_length,), dtype='int32', name='words2')
|
|
||||||
|
|
||||||
# Construct operations, which we'll chain together.
|
input1 = layers.Input(shape=(max_length,), dtype="int32", name="words1")
|
||||||
embed = _StaticEmbedding(vectors, max_length, nr_hidden, dropout=0.2, nr_tune=5000)
|
input2 = layers.Input(shape=(max_length,), dtype="int32", name="words2")
|
||||||
if settings['gru_encode']:
|
|
||||||
encode = _BiRNNEncoding(max_length, nr_hidden, dropout=settings['dropout'])
|
|
||||||
attend = _Attention(max_length, nr_hidden, dropout=settings['dropout'])
|
|
||||||
align = _SoftAlignment(max_length, nr_hidden)
|
|
||||||
compare = _Comparison(max_length, nr_hidden, dropout=settings['dropout'])
|
|
||||||
entail = _Entailment(nr_hidden, nr_class, dropout=settings['dropout'])
|
|
||||||
|
|
||||||
# Declare the model as a computational graph.
|
# embeddings (projected)
|
||||||
sent1 = embed(ids1) # Shape: (i, n)
|
embed = create_embedding(vectors, max_length, nr_hidden)
|
||||||
sent2 = embed(ids2) # Shape: (j, n)
|
|
||||||
|
|
||||||
if settings['gru_encode']:
|
a = embed(input1)
|
||||||
sent1 = encode(sent1)
|
b = embed(input2)
|
||||||
sent2 = encode(sent2)
|
|
||||||
|
|
||||||
attention = attend(sent1, sent2) # Shape: (i, j)
|
# step 1: attend
|
||||||
|
F = create_feedforward(nr_hidden)
|
||||||
|
att_weights = layers.dot([F(a), F(b)], axes=-1)
|
||||||
|
|
||||||
align1 = align(sent2, attention)
|
G = create_feedforward(nr_hidden)
|
||||||
align2 = align(sent1, attention, transpose=True)
|
|
||||||
|
|
||||||
feats1 = compare(sent1, align1)
|
if settings["entail_dir"] == "both":
|
||||||
feats2 = compare(sent2, align2)
|
norm_weights_a = layers.Lambda(normalizer(1))(att_weights)
|
||||||
|
norm_weights_b = layers.Lambda(normalizer(2))(att_weights)
|
||||||
|
alpha = layers.dot([norm_weights_a, a], axes=1)
|
||||||
|
beta = layers.dot([norm_weights_b, b], axes=1)
|
||||||
|
|
||||||
scores = entail(feats1, feats2)
|
# step 2: compare
|
||||||
|
comp1 = layers.concatenate([a, beta])
|
||||||
|
comp2 = layers.concatenate([b, alpha])
|
||||||
|
v1 = layers.TimeDistributed(G)(comp1)
|
||||||
|
v2 = layers.TimeDistributed(G)(comp2)
|
||||||
|
|
||||||
# Now that we have the input/output, we can construct the Model object...
|
# step 3: aggregate
|
||||||
model = Model(input=[ids1, ids2], output=[scores])
|
v1_sum = layers.Lambda(sum_word)(v1)
|
||||||
|
v2_sum = layers.Lambda(sum_word)(v2)
|
||||||
|
concat = layers.concatenate([v1_sum, v2_sum])
|
||||||
|
|
||||||
|
elif settings["entail_dir"] == "left":
|
||||||
|
norm_weights_a = layers.Lambda(normalizer(1))(att_weights)
|
||||||
|
alpha = layers.dot([norm_weights_a, a], axes=1)
|
||||||
|
comp2 = layers.concatenate([b, alpha])
|
||||||
|
v2 = layers.TimeDistributed(G)(comp2)
|
||||||
|
v2_sum = layers.Lambda(sum_word)(v2)
|
||||||
|
concat = v2_sum
|
||||||
|
|
||||||
|
else:
|
||||||
|
norm_weights_b = layers.Lambda(normalizer(2))(att_weights)
|
||||||
|
beta = layers.dot([norm_weights_b, b], axes=1)
|
||||||
|
comp1 = layers.concatenate([a, beta])
|
||||||
|
v1 = layers.TimeDistributed(G)(comp1)
|
||||||
|
v1_sum = layers.Lambda(sum_word)(v1)
|
||||||
|
concat = v1_sum
|
||||||
|
|
||||||
|
H = create_feedforward(nr_hidden)
|
||||||
|
out = H(concat)
|
||||||
|
out = layers.Dense(nr_class, activation="softmax")(out)
|
||||||
|
|
||||||
|
model = Model([input1, input2], out)
|
||||||
|
|
||||||
# ...Compile it...
|
|
||||||
model.compile(
|
model.compile(
|
||||||
optimizer=Adam(lr=settings['lr']),
|
optimizer=optimizers.Adam(lr=settings["lr"]),
|
||||||
loss='categorical_crossentropy',
|
loss="categorical_crossentropy",
|
||||||
metrics=['accuracy'])
|
metrics=["accuracy"],
|
||||||
# ...And return it for training.
|
)
|
||||||
|
|
||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
class _StaticEmbedding(object):
|
def create_embedding(vectors, max_length, projected_dim):
|
||||||
def __init__(self, vectors, max_length, nr_out, nr_tune=1000, dropout=0.0):
|
return models.Sequential(
|
||||||
self.nr_out = nr_out
|
[
|
||||||
self.max_length = max_length
|
layers.Embedding(
|
||||||
self.embed = Embedding(
|
vectors.shape[0],
|
||||||
vectors.shape[0],
|
vectors.shape[1],
|
||||||
vectors.shape[1],
|
input_length=max_length,
|
||||||
input_length=max_length,
|
weights=[vectors],
|
||||||
weights=[vectors],
|
trainable=False,
|
||||||
name='embed',
|
),
|
||||||
trainable=False)
|
layers.TimeDistributed(
|
||||||
self.tune = Embedding(
|
layers.Dense(projected_dim, activation=None, use_bias=False)
|
||||||
nr_tune,
|
),
|
||||||
nr_out,
|
]
|
||||||
input_length=max_length,
|
)
|
||||||
weights=None,
|
|
||||||
name='tune',
|
|
||||||
trainable=True,
|
|
||||||
dropout=dropout)
|
|
||||||
self.mod_ids = Lambda(lambda sent: sent % (nr_tune-1)+1,
|
|
||||||
output_shape=(self.max_length,))
|
|
||||||
|
|
||||||
self.project = TimeDistributed(
|
|
||||||
Dense(
|
|
||||||
nr_out,
|
|
||||||
activation=None,
|
|
||||||
bias=False,
|
|
||||||
name='project'))
|
|
||||||
|
|
||||||
def __call__(self, sentence):
|
|
||||||
def get_output_shape(shapes):
|
|
||||||
print(shapes)
|
|
||||||
return shapes[0]
|
|
||||||
mod_sent = self.mod_ids(sentence)
|
|
||||||
tuning = self.tune(mod_sent)
|
|
||||||
#tuning = merge([tuning, mod_sent],
|
|
||||||
# mode=lambda AB: AB[0] * (K.clip(K.cast(AB[1], 'float32'), 0, 1)),
|
|
||||||
# output_shape=(self.max_length, self.nr_out))
|
|
||||||
pretrained = self.project(self.embed(sentence))
|
|
||||||
vectors = merge([pretrained, tuning], mode='sum')
|
|
||||||
return vectors
|
|
||||||
|
|
||||||
|
|
||||||
class _BiRNNEncoding(object):
|
def create_feedforward(num_units=200, activation="relu", dropout_rate=0.2):
|
||||||
def __init__(self, max_length, nr_out, dropout=0.0):
|
return models.Sequential(
|
||||||
self.model = Sequential()
|
[
|
||||||
self.model.add(Bidirectional(LSTM(nr_out, return_sequences=True,
|
layers.Dense(num_units, activation=activation),
|
||||||
dropout_W=dropout, dropout_U=dropout),
|
layers.Dropout(dropout_rate),
|
||||||
input_shape=(max_length, nr_out)))
|
layers.Dense(num_units, activation=activation),
|
||||||
self.model.add(TimeDistributed(Dense(nr_out, activation='relu', init='he_normal')))
|
layers.Dropout(dropout_rate),
|
||||||
self.model.add(TimeDistributed(Dropout(0.2)))
|
]
|
||||||
|
)
|
||||||
def __call__(self, sentence):
|
|
||||||
return self.model(sentence)
|
|
||||||
|
|
||||||
|
|
||||||
class _Attention(object):
|
def normalizer(axis):
|
||||||
def __init__(self, max_length, nr_hidden, dropout=0.0, L2=0.0, activation='relu'):
|
def _normalize(att_weights):
|
||||||
self.max_length = max_length
|
exp_weights = K.exp(att_weights)
|
||||||
self.model = Sequential()
|
sum_weights = K.sum(exp_weights, axis=axis, keepdims=True)
|
||||||
self.model.add(Dropout(dropout, input_shape=(nr_hidden,)))
|
return exp_weights / sum_weights
|
||||||
self.model.add(
|
|
||||||
Dense(nr_hidden, name='attend1',
|
|
||||||
init='he_normal', W_regularizer=l2(L2),
|
|
||||||
input_shape=(nr_hidden,), activation='relu'))
|
|
||||||
self.model.add(Dropout(dropout))
|
|
||||||
self.model.add(Dense(nr_hidden, name='attend2',
|
|
||||||
init='he_normal', W_regularizer=l2(L2), activation='relu'))
|
|
||||||
self.model = TimeDistributed(self.model)
|
|
||||||
|
|
||||||
def __call__(self, sent1, sent2):
|
return _normalize
|
||||||
def _outer(AB):
|
|
||||||
att_ji = K.batch_dot(AB[1], K.permute_dimensions(AB[0], (0, 2, 1)))
|
|
||||||
return K.permute_dimensions(att_ji,(0, 2, 1))
|
|
||||||
return merge(
|
|
||||||
[self.model(sent1), self.model(sent2)],
|
|
||||||
mode=_outer,
|
|
||||||
output_shape=(self.max_length, self.max_length))
|
|
||||||
|
|
||||||
|
|
||||||
class _SoftAlignment(object):
|
def sum_word(x):
|
||||||
def __init__(self, max_length, nr_hidden):
|
return K.sum(x, axis=1)
|
||||||
self.max_length = max_length
|
|
||||||
self.nr_hidden = nr_hidden
|
|
||||||
|
|
||||||
def __call__(self, sentence, attention, transpose=False):
|
|
||||||
def _normalize_attention(attmat):
|
|
||||||
att = attmat[0]
|
|
||||||
mat = attmat[1]
|
|
||||||
if transpose:
|
|
||||||
att = K.permute_dimensions(att,(0, 2, 1))
|
|
||||||
# 3d softmax
|
|
||||||
e = K.exp(att - K.max(att, axis=-1, keepdims=True))
|
|
||||||
s = K.sum(e, axis=-1, keepdims=True)
|
|
||||||
sm_att = e / s
|
|
||||||
return K.batch_dot(sm_att, mat)
|
|
||||||
return merge([attention, sentence], mode=_normalize_attention,
|
|
||||||
output_shape=(self.max_length, self.nr_hidden)) # Shape: (i, n)
|
|
||||||
|
|
||||||
|
|
||||||
class _Comparison(object):
|
|
||||||
def __init__(self, words, nr_hidden, L2=0.0, dropout=0.0):
|
|
||||||
self.words = words
|
|
||||||
self.model = Sequential()
|
|
||||||
self.model.add(Dropout(dropout, input_shape=(nr_hidden*2,)))
|
|
||||||
self.model.add(Dense(nr_hidden, name='compare1',
|
|
||||||
init='he_normal', W_regularizer=l2(L2)))
|
|
||||||
self.model.add(Activation('relu'))
|
|
||||||
self.model.add(Dropout(dropout))
|
|
||||||
self.model.add(Dense(nr_hidden, name='compare2',
|
|
||||||
W_regularizer=l2(L2), init='he_normal'))
|
|
||||||
self.model.add(Activation('relu'))
|
|
||||||
self.model = TimeDistributed(self.model)
|
|
||||||
|
|
||||||
def __call__(self, sent, align, **kwargs):
|
|
||||||
result = self.model(merge([sent, align], mode='concat')) # Shape: (i, n)
|
|
||||||
avged = GlobalAveragePooling1D()(result, mask=self.words)
|
|
||||||
maxed = GlobalMaxPooling1D()(result, mask=self.words)
|
|
||||||
merged = merge([avged, maxed])
|
|
||||||
result = BatchNormalization()(merged)
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
class _Entailment(object):
|
|
||||||
def __init__(self, nr_hidden, nr_out, dropout=0.0, L2=0.0):
|
|
||||||
self.model = Sequential()
|
|
||||||
self.model.add(Dropout(dropout, input_shape=(nr_hidden*2,)))
|
|
||||||
self.model.add(Dense(nr_hidden, name='entail1',
|
|
||||||
init='he_normal', W_regularizer=l2(L2)))
|
|
||||||
self.model.add(Activation('relu'))
|
|
||||||
self.model.add(Dropout(dropout))
|
|
||||||
self.model.add(Dense(nr_hidden, name='entail2',
|
|
||||||
init='he_normal', W_regularizer=l2(L2)))
|
|
||||||
self.model.add(Activation('relu'))
|
|
||||||
self.model.add(Dense(nr_out, name='entail_out', activation='softmax',
|
|
||||||
W_regularizer=l2(L2), init='zero'))
|
|
||||||
|
|
||||||
def __call__(self, feats1, feats2):
|
|
||||||
features = merge([feats1, feats2], mode='concat')
|
|
||||||
return self.model(features)
|
|
||||||
|
|
||||||
|
|
||||||
class _GlobalSumPooling1D(Layer):
|
|
||||||
'''Global sum pooling operation for temporal data.
|
|
||||||
|
|
||||||
# Input shape
|
|
||||||
3D tensor with shape: `(samples, steps, features)`.
|
|
||||||
|
|
||||||
# Output shape
|
|
||||||
2D tensor with shape: `(samples, features)`.
|
|
||||||
'''
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
super(_GlobalSumPooling1D, self).__init__(**kwargs)
|
|
||||||
self.input_spec = [InputSpec(ndim=3)]
|
|
||||||
|
|
||||||
def get_output_shape_for(self, input_shape):
|
|
||||||
return (input_shape[0], input_shape[2])
|
|
||||||
|
|
||||||
def call(self, x, mask=None):
|
|
||||||
if mask is not None:
|
|
||||||
return K.sum(x * K.clip(mask, 0, 1), axis=1)
|
|
||||||
else:
|
|
||||||
return K.sum(x, axis=1)
|
|
||||||
|
|
||||||
|
|
||||||
def test_build_model():
|
def test_build_model():
|
||||||
vectors = numpy.ndarray((100, 8), dtype='float32')
|
vectors = np.ndarray((100, 8), dtype="float32")
|
||||||
shape = (10, 16, 3)
|
shape = (10, 16, 3)
|
||||||
settings = {'lr': 0.001, 'dropout': 0.2, 'gru_encode':True}
|
settings = {"lr": 0.001, "dropout": 0.2, "gru_encode": True, "entail_dir": "both"}
|
||||||
model = build_model(vectors, shape, settings)
|
model = build_model(vectors, shape, settings)
|
||||||
|
|
||||||
|
|
||||||
def test_fit_model():
|
def test_fit_model():
|
||||||
|
|
||||||
def _generate_X(nr_example, length, nr_vector):
|
def _generate_X(nr_example, length, nr_vector):
|
||||||
X1 = numpy.ndarray((nr_example, length), dtype='int32')
|
X1 = np.ndarray((nr_example, length), dtype="int32")
|
||||||
X1 *= X1 < nr_vector
|
X1 *= X1 < nr_vector
|
||||||
X1 *= 0 <= X1
|
X1 *= 0 <= X1
|
||||||
X2 = numpy.ndarray((nr_example, length), dtype='int32')
|
X2 = np.ndarray((nr_example, length), dtype="int32")
|
||||||
X2 *= X2 < nr_vector
|
X2 *= X2 < nr_vector
|
||||||
X2 *= 0 <= X2
|
X2 *= 0 <= X2
|
||||||
return [X1, X2]
|
return [X1, X2]
|
||||||
|
|
||||||
def _generate_Y(nr_example, nr_class):
|
def _generate_Y(nr_example, nr_class):
|
||||||
ys = numpy.zeros((nr_example, nr_class), dtype='int32')
|
ys = np.zeros((nr_example, nr_class), dtype="int32")
|
||||||
for i in range(nr_example):
|
for i in range(nr_example):
|
||||||
ys[i, i % nr_class] = 1
|
ys[i, i % nr_class] = 1
|
||||||
return ys
|
return ys
|
||||||
|
|
||||||
vectors = numpy.ndarray((100, 8), dtype='float32')
|
vectors = np.ndarray((100, 8), dtype="float32")
|
||||||
shape = (10, 16, 3)
|
shape = (10, 16, 3)
|
||||||
settings = {'lr': 0.001, 'dropout': 0.2, 'gru_encode':True}
|
settings = {"lr": 0.001, "dropout": 0.2, "gru_encode": True, "entail_dir": "both"}
|
||||||
model = build_model(vectors, shape, settings)
|
model = build_model(vectors, shape, settings)
|
||||||
|
|
||||||
train_X = _generate_X(20, shape[0], vectors.shape[0])
|
train_X = _generate_X(20, shape[0], vectors.shape[0])
|
||||||
|
@ -261,8 +146,7 @@ def test_fit_model():
|
||||||
dev_X = _generate_X(15, shape[0], vectors.shape[0])
|
dev_X = _generate_X(15, shape[0], vectors.shape[0])
|
||||||
dev_Y = _generate_Y(15, shape[2])
|
dev_Y = _generate_Y(15, shape[2])
|
||||||
|
|
||||||
model.fit(train_X, train_Y, validation_data=(dev_X, dev_Y), nb_epoch=5,
|
model.fit(train_X, train_Y, validation_data=(dev_X, dev_Y), epochs=5, batch_size=4)
|
||||||
batch_size=4)
|
|
||||||
|
|
||||||
|
|
||||||
__all__ = [build_model]
|
__all__ = [build_model]
|
||||||
|
|
|
@ -1,8 +1,5 @@
|
||||||
|
import numpy as np
|
||||||
from keras.models import model_from_json
|
from keras.models import model_from_json
|
||||||
import numpy
|
|
||||||
import numpy.random
|
|
||||||
import json
|
|
||||||
from spacy.tokens.span import Span
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import cPickle as pickle
|
import cPickle as pickle
|
||||||
|
@ -11,16 +8,23 @@ except ImportError:
|
||||||
|
|
||||||
|
|
||||||
class KerasSimilarityShim(object):
|
class KerasSimilarityShim(object):
|
||||||
|
entailment_types = ["entailment", "contradiction", "neutral"]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def load(cls, path, nlp, get_features=None, max_length=100):
|
def load(cls, path, nlp, max_length=100, get_features=None):
|
||||||
|
|
||||||
if get_features is None:
|
if get_features is None:
|
||||||
get_features = get_word_ids
|
get_features = get_word_ids
|
||||||
|
|
||||||
with (path / 'config.json').open() as file_:
|
with (path / 'config.json').open() as file_:
|
||||||
model = model_from_json(file_.read())
|
model = model_from_json(file_.read())
|
||||||
with (path / 'model').open('rb') as file_:
|
with (path / 'model').open('rb') as file_:
|
||||||
weights = pickle.load(file_)
|
weights = pickle.load(file_)
|
||||||
|
|
||||||
embeddings = get_embeddings(nlp.vocab)
|
embeddings = get_embeddings(nlp.vocab)
|
||||||
model.set_weights([embeddings] + weights)
|
weights.insert(1, embeddings)
|
||||||
|
model.set_weights(weights)
|
||||||
|
|
||||||
return cls(model, get_features=get_features, max_length=max_length)
|
return cls(model, get_features=get_features, max_length=max_length)
|
||||||
|
|
||||||
def __init__(self, model, get_features=None, max_length=100):
|
def __init__(self, model, get_features=None, max_length=100):
|
||||||
|
@ -32,58 +36,42 @@ class KerasSimilarityShim(object):
|
||||||
doc.user_hooks['similarity'] = self.predict
|
doc.user_hooks['similarity'] = self.predict
|
||||||
doc.user_span_hooks['similarity'] = self.predict
|
doc.user_span_hooks['similarity'] = self.predict
|
||||||
|
|
||||||
|
return doc
|
||||||
|
|
||||||
def predict(self, doc1, doc2):
|
def predict(self, doc1, doc2):
|
||||||
x1 = self.get_features([doc1], max_length=self.max_length, tree_truncate=True)
|
x1 = self.get_features([doc1], max_length=self.max_length)
|
||||||
x2 = self.get_features([doc2], max_length=self.max_length, tree_truncate=True)
|
x2 = self.get_features([doc2], max_length=self.max_length)
|
||||||
scores = self.model.predict([x1, x2])
|
scores = self.model.predict([x1, x2])
|
||||||
return scores[0]
|
|
||||||
|
return self.entailment_types[scores.argmax()], scores.max()
|
||||||
|
|
||||||
|
|
||||||
def get_embeddings(vocab, nr_unk=100):
|
def get_embeddings(vocab, nr_unk=100):
|
||||||
nr_vector = max(lex.rank for lex in vocab) + 1
|
# the extra +1 is for a zero vector representing sentence-final padding
|
||||||
vectors = numpy.zeros((nr_vector+nr_unk+2, vocab.vectors_length), dtype='float32')
|
num_vectors = max(lex.rank for lex in vocab) + 2
|
||||||
|
|
||||||
|
# create random vectors for OOV tokens
|
||||||
|
oov = np.random.normal(size=(nr_unk, vocab.vectors_length))
|
||||||
|
oov = oov / oov.sum(axis=1, keepdims=True)
|
||||||
|
|
||||||
|
vectors = np.zeros((num_vectors + nr_unk, vocab.vectors_length), dtype='float32')
|
||||||
|
vectors[1:(nr_unk + 1), ] = oov
|
||||||
for lex in vocab:
|
for lex in vocab:
|
||||||
if lex.has_vector:
|
if lex.has_vector and lex.vector_norm > 0:
|
||||||
vectors[lex.rank+1] = lex.vector / lex.vector_norm
|
vectors[nr_unk + lex.rank + 1] = lex.vector / lex.vector_norm
|
||||||
|
|
||||||
return vectors
|
return vectors
|
||||||
|
|
||||||
|
|
||||||
def get_word_ids(docs, rnn_encode=False, tree_truncate=False, max_length=100, nr_unk=100):
|
def get_word_ids(docs, max_length=100, nr_unk=100):
|
||||||
Xs = numpy.zeros((len(docs), max_length), dtype='int32')
|
Xs = np.zeros((len(docs), max_length), dtype='int32')
|
||||||
|
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
if tree_truncate:
|
for j, token in enumerate(doc):
|
||||||
if isinstance(doc, Span):
|
if j == max_length:
|
||||||
queue = [doc.root]
|
|
||||||
else:
|
|
||||||
queue = [sent.root for sent in doc.sents]
|
|
||||||
else:
|
|
||||||
queue = list(doc)
|
|
||||||
words = []
|
|
||||||
while len(words) <= max_length and queue:
|
|
||||||
word = queue.pop(0)
|
|
||||||
if rnn_encode or (not word.is_punct and not word.is_space):
|
|
||||||
words.append(word)
|
|
||||||
if tree_truncate:
|
|
||||||
queue.extend(list(word.lefts))
|
|
||||||
queue.extend(list(word.rights))
|
|
||||||
words.sort()
|
|
||||||
for j, token in enumerate(words):
|
|
||||||
if token.has_vector:
|
|
||||||
Xs[i, j] = token.rank+1
|
|
||||||
else:
|
|
||||||
Xs[i, j] = (token.shape % (nr_unk-1))+2
|
|
||||||
j += 1
|
|
||||||
if j >= max_length:
|
|
||||||
break
|
break
|
||||||
else:
|
if token.has_vector:
|
||||||
Xs[i, len(words)] = 1
|
Xs[i, j] = token.rank + nr_unk + 1
|
||||||
|
else:
|
||||||
|
Xs[i, j] = token.rank % nr_unk + 1
|
||||||
return Xs
|
return Xs
|
||||||
|
|
||||||
|
|
||||||
def create_similarity_pipeline(nlp, max_length=100):
|
|
||||||
return [
|
|
||||||
nlp.tagger,
|
|
||||||
nlp.entity,
|
|
||||||
nlp.parser,
|
|
||||||
KerasSimilarityShim.load(nlp.path / 'similarity', nlp, max_length)
|
|
||||||
]
|
|
||||||
|
|
955
examples/notebooks/Decompositional Attention.ipynb
Normal file
955
examples/notebooks/Decompositional Attention.ipynb
Normal file
|
@ -0,0 +1,955 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Natural language inference using spaCy and Keras"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Introduction"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"This notebook details an implementation of the natural language inference model presented in [(Parikh et al, 2016)](https://arxiv.org/abs/1606.01933). The model is notable for the small number of parameters *and hyperparameters* it specifies, while still yielding good performance."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Constructing the dataset"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import spacy\n",
|
||||||
|
"import numpy as np"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We only need the GloVe vectors from spaCy, not a full NLP pipeline."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"nlp = spacy.load('en_vectors_web_lg')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Function to load the SNLI dataset. The categories are converted to a one-hot representation. The function comes from an example in spaCy."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/home/jds/tensorflow-gpu/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
|
||||||
|
" from ._conv import register_converters as _register_converters\n",
|
||||||
|
"Using TensorFlow backend.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"from keras.utils import to_categorical\n",
|
||||||
|
"\n",
|
||||||
|
"LABELS = {'entailment': 0, 'contradiction': 1, 'neutral': 2}\n",
|
||||||
|
"def read_snli(path):\n",
|
||||||
|
" texts1 = []\n",
|
||||||
|
" texts2 = []\n",
|
||||||
|
" labels = []\n",
|
||||||
|
" with open(path, 'r') as file_:\n",
|
||||||
|
" for line in file_:\n",
|
||||||
|
" eg = json.loads(line)\n",
|
||||||
|
" label = eg['gold_label']\n",
|
||||||
|
" if label == '-': # per Parikh, ignore - SNLI entries\n",
|
||||||
|
" continue\n",
|
||||||
|
" texts1.append(eg['sentence1'])\n",
|
||||||
|
" texts2.append(eg['sentence2'])\n",
|
||||||
|
" labels.append(LABELS[label])\n",
|
||||||
|
" return texts1, texts2, to_categorical(np.asarray(labels, dtype='int32'))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Because Keras can do the train/test split for us, we'll load *all* SNLI triples from one file."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"texts,hypotheses,labels = read_snli('snli/snli_1.0_train.jsonl')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def create_dataset(nlp, texts, hypotheses, num_oov, max_length, norm_vectors = True):\n",
|
||||||
|
" sents = texts + hypotheses\n",
|
||||||
|
" \n",
|
||||||
|
" # the extra +1 is for a zero vector representing NULL for padding\n",
|
||||||
|
" num_vectors = max(lex.rank for lex in nlp.vocab) + 2 \n",
|
||||||
|
" \n",
|
||||||
|
" # create random vectors for OOV tokens\n",
|
||||||
|
" oov = np.random.normal(size=(num_oov, nlp.vocab.vectors_length))\n",
|
||||||
|
" oov = oov / oov.sum(axis=1, keepdims=True)\n",
|
||||||
|
" \n",
|
||||||
|
" vectors = np.zeros((num_vectors + num_oov, nlp.vocab.vectors_length), dtype='float32')\n",
|
||||||
|
" vectors[num_vectors:, ] = oov\n",
|
||||||
|
" for lex in nlp.vocab:\n",
|
||||||
|
" if lex.has_vector and lex.vector_norm > 0:\n",
|
||||||
|
" vectors[lex.rank + 1] = lex.vector / lex.vector_norm if norm_vectors == True else lex.vector\n",
|
||||||
|
" \n",
|
||||||
|
" sents_as_ids = []\n",
|
||||||
|
" for sent in sents:\n",
|
||||||
|
" doc = nlp(sent)\n",
|
||||||
|
" word_ids = []\n",
|
||||||
|
" \n",
|
||||||
|
" for i, token in enumerate(doc):\n",
|
||||||
|
" # skip odd spaces from tokenizer\n",
|
||||||
|
" if token.has_vector and token.vector_norm == 0:\n",
|
||||||
|
" continue\n",
|
||||||
|
" \n",
|
||||||
|
" if i > max_length:\n",
|
||||||
|
" break\n",
|
||||||
|
" \n",
|
||||||
|
" if token.has_vector:\n",
|
||||||
|
" word_ids.append(token.rank + 1)\n",
|
||||||
|
" else:\n",
|
||||||
|
" # if we don't have a vector, pick an OOV entry\n",
|
||||||
|
" word_ids.append(token.rank % num_oov + num_vectors) \n",
|
||||||
|
" \n",
|
||||||
|
" # there must be a simpler way of generating padded arrays from lists...\n",
|
||||||
|
" word_id_vec = np.zeros((max_length), dtype='int')\n",
|
||||||
|
" clipped_len = min(max_length, len(word_ids))\n",
|
||||||
|
" word_id_vec[:clipped_len] = word_ids[:clipped_len]\n",
|
||||||
|
" sents_as_ids.append(word_id_vec)\n",
|
||||||
|
" \n",
|
||||||
|
" \n",
|
||||||
|
" return vectors, np.array(sents_as_ids[:len(texts)]), np.array(sents_as_ids[len(texts):])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sem_vectors, text_vectors, hypothesis_vectors = create_dataset(nlp, texts, hypotheses, 100, 50, True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"texts_test,hypotheses_test,labels_test = read_snli('snli/snli_1.0_test.jsonl')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"_, text_vectors_test, hypothesis_vectors_test = create_dataset(nlp, texts_test, hypotheses_test, 100, 50, True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We use spaCy to tokenize the sentences and return, when available, a semantic vector for each token. \n",
|
||||||
|
"\n",
|
||||||
|
"OOV terms (tokens for which no semantic vector is available) are assigned to one of a set of randomly-generated OOV vectors, per (Parikh et al, 2016).\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Note that we will clip sentences to 50 words maximum."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 13,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from keras import layers, Model, models\n",
|
||||||
|
"from keras import backend as K"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Building the model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The embedding layer copies the 300-dimensional GloVe vectors into GPU memory. Per (Parikh et al, 2016), the vectors, which are not adapted during training, are projected down to lower-dimensional vectors using a trained projection matrix."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 14,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def create_embedding(vectors, max_length, projected_dim):\n",
|
||||||
|
" return models.Sequential([\n",
|
||||||
|
" layers.Embedding(\n",
|
||||||
|
" vectors.shape[0],\n",
|
||||||
|
" vectors.shape[1],\n",
|
||||||
|
" input_length=max_length,\n",
|
||||||
|
" weights=[vectors],\n",
|
||||||
|
" trainable=False),\n",
|
||||||
|
" \n",
|
||||||
|
" layers.TimeDistributed(\n",
|
||||||
|
" layers.Dense(projected_dim,\n",
|
||||||
|
" activation=None,\n",
|
||||||
|
" use_bias=False))\n",
|
||||||
|
" ])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The Parikh model makes use of three feedforward blocks that construct nonlinear combinations of their input. Each block contains two ReLU layers and two dropout layers."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def create_feedforward(num_units=200, activation='relu', dropout_rate=0.2):\n",
|
||||||
|
" return models.Sequential([\n",
|
||||||
|
" layers.Dense(num_units, activation=activation),\n",
|
||||||
|
" layers.Dropout(dropout_rate),\n",
|
||||||
|
" layers.Dense(num_units, activation=activation),\n",
|
||||||
|
" layers.Dropout(dropout_rate)\n",
|
||||||
|
" ])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The basic idea of the (Parikh et al, 2016) model is to:\n",
|
||||||
|
"\n",
|
||||||
|
"1. *Align*: Construct an alignment of subphrases in the text and hypothesis using an attention-like mechanism, called \"decompositional\" because the layer is applied to each of the two sentences individually rather than to their product. The dot product of the nonlinear transformations of the inputs is then normalized vertically and horizontally to yield a pair of \"soft\" alignment structures, from text->hypothesis and hypothesis->text. Concretely, for each word in one sentence, a multinomial distribution is computed over the words of the other sentence, by learning a multinomial logistic with softmax target.\n",
|
||||||
|
"2. *Compare*: Each word is now compared to its aligned phrase using a function modeled as a two-layer feedforward ReLU network. The output is a high-dimensional representation of the strength of association between word and aligned phrase.\n",
|
||||||
|
"3. *Aggregate*: The comparison vectors are summed, separately, for the text and the hypothesis. The result is two vectors: one that describes the degree of association of the text to the hypothesis, and the second, of the hypothesis to the text.\n",
|
||||||
|
"4. Finally, these two vectors are processed by a dense layer followed by a softmax classifier, as usual.\n",
|
||||||
|
"\n",
|
||||||
|
"Note that because in entailment the truth conditions of the consequent must be a subset of those of the antecedent, it is not obvious that we need both vectors in step (3). Entailment is not symmetric. It may be enough to just use the hypothesis->text vector. We will explore this possibility later."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We need a couple of little functions for Lambda layers to normalize and aggregate weights:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def normalizer(axis):\n",
|
||||||
|
" def _normalize(att_weights):\n",
|
||||||
|
" exp_weights = K.exp(att_weights)\n",
|
||||||
|
" sum_weights = K.sum(exp_weights, axis=axis, keepdims=True)\n",
|
||||||
|
" return exp_weights/sum_weights\n",
|
||||||
|
" return _normalize\n",
|
||||||
|
"\n",
|
||||||
|
"def sum_word(x):\n",
|
||||||
|
" return K.sum(x, axis=1)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 17,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def build_model(vectors, max_length, num_hidden, num_classes, projected_dim, entail_dir='both'):\n",
|
||||||
|
" input1 = layers.Input(shape=(max_length,), dtype='int32', name='words1')\n",
|
||||||
|
" input2 = layers.Input(shape=(max_length,), dtype='int32', name='words2')\n",
|
||||||
|
" \n",
|
||||||
|
" # embeddings (projected)\n",
|
||||||
|
" embed = create_embedding(vectors, max_length, projected_dim)\n",
|
||||||
|
" \n",
|
||||||
|
" a = embed(input1)\n",
|
||||||
|
" b = embed(input2)\n",
|
||||||
|
" \n",
|
||||||
|
" # step 1: attend\n",
|
||||||
|
" F = create_feedforward(num_hidden)\n",
|
||||||
|
" att_weights = layers.dot([F(a), F(b)], axes=-1)\n",
|
||||||
|
" \n",
|
||||||
|
" G = create_feedforward(num_hidden)\n",
|
||||||
|
" \n",
|
||||||
|
" if entail_dir == 'both':\n",
|
||||||
|
" norm_weights_a = layers.Lambda(normalizer(1))(att_weights)\n",
|
||||||
|
" norm_weights_b = layers.Lambda(normalizer(2))(att_weights)\n",
|
||||||
|
" alpha = layers.dot([norm_weights_a, a], axes=1)\n",
|
||||||
|
" beta = layers.dot([norm_weights_b, b], axes=1)\n",
|
||||||
|
"\n",
|
||||||
|
" # step 2: compare\n",
|
||||||
|
" comp1 = layers.concatenate([a, beta])\n",
|
||||||
|
" comp2 = layers.concatenate([b, alpha])\n",
|
||||||
|
" v1 = layers.TimeDistributed(G)(comp1)\n",
|
||||||
|
" v2 = layers.TimeDistributed(G)(comp2)\n",
|
||||||
|
"\n",
|
||||||
|
" # step 3: aggregate\n",
|
||||||
|
" v1_sum = layers.Lambda(sum_word)(v1)\n",
|
||||||
|
" v2_sum = layers.Lambda(sum_word)(v2)\n",
|
||||||
|
" concat = layers.concatenate([v1_sum, v2_sum])\n",
|
||||||
|
" elif entail_dir == 'left':\n",
|
||||||
|
" norm_weights_a = layers.Lambda(normalizer(1))(att_weights)\n",
|
||||||
|
" alpha = layers.dot([norm_weights_a, a], axes=1)\n",
|
||||||
|
" comp2 = layers.concatenate([b, alpha])\n",
|
||||||
|
" v2 = layers.TimeDistributed(G)(comp2)\n",
|
||||||
|
" v2_sum = layers.Lambda(sum_word)(v2)\n",
|
||||||
|
" concat = v2_sum\n",
|
||||||
|
" else:\n",
|
||||||
|
" norm_weights_b = layers.Lambda(normalizer(2))(att_weights)\n",
|
||||||
|
" beta = layers.dot([norm_weights_b, b], axes=1)\n",
|
||||||
|
" comp1 = layers.concatenate([a, beta])\n",
|
||||||
|
" v1 = layers.TimeDistributed(G)(comp1)\n",
|
||||||
|
" v1_sum = layers.Lambda(sum_word)(v1)\n",
|
||||||
|
" concat = v1_sum\n",
|
||||||
|
" \n",
|
||||||
|
" H = create_feedforward(num_hidden)\n",
|
||||||
|
" out = H(concat)\n",
|
||||||
|
" out = layers.Dense(num_classes, activation='softmax')(out)\n",
|
||||||
|
" \n",
|
||||||
|
" model = Model([input1, input2], out)\n",
|
||||||
|
" \n",
|
||||||
|
" model.compile(optimizer='adam',\n",
|
||||||
|
" loss='categorical_crossentropy',\n",
|
||||||
|
" metrics=['accuracy'])\n",
|
||||||
|
" return model\n",
|
||||||
|
" \n",
|
||||||
|
" \n",
|
||||||
|
" "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 18,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"Layer (type) Output Shape Param # Connected to \n",
|
||||||
|
"==================================================================================================\n",
|
||||||
|
"words1 (InputLayer) (None, 50) 0 \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"words2 (InputLayer) (None, 50) 0 \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"sequential_1 (Sequential) (None, 50, 200) 321381600 words1[0][0] \n",
|
||||||
|
" words2[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"sequential_2 (Sequential) (None, 50, 200) 80400 sequential_1[1][0] \n",
|
||||||
|
" sequential_1[2][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"dot_1 (Dot) (None, 50, 50) 0 sequential_2[1][0] \n",
|
||||||
|
" sequential_2[2][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"lambda_2 (Lambda) (None, 50, 50) 0 dot_1[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"lambda_1 (Lambda) (None, 50, 50) 0 dot_1[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"dot_3 (Dot) (None, 50, 200) 0 lambda_2[0][0] \n",
|
||||||
|
" sequential_1[2][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"dot_2 (Dot) (None, 50, 200) 0 lambda_1[0][0] \n",
|
||||||
|
" sequential_1[1][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"concatenate_1 (Concatenate) (None, 50, 400) 0 sequential_1[1][0] \n",
|
||||||
|
" dot_3[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"concatenate_2 (Concatenate) (None, 50, 400) 0 sequential_1[2][0] \n",
|
||||||
|
" dot_2[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"time_distributed_2 (TimeDistrib (None, 50, 200) 120400 concatenate_1[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"time_distributed_3 (TimeDistrib (None, 50, 200) 120400 concatenate_2[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"lambda_3 (Lambda) (None, 200) 0 time_distributed_2[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"lambda_4 (Lambda) (None, 200) 0 time_distributed_3[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"concatenate_3 (Concatenate) (None, 400) 0 lambda_3[0][0] \n",
|
||||||
|
" lambda_4[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"sequential_4 (Sequential) (None, 200) 120400 concatenate_3[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"dense_8 (Dense) (None, 3) 603 sequential_4[1][0] \n",
|
||||||
|
"==================================================================================================\n",
|
||||||
|
"Total params: 321,703,403\n",
|
||||||
|
"Trainable params: 381,803\n",
|
||||||
|
"Non-trainable params: 321,321,600\n",
|
||||||
|
"__________________________________________________________________________________________________\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"K.clear_session()\n",
|
||||||
|
"m = build_model(sem_vectors, 50, 200, 3, 200)\n",
|
||||||
|
"m.summary()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The number of trainable parameters, ~381k, is the number given by Parikh et al, so we're on the right track."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Training the model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Parikh et al use tiny batches of 4, training for 50MM batches, which amounts to around 500 epochs. Here we'll use large batches to better use the GPU, and train for fewer epochs -- for purposes of this experiment."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 19,
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Train on 549367 samples, validate on 9824 samples\n",
|
||||||
|
"Epoch 1/50\n",
|
||||||
|
"549367/549367 [==============================] - 34s 62us/step - loss: 0.7599 - acc: 0.6617 - val_loss: 0.5396 - val_acc: 0.7861\n",
|
||||||
|
"Epoch 2/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.5611 - acc: 0.7763 - val_loss: 0.4892 - val_acc: 0.8085\n",
|
||||||
|
"Epoch 3/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.5212 - acc: 0.7948 - val_loss: 0.4574 - val_acc: 0.8261\n",
|
||||||
|
"Epoch 4/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4986 - acc: 0.8045 - val_loss: 0.4410 - val_acc: 0.8274\n",
|
||||||
|
"Epoch 5/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4819 - acc: 0.8114 - val_loss: 0.4224 - val_acc: 0.8383\n",
|
||||||
|
"Epoch 6/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4714 - acc: 0.8166 - val_loss: 0.4200 - val_acc: 0.8379\n",
|
||||||
|
"Epoch 7/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4633 - acc: 0.8203 - val_loss: 0.4098 - val_acc: 0.8457\n",
|
||||||
|
"Epoch 8/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4558 - acc: 0.8232 - val_loss: 0.4114 - val_acc: 0.8415\n",
|
||||||
|
"Epoch 9/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4508 - acc: 0.8250 - val_loss: 0.4062 - val_acc: 0.8477\n",
|
||||||
|
"Epoch 10/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4433 - acc: 0.8286 - val_loss: 0.3982 - val_acc: 0.8486\n",
|
||||||
|
"Epoch 11/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4388 - acc: 0.8307 - val_loss: 0.3953 - val_acc: 0.8497\n",
|
||||||
|
"Epoch 12/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4351 - acc: 0.8321 - val_loss: 0.3973 - val_acc: 0.8522\n",
|
||||||
|
"Epoch 13/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4309 - acc: 0.8342 - val_loss: 0.3939 - val_acc: 0.8539\n",
|
||||||
|
"Epoch 14/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4269 - acc: 0.8355 - val_loss: 0.3932 - val_acc: 0.8517\n",
|
||||||
|
"Epoch 15/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4247 - acc: 0.8369 - val_loss: 0.3938 - val_acc: 0.8515\n",
|
||||||
|
"Epoch 16/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4208 - acc: 0.8379 - val_loss: 0.3936 - val_acc: 0.8504\n",
|
||||||
|
"Epoch 17/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4194 - acc: 0.8390 - val_loss: 0.3885 - val_acc: 0.8560\n",
|
||||||
|
"Epoch 18/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4162 - acc: 0.8402 - val_loss: 0.3874 - val_acc: 0.8561\n",
|
||||||
|
"Epoch 19/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4140 - acc: 0.8409 - val_loss: 0.3889 - val_acc: 0.8545\n",
|
||||||
|
"Epoch 20/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4114 - acc: 0.8426 - val_loss: 0.3864 - val_acc: 0.8583\n",
|
||||||
|
"Epoch 21/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4092 - acc: 0.8430 - val_loss: 0.3870 - val_acc: 0.8561\n",
|
||||||
|
"Epoch 22/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4062 - acc: 0.8442 - val_loss: 0.3852 - val_acc: 0.8577\n",
|
||||||
|
"Epoch 23/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4050 - acc: 0.8450 - val_loss: 0.3850 - val_acc: 0.8578\n",
|
||||||
|
"Epoch 24/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4035 - acc: 0.8455 - val_loss: 0.3825 - val_acc: 0.8555\n",
|
||||||
|
"Epoch 25/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.4018 - acc: 0.8460 - val_loss: 0.3837 - val_acc: 0.8573\n",
|
||||||
|
"Epoch 26/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3989 - acc: 0.8476 - val_loss: 0.3843 - val_acc: 0.8599\n",
|
||||||
|
"Epoch 27/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3979 - acc: 0.8481 - val_loss: 0.3841 - val_acc: 0.8589\n",
|
||||||
|
"Epoch 28/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3967 - acc: 0.8484 - val_loss: 0.3811 - val_acc: 0.8575\n",
|
||||||
|
"Epoch 29/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3956 - acc: 0.8492 - val_loss: 0.3829 - val_acc: 0.8589\n",
|
||||||
|
"Epoch 30/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3938 - acc: 0.8499 - val_loss: 0.3859 - val_acc: 0.8562\n",
|
||||||
|
"Epoch 31/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3925 - acc: 0.8500 - val_loss: 0.3798 - val_acc: 0.8587\n",
|
||||||
|
"Epoch 32/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3906 - acc: 0.8509 - val_loss: 0.3834 - val_acc: 0.8569\n",
|
||||||
|
"Epoch 33/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3893 - acc: 0.8511 - val_loss: 0.3806 - val_acc: 0.8588\n",
|
||||||
|
"Epoch 34/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3885 - acc: 0.8515 - val_loss: 0.3828 - val_acc: 0.8603\n",
|
||||||
|
"Epoch 35/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3879 - acc: 0.8520 - val_loss: 0.3800 - val_acc: 0.8594\n",
|
||||||
|
"Epoch 36/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3860 - acc: 0.8530 - val_loss: 0.3796 - val_acc: 0.8577\n",
|
||||||
|
"Epoch 37/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3856 - acc: 0.8532 - val_loss: 0.3857 - val_acc: 0.8591\n",
|
||||||
|
"Epoch 38/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3838 - acc: 0.8535 - val_loss: 0.3835 - val_acc: 0.8603\n",
|
||||||
|
"Epoch 39/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3830 - acc: 0.8543 - val_loss: 0.3830 - val_acc: 0.8599\n",
|
||||||
|
"Epoch 40/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3818 - acc: 0.8548 - val_loss: 0.3832 - val_acc: 0.8559\n",
|
||||||
|
"Epoch 41/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3806 - acc: 0.8551 - val_loss: 0.3845 - val_acc: 0.8553\n",
|
||||||
|
"Epoch 42/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3803 - acc: 0.8550 - val_loss: 0.3789 - val_acc: 0.8617\n",
|
||||||
|
"Epoch 43/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3791 - acc: 0.8556 - val_loss: 0.3835 - val_acc: 0.8580\n",
|
||||||
|
"Epoch 44/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3778 - acc: 0.8565 - val_loss: 0.3799 - val_acc: 0.8580\n",
|
||||||
|
"Epoch 45/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3766 - acc: 0.8571 - val_loss: 0.3790 - val_acc: 0.8625\n",
|
||||||
|
"Epoch 46/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3770 - acc: 0.8569 - val_loss: 0.3820 - val_acc: 0.8590\n",
|
||||||
|
"Epoch 47/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3761 - acc: 0.8573 - val_loss: 0.3831 - val_acc: 0.8581\n",
|
||||||
|
"Epoch 48/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3739 - acc: 0.8579 - val_loss: 0.3828 - val_acc: 0.8599\n",
|
||||||
|
"Epoch 49/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3738 - acc: 0.8577 - val_loss: 0.3785 - val_acc: 0.8590\n",
|
||||||
|
"Epoch 50/50\n",
|
||||||
|
"549367/549367 [==============================] - 33s 60us/step - loss: 0.3726 - acc: 0.8580 - val_loss: 0.3820 - val_acc: 0.8585\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"<keras.callbacks.History at 0x7f5c9f49c438>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 19,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"m.fit([text_vectors, hypothesis_vectors], labels, batch_size=1024, epochs=50,validation_data=([text_vectors_test, hypothesis_vectors_test], labels_test))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The result is broadly in the region reported by Parikh et al: ~86 vs 86.3%. The small difference might be accounted by differences in `max_length` (here set at 50), in the training regime, and that here we use Keras' built-in validation splitting rather than the SNLI test set."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Experiment: the asymmetric model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"It was suggested earlier that, based on the semantics of entailment, the vector representing the strength of association between the hypothesis to the text is all that is needed for classifying the entailment.\n",
|
||||||
|
"\n",
|
||||||
|
"The following model removes consideration of the complementary vector (text to hypothesis) from the computation. This will decrease the paramater count slightly, because the final dense layers will be smaller, and speed up the forward pass when predicting, because fewer calculations will be needed."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 20,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"Layer (type) Output Shape Param # Connected to \n",
|
||||||
|
"==================================================================================================\n",
|
||||||
|
"words2 (InputLayer) (None, 50) 0 \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"words1 (InputLayer) (None, 50) 0 \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"sequential_5 (Sequential) (None, 50, 200) 321381600 words1[0][0] \n",
|
||||||
|
" words2[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"sequential_6 (Sequential) (None, 50, 200) 80400 sequential_5[1][0] \n",
|
||||||
|
" sequential_5[2][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"dot_4 (Dot) (None, 50, 50) 0 sequential_6[1][0] \n",
|
||||||
|
" sequential_6[2][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"lambda_5 (Lambda) (None, 50, 50) 0 dot_4[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"dot_5 (Dot) (None, 50, 200) 0 lambda_5[0][0] \n",
|
||||||
|
" sequential_5[1][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"concatenate_4 (Concatenate) (None, 50, 400) 0 sequential_5[2][0] \n",
|
||||||
|
" dot_5[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"time_distributed_5 (TimeDistrib (None, 50, 200) 120400 concatenate_4[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"lambda_6 (Lambda) (None, 200) 0 time_distributed_5[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"sequential_8 (Sequential) (None, 200) 80400 lambda_6[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"dense_16 (Dense) (None, 3) 603 sequential_8[1][0] \n",
|
||||||
|
"==================================================================================================\n",
|
||||||
|
"Total params: 321,663,403\n",
|
||||||
|
"Trainable params: 341,803\n",
|
||||||
|
"Non-trainable params: 321,321,600\n",
|
||||||
|
"__________________________________________________________________________________________________\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"m1 = build_model(sem_vectors, 50, 200, 3, 200, 'left')\n",
|
||||||
|
"m1.summary()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The parameter count has indeed decreased by 40,000, corresponding to the 200x200 smaller H function."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 21,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Train on 549367 samples, validate on 9824 samples\n",
|
||||||
|
"Epoch 1/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 46us/step - loss: 0.7331 - acc: 0.6770 - val_loss: 0.5257 - val_acc: 0.7936\n",
|
||||||
|
"Epoch 2/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.5518 - acc: 0.7799 - val_loss: 0.4717 - val_acc: 0.8159\n",
|
||||||
|
"Epoch 3/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.5147 - acc: 0.7967 - val_loss: 0.4449 - val_acc: 0.8278\n",
|
||||||
|
"Epoch 4/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4948 - acc: 0.8060 - val_loss: 0.4326 - val_acc: 0.8344\n",
|
||||||
|
"Epoch 5/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4814 - acc: 0.8122 - val_loss: 0.4247 - val_acc: 0.8359\n",
|
||||||
|
"Epoch 6/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4712 - acc: 0.8162 - val_loss: 0.4143 - val_acc: 0.8430\n",
|
||||||
|
"Epoch 7/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4635 - acc: 0.8205 - val_loss: 0.4172 - val_acc: 0.8401\n",
|
||||||
|
"Epoch 8/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4570 - acc: 0.8223 - val_loss: 0.4106 - val_acc: 0.8422\n",
|
||||||
|
"Epoch 9/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4505 - acc: 0.8259 - val_loss: 0.4043 - val_acc: 0.8451\n",
|
||||||
|
"Epoch 10/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4459 - acc: 0.8280 - val_loss: 0.4050 - val_acc: 0.8467\n",
|
||||||
|
"Epoch 11/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4405 - acc: 0.8300 - val_loss: 0.3975 - val_acc: 0.8481\n",
|
||||||
|
"Epoch 12/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4360 - acc: 0.8324 - val_loss: 0.4026 - val_acc: 0.8496\n",
|
||||||
|
"Epoch 13/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4327 - acc: 0.8334 - val_loss: 0.4024 - val_acc: 0.8471\n",
|
||||||
|
"Epoch 14/50\n",
|
||||||
|
"549367/549367 [==============================] - 24s 45us/step - loss: 0.4293 - acc: 0.8350 - val_loss: 0.3955 - val_acc: 0.8496\n",
|
||||||
|
"Epoch 15/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4263 - acc: 0.8369 - val_loss: 0.3980 - val_acc: 0.8490\n",
|
||||||
|
"Epoch 16/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4236 - acc: 0.8377 - val_loss: 0.3958 - val_acc: 0.8496\n",
|
||||||
|
"Epoch 17/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4213 - acc: 0.8384 - val_loss: 0.3954 - val_acc: 0.8496\n",
|
||||||
|
"Epoch 18/50\n",
|
||||||
|
"549367/549367 [==============================] - 24s 45us/step - loss: 0.4187 - acc: 0.8394 - val_loss: 0.3929 - val_acc: 0.8514\n",
|
||||||
|
"Epoch 19/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4157 - acc: 0.8409 - val_loss: 0.3939 - val_acc: 0.8507\n",
|
||||||
|
"Epoch 20/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4135 - acc: 0.8417 - val_loss: 0.3953 - val_acc: 0.8522\n",
|
||||||
|
"Epoch 21/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4122 - acc: 0.8424 - val_loss: 0.3974 - val_acc: 0.8506\n",
|
||||||
|
"Epoch 22/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4099 - acc: 0.8435 - val_loss: 0.3918 - val_acc: 0.8522\n",
|
||||||
|
"Epoch 23/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4075 - acc: 0.8443 - val_loss: 0.3901 - val_acc: 0.8513\n",
|
||||||
|
"Epoch 24/50\n",
|
||||||
|
"549367/549367 [==============================] - 24s 44us/step - loss: 0.4067 - acc: 0.8447 - val_loss: 0.3885 - val_acc: 0.8543\n",
|
||||||
|
"Epoch 25/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4047 - acc: 0.8454 - val_loss: 0.3846 - val_acc: 0.8531\n",
|
||||||
|
"Epoch 26/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.4031 - acc: 0.8461 - val_loss: 0.3864 - val_acc: 0.8562\n",
|
||||||
|
"Epoch 27/50\n",
|
||||||
|
"549367/549367 [==============================] - 24s 45us/step - loss: 0.4020 - acc: 0.8467 - val_loss: 0.3874 - val_acc: 0.8546\n",
|
||||||
|
"Epoch 28/50\n",
|
||||||
|
"549367/549367 [==============================] - 24s 45us/step - loss: 0.4001 - acc: 0.8473 - val_loss: 0.3848 - val_acc: 0.8534\n",
|
||||||
|
"Epoch 29/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3991 - acc: 0.8479 - val_loss: 0.3865 - val_acc: 0.8562\n",
|
||||||
|
"Epoch 30/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3976 - acc: 0.8484 - val_loss: 0.3833 - val_acc: 0.8574\n",
|
||||||
|
"Epoch 31/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3961 - acc: 0.8487 - val_loss: 0.3846 - val_acc: 0.8585\n",
|
||||||
|
"Epoch 32/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3942 - acc: 0.8498 - val_loss: 0.3805 - val_acc: 0.8573\n",
|
||||||
|
"Epoch 33/50\n",
|
||||||
|
"549367/549367 [==============================] - 24s 44us/step - loss: 0.3935 - acc: 0.8503 - val_loss: 0.3856 - val_acc: 0.8579\n",
|
||||||
|
"Epoch 34/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3923 - acc: 0.8507 - val_loss: 0.3829 - val_acc: 0.8560\n",
|
||||||
|
"Epoch 35/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3920 - acc: 0.8508 - val_loss: 0.3864 - val_acc: 0.8575\n",
|
||||||
|
"Epoch 36/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3907 - acc: 0.8516 - val_loss: 0.3873 - val_acc: 0.8563\n",
|
||||||
|
"Epoch 37/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3891 - acc: 0.8519 - val_loss: 0.3850 - val_acc: 0.8570\n",
|
||||||
|
"Epoch 38/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3872 - acc: 0.8522 - val_loss: 0.3815 - val_acc: 0.8591\n",
|
||||||
|
"Epoch 39/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3887 - acc: 0.8520 - val_loss: 0.3829 - val_acc: 0.8590\n",
|
||||||
|
"Epoch 40/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3868 - acc: 0.8531 - val_loss: 0.3807 - val_acc: 0.8600\n",
|
||||||
|
"Epoch 41/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3859 - acc: 0.8537 - val_loss: 0.3832 - val_acc: 0.8574\n",
|
||||||
|
"Epoch 42/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3849 - acc: 0.8537 - val_loss: 0.3850 - val_acc: 0.8576\n",
|
||||||
|
"Epoch 43/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3834 - acc: 0.8541 - val_loss: 0.3825 - val_acc: 0.8563\n",
|
||||||
|
"Epoch 44/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3829 - acc: 0.8548 - val_loss: 0.3844 - val_acc: 0.8540\n",
|
||||||
|
"Epoch 45/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3816 - acc: 0.8552 - val_loss: 0.3841 - val_acc: 0.8559\n",
|
||||||
|
"Epoch 46/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3816 - acc: 0.8549 - val_loss: 0.3880 - val_acc: 0.8567\n",
|
||||||
|
"Epoch 47/50\n",
|
||||||
|
"549367/549367 [==============================] - 24s 45us/step - loss: 0.3799 - acc: 0.8559 - val_loss: 0.3767 - val_acc: 0.8635\n",
|
||||||
|
"Epoch 48/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3800 - acc: 0.8560 - val_loss: 0.3786 - val_acc: 0.8563\n",
|
||||||
|
"Epoch 49/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3781 - acc: 0.8563 - val_loss: 0.3812 - val_acc: 0.8596\n",
|
||||||
|
"Epoch 50/50\n",
|
||||||
|
"549367/549367 [==============================] - 25s 45us/step - loss: 0.3788 - acc: 0.8560 - val_loss: 0.3782 - val_acc: 0.8601\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"<keras.callbacks.History at 0x7f5ca1bf3e48>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 21,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"m1.fit([text_vectors, hypothesis_vectors], labels, batch_size=1024, epochs=50,validation_data=([text_vectors_test, hypothesis_vectors_test], labels_test))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"This model performs the same as the slightly more complex model that evaluates alignments in both directions. Note also that processing time is improved, from 64 down to about 45 microseconds per step. \n",
|
||||||
|
"\n",
|
||||||
|
"Let's now look at an asymmetric model that evaluates only text-to-hypothesis comparisons. We predict that such a model will correctly classify a decent proportion of the exemplars, but not as accurately as the previous two.\n",
|
||||||
|
"\n",
|
||||||
|
"We'll just use 10 epochs for expediency."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 96,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"Layer (type) Output Shape Param # Connected to \n",
|
||||||
|
"==================================================================================================\n",
|
||||||
|
"words1 (InputLayer) (None, 50) 0 \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"words2 (InputLayer) (None, 50) 0 \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"sequential_13 (Sequential) (None, 50, 200) 321381600 words1[0][0] \n",
|
||||||
|
" words2[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"sequential_14 (Sequential) (None, 50, 200) 80400 sequential_13[1][0] \n",
|
||||||
|
" sequential_13[2][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"dot_8 (Dot) (None, 50, 50) 0 sequential_14[1][0] \n",
|
||||||
|
" sequential_14[2][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"lambda_9 (Lambda) (None, 50, 50) 0 dot_8[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"dot_9 (Dot) (None, 50, 200) 0 lambda_9[0][0] \n",
|
||||||
|
" sequential_13[2][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"concatenate_6 (Concatenate) (None, 50, 400) 0 sequential_13[1][0] \n",
|
||||||
|
" dot_9[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"time_distributed_9 (TimeDistrib (None, 50, 200) 120400 concatenate_6[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"lambda_10 (Lambda) (None, 200) 0 time_distributed_9[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"sequential_16 (Sequential) (None, 200) 80400 lambda_10[0][0] \n",
|
||||||
|
"__________________________________________________________________________________________________\n",
|
||||||
|
"dense_32 (Dense) (None, 3) 603 sequential_16[1][0] \n",
|
||||||
|
"==================================================================================================\n",
|
||||||
|
"Total params: 321,663,403\n",
|
||||||
|
"Trainable params: 341,803\n",
|
||||||
|
"Non-trainable params: 321,321,600\n",
|
||||||
|
"__________________________________________________________________________________________________\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"m2 = build_model(sem_vectors, 50, 200, 3, 200, 'right')\n",
|
||||||
|
"m2.summary()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 97,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Train on 455226 samples, validate on 113807 samples\n",
|
||||||
|
"Epoch 1/10\n",
|
||||||
|
"455226/455226 [==============================] - 22s 49us/step - loss: 0.8920 - acc: 0.5771 - val_loss: 0.8001 - val_acc: 0.6435\n",
|
||||||
|
"Epoch 2/10\n",
|
||||||
|
"455226/455226 [==============================] - 22s 47us/step - loss: 0.7808 - acc: 0.6553 - val_loss: 0.7267 - val_acc: 0.6855\n",
|
||||||
|
"Epoch 3/10\n",
|
||||||
|
"455226/455226 [==============================] - 22s 47us/step - loss: 0.7329 - acc: 0.6825 - val_loss: 0.6966 - val_acc: 0.7006\n",
|
||||||
|
"Epoch 4/10\n",
|
||||||
|
"455226/455226 [==============================] - 22s 47us/step - loss: 0.7055 - acc: 0.6978 - val_loss: 0.6713 - val_acc: 0.7150\n",
|
||||||
|
"Epoch 5/10\n",
|
||||||
|
"455226/455226 [==============================] - 22s 47us/step - loss: 0.6862 - acc: 0.7081 - val_loss: 0.6533 - val_acc: 0.7253\n",
|
||||||
|
"Epoch 6/10\n",
|
||||||
|
"455226/455226 [==============================] - 21s 47us/step - loss: 0.6694 - acc: 0.7179 - val_loss: 0.6472 - val_acc: 0.7277\n",
|
||||||
|
"Epoch 7/10\n",
|
||||||
|
"455226/455226 [==============================] - 22s 47us/step - loss: 0.6555 - acc: 0.7252 - val_loss: 0.6338 - val_acc: 0.7347\n",
|
||||||
|
"Epoch 8/10\n",
|
||||||
|
"455226/455226 [==============================] - 22s 48us/step - loss: 0.6434 - acc: 0.7310 - val_loss: 0.6246 - val_acc: 0.7385\n",
|
||||||
|
"Epoch 9/10\n",
|
||||||
|
"455226/455226 [==============================] - 22s 47us/step - loss: 0.6325 - acc: 0.7367 - val_loss: 0.6164 - val_acc: 0.7424\n",
|
||||||
|
"Epoch 10/10\n",
|
||||||
|
"455226/455226 [==============================] - 22s 47us/step - loss: 0.6216 - acc: 0.7426 - val_loss: 0.6082 - val_acc: 0.7478\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"<keras.callbacks.History at 0x7fa6850cf080>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 97,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"m2.fit([text_vectors, hypothesis_vectors], labels, batch_size=1024, epochs=10,validation_split=.2)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Comparing this fit to the validation accuracy of the previous two models after 10 epochs, we observe that its accuracy is roughly 10 percentage points lower (about 0.75, versus about 0.85 for the previous model).\n",
|
||||||
|
"\n",
|
||||||
|
"It is reassuring that the neural modeling here reproduces what we know from the semantics of natural language!"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.5.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
|
@ -19,39 +19,40 @@ from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
output_dir=("Output directory for saved HTML", "positional", None, Path))
|
output_dir=("Output directory for saved HTML", "positional", None, Path)
|
||||||
|
)
|
||||||
def main(output_dir=None):
|
def main(output_dir=None):
|
||||||
nlp = English() # start off with blank English class
|
nlp = English() # start off with blank English class
|
||||||
|
|
||||||
Doc.set_extension('overlap', method=overlap_tokens)
|
Doc.set_extension("overlap", method=overlap_tokens)
|
||||||
doc1 = nlp(u"Peach emoji is where it has always been.")
|
doc1 = nlp("Peach emoji is where it has always been.")
|
||||||
doc2 = nlp(u"Peach is the superior emoji.")
|
doc2 = nlp("Peach is the superior emoji.")
|
||||||
print("Text 1:", doc1.text)
|
print("Text 1:", doc1.text)
|
||||||
print("Text 2:", doc2.text)
|
print("Text 2:", doc2.text)
|
||||||
print("Overlapping tokens:", doc1._.overlap(doc2))
|
print("Overlapping tokens:", doc1._.overlap(doc2))
|
||||||
|
|
||||||
Doc.set_extension('to_html', method=to_html)
|
Doc.set_extension("to_html", method=to_html)
|
||||||
doc = nlp(u"This is a sentence about Apple.")
|
doc = nlp("This is a sentence about Apple.")
|
||||||
# add entity manually for demo purposes, to make it work without a model
|
# add entity manually for demo purposes, to make it work without a model
|
||||||
doc.ents = [Span(doc, 5, 6, label=nlp.vocab.strings['ORG'])]
|
doc.ents = [Span(doc, 5, 6, label=nlp.vocab.strings["ORG"])]
|
||||||
print("Text:", doc.text)
|
print("Text:", doc.text)
|
||||||
doc._.to_html(output=output_dir, style='ent')
|
doc._.to_html(output=output_dir, style="ent")
|
||||||
|
|
||||||
|
|
||||||
def to_html(doc, output='/tmp', style='dep'):
|
def to_html(doc, output="/tmp", style="dep"):
|
||||||
"""Doc method extension for saving the current state as a displaCy
|
"""Doc method extension for saving the current state as a displaCy
|
||||||
visualization.
|
visualization.
|
||||||
"""
|
"""
|
||||||
# generate filename from first six non-punct tokens
|
# generate filename from first six non-punct tokens
|
||||||
file_name = '-'.join([w.text for w in doc[:6] if not w.is_punct]) + '.html'
|
file_name = "-".join([w.text for w in doc[:6] if not w.is_punct]) + ".html"
|
||||||
html = displacy.render(doc, style=style, page=True) # render markup
|
html = displacy.render(doc, style=style, page=True) # render markup
|
||||||
if output is not None:
|
if output is not None:
|
||||||
output_path = Path(output)
|
output_path = Path(output)
|
||||||
if not output_path.exists():
|
if not output_path.exists():
|
||||||
output_path.mkdir()
|
output_path.mkdir()
|
||||||
output_file = Path(output) / file_name
|
output_file = Path(output) / file_name
|
||||||
output_file.open('w', encoding='utf-8').write(html) # save to file
|
output_file.open("w", encoding="utf-8").write(html) # save to file
|
||||||
print('Saved HTML to {}'.format(output_file))
|
print("Saved HTML to {}".format(output_file))
|
||||||
else:
|
else:
|
||||||
print(html)
|
print(html)
|
||||||
|
|
||||||
|
@ -67,7 +68,7 @@ def overlap_tokens(doc, other_doc):
|
||||||
return overlap
|
return overlap
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
||||||
# Expected output:
|
# Expected output:
|
||||||
|
|
|
@ -25,15 +25,19 @@ def main():
|
||||||
# and no model or pre-defined pipeline loaded.
|
# and no model or pre-defined pipeline loaded.
|
||||||
nlp = English()
|
nlp = English()
|
||||||
rest_countries = RESTCountriesComponent(nlp) # initialise component
|
rest_countries = RESTCountriesComponent(nlp) # initialise component
|
||||||
nlp.add_pipe(rest_countries) # add it to the pipeline
|
nlp.add_pipe(rest_countries) # add it to the pipeline
|
||||||
doc = nlp(u"Some text about Colombia and the Czech Republic")
|
doc = nlp("Some text about Colombia and the Czech Republic")
|
||||||
print('Pipeline', nlp.pipe_names) # pipeline contains component name
|
print("Pipeline", nlp.pipe_names) # pipeline contains component name
|
||||||
print('Doc has countries', doc._.has_country) # Doc contains countries
|
print("Doc has countries", doc._.has_country) # Doc contains countries
|
||||||
for token in doc:
|
for token in doc:
|
||||||
if token._.is_country:
|
if token._.is_country:
|
||||||
print(token.text, token._.country_capital, token._.country_latlng,
|
print(
|
||||||
token._.country_flag) # country data
|
token.text,
|
||||||
print('Entities', [(e.text, e.label_) for e in doc.ents]) # entities
|
token._.country_capital,
|
||||||
|
token._.country_latlng,
|
||||||
|
token._.country_flag,
|
||||||
|
) # country data
|
||||||
|
print("Entities", [(e.text, e.label_) for e in doc.ents]) # entities
|
||||||
|
|
||||||
|
|
||||||
class RESTCountriesComponent(object):
|
class RESTCountriesComponent(object):
|
||||||
|
@ -41,42 +45,42 @@ class RESTCountriesComponent(object):
|
||||||
the REST Countries API, merges country names into one token, assigns entity
|
the REST Countries API, merges country names into one token, assigns entity
|
||||||
labels and sets attributes on country tokens.
|
labels and sets attributes on country tokens.
|
||||||
"""
|
"""
|
||||||
name = 'rest_countries' # component name, will show up in the pipeline
|
|
||||||
|
|
||||||
def __init__(self, nlp, label='GPE'):
|
name = "rest_countries" # component name, will show up in the pipeline
|
||||||
|
|
||||||
|
def __init__(self, nlp, label="GPE"):
|
||||||
"""Initialise the pipeline component. The shared nlp instance is used
|
"""Initialise the pipeline component. The shared nlp instance is used
|
||||||
to initialise the matcher with the shared vocab, get the label ID and
|
to initialise the matcher with the shared vocab, get the label ID and
|
||||||
generate Doc objects as phrase match patterns.
|
generate Doc objects as phrase match patterns.
|
||||||
"""
|
"""
|
||||||
# Make request once on initialisation and store the data
|
# Make request once on initialisation and store the data
|
||||||
r = requests.get('https://restcountries.eu/rest/v2/all')
|
r = requests.get("https://restcountries.eu/rest/v2/all")
|
||||||
r.raise_for_status() # make sure requests raises an error if it fails
|
r.raise_for_status() # make sure requests raises an error if it fails
|
||||||
countries = r.json()
|
countries = r.json()
|
||||||
|
|
||||||
# Convert API response to dict keyed by country name for easy lookup
|
# Convert API response to dict keyed by country name for easy lookup
|
||||||
# This could also be extended using the alternative and foreign language
|
# This could also be extended using the alternative and foreign language
|
||||||
# names provided by the API
|
# names provided by the API
|
||||||
self.countries = {c['name']: c for c in countries}
|
self.countries = {c["name"]: c for c in countries}
|
||||||
self.label = nlp.vocab.strings[label] # get entity label ID
|
self.label = nlp.vocab.strings[label] # get entity label ID
|
||||||
|
|
||||||
# Set up the PhraseMatcher with Doc patterns for each country name
|
# Set up the PhraseMatcher with Doc patterns for each country name
|
||||||
patterns = [nlp(c) for c in self.countries.keys()]
|
patterns = [nlp(c) for c in self.countries.keys()]
|
||||||
self.matcher = PhraseMatcher(nlp.vocab)
|
self.matcher = PhraseMatcher(nlp.vocab)
|
||||||
self.matcher.add('COUNTRIES', None, *patterns)
|
self.matcher.add("COUNTRIES", None, *patterns)
|
||||||
|
|
||||||
# Register attribute on the Token. We'll be overwriting this based on
|
# Register attribute on the Token. We'll be overwriting this based on
|
||||||
# the matches, so we're only setting a default value, not a getter.
|
# the matches, so we're only setting a default value, not a getter.
|
||||||
# If no default value is set, it defaults to None.
|
# If no default value is set, it defaults to None.
|
||||||
Token.set_extension('is_country', default=False)
|
Token.set_extension("is_country", default=False)
|
||||||
Token.set_extension('country_capital', default=False)
|
Token.set_extension("country_capital", default=False)
|
||||||
Token.set_extension('country_latlng', default=False)
|
Token.set_extension("country_latlng", default=False)
|
||||||
Token.set_extension('country_flag', default=False)
|
Token.set_extension("country_flag", default=False)
|
||||||
|
|
||||||
# Register attributes on Doc and Span via a getter that checks if one of
|
# Register attributes on Doc and Span via a getter that checks if one of
|
||||||
# the contained tokens is set to is_country == True.
|
# the contained tokens is set to is_country == True.
|
||||||
Doc.set_extension('has_country', getter=self.has_country)
|
Doc.set_extension("has_country", getter=self.has_country)
|
||||||
Span.set_extension('has_country', getter=self.has_country)
|
Span.set_extension("has_country", getter=self.has_country)
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, doc):
|
def __call__(self, doc):
|
||||||
"""Apply the pipeline component on a Doc object and modify it if matches
|
"""Apply the pipeline component on a Doc object and modify it if matches
|
||||||
|
@ -93,10 +97,10 @@ class RESTCountriesComponent(object):
|
||||||
# Can be extended with other data returned by the API, like
|
# Can be extended with other data returned by the API, like
|
||||||
# currencies, country code, flag, calling code etc.
|
# currencies, country code, flag, calling code etc.
|
||||||
for token in entity:
|
for token in entity:
|
||||||
token._.set('is_country', True)
|
token._.set("is_country", True)
|
||||||
token._.set('country_capital', self.countries[entity.text]['capital'])
|
token._.set("country_capital", self.countries[entity.text]["capital"])
|
||||||
token._.set('country_latlng', self.countries[entity.text]['latlng'])
|
token._.set("country_latlng", self.countries[entity.text]["latlng"])
|
||||||
token._.set('country_flag', self.countries[entity.text]['flag'])
|
token._.set("country_flag", self.countries[entity.text]["flag"])
|
||||||
# Overwrite doc.ents and add entity – be careful not to replace!
|
# Overwrite doc.ents and add entity – be careful not to replace!
|
||||||
doc.ents = list(doc.ents) + [entity]
|
doc.ents = list(doc.ents) + [entity]
|
||||||
for span in spans:
|
for span in spans:
|
||||||
|
@ -111,10 +115,10 @@ class RESTCountriesComponent(object):
|
||||||
is a country. Since the getter is only called when we access the
|
is a country. Since the getter is only called when we access the
|
||||||
attribute, we can refer to the Token's 'is_country' attribute here,
|
attribute, we can refer to the Token's 'is_country' attribute here,
|
||||||
which is already set in the processing step."""
|
which is already set in the processing step."""
|
||||||
return any([t._.get('is_country') for t in tokens])
|
return any([t._.get("is_country") for t in tokens])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
||||||
# Expected output:
|
# Expected output:
|
||||||
|
|
|
@ -20,23 +20,24 @@ from spacy.tokens import Doc, Span, Token
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
text=("Text to process", "positional", None, str),
|
text=("Text to process", "positional", None, str),
|
||||||
companies=("Names of technology companies", "positional", None, str))
|
companies=("Names of technology companies", "positional", None, str),
|
||||||
|
)
|
||||||
def main(text="Alphabet Inc. is the company behind Google.", *companies):
|
def main(text="Alphabet Inc. is the company behind Google.", *companies):
|
||||||
# For simplicity, we start off with only the blank English Language class
|
# For simplicity, we start off with only the blank English Language class
|
||||||
# and no model or pre-defined pipeline loaded.
|
# and no model or pre-defined pipeline loaded.
|
||||||
nlp = English()
|
nlp = English()
|
||||||
if not companies: # set default companies if none are set via args
|
if not companies: # set default companies if none are set via args
|
||||||
companies = ['Alphabet Inc.', 'Google', 'Netflix', 'Apple'] # etc.
|
companies = ["Alphabet Inc.", "Google", "Netflix", "Apple"] # etc.
|
||||||
component = TechCompanyRecognizer(nlp, companies) # initialise component
|
component = TechCompanyRecognizer(nlp, companies) # initialise component
|
||||||
nlp.add_pipe(component, last=True) # add last to the pipeline
|
nlp.add_pipe(component, last=True) # add last to the pipeline
|
||||||
|
|
||||||
doc = nlp(text)
|
doc = nlp(text)
|
||||||
print('Pipeline', nlp.pipe_names) # pipeline contains component name
|
print("Pipeline", nlp.pipe_names) # pipeline contains component name
|
||||||
print('Tokens', [t.text for t in doc]) # company names from the list are merged
|
print("Tokens", [t.text for t in doc]) # company names from the list are merged
|
||||||
print('Doc has_tech_org', doc._.has_tech_org) # Doc contains tech orgs
|
print("Doc has_tech_org", doc._.has_tech_org) # Doc contains tech orgs
|
||||||
print('Token 0 is_tech_org', doc[0]._.is_tech_org) # "Alphabet Inc." is a tech org
|
print("Token 0 is_tech_org", doc[0]._.is_tech_org) # "Alphabet Inc." is a tech org
|
||||||
print('Token 1 is_tech_org', doc[1]._.is_tech_org) # "is" is not
|
print("Token 1 is_tech_org", doc[1]._.is_tech_org) # "is" is not
|
||||||
print('Entities', [(e.text, e.label_) for e in doc.ents]) # all orgs are entities
|
print("Entities", [(e.text, e.label_) for e in doc.ents]) # all orgs are entities
|
||||||
|
|
||||||
|
|
||||||
class TechCompanyRecognizer(object):
|
class TechCompanyRecognizer(object):
|
||||||
|
@ -45,9 +46,10 @@ class TechCompanyRecognizer(object):
|
||||||
labelled as ORG and their spans are merged into one token. Additionally,
|
labelled as ORG and their spans are merged into one token. Additionally,
|
||||||
._.has_tech_org and ._.is_tech_org is set on the Doc/Span and Token
|
._.has_tech_org and ._.is_tech_org is set on the Doc/Span and Token
|
||||||
respectively."""
|
respectively."""
|
||||||
name = 'tech_companies' # component name, will show up in the pipeline
|
|
||||||
|
|
||||||
def __init__(self, nlp, companies=tuple(), label='ORG'):
|
name = "tech_companies" # component name, will show up in the pipeline
|
||||||
|
|
||||||
|
def __init__(self, nlp, companies=tuple(), label="ORG"):
|
||||||
"""Initialise the pipeline component. The shared nlp instance is used
|
"""Initialise the pipeline component. The shared nlp instance is used
|
||||||
to initialise the matcher with the shared vocab, get the label ID and
|
to initialise the matcher with the shared vocab, get the label ID and
|
||||||
generate Doc objects as phrase match patterns.
|
generate Doc objects as phrase match patterns.
|
||||||
|
@ -58,16 +60,16 @@ class TechCompanyRecognizer(object):
|
||||||
# so even if the list of companies is long, it's very efficient
|
# so even if the list of companies is long, it's very efficient
|
||||||
patterns = [nlp(org) for org in companies]
|
patterns = [nlp(org) for org in companies]
|
||||||
self.matcher = PhraseMatcher(nlp.vocab)
|
self.matcher = PhraseMatcher(nlp.vocab)
|
||||||
self.matcher.add('TECH_ORGS', None, *patterns)
|
self.matcher.add("TECH_ORGS", None, *patterns)
|
||||||
|
|
||||||
# Register attribute on the Token. We'll be overwriting this based on
|
# Register attribute on the Token. We'll be overwriting this based on
|
||||||
# the matches, so we're only setting a default value, not a getter.
|
# the matches, so we're only setting a default value, not a getter.
|
||||||
Token.set_extension('is_tech_org', default=False)
|
Token.set_extension("is_tech_org", default=False)
|
||||||
|
|
||||||
# Register attributes on Doc and Span via a getter that checks if one of
|
# Register attributes on Doc and Span via a getter that checks if one of
|
||||||
# the contained tokens is set to is_tech_org == True.
|
# the contained tokens is set to is_tech_org == True.
|
||||||
Doc.set_extension('has_tech_org', getter=self.has_tech_org)
|
Doc.set_extension("has_tech_org", getter=self.has_tech_org)
|
||||||
Span.set_extension('has_tech_org', getter=self.has_tech_org)
|
Span.set_extension("has_tech_org", getter=self.has_tech_org)
|
||||||
|
|
||||||
def __call__(self, doc):
|
def __call__(self, doc):
|
||||||
"""Apply the pipeline component on a Doc object and modify it if matches
|
"""Apply the pipeline component on a Doc object and modify it if matches
|
||||||
|
@ -82,7 +84,7 @@ class TechCompanyRecognizer(object):
|
||||||
spans.append(entity)
|
spans.append(entity)
|
||||||
# Set custom attribute on each token of the entity
|
# Set custom attribute on each token of the entity
|
||||||
for token in entity:
|
for token in entity:
|
||||||
token._.set('is_tech_org', True)
|
token._.set("is_tech_org", True)
|
||||||
# Overwrite doc.ents and add entity – be careful not to replace!
|
# Overwrite doc.ents and add entity – be careful not to replace!
|
||||||
doc.ents = list(doc.ents) + [entity]
|
doc.ents = list(doc.ents) + [entity]
|
||||||
for span in spans:
|
for span in spans:
|
||||||
|
@ -97,10 +99,10 @@ class TechCompanyRecognizer(object):
|
||||||
is a tech org. Since the getter is only called when we access the
|
is a tech org. Since the getter is only called when we access the
|
||||||
attribute, we can refer to the Token's 'is_tech_org' attribute here,
|
attribute, we can refer to the Token's 'is_tech_org' attribute here,
|
||||||
which is already set in the processing step."""
|
which is already set in the processing step."""
|
||||||
return any([t._.get('is_tech_org') for t in tokens])
|
return any([t._.get("is_tech_org") for t in tokens])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
||||||
# Expected output:
|
# Expected output:
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
'''Example of adding a pipeline component to prohibit sentence boundaries
|
"""Example of adding a pipeline component to prohibit sentence boundaries
|
||||||
before certain tokens.
|
before certain tokens.
|
||||||
|
|
||||||
What we do is write to the token.is_sent_start attribute, which
|
What we do is write to the token.is_sent_start attribute, which
|
||||||
|
@ -10,16 +10,18 @@ should also improve the parse quality.
|
||||||
The specific example here is drawn from https://github.com/explosion/spaCy/issues/2627
|
The specific example here is drawn from https://github.com/explosion/spaCy/issues/2627
|
||||||
Other versions of the model may not make the original mistake, so the specific
|
Other versions of the model may not make the original mistake, so the specific
|
||||||
example might not be apt for future versions.
|
example might not be apt for future versions.
|
||||||
'''
|
"""
|
||||||
import plac
|
import plac
|
||||||
import spacy
|
import spacy
|
||||||
|
|
||||||
|
|
||||||
def prevent_sentence_boundaries(doc):
|
def prevent_sentence_boundaries(doc):
|
||||||
for token in doc:
|
for token in doc:
|
||||||
if not can_be_sentence_start(token):
|
if not can_be_sentence_start(token):
|
||||||
token.is_sent_start = False
|
token.is_sent_start = False
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
|
||||||
def can_be_sentence_start(token):
|
def can_be_sentence_start(token):
|
||||||
if token.i == 0:
|
if token.i == 0:
|
||||||
return True
|
return True
|
||||||
|
@ -32,17 +34,18 @@ def can_be_sentence_start(token):
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
nlp = spacy.load('en_core_web_lg')
|
nlp = spacy.load("en_core_web_lg")
|
||||||
raw_text = "Been here and I'm loving it."
|
raw_text = "Been here and I'm loving it."
|
||||||
doc = nlp(raw_text)
|
doc = nlp(raw_text)
|
||||||
sentences = [sent.string.strip() for sent in doc.sents]
|
sentences = [sent.string.strip() for sent in doc.sents]
|
||||||
print(sentences)
|
print(sentences)
|
||||||
nlp.add_pipe(prevent_sentence_boundaries, before='parser')
|
nlp.add_pipe(prevent_sentence_boundaries, before="parser")
|
||||||
doc = nlp(raw_text)
|
doc = nlp(raw_text)
|
||||||
sentences = [sent.string.strip() for sent in doc.sents]
|
sentences = [sent.string.strip() for sent in doc.sents]
|
||||||
print(sentences)
|
print(sentences)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
30
examples/pipeline/fix_space_entities.py
Normal file
30
examples/pipeline/fix_space_entities.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
"""Demonstrate adding a rule-based component that forces some tokens to not
|
||||||
|
be entities, before the NER tagger is applied. This is used to hotfix the issue
|
||||||
|
in https://github.com/explosion/spaCy/issues/2870 , present as of spaCy v2.0.16.
|
||||||
|
"""
|
||||||
|
import spacy
|
||||||
|
from spacy.attrs import ENT_IOB
|
||||||
|
|
||||||
|
|
||||||
|
def fix_space_tags(doc):
|
||||||
|
ent_iobs = doc.to_array([ENT_IOB])
|
||||||
|
for i, token in enumerate(doc):
|
||||||
|
if token.is_space:
|
||||||
|
# Sets 'O' tag (0 is None, so I is 1, O is 2)
|
||||||
|
ent_iobs[i] = 2
|
||||||
|
doc.from_array([ENT_IOB], ent_iobs.reshape((len(doc), 1)))
|
||||||
|
return doc
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
nlp = spacy.load("en_core_web_sm")
|
||||||
|
text = u"""This is some crazy test where I dont need an Apple Watch to make things bug"""
|
||||||
|
doc = nlp(text)
|
||||||
|
print("Before", doc.ents)
|
||||||
|
nlp.add_pipe(fix_space_tags, name="fix-ner", before="ner")
|
||||||
|
doc = nlp(text)
|
||||||
|
print("After", doc.ents)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
|
@ -9,12 +9,14 @@ built-in dataset loader.
|
||||||
Compatible with: spaCy v2.0.0+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import print_function, unicode_literals
|
from __future__ import print_function, unicode_literals
|
||||||
from toolz import partition_all
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from joblib import Parallel, delayed
|
from joblib import Parallel, delayed
|
||||||
|
from functools import partial
|
||||||
import thinc.extra.datasets
|
import thinc.extra.datasets
|
||||||
import plac
|
import plac
|
||||||
import spacy
|
import spacy
|
||||||
|
from spacy.util import minibatch
|
||||||
|
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
|
@ -22,9 +24,9 @@ import spacy
|
||||||
model=("Model name (needs tagger)", "positional", None, str),
|
model=("Model name (needs tagger)", "positional", None, str),
|
||||||
n_jobs=("Number of workers", "option", "n", int),
|
n_jobs=("Number of workers", "option", "n", int),
|
||||||
batch_size=("Batch-size for each process", "option", "b", int),
|
batch_size=("Batch-size for each process", "option", "b", int),
|
||||||
limit=("Limit of entries from the dataset", "option", "l", int))
|
limit=("Limit of entries from the dataset", "option", "l", int),
|
||||||
def main(output_dir, model='en_core_web_sm', n_jobs=4, batch_size=1000,
|
)
|
||||||
limit=10000):
|
def main(output_dir, model="en_core_web_sm", n_jobs=4, batch_size=1000, limit=10000):
|
||||||
nlp = spacy.load(model) # load spaCy model
|
nlp = spacy.load(model) # load spaCy model
|
||||||
print("Loaded model '%s'" % model)
|
print("Loaded model '%s'" % model)
|
||||||
if not output_dir.exists():
|
if not output_dir.exists():
|
||||||
|
@ -34,45 +36,47 @@ def main(output_dir, model='en_core_web_sm', n_jobs=4, batch_size=1000,
|
||||||
data, _ = thinc.extra.datasets.imdb()
|
data, _ = thinc.extra.datasets.imdb()
|
||||||
texts, _ = zip(*data[-limit:])
|
texts, _ = zip(*data[-limit:])
|
||||||
print("Processing texts...")
|
print("Processing texts...")
|
||||||
partitions = partition_all(batch_size, texts)
|
partitions = minibatch(texts, size=batch_size)
|
||||||
executor = Parallel(n_jobs=n_jobs)
|
executor = Parallel(n_jobs=n_jobs, backend="multiprocessing", prefer="processes")
|
||||||
do = delayed(transform_texts)
|
do = delayed(partial(transform_texts, nlp))
|
||||||
tasks = (do(nlp, i, batch, output_dir)
|
tasks = (do(i, batch, output_dir) for i, batch in enumerate(partitions))
|
||||||
for i, batch in enumerate(partitions))
|
|
||||||
executor(tasks)
|
executor(tasks)
|
||||||
|
|
||||||
|
|
||||||
def transform_texts(nlp, batch_id, texts, output_dir):
|
def transform_texts(nlp, batch_id, texts, output_dir):
|
||||||
print(nlp.pipe_names)
|
print(nlp.pipe_names)
|
||||||
out_path = Path(output_dir) / ('%d.txt' % batch_id)
|
out_path = Path(output_dir) / ("%d.txt" % batch_id)
|
||||||
if out_path.exists(): # return None in case same batch is called again
|
if out_path.exists(): # return None in case same batch is called again
|
||||||
return None
|
return None
|
||||||
print('Processing batch', batch_id)
|
print("Processing batch", batch_id)
|
||||||
with out_path.open('w', encoding='utf8') as f:
|
with out_path.open("w", encoding="utf8") as f:
|
||||||
for doc in nlp.pipe(texts):
|
for doc in nlp.pipe(texts):
|
||||||
f.write(' '.join(represent_word(w) for w in doc if not w.is_space))
|
f.write(" ".join(represent_word(w) for w in doc if not w.is_space))
|
||||||
f.write('\n')
|
f.write("\n")
|
||||||
print('Saved {} texts to {}.txt'.format(len(texts), batch_id))
|
print("Saved {} texts to {}.txt".format(len(texts), batch_id))
|
||||||
|
|
||||||
|
|
||||||
def represent_word(word):
|
def represent_word(word):
|
||||||
text = word.text
|
text = word.text
|
||||||
# True-case, i.e. try to normalize sentence-initial capitals.
|
# True-case, i.e. try to normalize sentence-initial capitals.
|
||||||
# Only do this if the lower-cased form is more probable.
|
# Only do this if the lower-cased form is more probable.
|
||||||
if text.istitle() and is_sent_begin(word) \
|
if (
|
||||||
and word.prob < word.doc.vocab[text.lower()].prob:
|
text.istitle()
|
||||||
|
and is_sent_begin(word)
|
||||||
|
and word.prob < word.doc.vocab[text.lower()].prob
|
||||||
|
):
|
||||||
text = text.lower()
|
text = text.lower()
|
||||||
return text + '|' + word.tag_
|
return text + "|" + word.tag_
|
||||||
|
|
||||||
|
|
||||||
def is_sent_begin(word):
|
def is_sent_begin(word):
|
||||||
if word.i == 0:
|
if word.i == 0:
|
||||||
return True
|
return True
|
||||||
elif word.i >= 2 and word.nbor(-1).text in ('.', '!', '?', '...'):
|
elif word.i >= 2 and word.nbor(-1).text in (".", "!", "?", "..."):
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
'''Train for CONLL 2017 UD treebank evaluation. Takes .conllu files, writes
|
"""Train for CONLL 2017 UD treebank evaluation. Takes .conllu files, writes
|
||||||
.conllu format for development data, allowing the official scorer to be used.
|
.conllu format for development data, allowing the official scorer to be used.
|
||||||
'''
|
"""
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
import plac
|
import plac
|
||||||
import tqdm
|
import tqdm
|
||||||
|
@ -22,7 +22,6 @@ from spacy.matcher import Matcher
|
||||||
import itertools
|
import itertools
|
||||||
import random
|
import random
|
||||||
import numpy.random
|
import numpy.random
|
||||||
import cytoolz
|
|
||||||
|
|
||||||
import conll17_ud_eval
|
import conll17_ud_eval
|
||||||
|
|
||||||
|
@ -35,6 +34,7 @@ spacy.lang.ja.Japanese.Defaults.use_janome = False
|
||||||
random.seed(0)
|
random.seed(0)
|
||||||
numpy.random.seed(0)
|
numpy.random.seed(0)
|
||||||
|
|
||||||
|
|
||||||
def minibatch_by_words(items, size=5000):
|
def minibatch_by_words(items, size=5000):
|
||||||
random.shuffle(items)
|
random.shuffle(items)
|
||||||
if isinstance(size, int):
|
if isinstance(size, int):
|
||||||
|
@ -59,21 +59,31 @@ def minibatch_by_words(items, size=5000):
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
################
|
################
|
||||||
# Data reading #
|
# Data reading #
|
||||||
################
|
################
|
||||||
|
|
||||||
space_re = re.compile('\s+')
|
space_re = re.compile("\s+")
|
||||||
def split_text(text):
|
|
||||||
return [space_re.sub(' ', par.strip()) for par in text.split('\n\n')]
|
|
||||||
|
|
||||||
|
|
||||||
def read_data(nlp, conllu_file, text_file, raw_text=True, oracle_segments=False,
|
|
||||||
max_doc_length=None, limit=None):
|
def split_text(text):
|
||||||
'''Read the CONLLU format into (Doc, GoldParse) tuples. If raw_text=True,
|
return [space_re.sub(" ", par.strip()) for par in text.split("\n\n")]
|
||||||
|
|
||||||
|
|
||||||
|
def read_data(
|
||||||
|
nlp,
|
||||||
|
conllu_file,
|
||||||
|
text_file,
|
||||||
|
raw_text=True,
|
||||||
|
oracle_segments=False,
|
||||||
|
max_doc_length=None,
|
||||||
|
limit=None,
|
||||||
|
):
|
||||||
|
"""Read the CONLLU format into (Doc, GoldParse) tuples. If raw_text=True,
|
||||||
include Doc objects created using nlp.make_doc and then aligned against
|
include Doc objects created using nlp.make_doc and then aligned against
|
||||||
the gold-standard sequences. If oracle_segments=True, include Doc objects
|
the gold-standard sequences. If oracle_segments=True, include Doc objects
|
||||||
created from the gold-standard segments. At least one must be True.'''
|
created from the gold-standard segments. At least one must be True."""
|
||||||
if not raw_text and not oracle_segments:
|
if not raw_text and not oracle_segments:
|
||||||
raise ValueError("At least one of raw_text or oracle_segments must be True")
|
raise ValueError("At least one of raw_text or oracle_segments must be True")
|
||||||
paragraphs = split_text(text_file.read())
|
paragraphs = split_text(text_file.read())
|
||||||
|
@ -87,22 +97,21 @@ def read_data(nlp, conllu_file, text_file, raw_text=True, oracle_segments=False,
|
||||||
for cs in cd:
|
for cs in cd:
|
||||||
sent = defaultdict(list)
|
sent = defaultdict(list)
|
||||||
for id_, word, lemma, pos, tag, morph, head, dep, _, space_after in cs:
|
for id_, word, lemma, pos, tag, morph, head, dep, _, space_after in cs:
|
||||||
if '.' in id_:
|
if "." in id_:
|
||||||
continue
|
continue
|
||||||
if '-' in id_:
|
if "-" in id_:
|
||||||
continue
|
continue
|
||||||
id_ = int(id_)-1
|
id_ = int(id_) - 1
|
||||||
head = int(head)-1 if head != '0' else id_
|
head = int(head) - 1 if head != "0" else id_
|
||||||
sent['words'].append(word)
|
sent["words"].append(word)
|
||||||
sent['tags'].append(tag)
|
sent["tags"].append(tag)
|
||||||
sent['heads'].append(head)
|
sent["heads"].append(head)
|
||||||
sent['deps'].append('ROOT' if dep == 'root' else dep)
|
sent["deps"].append("ROOT" if dep == "root" else dep)
|
||||||
sent['spaces'].append(space_after == '_')
|
sent["spaces"].append(space_after == "_")
|
||||||
sent['entities'] = ['-'] * len(sent['words'])
|
sent["entities"] = ["-"] * len(sent["words"])
|
||||||
sent['heads'], sent['deps'] = projectivize(sent['heads'],
|
sent["heads"], sent["deps"] = projectivize(sent["heads"], sent["deps"])
|
||||||
sent['deps'])
|
|
||||||
if oracle_segments:
|
if oracle_segments:
|
||||||
docs.append(Doc(nlp.vocab, words=sent['words'], spaces=sent['spaces']))
|
docs.append(Doc(nlp.vocab, words=sent["words"], spaces=sent["spaces"]))
|
||||||
golds.append(GoldParse(docs[-1], **sent))
|
golds.append(GoldParse(docs[-1], **sent))
|
||||||
|
|
||||||
sent_annots.append(sent)
|
sent_annots.append(sent)
|
||||||
|
@ -128,18 +137,18 @@ def read_conllu(file_):
|
||||||
sent = []
|
sent = []
|
||||||
doc = []
|
doc = []
|
||||||
for line in file_:
|
for line in file_:
|
||||||
if line.startswith('# newdoc'):
|
if line.startswith("# newdoc"):
|
||||||
if doc:
|
if doc:
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
doc = []
|
doc = []
|
||||||
elif line.startswith('#'):
|
elif line.startswith("#"):
|
||||||
continue
|
continue
|
||||||
elif not line.strip():
|
elif not line.strip():
|
||||||
if sent:
|
if sent:
|
||||||
doc.append(sent)
|
doc.append(sent)
|
||||||
sent = []
|
sent = []
|
||||||
else:
|
else:
|
||||||
sent.append(list(line.strip().split('\t')))
|
sent.append(list(line.strip().split("\t")))
|
||||||
if len(sent[-1]) != 10:
|
if len(sent[-1]) != 10:
|
||||||
print(repr(line))
|
print(repr(line))
|
||||||
raise ValueError
|
raise ValueError
|
||||||
|
@ -154,25 +163,29 @@ def _make_gold(nlp, text, sent_annots):
|
||||||
# Flatten the conll annotations, and adjust the head indices
|
# Flatten the conll annotations, and adjust the head indices
|
||||||
flat = defaultdict(list)
|
flat = defaultdict(list)
|
||||||
for sent in sent_annots:
|
for sent in sent_annots:
|
||||||
flat['heads'].extend(len(flat['words'])+head for head in sent['heads'])
|
flat["heads"].extend(len(flat["words"]) + head for head in sent["heads"])
|
||||||
for field in ['words', 'tags', 'deps', 'entities', 'spaces']:
|
for field in ["words", "tags", "deps", "entities", "spaces"]:
|
||||||
flat[field].extend(sent[field])
|
flat[field].extend(sent[field])
|
||||||
# Construct text if necessary
|
# Construct text if necessary
|
||||||
assert len(flat['words']) == len(flat['spaces'])
|
assert len(flat["words"]) == len(flat["spaces"])
|
||||||
if text is None:
|
if text is None:
|
||||||
text = ''.join(word+' '*space for word, space in zip(flat['words'], flat['spaces']))
|
text = "".join(
|
||||||
|
word + " " * space for word, space in zip(flat["words"], flat["spaces"])
|
||||||
|
)
|
||||||
doc = nlp.make_doc(text)
|
doc = nlp.make_doc(text)
|
||||||
flat.pop('spaces')
|
flat.pop("spaces")
|
||||||
gold = GoldParse(doc, **flat)
|
gold = GoldParse(doc, **flat)
|
||||||
return doc, gold
|
return doc, gold
|
||||||
|
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# Data transforms for spaCy #
|
# Data transforms for spaCy #
|
||||||
#############################
|
#############################
|
||||||
|
|
||||||
|
|
||||||
def golds_to_gold_tuples(docs, golds):
|
def golds_to_gold_tuples(docs, golds):
|
||||||
'''Get out the annoying 'tuples' format used by begin_training, given the
|
"""Get out the annoying 'tuples' format used by begin_training, given the
|
||||||
GoldParse objects.'''
|
GoldParse objects."""
|
||||||
tuples = []
|
tuples = []
|
||||||
for doc, gold in zip(docs, golds):
|
for doc, gold in zip(docs, golds):
|
||||||
text = doc.text
|
text = doc.text
|
||||||
|
@ -186,15 +199,16 @@ def golds_to_gold_tuples(docs, golds):
|
||||||
# Evaluation #
|
# Evaluation #
|
||||||
##############
|
##############
|
||||||
|
|
||||||
|
|
||||||
def evaluate(nlp, text_loc, gold_loc, sys_loc, limit=None):
|
def evaluate(nlp, text_loc, gold_loc, sys_loc, limit=None):
|
||||||
with text_loc.open('r', encoding='utf8') as text_file:
|
with text_loc.open("r", encoding="utf8") as text_file:
|
||||||
texts = split_text(text_file.read())
|
texts = split_text(text_file.read())
|
||||||
docs = list(nlp.pipe(texts))
|
docs = list(nlp.pipe(texts))
|
||||||
with sys_loc.open('w', encoding='utf8') as out_file:
|
with sys_loc.open("w", encoding="utf8") as out_file:
|
||||||
write_conllu(docs, out_file)
|
write_conllu(docs, out_file)
|
||||||
with gold_loc.open('r', encoding='utf8') as gold_file:
|
with gold_loc.open("r", encoding="utf8") as gold_file:
|
||||||
gold_ud = conll17_ud_eval.load_conllu(gold_file)
|
gold_ud = conll17_ud_eval.load_conllu(gold_file)
|
||||||
with sys_loc.open('r', encoding='utf8') as sys_file:
|
with sys_loc.open("r", encoding="utf8") as sys_file:
|
||||||
sys_ud = conll17_ud_eval.load_conllu(sys_file)
|
sys_ud = conll17_ud_eval.load_conllu(sys_file)
|
||||||
scores = conll17_ud_eval.evaluate(gold_ud, sys_ud)
|
scores = conll17_ud_eval.evaluate(gold_ud, sys_ud)
|
||||||
return scores
|
return scores
|
||||||
|
@ -202,10 +216,10 @@ def evaluate(nlp, text_loc, gold_loc, sys_loc, limit=None):
|
||||||
|
|
||||||
def write_conllu(docs, file_):
|
def write_conllu(docs, file_):
|
||||||
merger = Matcher(docs[0].vocab)
|
merger = Matcher(docs[0].vocab)
|
||||||
merger.add('SUBTOK', None, [{'DEP': 'subtok', 'op': '+'}])
|
merger.add("SUBTOK", None, [{"DEP": "subtok", "op": "+"}])
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
matches = merger(doc)
|
matches = merger(doc)
|
||||||
spans = [doc[start:end+1] for _, start, end in matches]
|
spans = [doc[start : end + 1] for _, start, end in matches]
|
||||||
offsets = [(span.start_char, span.end_char) for span in spans]
|
offsets = [(span.start_char, span.end_char) for span in spans]
|
||||||
for start_char, end_char in offsets:
|
for start_char, end_char in offsets:
|
||||||
doc.merge(start_char, end_char)
|
doc.merge(start_char, end_char)
|
||||||
|
@ -214,58 +228,73 @@ def write_conllu(docs, file_):
|
||||||
file_.write("# sent_id = {i}.{j}\n".format(i=i, j=j))
|
file_.write("# sent_id = {i}.{j}\n".format(i=i, j=j))
|
||||||
file_.write("# text = {text}\n".format(text=sent.text))
|
file_.write("# text = {text}\n".format(text=sent.text))
|
||||||
for k, token in enumerate(sent):
|
for k, token in enumerate(sent):
|
||||||
file_.write(token._.get_conllu_lines(k) + '\n')
|
file_.write(token._.get_conllu_lines(k) + "\n")
|
||||||
file_.write('\n')
|
file_.write("\n")
|
||||||
|
|
||||||
|
|
||||||
def print_progress(itn, losses, ud_scores):
|
def print_progress(itn, losses, ud_scores):
|
||||||
fields = {
|
fields = {
|
||||||
'dep_loss': losses.get('parser', 0.0),
|
"dep_loss": losses.get("parser", 0.0),
|
||||||
'tag_loss': losses.get('tagger', 0.0),
|
"tag_loss": losses.get("tagger", 0.0),
|
||||||
'words': ud_scores['Words'].f1 * 100,
|
"words": ud_scores["Words"].f1 * 100,
|
||||||
'sents': ud_scores['Sentences'].f1 * 100,
|
"sents": ud_scores["Sentences"].f1 * 100,
|
||||||
'tags': ud_scores['XPOS'].f1 * 100,
|
"tags": ud_scores["XPOS"].f1 * 100,
|
||||||
'uas': ud_scores['UAS'].f1 * 100,
|
"uas": ud_scores["UAS"].f1 * 100,
|
||||||
'las': ud_scores['LAS'].f1 * 100,
|
"las": ud_scores["LAS"].f1 * 100,
|
||||||
}
|
}
|
||||||
header = ['Epoch', 'Loss', 'LAS', 'UAS', 'TAG', 'SENT', 'WORD']
|
header = ["Epoch", "Loss", "LAS", "UAS", "TAG", "SENT", "WORD"]
|
||||||
if itn == 0:
|
if itn == 0:
|
||||||
print('\t'.join(header))
|
print("\t".join(header))
|
||||||
tpl = '\t'.join((
|
tpl = "\t".join(
|
||||||
'{:d}',
|
(
|
||||||
'{dep_loss:.1f}',
|
"{:d}",
|
||||||
'{las:.1f}',
|
"{dep_loss:.1f}",
|
||||||
'{uas:.1f}',
|
"{las:.1f}",
|
||||||
'{tags:.1f}',
|
"{uas:.1f}",
|
||||||
'{sents:.1f}',
|
"{tags:.1f}",
|
||||||
'{words:.1f}',
|
"{sents:.1f}",
|
||||||
))
|
"{words:.1f}",
|
||||||
|
)
|
||||||
|
)
|
||||||
print(tpl.format(itn, **fields))
|
print(tpl.format(itn, **fields))
|
||||||
|
|
||||||
#def get_sent_conllu(sent, sent_id):
|
|
||||||
|
# def get_sent_conllu(sent, sent_id):
|
||||||
# lines = ["# sent_id = {sent_id}".format(sent_id=sent_id)]
|
# lines = ["# sent_id = {sent_id}".format(sent_id=sent_id)]
|
||||||
|
|
||||||
|
|
||||||
def get_token_conllu(token, i):
|
def get_token_conllu(token, i):
|
||||||
if token._.begins_fused:
|
if token._.begins_fused:
|
||||||
n = 1
|
n = 1
|
||||||
while token.nbor(n)._.inside_fused:
|
while token.nbor(n)._.inside_fused:
|
||||||
n += 1
|
n += 1
|
||||||
id_ = '%d-%d' % (i, i+n)
|
id_ = "%d-%d" % (i, i + n)
|
||||||
lines = [id_, token.text, '_', '_', '_', '_', '_', '_', '_', '_']
|
lines = [id_, token.text, "_", "_", "_", "_", "_", "_", "_", "_"]
|
||||||
else:
|
else:
|
||||||
lines = []
|
lines = []
|
||||||
if token.head.i == token.i:
|
if token.head.i == token.i:
|
||||||
head = 0
|
head = 0
|
||||||
else:
|
else:
|
||||||
head = i + (token.head.i - token.i) + 1
|
head = i + (token.head.i - token.i) + 1
|
||||||
fields = [str(i+1), token.text, token.lemma_, token.pos_, token.tag_, '_',
|
fields = [
|
||||||
str(head), token.dep_.lower(), '_', '_']
|
str(i + 1),
|
||||||
lines.append('\t'.join(fields))
|
token.text,
|
||||||
return '\n'.join(lines)
|
token.lemma_,
|
||||||
|
token.pos_,
|
||||||
|
token.tag_,
|
||||||
|
"_",
|
||||||
|
str(head),
|
||||||
|
token.dep_.lower(),
|
||||||
|
"_",
|
||||||
|
"_",
|
||||||
|
]
|
||||||
|
lines.append("\t".join(fields))
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
Token.set_extension('get_conllu_lines', method=get_token_conllu)
|
|
||||||
Token.set_extension('begins_fused', default=False)
|
Token.set_extension("get_conllu_lines", method=get_token_conllu)
|
||||||
Token.set_extension('inside_fused', default=False)
|
Token.set_extension("begins_fused", default=False)
|
||||||
|
Token.set_extension("inside_fused", default=False)
|
||||||
|
|
||||||
|
|
||||||
##################
|
##################
|
||||||
|
@ -274,31 +303,32 @@ Token.set_extension('inside_fused', default=False)
|
||||||
|
|
||||||
|
|
||||||
def load_nlp(corpus, config):
|
def load_nlp(corpus, config):
|
||||||
lang = corpus.split('_')[0]
|
lang = corpus.split("_")[0]
|
||||||
nlp = spacy.blank(lang)
|
nlp = spacy.blank(lang)
|
||||||
if config.vectors:
|
if config.vectors:
|
||||||
nlp.vocab.from_disk(config.vectors / 'vocab')
|
nlp.vocab.from_disk(config.vectors / "vocab")
|
||||||
return nlp
|
return nlp
|
||||||
|
|
||||||
|
|
||||||
def initialize_pipeline(nlp, docs, golds, config):
|
def initialize_pipeline(nlp, docs, golds, config):
|
||||||
nlp.add_pipe(nlp.create_pipe('parser'))
|
nlp.add_pipe(nlp.create_pipe("parser"))
|
||||||
if config.multitask_tag:
|
if config.multitask_tag:
|
||||||
nlp.parser.add_multitask_objective('tag')
|
nlp.parser.add_multitask_objective("tag")
|
||||||
if config.multitask_sent:
|
if config.multitask_sent:
|
||||||
nlp.parser.add_multitask_objective('sent_start')
|
nlp.parser.add_multitask_objective("sent_start")
|
||||||
nlp.parser.moves.add_action(2, 'subtok')
|
nlp.parser.moves.add_action(2, "subtok")
|
||||||
nlp.add_pipe(nlp.create_pipe('tagger'))
|
nlp.add_pipe(nlp.create_pipe("tagger"))
|
||||||
for gold in golds:
|
for gold in golds:
|
||||||
for tag in gold.tags:
|
for tag in gold.tags:
|
||||||
if tag is not None:
|
if tag is not None:
|
||||||
nlp.tagger.add_label(tag)
|
nlp.tagger.add_label(tag)
|
||||||
# Replace labels that didn't make the frequency cutoff
|
# Replace labels that didn't make the frequency cutoff
|
||||||
actions = set(nlp.parser.labels)
|
actions = set(nlp.parser.labels)
|
||||||
label_set = set([act.split('-')[1] for act in actions if '-' in act])
|
label_set = set([act.split("-")[1] for act in actions if "-" in act])
|
||||||
for gold in golds:
|
for gold in golds:
|
||||||
for i, label in enumerate(gold.labels):
|
for i, label in enumerate(gold.labels):
|
||||||
if label is not None and label not in label_set:
|
if label is not None and label not in label_set:
|
||||||
gold.labels[i] = label.split('||')[0]
|
gold.labels[i] = label.split("||")[0]
|
||||||
return nlp.begin_training(lambda: golds_to_gold_tuples(docs, golds))
|
return nlp.begin_training(lambda: golds_to_gold_tuples(docs, golds))
|
||||||
|
|
||||||
|
|
||||||
|
@ -306,6 +336,7 @@ def initialize_pipeline(nlp, docs, golds, config):
|
||||||
# Command line helpers #
|
# Command line helpers #
|
||||||
########################
|
########################
|
||||||
|
|
||||||
|
|
||||||
@attr.s
|
@attr.s
|
||||||
class Config(object):
|
class Config(object):
|
||||||
vectors = attr.ib(default=None)
|
vectors = attr.ib(default=None)
|
||||||
|
@ -318,7 +349,7 @@ class Config(object):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def load(cls, loc):
|
def load(cls, loc):
|
||||||
with Path(loc).open('r', encoding='utf8') as file_:
|
with Path(loc).open("r", encoding="utf8") as file_:
|
||||||
cfg = json.load(file_)
|
cfg = json.load(file_)
|
||||||
return cls(**cfg)
|
return cls(**cfg)
|
||||||
|
|
||||||
|
@ -331,32 +362,36 @@ class Dataset(object):
|
||||||
self.text = None
|
self.text = None
|
||||||
for file_path in self.path.iterdir():
|
for file_path in self.path.iterdir():
|
||||||
name = file_path.parts[-1]
|
name = file_path.parts[-1]
|
||||||
if section in name and name.endswith('conllu'):
|
if section in name and name.endswith("conllu"):
|
||||||
self.conllu = file_path
|
self.conllu = file_path
|
||||||
elif section in name and name.endswith('txt'):
|
elif section in name and name.endswith("txt"):
|
||||||
self.text = file_path
|
self.text = file_path
|
||||||
if self.conllu is None:
|
if self.conllu is None:
|
||||||
msg = "Could not find .txt file in {path} for {section}"
|
msg = "Could not find .txt file in {path} for {section}"
|
||||||
raise IOError(msg.format(section=section, path=path))
|
raise IOError(msg.format(section=section, path=path))
|
||||||
if self.text is None:
|
if self.text is None:
|
||||||
msg = "Could not find .txt file in {path} for {section}"
|
msg = "Could not find .txt file in {path} for {section}"
|
||||||
self.lang = self.conllu.parts[-1].split('-')[0].split('_')[0]
|
self.lang = self.conllu.parts[-1].split("-")[0].split("_")[0]
|
||||||
|
|
||||||
|
|
||||||
class TreebankPaths(object):
|
class TreebankPaths(object):
|
||||||
def __init__(self, ud_path, treebank, **cfg):
|
def __init__(self, ud_path, treebank, **cfg):
|
||||||
self.train = Dataset(ud_path / treebank, 'train')
|
self.train = Dataset(ud_path / treebank, "train")
|
||||||
self.dev = Dataset(ud_path / treebank, 'dev')
|
self.dev = Dataset(ud_path / treebank, "dev")
|
||||||
self.lang = self.train.lang
|
self.lang = self.train.lang
|
||||||
|
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
ud_dir=("Path to Universal Dependencies corpus", "positional", None, Path),
|
ud_dir=("Path to Universal Dependencies corpus", "positional", None, Path),
|
||||||
corpus=("UD corpus to train and evaluate on, e.g. en, es_ancora, etc",
|
corpus=(
|
||||||
"positional", None, str),
|
"UD corpus to train and evaluate on, e.g. en, es_ancora, etc",
|
||||||
|
"positional",
|
||||||
|
None,
|
||||||
|
str,
|
||||||
|
),
|
||||||
parses_dir=("Directory to write the development parses", "positional", None, Path),
|
parses_dir=("Directory to write the development parses", "positional", None, Path),
|
||||||
config=("Path to json formatted config file", "positional", None, Config.load),
|
config=("Path to json formatted config file", "positional", None, Config.load),
|
||||||
limit=("Size limit", "option", "n", int)
|
limit=("Size limit", "option", "n", int),
|
||||||
)
|
)
|
||||||
def main(ud_dir, parses_dir, config, corpus, limit=0):
|
def main(ud_dir, parses_dir, config, corpus, limit=0):
|
||||||
paths = TreebankPaths(ud_dir, corpus)
|
paths = TreebankPaths(ud_dir, corpus)
|
||||||
|
@ -365,8 +400,13 @@ def main(ud_dir, parses_dir, config, corpus, limit=0):
|
||||||
print("Train and evaluate", corpus, "using lang", paths.lang)
|
print("Train and evaluate", corpus, "using lang", paths.lang)
|
||||||
nlp = load_nlp(paths.lang, config)
|
nlp = load_nlp(paths.lang, config)
|
||||||
|
|
||||||
docs, golds = read_data(nlp, paths.train.conllu.open(), paths.train.text.open(),
|
docs, golds = read_data(
|
||||||
max_doc_length=config.max_doc_length, limit=limit)
|
nlp,
|
||||||
|
paths.train.conllu.open(),
|
||||||
|
paths.train.text.open(),
|
||||||
|
max_doc_length=config.max_doc_length,
|
||||||
|
limit=limit,
|
||||||
|
)
|
||||||
|
|
||||||
optimizer = initialize_pipeline(nlp, docs, golds, config)
|
optimizer = initialize_pipeline(nlp, docs, golds, config)
|
||||||
|
|
||||||
|
@ -379,14 +419,19 @@ def main(ud_dir, parses_dir, config, corpus, limit=0):
|
||||||
for batch in batches:
|
for batch in batches:
|
||||||
batch_docs, batch_gold = zip(*batch)
|
batch_docs, batch_gold = zip(*batch)
|
||||||
pbar.update(sum(len(doc) for doc in batch_docs))
|
pbar.update(sum(len(doc) for doc in batch_docs))
|
||||||
nlp.update(batch_docs, batch_gold, sgd=optimizer,
|
nlp.update(
|
||||||
drop=config.dropout, losses=losses)
|
batch_docs,
|
||||||
|
batch_gold,
|
||||||
out_path = parses_dir / corpus / 'epoch-{i}.conllu'.format(i=i)
|
sgd=optimizer,
|
||||||
|
drop=config.dropout,
|
||||||
|
losses=losses,
|
||||||
|
)
|
||||||
|
|
||||||
|
out_path = parses_dir / corpus / "epoch-{i}.conllu".format(i=i)
|
||||||
with nlp.use_params(optimizer.averages):
|
with nlp.use_params(optimizer.averages):
|
||||||
scores = evaluate(nlp, paths.dev.text, paths.dev.conllu, out_path)
|
scores = evaluate(nlp, paths.dev.text, paths.dev.conllu, out_path)
|
||||||
print_progress(i, losses, scores)
|
print_progress(i, losses, scores)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
'''This example shows how to add a multi-task objective that is trained
|
"""This example shows how to add a multi-task objective that is trained
|
||||||
alongside the entity recognizer. This is an alternative to adding features
|
alongside the entity recognizer. This is an alternative to adding features
|
||||||
to the model.
|
to the model.
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@ The specific example here is not necessarily a good idea --- but it shows
|
||||||
how an arbitrary objective function for some word can be used.
|
how an arbitrary objective function for some word can be used.
|
||||||
|
|
||||||
Developed and tested for spaCy 2.0.6
|
Developed and tested for spaCy 2.0.6
|
||||||
'''
|
"""
|
||||||
import random
|
import random
|
||||||
import plac
|
import plac
|
||||||
import spacy
|
import spacy
|
||||||
|
@ -30,30 +30,29 @@ random.seed(0)
|
||||||
|
|
||||||
PWD = os.path.dirname(__file__)
|
PWD = os.path.dirname(__file__)
|
||||||
|
|
||||||
TRAIN_DATA = list(read_json_file(os.path.join(PWD, 'training-data.json')))
|
TRAIN_DATA = list(read_json_file(os.path.join(PWD, "training-data.json")))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_position_label(i, words, tags, heads, labels, ents):
|
def get_position_label(i, words, tags, heads, labels, ents):
|
||||||
'''Return labels indicating the position of the word in the document.
|
"""Return labels indicating the position of the word in the document.
|
||||||
'''
|
"""
|
||||||
if len(words) < 20:
|
if len(words) < 20:
|
||||||
return 'short-doc'
|
return "short-doc"
|
||||||
elif i == 0:
|
elif i == 0:
|
||||||
return 'first-word'
|
return "first-word"
|
||||||
elif i < 10:
|
elif i < 10:
|
||||||
return 'early-word'
|
return "early-word"
|
||||||
elif i < 20:
|
elif i < 20:
|
||||||
return 'mid-word'
|
return "mid-word"
|
||||||
elif i == len(words)-1:
|
elif i == len(words) - 1:
|
||||||
return 'last-word'
|
return "last-word"
|
||||||
else:
|
else:
|
||||||
return 'late-word'
|
return "late-word"
|
||||||
|
|
||||||
|
|
||||||
def main(n_iter=10):
|
def main(n_iter=10):
|
||||||
nlp = spacy.blank('en')
|
nlp = spacy.blank("en")
|
||||||
ner = nlp.create_pipe('ner')
|
ner = nlp.create_pipe("ner")
|
||||||
ner.add_multitask_objective(get_position_label)
|
ner.add_multitask_objective(get_position_label)
|
||||||
nlp.add_pipe(ner)
|
nlp.add_pipe(ner)
|
||||||
|
|
||||||
|
@ -71,15 +70,16 @@ def main(n_iter=10):
|
||||||
[gold], # batch of annotations
|
[gold], # batch of annotations
|
||||||
drop=0.2, # dropout - make it harder to memorise data
|
drop=0.2, # dropout - make it harder to memorise data
|
||||||
sgd=optimizer, # callable to update weights
|
sgd=optimizer, # callable to update weights
|
||||||
losses=losses)
|
losses=losses,
|
||||||
print(losses.get('nn_labeller', 0.0), losses['ner'])
|
)
|
||||||
|
print(losses.get("nn_labeller", 0.0), losses["ner"])
|
||||||
|
|
||||||
# test the trained model
|
# test the trained model
|
||||||
for text, _ in TRAIN_DATA:
|
for text, _ in TRAIN_DATA:
|
||||||
doc = nlp(text)
|
doc = nlp(text)
|
||||||
print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
|
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
|
||||||
print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])
|
print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
216
examples/training/pretrain_textcat.py
Normal file
216
examples/training/pretrain_textcat.py
Normal file
|
@ -0,0 +1,216 @@
|
||||||
|
"""This script is experimental.
|
||||||
|
|
||||||
|
Try pre-training the CNN component of the text categorizer using a cheap
|
||||||
|
language modelling-like objective. Specifically, we load pre-trained vectors
|
||||||
|
(from something like word2vec, GloVe, FastText etc), and use the CNN to
|
||||||
|
predict the tokens' pre-trained vectors. This isn't as easy as it sounds:
|
||||||
|
we're not merely doing compression here, because heavy dropout is applied,
|
||||||
|
including over the input words. This means the model must often (50% of the time)
|
||||||
|
use the context in order to predict the word.
|
||||||
|
|
||||||
|
To evaluate the technique, we're pre-training with the 50k texts from the IMDB
|
||||||
|
corpus, and then training with only 100 labels. Note that it's a bit dirty to
|
||||||
|
pre-train with the development data, but also not *so* terrible: we're not using
|
||||||
|
the development labels, after all --- only the unlabelled text.
|
||||||
|
"""
|
||||||
|
import plac
|
||||||
|
import random
|
||||||
|
import spacy
|
||||||
|
import thinc.extra.datasets
|
||||||
|
from spacy.util import minibatch, use_gpu, compounding
|
||||||
|
import tqdm
|
||||||
|
from spacy._ml import Tok2Vec
|
||||||
|
from spacy.pipeline import TextCategorizer
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
|
||||||
|
def load_texts(limit=0):
    """Return shuffled, unlabelled IMDB texts for pre-training.

    The labels that come with the dataset are discarded; only the raw texts
    are returned.

    limit (int): If >= 1, return only the first `limit` shuffled training
        texts. Otherwise return all training texts followed by all
        development texts.
    RETURNS (list): The selected texts.
    """
    train, dev = thinc.extra.datasets.imdb()
    train_texts, _ = zip(*train)
    # The development texts have to be unpacked from `dev`. Unzipping `train`
    # a second time would return the training texts twice and never touch the
    # development portion.
    dev_texts, _ = zip(*dev)
    train_texts = list(train_texts)
    dev_texts = list(dev_texts)
    random.shuffle(train_texts)
    random.shuffle(dev_texts)
    if limit >= 1:
        return train_texts[:limit]
    return train_texts + dev_texts
|
||||||
|
|
||||||
|
|
||||||
|
def load_textcat_data(limit=0):
    """Load labelled IMDB data for training and evaluating the classifier.

    limit (int): Keep only the last `limit` examples of the shuffled training
        data. The selection is the slice `[-limit:]`, so a limit of 0 keeps
        all of the training data.
    RETURNS (tuple): `((texts, cats), (eval_texts, eval_cats))`, where every
        entry of `cats` / `eval_cats` is a dict holding boolean "POSITIVE"
        and "NEGATIVE" values derived from the example's label.
    """

    def to_cats(label):
        # A truthy label means POSITIVE; NEGATIVE is always its complement.
        is_positive = bool(label)
        return {"POSITIVE": is_positive, "NEGATIVE": not is_positive}

    train_data, eval_data = thinc.extra.datasets.imdb()
    random.shuffle(train_data)
    subset = train_data[-limit:]
    texts, labels = zip(*subset)
    eval_texts, eval_labels = zip(*eval_data)
    return (
        (texts, [to_cats(y) for y in labels]),
        (eval_texts, [to_cats(y) for y in eval_labels]),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def prefer_gpu():
    """Try to activate GPU 0 and report whether it is being used.

    RETURNS (bool): False if `spacy.util.use_gpu(0)` returned None. Otherwise
        True, after seeding cupy's random number generator with 0.
    """
    if spacy.util.use_gpu(0) is None:
        return False
    # cupy is imported only on the path where use_gpu() reported a device.
    import cupy.random

    cupy.random.seed(0)
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def build_textcat_model(tok2vec, nr_class, width):
    """Build a text classification model on top of a tok2vec layer.

    The model chains the tok2vec layer, `flatten_add_lengths`, mean pooling
    and a Softmax output layer.

    tok2vec: The layer used as the first stage of the model.
    nr_class (int): First argument passed to the Softmax layer.
    width (int): Second argument passed to the Softmax layer.
    RETURNS: The chained model, with the tok2vec layer also attached as
        `model.tok2vec`.
    """
    # NOTE(review): Maxout, sum_pool, max_pool, Residual, LayerNorm, logistic
    # and zero_init are imported here but not referenced in this function.
    from thinc.v2v import Model, Softmax, Maxout
    from thinc.api import flatten_add_lengths, chain
    from thinc.t2v import Pooling, sum_pool, mean_pool, max_pool
    from thinc.misc import Residual, LayerNorm
    from spacy._ml import logistic, zero_init

    # Within this block, `>>` between layers is bound to `chain`.
    with Model.define_operators({">>": chain}):
        model = (
            tok2vec
            >> flatten_add_lengths
            >> Pooling(mean_pool)
            >> Softmax(nr_class, width)
        )
    # Keep a direct handle on the tok2vec layer; train_textcat() reads and
    # restores its weights through `model.tok2vec`.
    model.tok2vec = tok2vec
    return model
|
||||||
|
|
||||||
|
|
||||||
|
def block_gradients(model):
    """Wrap `model` so its forward pass runs but no callback is passed on.

    model: The layer whose `begin_update` is called in the forward pass.
    RETURNS: The result of `thinc.api.wrap` applied to the forward function
        and `model`.
    """
    from thinc.api import wrap

    def forward(X, drop=0.0):
        # Keep only the first value returned by begin_update(); the second
        # one is dropped and None is returned in its place.
        outputs, _discarded = model.begin_update(X, drop=drop)
        return (outputs, None)

    return wrap(forward, model)
|
||||||
|
|
||||||
|
|
||||||
|
def create_pipeline(width, embed_size, vectors_model):
    """Load the vectors model and append a custom text categorizer to it.

    width (int): Width passed to Tok2Vec and to the classifier's output layer.
    embed_size (int): Embedding size passed to Tok2Vec.
    vectors_model (unicode): Name or path handed to `spacy.load`.
    RETURNS: The loaded pipeline with the text categorizer added.
    """
    print("Load vectors")
    nlp = spacy.load(vectors_model)
    print("Start training")
    labels = ["POSITIVE", "NEGATIVE"]
    tok2vec = Tok2Vec(width=width, embed_size=embed_size)
    # Two output classes, matching the two labels above.
    textcat_model = build_textcat_model(tok2vec, 2, width)
    textcat = TextCategorizer(nlp.vocab, labels=labels, model=textcat_model)
    nlp.add_pipe(textcat)
    return nlp
|
||||||
|
|
||||||
|
|
||||||
|
def train_tensorizer(nlp, texts, dropout, n_iter):
    """Add a tensorizer component to the pipeline and train it on raw texts.

    nlp: The pipeline. A "tensorizer" pipe is created and added to it.
    texts (list): Raw texts. They are iterated once per epoch, so this must
        be re-iterable.
    dropout (float): Passed as `drop` to every tensorizer update.
    n_iter (int): Number of passes over `texts`.
    RETURNS: The optimizer returned by `nlp.begin_training()`.
    """
    tensorizer = nlp.create_pipe("tensorizer")
    nlp.add_pipe(tensorizer)
    optimizer = nlp.begin_training()
    # Neither loop needs a counter. Previously the inner loop reused the
    # epoch variable name through an enumerate() whose index was never read.
    for _ in range(n_iter):
        losses = {}
        for batch in minibatch(tqdm.tqdm(texts)):
            docs = [nlp.make_doc(text) for text in batch]
            tensorizer.update(docs, None, losses=losses, sgd=optimizer, drop=dropout)
        # One loss report per epoch.
        print(losses)
    return optimizer
|
||||||
|
|
||||||
|
|
||||||
|
def train_textcat(nlp, n_texts, n_iter=10):
    """Train the pipeline's text categorizer and print scores after each epoch.

    nlp: Pipeline that contains a "textcat" component.
    n_texts (int): Passed as `limit` to load_textcat_data().
    n_iter (int): Number of training epochs.
    """
    textcat = nlp.get_pipe("textcat")
    # Snapshot the tok2vec weights so they can be put back after
    # begin_training() below.
    tok2vec_weights = textcat.model.tok2vec.to_bytes()
    (train_texts, train_cats), (dev_texts, dev_cats) = load_textcat_data(limit=n_texts)
    print(
        "Using {} examples ({} training, {} evaluation)".format(
            n_texts, len(train_texts), len(dev_texts)
        )
    )
    # Pair every text with its annotations in the {"cats": ...} form.
    train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))

    # get names of other pipes to disable them during training
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
    with nlp.disable_pipes(*other_pipes):  # only train textcat
        optimizer = nlp.begin_training()
        # Restore the weights saved above -- presumably because
        # begin_training() re-initialises the model; TODO confirm.
        textcat.model.tok2vec.from_bytes(tok2vec_weights)
        print("Training the model...")
        print("{:^5}\t{:^5}\t{:^5}\t{:^5}".format("LOSS", "P", "R", "F"))
        for i in range(n_iter):
            losses = {"textcat": 0.0}
            # batch up the examples using spaCy's minibatch
            batches = minibatch(tqdm.tqdm(train_data), size=2)
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, drop=0.2, losses=losses)
            # Evaluate with the optimizer's averaged parameters swapped in.
            with textcat.model.use_params(optimizer.averages):
                # evaluate on the evaluation data returned by load_textcat_data()
                scores = evaluate_textcat(nlp.tokenizer, textcat, dev_texts, dev_cats)
            print(
                "{0:.3f}\t{1:.3f}\t{2:.3f}\t{3:.3f}".format(  # print a simple table
                    losses["textcat"],
                    scores["textcat_p"],
                    scores["textcat_r"],
                    scores["textcat_f"],
                )
            )
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_textcat(tokenizer, textcat, texts, cats):
    """Compute precision, recall and F-score of the categorizer's predictions.

    A score or gold value of at least 0.5 counts as positive. Labels that are
    predicted but missing from the gold dict are ignored.

    tokenizer (callable): Turns a text into a doc.
    textcat: Object whose `pipe(docs)` yields docs carrying a `cats` dict.
    texts (iterable): The texts to evaluate on.
    cats (list): Gold category dicts, indexed in the same order as `texts`.
    RETURNS (dict): Scores under "textcat_p", "textcat_r" and "textcat_f".
    """
    # Every counter starts at a tiny non-zero value so that the divisions
    # below are defined even when nothing was counted.
    counts = {"tp": 1e-8, "fp": 1e-8, "tn": 1e-8, "fn": 1e-8}
    tokenized = (tokenizer(text) for text in texts)
    for index, doc in enumerate(textcat.pipe(tokenized)):
        expected = cats[index]
        for label, score in doc.cats.items():
            if label in expected:
                truth = expected[label]
                if score >= 0.5:
                    if truth >= 0.5:
                        counts["tp"] += 1.0
                    elif truth < 0.5:
                        counts["fp"] += 1.0
                elif score < 0.5:
                    if truth < 0.5:
                        counts["tn"] += 1
                    elif truth >= 0.5:
                        counts["fn"] += 1
    precision = counts["tp"] / (counts["tp"] + counts["fp"])
    recall = counts["tp"] / (counts["tp"] + counts["fn"])
    f_score = 2 * (precision * recall) / (precision + recall)
    return {"textcat_p": precision, "textcat_r": recall, "textcat_f": f_score}
|
||||||
|
|
||||||
|
|
||||||
|
@plac.annotations(
    width=("Width of CNN layers", "positional", None, int),
    embed_size=("Embedding rows", "positional", None, int),
    pretrain_iters=("Number of iterations to pretrain", "option", "pn", int),
    train_iters=("Number of iterations to train", "option", "tn", int),
    train_examples=("Number of labelled examples", "option", "eg", int),
    vectors_model=("Name or path to vectors model to learn from"),
)
def main(
    width,
    embed_size,
    vectors_model,
    pretrain_iters=30,
    train_iters=30,
    train_examples=1000,
):
    """Pre-train the tensorizer on unlabelled text, then train the textcat.

    width (int): Width of the CNN layers.
    embed_size (int): Number of embedding rows.
    vectors_model (unicode): Name or path of the vectors model to load.
    pretrain_iters (int): Epochs of tensorizer pre-training.
    train_iters (int): Epochs of text categorizer training.
    train_examples (int): Number of labelled examples used for training.
    """
    # Seed both generators so runs are repeatable.
    random.seed(0)
    numpy.random.seed(0)
    # Named differently from the `use_gpu` imported from spacy.util, so the
    # imported function is not shadowed inside this function.
    gpu_in_use = prefer_gpu()
    print("Using GPU?", gpu_in_use)

    nlp = create_pipeline(width, embed_size, vectors_model)
    print("Load data")
    texts = load_texts(limit=0)
    print("Train tensorizer")
    # The returned optimizer is not needed here; train_textcat() creates its
    # own via nlp.begin_training().
    train_tensorizer(nlp, texts, dropout=0.2, n_iter=pretrain_iters)
    print("Train textcat")
    train_textcat(nlp, train_examples, n_iter=train_iters)


if __name__ == "__main__":
    plac.call(main)
|
94
examples/training/rehearsal.py
Normal file
94
examples/training/rehearsal.py
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
"""Prevent catastrophic forgetting with rehearsal updates."""
|
||||||
|
import plac
|
||||||
|
import random
|
||||||
|
import srsly
|
||||||
|
import spacy
|
||||||
|
from spacy.gold import GoldParse
|
||||||
|
from spacy.util import minibatch, compounding
|
||||||
|
|
||||||
|
|
||||||
|
# Entity label that the example teaches the model.
LABEL = "ANIMAL"

# Training examples as (text, annotations) pairs. Each entity is a
# (start_char, end_char, label) tuple. Every annotation refers to LABEL
# rather than repeating the string, so the label is defined in one place.
TRAIN_DATA = [
    (
        "Horses are too tall and they pretend to care about your feelings",
        {"entities": [(0, 6, LABEL)]},
    ),
    ("Do they bite?", {"entities": []}),
    (
        "horses are too tall and they pretend to care about your feelings",
        {"entities": [(0, 6, LABEL)]},
    ),
    ("horses pretend to care about your feelings", {"entities": [(0, 6, LABEL)]}),
    (
        "they pretend to care about your feelings, those horses",
        {"entities": [(48, 54, LABEL)]},
    ),
    ("horses?", {"entities": [(0, 6, LABEL)]}),
]
|
||||||
|
|
||||||
|
|
||||||
|
def read_raw_data(nlp, jsonl_loc):
    """Yield a doc for every JSONL record whose text is not blank.

    nlp: Pipeline whose `make_doc` turns a text into a doc.
    jsonl_loc: Location of the JSONL file; each record needs a "text" key.
    YIELDS: One doc per record with non-whitespace text.
    """
    for record in srsly.read_jsonl(jsonl_loc):
        text = record["text"]
        if not text.strip():
            # Records that are empty or whitespace-only are skipped.
            continue
        yield nlp.make_doc(text)
|
||||||
|
|
||||||
|
|
||||||
|
def read_gold_data(nlp, gold_loc):
    """Read annotated examples from a JSONL file as (doc, gold) pairs.

    nlp: Pipeline whose `make_doc` turns a text into a doc.
    gold_loc: Location of the JSONL file. Each record needs a "text" key and
        a "spans" list whose items have "start", "end" and "label" keys.
    RETURNS (list): One `(doc, GoldParse)` tuple per record.
    """
    pairs = []
    for record in srsly.read_jsonl(gold_loc):
        doc = nlp.make_doc(record["text"])
        entities = [
            (span["start"], span["end"], span["label"]) for span in record["spans"]
        ]
        pairs.append((doc, GoldParse(doc, entities=entities)))
    return pairs
|
||||||
|
|
||||||
|
|
||||||
|
def main(model_name, unlabelled_loc):
    """Teach an existing model the new entity label, with rehearsal updates.

    Each supervised update on TRAIN_DATA is followed by a rehearsal update on
    a batch of raw, unlabelled docs.

    model_name (unicode): Name or path of the model passed to `spacy.load`.
    unlabelled_loc: Location of a JSONL file with raw texts for rehearsal.
    """
    n_iter = 10
    dropout = 0.2
    batch_size = 4
    nlp = spacy.load(model_name)
    nlp.get_pipe("ner").add_label(LABEL)
    raw_docs = list(read_raw_data(nlp, unlabelled_loc))
    optimizer = nlp.resume_training()
    # Avoid use of Adam when resuming training. I don't understand this well
    # yet, but I'm getting weird results from Adam. Try commenting out the
    # nlp.update(), and using Adam -- you'll find the models drift apart.
    # I guess Adam is losing precision, introducing gradient noise?
    optimizer.alpha = 0.1
    optimizer.b1 = 0.0
    optimizer.b2 = 0.0

    # get names of other pipes to disable them during training
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
    sizes = compounding(1.0, 4.0, 1.001)
    with nlp.disable_pipes(*other_pipes):
        for itn in range(n_iter):
            random.shuffle(TRAIN_DATA)
            random.shuffle(raw_docs)
            losses = {}
            r_losses = {}
            # batch up the examples using spaCy's minibatch
            raw_batches = minibatch(raw_docs, size=batch_size)
            for batch in minibatch(TRAIN_DATA, size=sizes):
                docs, golds = zip(*batch)
                nlp.update(docs, golds, sgd=optimizer, drop=dropout, losses=losses)
                # There may be fewer raw batches than training batches. In
                # that case skip the rehearsal step rather than letting
                # next() abort training with StopIteration.
                raw_batch = next(raw_batches, None)
                if raw_batch:
                    nlp.rehearse(list(raw_batch), sgd=optimizer, losses=r_losses)
            print("Losses", losses)
            print("R. Losses", r_losses)
    print(nlp.get_pipe("ner").model.unseen_classes)
    test_text = "Do you like horses?"
    doc = nlp(test_text)
    print("Entities in '%s'" % test_text)
    for ent in doc.ents:
        print(ent.label_, ent.text)


if __name__ == "__main__":
    plac.call(main)
|
|
@ -21,77 +21,121 @@ from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
import plac
|
import plac
|
||||||
import random
|
import random
|
||||||
import spacy
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import spacy
|
||||||
|
from spacy.util import minibatch, compounding
|
||||||
|
|
||||||
|
|
||||||
# training data: texts, heads and dependency labels
|
# training data: texts, heads and dependency labels
|
||||||
# for no relation, we simply chose an arbitrary dependency label, e.g. '-'
|
# for no relation, we simply chose an arbitrary dependency label, e.g. '-'
|
||||||
TRAIN_DATA = [
|
TRAIN_DATA = [
|
||||||
("find a cafe with great wifi", {
|
(
|
||||||
'heads': [0, 2, 0, 5, 5, 2], # index of token head
|
"find a cafe with great wifi",
|
||||||
'deps': ['ROOT', '-', 'PLACE', '-', 'QUALITY', 'ATTRIBUTE']
|
{
|
||||||
}),
|
"heads": [0, 2, 0, 5, 5, 2], # index of token head
|
||||||
("find a hotel near the beach", {
|
"deps": ["ROOT", "-", "PLACE", "-", "QUALITY", "ATTRIBUTE"],
|
||||||
'heads': [0, 2, 0, 5, 5, 2],
|
},
|
||||||
'deps': ['ROOT', '-', 'PLACE', 'QUALITY', '-', 'ATTRIBUTE']
|
),
|
||||||
}),
|
(
|
||||||
("find me the closest gym that's open late", {
|
"find a hotel near the beach",
|
||||||
'heads': [0, 0, 4, 4, 0, 6, 4, 6, 6],
|
{
|
||||||
'deps': ['ROOT', '-', '-', 'QUALITY', 'PLACE', '-', '-', 'ATTRIBUTE', 'TIME']
|
"heads": [0, 2, 0, 5, 5, 2],
|
||||||
}),
|
"deps": ["ROOT", "-", "PLACE", "QUALITY", "-", "ATTRIBUTE"],
|
||||||
("show me the cheapest store that sells flowers", {
|
},
|
||||||
'heads': [0, 0, 4, 4, 0, 4, 4, 4], # attach "flowers" to store!
|
),
|
||||||
'deps': ['ROOT', '-', '-', 'QUALITY', 'PLACE', '-', '-', 'PRODUCT']
|
(
|
||||||
}),
|
"find me the closest gym that's open late",
|
||||||
("find a nice restaurant in london", {
|
{
|
||||||
'heads': [0, 3, 3, 0, 3, 3],
|
"heads": [0, 0, 4, 4, 0, 6, 4, 6, 6],
|
||||||
'deps': ['ROOT', '-', 'QUALITY', 'PLACE', '-', 'LOCATION']
|
"deps": [
|
||||||
}),
|
"ROOT",
|
||||||
("show me the coolest hostel in berlin", {
|
"-",
|
||||||
'heads': [0, 0, 4, 4, 0, 4, 4],
|
"-",
|
||||||
'deps': ['ROOT', '-', '-', 'QUALITY', 'PLACE', '-', 'LOCATION']
|
"QUALITY",
|
||||||
}),
|
"PLACE",
|
||||||
("find a good italian restaurant near work", {
|
"-",
|
||||||
'heads': [0, 4, 4, 4, 0, 4, 5],
|
"-",
|
||||||
'deps': ['ROOT', '-', 'QUALITY', 'ATTRIBUTE', 'PLACE', 'ATTRIBUTE', 'LOCATION']
|
"ATTRIBUTE",
|
||||||
})
|
"TIME",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"show me the cheapest store that sells flowers",
|
||||||
|
{
|
||||||
|
"heads": [0, 0, 4, 4, 0, 4, 4, 4], # attach "flowers" to store!
|
||||||
|
"deps": ["ROOT", "-", "-", "QUALITY", "PLACE", "-", "-", "PRODUCT"],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"find a nice restaurant in london",
|
||||||
|
{
|
||||||
|
"heads": [0, 3, 3, 0, 3, 3],
|
||||||
|
"deps": ["ROOT", "-", "QUALITY", "PLACE", "-", "LOCATION"],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"show me the coolest hostel in berlin",
|
||||||
|
{
|
||||||
|
"heads": [0, 0, 4, 4, 0, 4, 4],
|
||||||
|
"deps": ["ROOT", "-", "-", "QUALITY", "PLACE", "-", "LOCATION"],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"find a good italian restaurant near work",
|
||||||
|
{
|
||||||
|
"heads": [0, 4, 4, 4, 0, 4, 5],
|
||||||
|
"deps": [
|
||||||
|
"ROOT",
|
||||||
|
"-",
|
||||||
|
"QUALITY",
|
||||||
|
"ATTRIBUTE",
|
||||||
|
"PLACE",
|
||||||
|
"ATTRIBUTE",
|
||||||
|
"LOCATION",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
|
model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
|
||||||
output_dir=("Optional output directory", "option", "o", Path),
|
output_dir=("Optional output directory", "option", "o", Path),
|
||||||
n_iter=("Number of training iterations", "option", "n", int))
|
n_iter=("Number of training iterations", "option", "n", int),
|
||||||
def main(model=None, output_dir=None, n_iter=5):
|
)
|
||||||
|
def main(model=None, output_dir=None, n_iter=15):
|
||||||
"""Load the model, set up the pipeline and train the parser."""
|
"""Load the model, set up the pipeline and train the parser."""
|
||||||
if model is not None:
|
if model is not None:
|
||||||
nlp = spacy.load(model) # load existing spaCy model
|
nlp = spacy.load(model) # load existing spaCy model
|
||||||
print("Loaded model '%s'" % model)
|
print("Loaded model '%s'" % model)
|
||||||
else:
|
else:
|
||||||
nlp = spacy.blank('en') # create blank Language class
|
nlp = spacy.blank("en") # create blank Language class
|
||||||
print("Created blank 'en' model")
|
print("Created blank 'en' model")
|
||||||
|
|
||||||
# We'll use the built-in dependency parser class, but we want to create a
|
# We'll use the built-in dependency parser class, but we want to create a
|
||||||
# fresh instance – just in case.
|
# fresh instance – just in case.
|
||||||
if 'parser' in nlp.pipe_names:
|
if "parser" in nlp.pipe_names:
|
||||||
nlp.remove_pipe('parser')
|
nlp.remove_pipe("parser")
|
||||||
parser = nlp.create_pipe('parser')
|
parser = nlp.create_pipe("parser")
|
||||||
nlp.add_pipe(parser, first=True)
|
nlp.add_pipe(parser, first=True)
|
||||||
|
|
||||||
for text, annotations in TRAIN_DATA:
|
for text, annotations in TRAIN_DATA:
|
||||||
for dep in annotations.get('deps', []):
|
for dep in annotations.get("deps", []):
|
||||||
parser.add_label(dep)
|
parser.add_label(dep)
|
||||||
|
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'parser']
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
|
||||||
with nlp.disable_pipes(*other_pipes): # only train parser
|
with nlp.disable_pipes(*other_pipes): # only train parser
|
||||||
optimizer = nlp.begin_training()
|
optimizer = nlp.begin_training()
|
||||||
for itn in range(n_iter):
|
for itn in range(n_iter):
|
||||||
random.shuffle(TRAIN_DATA)
|
random.shuffle(TRAIN_DATA)
|
||||||
losses = {}
|
losses = {}
|
||||||
for text, annotations in TRAIN_DATA:
|
# batch up the examples using spaCy's minibatch
|
||||||
nlp.update([text], [annotations], sgd=optimizer, losses=losses)
|
batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
|
||||||
print(losses)
|
for batch in batches:
|
||||||
|
texts, annotations = zip(*batch)
|
||||||
|
nlp.update(texts, annotations, sgd=optimizer, losses=losses)
|
||||||
|
print("Losses", losses)
|
||||||
|
|
||||||
# test the trained model
|
# test the trained model
|
||||||
test_model(nlp)
|
test_model(nlp)
|
||||||
|
@ -111,16 +155,18 @@ def main(model=None, output_dir=None, n_iter=5):
|
||||||
|
|
||||||
|
|
||||||
def test_model(nlp):
|
def test_model(nlp):
|
||||||
texts = ["find a hotel with good wifi",
|
texts = [
|
||||||
"find me the cheapest gym near work",
|
"find a hotel with good wifi",
|
||||||
"show me the best hotel in berlin"]
|
"find me the cheapest gym near work",
|
||||||
|
"show me the best hotel in berlin",
|
||||||
|
]
|
||||||
docs = nlp.pipe(texts)
|
docs = nlp.pipe(texts)
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
print(doc.text)
|
print(doc.text)
|
||||||
print([(t.text, t.dep_, t.head.text) for t in doc if t.dep_ != '-'])
|
print([(t.text, t.dep_, t.head.text) for t in doc if t.dep_ != "-"])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
||||||
# Expected output:
|
# Expected output:
|
||||||
|
@ -135,7 +181,8 @@ if __name__ == '__main__':
|
||||||
# [
|
# [
|
||||||
# ('find', 'ROOT', 'find'),
|
# ('find', 'ROOT', 'find'),
|
||||||
# ('cheapest', 'QUALITY', 'gym'),
|
# ('cheapest', 'QUALITY', 'gym'),
|
||||||
# ('gym', 'PLACE', 'find')
|
# ('gym', 'PLACE', 'find'),
|
||||||
|
# ('near', 'ATTRIBUTE', 'gym'),
|
||||||
# ('work', 'LOCATION', 'near')
|
# ('work', 'LOCATION', 'near')
|
||||||
# ]
|
# ]
|
||||||
# show me the best hotel in berlin
|
# show me the best hotel in berlin
|
||||||
|
|
|
@ -15,67 +15,71 @@ import plac
|
||||||
import random
|
import random
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import spacy
|
import spacy
|
||||||
|
from spacy.util import minibatch, compounding
|
||||||
|
|
||||||
|
|
||||||
# training data
|
# training data
|
||||||
TRAIN_DATA = [
|
TRAIN_DATA = [
|
||||||
('Who is Shaka Khan?', {
|
("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
|
||||||
'entities': [(7, 17, 'PERSON')]
|
("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
|
||||||
}),
|
|
||||||
('I like London and Berlin.', {
|
|
||||||
'entities': [(7, 13, 'LOC'), (18, 24, 'LOC')]
|
|
||||||
})
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
|
model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
|
||||||
output_dir=("Optional output directory", "option", "o", Path),
|
output_dir=("Optional output directory", "option", "o", Path),
|
||||||
n_iter=("Number of training iterations", "option", "n", int))
|
n_iter=("Number of training iterations", "option", "n", int),
|
||||||
|
)
|
||||||
def main(model=None, output_dir=None, n_iter=100):
|
def main(model=None, output_dir=None, n_iter=100):
|
||||||
"""Load the model, set up the pipeline and train the entity recognizer."""
|
"""Load the model, set up the pipeline and train the entity recognizer."""
|
||||||
if model is not None:
|
if model is not None:
|
||||||
nlp = spacy.load(model) # load existing spaCy model
|
nlp = spacy.load(model) # load existing spaCy model
|
||||||
print("Loaded model '%s'" % model)
|
print("Loaded model '%s'" % model)
|
||||||
else:
|
else:
|
||||||
nlp = spacy.blank('en') # create blank Language class
|
nlp = spacy.blank("en") # create blank Language class
|
||||||
print("Created blank 'en' model")
|
print("Created blank 'en' model")
|
||||||
|
|
||||||
# create the built-in pipeline components and add them to the pipeline
|
# create the built-in pipeline components and add them to the pipeline
|
||||||
# nlp.create_pipe works for built-ins that are registered with spaCy
|
# nlp.create_pipe works for built-ins that are registered with spaCy
|
||||||
if 'ner' not in nlp.pipe_names:
|
if "ner" not in nlp.pipe_names:
|
||||||
ner = nlp.create_pipe('ner')
|
ner = nlp.create_pipe("ner")
|
||||||
nlp.add_pipe(ner, last=True)
|
nlp.add_pipe(ner, last=True)
|
||||||
# otherwise, get it so we can add labels
|
# otherwise, get it so we can add labels
|
||||||
else:
|
else:
|
||||||
ner = nlp.get_pipe('ner')
|
ner = nlp.get_pipe("ner")
|
||||||
|
|
||||||
# add labels
|
# add labels
|
||||||
for _, annotations in TRAIN_DATA:
|
for _, annotations in TRAIN_DATA:
|
||||||
for ent in annotations.get('entities'):
|
for ent in annotations.get("entities"):
|
||||||
ner.add_label(ent[2])
|
ner.add_label(ent[2])
|
||||||
|
|
||||||
# get names of other pipes to disable them during training
|
# get names of other pipes to disable them during training
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
|
||||||
with nlp.disable_pipes(*other_pipes): # only train NER
|
with nlp.disable_pipes(*other_pipes): # only train NER
|
||||||
optimizer = nlp.begin_training()
|
# reset and initialize the weights randomly – but only if we're
|
||||||
|
# training a new model
|
||||||
|
if model is None:
|
||||||
|
nlp.begin_training()
|
||||||
for itn in range(n_iter):
|
for itn in range(n_iter):
|
||||||
random.shuffle(TRAIN_DATA)
|
random.shuffle(TRAIN_DATA)
|
||||||
losses = {}
|
losses = {}
|
||||||
for text, annotations in TRAIN_DATA:
|
# batch up the examples using spaCy's minibatch
|
||||||
|
batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
|
||||||
|
for batch in batches:
|
||||||
|
texts, annotations = zip(*batch)
|
||||||
nlp.update(
|
nlp.update(
|
||||||
[text], # batch of texts
|
texts, # batch of texts
|
||||||
[annotations], # batch of annotations
|
annotations, # batch of annotations
|
||||||
drop=0.5, # dropout - make it harder to memorise data
|
drop=0.5, # dropout - make it harder to memorise data
|
||||||
sgd=optimizer, # callable to update weights
|
losses=losses,
|
||||||
losses=losses)
|
)
|
||||||
print(losses)
|
print("Losses", losses)
|
||||||
|
|
||||||
# test the trained model
|
# test the trained model
|
||||||
for text, _ in TRAIN_DATA:
|
for text, _ in TRAIN_DATA:
|
||||||
doc = nlp(text)
|
doc = nlp(text)
|
||||||
print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
|
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
|
||||||
print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])
|
print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])
|
||||||
|
|
||||||
# save model to output directory
|
# save model to output directory
|
||||||
if output_dir is not None:
|
if output_dir is not None:
|
||||||
|
@ -90,11 +94,11 @@ def main(model=None, output_dir=None, n_iter=100):
|
||||||
nlp2 = spacy.load(output_dir)
|
nlp2 = spacy.load(output_dir)
|
||||||
for text, _ in TRAIN_DATA:
|
for text, _ in TRAIN_DATA:
|
||||||
doc = nlp2(text)
|
doc = nlp2(text)
|
||||||
print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
|
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
|
||||||
print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])
|
print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
||||||
# Expected output:
|
# Expected output:
|
||||||
|
|
|
@ -31,10 +31,11 @@ import plac
|
||||||
import random
|
import random
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import spacy
|
import spacy
|
||||||
|
from spacy.util import minibatch, compounding
|
||||||
|
|
||||||
|
|
||||||
# new entity label
|
# new entity label
|
||||||
LABEL = 'ANIMAL'
|
LABEL = "ANIMAL"
|
||||||
|
|
||||||
# training data
|
# training data
|
||||||
# Note: If you're using an existing model, make sure to mix in examples of
|
# Note: If you're using an existing model, make sure to mix in examples of
|
||||||
|
@ -42,29 +43,21 @@ LABEL = 'ANIMAL'
|
||||||
# model might learn the new type, but "forget" what it previously knew.
|
# model might learn the new type, but "forget" what it previously knew.
|
||||||
# https://explosion.ai/blog/pseudo-rehearsal-catastrophic-forgetting
|
# https://explosion.ai/blog/pseudo-rehearsal-catastrophic-forgetting
|
||||||
TRAIN_DATA = [
|
TRAIN_DATA = [
|
||||||
("Horses are too tall and they pretend to care about your feelings", {
|
(
|
||||||
'entities': [(0, 6, 'ANIMAL')]
|
"Horses are too tall and they pretend to care about your feelings",
|
||||||
}),
|
{"entities": [(0, 6, LABEL)]},
|
||||||
|
),
|
||||||
("Do they bite?", {
|
("Do they bite?", {"entities": []}),
|
||||||
'entities': []
|
(
|
||||||
}),
|
"horses are too tall and they pretend to care about your feelings",
|
||||||
|
{"entities": [(0, 6, LABEL)]},
|
||||||
("horses are too tall and they pretend to care about your feelings", {
|
),
|
||||||
'entities': [(0, 6, 'ANIMAL')]
|
("horses pretend to care about your feelings", {"entities": [(0, 6, LABEL)]}),
|
||||||
}),
|
(
|
||||||
|
"they pretend to care about your feelings, those horses",
|
||||||
("horses pretend to care about your feelings", {
|
{"entities": [(48, 54, LABEL)]},
|
||||||
'entities': [(0, 6, 'ANIMAL')]
|
),
|
||||||
}),
|
("horses?", {"entities": [(0, 6, LABEL)]}),
|
||||||
|
|
||||||
("they pretend to care about your feelings, those horses", {
|
|
||||||
'entities': [(48, 54, 'ANIMAL')]
|
|
||||||
}),
|
|
||||||
|
|
||||||
("horses?", {
|
|
||||||
'entities': [(0, 6, 'ANIMAL')]
|
|
||||||
})
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -72,45 +65,50 @@ TRAIN_DATA = [
|
||||||
model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
|
model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
|
||||||
new_model_name=("New model name for model meta.", "option", "nm", str),
|
new_model_name=("New model name for model meta.", "option", "nm", str),
|
||||||
output_dir=("Optional output directory", "option", "o", Path),
|
output_dir=("Optional output directory", "option", "o", Path),
|
||||||
n_iter=("Number of training iterations", "option", "n", int))
|
n_iter=("Number of training iterations", "option", "n", int),
|
||||||
def main(model=None, new_model_name='animal', output_dir=None, n_iter=20):
|
)
|
||||||
|
def main(model=None, new_model_name="animal", output_dir=None, n_iter=30):
|
||||||
"""Set up the pipeline and entity recognizer, and train the new entity."""
|
"""Set up the pipeline and entity recognizer, and train the new entity."""
|
||||||
|
random.seed(0)
|
||||||
if model is not None:
|
if model is not None:
|
||||||
nlp = spacy.load(model) # load existing spaCy model
|
nlp = spacy.load(model) # load existing spaCy model
|
||||||
print("Loaded model '%s'" % model)
|
print("Loaded model '%s'" % model)
|
||||||
else:
|
else:
|
||||||
nlp = spacy.blank('en') # create blank Language class
|
nlp = spacy.blank("en") # create blank Language class
|
||||||
print("Created blank 'en' model")
|
print("Created blank 'en' model")
|
||||||
# Add entity recognizer to model if it's not in the pipeline
|
# Add entity recognizer to model if it's not in the pipeline
|
||||||
# nlp.create_pipe works for built-ins that are registered with spaCy
|
# nlp.create_pipe works for built-ins that are registered with spaCy
|
||||||
if 'ner' not in nlp.pipe_names:
|
if "ner" not in nlp.pipe_names:
|
||||||
ner = nlp.create_pipe('ner')
|
ner = nlp.create_pipe("ner")
|
||||||
nlp.add_pipe(ner)
|
nlp.add_pipe(ner)
|
||||||
# otherwise, get it, so we can add labels to it
|
# otherwise, get it, so we can add labels to it
|
||||||
else:
|
else:
|
||||||
ner = nlp.get_pipe('ner')
|
ner = nlp.get_pipe("ner")
|
||||||
|
|
||||||
ner.add_label(LABEL) # add new entity label to entity recognizer
|
ner.add_label(LABEL) # add new entity label to entity recognizer
|
||||||
|
# Adding extraneous labels shouldn't mess anything up
|
||||||
|
ner.add_label('VEGETABLE')
|
||||||
if model is None:
|
if model is None:
|
||||||
optimizer = nlp.begin_training()
|
optimizer = nlp.begin_training()
|
||||||
else:
|
else:
|
||||||
# Note that 'begin_training' initializes the models, so it'll zero out
|
optimizer = nlp.resume_training()
|
||||||
# existing entity types.
|
move_names = list(ner.move_names)
|
||||||
optimizer = nlp.entity.create_optimizer()
|
|
||||||
|
|
||||||
# get names of other pipes to disable them during training
|
# get names of other pipes to disable them during training
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
|
||||||
with nlp.disable_pipes(*other_pipes): # only train NER
|
with nlp.disable_pipes(*other_pipes): # only train NER
|
||||||
|
sizes = compounding(1.0, 4.0, 1.001)
|
||||||
|
# batch up the examples using spaCy's minibatch
|
||||||
for itn in range(n_iter):
|
for itn in range(n_iter):
|
||||||
random.shuffle(TRAIN_DATA)
|
random.shuffle(TRAIN_DATA)
|
||||||
|
batches = minibatch(TRAIN_DATA, size=sizes)
|
||||||
losses = {}
|
losses = {}
|
||||||
for text, annotations in TRAIN_DATA:
|
for batch in batches:
|
||||||
nlp.update([text], [annotations], sgd=optimizer, drop=0.35,
|
texts, annotations = zip(*batch)
|
||||||
losses=losses)
|
nlp.update(texts, annotations, sgd=optimizer, drop=0.35, losses=losses)
|
||||||
print(losses)
|
print("Losses", losses)
|
||||||
|
|
||||||
# test the trained model
|
# test the trained model
|
||||||
test_text = 'Do you like horses?'
|
test_text = "Do you like horses?"
|
||||||
doc = nlp(test_text)
|
doc = nlp(test_text)
|
||||||
print("Entities in '%s'" % test_text)
|
print("Entities in '%s'" % test_text)
|
||||||
for ent in doc.ents:
|
for ent in doc.ents:
|
||||||
|
@ -121,17 +119,19 @@ def main(model=None, new_model_name='animal', output_dir=None, n_iter=20):
|
||||||
output_dir = Path(output_dir)
|
output_dir = Path(output_dir)
|
||||||
if not output_dir.exists():
|
if not output_dir.exists():
|
||||||
output_dir.mkdir()
|
output_dir.mkdir()
|
||||||
nlp.meta['name'] = new_model_name # rename model
|
nlp.meta["name"] = new_model_name # rename model
|
||||||
nlp.to_disk(output_dir)
|
nlp.to_disk(output_dir)
|
||||||
print("Saved model to", output_dir)
|
print("Saved model to", output_dir)
|
||||||
|
|
||||||
# test the saved model
|
# test the saved model
|
||||||
print("Loading from", output_dir)
|
print("Loading from", output_dir)
|
||||||
nlp2 = spacy.load(output_dir)
|
nlp2 = spacy.load(output_dir)
|
||||||
|
# Check the classes have loaded back consistently
|
||||||
|
assert nlp2.get_pipe('ner').move_names == move_names
|
||||||
doc2 = nlp2(test_text)
|
doc2 = nlp2(test_text)
|
||||||
for ent in doc2.ents:
|
for ent in doc2.ents:
|
||||||
print(ent.label_, ent.text)
|
print(ent.label_, ent.text)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
|
@ -13,63 +13,74 @@ import plac
|
||||||
import random
|
import random
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import spacy
|
import spacy
|
||||||
|
from spacy.util import minibatch, compounding
|
||||||
|
|
||||||
|
|
||||||
# training data
|
# training data
|
||||||
TRAIN_DATA = [
|
TRAIN_DATA = [
|
||||||
("They trade mortgage-backed securities.", {
|
(
|
||||||
'heads': [1, 1, 4, 4, 5, 1, 1],
|
"They trade mortgage-backed securities.",
|
||||||
'deps': ['nsubj', 'ROOT', 'compound', 'punct', 'nmod', 'dobj', 'punct']
|
{
|
||||||
}),
|
"heads": [1, 1, 4, 4, 5, 1, 1],
|
||||||
("I like London and Berlin.", {
|
"deps": ["nsubj", "ROOT", "compound", "punct", "nmod", "dobj", "punct"],
|
||||||
'heads': [1, 1, 1, 2, 2, 1],
|
},
|
||||||
'deps': ['nsubj', 'ROOT', 'dobj', 'cc', 'conj', 'punct']
|
),
|
||||||
})
|
(
|
||||||
|
"I like London and Berlin.",
|
||||||
|
{
|
||||||
|
"heads": [1, 1, 1, 2, 2, 1],
|
||||||
|
"deps": ["nsubj", "ROOT", "dobj", "cc", "conj", "punct"],
|
||||||
|
},
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
|
model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
|
||||||
output_dir=("Optional output directory", "option", "o", Path),
|
output_dir=("Optional output directory", "option", "o", Path),
|
||||||
n_iter=("Number of training iterations", "option", "n", int))
|
n_iter=("Number of training iterations", "option", "n", int),
|
||||||
|
)
|
||||||
def main(model=None, output_dir=None, n_iter=10):
|
def main(model=None, output_dir=None, n_iter=10):
|
||||||
"""Load the model, set up the pipeline and train the parser."""
|
"""Load the model, set up the pipeline and train the parser."""
|
||||||
if model is not None:
|
if model is not None:
|
||||||
nlp = spacy.load(model) # load existing spaCy model
|
nlp = spacy.load(model) # load existing spaCy model
|
||||||
print("Loaded model '%s'" % model)
|
print("Loaded model '%s'" % model)
|
||||||
else:
|
else:
|
||||||
nlp = spacy.blank('en') # create blank Language class
|
nlp = spacy.blank("en") # create blank Language class
|
||||||
print("Created blank 'en' model")
|
print("Created blank 'en' model")
|
||||||
|
|
||||||
# add the parser to the pipeline if it doesn't exist
|
# add the parser to the pipeline if it doesn't exist
|
||||||
# nlp.create_pipe works for built-ins that are registered with spaCy
|
# nlp.create_pipe works for built-ins that are registered with spaCy
|
||||||
if 'parser' not in nlp.pipe_names:
|
if "parser" not in nlp.pipe_names:
|
||||||
parser = nlp.create_pipe('parser')
|
parser = nlp.create_pipe("parser")
|
||||||
nlp.add_pipe(parser, first=True)
|
nlp.add_pipe(parser, first=True)
|
||||||
# otherwise, get it, so we can add labels to it
|
# otherwise, get it, so we can add labels to it
|
||||||
else:
|
else:
|
||||||
parser = nlp.get_pipe('parser')
|
parser = nlp.get_pipe("parser")
|
||||||
|
|
||||||
# add labels to the parser
|
# add labels to the parser
|
||||||
for _, annotations in TRAIN_DATA:
|
for _, annotations in TRAIN_DATA:
|
||||||
for dep in annotations.get('deps', []):
|
for dep in annotations.get("deps", []):
|
||||||
parser.add_label(dep)
|
parser.add_label(dep)
|
||||||
|
|
||||||
# get names of other pipes to disable them during training
|
# get names of other pipes to disable them during training
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'parser']
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
|
||||||
with nlp.disable_pipes(*other_pipes): # only train parser
|
with nlp.disable_pipes(*other_pipes): # only train parser
|
||||||
optimizer = nlp.begin_training()
|
optimizer = nlp.begin_training()
|
||||||
for itn in range(n_iter):
|
for itn in range(n_iter):
|
||||||
random.shuffle(TRAIN_DATA)
|
random.shuffle(TRAIN_DATA)
|
||||||
losses = {}
|
losses = {}
|
||||||
for text, annotations in TRAIN_DATA:
|
# batch up the examples using spaCy's minibatch
|
||||||
nlp.update([text], [annotations], sgd=optimizer, losses=losses)
|
batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
|
||||||
print(losses)
|
for batch in batches:
|
||||||
|
texts, annotations = zip(*batch)
|
||||||
|
nlp.update(texts, annotations, sgd=optimizer, losses=losses)
|
||||||
|
print("Losses", losses)
|
||||||
|
|
||||||
# test the trained model
|
# test the trained model
|
||||||
test_text = "I like securities."
|
test_text = "I like securities."
|
||||||
doc = nlp(test_text)
|
doc = nlp(test_text)
|
||||||
print('Dependencies', [(t.text, t.dep_, t.head.text) for t in doc])
|
print("Dependencies", [(t.text, t.dep_, t.head.text) for t in doc])
|
||||||
|
|
||||||
# save model to output directory
|
# save model to output directory
|
||||||
if output_dir is not None:
|
if output_dir is not None:
|
||||||
|
@ -83,10 +94,10 @@ def main(model=None, output_dir=None, n_iter=10):
|
||||||
print("Loading from", output_dir)
|
print("Loading from", output_dir)
|
||||||
nlp2 = spacy.load(output_dir)
|
nlp2 = spacy.load(output_dir)
|
||||||
doc = nlp2(test_text)
|
doc = nlp2(test_text)
|
||||||
print('Dependencies', [(t.text, t.dep_, t.head.text) for t in doc])
|
print("Dependencies", [(t.text, t.dep_, t.head.text) for t in doc])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
||||||
# expected result:
|
# expected result:
|
||||||
|
|
|
@ -16,6 +16,7 @@ import plac
|
||||||
import random
|
import random
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import spacy
|
import spacy
|
||||||
|
from spacy.util import minibatch, compounding
|
||||||
|
|
||||||
|
|
||||||
# You need to define a mapping from your data's part-of-speech tag names to the
|
# You need to define a mapping from your data's part-of-speech tag names to the
|
||||||
|
@ -24,28 +25,25 @@ import spacy
|
||||||
# http://universaldependencies.github.io/docs/u/pos/index.html
|
# http://universaldependencies.github.io/docs/u/pos/index.html
|
||||||
# You may also specify morphological features for your tags, from the universal
|
# You may also specify morphological features for your tags, from the universal
|
||||||
# scheme.
|
# scheme.
|
||||||
TAG_MAP = {
|
TAG_MAP = {"N": {"pos": "NOUN"}, "V": {"pos": "VERB"}, "J": {"pos": "ADJ"}}
|
||||||
'N': {'pos': 'NOUN'},
|
|
||||||
'V': {'pos': 'VERB'},
|
|
||||||
'J': {'pos': 'ADJ'}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Usually you'll read this in, of course. Data formats vary. Ensure your
|
# Usually you'll read this in, of course. Data formats vary. Ensure your
|
||||||
# strings are unicode and that the number of tags assigned matches spaCy's
|
# strings are unicode and that the number of tags assigned matches spaCy's
|
||||||
# tokenization. If not, you can always add a 'words' key to the annotations
|
# tokenization. If not, you can always add a 'words' key to the annotations
|
||||||
# that specifies the gold-standard tokenization, e.g.:
|
# that specifies the gold-standard tokenization, e.g.:
|
||||||
# ("Eatblueham", {'words': ['Eat', 'blue', 'ham'] 'tags': ['V', 'J', 'N']})
|
# ("Eatblueham", {'words': ['Eat', 'blue', 'ham'], 'tags': ['V', 'J', 'N']})
|
||||||
TRAIN_DATA = [
|
TRAIN_DATA = [
|
||||||
("I like green eggs", {'tags': ['N', 'V', 'J', 'N']}),
|
("I like green eggs", {"tags": ["N", "V", "J", "N"]}),
|
||||||
("Eat blue ham", {'tags': ['V', 'J', 'N']})
|
("Eat blue ham", {"tags": ["V", "J", "N"]}),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
lang=("ISO Code of language to use", "option", "l", str),
|
lang=("ISO Code of language to use", "option", "l", str),
|
||||||
output_dir=("Optional output directory", "option", "o", Path),
|
output_dir=("Optional output directory", "option", "o", Path),
|
||||||
n_iter=("Number of training iterations", "option", "n", int))
|
n_iter=("Number of training iterations", "option", "n", int),
|
||||||
def main(lang='en', output_dir=None, n_iter=25):
|
)
|
||||||
|
def main(lang="en", output_dir=None, n_iter=25):
|
||||||
"""Create a new model, set up the pipeline and train the tagger. In order to
|
"""Create a new model, set up the pipeline and train the tagger. In order to
|
||||||
train the tagger with a custom tag map, we're creating a new Language
|
train the tagger with a custom tag map, we're creating a new Language
|
||||||
instance with a custom vocab.
|
instance with a custom vocab.
|
||||||
|
@ -53,7 +51,7 @@ def main(lang='en', output_dir=None, n_iter=25):
|
||||||
nlp = spacy.blank(lang)
|
nlp = spacy.blank(lang)
|
||||||
# add the tagger to the pipeline
|
# add the tagger to the pipeline
|
||||||
# nlp.create_pipe works for built-ins that are registered with spaCy
|
# nlp.create_pipe works for built-ins that are registered with spaCy
|
||||||
tagger = nlp.create_pipe('tagger')
|
tagger = nlp.create_pipe("tagger")
|
||||||
# Add the tags. This needs to be done before you start training.
|
# Add the tags. This needs to be done before you start training.
|
||||||
for tag, values in TAG_MAP.items():
|
for tag, values in TAG_MAP.items():
|
||||||
tagger.add_label(tag, values)
|
tagger.add_label(tag, values)
|
||||||
|
@ -63,14 +61,17 @@ def main(lang='en', output_dir=None, n_iter=25):
|
||||||
for i in range(n_iter):
|
for i in range(n_iter):
|
||||||
random.shuffle(TRAIN_DATA)
|
random.shuffle(TRAIN_DATA)
|
||||||
losses = {}
|
losses = {}
|
||||||
for text, annotations in TRAIN_DATA:
|
# batch up the examples using spaCy's minibatch
|
||||||
nlp.update([text], [annotations], sgd=optimizer, losses=losses)
|
batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
|
||||||
print(losses)
|
for batch in batches:
|
||||||
|
texts, annotations = zip(*batch)
|
||||||
|
nlp.update(texts, annotations, sgd=optimizer, losses=losses)
|
||||||
|
print("Losses", losses)
|
||||||
|
|
||||||
# test the trained model
|
# test the trained model
|
||||||
test_text = "I like blue eggs"
|
test_text = "I like blue eggs"
|
||||||
doc = nlp(test_text)
|
doc = nlp(test_text)
|
||||||
print('Tags', [(t.text, t.tag_, t.pos_) for t in doc])
|
print("Tags", [(t.text, t.tag_, t.pos_) for t in doc])
|
||||||
|
|
||||||
# save model to output directory
|
# save model to output directory
|
||||||
if output_dir is not None:
|
if output_dir is not None:
|
||||||
|
@ -84,10 +85,10 @@ def main(lang='en', output_dir=None, n_iter=25):
|
||||||
print("Loading from", output_dir)
|
print("Loading from", output_dir)
|
||||||
nlp2 = spacy.load(output_dir)
|
nlp2 = spacy.load(output_dir)
|
||||||
doc = nlp2(test_text)
|
doc = nlp2(test_text)
|
||||||
print('Tags', [(t.text, t.tag_, t.pos_) for t in doc])
|
print("Tags", [(t.text, t.tag_, t.pos_) for t in doc])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
||||||
# Expected output:
|
# Expected output:
|
||||||
|
|
|
@ -23,55 +23,70 @@ from spacy.util import minibatch, compounding
|
||||||
model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
|
model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
|
||||||
output_dir=("Optional output directory", "option", "o", Path),
|
output_dir=("Optional output directory", "option", "o", Path),
|
||||||
n_texts=("Number of texts to train from", "option", "t", int),
|
n_texts=("Number of texts to train from", "option", "t", int),
|
||||||
n_iter=("Number of training iterations", "option", "n", int))
|
n_iter=("Number of training iterations", "option", "n", int),
|
||||||
|
)
|
||||||
def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
|
def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
|
||||||
|
if output_dir is not None:
|
||||||
|
output_dir = Path(output_dir)
|
||||||
|
if not output_dir.exists():
|
||||||
|
output_dir.mkdir()
|
||||||
|
|
||||||
if model is not None:
|
if model is not None:
|
||||||
nlp = spacy.load(model) # load existing spaCy model
|
nlp = spacy.load(model) # load existing spaCy model
|
||||||
print("Loaded model '%s'" % model)
|
print("Loaded model '%s'" % model)
|
||||||
else:
|
else:
|
||||||
nlp = spacy.blank('en') # create blank Language class
|
nlp = spacy.blank("en") # create blank Language class
|
||||||
print("Created blank 'en' model")
|
print("Created blank 'en' model")
|
||||||
|
|
||||||
# add the text classifier to the pipeline if it doesn't exist
|
# add the text classifier to the pipeline if it doesn't exist
|
||||||
# nlp.create_pipe works for built-ins that are registered with spaCy
|
# nlp.create_pipe works for built-ins that are registered with spaCy
|
||||||
if 'textcat' not in nlp.pipe_names:
|
if "textcat" not in nlp.pipe_names:
|
||||||
textcat = nlp.create_pipe('textcat')
|
textcat = nlp.create_pipe("textcat", config={
|
||||||
|
"architecture": "simple_cnn",
|
||||||
|
"exclusive_classes": True})
|
||||||
nlp.add_pipe(textcat, last=True)
|
nlp.add_pipe(textcat, last=True)
|
||||||
# otherwise, get it, so we can add labels to it
|
# otherwise, get it, so we can add labels to it
|
||||||
else:
|
else:
|
||||||
textcat = nlp.get_pipe('textcat')
|
textcat = nlp.get_pipe("textcat")
|
||||||
|
|
||||||
# add label to text classifier
|
# add label to text classifier
|
||||||
textcat.add_label('POSITIVE')
|
textcat.add_label("POSITIVE")
|
||||||
|
textcat.add_label("NEGATIVE")
|
||||||
|
|
||||||
# load the IMDB dataset
|
# load the IMDB dataset
|
||||||
print("Loading IMDB data...")
|
print("Loading IMDB data...")
|
||||||
(train_texts, train_cats), (dev_texts, dev_cats) = load_data(limit=n_texts)
|
(train_texts, train_cats), (dev_texts, dev_cats) = load_data(limit=n_texts)
|
||||||
print("Using {} examples ({} training, {} evaluation)"
|
print(
|
||||||
.format(n_texts, len(train_texts), len(dev_texts)))
|
"Using {} examples ({} training, {} evaluation)".format(
|
||||||
train_data = list(zip(train_texts,
|
n_texts, len(train_texts), len(dev_texts)
|
||||||
[{'cats': cats} for cats in train_cats]))
|
)
|
||||||
|
)
|
||||||
|
train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))
|
||||||
|
|
||||||
# get names of other pipes to disable them during training
|
# get names of other pipes to disable them during training
|
||||||
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'textcat']
|
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
|
||||||
with nlp.disable_pipes(*other_pipes): # only train textcat
|
with nlp.disable_pipes(*other_pipes): # only train textcat
|
||||||
optimizer = nlp.begin_training()
|
optimizer = nlp.begin_training()
|
||||||
print("Training the model...")
|
print("Training the model...")
|
||||||
print('{:^5}\t{:^5}\t{:^5}\t{:^5}'.format('LOSS', 'P', 'R', 'F'))
|
print("{:^5}\t{:^5}\t{:^5}\t{:^5}".format("LOSS", "P", "R", "F"))
|
||||||
for i in range(n_iter):
|
for i in range(n_iter):
|
||||||
losses = {}
|
losses = {}
|
||||||
# batch up the examples using spaCy's minibatch
|
# batch up the examples using spaCy's minibatch
|
||||||
batches = minibatch(train_data, size=compounding(4., 32., 1.001))
|
batches = minibatch(train_data, size=compounding(4.0, 32.0, 1.001))
|
||||||
for batch in batches:
|
for batch in batches:
|
||||||
texts, annotations = zip(*batch)
|
texts, annotations = zip(*batch)
|
||||||
nlp.update(texts, annotations, sgd=optimizer, drop=0.2,
|
nlp.update(texts, annotations, sgd=optimizer, drop=0.2, losses=losses)
|
||||||
losses=losses)
|
|
||||||
with textcat.model.use_params(optimizer.averages):
|
with textcat.model.use_params(optimizer.averages):
|
||||||
# evaluate on the dev data split off in load_data()
|
# evaluate on the dev data split off in load_data()
|
||||||
scores = evaluate(nlp.tokenizer, textcat, dev_texts, dev_cats)
|
scores = evaluate(nlp.tokenizer, textcat, dev_texts, dev_cats)
|
||||||
print('{0:.3f}\t{1:.3f}\t{2:.3f}\t{3:.3f}' # print a simple table
|
print(
|
||||||
.format(losses['textcat'], scores['textcat_p'],
|
"{0:.3f}\t{1:.3f}\t{2:.3f}\t{3:.3f}".format( # print a simple table
|
||||||
scores['textcat_r'], scores['textcat_f']))
|
losses["textcat"],
|
||||||
|
scores["textcat_p"],
|
||||||
|
scores["textcat_r"],
|
||||||
|
scores["textcat_f"],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# test the trained model
|
# test the trained model
|
||||||
test_text = "This movie sucked"
|
test_text = "This movie sucked"
|
||||||
|
@ -79,10 +94,8 @@ def main(model=None, output_dir=None, n_iter=20, n_texts=2000):
|
||||||
print(test_text, doc.cats)
|
print(test_text, doc.cats)
|
||||||
|
|
||||||
if output_dir is not None:
|
if output_dir is not None:
|
||||||
output_dir = Path(output_dir)
|
with nlp.use_params(optimizer.averages):
|
||||||
if not output_dir.exists():
|
nlp.to_disk(output_dir)
|
||||||
output_dir.mkdir()
|
|
||||||
nlp.to_disk(output_dir)
|
|
||||||
print("Saved model to", output_dir)
|
print("Saved model to", output_dir)
|
||||||
|
|
||||||
# test the saved model
|
# test the saved model
|
||||||
|
@ -99,35 +112,40 @@ def load_data(limit=0, split=0.8):
|
||||||
random.shuffle(train_data)
|
random.shuffle(train_data)
|
||||||
train_data = train_data[-limit:]
|
train_data = train_data[-limit:]
|
||||||
texts, labels = zip(*train_data)
|
texts, labels = zip(*train_data)
|
||||||
cats = [{'POSITIVE': bool(y)} for y in labels]
|
cats = [{"POSITIVE": bool(y), "NEGATIVE": not bool(y)} for y in labels]
|
||||||
split = int(len(train_data) * split)
|
split = int(len(train_data) * split)
|
||||||
return (texts[:split], cats[:split]), (texts[split:], cats[split:])
|
return (texts[:split], cats[:split]), (texts[split:], cats[split:])
|
||||||
|
|
||||||
|
|
||||||
def evaluate(tokenizer, textcat, texts, cats):
|
def evaluate(tokenizer, textcat, texts, cats):
|
||||||
docs = (tokenizer(text) for text in texts)
|
docs = (tokenizer(text) for text in texts)
|
||||||
tp = 1e-8 # True positives
|
tp = 0.0 # True positives
|
||||||
fp = 1e-8 # False positives
|
fp = 1e-8 # False positives
|
||||||
fn = 1e-8 # False negatives
|
fn = 1e-8 # False negatives
|
||||||
tn = 1e-8 # True negatives
|
tn = 0.0 # True negatives
|
||||||
for i, doc in enumerate(textcat.pipe(docs)):
|
for i, doc in enumerate(textcat.pipe(docs)):
|
||||||
gold = cats[i]
|
gold = cats[i]
|
||||||
for label, score in doc.cats.items():
|
for label, score in doc.cats.items():
|
||||||
if label not in gold:
|
if label not in gold:
|
||||||
continue
|
continue
|
||||||
|
if label == "NEGATIVE":
|
||||||
|
continue
|
||||||
if score >= 0.5 and gold[label] >= 0.5:
|
if score >= 0.5 and gold[label] >= 0.5:
|
||||||
tp += 1.
|
tp += 1.0
|
||||||
elif score >= 0.5 and gold[label] < 0.5:
|
elif score >= 0.5 and gold[label] < 0.5:
|
||||||
fp += 1.
|
fp += 1.0
|
||||||
elif score < 0.5 and gold[label] < 0.5:
|
elif score < 0.5 and gold[label] < 0.5:
|
||||||
tn += 1
|
tn += 1
|
||||||
elif score < 0.5 and gold[label] >= 0.5:
|
elif score < 0.5 and gold[label] >= 0.5:
|
||||||
fn += 1
|
fn += 1
|
||||||
precision = tp / (tp + fp)
|
precision = tp / (tp + fp)
|
||||||
recall = tp / (tp + fn)
|
recall = tp / (tp + fn)
|
||||||
f_score = 2 * (precision * recall) / (precision + recall)
|
if (precision+recall) == 0:
|
||||||
return {'textcat_p': precision, 'textcat_r': recall, 'textcat_f': f_score}
|
f_score = 0.0
|
||||||
|
else:
|
||||||
|
f_score = 2 * (precision * recall) / (precision + recall)
|
||||||
|
return {"textcat_p": precision, "textcat_r": recall, "textcat_f": f_score}
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"id": "wsj_0200",
|
"id": 42,
|
||||||
"paragraphs": [
|
"paragraphs": [
|
||||||
{
|
{
|
||||||
"raw": "In an Oct. 19 review of \"The Misanthrope\" at Chicago's Goodman Theatre (\"Revitalized Classics Take the Stage in Windy City,\" Leisure & Arts), the role of Celimene, played by Kim Cattrall, was mistakenly attributed to Christina Haag. Ms. Haag plays Elianti.",
|
"raw": "In an Oct. 19 review of \"The Misanthrope\" at Chicago's Goodman Theatre (\"Revitalized Classics Take the Stage in Windy City,\" Leisure & Arts), the role of Celimene, played by Kim Cattrall, was mistakenly attributed to Christina Haag. Ms. Haag plays Elianti.",
|
||||||
|
|
|
@ -14,8 +14,13 @@ from spacy.language import Language
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
vectors_loc=("Path to .vec file", "positional", None, str),
|
vectors_loc=("Path to .vec file", "positional", None, str),
|
||||||
lang=("Optional language ID. If not set, blank Language() will be used.",
|
lang=(
|
||||||
"positional", None, str))
|
"Optional language ID. If not set, blank Language() will be used.",
|
||||||
|
"positional",
|
||||||
|
None,
|
||||||
|
str,
|
||||||
|
),
|
||||||
|
)
|
||||||
def main(vectors_loc, lang=None):
|
def main(vectors_loc, lang=None):
|
||||||
if lang is None:
|
if lang is None:
|
||||||
nlp = Language()
|
nlp = Language()
|
||||||
|
@ -24,21 +29,21 @@ def main(vectors_loc, lang=None):
|
||||||
# save the model to disk and load it back later (models always need a
|
# save the model to disk and load it back later (models always need a
|
||||||
# "lang" setting). Use 'xx' for blank multi-language class.
|
# "lang" setting). Use 'xx' for blank multi-language class.
|
||||||
nlp = spacy.blank(lang)
|
nlp = spacy.blank(lang)
|
||||||
with open(vectors_loc, 'rb') as file_:
|
with open(vectors_loc, "rb") as file_:
|
||||||
header = file_.readline()
|
header = file_.readline()
|
||||||
nr_row, nr_dim = header.split()
|
nr_row, nr_dim = header.split()
|
||||||
nlp.vocab.reset_vectors(width=int(nr_dim))
|
nlp.vocab.reset_vectors(width=int(nr_dim))
|
||||||
for line in file_:
|
for line in file_:
|
||||||
line = line.rstrip().decode('utf8')
|
line = line.rstrip().decode("utf8")
|
||||||
pieces = line.rsplit(' ', int(nr_dim))
|
pieces = line.rsplit(" ", int(nr_dim))
|
||||||
word = pieces[0]
|
word = pieces[0]
|
||||||
vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
|
vector = numpy.asarray([float(v) for v in pieces[1:]], dtype="f")
|
||||||
nlp.vocab.set_vector(word, vector) # add the vectors to the vocab
|
nlp.vocab.set_vector(word, vector) # add the vectors to the vocab
|
||||||
# test the vectors and similarity
|
# test the vectors and similarity
|
||||||
text = 'class colspan'
|
text = "class colspan"
|
||||||
doc = nlp(text)
|
doc = nlp(text)
|
||||||
print(text, doc[0].similarity(doc[1]))
|
print(text, doc[0].similarity(doc[1]))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
|
@ -14,26 +14,45 @@ import plac
|
||||||
import spacy
|
import spacy
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import tqdm
|
import tqdm
|
||||||
from tensorflow.contrib.tensorboard.plugins.projector import visualize_embeddings, ProjectorConfig
|
from tensorflow.contrib.tensorboard.plugins.projector import (
|
||||||
|
visualize_embeddings,
|
||||||
|
ProjectorConfig,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
vectors_loc=("Path to spaCy model that contains vectors", "positional", None, str),
|
vectors_loc=("Path to spaCy model that contains vectors", "positional", None, str),
|
||||||
out_loc=("Path to output folder for tensorboard session data", "positional", None, str),
|
out_loc=(
|
||||||
name=("Human readable name for tsv file and vectors tensor", "positional", None, str),
|
"Path to output folder for tensorboard session data",
|
||||||
|
"positional",
|
||||||
|
None,
|
||||||
|
str,
|
||||||
|
),
|
||||||
|
name=(
|
||||||
|
"Human readable name for tsv file and vectors tensor",
|
||||||
|
"positional",
|
||||||
|
None,
|
||||||
|
str,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
def main(vectors_loc, out_loc, name="spaCy_vectors"):
|
def main(vectors_loc, out_loc, name="spaCy_vectors"):
|
||||||
meta_file = "{}.tsv".format(name)
|
meta_file = "{}.tsv".format(name)
|
||||||
out_meta_file = path.join(out_loc, meta_file)
|
out_meta_file = path.join(out_loc, meta_file)
|
||||||
|
|
||||||
print('Loading spaCy vectors model: {}'.format(vectors_loc))
|
print("Loading spaCy vectors model: {}".format(vectors_loc))
|
||||||
model = spacy.load(vectors_loc)
|
model = spacy.load(vectors_loc)
|
||||||
print('Finding lexemes with vectors attached: {}'.format(vectors_loc))
|
print("Finding lexemes with vectors attached: {}".format(vectors_loc))
|
||||||
strings_stream = tqdm.tqdm(model.vocab.strings, total=len(model.vocab.strings), leave=False)
|
strings_stream = tqdm.tqdm(
|
||||||
|
model.vocab.strings, total=len(model.vocab.strings), leave=False
|
||||||
|
)
|
||||||
queries = [w for w in strings_stream if model.vocab.has_vector(w)]
|
queries = [w for w in strings_stream if model.vocab.has_vector(w)]
|
||||||
vector_count = len(queries)
|
vector_count = len(queries)
|
||||||
|
|
||||||
print('Building Tensorboard Projector metadata for ({}) vectors: {}'.format(vector_count, out_meta_file))
|
print(
|
||||||
|
"Building Tensorboard Projector metadata for ({}) vectors: {}".format(
|
||||||
|
vector_count, out_meta_file
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# Store vector data in a tensorflow variable
|
# Store vector data in a tensorflow variable
|
||||||
tf_vectors_variable = numpy.zeros((vector_count, model.vocab.vectors.shape[1]))
|
tf_vectors_variable = numpy.zeros((vector_count, model.vocab.vectors.shape[1]))
|
||||||
|
@ -41,22 +60,26 @@ def main(vectors_loc, out_loc, name="spaCy_vectors"):
|
||||||
# Write a tab-separated file that contains information about the vectors for visualization
|
# Write a tab-separated file that contains information about the vectors for visualization
|
||||||
#
|
#
|
||||||
# Reference: https://www.tensorflow.org/programmers_guide/embedding#metadata
|
# Reference: https://www.tensorflow.org/programmers_guide/embedding#metadata
|
||||||
with open(out_meta_file, 'wb') as file_metadata:
|
with open(out_meta_file, "wb") as file_metadata:
|
||||||
# Define columns in the first row
|
# Define columns in the first row
|
||||||
file_metadata.write("Text\tFrequency\n".encode('utf-8'))
|
file_metadata.write("Text\tFrequency\n".encode("utf-8"))
|
||||||
# Write out a row for each vector that we add to the tensorflow variable we created
|
# Write out a row for each vector that we add to the tensorflow variable we created
|
||||||
vec_index = 0
|
vec_index = 0
|
||||||
for text in tqdm.tqdm(queries, total=len(queries), leave=False):
|
for text in tqdm.tqdm(queries, total=len(queries), leave=False):
|
||||||
# https://github.com/tensorflow/tensorflow/issues/9094
|
# https://github.com/tensorflow/tensorflow/issues/9094
|
||||||
text = '<Space>' if text.lstrip() == '' else text
|
text = "<Space>" if text.lstrip() == "" else text
|
||||||
lex = model.vocab[text]
|
lex = model.vocab[text]
|
||||||
|
|
||||||
# Store vector data and metadata
|
# Store vector data and metadata
|
||||||
tf_vectors_variable[vec_index] = model.vocab.get_vector(text)
|
tf_vectors_variable[vec_index] = model.vocab.get_vector(text)
|
||||||
file_metadata.write("{}\t{}\n".format(text, math.exp(lex.prob) * vector_count).encode('utf-8'))
|
file_metadata.write(
|
||||||
|
"{}\t{}\n".format(text, math.exp(lex.prob) * vector_count).encode(
|
||||||
|
"utf-8"
|
||||||
|
)
|
||||||
|
)
|
||||||
vec_index += 1
|
vec_index += 1
|
||||||
|
|
||||||
print('Running Tensorflow Session...')
|
print("Running Tensorflow Session...")
|
||||||
sess = tf.InteractiveSession()
|
sess = tf.InteractiveSession()
|
||||||
tf.Variable(tf_vectors_variable, trainable=False, name=name)
|
tf.Variable(tf_vectors_variable, trainable=False, name=name)
|
||||||
tf.global_variables_initializer().run()
|
tf.global_variables_initializer().run()
|
||||||
|
@ -73,10 +96,10 @@ def main(vectors_loc, out_loc, name="spaCy_vectors"):
|
||||||
visualize_embeddings(writer, config)
|
visualize_embeddings(writer, config)
|
||||||
|
|
||||||
# Save session and print run command to the output
|
# Save session and print run command to the output
|
||||||
print('Saving Tensorboard Session...')
|
print("Saving Tensorboard Session...")
|
||||||
saver.save(sess, path.join(out_loc, '{}.ckpt'.format(name)))
|
saver.save(sess, path.join(out_loc, "{}.ckpt".format(name)))
|
||||||
print('Done. Run `tensorboard --logdir={0}` to view in Tensorboard'.format(out_loc))
|
print("Done. Run `tensorboard --logdir={0}` to view in Tensorboard".format(out_loc))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
plac.call(main)
|
plac.call(main)
|
||||||
|
|
1
fabfile.py
vendored
1
fabfile.py
vendored
|
@ -59,6 +59,7 @@ def make():
|
||||||
def sdist():
|
def sdist():
|
||||||
with virtualenv(VENV_DIR) as venv_local:
|
with virtualenv(VENV_DIR) as venv_local:
|
||||||
with lcd(path.dirname(__file__)):
|
with lcd(path.dirname(__file__)):
|
||||||
|
local('python -m pip install -U setuptools')
|
||||||
local('python setup.py sdist')
|
local('python setup.py sdist')
|
||||||
|
|
||||||
def wheel():
|
def wheel():
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user