mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Revert #4334
This commit is contained in:
parent
8489ec08ba
commit
3d8fd4b461
|
@ -14,7 +14,7 @@ install:
|
||||||
script:
|
script:
|
||||||
- "cat /proc/cpuinfo | grep flags | head -n 1"
|
- "cat /proc/cpuinfo | grep flags | head -n 1"
|
||||||
- "pip install pytest pytest-timeout"
|
- "pip install pytest pytest-timeout"
|
||||||
- "python -m pytest tests --tb=native spacy"
|
- "python -m pytest --tb=native spacy"
|
||||||
branches:
|
branches:
|
||||||
except:
|
except:
|
||||||
- spacy.io
|
- spacy.io
|
||||||
|
|
|
@ -175,12 +175,12 @@ The description text can be very short – we don't want to make this too
|
||||||
bureaucratic.
|
bureaucratic.
|
||||||
|
|
||||||
Next, create a test file named `test_issue[ISSUE NUMBER].py` in the
|
Next, create a test file named `test_issue[ISSUE NUMBER].py` in the
|
||||||
[`tests/regression`](tests/regression) folder. Test for the bug
|
[`spacy/tests/regression`](spacy/tests/regression) folder. Test for the bug
|
||||||
you're fixing, and make sure the test fails. Next, add and commit your test file
|
you're fixing, and make sure the test fails. Next, add and commit your test file
|
||||||
referencing the issue number in the commit message. Finally, fix the bug, make
|
referencing the issue number in the commit message. Finally, fix the bug, make
|
||||||
sure your test passes and reference the issue in your commit message.
|
sure your test passes and reference the issue in your commit message.
|
||||||
|
|
||||||
📖 **For more information on how to add tests, check out the [tests README](tests/README.md).**
|
📖 **For more information on how to add tests, check out the [tests README](spacy/tests/README.md).**
|
||||||
|
|
||||||
## Code conventions
|
## Code conventions
|
||||||
|
|
||||||
|
@ -425,7 +425,7 @@ spaCy uses the [pytest](http://doc.pytest.org/) framework for testing. For more
|
||||||
info on this, see the [pytest documentation](http://docs.pytest.org/en/latest/contents.html).
|
info on this, see the [pytest documentation](http://docs.pytest.org/en/latest/contents.html).
|
||||||
Tests for spaCy modules and classes live in their own directories of the same
|
Tests for spaCy modules and classes live in their own directories of the same
|
||||||
name. For example, tests for the `Tokenizer` can be found in
|
name. For example, tests for the `Tokenizer` can be found in
|
||||||
[`tests/tokenizer`](tests/tokenizer). To be interpreted and run,
|
[`/spacy/tests/tokenizer`](spacy/tests/tokenizer). To be interpreted and run,
|
||||||
all test files and test functions need to be prefixed with `test_`.
|
all test files and test functions need to be prefixed with `test_`.
|
||||||
|
|
||||||
When adding tests, make sure to use descriptive names, keep the code short and
|
When adding tests, make sure to use descriptive names, keep the code short and
|
||||||
|
@ -440,7 +440,7 @@ you're not actually testing the model performance. If all you need is a `Doc`
|
||||||
object with annotations like heads, POS tags or the dependency parse, you can
|
object with annotations like heads, POS tags or the dependency parse, you can
|
||||||
use the `get_doc()` utility function to construct it manually.
|
use the `get_doc()` utility function to construct it manually.
|
||||||
|
|
||||||
📖 **For more guidelines and information on how to add tests, check out the [tests README](tests/README.md).**
|
📖 **For more guidelines and information on how to add tests, check out the [tests README](spacy/tests/README.md).**
|
||||||
|
|
||||||
## Updating the website
|
## Updating the website
|
||||||
|
|
||||||
|
|
|
@ -270,7 +270,7 @@ VS 2010 (Python 3.4) and VS 2015 (Python 3.5).
|
||||||
|
|
||||||
## Run tests
|
## Run tests
|
||||||
|
|
||||||
spaCy comes with an [extensive test suite](tests). In order to run the
|
spaCy comes with an [extensive test suite](spacy/tests). In order to run the
|
||||||
tests, you'll usually want to clone the repository and build spaCy from source.
|
tests, you'll usually want to clone the repository and build spaCy from source.
|
||||||
This will also install the required development dependencies and test utilities
|
This will also install the required development dependencies and test utilities
|
||||||
defined in the `requirements.txt`.
|
defined in the `requirements.txt`.
|
||||||
|
|
|
@ -98,5 +98,5 @@ jobs:
|
||||||
pip install dist/$SDIST
|
pip install dist/$SDIST
|
||||||
displayName: 'Install from sdist'
|
displayName: 'Install from sdist'
|
||||||
|
|
||||||
- script: python -m pytest tests
|
- script: python -m pytest spacy/tests
|
||||||
displayName: 'Run tests'
|
displayName: 'Run tests'
|
||||||
|
|
124
fabfile.py
vendored
124
fabfile.py
vendored
|
@ -3,152 +3,120 @@ from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from fabric.api import local, lcd, env
|
from fabric.api import local, lcd, env, settings, prefix
|
||||||
from os import path, environ
|
from os import path, environ
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
PWD = path.dirname(__file__)
|
PWD = path.dirname(__file__)
|
||||||
ENV = environ["VENV_DIR"] if "VENV_DIR" in environ else ".env"
|
ENV = environ['VENV_DIR'] if 'VENV_DIR' in environ else '.env'
|
||||||
VENV_DIR = Path(PWD) / ENV
|
VENV_DIR = Path(PWD) / ENV
|
||||||
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
@contextlib.contextmanager
|
||||||
def virtualenv(name, create=False, python="/usr/bin/python3.6"):
|
def virtualenv(name, create=False, python='/usr/bin/python3.6'):
|
||||||
python = Path(python).resolve()
|
python = Path(python).resolve()
|
||||||
env_path = VENV_DIR
|
env_path = VENV_DIR
|
||||||
if create:
|
if create:
|
||||||
if env_path.exists():
|
if env_path.exists():
|
||||||
shutil.rmtree(str(env_path))
|
shutil.rmtree(str(env_path))
|
||||||
local("{python} -m venv {env_path}".format(python=python, env_path=VENV_DIR))
|
local('{python} -m venv {env_path}'.format(python=python, env_path=VENV_DIR))
|
||||||
|
|
||||||
def wrapped_local(cmd, env_vars=[], capture=False, direct=False):
|
def wrapped_local(cmd, env_vars=[], capture=False, direct=False):
|
||||||
return local(
|
return local('source {}/bin/activate && {}'.format(env_path, cmd),
|
||||||
"source {}/bin/activate && {}".format(env_path, cmd),
|
shell='/bin/bash', capture=False)
|
||||||
shell="/bin/bash",
|
|
||||||
capture=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
yield wrapped_local
|
yield wrapped_local
|
||||||
|
|
||||||
|
|
||||||
def env(lang="python3.6"):
|
def env(lang='python3.6'):
|
||||||
if VENV_DIR.exists():
|
if VENV_DIR.exists():
|
||||||
local("rm -rf {env}".format(env=VENV_DIR))
|
local('rm -rf {env}'.format(env=VENV_DIR))
|
||||||
if lang.startswith("python3"):
|
if lang.startswith('python3'):
|
||||||
local("{lang} -m venv {env}".format(lang=lang, env=VENV_DIR))
|
local('{lang} -m venv {env}'.format(lang=lang, env=VENV_DIR))
|
||||||
else:
|
else:
|
||||||
local("{lang} -m pip install virtualenv --no-cache-dir".format(lang=lang))
|
local('{lang} -m pip install virtualenv --no-cache-dir'.format(lang=lang))
|
||||||
local(
|
local('{lang} -m virtualenv {env} --no-cache-dir'.format(lang=lang, env=VENV_DIR))
|
||||||
"{lang} -m virtualenv {env} --no-cache-dir".format(lang=lang, env=VENV_DIR)
|
|
||||||
)
|
|
||||||
with virtualenv(VENV_DIR) as venv_local:
|
with virtualenv(VENV_DIR) as venv_local:
|
||||||
print(venv_local("python --version", capture=True))
|
print(venv_local('python --version', capture=True))
|
||||||
venv_local("pip install --upgrade setuptools --no-cache-dir")
|
venv_local('pip install --upgrade setuptools --no-cache-dir')
|
||||||
venv_local("pip install pytest --no-cache-dir")
|
venv_local('pip install pytest --no-cache-dir')
|
||||||
venv_local("pip install wheel --no-cache-dir")
|
venv_local('pip install wheel --no-cache-dir')
|
||||||
venv_local("pip install -r requirements.txt --no-cache-dir")
|
venv_local('pip install -r requirements.txt --no-cache-dir')
|
||||||
venv_local("pip install pex --no-cache-dir")
|
venv_local('pip install pex --no-cache-dir')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def install():
|
def install():
|
||||||
with virtualenv(VENV_DIR) as venv_local:
|
with virtualenv(VENV_DIR) as venv_local:
|
||||||
venv_local("pip install dist/*.tar.gz")
|
venv_local('pip install dist/*.tar.gz')
|
||||||
|
|
||||||
|
|
||||||
def make():
|
def make():
|
||||||
with lcd(path.dirname(__file__)):
|
with lcd(path.dirname(__file__)):
|
||||||
local(
|
local('export PYTHONPATH=`pwd` && source .env/bin/activate && python setup.py build_ext --inplace',
|
||||||
"export PYTHONPATH=`pwd` && source .env/bin/activate && python setup.py build_ext --inplace",
|
shell='/bin/bash')
|
||||||
shell="/bin/bash",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def sdist():
|
def sdist():
|
||||||
with virtualenv(VENV_DIR) as venv_local:
|
with virtualenv(VENV_DIR) as venv_local:
|
||||||
with lcd(path.dirname(__file__)):
|
with lcd(path.dirname(__file__)):
|
||||||
local("python -m pip install -U setuptools srsly")
|
local('python -m pip install -U setuptools srsly')
|
||||||
local("python setup.py sdist")
|
local('python setup.py sdist')
|
||||||
|
|
||||||
|
|
||||||
def wheel():
|
def wheel():
|
||||||
with virtualenv(VENV_DIR) as venv_local:
|
with virtualenv(VENV_DIR) as venv_local:
|
||||||
with lcd(path.dirname(__file__)):
|
with lcd(path.dirname(__file__)):
|
||||||
venv_local("python setup.py bdist_wheel")
|
venv_local('python setup.py bdist_wheel')
|
||||||
|
|
||||||
|
|
||||||
def pex():
|
def pex():
|
||||||
with virtualenv(VENV_DIR) as venv_local:
|
with virtualenv(VENV_DIR) as venv_local:
|
||||||
with lcd(path.dirname(__file__)):
|
with lcd(path.dirname(__file__)):
|
||||||
sha = local("git rev-parse --short HEAD", capture=True)
|
sha = local('git rev-parse --short HEAD', capture=True)
|
||||||
venv_local(
|
venv_local('pex dist/*.whl -e spacy -o dist/spacy-%s.pex' % sha,
|
||||||
"pex dist/*.whl -e spacy -o dist/spacy-%s.pex" % sha, direct=True
|
direct=True)
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def clean():
|
def clean():
|
||||||
with lcd(path.dirname(__file__)):
|
with lcd(path.dirname(__file__)):
|
||||||
local("rm -f dist/*.whl")
|
local('rm -f dist/*.whl')
|
||||||
local("rm -f dist/*.pex")
|
local('rm -f dist/*.pex')
|
||||||
with virtualenv(VENV_DIR) as venv_local:
|
with virtualenv(VENV_DIR) as venv_local:
|
||||||
venv_local("python setup.py clean --all")
|
venv_local('python setup.py clean --all')
|
||||||
|
|
||||||
|
|
||||||
def test():
|
def test():
|
||||||
with virtualenv(VENV_DIR) as venv_local:
|
with virtualenv(VENV_DIR) as venv_local:
|
||||||
with lcd(path.dirname(__file__)):
|
with lcd(path.dirname(__file__)):
|
||||||
venv_local("pytest -x tests")
|
venv_local('pytest -x spacy/tests')
|
||||||
|
|
||||||
|
|
||||||
def train():
|
def train():
|
||||||
args = environ.get("SPACY_TRAIN_ARGS", "")
|
args = environ.get('SPACY_TRAIN_ARGS', '')
|
||||||
with virtualenv(VENV_DIR) as venv_local:
|
with virtualenv(VENV_DIR) as venv_local:
|
||||||
venv_local("spacy train {args}".format(args=args))
|
venv_local('spacy train {args}'.format(args=args))
|
||||||
|
|
||||||
|
|
||||||
def conll17(treebank_dir, experiment_dir, vectors_dir, config, corpus=""):
|
def conll17(treebank_dir, experiment_dir, vectors_dir, config, corpus=''):
|
||||||
is_not_clean = local("git status --porcelain", capture=True)
|
is_not_clean = local('git status --porcelain', capture=True)
|
||||||
if is_not_clean:
|
if is_not_clean:
|
||||||
print("Repository is not clean")
|
print("Repository is not clean")
|
||||||
print(is_not_clean)
|
print(is_not_clean)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
git_sha = local("git rev-parse --short HEAD", capture=True)
|
git_sha = local('git rev-parse --short HEAD', capture=True)
|
||||||
config_checksum = local("sha256sum {config}".format(config=config), capture=True)
|
config_checksum = local('sha256sum {config}'.format(config=config), capture=True)
|
||||||
experiment_dir = Path(experiment_dir) / "{}--{}".format(
|
experiment_dir = Path(experiment_dir) / '{}--{}'.format(config_checksum[:6], git_sha)
|
||||||
config_checksum[:6], git_sha
|
|
||||||
)
|
|
||||||
if not experiment_dir.exists():
|
if not experiment_dir.exists():
|
||||||
experiment_dir.mkdir()
|
experiment_dir.mkdir()
|
||||||
test_data_dir = Path(treebank_dir) / "ud-test-v2.0-conll2017"
|
test_data_dir = Path(treebank_dir) / 'ud-test-v2.0-conll2017'
|
||||||
assert test_data_dir.exists()
|
assert test_data_dir.exists()
|
||||||
assert test_data_dir.is_dir()
|
assert test_data_dir.is_dir()
|
||||||
if corpus:
|
if corpus:
|
||||||
corpora = [corpus]
|
corpora = [corpus]
|
||||||
else:
|
else:
|
||||||
corpora = ["UD_English", "UD_Chinese", "UD_Japanese", "UD_Vietnamese"]
|
corpora = ['UD_English', 'UD_Chinese', 'UD_Japanese', 'UD_Vietnamese']
|
||||||
|
|
||||||
local(
|
local('cp {config} {experiment_dir}/config.json'.format(config=config, experiment_dir=experiment_dir))
|
||||||
"cp {config} {experiment_dir}/config.json".format(
|
|
||||||
config=config, experiment_dir=experiment_dir
|
|
||||||
)
|
|
||||||
)
|
|
||||||
with virtualenv(VENV_DIR) as venv_local:
|
with virtualenv(VENV_DIR) as venv_local:
|
||||||
for corpus in corpora:
|
for corpus in corpora:
|
||||||
venv_local(
|
venv_local('spacy ud-train {treebank_dir} {experiment_dir} {config} {corpus} -v {vectors_dir}'.format(
|
||||||
"spacy ud-train {treebank_dir} {experiment_dir} {config} {corpus} -v {vectors_dir}".format(
|
treebank_dir=treebank_dir, experiment_dir=experiment_dir, config=config, corpus=corpus, vectors_dir=vectors_dir))
|
||||||
treebank_dir=treebank_dir,
|
venv_local('spacy ud-run-test {test_data_dir} {experiment_dir} {corpus}'.format(
|
||||||
experiment_dir=experiment_dir,
|
test_data_dir=test_data_dir, experiment_dir=experiment_dir, config=config, corpus=corpus))
|
||||||
config=config,
|
|
||||||
corpus=corpus,
|
|
||||||
vectors_dir=vectors_dir,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
venv_local(
|
|
||||||
"spacy ud-run-test {test_data_dir} {experiment_dir} {corpus}".format(
|
|
||||||
test_data_dir=test_data_dir,
|
|
||||||
experiment_dir=experiment_dir,
|
|
||||||
config=config,
|
|
||||||
corpus=corpus,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
|
@ -17,6 +17,7 @@ Tests for spaCy modules and classes live in their own directories of the same na
|
||||||
5. [Helpers and utilities](#helpers-and-utilities)
|
5. [Helpers and utilities](#helpers-and-utilities)
|
||||||
6. [Contributing to the tests](#contributing-to-the-tests)
|
6. [Contributing to the tests](#contributing-to-the-tests)
|
||||||
|
|
||||||
|
|
||||||
## Running the tests
|
## Running the tests
|
||||||
|
|
||||||
To show print statements, run the tests with `py.test -s`. To abort after the
|
To show print statements, run the tests with `py.test -s`. To abort after the
|
||||||
|
@ -31,25 +32,26 @@ You can also run tests in a specific file or directory, or even only one
|
||||||
specific test:
|
specific test:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
py.test tests/tokenizer # run all tests in directory
|
py.test spacy/tests/tokenizer # run all tests in directory
|
||||||
py.test tests/tokenizer/test_exceptions.py # run all tests in file
|
py.test spacy/tests/tokenizer/test_exceptions.py # run all tests in file
|
||||||
py.test tests/tokenizer/test_exceptions.py::test_tokenizer_handles_emoji # run specific test
|
py.test spacy/tests/tokenizer/test_exceptions.py::test_tokenizer_handles_emoji # run specific test
|
||||||
```
|
```
|
||||||
|
|
||||||
## Dos and don'ts
|
## Dos and don'ts
|
||||||
|
|
||||||
To keep the behaviour of the tests consistent and predictable, we try to follow a few basic conventions:
|
To keep the behaviour of the tests consistent and predictable, we try to follow a few basic conventions:
|
||||||
|
|
||||||
- **Test names** should follow a pattern of `test_[module]_[tested behaviour]`. For example: `test_tokenizer_keeps_email` or `test_spans_override_sentiment`.
|
* **Test names** should follow a pattern of `test_[module]_[tested behaviour]`. For example: `test_tokenizer_keeps_email` or `test_spans_override_sentiment`.
|
||||||
- If you're testing for a bug reported in a specific issue, always create a **regression test**. Regression tests should be named `test_issue[ISSUE NUMBER]` and live in the [`regression`](regression) directory.
|
* If you're testing for a bug reported in a specific issue, always create a **regression test**. Regression tests should be named `test_issue[ISSUE NUMBER]` and live in the [`regression`](regression) directory.
|
||||||
- Only use `@pytest.mark.xfail` for tests that **should pass, but currently fail**. To test for desired negative behaviour, use `assert not` in your test.
|
* Only use `@pytest.mark.xfail` for tests that **should pass, but currently fail**. To test for desired negative behaviour, use `assert not` in your test.
|
||||||
- Very **extensive tests** that take a long time to run should be marked with `@pytest.mark.slow`. If your slow test is testing important behaviour, consider adding an additional simpler version.
|
* Very **extensive tests** that take a long time to run should be marked with `@pytest.mark.slow`. If your slow test is testing important behaviour, consider adding an additional simpler version.
|
||||||
- If tests require **loading the models**, they should be added to the [`spacy-models`](https://github.com/explosion/spacy-models) tests.
|
* If tests require **loading the models**, they should be added to the [`spacy-models`](https://github.com/explosion/spacy-models) tests.
|
||||||
- Before requiring the models, always make sure there is no other way to test the particular behaviour. In a lot of cases, it's sufficient to simply create a `Doc` object manually. See the section on [helpers and utility functions](#helpers-and-utilities) for more info on this.
|
* Before requiring the models, always make sure there is no other way to test the particular behaviour. In a lot of cases, it's sufficient to simply create a `Doc` object manually. See the section on [helpers and utility functions](#helpers-and-utilities) for more info on this.
|
||||||
- **Avoid unnecessary imports.** There should never be a need to explicitly import spaCy at the top of a file, and many components are available as [fixtures](#fixtures). You should also avoid wildcard imports (`from module import *`).
|
* **Avoid unnecessary imports.** There should never be a need to explicitly import spaCy at the top of a file, and many components are available as [fixtures](#fixtures). You should also avoid wildcard imports (`from module import *`).
|
||||||
- If you're importing from spaCy, **always use absolute imports**. For example: `from spacy.language import Language`.
|
* If you're importing from spaCy, **always use absolute imports**. For example: `from spacy.language import Language`.
|
||||||
- Don't forget the **unicode declarations** at the top of each file. This way, unicode strings won't have to be prefixed with `u`.
|
* Don't forget the **unicode declarations** at the top of each file. This way, unicode strings won't have to be prefixed with `u`.
|
||||||
- Try to keep the tests **readable and concise**. Use clear and descriptive variable names (`doc`, `tokens` and `text` are great), keep it short and only test for one behaviour at a time.
|
* Try to keep the tests **readable and concise**. Use clear and descriptive variable names (`doc`, `tokens` and `text` are great), keep it short and only test for one behaviour at a time.
|
||||||
|
|
||||||
|
|
||||||
## Parameters
|
## Parameters
|
||||||
|
|
||||||
|
@ -62,7 +64,7 @@ def test_tokenizer_keep_urls(tokenizer, text):
|
||||||
assert len(tokens) == 1
|
assert len(tokens) == 1
|
||||||
```
|
```
|
||||||
|
|
||||||
This will run the test once for each `text` value. Even if you're only testing one example, it's usually best to specify it as a parameter. This will later make it easier for others to quickly add additional test cases without having to modify the test.
|
This will run the test once for each `text` value. Even if you're only testing one example, it's usually best to specify it as a parameter. This will later make it easier for others to quickly add additional test cases without having to modify the test.
|
||||||
|
|
||||||
You can also specify parameters as tuples to test with multiple values per test:
|
You can also specify parameters as tuples to test with multiple values per test:
|
||||||
|
|
||||||
|
@ -79,17 +81,18 @@ To test for combinations of parameters, you can add several `parametrize` marker
|
||||||
|
|
||||||
This will run the test with all combinations of the two parameters `text` and `punct`. **Use this feature sparingly**, though, as it can easily cause unnecessary or undesired test bloat.
|
This will run the test with all combinations of the two parameters `text` and `punct`. **Use this feature sparingly**, though, as it can easily cause unnecessary or undesired test bloat.
|
||||||
|
|
||||||
|
|
||||||
## Fixtures
|
## Fixtures
|
||||||
|
|
||||||
Fixtures to create instances of spaCy objects and other components should only be defined once in the global [`conftest.py`](conftest.py). We avoid having per-directory conftest files, as this can easily lead to confusion.
|
Fixtures to create instances of spaCy objects and other components should only be defined once in the global [`conftest.py`](conftest.py). We avoid having per-directory conftest files, as this can easily lead to confusion.
|
||||||
|
|
||||||
These are the main fixtures that are currently available:
|
These are the main fixtures that are currently available:
|
||||||
|
|
||||||
| Fixture | Description |
|
| Fixture | Description |
|
||||||
| ----------------------------------- | ---------------------------------------------------------------------------- |
|
| --- | --- |
|
||||||
| `tokenizer` | Basic, language-independent tokenizer. Identical to the `xx` language class. |
|
| `tokenizer` | Basic, language-independent tokenizer. Identical to the `xx` language class. |
|
||||||
| `en_tokenizer`, `de_tokenizer`, ... | Creates an English, German etc. tokenizer. |
|
| `en_tokenizer`, `de_tokenizer`, ... | Creates an English, German etc. tokenizer. |
|
||||||
| `en_vocab` | Creates an instance of the English `Vocab`. |
|
| `en_vocab` | Creates an instance of the English `Vocab`. |
|
||||||
|
|
||||||
The fixtures can be used in all tests by simply setting them as an argument, like this:
|
The fixtures can be used in all tests by simply setting them as an argument, like this:
|
||||||
|
|
||||||
|
@ -104,6 +107,7 @@ If all tests in a file require a specific configuration, or use the same complex
|
||||||
|
|
||||||
Our new test setup comes with a few handy utility functions that can be imported from [`util.py`](util.py).
|
Our new test setup comes with a few handy utility functions that can be imported from [`util.py`](util.py).
|
||||||
|
|
||||||
|
|
||||||
### Constructing a `Doc` object manually with `get_doc()`
|
### Constructing a `Doc` object manually with `get_doc()`
|
||||||
|
|
||||||
Loading the models is expensive and not necessary if you're not actually testing the model performance. If all you need is a `Doc` object with annotations like heads, POS tags or the dependency parse, you can use `get_doc()` to construct it manually.
|
Loading the models is expensive and not necessary if you're not actually testing the model performance. If all you need is a `Doc` object with annotations like heads, POS tags or the dependency parse, you can use `get_doc()` to construct it manually.
|
||||||
|
@ -125,15 +129,15 @@ def test_doc_token_api_strings(en_tokenizer):
|
||||||
|
|
||||||
You can construct a `Doc` with the following arguments:
|
You can construct a `Doc` with the following arguments:
|
||||||
|
|
||||||
| Argument | Description |
|
| Argument | Description |
|
||||||
| -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| --- | --- |
|
||||||
| `vocab` | `Vocab` instance to use. If you're tokenizing before creating a `Doc`, make sure to use the tokenizer's vocab. Otherwise, you can also use the `en_vocab` fixture. **(required)** |
|
| `vocab` | `Vocab` instance to use. If you're tokenizing before creating a `Doc`, make sure to use the tokenizer's vocab. Otherwise, you can also use the `en_vocab` fixture. **(required)** |
|
||||||
| `words` | List of words, for example `[t.text for t in tokens]`. **(required)** |
|
| `words` | List of words, for example `[t.text for t in tokens]`. **(required)** |
|
||||||
| `heads` | List of heads as integers. |
|
| `heads` | List of heads as integers. |
|
||||||
| `pos` | List of POS tags as text values. |
|
| `pos` | List of POS tags as text values. |
|
||||||
| `tag` | List of tag names as text values. |
|
| `tag` | List of tag names as text values. |
|
||||||
| `dep` | List of dependencies as text values. |
|
| `dep` | List of dependencies as text values. |
|
||||||
| `ents` | List of entity tuples with `start`, `end`, `label` (for example `(0, 2, 'PERSON')`). The `label` will be looked up in `vocab.strings[label]`. |
|
| `ents` | List of entity tuples with `start`, `end`, `label` (for example `(0, 2, 'PERSON')`). The `label` will be looked up in `vocab.strings[label]`. |
|
||||||
|
|
||||||
Here's how to quickly get these values from within spaCy:
|
Here's how to quickly get these values from within spaCy:
|
||||||
|
|
||||||
|
@ -150,12 +154,12 @@ print([(ent.start, ent.end, ent.label_) for ent in doc.ents])
|
||||||
|
|
||||||
### Other utilities
|
### Other utilities
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| -------------------------------------------------- | ------------------------------------------------------------------------------------------------------------- |
|
| --- | --- |
|
||||||
| `apply_transition_sequence(parser, doc, sequence)` | Perform a series of pre-specified transitions, to put the parser in a desired state. |
|
| `apply_transition_sequence(parser, doc, sequence)` | Perform a series of pre-specified transitions, to put the parser in a desired state. |
|
||||||
| `add_vecs_to_vocab(vocab, vectors)` | Add list of vector tuples (`[("text", [1, 2, 3])]`) to given vocab. All vectors need to have the same length. |
|
| `add_vecs_to_vocab(vocab, vectors)` | Add list of vector tuples (`[("text", [1, 2, 3])]`) to given vocab. All vectors need to have the same length. |
|
||||||
| `get_cosine(vec1, vec2)` | Get cosine for two given vectors. |
|
| `get_cosine(vec1, vec2)` | Get cosine for two given vectors. |
|
||||||
| `assert_docs_equal(doc1, doc2)` | Compare two `Doc` objects and `assert` that they're equal. Tests for tokens, tags, dependencies and entities. |
|
| `assert_docs_equal(doc1, doc2)` | Compare two `Doc` objects and `assert` that they're equal. Tests for tokens, tags, dependencies and entities. |
|
||||||
|
|
||||||
## Contributing to the tests
|
## Contributing to the tests
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from spacy.pipeline import EntityRecognizer
|
from ...pipeline import EntityRecognizer
|
||||||
from spacy.tokens import Span
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from ..util import get_doc
|
from ..util import get_doc
|
||||||
|
from ...tokens import Span
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
def test_doc_add_entities_set_ents_iob(en_vocab):
|
def test_doc_add_entities_set_ents_iob(en_vocab):
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user