Mirror of https://github.com/explosion/spaCy.git (synced 2024-12-24 17:06:29 +03:00)

Commit 3d8fd4b461: Revert #4334
Parent commit: 8489ec08ba
@@ -14,7 +14,7 @@ install:
 script:
   - "cat /proc/cpuinfo | grep flags | head -n 1"
   - "pip install pytest pytest-timeout"
-  - "python -m pytest tests --tb=native spacy"
+  - "python -m pytest --tb=native spacy"
 branches:
   except:
     - spacy.io
@@ -175,12 +175,12 @@ The description text can be very short – we don't want to make this too
 bureaucratic.
 
 Next, create a test file named `test_issue[ISSUE NUMBER].py` in the
-[`tests/regression`](tests/regression) folder. Test for the bug
+[`spacy/tests/regression`](spacy/tests/regression) folder. Test for the bug
 you're fixing, and make sure the test fails. Next, add and commit your test file
 referencing the issue number in the commit message. Finally, fix the bug, make
 sure your test passes and reference the issue in your commit message.
 
-📖 **For more information on how to add tests, check out the [tests README](tests/README.md).**
+📖 **For more information on how to add tests, check out the [tests README](spacy/tests/README.md).**
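For illustration, a minimal sketch of what such a regression test file could look like, assuming the post-revert `spacy/tests/regression` layout. The issue number, file name and tested behaviour below are hypothetical, not taken from the repository:

```python
# spacy/tests/regression/test_issue0000.py  (hypothetical issue number)
# coding: utf-8
from __future__ import unicode_literals

from spacy.lang.en import English


def test_issue0000():
    # Reproduce the reported behaviour with a blank English pipeline
    # (tokenizer only, no statistical models required).
    nlp = English()
    doc = nlp("This is a sentence.")
    assert doc.text == "This is a sentence."
```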
 
 ## Code conventions
 
@@ -425,7 +425,7 @@ spaCy uses the [pytest](http://doc.pytest.org/) framework for testing. For more
 info on this, see the [pytest documentation](http://docs.pytest.org/en/latest/contents.html).
 Tests for spaCy modules and classes live in their own directories of the same
 name. For example, tests for the `Tokenizer` can be found in
-[`tests/tokenizer`](tests/tokenizer). To be interpreted and run,
+[`/spacy/tests/tokenizer`](spacy/tests/tokenizer). To be interpreted and run,
 all test files and test functions need to be prefixed with `test_`.
 
 When adding tests, make sure to use descriptive names, keep the code short and
@@ -440,7 +440,7 @@ you're not actually testing the model performance. If all you need is a `Doc`
 object with annotations like heads, POS tags or the dependency parse, you can
 use the `get_doc()` utility function to construct it manually.
 
-📖 **For more guidelines and information on how to add tests, check out the [tests README](tests/README.md).**
+📖 **For more guidelines and information on how to add tests, check out the [tests README](spacy/tests/README.md).**
 
 ## Updating the website
@@ -270,7 +270,7 @@ VS 2010 (Python 3.4) and VS 2015 (Python 3.5).
 
 ## Run tests
 
-spaCy comes with an [extensive test suite](tests). In order to run the
+spaCy comes with an [extensive test suite](spacy/tests). In order to run the
 tests, you'll usually want to clone the repository and build spaCy from source.
 This will also install the required development dependencies and test utilities
 defined in the `requirements.txt`.
@@ -98,5 +98,5 @@ jobs:
     pip install dist/$SDIST
   displayName: 'Install from sdist'
 
-- script: python -m pytest tests
+- script: python -m pytest spacy/tests
   displayName: 'Run tests'
fabfile.py (vendored, 124 changed lines)
@@ -3,152 +3,120 @@ from __future__ import unicode_literals, print_function
 
 import contextlib
 from pathlib import Path
-from fabric.api import local, lcd, env
+from fabric.api import local, lcd, env, settings, prefix
 from os import path, environ
 import shutil
 import sys
 
 
 PWD = path.dirname(__file__)
-ENV = environ["VENV_DIR"] if "VENV_DIR" in environ else ".env"
+ENV = environ['VENV_DIR'] if 'VENV_DIR' in environ else '.env'
 VENV_DIR = Path(PWD) / ENV
 
 
 @contextlib.contextmanager
-def virtualenv(name, create=False, python="/usr/bin/python3.6"):
+def virtualenv(name, create=False, python='/usr/bin/python3.6'):
     python = Path(python).resolve()
     env_path = VENV_DIR
     if create:
         if env_path.exists():
             shutil.rmtree(str(env_path))
-        local("{python} -m venv {env_path}".format(python=python, env_path=VENV_DIR))
-
+        local('{python} -m venv {env_path}'.format(python=python, env_path=VENV_DIR))
     def wrapped_local(cmd, env_vars=[], capture=False, direct=False):
-        return local(
-            "source {}/bin/activate && {}".format(env_path, cmd),
-            shell="/bin/bash",
-            capture=False,
-        )
-
+        return local('source {}/bin/activate && {}'.format(env_path, cmd),
+                     shell='/bin/bash', capture=False)
     yield wrapped_local
 
 
-def env(lang="python3.6"):
+def env(lang='python3.6'):
     if VENV_DIR.exists():
-        local("rm -rf {env}".format(env=VENV_DIR))
-    if lang.startswith("python3"):
-        local("{lang} -m venv {env}".format(lang=lang, env=VENV_DIR))
+        local('rm -rf {env}'.format(env=VENV_DIR))
+    if lang.startswith('python3'):
+        local('{lang} -m venv {env}'.format(lang=lang, env=VENV_DIR))
     else:
-        local("{lang} -m pip install virtualenv --no-cache-dir".format(lang=lang))
-        local(
-            "{lang} -m virtualenv {env} --no-cache-dir".format(lang=lang, env=VENV_DIR)
-        )
+        local('{lang} -m pip install virtualenv --no-cache-dir'.format(lang=lang))
+        local('{lang} -m virtualenv {env} --no-cache-dir'.format(lang=lang, env=VENV_DIR))
     with virtualenv(VENV_DIR) as venv_local:
-        print(venv_local("python --version", capture=True))
-        venv_local("pip install --upgrade setuptools --no-cache-dir")
-        venv_local("pip install pytest --no-cache-dir")
-        venv_local("pip install wheel --no-cache-dir")
-        venv_local("pip install -r requirements.txt --no-cache-dir")
-        venv_local("pip install pex --no-cache-dir")
+        print(venv_local('python --version', capture=True))
+        venv_local('pip install --upgrade setuptools --no-cache-dir')
+        venv_local('pip install pytest --no-cache-dir')
+        venv_local('pip install wheel --no-cache-dir')
+        venv_local('pip install -r requirements.txt --no-cache-dir')
+        venv_local('pip install pex --no-cache-dir')
 
 
 def install():
     with virtualenv(VENV_DIR) as venv_local:
-        venv_local("pip install dist/*.tar.gz")
+        venv_local('pip install dist/*.tar.gz')
 
 
 def make():
     with lcd(path.dirname(__file__)):
-        local(
-            "export PYTHONPATH=`pwd` && source .env/bin/activate && python setup.py build_ext --inplace",
-            shell="/bin/bash",
-        )
-
+        local('export PYTHONPATH=`pwd` && source .env/bin/activate && python setup.py build_ext --inplace',
+              shell='/bin/bash')
 
 def sdist():
     with virtualenv(VENV_DIR) as venv_local:
         with lcd(path.dirname(__file__)):
-            local("python -m pip install -U setuptools srsly")
-            local("python setup.py sdist")
-
+            local('python -m pip install -U setuptools srsly')
+            local('python setup.py sdist')
 
 def wheel():
     with virtualenv(VENV_DIR) as venv_local:
         with lcd(path.dirname(__file__)):
-            venv_local("python setup.py bdist_wheel")
-
+            venv_local('python setup.py bdist_wheel')
 
 def pex():
     with virtualenv(VENV_DIR) as venv_local:
         with lcd(path.dirname(__file__)):
-            sha = local("git rev-parse --short HEAD", capture=True)
-            venv_local(
-                "pex dist/*.whl -e spacy -o dist/spacy-%s.pex" % sha, direct=True
-            )
+            sha = local('git rev-parse --short HEAD', capture=True)
+            venv_local('pex dist/*.whl -e spacy -o dist/spacy-%s.pex' % sha,
+                       direct=True)
 
 
 def clean():
     with lcd(path.dirname(__file__)):
-        local("rm -f dist/*.whl")
-        local("rm -f dist/*.pex")
+        local('rm -f dist/*.whl')
+        local('rm -f dist/*.pex')
         with virtualenv(VENV_DIR) as venv_local:
-            venv_local("python setup.py clean --all")
+            venv_local('python setup.py clean --all')
 
 
 def test():
     with virtualenv(VENV_DIR) as venv_local:
         with lcd(path.dirname(__file__)):
-            venv_local("pytest -x tests")
-
+            venv_local('pytest -x spacy/tests')
 
 def train():
-    args = environ.get("SPACY_TRAIN_ARGS", "")
+    args = environ.get('SPACY_TRAIN_ARGS', '')
     with virtualenv(VENV_DIR) as venv_local:
-        venv_local("spacy train {args}".format(args=args))
+        venv_local('spacy train {args}'.format(args=args))
 
 
-def conll17(treebank_dir, experiment_dir, vectors_dir, config, corpus=""):
-    is_not_clean = local("git status --porcelain", capture=True)
+def conll17(treebank_dir, experiment_dir, vectors_dir, config, corpus=''):
+    is_not_clean = local('git status --porcelain', capture=True)
     if is_not_clean:
         print("Repository is not clean")
         print(is_not_clean)
         sys.exit(1)
-    git_sha = local("git rev-parse --short HEAD", capture=True)
-    config_checksum = local("sha256sum {config}".format(config=config), capture=True)
-    experiment_dir = Path(experiment_dir) / "{}--{}".format(
-        config_checksum[:6], git_sha
-    )
+    git_sha = local('git rev-parse --short HEAD', capture=True)
+    config_checksum = local('sha256sum {config}'.format(config=config), capture=True)
+    experiment_dir = Path(experiment_dir) / '{}--{}'.format(config_checksum[:6], git_sha)
    if not experiment_dir.exists():
         experiment_dir.mkdir()
-    test_data_dir = Path(treebank_dir) / "ud-test-v2.0-conll2017"
+    test_data_dir = Path(treebank_dir) / 'ud-test-v2.0-conll2017'
     assert test_data_dir.exists()
     assert test_data_dir.is_dir()
     if corpus:
         corpora = [corpus]
     else:
-        corpora = ["UD_English", "UD_Chinese", "UD_Japanese", "UD_Vietnamese"]
 
-    local(
-        "cp {config} {experiment_dir}/config.json".format(
-            config=config, experiment_dir=experiment_dir
-        )
-    )
+        corpora = ['UD_English', 'UD_Chinese', 'UD_Japanese', 'UD_Vietnamese']
+    local('cp {config} {experiment_dir}/config.json'.format(config=config, experiment_dir=experiment_dir))
     with virtualenv(VENV_DIR) as venv_local:
         for corpus in corpora:
-            venv_local(
-                "spacy ud-train {treebank_dir} {experiment_dir} {config} {corpus} -v {vectors_dir}".format(
-                    treebank_dir=treebank_dir,
-                    experiment_dir=experiment_dir,
-                    config=config,
-                    corpus=corpus,
-                    vectors_dir=vectors_dir,
-                )
-            )
-            venv_local(
-                "spacy ud-run-test {test_data_dir} {experiment_dir} {corpus}".format(
-                    test_data_dir=test_data_dir,
-                    experiment_dir=experiment_dir,
-                    config=config,
-                    corpus=corpus,
-                )
-            )
+            venv_local('spacy ud-train {treebank_dir} {experiment_dir} {config} {corpus} -v {vectors_dir}'.format(
+                treebank_dir=treebank_dir, experiment_dir=experiment_dir, config=config, corpus=corpus, vectors_dir=vectors_dir))
+            venv_local('spacy ud-run-test {test_data_dir} {experiment_dir} {corpus}'.format(
+                test_data_dir=test_data_dir, experiment_dir=experiment_dir, config=config, corpus=corpus))
@@ -17,6 +17,7 @@ Tests for spaCy modules and classes live in their own directories of the same name.
 5. [Helpers and utilities](#helpers-and-utilities)
 6. [Contributing to the tests](#contributing-to-the-tests)
 
 
 ## Running the tests
 
 To show print statements, run the tests with `py.test -s`. To abort after the
@@ -31,25 +32,26 @@ You can also run tests in a specific file or directory, or even only one
 specific test:
 
 ```bash
-py.test tests/tokenizer # run all tests in directory
-py.test tests/tokenizer/test_exceptions.py # run all tests in file
-py.test tests/tokenizer/test_exceptions.py::test_tokenizer_handles_emoji # run specific test
+py.test spacy/tests/tokenizer # run all tests in directory
+py.test spacy/tests/tokenizer/test_exceptions.py # run all tests in file
+py.test spacy/tests/tokenizer/test_exceptions.py::test_tokenizer_handles_emoji # run specific test
 ```
 
 ## Dos and don'ts
 
 To keep the behaviour of the tests consistent and predictable, we try to follow a few basic conventions:
 
-- **Test names** should follow a pattern of `test_[module]_[tested behaviour]`. For example: `test_tokenizer_keeps_email` or `test_spans_override_sentiment`.
-- If you're testing for a bug reported in a specific issue, always create a **regression test**. Regression tests should be named `test_issue[ISSUE NUMBER]` and live in the [`regression`](regression) directory.
-- Only use `@pytest.mark.xfail` for tests that **should pass, but currently fail**. To test for desired negative behaviour, use `assert not` in your test.
-- Very **extensive tests** that take a long time to run should be marked with `@pytest.mark.slow`. If your slow test is testing important behaviour, consider adding an additional simpler version.
-- If tests require **loading the models**, they should be added to the [`spacy-models`](https://github.com/explosion/spacy-models) tests.
-- Before requiring the models, always make sure there is no other way to test the particular behaviour. In a lot of cases, it's sufficient to simply create a `Doc` object manually. See the section on [helpers and utility functions](#helpers-and-utilities) for more info on this.
-- **Avoid unnecessary imports.** There should never be a need to explicitly import spaCy at the top of a file, and many components are available as [fixtures](#fixtures). You should also avoid wildcard imports (`from module import *`).
-- If you're importing from spaCy, **always use absolute imports**. For example: `from spacy.language import Language`.
-- Don't forget the **unicode declarations** at the top of each file. This way, unicode strings won't have to be prefixed with `u`.
-- Try to keep the tests **readable and concise**. Use clear and descriptive variable names (`doc`, `tokens` and `text` are great), keep it short and only test for one behaviour at a time.
+* **Test names** should follow a pattern of `test_[module]_[tested behaviour]`. For example: `test_tokenizer_keeps_email` or `test_spans_override_sentiment`.
+* If you're testing for a bug reported in a specific issue, always create a **regression test**. Regression tests should be named `test_issue[ISSUE NUMBER]` and live in the [`regression`](regression) directory.
+* Only use `@pytest.mark.xfail` for tests that **should pass, but currently fail**. To test for desired negative behaviour, use `assert not` in your test.
+* Very **extensive tests** that take a long time to run should be marked with `@pytest.mark.slow`. If your slow test is testing important behaviour, consider adding an additional simpler version.
+* If tests require **loading the models**, they should be added to the [`spacy-models`](https://github.com/explosion/spacy-models) tests.
+* Before requiring the models, always make sure there is no other way to test the particular behaviour. In a lot of cases, it's sufficient to simply create a `Doc` object manually. See the section on [helpers and utility functions](#helpers-and-utilities) for more info on this.
+* **Avoid unnecessary imports.** There should never be a need to explicitly import spaCy at the top of a file, and many components are available as [fixtures](#fixtures). You should also avoid wildcard imports (`from module import *`).
+* If you're importing from spaCy, **always use absolute imports**. For example: `from spacy.language import Language`.
+* Don't forget the **unicode declarations** at the top of each file. This way, unicode strings won't have to be prefixed with `u`.
+* Try to keep the tests **readable and concise**. Use clear and descriptive variable names (`doc`, `tokens` and `text` are great), keep it short and only test for one behaviour at a time.
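For illustration, here is a small sketch of how a couple of the conventions above look in practice. The tests are hypothetical examples rather than tests from the suite; note how the `en_tokenizer` fixture is requested simply by naming it as an argument:

```python
import pytest


def test_tokenizer_splits_trailing_period(en_tokenizer):
    # Naming pattern: test_[module]_[tested behaviour]
    tokens = en_tokenizer("This is a test.")
    assert [t.text for t in tokens] == ["This", "is", "a", "test", "."]
    # Desired negative behaviour is checked with `assert not`, not with xfail
    assert not any(t.is_space for t in tokens)


@pytest.mark.slow
def test_tokenizer_handles_long_text(en_tokenizer):
    # An extensive test that takes a while to run, so it's marked as slow
    tokens = en_tokenizer("This is a sentence. " * 1000)
    assert len(tokens) > 1000
```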
 
 
 ## Parameters
 
@@ -79,6 +81,7 @@ To test for combinations of parameters, you can add several `parametrize` markers
 
 This will run the test with all combinations of the two parameters `text` and `punct`. **Use this feature sparingly**, though, as it can easily cause unnecessary or undesired test bloat.
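A sketch of what such stacked `parametrize` markers can look like (a hypothetical test, not the README's own example):

```python
import pytest


@pytest.mark.parametrize("text", ["Hello", "goodbye"])
@pytest.mark.parametrize("punct", ["!", "?"])
def test_tokenizer_splits_trailing_punct(en_tokenizer, text, punct):
    # Runs once per combination of text and punct, i.e. four times in total
    tokens = en_tokenizer(text + punct)
    assert tokens[0].text == text
    assert tokens[1].text == punct
    assert len(tokens) == 2
```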
 
 
 ## Fixtures
 
 Fixtures to create instances of spaCy objects and other components should only be defined once in the global [`conftest.py`](conftest.py). We avoid having per-directory conftest files, as this can easily lead to confusion.
@@ -86,7 +89,7 @@ Fixtures to create instances of spaCy objects and other components should only be
 These are the main fixtures that are currently available:
 
 | Fixture | Description |
-| ----------------------------------- | ---------------------------------------------------------------------------- |
+| --- | --- |
 | `tokenizer` | Basic, language-independent tokenizer. Identical to the `xx` language class. |
 | `en_tokenizer`, `de_tokenizer`, ... | Creates an English, German etc. tokenizer. |
 | `en_vocab` | Creates an instance of the English `Vocab`. |
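As a sketch of how fixtures like these are provided, here is a hypothetical fixture definition as it might appear in the global `conftest.py` (this is not one of the real fixtures):

```python
import pytest
from spacy.lang.en import English


@pytest.fixture(scope="session")
def en_blank_nlp():
    # A blank English pipeline (tokenizer only), shared across the session
    return English()
```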
@@ -104,6 +107,7 @@ If all tests in a file require a specific configuration, or use the same complex
 
 Our new test setup comes with a few handy utility functions that can be imported from [`util.py`](util.py).
 
 
 ### Constructing a `Doc` object manually with `get_doc()`
 
 Loading the models is expensive and not necessary if you're not actually testing the model performance. If all you need is a `Doc` object with annotations like heads, POS tags or the dependency parse, you can use `get_doc()` to construct it manually.
@@ -126,7 +130,7 @@ def test_doc_token_api_strings(en_tokenizer):
 You can construct a `Doc` with the following arguments:
 
 | Argument | Description |
-| -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| --- | --- |
 | `vocab` | `Vocab` instance to use. If you're tokenizing before creating a `Doc`, make sure to use the tokenizer's vocab. Otherwise, you can also use the `en_vocab` fixture. **(required)** |
 | `words` | List of words, for example `[t.text for t in tokens]`. **(required)** |
 | `heads` | List of heads as integers. |
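A minimal hypothetical test using `get_doc()` might look like the sketch below. It assumes the `spacy/tests` layout for the import path and that heads are given as offsets relative to each token's own index:

```python
from spacy.tests.util import get_doc  # import path assumes the spacy/tests layout


def test_example_constructs_doc_manually(en_vocab):
    words = ["Give", "it", "back"]
    heads = [0, -1, -2]  # assumed: offsets relative to each token's index
    deps = ["ROOT", "dobj", "prt"]
    doc = get_doc(en_vocab, words=words, heads=heads, deps=deps)
    assert doc[1].head.text == "Give"
    assert doc[2].head.text == "Give"
```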
@@ -151,7 +155,7 @@ print([(ent.start, ent.end, ent.label_) for ent in doc.ents])
 ### Other utilities
 
 | Name | Description |
-| -------------------------------------------------- | ------------------------------------------------------------------------------------------------------------- |
+| --- | --- |
 | `apply_transition_sequence(parser, doc, sequence)` | Perform a series of pre-specified transitions, to put the parser in a desired state. |
 | `add_vecs_to_vocab(vocab, vectors)` | Add list of vector tuples (`[("text", [1, 2, 3])]`) to given vocab. All vectors need to have the same length. |
 | `get_cosine(vec1, vec2)` | Get cosine for two given vectors. |
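A hypothetical usage sketch for the last two helpers, based only on the signatures and descriptions in the table above (the import path and the concrete numbers are illustrative):

```python
from spacy.tests.util import add_vecs_to_vocab, get_cosine  # assumes the spacy/tests layout
from spacy.vocab import Vocab


def test_example_vector_helpers():
    vocab = Vocab()
    add_vecs_to_vocab(vocab, [("apple", [1, 2, 3]), ("orange", [1, 2, 2])])
    cosine = get_cosine(vocab["apple"].vector, vocab["orange"].vector)
    assert 0.9 < cosine < 1.0
```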
@@ -1,11 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from spacy.pipeline import EntityRecognizer
-from spacy.tokens import Span
-import pytest
-
+from ...pipeline import EntityRecognizer
+from ..util import get_doc
+from ...tokens import Span
+
+import pytest
 
 
 def test_doc_add_entities_set_ents_iob(en_vocab):
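The hunk above only switches this regression test's imports; for context, here is a sketch of the kind of behaviour a test with this name might check. The body is illustrative and is not the actual test:

```python
from spacy.tokens import Doc, Span


def test_doc_add_entities_set_ents_iob(en_vocab):
    doc = Doc(en_vocab, words=["I", "live", "in", "London"])
    doc.ents = [Span(doc, 3, 4, label="GPE")]
    # Tokens outside the entity are marked "O", the entity start is "B"
    assert [t.ent_iob_ for t in doc] == ["O", "O", "O", "B"]
    assert doc.ents[0].text == "London"
```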
Some files were not shown because too many files have changed in this diff.