spaCy/spacy/tests/parser/test_add_label.py

# coding: utf8
from __future__ import unicode_literals

import pytest
import numpy.random
from thinc.neural.optimizers import Adam
from thinc.neural.ops import NumpyOps
from spacy.attrs import NORM
from spacy.gold import GoldParse
from spacy.vocab import Vocab
from spacy.tokens import Doc
from spacy.pipeline import DependencyParser


@pytest.fixture
def vocab():
    """Provide a ``Vocab`` whose NORM getter returns the string unchanged."""
    def _identity(string):
        return string

    return Vocab(lex_attr_getters={NORM: _identity})


@pytest.fixture
def parser(vocab):
    """Build a small ``DependencyParser`` and update it on one toy example.

    The parser is configured with small widths, given the single custom
    label ``'left'`` before ``begin_training`` is called, and then updated
    ten times on the four-word document ``a b c d`` with gold heads
    ``[1, 1, 3, 3]``.
    """
    toy_words = ('a', 'b', 'c', 'd')
    toy_heads = (1, 1, 3, 3)
    toy_deps = ('left', 'ROOT', 'left', 'ROOT')
    settings = (
        ('token_vector_width', 8),
        ('hidden_width', 30),
        ('hist_size', 0),
    )

    dep_parser = DependencyParser(vocab)
    for key, value in settings:
        dep_parser.cfg[key] = value
    dep_parser.add_label('left')
    dep_parser.begin_training([], **dep_parser.cfg)
    optimizer = Adam(NumpyOps(), 0.001)

    for _ in range(10):
        # Fresh containers on every step, exactly one Doc/GoldParse pair each.
        step_losses = {}
        example = Doc(vocab, words=list(toy_words))
        annotation = GoldParse(example, heads=list(toy_heads),
                               deps=list(toy_deps))
        dep_parser.update([example], [annotation], sgd=optimizer,
                          losses=step_losses)
    return dep_parser


def test_init_parser(parser):
    """Smoke test: requesting the ``parser`` fixture is the whole check.

    The body is intentionally empty; the test passes if the fixture can be
    constructed without raising.
    """


# TODO: This is flaky, because it depends on what the parser first learns.
# TODO: This now seems to be implicated in segfaults. Not sure what's up!
@pytest.mark.skip(reason="Flaky; also seems to be implicated in segfaults")
def test_add_label(parser):
    """Check that a label added after the initial updates can be learned.

    Steps:
    1. Parse ``a b c d`` with the parser from the fixture and check the
       heads and the ``'left'`` label on the first token.
    2. Call ``add_label('right')`` and check that the parse is unchanged.
    3. Update ten more times with token 0 relabelled ``'right'`` and check
       that both ``'right'`` and ``'left'`` are predicted.
    """
    words = ['a', 'b', 'c', 'd']

    def parse():
        # Always parse a fresh Doc so no earlier annotations are reused.
        return parser(Doc(parser.vocab, words=list(words)))

    def assert_left_parse(doc):
        # Gold heads are [1, 1, 3, 3]. The original asserted doc[2] twice
        # and never looked at doc[3]; compare all four heads instead.
        assert [token.head.i for token in doc] == [1, 1, 3, 3]
        assert doc[0].dep_ == 'left'

    assert_left_parse(parse())

    # Adding a label must not change what the parser already predicts.
    parser.add_label('right')
    assert_left_parse(parse())

    sgd = Adam(NumpyOps(), 0.001)
    for _ in range(10):
        doc = Doc(parser.vocab, words=list(words))
        gold = GoldParse(doc, heads=[1, 1, 3, 3],
                         deps=['right', 'ROOT', 'left', 'ROOT'])
        parser.update([doc], [gold], sgd=sgd, losses={})

    doc = parse()
    assert doc[0].dep_ == 'right'
    assert doc[2].dep_ == 'left'
💫 Refactor test suite (#2568) ## Description Related issues: #2379 (should be fixed by separating model tests) * total execution time down from > 300 seconds to under 60 seconds 🎉 * removed all model-specific tests that could only really be run manually anyway – those will now live in a separate test suite in the [`spacy-models`](https://github.com/explosion/spacy-models) repository and are already integrated into our new model training infrastructure * changed all relative imports to absolute imports to prepare for moving the test suite from `/spacy/tests` to `/tests` (it'll now always test against the installed version) * merged old regression tests into collections, e.g. `test_issue1001-1500.py` (about 90% of the regression tests are very short anyways) * tidied up and rewrote existing tests wherever possible ### Todo - [ ] move tests to `/tests` and adjust CI commands accordingly - [x] move model test suite from internal repo to `spacy-models` - [x] ~~investigate why `pipeline/test_textcat.py` is flakey~~ - [x] review old regression tests (leftover files) and see if they can be merged, simplified or deleted - [ ] update documentation on how to run tests ### Types of change enhancement, tests ## Checklist <!--- Before you submit the PR, go over this checklist and make sure you can tick off all the boxes. [] -> [x] --> - [x] I have submitted the spaCy Contributor Agreement. - [x] I ran the tests, and all new and existing tests passed. - [ ] My changes don't require a change to the documentation, or if they do, I've added all required information. 2018-07-25 00:38:44 +03:00			`# coding: utf8`
Add tests for adding parser actions 2017-10-09 04:42:35 +03:00			`from __future__ import unicode_literals`
💫 Refactor test suite (#2568) ## Description Related issues: #2379 (should be fixed by separating model tests) * total execution time down from > 300 seconds to under 60 seconds 🎉 * removed all model-specific tests that could only really be run manually anyway – those will now live in a separate test suite in the [`spacy-models`](https://github.com/explosion/spacy-models) repository and are already integrated into our new model training infrastructure * changed all relative imports to absolute imports to prepare for moving the test suite from `/spacy/tests` to `/tests` (it'll now always test against the installed version) * merged old regression tests into collections, e.g. `test_issue1001-1500.py` (about 90% of the regression tests are very short anyways) * tidied up and rewrote existing tests wherever possible ### Todo - [ ] move tests to `/tests` and adjust CI commands accordingly - [x] move model test suite from internal repo to `spacy-models` - [x] ~~investigate why `pipeline/test_textcat.py` is flakey~~ - [x] review old regression tests (leftover files) and see if they can be merged, simplified or deleted - [ ] update documentation on how to run tests ### Types of change enhancement, tests ## Checklist <!--- Before you submit the PR, go over this checklist and make sure you can tick off all the boxes. [] -> [x] --> - [x] I have submitted the spaCy Contributor Agreement. - [x] I ran the tests, and all new and existing tests passed. - [ ] My changes don't require a change to the documentation, or if they do, I've added all required information. 2018-07-25 00:38:44 +03:00
Add tests for adding parser actions 2017-10-09 04:42:35 +03:00			`import pytest`
			`import numpy.random`
			`from thinc.neural.optimizers import Adam`
			`from thinc.neural.ops import NumpyOps`
💫 Refactor test suite (#2568) ## Description Related issues: #2379 (should be fixed by separating model tests) * total execution time down from > 300 seconds to under 60 seconds 🎉 * removed all model-specific tests that could only really be run manually anyway – those will now live in a separate test suite in the [`spacy-models`](https://github.com/explosion/spacy-models) repository and are already integrated into our new model training infrastructure * changed all relative imports to absolute imports to prepare for moving the test suite from `/spacy/tests` to `/tests` (it'll now always test against the installed version) * merged old regression tests into collections, e.g. `test_issue1001-1500.py` (about 90% of the regression tests are very short anyways) * tidied up and rewrote existing tests wherever possible ### Todo - [ ] move tests to `/tests` and adjust CI commands accordingly - [x] move model test suite from internal repo to `spacy-models` - [x] ~~investigate why `pipeline/test_textcat.py` is flakey~~ - [x] review old regression tests (leftover files) and see if they can be merged, simplified or deleted - [ ] update documentation on how to run tests ### Types of change enhancement, tests ## Checklist <!--- Before you submit the PR, go over this checklist and make sure you can tick off all the boxes. [] -> [x] --> - [x] I have submitted the spaCy Contributor Agreement. - [x] I ran the tests, and all new and existing tests passed. - [ ] My changes don't require a change to the documentation, or if they do, I've added all required information. 2018-07-25 00:38:44 +03:00			`from spacy.attrs import NORM`
			`from spacy.gold import GoldParse`
			`from spacy.vocab import Vocab`
			`from spacy.tokens import Doc`
			`from spacy.pipeline import DependencyParser`
Add tests for adding parser actions 2017-10-09 04:42:35 +03:00

			`@pytest.fixture`
			`def vocab():`
			`return Vocab(lex_attr_getters={NORM: lambda s: s})`


			`@pytest.fixture`
			`def parser(vocab):`
Fix names of pipeline components NeuralDependencyParser --> DependencyParser NeuralEntityRecognizer --> EntityRecognizer TokenVectorEncoder --> Tensorizer NeuralLabeller --> MultitaskObjective 2017-10-26 13:38:23 +03:00			`parser = DependencyParser(vocab)`
Update add label test 2017-10-10 23:57:41 +03:00			`parser.cfg['token_vector_width'] = 8`
			`parser.cfg['hidden_width'] = 30`
Add tests for adding parser actions 2017-10-09 04:42:35 +03:00			`parser.cfg['hist_size'] = 0`
			`parser.add_label('left')`
			`parser.begin_training([], **parser.cfg)`
			`sgd = Adam(NumpyOps(), 0.001)`

Update add label test 2017-10-10 23:57:41 +03:00			`for i in range(10):`
Add tests for adding parser actions 2017-10-09 04:42:35 +03:00			`losses = {}`
			`doc = Doc(vocab, words=['a', 'b', 'c', 'd'])`
			`gold = GoldParse(doc, heads=[1, 1, 3, 3],`
			`deps=['left', 'ROOT', 'left', 'ROOT'])`
			`parser.update([doc], [gold], sgd=sgd, losses=losses)`
			`return parser`

💫 Refactor test suite (#2568) ## Description Related issues: #2379 (should be fixed by separating model tests) * total execution time down from > 300 seconds to under 60 seconds 🎉 * removed all model-specific tests that could only really be run manually anyway – those will now live in a separate test suite in the [`spacy-models`](https://github.com/explosion/spacy-models) repository and are already integrated into our new model training infrastructure * changed all relative imports to absolute imports to prepare for moving the test suite from `/spacy/tests` to `/tests` (it'll now always test against the installed version) * merged old regression tests into collections, e.g. `test_issue1001-1500.py` (about 90% of the regression tests are very short anyways) * tidied up and rewrote existing tests wherever possible ### Todo - [ ] move tests to `/tests` and adjust CI commands accordingly - [x] move model test suite from internal repo to `spacy-models` - [x] ~~investigate why `pipeline/test_textcat.py` is flakey~~ - [x] review old regression tests (leftover files) and see if they can be merged, simplified or deleted - [ ] update documentation on how to run tests ### Types of change enhancement, tests ## Checklist <!--- Before you submit the PR, go over this checklist and make sure you can tick off all the boxes. [] -> [x] --> - [x] I have submitted the spaCy Contributor Agreement. - [x] I ran the tests, and all new and existing tests passed. - [ ] My changes don't require a change to the documentation, or if they do, I've added all required information. 2018-07-25 00:38:44 +03:00
Update add label test 2017-10-10 23:57:41 +03:00			`def test_init_parser(parser):`
			`pass`
Add tests for adding parser actions 2017-10-09 04:42:35 +03:00
💫 Refactor test suite (#2568) ## Description Related issues: #2379 (should be fixed by separating model tests) * total execution time down from > 300 seconds to under 60 seconds 🎉 * removed all model-specific tests that could only really be run manually anyway – those will now live in a separate test suite in the [`spacy-models`](https://github.com/explosion/spacy-models) repository and are already integrated into our new model training infrastructure * changed all relative imports to absolute imports to prepare for moving the test suite from `/spacy/tests` to `/tests` (it'll now always test against the installed version) * merged old regression tests into collections, e.g. `test_issue1001-1500.py` (about 90% of the regression tests are very short anyways) * tidied up and rewrote existing tests wherever possible ### Todo - [ ] move tests to `/tests` and adjust CI commands accordingly - [x] move model test suite from internal repo to `spacy-models` - [x] ~~investigate why `pipeline/test_textcat.py` is flakey~~ - [x] review old regression tests (leftover files) and see if they can be merged, simplified or deleted - [ ] update documentation on how to run tests ### Types of change enhancement, tests ## Checklist <!--- Before you submit the PR, go over this checklist and make sure you can tick off all the boxes. [] -> [x] --> - [x] I have submitted the spaCy Contributor Agreement. - [x] I ran the tests, and all new and existing tests passed. - [ ] My changes don't require a change to the documentation, or if they do, I've added all required information. 2018-07-25 00:38:44 +03:00
Merge 'tidy-up' changes into branch. Resolve conflicts 2017-10-28 14:16:06 +03:00			`# TODO: This is flakey, because it depends on what the parser first learns.`
Skip flakey parser test 2018-08-15 16:37:04 +03:00			`# TODO: This now seems to be implicated in segfaults. Not sure what's up!`
			`@pytest.mark.skip`
Add tests for adding parser actions 2017-10-09 04:42:35 +03:00			`def test_add_label(parser):`
			`doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])`
			`doc = parser(doc)`
			`assert doc[0].head.i == 1`
			`assert doc[0].dep_ == 'left'`
			`assert doc[1].head.i == 1`
			`assert doc[2].head.i == 3`
			`assert doc[2].head.i == 3`
			`parser.add_label('right')`
			`doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])`
			`doc = parser(doc)`
			`assert doc[0].head.i == 1`
			`assert doc[0].dep_ == 'left'`
			`assert doc[1].head.i == 1`
			`assert doc[2].head.i == 3`
			`assert doc[2].head.i == 3`
			`sgd = Adam(NumpyOps(), 0.001)`
			`for i in range(10):`
			`losses = {}`
			`doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])`
			`gold = GoldParse(doc, heads=[1, 1, 3, 3],`
			`deps=['right', 'ROOT', 'left', 'ROOT'])`
			`parser.update([doc], [gold], sgd=sgd, losses=losses)`
			`doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])`
			`doc = parser(doc)`
			`assert doc[0].dep_ == 'right'`
			`assert doc[2].dep_ == 'left'`