spaCy/spacy/tests/parser/test_preset_sbd.py
Ines Montani b6e991440c 💫 Tidy up and auto-format tests (#2967)
* Auto-format tests with black

* Add flake8 config

* Tidy up and remove unused imports

* Fix redefinitions of test functions

* Replace orths_and_spaces with words and spaces

* Fix compatibility with pytest 4.0

* xfail test for now

Test was previously overwritten by following test due to naming conflict, so failure wasn't reported

* Unfail passing test

* Only use fixture via arguments

Fixes pytest 4.0 compatibility
2018-11-27 01:09:36 +01:00

76 lines
2.0 KiB
Python

# coding: utf8
from __future__ import unicode_literals
import pytest
from thinc.neural.optimizers import Adam
from thinc.neural.ops import NumpyOps
from spacy.attrs import NORM
from spacy.gold import GoldParse
from spacy.vocab import Vocab
from spacy.tokens import Doc
from spacy.pipeline import DependencyParser
@pytest.fixture
def vocab():
return Vocab(lex_attr_getters={NORM: lambda s: s})
@pytest.fixture
def parser(vocab):
parser = DependencyParser(vocab)
parser.cfg["token_vector_width"] = 4
parser.cfg["hidden_width"] = 32
# parser.add_label('right')
parser.add_label("left")
parser.begin_training([], **parser.cfg)
sgd = Adam(NumpyOps(), 0.001)
for i in range(10):
losses = {}
doc = Doc(vocab, words=["a", "b", "c", "d"])
gold = GoldParse(doc, heads=[1, 1, 3, 3], deps=["left", "ROOT", "left", "ROOT"])
parser.update([doc], [gold], sgd=sgd, losses=losses)
return parser
def test_no_sentences(parser):
doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
doc = parser(doc)
assert len(list(doc.sents)) >= 1
def test_sents_1(parser):
doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
doc[2].sent_start = True
doc = parser(doc)
assert len(list(doc.sents)) >= 2
doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
doc[1].sent_start = False
doc[2].sent_start = True
doc[3].sent_start = False
doc = parser(doc)
assert len(list(doc.sents)) == 2
def test_sents_1_2(parser):
doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
doc[1].sent_start = True
doc[2].sent_start = True
doc = parser(doc)
assert len(list(doc.sents)) >= 3
def test_sents_1_3(parser):
doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
doc[1].sent_start = True
doc[3].sent_start = True
doc = parser(doc)
assert len(list(doc.sents)) >= 3
doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
doc[1].sent_start = True
doc[2].sent_start = False
doc[3].sent_start = True
doc = parser(doc)
assert len(list(doc.sents)) == 3