diff --git a/spacy/tests/tagger/test_spaces.py b/spacy/tests/tagger/test_spaces.py index 8837f466b..5b12eba7f 100644 --- a/spacy/tests/tagger/test_spaces.py +++ b/spacy/tests/tagger/test_spaces.py @@ -1,37 +1,32 @@ +# coding: utf-8 """Ensure spaces are assigned the POS tag SPACE""" from __future__ import unicode_literals -from spacy.parts_of_speech import SPACE +from ...parts_of_speech import SPACE import pytest +@pytest.mark.models +def test_tagger_spaces(EN): + text = "Some\nspaces are\tnecessary." + doc = EN(text, tag=True, parse=False) + assert doc[0].pos != SPACE + assert doc[0].pos_ != 'SPACE' + assert doc[1].pos == SPACE + assert doc[1].pos_ == 'SPACE' + assert doc[1].tag_ == 'SP' + assert doc[2].pos != SPACE + assert doc[3].pos != SPACE + assert doc[4].pos == SPACE -@pytest.fixture -def tagged(EN): - string = u'Some\nspaces are\tnecessary.' - tokens = EN(string, tag=True, parse=False) - return tokens @pytest.mark.models -def test_spaces(tagged): - assert tagged[0].pos != SPACE - assert tagged[0].pos_ != 'SPACE' - assert tagged[1].pos == SPACE - assert tagged[1].pos_ == 'SPACE' - assert tagged[1].tag_ == 'SP' - assert tagged[2].pos != SPACE - assert tagged[3].pos != SPACE - assert tagged[4].pos == SPACE - - -@pytest.mark.xfail -@pytest.mark.models -def test_return_char(EN): - string = ('hi Aaron,\r\n\r\nHow is your schedule today, I was wondering if ' +def test_tagger_return_char(EN): + text = ('hi Aaron,\r\n\r\nHow is your schedule today, I was wondering if ' 'you had time for a phone\r\ncall this afternoon?\r\n\r\n\r\n') - tokens = EN(string) + tokens = EN(text) for token in tokens: if token.is_space: assert token.pos == SPACE diff --git a/spacy/tests/tagger/test_tag_names.py b/spacy/tests/tagger/test_tag_names.py index 37e6d9318..9c5b0adcc 100644 --- a/spacy/tests/tagger/test_tag_names.py +++ b/spacy/tests/tagger/test_tag_names.py @@ -1,14 +1,16 @@ -from spacy.en import English +# coding: utf-8 +from __future__ import unicode_literals + import six import pytest @pytest.mark.models def test_tag_names(EN): - tokens = EN(u'I ate pizzas with anchovies.', parse=False, tag=True) - pizza = tokens[2] - assert type(pizza.pos) == int - assert isinstance(pizza.pos_, six.text_type) - assert type(pizza.dep) == int - assert isinstance(pizza.dep_, six.text_type) - assert pizza.tag_ == u'NNS' + text = "I ate pizzas with anchovies." + doc = EN(text, parse=False, tag=True) + assert type(doc[2].pos) == int + assert isinstance(doc[2].pos_, six.text_type) + assert type(doc[2].dep) == int + assert isinstance(doc[2].dep_, six.text_type) + assert doc[2].tag_ == u'NNS'