Modernise tagger tests and fix xpassing test

This commit is contained in:
Ines Montani 2017-01-12 23:40:52 +01:00
parent 33e5f8dc2e
commit 33d9cf87f9
2 changed files with 27 additions and 30 deletions

View File

@ -1,37 +1,32 @@
# coding: utf-8
"""Ensure spaces are assigned the POS tag SPACE"""
from __future__ import unicode_literals
from spacy.parts_of_speech import SPACE
from ...parts_of_speech import SPACE
import pytest
@pytest.mark.models
def test_tagger_spaces(EN):
    """Check which of the first five tokens are tagged as SPACE.

    The text contains a newline and a tab. Tokens 1 and 4 must carry the
    SPACE part-of-speech (token 1 also the 'SP' tag); tokens 0, 2 and 3
    must not.
    """
    doc = EN("Some\nspaces are\tnecessary.", tag=True, parse=False)

    first = doc[0]
    assert first.pos != SPACE
    assert first.pos_ != 'SPACE'

    second = doc[1]
    assert second.pos == SPACE
    assert second.pos_ == 'SPACE'
    assert second.tag_ == 'SP'

    assert doc[2].pos != SPACE
    assert doc[3].pos != SPACE
    assert doc[4].pos == SPACE
@pytest.fixture
def tagged(EN):
    """Return the result of running EN (tagging on, parsing off) over a
    text that contains a newline and a tab."""
    return EN(u'Some\nspaces are\tnecessary.', tag=True, parse=False)
@pytest.mark.models
def test_spaces(tagged):
    """Check which of the first five tokens of ``tagged`` are SPACE.

    Tokens 1 and 4 must carry the SPACE part-of-speech (token 1 also the
    'SP' tag); tokens 0, 2 and 3 must not.
    """
    first = tagged[0]
    assert first.pos != SPACE
    assert first.pos_ != 'SPACE'

    second = tagged[1]
    assert second.pos == SPACE
    assert second.pos_ == 'SPACE'
    assert second.tag_ == 'SP'

    assert tagged[2].pos != SPACE
    assert tagged[3].pos != SPACE
    assert tagged[4].pos == SPACE
# NOTE(review): the xfail marker is kept because it is present in the source
# this was reconstructed from; confirm whether this test is still expected
# to fail, and drop the marker if it now passes.
@pytest.mark.xfail
@pytest.mark.models
def test_tagger_return_char(EN):
    """Every token flagged ``is_space`` must have the SPACE part-of-speech.

    The input text contains several ``\\r\\n`` sequences, including
    consecutive ones.
    """
    text = ('hi Aaron,\r\n\r\nHow is your schedule today, I was wondering if '
            'you had time for a phone\r\ncall this afternoon?\r\n\r\n\r\n')
    tokens = EN(text)
    for token in tokens:
        if token.is_space:
            assert token.pos == SPACE

View File

@ -1,14 +1,16 @@
from spacy.en import English
# coding: utf-8
from __future__ import unicode_literals
import six
import pytest
@pytest.mark.models
def test_tag_names(EN):
    """Check the attribute types and the tag of the third token.

    ``pos`` and ``dep`` must be integers, ``pos_`` and ``dep_`` must be
    text strings, and ``tag_`` must equal 'NNS'.
    """
    text = "I ate pizzas with anchovies."
    doc = EN(text, parse=False, tag=True)
    pizzas = doc[2]
    # isinstance() is the idiomatic type check (type(x) == int is flagged
    # by linters as E721).
    assert isinstance(pizzas.pos, int)
    assert isinstance(pizzas.pos_, six.text_type)
    assert isinstance(pizzas.dep, int)
    assert isinstance(pizzas.dep_, six.text_type)
    assert pizzas.tag_ == u'NNS'