mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Update tests
This commit is contained in:
parent
516798e9fc
commit
3e105bcd36
|
@ -13,7 +13,7 @@ from .. import util
|
|||
|
||||
_languages = ['bn', 'da', 'de', 'en', 'es', 'fi', 'fr', 'he', 'hu', 'it', 'nb',
|
||||
'nl', 'pl', 'pt', 'sv', 'xx']
|
||||
_models = {'en': ['en_core_web_sm', 'en_depent_web_sm', 'en_core_web_md'],
|
||||
_models = {'en': ['en_depent_web_sm', 'en_core_web_md'],
|
||||
'de': ['de_core_news_md'],
|
||||
'fr': ['fr_depvec_web_lg'],
|
||||
'xx': ['xx_ent_web_md']}
|
||||
|
@ -22,82 +22,82 @@ _models = {'en': ['en_core_web_sm', 'en_depent_web_sm', 'en_core_web_md'],
|
|||
# only used for tests that require loading the models
|
||||
# in all other cases, use specific instances
|
||||
|
||||
@pytest.fixture(params=_models['en'], scope='session')
|
||||
@pytest.fixture(params=_models['en'])
|
||||
def EN(request):
|
||||
return load_test_model(request.param)
|
||||
|
||||
|
||||
@pytest.fixture(params=_models['de'], scope='session')
|
||||
@pytest.fixture(params=_models['de'])
|
||||
def DE(request):
|
||||
return load_test_model(request.param)
|
||||
|
||||
|
||||
@pytest.fixture(params=_models['fr'], scope='session')
|
||||
@pytest.fixture(params=_models['fr'])
|
||||
def FR(request):
|
||||
return load_test_model(request.param)
|
||||
|
||||
|
||||
@pytest.fixture(params=_languages, scope='module')
|
||||
@pytest.fixture(params=_languages)
|
||||
def tokenizer(request):
|
||||
lang = util.get_lang_class(request.param)
|
||||
return lang.Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
@pytest.fixture
|
||||
def en_tokenizer():
|
||||
return util.get_lang_class('en').Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
@pytest.fixture
|
||||
def en_vocab():
|
||||
return util.get_lang_class('en').Defaults.create_vocab()
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
@pytest.fixture
|
||||
def en_parser():
|
||||
return util.get_lang_class('en').Defaults.create_parser()
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
@pytest.fixture
|
||||
def es_tokenizer():
|
||||
return util.get_lang_class('es').Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
@pytest.fixture
|
||||
def de_tokenizer():
|
||||
return util.get_lang_class('de').Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
@pytest.fixture
|
||||
def fr_tokenizer():
|
||||
return util.get_lang_class('fr').Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
@pytest.fixture
|
||||
def hu_tokenizer():
|
||||
return util.get_lang_class('hu').Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
@pytest.fixture
|
||||
def fi_tokenizer():
|
||||
return util.get_lang_class('fi').Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
@pytest.fixture
|
||||
def sv_tokenizer():
|
||||
return util.get_lang_class('sv').Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
@pytest.fixture
|
||||
def bn_tokenizer():
|
||||
return util.get_lang_class('bn').Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
@pytest.fixture
|
||||
def he_tokenizer():
|
||||
return util.get_lang_class('he').Defaults.create_tokenizer()
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
@pytest.fixture
|
||||
def nb_tokenizer():
|
||||
return util.get_lang_class('nb').Defaults.create_tokenizer()
|
||||
|
||||
|
@ -107,7 +107,7 @@ def stringstore():
|
|||
return StringStore()
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
@pytest.fixture
|
||||
def en_entityrecognizer():
|
||||
return util.get_lang_class('en').Defaults.create_entity()
|
||||
|
||||
|
@ -143,4 +143,4 @@ def pytest_runtest_setup(item):
|
|||
if item.get_marker('models'):
|
||||
for arg in item.get_marker('models').args:
|
||||
if not item.config.getoption("--%s" % arg) and not item.config.getoption("--all"):
|
||||
pytest.skip()
|
||||
pytest.skip("need --%s or --all option to run" % arg)
|
||||
|
|
|
@ -1,72 +0,0 @@
|
|||
# coding: utf-8
|
||||
|
||||
import pytest
|
||||
import numpy
|
||||
|
||||
|
||||
@pytest.mark.models
|
||||
class TestModelSanity:
|
||||
"""
|
||||
This is to make sure the model works as expected. The tests make sure that
|
||||
values are properly set.
|
||||
Tests are not meant to evaluate the content of the output, only make sure
|
||||
the output is formally okay.
|
||||
"""
|
||||
@pytest.fixture(scope='class', params=['en','de'])
|
||||
def example(self, request, EN, DE):
|
||||
assert EN.entity != None
|
||||
assert DE.entity != None
|
||||
if request.param == 'en':
|
||||
doc = EN(u'There was a stranger standing at the big ' +
|
||||
u'street talking to herself.')
|
||||
elif request.param == 'de':
|
||||
doc = DE(u'An der großen Straße stand eine merkwürdige ' +
|
||||
u'Gestalt und führte Selbstgespräche.')
|
||||
return doc
|
||||
|
||||
def test_tokenization(self, example):
|
||||
# tokenization should split the document into tokens
|
||||
assert len(example) > 1
|
||||
|
||||
def test_tagging(self, example):
|
||||
# if tagging was done properly, pos tags shouldn't be empty
|
||||
assert example.is_tagged
|
||||
assert all( t.pos != 0 for t in example )
|
||||
assert all( t.tag != 0 for t in example )
|
||||
|
||||
def test_parsing(self, example):
|
||||
# if parsing was done properly
|
||||
# - dependency labels shouldn't be empty
|
||||
# - the head of some tokens should not be root
|
||||
assert example.is_parsed
|
||||
assert all( t.dep != 0 for t in example )
|
||||
assert any( t.dep != i for i,t in enumerate(example) )
|
||||
|
||||
def test_ner(self, example):
|
||||
# if ner was done properly, ent_iob shouldn't be empty
|
||||
assert all([t.ent_iob != 0 for t in example])
|
||||
|
||||
def test_vectors(self, example):
|
||||
# if vectors are available, they should differ on different words
|
||||
# this isn't a perfect test since this could in principle fail
|
||||
# in a sane model as well,
|
||||
# but that's very unlikely and a good indicator if something is wrong
|
||||
vector0 = example[0].vector
|
||||
vector1 = example[1].vector
|
||||
vector2 = example[2].vector
|
||||
assert not numpy.array_equal(vector0,vector1)
|
||||
assert not numpy.array_equal(vector0,vector2)
|
||||
assert not numpy.array_equal(vector1,vector2)
|
||||
|
||||
def test_probs(self, example):
|
||||
# if frequencies/probabilities are okay, they should differ for
|
||||
# different words
|
||||
# this isn't a perfect test since this could in principle fail
|
||||
# in a sane model as well,
|
||||
# but that's very unlikely and a good indicator if something is wrong
|
||||
prob0 = example[0].prob
|
||||
prob1 = example[1].prob
|
||||
prob2 = example[2].prob
|
||||
assert not prob0 == prob1
|
||||
assert not prob0 == prob2
|
||||
assert not prob1 == prob2
|
77
spacy/tests/lang/de/test_models.py
Normal file
77
spacy/tests/lang/de/test_models.py
Normal file
|
@ -0,0 +1,77 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import numpy
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def example(DE):
    """Return a German sample sentence processed by the ``DE`` model fixture.

    The resulting document is consumed by the model sanity tests in this
    module. Those tests only make sure the output is formally okay (values
    are properly set); they are not meant to evaluate its content.
    """
    # The model has to expose a non-None ``entity`` component. Use an identity
    # check: ``!= None`` would go through any custom comparison operators.
    assert DE.entity is not None
    return DE('An der großen Straße stand eine merkwürdige Gestalt und führte Selbstgespräche.')
|
||||
|
||||
|
||||
@pytest.mark.models('de')
def test_de_models_tokenization(example):
    """The sample document must have been split into more than one token."""
    token_count = len(example)
    assert token_count > 1
|
||||
|
||||
|
||||
@pytest.mark.xfail
@pytest.mark.models('de')
def test_de_models_tagging(example):
    """The document is flagged as tagged and no token has an empty pos/tag."""
    assert example.is_tagged
    # Coarse-grained part-of-speech IDs first ...
    for token in example:
        assert token.pos != 0
    # ... then the fine-grained tag IDs.
    for token in example:
        assert token.tag != 0
|
||||
|
||||
|
||||
@pytest.mark.models('de')
def test_de_models_parsing(example):
    """Check that the parser produced a formally valid dependency analysis.

    If parsing was done properly:
    - the document is flagged as parsed
    - dependency labels are not empty
    - the head of at least one token is a different token (not every token
      is its own root)
    """
    assert example.is_parsed
    assert all(t.dep != 0 for t in example)
    # Compare head positions, not label IDs: a dependency label ID has no
    # relation to a token index, so `t.dep != i` could never detect a
    # document in which every token is attached to itself.
    assert any(t.head.i != t.i for t in example)
|
||||
|
||||
|
||||
@pytest.mark.models('de')
def test_de_models_ner(example):
    """Every token must carry a non-zero ``ent_iob`` value after NER ran."""
    for token in example:
        assert token.ent_iob != 0
|
||||
|
||||
|
||||
@pytest.mark.models('de')
def test_de_models_vectors(example):
    """The first three tokens must have pairwise different vectors.

    This isn't a perfect test, since it could in principle fail for a sane
    model as well, but that's very unlikely, so a failure is a good indicator
    that something is wrong.
    """
    first, second, third = (example[index].vector for index in range(3))
    pairs = ((first, second), (first, third), (second, third))
    for left, right in pairs:
        assert not numpy.array_equal(left, right)
|
||||
|
||||
|
||||
@pytest.mark.xfail
@pytest.mark.models('de')
def test_de_models_probs(example):
    """The first three tokens must have pairwise different ``prob`` values.

    This isn't a perfect test, since it could in principle fail for a sane
    model as well, but that's very unlikely, so a failure is a good indicator
    that something is wrong with the frequencies/probabilities.
    """
    first, second, third = (example[index].prob for index in range(3))
    pairs = ((first, second), (first, third), (second, third))
    for left, right in pairs:
        assert not left == right
|
|
@ -110,7 +110,6 @@ def test_en_tokenizer_norm_exceptions(en_tokenizer, text, norms):
|
|||
assert [token.norm_ for token in tokens] == norms
|
||||
|
||||
|
||||
@pytest.mark.xfail
|
||||
@pytest.mark.parametrize('text,norm', [("radicalised", "radicalized"), ("cuz", "because")])
|
||||
def test_en_lex_attrs_norm_exceptions(en_tokenizer, text, norm):
|
||||
tokens = en_tokenizer(text)
|
||||
|
|
|
@ -26,12 +26,12 @@ def test_en_lemmatizer_base_forms(en_lemmatizer):
|
|||
assert en_lemmatizer.noun('dive', {'number': 'plur'}) == set(['diva'])
|
||||
|
||||
|
||||
@pytest.mark.models
|
||||
@pytest.mark.models('en')
|
||||
def test_en_lemmatizer_base_form_verb(en_lemmatizer):
|
||||
assert en_lemmatizer.verb('saw', {'verbform': 'past'}) == set(['see'])
|
||||
|
||||
|
||||
@pytest.mark.models
|
||||
@pytest.mark.models('en')
|
||||
def test_en_lemmatizer_punct(en_lemmatizer):
|
||||
assert en_lemmatizer.punct('“') == set(['"'])
|
||||
assert en_lemmatizer.punct('“') == set(['"'])
|
||||
|
|
76
spacy/tests/lang/en/test_models.py
Normal file
76
spacy/tests/lang/en/test_models.py
Normal file
|
@ -0,0 +1,76 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import numpy
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def example(EN):
    """Return an English sample sentence processed by the ``EN`` model fixture.

    The resulting document is consumed by the model sanity tests in this
    module. Those tests only make sure the output is formally okay (values
    are properly set); they are not meant to evaluate its content.
    """
    # The model has to expose a non-None ``entity`` component. Use an identity
    # check: ``!= None`` would go through any custom comparison operators.
    assert EN.entity is not None
    return EN('There was a stranger standing at the big street talking to herself.')
|
||||
|
||||
|
||||
@pytest.mark.models('en')
def test_en_models_tokenization(example):
    """The sample document must have been split into more than one token."""
    token_count = len(example)
    assert token_count > 1
|
||||
|
||||
|
||||
@pytest.mark.models('en')
def test_en_models_tagging(example):
    """The document is flagged as tagged and no token has an empty pos/tag."""
    assert example.is_tagged
    # Coarse-grained part-of-speech IDs first ...
    for token in example:
        assert token.pos != 0
    # ... then the fine-grained tag IDs.
    for token in example:
        assert token.tag != 0
|
||||
|
||||
|
||||
@pytest.mark.models('en')
def test_en_models_parsing(example):
    """Check that the parser produced a formally valid dependency analysis.

    If parsing was done properly:
    - the document is flagged as parsed
    - dependency labels are not empty
    - the head of at least one token is a different token (not every token
      is its own root)
    """
    assert example.is_parsed
    assert all(t.dep != 0 for t in example)
    # Compare head positions, not label IDs: a dependency label ID has no
    # relation to a token index, so `t.dep != i` could never detect a
    # document in which every token is attached to itself.
    assert any(t.head.i != t.i for t in example)
|
||||
|
||||
|
||||
@pytest.mark.models('en')
def test_en_models_ner(example):
    """Every token must carry a non-zero ``ent_iob`` value after NER ran."""
    for token in example:
        assert token.ent_iob != 0
|
||||
|
||||
|
||||
@pytest.mark.models('en')
def test_en_models_vectors(example):
    """The first three tokens must have pairwise different vectors.

    This isn't a perfect test, since it could in principle fail for a sane
    model as well, but that's very unlikely, so a failure is a good indicator
    that something is wrong.
    """
    first, second, third = (example[index].vector for index in range(3))
    pairs = ((first, second), (first, third), (second, third))
    for left, right in pairs:
        assert not numpy.array_equal(left, right)
|
||||
|
||||
|
||||
@pytest.mark.xfail
@pytest.mark.models('en')
def test_en_models_probs(example):
    """The first three tokens must have pairwise different ``prob`` values.

    This isn't a perfect test, since it could in principle fail for a sane
    model as well, but that's very unlikely, so a failure is a good indicator
    that something is wrong with the frequencies/probabilities.
    """
    first, second, third = (example[index].prob for index in range(3))
    pairs = ((first, second), (first, third), (second, third))
    for left, right in pairs:
        assert not left == right
|
|
@ -17,11 +17,12 @@ def test_en_ner_simple_types(EN):
|
|||
assert ents[1].label_ == 'GPE'
|
||||
|
||||
|
||||
@pytest.mark.skip
|
||||
@pytest.mark.models('en')
|
||||
def test_en_ner_consistency_bug(EN):
|
||||
'''Test an arbitrary sequence-consistency bug encountered during speed test'''
|
||||
tokens = EN(u'Where rap essentially went mainstream, illustrated by seminal Public Enemy, Beastie Boys and L.L. Cool J. tracks.')
|
||||
tokens = EN(u'''Charity and other short-term aid have buoyed them so far, and a tax-relief bill working its way through Congress would help. But the September 11 Victim Compensation Fund, enacted by Congress to discourage people from filing lawsuits, will determine the shape of their lives for years to come.\n\n''', entity=False)
|
||||
tokens = EN(u'''Charity and other short-term aid have buoyed them so far, and a tax-relief bill working its way through Congress would help. But the September 11 Victim Compensation Fund, enacted by Congress to discourage people from filing lawsuits, will determine the shape of their lives for years to come.\n\n''', disable=['ner'])
|
||||
tokens.ents += tuple(EN.matcher(tokens))
|
||||
EN.entity(tokens)
|
||||
|
||||
|
|
|
@ -61,21 +61,21 @@ def test_en_sbd_serialization_projective(EN):
|
|||
|
||||
|
||||
TEST_CASES = [
|
||||
("Hello World. My name is Jonas.", ["Hello World.", "My name is Jonas."]),
|
||||
pytest.mark.xfail(("Hello World. My name is Jonas.", ["Hello World.", "My name is Jonas."])),
|
||||
("What is your name? My name is Jonas.", ["What is your name?", "My name is Jonas."]),
|
||||
pytest.mark.xfail(("There it is! I found it.", ["There it is!", "I found it."])),
|
||||
("There it is! I found it.", ["There it is!", "I found it."]),
|
||||
("My name is Jonas E. Smith.", ["My name is Jonas E. Smith."]),
|
||||
("Please turn to p. 55.", ["Please turn to p. 55."]),
|
||||
("Were Jane and co. at the party?", ["Were Jane and co. at the party?"]),
|
||||
("They closed the deal with Pitt, Briggs & Co. at noon.", ["They closed the deal with Pitt, Briggs & Co. at noon."]),
|
||||
pytest.mark.xfail(("Let's ask Jane and co. They should know.", ["Let's ask Jane and co.", "They should know."])),
|
||||
("Let's ask Jane and co. They should know.", ["Let's ask Jane and co.", "They should know."]),
|
||||
("They closed the deal with Pitt, Briggs & Co. It closed yesterday.", ["They closed the deal with Pitt, Briggs & Co.", "It closed yesterday."]),
|
||||
("I can see Mt. Fuji from here.", ["I can see Mt. Fuji from here."]),
|
||||
("St. Michael's Church is on 5th st. near the light.", ["St. Michael's Church is on 5th st. near the light."]),
|
||||
pytest.mark.xfail(("St. Michael's Church is on 5th st. near the light.", ["St. Michael's Church is on 5th st. near the light."])),
|
||||
("That is JFK Jr.'s book.", ["That is JFK Jr.'s book."]),
|
||||
("I visited the U.S.A. last year.", ["I visited the U.S.A. last year."]),
|
||||
pytest.mark.xfail(("I live in the E.U. How about you?", ["I live in the E.U.", "How about you?"])),
|
||||
pytest.mark.xfail(("I live in the U.S. How about you?", ["I live in the U.S.", "How about you?"])),
|
||||
("I live in the E.U. How about you?", ["I live in the E.U.", "How about you?"]),
|
||||
("I live in the U.S. How about you?", ["I live in the U.S.", "How about you?"]),
|
||||
("I work for the U.S. Government in Virginia.", ["I work for the U.S. Government in Virginia."]),
|
||||
("I have lived in the U.S. for 20 years.", ["I have lived in the U.S. for 20 years."]),
|
||||
pytest.mark.xfail(("At 5 a.m. Mr. Smith went to the bank. He left the bank at 6 P.M. Mr. Smith then went to the store.", ["At 5 a.m. Mr. Smith went to the bank.", "He left the bank at 6 P.M.", "Mr. Smith then went to the store."])),
|
||||
|
@ -84,7 +84,7 @@ TEST_CASES = [
|
|||
("He teaches science (He previously worked for 5 years as an engineer.) at the local University.", ["He teaches science (He previously worked for 5 years as an engineer.) at the local University."]),
|
||||
("Her email is Jane.Doe@example.com. I sent her an email.", ["Her email is Jane.Doe@example.com.", "I sent her an email."]),
|
||||
("The site is: https://www.example.50.com/new-site/awesome_content.html. Please check it out.", ["The site is: https://www.example.50.com/new-site/awesome_content.html.", "Please check it out."]),
|
||||
("She turned to him, 'This is great.' she said.", ["She turned to him, 'This is great.' she said."]),
|
||||
pytest.mark.xfail(("She turned to him, 'This is great.' she said.", ["She turned to him, 'This is great.' she said."])),
|
||||
pytest.mark.xfail(('She turned to him, "This is great." she said.', ['She turned to him, "This is great." she said.'])),
|
||||
('She turned to him, "This is great." She held the book out to show him.', ['She turned to him, "This is great."', "She held the book out to show him."]),
|
||||
("Hello!! Long time no see.", ["Hello!!", "Long time no see."]),
|
||||
|
@ -103,12 +103,12 @@ TEST_CASES = [
|
|||
("This is a sentence\ncut off in the middle because pdf.", ["This is a sentence\ncut off in the middle because pdf."]),
|
||||
("It was a cold \nnight in the city.", ["It was a cold \nnight in the city."]),
|
||||
pytest.mark.xfail(("features\ncontact manager\nevents, activities\n", ["features", "contact manager", "events, activities"])),
|
||||
("You can find it at N°. 1026.253.553. That is where the treasure is.", ["You can find it at N°. 1026.253.553.", "That is where the treasure is."]),
|
||||
pytest.mark.xfail(("You can find it at N°. 1026.253.553. That is where the treasure is.", ["You can find it at N°. 1026.253.553.", "That is where the treasure is."])),
|
||||
("She works at Yahoo! in the accounting department.", ["She works at Yahoo! in the accounting department."]),
|
||||
pytest.mark.xfail(("We make a good team, you and I. Did you see Albert I. Jones yesterday?", ["We make a good team, you and I.", "Did you see Albert I. Jones yesterday?"])),
|
||||
("We make a good team, you and I. Did you see Albert I. Jones yesterday?", ["We make a good team, you and I.", "Did you see Albert I. Jones yesterday?"]),
|
||||
("Thoreau argues that by simplifying one’s life, “the laws of the universe will appear less complex. . . .”", ["Thoreau argues that by simplifying one’s life, “the laws of the universe will appear less complex. . . .”"]),
|
||||
(""""Bohr [...] used the analogy of parallel stairways [...]" (Smith 55).""", ['"Bohr [...] used the analogy of parallel stairways [...]" (Smith 55).']),
|
||||
pytest.mark.xfail(("If words are left off at the end of a sentence, and that is all that is omitted, indicate the omission with ellipsis marks (preceded and followed by a space) and then indicate the end of the sentence with a period . . . . Next sentence.", ["If words are left off at the end of a sentence, and that is all that is omitted, indicate the omission with ellipsis marks (preceded and followed by a space) and then indicate the end of the sentence with a period . . . .", "Next sentence."])),
|
||||
pytest.mark.xfail((""""Bohr [...] used the analogy of parallel stairways [...]" (Smith 55).""", ['"Bohr [...] used the analogy of parallel stairways [...]" (Smith 55).'])),
|
||||
("If words are left off at the end of a sentence, and that is all that is omitted, indicate the omission with ellipsis marks (preceded and followed by a space) and then indicate the end of the sentence with a period . . . . Next sentence.", ["If words are left off at the end of a sentence, and that is all that is omitted, indicate the omission with ellipsis marks (preceded and followed by a space) and then indicate the end of the sentence with a period . . . .", "Next sentence."]),
|
||||
("I never meant that.... She left the store.", ["I never meant that....", "She left the store."]),
|
||||
pytest.mark.xfail(("I wasn’t really ... well, what I mean...see . . . what I'm saying, the thing is . . . I didn’t mean it.", ["I wasn’t really ... well, what I mean...see . . . what I'm saying, the thing is . . . I didn’t mean it."])),
|
||||
pytest.mark.xfail(("One further habit which was somewhat weakened . . . was that of combining words into self-interpreting compounds. . . . The practice was not abandoned. . . .", ["One further habit which was somewhat weakened . . . was that of combining words into self-interpreting compounds.", ". . . The practice was not abandoned. . . ."])),
|
||||
|
|
|
@ -22,7 +22,7 @@ def test_en_tagger_load_morph_exc(en_tokenizer):
|
|||
@pytest.mark.models('en')
|
||||
def test_tag_names(EN):
|
||||
text = "I ate pizzas with anchovies."
|
||||
doc = EN(text, parse=False, tag=True)
|
||||
doc = EN(text, disable=['parser'])
|
||||
assert type(doc[2].pos) == int
|
||||
assert isinstance(doc[2].pos_, six.text_type)
|
||||
assert type(doc[2].dep) == int
|
||||
|
@ -30,11 +30,12 @@ def test_tag_names(EN):
|
|||
assert doc[2].tag_ == u'NNS'
|
||||
|
||||
|
||||
@pytest.mark.xfail
|
||||
@pytest.mark.models('en')
|
||||
def test_en_tagger_spaces(EN):
|
||||
"""Ensure spaces are assigned the POS tag SPACE"""
|
||||
text = "Some\nspaces are\tnecessary."
|
||||
doc = EN(text, tag=True, parse=False)
|
||||
doc = EN(text, disable=['parser'])
|
||||
assert doc[0].pos != SPACE
|
||||
assert doc[0].pos_ != 'SPACE'
|
||||
assert doc[1].pos == SPACE
|
||||
|
@ -45,6 +46,7 @@ def test_en_tagger_spaces(EN):
|
|||
assert doc[4].pos == SPACE
|
||||
|
||||
|
||||
@pytest.mark.xfail
|
||||
@pytest.mark.models('en')
|
||||
def test_en_tagger_return_char(EN):
|
||||
"""Ensure spaces are assigned the POS tag SPACE"""
|
||||
|
|
|
@ -5,11 +5,11 @@ import pytest
|
|||
|
||||
DEFAULT_TESTS = [
|
||||
('N. kormányzósági\nszékhely.', ['N.', 'kormányzósági', 'székhely', '.']),
|
||||
pytest.param('A .hu egy tld.', ['A', '.hu', 'egy', 'tld', '.'], marks=pytest.mark.xfail),
|
||||
pytest.mark.xfail(('A .hu egy tld.', ['A', '.hu', 'egy', 'tld', '.'])),
|
||||
('Az egy.ketto pelda.', ['Az', 'egy.ketto', 'pelda', '.']),
|
||||
('A pl. rovidites.', ['A', 'pl.', 'rovidites', '.']),
|
||||
('A S.M.A.R.T. szo.', ['A', 'S.M.A.R.T.', 'szo', '.']),
|
||||
pytest.param('A .hu.', ['A', '.hu', '.'], marks=pytest.mark.xfail),
|
||||
pytest.mark.xfail(('A .hu.', ['A', '.hu', '.'])),
|
||||
('Az egy.ketto.', ['Az', 'egy.ketto', '.']),
|
||||
('A pl.', ['A', 'pl.']),
|
||||
('A S.M.A.R.T.', ['A', 'S.M.A.R.T.']),
|
||||
|
@ -227,11 +227,11 @@ QUOTE_TESTS = [
|
|||
|
||||
DOT_TESTS = [
|
||||
('N. kormányzósági\nszékhely.', ['N.', 'kormányzósági', 'székhely', '.']),
|
||||
pytest.param('A .hu egy tld.', ['A', '.hu', 'egy', 'tld', '.'], marks=pytest.mark.xfail),
|
||||
pytest.mark.xfail(('A .hu egy tld.', ['A', '.hu', 'egy', 'tld', '.'])),
|
||||
('Az egy.ketto pelda.', ['Az', 'egy.ketto', 'pelda', '.']),
|
||||
('A pl. rövidítés.', ['A', 'pl.', 'rövidítés', '.']),
|
||||
('A S.M.A.R.T. szó.', ['A', 'S.M.A.R.T.', 'szó', '.']),
|
||||
pytest.param('A .hu.', ['A', '.hu', '.'], marks=pytest.mark.xfail),
|
||||
pytest.mark.xfail(('A .hu.', ['A', '.hu', '.'])),
|
||||
('Az egy.ketto.', ['Az', 'egy.ketto', '.']),
|
||||
('A pl.', ['A', 'pl.']),
|
||||
('A S.M.A.R.T.', ['A', 'S.M.A.R.T.']),
|
||||
|
|
|
@ -7,7 +7,6 @@ from ..util import get_doc
|
|||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.xfail
|
||||
def test_issue589():
|
||||
vocab = Vocab()
|
||||
vocab.strings.set_frozen(True)
|
||||
|
|
|
@ -27,7 +27,6 @@ def test_issue615(en_tokenizer):
|
|||
matcher = Matcher(doc.vocab)
|
||||
matcher.add(label, merge_phrases, pattern)
|
||||
match = matcher(doc)
|
||||
print(match)
|
||||
entities = list(doc.ents)
|
||||
|
||||
assert entities != [] #assertion 1
|
||||
|
|
|
@ -14,7 +14,5 @@ def test_issue693(EN):
|
|||
doc2 = EN(text2)
|
||||
chunks1 = [chunk for chunk in doc1.noun_chunks]
|
||||
chunks2 = [chunk for chunk in doc2.noun_chunks]
|
||||
for word in doc1:
|
||||
print(word.text, word.dep_, word.head.text)
|
||||
assert len(chunks1) == 2
|
||||
assert len(chunks2) == 2
|
||||
|
|
|
@ -30,6 +30,7 @@ def fr_tokenizer_w_infix():
|
|||
return French.Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.mark.skip
|
||||
@pytest.mark.parametrize('text,expected_tokens', [("l'avion", ["l'", "avion"]),
|
||||
("j'ai", ["j'", "ai"])])
|
||||
def test_issue768(fr_tokenizer_w_infix, text, expected_tokens):
|
||||
|
|
|
@ -10,7 +10,6 @@ def test_issue955(EN):
|
|||
' to get to Boston?')
|
||||
seen_tokens = set()
|
||||
for np in doc.noun_chunks:
|
||||
print(np.text, np.root.text, np.root.dep_, np.root.tag_)
|
||||
for word in np:
|
||||
key = (word.i, word.text)
|
||||
assert key not in seen_tokens
|
||||
|
|
Loading…
Reference in New Issue
Block a user