diff --git a/spacy/tests/regression/test_issue2001-2500.py b/spacy/tests/regression/test_issue2001-2500.py index d9febb152..2c81651fd 100644 --- a/spacy/tests/regression/test_issue2001-2500.py +++ b/spacy/tests/regression/test_issue2001-2500.py @@ -5,10 +5,26 @@ import pytest from spacy.tokens import Doc from spacy.displacy import render from spacy.gold import iob_to_biluo +from spacy.lang.it import Italian from ..util import add_vecs_to_vocab +@pytest.mark.xfail +def test_issue2179(): + """Test that spurious 'extra_labels' aren't created when initializing NER.""" + nlp = Italian() + ner = nlp.create_pipe('ner') + ner.add_label('CITIZENSHIP') + nlp.add_pipe(ner) + nlp.begin_training() + nlp2 = Italian() + nlp2.add_pipe(nlp2.create_pipe('ner')) + nlp2.from_bytes(nlp.to_bytes()) + assert 'extra_labels' not in nlp2.get_pipe('ner').cfg + assert nlp2.get_pipe('ner').labels == ['CITIZENSHIP'] + + def test_issue2219(en_vocab): vectors = [("a", [1, 2, 3]), ("letter", [4, 5, 6])] add_vecs_to_vocab(en_vocab, vectors) diff --git a/spacy/tests/regression/test_issue2179.py b/spacy/tests/regression/test_issue2179.py deleted file mode 100644 index d4f194c0e..000000000 --- a/spacy/tests/regression/test_issue2179.py +++ /dev/null @@ -1,16 +0,0 @@ -'''Test that spurious 'extra_labels' aren't created when initializing NER.''' -import pytest -from ... import blank - -@pytest.mark.xfail -def test_issue2179(): - nlp = blank('it') - ner = nlp.create_pipe('ner') - ner.add_label('CITIZENSHIP') - nlp.add_pipe(ner) - nlp.begin_training() - nlp2 = blank('it') - nlp2.add_pipe(nlp2.create_pipe('ner')) - nlp2.from_bytes(nlp.to_bytes()) - assert 'extra_labels' not in nlp2.get_pipe('ner').cfg - assert nlp2.get_pipe('ner').labels == ['CITIZENSHIP'] diff --git a/spacy/tests/regression/test_issue2626.py b/spacy/tests/regression/test_issue2626.py index 9580c3c4b..afe8bc055 100644 --- a/spacy/tests/regression/test_issue2626.py +++ b/spacy/tests/regression/test_issue2626.py @@ -1,11 +1,10 @@ +# coding: utf8 from __future__ import unicode_literals -import spacy -def test_issue2626(): - '''Check that this sentence doesn't cause an infinite loop in the tokenizer.''' - nlp = spacy.blank('en') + +def test_issue2626(en_tokenizer): + """Check that sentence doesn't cause an infinite loop in the tokenizer.""" text = """ ABLEItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume TABLE ItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume ItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume """ - doc = nlp.make_doc(text) - + doc = en_tokenizer(text) diff --git a/spacy/tests/regression/test_issue2671.py b/spacy/tests/regression/test_issue2671.py index ea33d69c7..561cb2a9e 100644 --- a/spacy/tests/regression/test_issue2671.py +++ b/spacy/tests/regression/test_issue2671.py @@ -1,29 +1,30 @@ # coding: utf-8 from __future__ import unicode_literals -import pytest -from ...lang.en import English -from ...matcher import Matcher -def get_rule_id(nlp, matcher, doc): - matches = matcher(doc) - for match_id, start, end in matches: - rule_id = nlp.vocab.strings[match_id] - span = doc[start:end] - return rule_id +import pytest +from spacy.lang.en import English +from spacy.matcher import Matcher def test_issue2671(): - '''Ensure the correct entity ID is returned for matches with quantifiers. + """Ensure the correct entity ID is returned for matches with quantifiers. See also #2675 - ''' + """ + def get_rule_id(nlp, matcher, doc): + matches = matcher(doc) + for match_id, start, end in matches: + rule_id = nlp.vocab.strings[match_id] + span = doc[start:end] + return rule_id + nlp = English() matcher = Matcher(nlp.vocab) - - pattern = [{'LOWER': 'high'}, {'IS_PUNCT': True, 'OP': '?'}, {'LOWER': 'adrenaline'}] - matcher.add("test_pattern", None, pattern) - + pattern_id = 'test_pattern' + pattern = [{'LOWER': 'high'}, + {'IS_PUNCT': True, 'OP': '?'}, + {'LOWER': 'adrenaline'}] + matcher.add(pattern_id, None, pattern) doc1 = nlp("This is a high-adrenaline situation.") doc2 = nlp("This is a high adrenaline situation.") - # Works correctly - assert get_rule_id(nlp, matcher, doc1) == 'test_pattern' - assert get_rule_id(nlp, matcher, doc2) == 'test_pattern' + assert get_rule_id(nlp, matcher, doc1) == pattern_id + assert get_rule_id(nlp, matcher, doc2) == pattern_id diff --git a/spacy/tests/regression/test_issue2772.py b/spacy/tests/regression/test_issue2772.py index c9e0cf0f2..d8188c71c 100644 --- a/spacy/tests/regression/test_issue2772.py +++ b/spacy/tests/regression/test_issue2772.py @@ -1,8 +1,13 @@ -'''Test that deprojectivization doesn't mess up sentence boundaries.''' +# coding: utf-8 +from __future__ import unicode_literals + import pytest + from ..util import get_doc + def test_issue2772(en_vocab): + """Test that deprojectivization doesn't mess up sentence boundaries.""" words = 'When we write or communicate virtually , we can hide our true feelings .'.split() # A tree with a non-projective (i.e. crossing) arc # The arcs (0, 4) and (2, 9) cross.