Mirror of https://github.com/explosion/spaCy.git (synced 2024-12-26 01:46:28 +03:00)

Commit: Tidy up and rename regression tests and remove unnecessary imports

This commit is contained in:
    parent 5e1b6178e3
    commit 9b4bea1df9
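The renamed tests below drop their per-file fixtures and absolute spacy.* imports in favour of shared fixtures (en_tokenizer, en_vocab, and the model-backed EN fixture used by tests marked @pytest.mark.models) and relative imports. The shared conftest itself is not part of this diff, so the following is only a sketch of what it plausibly provides, pieced together from the fixture bodies deleted below; the exact bodies and scopes are assumptions.

# Hypothetical sketch of the shared test fixtures (e.g. spacy/tests/conftest.py).
# Reconstructed from the per-file fixtures removed in this commit; the fixture
# names come from the diff, the exact bodies and scopes are guesses.
import pytest

from spacy.en import English


@pytest.fixture
def en_tokenizer():
    # Tokenizer only -- no statistical models needed
    return English.Defaults.create_tokenizer()


@pytest.fixture
def en_vocab():
    # Bare English vocab, enough for Matcher and Doc construction
    return English.Defaults.create_vocab()


@pytest.fixture(scope='session')
def EN():
    # Full English pipeline; only used by tests marked @pytest.mark.models.
    # Loading the models is slow, hence the (assumed) session scope.
    return English()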
@@ -1,7 +1,4 @@
 # coding: utf-8
-"""Test a bug that arose from having overlapping matches"""
-
 from __future__ import unicode_literals

 from ...matcher import Matcher

@@ -25,6 +22,7 @@ def doc(en_tokenizer):
 @pytest.mark.parametrize('pattern', [pattern1, pattern2])
 def test_issue118(doc, pattern):
+    """Test a bug that arose from having overlapping matches"""
     ORG = doc.vocab.strings['ORG']
     matcher = Matcher(doc.vocab, {'BostonCeltics': ('ORG', {}, pattern)})

@@ -41,6 +39,7 @@ def test_issue118(doc, pattern):
 @pytest.mark.parametrize('pattern', [pattern3, pattern4])
 def test_issue118_prefix_reorder(doc, pattern):
+    """Test a bug that arose from having overlapping matches"""
     ORG = doc.vocab.strings['ORG']
     matcher = Matcher(doc.vocab, {'BostonCeltics': ('ORG', {}, pattern)})
@@ -9,7 +9,6 @@ import pytest
 def test_issue242(en_tokenizer):
     """Test overlapping multi-word phrases."""
-
     text = "There are different food safety standards in different countries."
     patterns = [[{LOWER: 'food'}, {LOWER: 'safety'}],
                 [{LOWER: 'safety'}, {LOWER: 'standards'}]]
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 from ..util import get_doc


-def test_sbd_empty_string(en_tokenizer):
+def test_issue309(en_tokenizer):
     """Test Issue #309: SBD fails on empty string"""
     tokens = en_tokenizer(" ")
     doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=[0], deps=['ROOT'])
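Several tests in this commit build Doc objects through a get_doc helper imported from ..util, which is not shown in this diff. The sketch below illustrates the idea only: presumably the helper constructs a Doc straight from a word list so no tokenizer or model is needed, with annotation arguments such as heads and deps (seen above) applied on top in the real implementation.

# Hypothetical sketch of the get_doc helper in spacy/tests/util.py (not part of
# this diff). Only the plain word-list case is illustrated; the real helper also
# attaches annotations such as heads, deps and tags to the resulting Doc.
from spacy.tokens import Doc


def get_doc(vocab, words=None, **annotations):
    """Build a Doc directly from a list of words, bypassing the tokenizer."""
    doc = Doc(vocab, words=words or [])
    # heads/deps/tags from **annotations would be applied here in the real helper
    return doc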
@@ -1,16 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals

-from ...en import English
-
-import pytest
-
-
-@pytest.fixture
-def en_tokenizer():
-    return English.Defaults.create_tokenizer()


 def test_issue351(en_tokenizer):
     doc = en_tokenizer(" This is a cat.")
     assert doc[0].idx == 0
@@ -1,16 +1,10 @@
 # coding: utf-8
 from __future__ import unicode_literals

-from ...en import English
-
-import pytest
-
-
-@pytest.fixture
-def en_tokenizer():
-    return English.Defaults.create_tokenizer()


-def test_big_ellipsis(en_tokenizer):
+def test_issue360(en_tokenizer):
+    """Test tokenization of big ellipsis"""
     tokens = en_tokenizer('$45...............Asking')
     assert len(tokens) > 2
@@ -1,31 +1,25 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import spacy
-from spacy.attrs import ORTH
+from ...attrs import ORTH
+from ...matcher import Matcher

 import pytest


 @pytest.mark.models
-def test_issue429():
-
-    nlp = spacy.load('en', parser=False)
-
+def test_issue429(EN):
     def merge_phrases(matcher, doc, i, matches):
         if i != len(matches) - 1:
             return None
         spans = [(ent_id, label, doc[start:end]) for ent_id, label, start, end in matches]
         for ent_id, label, span in spans:
-            span.merge('NNP' if label else span.root.tag_, span.text, nlp.vocab.strings[label])
+            span.merge('NNP' if label else span.root.tag_, span.text, EN.vocab.strings[label])

-    doc = nlp('a')
-    nlp.matcher.add('key', label='TEST', attrs={}, specs=[[{ORTH: 'a'}]], on_match=merge_phrases)
-    doc = nlp.tokenizer('a b c')
-    nlp.tagger(doc)
-    nlp.matcher(doc)
-
-    for word in doc:
-        print(word.text, word.ent_iob_, word.ent_type_)
-    nlp.entity(doc)
+    doc = EN('a')
+    matcher = Matcher(EN.vocab)
+    matcher.add('key', label='TEST', attrs={}, specs=[[{ORTH: 'a'}]], on_match=merge_phrases)
+    doc = EN.tokenizer('a b c')
+    EN.tagger(doc)
+    matcher(doc)
+    EN.entity(doc)
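For reference, the on_match hook wired into Matcher.add in the rewritten test_issue429 receives (matcher, doc, i, matches), where matches is a list of (ent_id, label, start, end) tuples and i is the index of the current match, as the merge_phrases callback above shows. A minimal illustrative callback of the same shape (the collected list exists only for demonstration):

# Illustrative on_match callback following the signature used in test_issue429.
collected = []


def collect_matches(matcher, doc, i, matches):
    # Like merge_phrases above, only act once the last match has been reported
    if i != len(matches) - 1:
        return None
    for ent_id, label, start, end in matches:
        collected.append((label, doc[start:end].text))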
spacy/tests/regression/test_issue514.py (new file, 21 lines)

@@ -0,0 +1,21 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from ..util import get_doc
+
+import pytest
+
+
+@pytest.mark.models
+def test_issue514(EN):
+    """Test serializing after adding entity"""
+    text = ["This", "is", "a", "sentence", "about", "pasta", "."]
+    vocab = EN.entity.vocab
+    doc = get_doc(vocab, text)
+    EN.entity.add_label("Food")
+    EN.entity(doc)
+    label_id = vocab.strings[u'Food']
+    doc.ents = [(label_id, 5, 6)]
+    assert [(ent.label_, ent.text) for ent in doc.ents] == [("Food", "pasta")]
+    doc2 = get_doc(EN.entity.vocab).from_bytes(doc.to_bytes())
+    assert [(ent.label_, ent.text) for ent in doc2.ents] == [("Food", "pasta")]
@@ -6,5 +6,5 @@ import pytest
 @pytest.mark.models
 def test_issue54(EN):
-    text = u'Talks given by women had a slightly higher number of questions asked (3.2$\pm$0.2) than talks given by men (2.6$\pm$0.1).'
+    text = "Talks given by women had a slightly higher number of questions asked (3.2$\pm$0.2) than talks given by men (2.6$\pm$0.1)."
     tokens = EN(text)
@@ -1,21 +1,20 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import spacy
-import spacy.matcher
-from spacy.attrs import IS_PUNCT, ORTH
+from ...matcher import Matcher
+from ...attrs import IS_PUNCT, ORTH

 import pytest


 @pytest.mark.models
-def test_matcher_segfault():
-    nlp = spacy.load('en', parser=False, entity=False)
-    matcher = spacy.matcher.Matcher(nlp.vocab)
+def test_issue587(EN):
+    """Test that Matcher doesn't segfault on particular input"""
+    matcher = Matcher(EN.vocab)
     content = '''a b; c'''
     matcher.add(entity_key='1', label='TEST', attrs={}, specs=[[{ORTH: 'a'}, {ORTH: 'b'}]])
-    matcher(nlp(content))
+    matcher(EN(content))
     matcher.add(entity_key='2', label='TEST', attrs={}, specs=[[{ORTH: 'a'}, {ORTH: 'b'}, {IS_PUNCT: True}, {ORTH: 'c'}]])
-    matcher(nlp(content))
+    matcher(EN(content))
     matcher.add(entity_key='3', label='TEST', attrs={}, specs=[[{ORTH: 'a'}, {ORTH: 'b'}, {IS_PUNCT: True}, {ORTH: 'd'}]])
-    matcher(nlp(content))
+    matcher(EN(content))
@@ -1,14 +1,12 @@
 # coding: utf-8
 from __future__ import unicode_literals

-from ...vocab import Vocab
-from ...tokens import Doc
 from ...matcher import Matcher

 import pytest


-def test_issue588():
-    matcher = Matcher(Vocab())
+def test_issue588(en_vocab):
+    matcher = Matcher(en_vocab)
     with pytest.raises(ValueError):
         matcher.add(entity_key='1', label='TEST', attrs={}, specs=[[]])
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals

 from ...vocab import Vocab
-from ...tokens import Doc
+from ..util import get_doc

 import pytest

@@ -10,4 +10,4 @@ import pytest
 def test_issue589():
     vocab = Vocab()
     vocab.strings.set_frozen(True)
-    doc = Doc(vocab, words=['whata'])
+    doc = get_doc(vocab, ['whata'])
@@ -1,37 +1,22 @@
 # coding: utf-8
 from __future__ import unicode_literals

-from ...attrs import *
+from ...attrs import ORTH, IS_ALPHA, LIKE_NUM
 from ...matcher import Matcher
-from ...tokens import Doc
-from ...en import English
+from ..util import get_doc


-def test_overlapping_matches():
-    vocab = English.Defaults.create_vocab()
-    doc = Doc(vocab, words=['n', '=', '1', ';', 'a', ':', '5', '%'])
+def test_issue590(en_vocab):
+    """Test overlapping matches"""
+    doc = get_doc(en_vocab, ['n', '=', '1', ';', 'a', ':', '5', '%'])

-    matcher = Matcher(vocab)
-    matcher.add_entity(
-        "ab",
-        acceptor=None,
-        on_match=None
-    )
-    matcher.add_pattern(
-        'ab',
-        [
-            {IS_ALPHA: True},
-            {ORTH: ':'},
-            {LIKE_NUM: True},
-            {ORTH: '%'}
-        ], label='a')
-    matcher.add_pattern(
-        'ab',
-        [
-            {IS_ALPHA: True},
-            {ORTH: '='},
-            {LIKE_NUM: True},
-        ], label='b')
+    matcher = Matcher(en_vocab)
+    matcher.add_entity("ab", acceptor=None, on_match=None)
+    matcher.add_pattern('ab', [{IS_ALPHA: True}, {ORTH: ':'},
+                               {LIKE_NUM: True}, {ORTH: '%'}],
+                        label='a')
+    matcher.add_pattern('ab', [{IS_ALPHA: True}, {ORTH: '='},
+                               {LIKE_NUM: True}],
+                        label='b')
     matches = matcher(doc)
     assert len(matches) == 2
@@ -2,43 +2,23 @@
 from __future__ import unicode_literals

 from ...symbols import POS, VERB, VerbForm_inf
-from ...tokens import Doc
 from ...vocab import Vocab
 from ...lemmatizer import Lemmatizer
+from ..util import get_doc

 import pytest


-@pytest.fixture
-def index():
-    return {'verb': {}}
-
-
-@pytest.fixture
-def exceptions():
-    return {'verb': {}}
-
-
-@pytest.fixture
-def rules():
-    return {"verb": [["ed", "e"]]}
-
-
-@pytest.fixture
-def lemmatizer(index, exceptions, rules):
-    return Lemmatizer(index, exceptions, rules)
-
-
-@pytest.fixture
-def tag_map():
-    return {'VB': {POS: VERB, 'morph': VerbForm_inf}}
-
-
-@pytest.fixture
-def vocab(lemmatizer, tag_map):
-    return Vocab(lemmatizer=lemmatizer, tag_map=tag_map)
-
-
-def test_not_lemmatize_base_forms(vocab):
-    doc = Doc(vocab, words=["Do", "n't", "feed", "the", "dog"])
-    feed = doc[2]
-    feed.tag_ = 'VB'
-    assert feed.text == 'feed'
-    assert feed.lemma_ == 'feed'
+def test_issue595():
+    """Test lemmatization of base forms"""
+    words = ["Do", "n't", "feed", "the", "dog"]
+    tag_map = {'VB': {POS: VERB, 'morph': VerbForm_inf}}
+    rules = {"verb": [["ed", "e"]]}
+
+    lemmatizer = Lemmatizer({'verb': {}}, {'verb': {}}, rules)
+    vocab = Vocab(lemmatizer=lemmatizer, tag_map=tag_map)
+    doc = get_doc(vocab, words)
+
+    doc[2].tag_ = 'VB'
+    assert doc[2].text == 'feed'
+    assert doc[2].lemma_ == 'feed'
@@ -1,15 +1,13 @@
 # coding: utf-8
 from __future__ import unicode_literals

-from ...tokens import Doc
-from ...vocab import Vocab
+from ..util import get_doc


-def test_issue599():
-    doc = Doc(Vocab())
+def test_issue599(en_vocab):
+    doc = get_doc(en_vocab)
     doc.is_tagged = True
     doc.is_parsed = True
-    bytes_ = doc.to_bytes()
-    doc2 = Doc(doc.vocab)
-    doc2.from_bytes(bytes_)
+    doc2 = get_doc(doc.vocab)
+    doc2.from_bytes(doc.to_bytes())
     assert doc2.is_parsed
@@ -1,11 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals

-from ...tokens import Doc
 from ...vocab import Vocab
 from ...attrs import POS
+from ..util import get_doc


 def test_issue600():
-    doc = Doc(Vocab(tag_map={'NN': {'pos': 'NOUN'}}), words=['hello'])
+    vocab = Vocab(tag_map={'NN': {'pos': 'NOUN'}})
+    doc = get_doc(vocab, ["hello"])
     doc[0].tag_ = 'NN'
@@ -1,27 +1,21 @@
 # coding: utf-8
 from __future__ import unicode_literals

-from ...attrs import LOWER, ORTH
-from ...tokens import Doc
-from ...vocab import Vocab
+from ...attrs import ORTH
 from ...matcher import Matcher
+from ..util import get_doc


-def return_false(doc, ent_id, label, start, end):
-    return False
-
-
-def test_matcher_accept():
-    doc = Doc(Vocab(), words=['The', 'golf', 'club', 'is', 'broken'])
-
-    golf_pattern = [
-        { ORTH: "golf"},
-        { ORTH: "club"}
-    ]
+def test_issue605(en_vocab):
+    def return_false(doc, ent_id, label, start, end):
+        return False
+
+    words = ["The", "golf", "club", "is", "broken"]
+    pattern = [{ORTH: "golf"}, {ORTH: "club"}]
+    label = "Sport_Equipment"
+    doc = get_doc(en_vocab, words)
     matcher = Matcher(doc.vocab)

-    matcher.add_entity('Sport_Equipment', acceptor=return_false)
-    matcher.add_pattern("Sport_Equipment", golf_pattern)
+    matcher.add_entity(label, acceptor=return_false)
+    matcher.add_pattern(label, pattern)
     match = matcher(doc)
-
     assert match == []
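test_issue605 exercises the acceptor hook: add_entity takes an acceptor that is called with (doc, ent_id, label, start, end) for every candidate match, and because return_false rejects everything, the matcher returns an empty list. Below is a small sketch of a filtering acceptor with the same signature; the accept-side convention (returning the match tuple) is an assumption, since the diff only demonstrates rejection.

# Illustrative acceptor with the signature used in test_issue605. Returning a
# falsy value drops the candidate; returning the match tuple to accept it is an
# assumption not confirmed by this diff.
def reject_single_tokens(doc, ent_id, label, start, end):
    if end - start < 2:
        return False
    return (ent_id, label, start, end)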
@@ -19,7 +19,7 @@ def test_issue615(en_tokenizer):
            span.merge('NNP' if label else span.root.tag_, span.text, doc.vocab.strings[label])

     text = "The golf club is broken"
-    pattern = [{ ORTH: "golf"}, { ORTH: "club"}]
+    pattern = [{ORTH: "golf"}, {ORTH: "club"}]
     label = "Sport_Equipment"

     doc = en_tokenizer(text)
@@ -4,7 +4,8 @@ from __future__ import unicode_literals
 from ...vocab import Vocab


-def test_load_vocab_with_string():
+def test_issue617():
+    """Test loading Vocab with string"""
     try:
         vocab = Vocab.load('/tmp/vocab')
     except IOError: