Modernise span tests and don't depend on models

2025-10-27 06:01:28 +03:00 · 2017-01-12 15:30:49 +01:00 · 2017-01-12 15:30:49 +01:00 · 7cb3d74426
commit 7cb3d74426
parent 92e3d8b3ee
1 changed file with 48 additions and 68 deletions
--- a/spacy/tests/spans/test_span.py
+++ b/spacy/tests/spans/test_span.py
@@ -1,19 +1,22 @@
 # coding: utf-8
 from __future__ import unicode_literals
-from spacy.attrs import HEAD
+
-from spacy.en import English
+from ..util import get_doc
 from spacy.tokens.doc import Doc
 import numpy as np
 import pytest
@pytest.fixture
-def doc(EN):
+def doc(en_tokenizer):
-    return EN('This is a sentence. This is another sentence. And a third.')
+    text = "This is a sentence. This is another sentence. And a third."
    heads = [1, 0, 1, -2, -3, 1, 0, 1, -2, -3, 0, 1, -2, -1]
    deps = ['nsubj', 'ROOT', 'det', 'attr', 'punct', 'nsubj', 'ROOT', 'det',
            'attr', 'punct', 'ROOT', 'det', 'npadvmod', 'punct']
    tokens = en_tokenizer(text)
    return get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)
-@pytest.mark.models
+def test_spans_sent_spans(doc):
 def test_sent_spans(doc):
    sents = list(doc.sents)
    assert sents[0].start == 0
    assert sents[0].end == 5
@@ -21,73 +24,50 @@ def test_sent_spans(doc):
    assert sum(len(sent) for sent in sents) == len(doc)
-@pytest.mark.models
+def test_spans_root(doc):
-def test_root(doc):
+    span = doc[2:4]
-    np = doc[2:4]
+    assert len(span) == 2
-    assert len(np) == 2
+    assert span.text == 'a sentence'
-    assert np.orth_ == 'a sentence'
+    assert span.root.text == 'sentence'
-    assert np.root.orth_ == 'sentence'
+    assert span.root.head.text == 'is'
    assert np.root.head.orth_ == 'is'
-def test_root2(EN):
+def test_spans_root2(en_tokenizer):
-    text = 'through North and South Carolina'
+    text = "through North and South Carolina"
-    doc = EN(text)
+    heads = [0, 3, -1, -2, -4]
-    heads = np.asarray([[0, 3, -1, -2, -4]], dtype='int32')
+    tokens = en_tokenizer(text)
-    doc.from_array([HEAD], heads.T)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
-    south_carolina = doc[-2:]
+    assert doc[-2:].root.text == 'Carolina'
    assert south_carolina.root.text == 'Carolina'
-def test_sent(doc):
+def test_spans_span_sent(doc):
-    '''Test new span.sent property'''
+    """Test span.sent property"""
    #return EN('This is a sentence. This is another sentence. And a third.')
    heads = np.asarray([[1, 0, -1, -1, -1, 1, 0, -1, -1, -1, 2, 1, 0, -1]], dtype='int32')
    doc.from_array([HEAD], heads.T)
    assert len(list(doc.sents))
-    span = doc[:2]
+    assert doc[:2].sent.root.text == 'is'
-    assert span.sent.root.text == 'is'
+    assert doc[:2].sent.text == 'This is a sentence .'
-    assert span.sent.text == 'This is a sentence.'
+    assert doc[6:7].sent.root.left_edge.text == 'This'
    span = doc[6:7]
    assert span.sent.root.left_edge.text == 'This'
-def test_default_sentiment(EN):
+def test_spans_default_sentiment(en_tokenizer):
-    '''Test new span.sentiment property's default averaging behaviour'''
+    """Test span.sentiment property's default averaging behaviour"""
-    good = EN.vocab[u'good']
+    text = "good stuff bad stuff"
-    good.sentiment = 3.0
+    tokens = en_tokenizer(text)
-    bad = EN.vocab[u'bad']
+    tokens.vocab[tokens[0].text].sentiment = 3.0
-    bad.sentiment = -2.0
+    tokens.vocab[tokens[2].text].sentiment = -2.0
-
+    doc = get_doc(tokens.vocab, [t.text for t in tokens])
-    doc = Doc(EN.vocab, [u'good', 'stuff', u'bad', u'stuff'])
+    assert doc[:2].sentiment == 3.0 / 2
-
+    assert doc[-2:].sentiment == -2. / 2
-    good_stuff = doc[:2]
+    assert doc[:-1].sentiment == (3.+-2) / 3.
    assert good_stuff.sentiment == 3.0 / 2
    bad_stuff = doc[-2:]
    assert bad_stuff.sentiment == -2. / 2
    good_stuff_bad = doc[:-1]
    assert good_stuff_bad.sentiment == (3.+-2) / 3.
-
+def test_spans_override_sentiment(en_tokenizer):
-def test_override_sentiment(EN):
+    """Test span.sentiment property's default averaging behaviour"""
-    '''Test new span.sentiment property's default averaging behaviour'''
+    text = "good stuff bad stuff"
-    good = EN.vocab[u'good']
+    tokens = en_tokenizer(text)
-    good.sentiment = 3.0
+    tokens.vocab[tokens[0].text].sentiment = 3.0
-    bad = EN.vocab[u'bad']
+    tokens.vocab[tokens[2].text].sentiment = -2.0
-    bad.sentiment = -2.0
+    doc = get_doc(tokens.vocab, [t.text for t in tokens])
    doc = Doc(EN.vocab, [u'good', 'stuff', u'bad', u'stuff'])
    doc.user_span_hooks['sentiment'] = lambda span: 10.0
-
+    assert doc[:2].sentiment == 10.0
-    good_stuff = doc[:2]
+    assert doc[-2:].sentiment == 10.0
-    assert good_stuff.sentiment == 10.0
+    assert doc[:-1].sentiment == 10.0
    bad_stuff = doc[-2:]
    assert bad_stuff.sentiment == 10.0
    good_stuff_bad = doc[:-1]
    assert good_stuff_bad.sentiment == 10.0