Modernise Doc parse tree navigation tests and don't depend on models

2025-11-07 11:27:37 +03:00 · 2017-01-11 21:14:15 +01:00 · 2017-01-11 21:14:15 +01:00 · 55d151aa61
commit 55d151aa61
parent 7262421bb2
1 changed file with 62 additions and 23 deletions
--- a/spacy/tests/parser/test_parse_navigate.py
+++ b/spacy/tests/parser/test_parse_navigate.py
@@ -1,35 +1,74 @@
 # coding: utf-8
 from __future__ import unicode_literals
-from os import path
+
-import io
+from ..util import get_doc
 import pytest
@pytest.fixture
-def sun_text():
+def text():
-    with io.open(path.join(path.dirname(__file__), '..', 'sun.txt'), 'r',
+    return u"""
-                 encoding='utf8') as file_:
+It was a bright cold day in April, and the clocks were striking thirteen.
-        text = file_.read()
+Winston Smith, his chin nuzzled into his breast in an effort to escape the
-    return text
+vile wind, slipped quickly through the glass doors of Victory Mansions,
 though not quickly enough to prevent a swirl of gritty dust from entering
 along with him.
 The hallway smelt of boiled cabbage and old rag mats. At one end of it a
 coloured poster, too large for indoor display, had been tacked to the wall.
 It depicted simply an enormous face, more than a metre wide: the face of a
 man of about forty-five, with a heavy black moustache and ruggedly handsome
 features. Winston made for the stairs. It was no use trying the lift. Even at
 the best of times it was seldom working, and at present the electric current
 was cut off during daylight hours. It was part of the economy drive in
 preparation for Hate Week. The flat was seven flights up, and Winston, who
 was thirty-nine and had a varicose ulcer above his right ankle, went slowly,
 resting several times on the way. On each landing, opposite the lift-shaft,
 the poster with the enormous face gazed from the wall. It was one of those
 pictures which are so contrived that the eyes follow you about when you move.
 BIG BROTHER IS WATCHING YOU, the caption beneath it ran.
 """
-@pytest.mark.models
+@pytest.fixture
-def test_consistency(EN, sun_text):
+def heads():
-    tokens = EN(sun_text)
+    return [1, 1, 0, 3, 2, 1, -4, -1, -1, -7, -8, 1, -10, 2, 1, -3, -1, -15,
-    for head in tokens:
+            -1, 1, 4, -1, 1, -3, 0, -1, 1, -2, -4, 1, -2, 1, -2, 3, -1, 1,
            -4, -13, -14, -1, -2, 2, 1, -3, -1, 1, -2, -9, -1, 3, 1, 1, -14,
            1, -2, 1, -2, -1, 1, -2, -6, -1, -1, -2, -1, -1, -42, -1, 2, 1,
            0, -1, 1, -2, -1, 2, 1, -4, -8, 0, 1, -2, -1, -1, 3, -1, 1, -6,
            9, 1, 7, -1, 1, -2, 3, 2, 1, -10, -1, 1, -2, -22, -1, 1, 0, -1,
            2, 1, -4, -1, -2, -1, 1, -2, -6, -7, 1, -9, -1, 2, -1, -3, -1,
            3, 2, 1, -4, -19, -24, 3, 2, 1, -4, -1, 1, 2, -1, -5, -34, 1, 0,
            -1, 1, -2, -4, 1, 0, 1, -2, -1, 1, -2, -6, 1, 9, -1, 1, -3, -1,
            -1, 3, 2, 1, 0, -1, -2, 7, -1, 5, 1, 3, -1, 1, -10, -1, -2, 1,
            -2, -15, 1, 0, -1, -1, 2, 1, -3, -1, -1, -2, -1, 1, -2, -12, 1,
            1, 0, 1, -2, -1, -2, -3, 9, -1, 2, -1, -4, 2, 1, -3, -4, -15, 2,
            1, -3, -1, 2, 1, -3, -8, -9, -1, -2, -1, -4, 1, -2, -3, 1, -2,
            -19, 17, 1, -2, 14, 13, 3, 2, 1, -4, 8, -1, 1, 5, -1, 2, 1, -3,
            0, -1, 1, -2, -4, 1, 0, -1, -1, 2, -1, -3, 1, -2, 1, -2, 3, 1,
            1, -4, -1, -2, 2, 1, -5, -19, -1, 1, 1, 0, 1, 6, -1, 1, -3, -1,
            -1, -8, -9, -1]
 def test_parser_parse_navigate_consistency(en_tokenizer, text, heads):
    tokens = en_tokenizer(text)
    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
    for head in doc:
        for child in head.lefts:
            assert child.head is head
        for child in head.rights:
            assert child.head is head
-@pytest.mark.models
+def test_parser_parse_navigate_child_consistency(en_tokenizer, text, heads):
-def test_child_consistency(EN, sun_text):
+    tokens = en_tokenizer(text)
-    tokens = EN(sun_text)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
    lefts = {}
    rights = {}
-    for head in tokens:
+    for head in doc:
        assert head.i not in lefts
        lefts[head.i] = set()
        for left in head.lefts:
@@ -38,10 +77,10 @@ def test_child_consistency(EN, sun_text):
        rights[head.i] = set()
        for right in head.rights:
            rights[head.i].add(right.i)
-    for head in tokens:
+    for head in doc:
        assert head.n_rights == len(rights[head.i])
        assert head.n_lefts == len(lefts[head.i])
-    for child in tokens:
+    for child in doc:
        if child.i < child.head.i:
            assert child.i in lefts[child.head.i]
            assert child.i not in rights[child.head.i]
@@ -56,12 +95,12 @@ def test_child_consistency(EN, sun_text):
        assert not children
-@pytest.mark.models
+def test_parser_parse_navigate_edges(en_tokenizer, text, heads):
-def test_edges(EN, sun_text):
+    tokens = en_tokenizer(text)
-    tokens = EN(sun_text)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
-    for token in tokens:
+    for token in doc:
        subtree = list(token.subtree)
-        debug = '\t'.join((token.orth_, token.left_edge.orth_, subtree[0].orth_))
+        debug = '\t'.join((token.text, token.left_edge.text, subtree[0].text))
        assert token.left_edge == subtree[0], debug
-        debug = '\t'.join((token.orth_, token.right_edge.orth_, subtree[-1].orth_, token.right_edge.head.orth_))
+        debug = '\t'.join((token.text, token.right_edge.text, subtree[-1].text, token.right_edge.head.text))
        assert token.right_edge == subtree[-1], debug