Modernise Doc parse tree navigation tests and don't depend on models

2026-02-18 05:00:41 +03:00 · 2017-01-11 21:14:15 +01:00 · 2017-01-11 21:14:15 +01:00 · 55d151aa61
commit 55d151aa61
parent 7262421bb2
1 changed file with 62 additions and 23 deletions
--- a/spacy/tests/parser/test_parse_navigate.py
+++ b/spacy/tests/parser/test_parse_navigate.py
@@ -1,35 +1,74 @@
+# coding: utf-8
 from __future__ import unicode_literals
-from os import path
-import io
+
+from ..util import get_doc

 import pytest


@pytest.fixture
-def sun_text():
-    with io.open(path.join(path.dirname(__file__), '..', 'sun.txt'), 'r',
-                 encoding='utf8') as file_:
-        text = file_.read()
-    return text
+def text():
+    return u"""
+It was a bright cold day in April, and the clocks were striking thirteen.
+Winston Smith, his chin nuzzled into his breast in an effort to escape the
+vile wind, slipped quickly through the glass doors of Victory Mansions,
+though not quickly enough to prevent a swirl of gritty dust from entering
+along with him.
+
+The hallway smelt of boiled cabbage and old rag mats. At one end of it a
+coloured poster, too large for indoor display, had been tacked to the wall.
+It depicted simply an enormous face, more than a metre wide: the face of a
+man of about forty-five, with a heavy black moustache and ruggedly handsome
+features. Winston made for the stairs. It was no use trying the lift. Even at
+the best of times it was seldom working, and at present the electric current
+was cut off during daylight hours. It was part of the economy drive in
+preparation for Hate Week. The flat was seven flights up, and Winston, who
+was thirty-nine and had a varicose ulcer above his right ankle, went slowly,
+resting several times on the way. On each landing, opposite the lift-shaft,
+the poster with the enormous face gazed from the wall. It was one of those
+pictures which are so contrived that the eyes follow you about when you move.
+BIG BROTHER IS WATCHING YOU, the caption beneath it ran.
+"""


-@pytest.mark.models
-def test_consistency(EN, sun_text):
-    tokens = EN(sun_text)
-    for head in tokens:
+@pytest.fixture
+def heads():
+    return [1, 1, 0, 3, 2, 1, -4, -1, -1, -7, -8, 1, -10, 2, 1, -3, -1, -15,
+            -1, 1, 4, -1, 1, -3, 0, -1, 1, -2, -4, 1, -2, 1, -2, 3, -1, 1,
+            -4, -13, -14, -1, -2, 2, 1, -3, -1, 1, -2, -9, -1, 3, 1, 1, -14,
+            1, -2, 1, -2, -1, 1, -2, -6, -1, -1, -2, -1, -1, -42, -1, 2, 1,
+            0, -1, 1, -2, -1, 2, 1, -4, -8, 0, 1, -2, -1, -1, 3, -1, 1, -6,
+            9, 1, 7, -1, 1, -2, 3, 2, 1, -10, -1, 1, -2, -22, -1, 1, 0, -1,
+            2, 1, -4, -1, -2, -1, 1, -2, -6, -7, 1, -9, -1, 2, -1, -3, -1,
+            3, 2, 1, -4, -19, -24, 3, 2, 1, -4, -1, 1, 2, -1, -5, -34, 1, 0,
+            -1, 1, -2, -4, 1, 0, 1, -2, -1, 1, -2, -6, 1, 9, -1, 1, -3, -1,
+            -1, 3, 2, 1, 0, -1, -2, 7, -1, 5, 1, 3, -1, 1, -10, -1, -2, 1,
+            -2, -15, 1, 0, -1, -1, 2, 1, -3, -1, -1, -2, -1, 1, -2, -12, 1,
+            1, 0, 1, -2, -1, -2, -3, 9, -1, 2, -1, -4, 2, 1, -3, -4, -15, 2,
+            1, -3, -1, 2, 1, -3, -8, -9, -1, -2, -1, -4, 1, -2, -3, 1, -2,
+            -19, 17, 1, -2, 14, 13, 3, 2, 1, -4, 8, -1, 1, 5, -1, 2, 1, -3,
+            0, -1, 1, -2, -4, 1, 0, -1, -1, 2, -1, -3, 1, -2, 1, -2, 3, 1,
+            1, -4, -1, -2, 2, 1, -5, -19, -1, 1, 1, 0, 1, 6, -1, 1, -3, -1,
+            -1, -8, -9, -1]
+
+
+def test_parser_parse_navigate_consistency(en_tokenizer, text, heads):
+    tokens = en_tokenizer(text)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
+    for head in doc:
        for child in head.lefts:
            assert child.head is head
        for child in head.rights:
            assert child.head is head


-@pytest.mark.models
-def test_child_consistency(EN, sun_text):
-    tokens = EN(sun_text)
+def test_parser_parse_navigate_child_consistency(en_tokenizer, text, heads):
+    tokens = en_tokenizer(text)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)

    lefts = {}
    rights = {}
-    for head in tokens:
+    for head in doc:
        assert head.i not in lefts
        lefts[head.i] = set()
        for left in head.lefts:
@@ -38,10 +77,10 @@ def test_child_consistency(EN, sun_text):
        rights[head.i] = set()
        for right in head.rights:
            rights[head.i].add(right.i)
-    for head in tokens:
+    for head in doc:
        assert head.n_rights == len(rights[head.i])
        assert head.n_lefts == len(lefts[head.i])
-    for child in tokens:
+    for child in doc:
        if child.i < child.head.i:
            assert child.i in lefts[child.head.i]
            assert child.i not in rights[child.head.i]
@@ -56,12 +95,12 @@ def test_child_consistency(EN, sun_text):
        assert not children


-@pytest.mark.models
-def test_edges(EN, sun_text):
-    tokens = EN(sun_text)
-    for token in tokens:
+def test_parser_parse_navigate_edges(en_tokenizer, text, heads):
+    tokens = en_tokenizer(text)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
+    for token in doc:
        subtree = list(token.subtree)
-        debug = '\t'.join((token.orth_, token.left_edge.orth_, subtree[0].orth_))
+        debug = '\t'.join((token.text, token.left_edge.text, subtree[0].text))
        assert token.left_edge == subtree[0], debug
-        debug = '\t'.join((token.orth_, token.right_edge.orth_, subtree[-1].orth_, token.right_edge.head.orth_))
+        debug = '\t'.join((token.text, token.right_edge.text, subtree[-1].text, token.right_edge.head.text))
        assert token.right_edge == subtree[-1], debug