reformulate noun chunk tests for English

2025-09-22 20:16:43 +03:00 · 2016-05-03 14:24:35 +02:00 · 2016-05-03 14:24:35 +02:00 · 7b246c13cb
commit 7b246c13cb
parent 1786331cd8
7 changed files with 111 additions and 21 deletions
--- a/spacy/symbols.pxd
+++ b/spacy/symbols.pxd
@@ -382,6 +382,7 @@ cpdef enum symbol_t:
    cc
    ccomp
    complm
    compound
    conj
    csubj
    csubjpass
--- a/spacy/symbols.pyx
+++ b/spacy/symbols.pyx
@@ -381,6 +381,7 @@ IDS = {
    "cc": cc,
    "ccomp": ccomp,
    "complm": complm,
    "compound": compound,
    "conj": conj,
    "csubj": csubj,
    "csubjpass": csubjpass,
--- a/spacy/syntax/parser.pyx
+++ b/spacy/syntax/parser.pyx
@@ -225,6 +225,11 @@ cdef class Parser:
    def step_through(self, Doc doc):
        return StepwiseState(self, doc)
    def from_transition_sequence(self, Doc doc, sequence):
        with self.step_through(doc) as stepwise:
            for transition in sequence:
                stepwise.transition(transition)
    def add_label(self, label):
        for action in self.moves.action_types:
            self.moves.add_action(action, label)
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -7,7 +7,7 @@ import spacy
 def EN():
    return spacy.load("en")
-@pytest.fixture(score="session")
+@pytest.fixture(scope="session")
 def DE():
    return spacy.load("de")
--- a/spacy/tests/parser/test_base_nps.py
+++ b/spacy/tests/parser/test_base_nps.py
@@ -2,30 +2,30 @@ from __future__ import unicode_literals
 import pytest
-@pytest.mark.models
+# @pytest.mark.models
-def test_nsubj(EN):
+# def test_nsubj(EN):
-    sent = EN(u'A base phrase should be recognized.')
+#     sent = EN(u'A base phrase should be recognized.')
-    base_nps = list(sent.noun_chunks)
+#     base_nps = list(sent.noun_chunks)
-    assert len(base_nps) == 1
+#     assert len(base_nps) == 1
-    assert base_nps[0].string == 'A base phrase '
+#     assert base_nps[0].string == 'A base phrase '
-@pytest.mark.models
+# @pytest.mark.models
-def test_coord(EN):
+# def test_coord(EN):
-    sent = EN(u'A base phrase and a good phrase are often the same.')
+#     sent = EN(u'A base phrase and a good phrase are often the same.')
-    base_nps = list(sent.noun_chunks)
+#     base_nps = list(sent.noun_chunks)
-    assert len(base_nps) == 2
+#     assert len(base_nps) == 2
-    assert base_nps[0].string == 'A base phrase '
+#     assert base_nps[0].string == 'A base phrase '
-    assert base_nps[1].string == 'a good phrase '
+#     assert base_nps[1].string == 'a good phrase '
-@pytest.mark.models
+# @pytest.mark.models
-def test_pp(EN):
+# def test_pp(EN):
-    sent = EN(u'A phrase with another phrase occurs')
+#     sent = EN(u'A phrase with another phrase occurs')
-    base_nps = list(sent.noun_chunks)
+#     base_nps = list(sent.noun_chunks)
-    assert len(base_nps) == 2
+#     assert len(base_nps) == 2
-    assert base_nps[0].string == 'A phrase '
+#     assert base_nps[0].string == 'A phrase '
-    assert base_nps[1].string == 'another phrase ' 
+#     assert base_nps[1].string == 'another phrase ' 
 @pytest.mark.models
--- a/spacy/tests/unit/__init__.py
+++ b/spacy/tests/unit/__init__.py
--- a/spacy/tests/unit/test_parser.py
+++ b/spacy/tests/unit/test_parser.py
@@ -0,0 +1,83 @@
 from __future__ import unicode_literals
 import pytest
 import numpy
 from spacy.attrs import HEAD, DEP
 from spacy.symbols import root, det, compound, nsubjpass, aux, auxpass, punct, nsubj, cc, amod, conj, advmod, attr, prep, pobj
 @pytest.mark.models
 class TestNounChunks:
    @pytest.fixture(scope="class")
    def ex1_en(self, EN):
        example = EN.tokenizer.tokens_from_list('A base phrase should be recognized .'.split(' '))
        EN.tagger.tag_from_strings(example, 'DT NN NN MD VB VBN .'.split(' '))
        example.from_array([HEAD, DEP],
        numpy.asarray(
            [
                [2, det],
                [1, compound],
                [3, nsubjpass],
                [2, aux],
                [1, auxpass],
                [0, root],
                [-1, punct]
            ], dtype='int32'))
        return example
    @pytest.fixture(scope="class")
    def ex2_en(self, EN):
        example = EN.tokenizer.tokens_from_list('A base phrase and a good phrase are often the same .'.split(' '))
        EN.tagger.tag_from_strings(example, 'DT NN NN CC DT JJ NN VBP RB DT JJ .'.split(' '))
        example.from_array([HEAD, DEP],
        numpy.asarray(
            [
                [2, det],
                [1, compound],
                [5, nsubj],
                [-1, cc],
                [1, det],
                [1, amod],
                [-4, conj],
                [0, root],
                [-1, advmod],
                [1, det],
                [-3, attr],
                [-4, punct]
            ], dtype='int32'))
        return example
    @pytest.fixture(scope="class")
    def ex3_en(self, EN):
        example = EN.tokenizer.tokens_from_list('A phrase with another phrase occurs .'.split(' '))
        EN.tagger.tag_from_strings(example, 'DT NN IN DT NN VBZ .'.split(' '))
        example.from_array([HEAD, DEP],
        numpy.asarray(
            [
                [1, det],
                [4, nsubj],
                [-1, prep],
                [1, det],
                [-2, pobj],
                [0, root],
                [-1, punct]
            ], dtype='int32'))
        return example
    def test_standard_chunk(self, ex1_en):
        chunks = list(ex1_en.noun_chunks)
        assert len(chunks) == 1
        assert chunks[0].string == 'A base phrase '
    def test_coordinated_chunks(self, ex2_en):
        chunks = list(ex2_en.noun_chunks)
        assert len(chunks) == 2
        assert chunks[0].string == 'A base phrase '
        assert chunks[1].string == 'a good phrase '
    def test_pp_chunks(self, ex3_en):
        chunks = list(ex3_en.noun_chunks)
        assert len(chunks) == 2
        assert chunks[0].string == 'A phrase '
        assert chunks[1].string == 'another phrase '