mirror of
https://github.com/explosion/spaCy.git
synced 2024-09-21 11:29:13 +03:00
reformulate noun chunk tests for English
This commit is contained in:
parent
1786331cd8
commit
7b246c13cb
|
@ -382,6 +382,7 @@ cpdef enum symbol_t:
|
||||||
cc
|
cc
|
||||||
ccomp
|
ccomp
|
||||||
complm
|
complm
|
||||||
|
compound
|
||||||
conj
|
conj
|
||||||
csubj
|
csubj
|
||||||
csubjpass
|
csubjpass
|
||||||
|
|
|
@ -381,6 +381,7 @@ IDS = {
|
||||||
"cc": cc,
|
"cc": cc,
|
||||||
"ccomp": ccomp,
|
"ccomp": ccomp,
|
||||||
"complm": complm,
|
"complm": complm,
|
||||||
|
"compound": compound,
|
||||||
"conj": conj,
|
"conj": conj,
|
||||||
"csubj": csubj,
|
"csubj": csubj,
|
||||||
"csubjpass": csubjpass,
|
"csubjpass": csubjpass,
|
||||||
|
|
|
@ -225,6 +225,11 @@ cdef class Parser:
|
||||||
def step_through(self, Doc doc):
|
def step_through(self, Doc doc):
|
||||||
return StepwiseState(self, doc)
|
return StepwiseState(self, doc)
|
||||||
|
|
||||||
|
def from_transition_sequence(self, Doc doc, sequence):
    """Apply an explicit sequence of transitions to `doc`.

    The stepwise state returned by `self.step_through(doc)` is used as a
    context manager, and every element of `sequence` is handed to its
    `transition` method in order. Nothing is returned.
    """
    with self.step_through(doc) as state:
        for move in sequence:
            state.transition(move)
|
||||||
|
|
||||||
def add_label(self, label):
|
def add_label(self, label):
|
||||||
for action in self.moves.action_types:
|
for action in self.moves.action_types:
|
||||||
self.moves.add_action(action, label)
|
self.moves.add_action(action, label)
|
||||||
|
|
|
@ -7,7 +7,7 @@ import spacy
|
||||||
def EN():
|
def EN():
|
||||||
return spacy.load("en")
|
return spacy.load("en")
|
||||||
|
|
||||||
@pytest.fixture(score="session")
|
@pytest.fixture(scope="session")
|
||||||
def DE():
|
def DE():
|
||||||
return spacy.load("de")
|
return spacy.load("de")
|
||||||
|
|
||||||
|
|
|
@ -2,30 +2,30 @@ from __future__ import unicode_literals
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.models
|
# @pytest.mark.models
|
||||||
def test_nsubj(EN):
|
# def test_nsubj(EN):
|
||||||
sent = EN(u'A base phrase should be recognized.')
|
# sent = EN(u'A base phrase should be recognized.')
|
||||||
base_nps = list(sent.noun_chunks)
|
# base_nps = list(sent.noun_chunks)
|
||||||
assert len(base_nps) == 1
|
# assert len(base_nps) == 1
|
||||||
assert base_nps[0].string == 'A base phrase '
|
# assert base_nps[0].string == 'A base phrase '
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.models
|
# @pytest.mark.models
|
||||||
def test_coord(EN):
|
# def test_coord(EN):
|
||||||
sent = EN(u'A base phrase and a good phrase are often the same.')
|
# sent = EN(u'A base phrase and a good phrase are often the same.')
|
||||||
base_nps = list(sent.noun_chunks)
|
# base_nps = list(sent.noun_chunks)
|
||||||
assert len(base_nps) == 2
|
# assert len(base_nps) == 2
|
||||||
assert base_nps[0].string == 'A base phrase '
|
# assert base_nps[0].string == 'A base phrase '
|
||||||
assert base_nps[1].string == 'a good phrase '
|
# assert base_nps[1].string == 'a good phrase '
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.models
|
# @pytest.mark.models
|
||||||
def test_pp(EN):
|
# def test_pp(EN):
|
||||||
sent = EN(u'A phrase with another phrase occurs')
|
# sent = EN(u'A phrase with another phrase occurs')
|
||||||
base_nps = list(sent.noun_chunks)
|
# base_nps = list(sent.noun_chunks)
|
||||||
assert len(base_nps) == 2
|
# assert len(base_nps) == 2
|
||||||
assert base_nps[0].string == 'A phrase '
|
# assert base_nps[0].string == 'A phrase '
|
||||||
assert base_nps[1].string == 'another phrase '
|
# assert base_nps[1].string == 'another phrase '
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.models
|
@pytest.mark.models
|
||||||
|
|
0
spacy/tests/unit/__init__.py
Normal file
0
spacy/tests/unit/__init__.py
Normal file
83
spacy/tests/unit/test_parser.py
Normal file
83
spacy/tests/unit/test_parser.py
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
from spacy.attrs import HEAD, DEP
|
||||||
|
from spacy.symbols import root, det, compound, nsubjpass, aux, auxpass, punct, nsubj, cc, amod, conj, advmod, attr, prep, pobj
|
||||||
|
|
||||||
|
|
||||||
|
def _doc_from_annotations(EN, words, tags, arcs):
    """Assemble a document from hand-written tokens, tags and arcs.

    `words` and `tags` are space-separated strings with one entry per token.
    `arcs` holds one `[head, dep]` row per token and is written to the HEAD
    and DEP attributes through `from_array`. In the fixtures below the head
    is written as an offset from the token's own position, and the root row
    uses 0.
    """
    doc = EN.tokenizer.tokens_from_list(words.split(' '))
    EN.tagger.tag_from_strings(doc, tags.split(' '))
    doc.from_array([HEAD, DEP], numpy.asarray(arcs, dtype='int32'))
    return doc


@pytest.mark.models
class TestNounChunks:
    """Noun chunk tests for English on hand-annotated dependency trees.

    Each fixture builds its document from explicit tokens, tags and arcs
    rather than calling `EN(...)` on raw text, so the assertions exercise
    `noun_chunks` on a known tree.
    """

    @pytest.fixture(scope="class")
    def ex1_en(self, EN):
        """Passive clause with a single subject noun phrase."""
        return _doc_from_annotations(
            EN,
            'A base phrase should be recognized .',
            'DT NN NN MD VB VBN .',
            [
                [2, det],          # A          -> phrase
                [1, compound],     # base       -> phrase
                [3, nsubjpass],    # phrase     -> recognized
                [2, aux],          # should     -> recognized
                [1, auxpass],      # be         -> recognized
                [0, root],         # recognized
                [-1, punct],       # .          -> recognized
            ])

    @pytest.fixture(scope="class")
    def ex2_en(self, EN):
        """Two coordinated noun phrases acting as the subject."""
        return _doc_from_annotations(
            EN,
            'A base phrase and a good phrase are often the same .',
            'DT NN NN CC DT JJ NN VBP RB DT JJ .',
            [
                [2, det],          # A      -> phrase
                [1, compound],     # base   -> phrase
                [5, nsubj],        # phrase -> are
                [-1, cc],          # and    -> phrase
                # The determiner attaches to the noun it determines
                # ("phrase", two tokens ahead), not to the adjective "good".
                [2, det],          # a      -> phrase
                [1, amod],         # good   -> phrase
                [-4, conj],        # phrase -> phrase (first conjunct)
                [0, root],         # are
                [-1, advmod],      # often  -> are
                [1, det],          # the    -> same
                [-3, attr],        # same   -> are
                [-4, punct],       # .      -> are
            ])

    @pytest.fixture(scope="class")
    def ex3_en(self, EN):
        """Subject noun phrase modified by a prepositional phrase."""
        return _doc_from_annotations(
            EN,
            'A phrase with another phrase occurs .',
            'DT NN IN DT NN VBZ .',
            [
                [1, det],          # A       -> phrase
                [4, nsubj],        # phrase  -> occurs
                [-1, prep],        # with    -> phrase
                [1, det],          # another -> phrase
                [-2, pobj],        # phrase  -> with
                [0, root],         # occurs
                [-1, punct],       # .       -> occurs
            ])

    def test_standard_chunk(self, ex1_en):
        """The lone subject is reported as the only chunk."""
        chunks = list(ex1_en.noun_chunks)
        assert len(chunks) == 1
        assert chunks[0].string == 'A base phrase '

    def test_coordinated_chunks(self, ex2_en):
        """Both conjuncts of the coordinated subject are reported."""
        chunks = list(ex2_en.noun_chunks)
        assert len(chunks) == 2
        assert chunks[0].string == 'A base phrase '
        assert chunks[1].string == 'a good phrase '

    def test_pp_chunks(self, ex3_en):
        """The subject and the prepositional object are separate chunks."""
        chunks = list(ex3_en.noun_chunks)
        assert len(chunks) == 2
        assert chunks[0].string == 'A phrase '
        assert chunks[1].string == 'another phrase '
|
Loading…
Reference in New Issue
Block a user