mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
failing test to reproduce overlapping spans problem
This commit is contained in:
parent
49ef06d793
commit
36a94c409a
21
spacy/tests/regression/test_issue5458.py
Normal file
21
spacy/tests/regression/test_issue5458.py
Normal file
|
@ -0,0 +1,21 @@
|
|||
from spacy.lang.en import English
|
||||
from spacy.lang.en.syntax_iterators import noun_chunks
|
||||
from spacy.tests.util import get_doc
|
||||
from spacy.vocab import Vocab
|
||||
|
||||
|
||||
def test_issue5458():
    """Regression test for issue 5458: the noun chunker must not yield overlapping spans.

    A hand-annotated English sentence is turned into a doc, the English
    noun chunk iterator is attached to it, and the ``merge_noun_chunks``
    pipe is run over the result. There is no explicit assertion: if the
    chunker produces overlapping spans, merging fails with an E102 error
    ("Can't merge non-disjoint spans").
    """
    # fmt: off
    tokens = [
        "In", "an", "era", "where", "markets", "have",
        "brought", "prosperity", "and", "empowerment", ".",
    ]
    dep_labels = [
        "ROOT", "det", "pobj", "advmod", "nsubj", "aux",
        "relcl", "dobj", "cc", "conj", "punct",
    ]
    pos_labels = [
        "ADP", "DET", "NOUN", "ADV", "NOUN", "AUX",
        "VERB", "NOUN", "CCONJ", "NOUN", "PUNCT",
    ]
    # NOTE(review): these look like head offsets relative to each token's own
    # position (the final token's -10 points back at the first token) -- confirm
    # against get_doc.
    head_offsets = [0, 1, -2, 6, 2, 1, -4, -1, -1, -2, -10]
    # fmt: on

    shared_vocab = Vocab(strings=tokens)
    annotated_doc = get_doc(shared_vocab, tokens, pos_labels, head_offsets, dep_labels)
    annotated_doc.noun_chunks_iterator = noun_chunks

    # Merging is the actual check: overlapping noun chunks make it raise E102.
    pipeline = English()
    merge_noun_chunks_pipe = pipeline.create_pipe("merge_noun_chunks")
    merge_noun_chunks_pipe(annotated_doc)
|
Loading…
Reference in New Issue
Block a user