Add test for Issue #1207

2025-08-04 20:30:24 +03:00 · 2017-07-22 14:14:01 +02:00 · 2017-07-22 14:14:01 +02:00 · dfbc7e49de
commit dfbc7e49de
parent 0ae3807d7d
1 changed files with 25 additions and 0 deletions
--- a/spacy/tests/regression/test_issue1307.py
+++ b/spacy/tests/regression/test_issue1307.py
@ -0,0 +1,25 @@
+from __future__ import unicode_literals
+from ..util import get_doc
+from ...vocab import Vocab
+from ...en import English
+
+
+def test_span_noun_chunks():
+    vocab = Vocab(lang='en', tag_map=English.Defaults.tag_map)
+    words = "Employees are recruiting talented staffers from overseas .".split()
+    heads = [1, 1, 0, 1, -2, -1, -5]
+    deps = ['nsubj', 'aux', 'ROOT', 'nmod', 'dobj', 'adv', 'pobj']
+    tags = ['NNS', 'VBP', 'VBG', 'JJ', 'NNS', 'IN', 'NN', '.']
+    doc = get_doc(vocab, words=words, heads=heads, deps=deps, tags=tags)
+    doc.is_parsed = True
+    
+    noun_chunks = [np.text for np in doc.noun_chunks]
+    assert noun_chunks == ['Employees', 'talented staffers', 'overseas']
+
+    span = doc[0:4]
+    noun_chunks = [np.text for np in span.noun_chunks]
+    assert noun_chunks == ['Employees']
+
+    for sent in doc.sents:
+        noun_chunks = [np.text for np in sent.noun_chunks]
+        assert noun_chunks == ['Employees', 'talented staffers', 'overseas']