Modernise Doc noun chunks tests

2025-12-07 02:04:27 +03:00 · 2017-01-11 18:54:56 +01:00 · 2017-01-11 18:54:56 +01:00 · e027936920
commit e027936920
parent 439f396acd
1 changed file with 18 additions and 20 deletions
--- a/spacy/tests/tokens/test_noun_chunks.py
+++ b/spacy/tests/tokens/test_noun_chunks.py
@@ -1,27 +1,26 @@
-import numpy as np
+# coding: utf-8
+from __future__ import unicode_literals

-from spacy.attrs import HEAD, DEP
-from spacy.symbols import nsubj, dobj, punct, amod, nmod, conj, cc, root
-from spacy.en import English
-from spacy.syntax.iterators import english_noun_chunks
+from ...attrs import HEAD, DEP
+from ...symbols import nsubj, dobj, amod, nmod, conj, cc, root
+from ...syntax.iterators import english_noun_chunks
+from ..util import get_doc
+
+import numpy


-def test_not_nested():
-    nlp = English(parser=False, entity=False)
-    sent = u'''Peter has chronic command and control issues'''.strip()
-    tokens = nlp(sent)
+def test_noun_chunks_not_nested(en_tokenizer):
+    text = "Peter has chronic command and control issues"
+    heads = [1, 0, 4, 3, -1, -2, -5]
+    deps = ['nsubj', 'ROOT', 'amod', 'nmod', 'cc', 'conj', 'dobj']
+
+    tokens = en_tokenizer(text)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)
+
    tokens.from_array(
        [HEAD, DEP],
-        np.asarray(
-            [
-                [1, nsubj],
-                [0, root],
-                [4, amod],
-                [3, nmod],
-                [-1, cc],
-                [-2, conj],
-                [-5, dobj]
-            ], dtype='int32'))
+        numpy.asarray([[1, nsubj], [0, root], [4, amod], [3, nmod], [-1, cc],
+                       [-2, conj], [-5, dobj]], dtype='int32'))
    tokens.noun_chunks_iterator = english_noun_chunks
    word_occurred = {}
    for chunk in tokens.noun_chunks:
@@ -30,4 +29,3 @@ def test_not_nested():
            word_occurred[word.text] += 1
    for word, freq in word_occurred.items():
        assert freq == 1, (word, [chunk.text for chunk in tokens.noun_chunks])
-