Modernise Doc noun chunks tests

2025-10-17 09:14:14 +03:00 · 2017-01-11 18:54:56 +01:00 · 2017-01-11 18:54:56 +01:00 · e027936920
commit e027936920
parent 439f396acd
1 changed file with 18 additions and 20 deletions
--- a/spacy/tests/tokens/test_noun_chunks.py
+++ b/spacy/tests/tokens/test_noun_chunks.py
@@ -1,27 +1,26 @@
-import numpy as np
+# coding: utf-8
 from __future__ import unicode_literals
-from spacy.attrs import HEAD, DEP
+from ...attrs import HEAD, DEP
-from spacy.symbols import nsubj, dobj, punct, amod, nmod, conj, cc, root
+from ...symbols import nsubj, dobj, amod, nmod, conj, cc, root
-from spacy.en import English
+from ...syntax.iterators import english_noun_chunks
-from spacy.syntax.iterators import english_noun_chunks
+from ..util import get_doc
 import numpy
-def test_not_nested():
+def test_noun_chunks_not_nested(en_tokenizer):
-    nlp = English(parser=False, entity=False)
+    text = "Peter has chronic command and control issues"
-    sent = u'''Peter has chronic command and control issues'''.strip()
+    heads = [1, 0, 4, 3, -1, -2, -5]
-    tokens = nlp(sent)
+    deps = ['nsubj', 'ROOT', 'amod', 'nmod', 'cc', 'conj', 'dobj']
    tokens = en_tokenizer(text)
    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)
    tokens.from_array(
        [HEAD, DEP],
-        np.asarray(
+        numpy.asarray([[1, nsubj], [0, root], [4, amod], [3, nmod], [-1, cc],
-            [
+                       [-2, conj], [-5, dobj]], dtype='int32'))
                [1, nsubj],
                [0, root],
                [4, amod],
                [3, nmod],
                [-1, cc],
                [-2, conj],
                [-5, dobj]
            ], dtype='int32'))
    tokens.noun_chunks_iterator = english_noun_chunks
    word_occurred = {}
    for chunk in tokens.noun_chunks:
@@ -30,4 +29,3 @@ def test_not_nested():
            word_occurred[word.text] += 1
    for word, freq in word_occurred.items():
        assert freq == 1, (word, [chunk.text for chunk in tokens.noun_chunks])