From e027936920d1421d0ab29b7e90350d28e1411500 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Wed, 11 Jan 2017 18:54:56 +0100
Subject: [PATCH] Modernise Doc noun chunks tests

---
 spacy/tests/tokens/test_noun_chunks.py | 38 ++++++++++++--------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/spacy/tests/tokens/test_noun_chunks.py b/spacy/tests/tokens/test_noun_chunks.py
index 642ce715b..659edbf57 100644
--- a/spacy/tests/tokens/test_noun_chunks.py
+++ b/spacy/tests/tokens/test_noun_chunks.py
@@ -1,27 +1,26 @@
-import numpy as np
+# coding: utf-8
+from __future__ import unicode_literals
 
-from spacy.attrs import HEAD, DEP
-from spacy.symbols import nsubj, dobj, punct, amod, nmod, conj, cc, root
-from spacy.en import English
-from spacy.syntax.iterators import english_noun_chunks
+from ...attrs import HEAD, DEP
+from ...symbols import nsubj, dobj, amod, nmod, conj, cc, root
+from ...syntax.iterators import english_noun_chunks
+from ..util import get_doc
+
+import numpy
 
 
-def test_not_nested():
-    nlp = English(parser=False, entity=False)
-    sent = u'''Peter has chronic command and control issues'''.strip()
-    tokens = nlp(sent)
+def test_noun_chunks_not_nested(en_tokenizer):
+    text = "Peter has chronic command and control issues"
+    heads = [1, 0, 4, 3, -1, -2, -5]
+    deps = ['nsubj', 'ROOT', 'amod', 'nmod', 'cc', 'conj', 'dobj']
+
+    tokens = en_tokenizer(text)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)
+
     tokens.from_array(
         [HEAD, DEP],
-        np.asarray(
-            [
-                [1, nsubj],
-                [0, root],
-                [4, amod],
-                [3, nmod],
-                [-1, cc],
-                [-2, conj],
-                [-5, dobj]
-            ], dtype='int32'))
+        numpy.asarray([[1, nsubj], [0, root], [4, amod], [3, nmod], [-1, cc],
+                       [-2, conj], [-5, dobj]], dtype='int32'))
     tokens.noun_chunks_iterator = english_noun_chunks
     word_occurred = {}
    for chunk in tokens.noun_chunks:
@@ -30,4 +29,3 @@ def test_not_nested():
         word_occurred[word.text] += 1
     for word, freq in word_occurred.items():
         assert freq == 1, (word, [chunk.text for chunk in tokens.noun_chunks])
-
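
For context on the change: the modernised test builds its parsed Doc with the
shared get_doc helper from the test utilities instead of loading the full
English pipeline, so the dependency parse is declared as plain heads/deps
lists rather than produced by the statistical parser. As a minimal sketch of
that idea, assuming 2017-era spaCy APIs (Doc(vocab, words=...), relative HEAD
offsets, and Doc.from_array with [HEAD, DEP] columns) and not the actual
implementation in spacy/tests/util.py, such a helper could look like:

    # Illustrative sketch only, not the real spacy/tests/util.py code.
    import numpy
    from spacy.tokens import Doc
    from spacy.attrs import HEAD, DEP

    def get_doc(vocab, words, heads=None, deps=None):
        # Build a Doc straight from the word list, then attach the parse.
        doc = Doc(vocab, words=words)
        if heads is not None and deps is not None:
            # One row per token: [relative head offset, dep label id].
            attrs = numpy.asarray(
                [[head, vocab.strings[dep]] for head, dep in zip(heads, deps)],
                dtype='int32')
            doc.from_array([HEAD, DEP], attrs)
        return doc

Declaring the parse by hand keeps the test fast and deterministic: it
exercises the noun chunk iterator itself rather than the parser's output on
this particular sentence.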