Modernise Doc noun chunks tests

This commit is contained in:
Ines Montani 2017-01-11 18:54:56 +01:00
parent 439f396acd
commit e027936920

View File

@@ -1,27 +1,26 @@
-import numpy as np
+# coding: utf-8
 from __future__ import unicode_literals
-from spacy.attrs import HEAD, DEP
-from spacy.symbols import nsubj, dobj, punct, amod, nmod, conj, cc, root
-from spacy.en import English
-from spacy.syntax.iterators import english_noun_chunks
+
+from ...attrs import HEAD, DEP
+from ...symbols import nsubj, dobj, amod, nmod, conj, cc, root
+from ...syntax.iterators import english_noun_chunks
+from ..util import get_doc
+
+import numpy
 
 
-def test_not_nested():
-    nlp = English(parser=False, entity=False)
-    sent = u'''Peter has chronic command and control issues'''.strip()
-    tokens = nlp(sent)
+def test_noun_chunks_not_nested(en_tokenizer):
+    text = "Peter has chronic command and control issues"
+    heads = [1, 0, 4, 3, -1, -2, -5]
+    deps = ['nsubj', 'ROOT', 'amod', 'nmod', 'cc', 'conj', 'dobj']
+
+    tokens = en_tokenizer(text)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)
     tokens.from_array(
         [HEAD, DEP],
-        np.asarray(
-            [
-                [1, nsubj],
-                [0, root],
-                [4, amod],
-                [3, nmod],
-                [-1, cc],
-                [-2, conj],
-                [-5, dobj]
-            ], dtype='int32'))
+        numpy.asarray([[1, nsubj], [0, root], [4, amod], [3, nmod], [-1, cc],
+                       [-2, conj], [-5, dobj]], dtype='int32'))
     tokens.noun_chunks_iterator = english_noun_chunks
     word_occurred = {}
    for chunk in tokens.noun_chunks:
@@ -30,4 +29,3 @@ def test_not_nested():
             word_occurred[word.text] += 1
     for word, freq in word_occurred.items():
         assert freq == 1, (word, [chunk.text for chunk in tokens.noun_chunks])
-