Fix noun chunks test

This commit is contained in:
ines 2017-06-05 16:39:04 +02:00
parent 69cdfc843e
commit cc9c5dc7a3

View File

@@ -1,15 +1,15 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from ...attrs import HEAD, DEP from ....attrs import HEAD, DEP
from ...symbols import nsubj, dobj, amod, nmod, conj, cc, root from ....symbols import nsubj, dobj, amod, nmod, conj, cc, root
from ...syntax.iterators import english_noun_chunks from ....lang.en.syntax_iterators import SYNTAX_ITERATORS
from ..util import get_doc from ...util import get_doc
import numpy import numpy
def test_doc_noun_chunks_not_nested(en_tokenizer): def test_en_noun_chunks_not_nested(en_tokenizer):
text = "Peter has chronic command and control issues" text = "Peter has chronic command and control issues"
heads = [1, 0, 4, 3, -1, -2, -5] heads = [1, 0, 4, 3, -1, -2, -5]
deps = ['nsubj', 'ROOT', 'amod', 'nmod', 'cc', 'conj', 'dobj'] deps = ['nsubj', 'ROOT', 'amod', 'nmod', 'cc', 'conj', 'dobj']
@@ -21,7 +21,7 @@ def test_doc_noun_chunks_not_nested(en_tokenizer):
[HEAD, DEP], [HEAD, DEP],
numpy.asarray([[1, nsubj], [0, root], [4, amod], [3, nmod], [-1, cc], numpy.asarray([[1, nsubj], [0, root], [4, amod], [3, nmod], [-1, cc],
[-2, conj], [-5, dobj]], dtype='uint64')) [-2, conj], [-5, dobj]], dtype='uint64'))
tokens.noun_chunks_iterator = english_noun_chunks tokens.noun_chunks_iterator = SYNTAX_ITERATORS['noun_chunks']
word_occurred = {} word_occurred = {}
for chunk in tokens.noun_chunks: for chunk in tokens.noun_chunks:
for word in chunk: for word in chunk: