bugfix in unit test

This commit is contained in:
Wolfgang Seeker 2016-04-08 16:45:27 +02:00
parent 72e0de7330
commit 80bea62842
4 changed files with 9 additions and 4 deletions

View File

@@ -1,6 +1,8 @@
from spacy.tokens.doc cimport Doc
cdef dict CHUNKERS
cdef class DocIterator:
cdef Doc _doc

View File

@@ -5,6 +5,8 @@ from spacy.tokens.token cimport Token
from spacy.parts_of_speech cimport NOUN
CHUNKERS = {'en':EnglishNounChunks, 'de':GermanNounChunks}
# base class for document iterators
cdef class DocIterator:
def __init__(self, Doc doc):

View File

@@ -47,8 +47,7 @@ from ._parse_features cimport fill_context
from .stateclass cimport StateClass
from ._state cimport StateC
from spacy.syntax.iterators cimport DocIterator, EnglishNounChunks, GermanNounChunks
CHUNKERS = {'en':EnglishNounChunks, 'de':GermanNounChunks}
from spacy.syntax.iterators cimport CHUNKERS, DocIterator, EnglishNounChunks, GermanNounChunks
DEBUG = False

View File

@@ -3,11 +3,11 @@ import numpy as np
from spacy.attrs import HEAD, DEP
from spacy.symbols import nsubj, dobj, punct, amod, nmod, conj, cc, root
from spacy.en import English
from spacy.syntax.iterators import EnglishNounChunks
def test_not_nested():
nlp = English(parser=False)
nlp = English(parser=False, entity=False)
sent = u'''Peter has chronic command and control issues'''.strip()
tokens = nlp(sent)
tokens.from_array(
@@ -22,6 +22,7 @@ def test_not_nested():
[-2, conj],
[-5, dobj]
], dtype='int32'))
tokens.noun_chunks = EnglishNounChunks
for chunk in tokens.noun_chunks:
print(chunk.text)
word_occurred = {}
@@ -31,3 +32,4 @@ def test_not_nested():
word_occurred[word.text] += 1
for word, freq in word_occurred.items():
assert freq == 1, (word, [chunk.text for chunk in tokens.noun_chunks])