bugfix in unit test

This commit is contained in:
Wolfgang Seeker 2016-04-08 16:45:27 +02:00
parent 72e0de7330
commit 80bea62842
4 changed files with 9 additions and 4 deletions

View File

@@ -1,6 +1,8 @@
from spacy.tokens.doc cimport Doc from spacy.tokens.doc cimport Doc
cdef dict CHUNKERS
cdef class DocIterator: cdef class DocIterator:
cdef Doc _doc cdef Doc _doc

View File

@@ -5,6 +5,8 @@ from spacy.tokens.token cimport Token
from spacy.parts_of_speech cimport NOUN from spacy.parts_of_speech cimport NOUN
CHUNKERS = {'en':EnglishNounChunks, 'de':GermanNounChunks}
# base class for document iterators # base class for document iterators
cdef class DocIterator: cdef class DocIterator:
def __init__(self, Doc doc): def __init__(self, Doc doc):

View File

@@ -47,8 +47,7 @@ from ._parse_features cimport fill_context
from .stateclass cimport StateClass from .stateclass cimport StateClass
from ._state cimport StateC from ._state cimport StateC
from spacy.syntax.iterators cimport DocIterator, EnglishNounChunks, GermanNounChunks from spacy.syntax.iterators cimport CHUNKERS, DocIterator, EnglishNounChunks, GermanNounChunks
CHUNKERS = {'en':EnglishNounChunks, 'de':GermanNounChunks}
DEBUG = False DEBUG = False

View File

@@ -3,11 +3,11 @@ import numpy as np
from spacy.attrs import HEAD, DEP from spacy.attrs import HEAD, DEP
from spacy.symbols import nsubj, dobj, punct, amod, nmod, conj, cc, root from spacy.symbols import nsubj, dobj, punct, amod, nmod, conj, cc, root
from spacy.en import English from spacy.en import English
from spacy.syntax.iterators import EnglishNounChunks
def test_not_nested(): def test_not_nested():
nlp = English(parser=False) nlp = English(parser=False, entity=False)
sent = u'''Peter has chronic command and control issues'''.strip() sent = u'''Peter has chronic command and control issues'''.strip()
tokens = nlp(sent) tokens = nlp(sent)
tokens.from_array( tokens.from_array(
@@ -22,6 +22,7 @@ def test_not_nested():
[-2, conj], [-2, conj],
[-5, dobj] [-5, dobj]
], dtype='int32')) ], dtype='int32'))
tokens.noun_chunks = EnglishNounChunks
for chunk in tokens.noun_chunks: for chunk in tokens.noun_chunks:
print(chunk.text) print(chunk.text)
word_occurred = {} word_occurred = {}
@@ -31,3 +32,4 @@ def test_not_nested():
word_occurred[word.text] += 1 word_occurred[word.text] += 1
for word, freq in word_occurred.items(): for word, freq in word_occurred.items():
assert freq == 1, (word, [chunk.text for chunk in tokens.noun_chunks]) assert freq == 1, (word, [chunk.text for chunk in tokens.noun_chunks])