Mirror of https://github.com/explosion/spaCy.git

bugfix in unit test

Parent: 72e0de7330
Commit: 80bea62842
@@ -1,6 +1,8 @@
 from spacy.tokens.doc cimport Doc
 
+cdef dict CHUNKERS
+
 
 cdef class DocIterator:
     cdef Doc _doc
@@ -5,6 +5,8 @@ from spacy.tokens.token cimport Token
 from spacy.parts_of_speech cimport NOUN
 
+CHUNKERS = {'en':EnglishNounChunks, 'de':GermanNounChunks}
+
 
 # base class for document iterators
 cdef class DocIterator:
     def __init__(self, Doc doc):
@@ -47,8 +47,7 @@ from ._parse_features cimport fill_context
 from .stateclass cimport StateClass
 from ._state cimport StateC
 
-from spacy.syntax.iterators cimport DocIterator, EnglishNounChunks, GermanNounChunks
-CHUNKERS = {'en':EnglishNounChunks, 'de':GermanNounChunks}
+from spacy.syntax.iterators cimport CHUNKERS, DocIterator, EnglishNounChunks, GermanNounChunks
 
 
 DEBUG = False
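The hunk above consolidates the CHUNKERS registry: instead of redefining the dict in the parser module, it is now defined once in spacy.syntax.iterators and cimported from there. Below is a minimal pure-Python sketch of that registry pattern; the get_chunker helper and the plain classes are stand-ins invented for illustration, not part of this commit (the real classes are Cython cdef classes).

# Minimal sketch of the CHUNKERS registry pattern (illustrative stand-ins,
# not the real Cython cdef classes from spacy.syntax.iterators).

class DocIterator:
    """Base class: iterate over spans of a document."""
    def __init__(self, doc):
        self._doc = doc

    def __iter__(self):
        return iter(())


class EnglishNounChunks(DocIterator):
    pass


class GermanNounChunks(DocIterator):
    pass


# One shared mapping from language code to chunker class, as in the diff.
CHUNKERS = {'en': EnglishNounChunks, 'de': GermanNounChunks}


def get_chunker(lang_code):
    # Hypothetical helper: return None for languages without a registered
    # chunker, so callers can simply skip noun chunking.
    return CHUNKERS.get(lang_code)


if __name__ == '__main__':
    chunker_cls = get_chunker('en')
    print(chunker_cls.__name__ if chunker_cls else 'no chunker registered')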
@@ -3,11 +3,11 @@ import numpy as np
 from spacy.attrs import HEAD, DEP
 from spacy.symbols import nsubj, dobj, punct, amod, nmod, conj, cc, root
 from spacy.en import English
+from spacy.syntax.iterators import EnglishNounChunks
 
 
 def test_not_nested():
-    nlp = English(parser=False)
+    nlp = English(parser=False, entity=False)
     sent = u'''Peter has chronic command and control issues'''.strip()
     tokens = nlp(sent)
     tokens.from_array(
@@ -22,6 +22,7 @@ def test_not_nested():
             [-2, conj],
             [-5, dobj]
         ], dtype='int32'))
+    tokens.noun_chunks = EnglishNounChunks
     for chunk in tokens.noun_chunks:
         print(chunk.text)
     word_occurred = {}
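In the test, the chunker class is assigned to tokens.noun_chunks and the same attribute is then iterated, which suggests a property whose setter stores the iterator class and whose getter instantiates it over the document. The sketch below illustrates that pattern under that assumption; FakeDoc and EveryWordChunks are invented stand-ins, not spaCy's actual Doc implementation.

# Rough sketch of a noun_chunks property that accepts an iterator class on
# assignment and yields chunks when read (an assumption based on the test's
# usage; the real Doc property in spacy.tokens.doc is Cython and may differ).

class FakeDoc:
    def __init__(self, words):
        self.words = words
        self._chunker_cls = None

    @property
    def noun_chunks(self):
        # Lazily instantiate the stored chunker class on each read.
        if self._chunker_cls is None:
            return iter(())
        return iter(self._chunker_cls(self))

    @noun_chunks.setter
    def noun_chunks(self, chunker_cls):
        self._chunker_cls = chunker_cls


class EveryWordChunks:
    # Trivial stand-in chunker: yields every word as its own "chunk".
    def __init__(self, doc):
        self._doc = doc

    def __iter__(self):
        return iter(self._doc.words)


doc = FakeDoc(['Peter', 'issues'])
doc.noun_chunks = EveryWordChunks
print(list(doc.noun_chunks))  # ['Peter', 'issues']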
@@ -31,3 +32,4 @@ def test_not_nested():
             word_occurred[word.text] += 1
     for word, freq in word_occurred.items():
         assert freq == 1, (word, [chunk.text for chunk in tokens.noun_chunks])
+
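What the final hunks verify is that noun chunks do not overlap: across all chunks of the sentence, no word may occur more than once. Here is a small self-contained sketch of that counting check, using plain strings in place of spaCy spans; the example chunks are made up for illustration.

# Sketch of the non-overlap check from the test: count how often each word
# appears across all noun chunks and require that no word repeats.
# The chunks below are illustrative stand-ins, not real spaCy output.

def assert_chunks_not_nested(chunks):
    word_occurred = {}
    for chunk in chunks:
        for word in chunk.split():
            word_occurred.setdefault(word, 0)
            word_occurred[word] += 1
    for word, freq in word_occurred.items():
        assert freq == 1, (word, chunks)


if __name__ == '__main__':
    # Non-overlapping chunks pass; a nested chunk such as 'command and control'
    # alongside 'chronic command and control issues' would trip the assert.
    assert_chunks_not_nested(['Peter', 'chronic command and control issues'])
    print('no word occurs in more than one chunk')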