* Fix issue #37: missing check_flag attribute from Token class

This commit is contained in:
Matthew Honnibal 2015-03-26 15:06:26 +01:00
parent 5032f2a5c7
commit 0962ffc095
3 changed files with 38 additions and 0 deletions

View File

@ -89,3 +89,5 @@ cdef class Token:
return self
# C-level method declaration (not callable from Python). Returns an int;
# `except -1` reserves -1 as the error return value so exceptions propagate.
cdef int take_ownership_of_c_data(self) except -1
# Declaration of the flag query added for issue #37. `cpdef` makes it callable
# from both Cython and Python code; it takes an `attr_id_t` flag id and returns
# a bint, with -1 reserved as the error return value (`except -1`).
cpdef bint check_flag(self, attr_id_t flag_id) except -1

View File

@ -9,6 +9,7 @@ from .typedefs cimport LEMMA
from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
from .typedefs cimport POS, LEMMA
from .parts_of_speech import UNIV_POS_NAMES
from .lexeme cimport check_flag
from unidecode import unidecode
@ -252,6 +253,10 @@ cdef class Token:
# Unicode conversion hook: returns this object's `string` attribute unchanged.
def __unicode__(self):
return self.string
# Flag query exposed on Token (fix for issue #37).
# Forwards to the module-level `check_flag` helper, passing this token's
# `self.c.lex` together with `flag_id`, and returns that helper's result.
# `except -1` reserves -1 as the error return value for C-level callers.
# NOTE(review): presumably a true result means the flag is set on the
# token's lexeme -- confirm against the helper's definition in .lexeme.
cpdef bint check_flag(self, attr_id_t flag_id) except -1:
return check_flag(self.c.lex, flag_id)
cdef int take_ownership_of_c_data(self) except -1:
owned_data = <TokenC*>PyMem_Malloc(sizeof(TokenC) * self.array_len)
memcpy(owned_data, self.c, sizeof(TokenC) * self.array_len)

31
tests/test_token_api.py Normal file
View File

@ -0,0 +1,31 @@
from __future__ import unicode_literals
from spacy.en import English
from spacy.en.attrs import IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT
from spacy.en.attrs import IS_SPACE, IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM
from spacy.en.attrs import IS_STOP
import pytest
@pytest.fixture
def token():
    """Provide the first token of a short sample text parsed with English()."""
    pipeline = English()
    parsed = pipeline(u'Give it back! He pleaded.')
    return parsed[0]
def test_strings(token):
    """Check each string-valued attribute of the fixture token."""
    # (attribute name, expected value), checked in this order; the first
    # mismatch fails the test.
    expected_values = (
        ('orth_', 'Give'),
        ('lower_', 'give'),
        ('shape_', 'Xxxx'),
        ('prefix_', 'G'),
        ('suffix_', 'ive'),
        ('lemma_', 'give'),
        ('pos_', 'VERB'),
        ('tag_', 'VB'),
        ('dep_', 'ROOT'),
    )
    for attr_name, wanted in expected_values:
        assert getattr(token, attr_name) == wanted
def test_flags(token):
    """Check Token.check_flag against two flags for the fixture token."""
    alpha_flag = token.check_flag(IS_ALPHA)
    assert alpha_flag
    digit_flag = token.check_flag(IS_DIGIT)
    assert not digit_flag
    # TODO: Test more of these, esp. if a bug is found