mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
* Fix issue #37: missing check_flag attribute from Token class
This commit is contained in:
parent
5032f2a5c7
commit
0962ffc095
|
@ -89,3 +89,5 @@ cdef class Token:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
cdef int take_ownership_of_c_data(self) except -1
|
cdef int take_ownership_of_c_data(self) except -1
|
||||||
|
|
||||||
|
cpdef bint check_flag(self, attr_id_t flag_id) except -1
|
||||||
|
|
|
@ -9,6 +9,7 @@ from .typedefs cimport LEMMA
|
||||||
from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
||||||
from .typedefs cimport POS, LEMMA
|
from .typedefs cimport POS, LEMMA
|
||||||
from .parts_of_speech import UNIV_POS_NAMES
|
from .parts_of_speech import UNIV_POS_NAMES
|
||||||
|
from .lexeme cimport check_flag
|
||||||
|
|
||||||
from unidecode import unidecode
|
from unidecode import unidecode
|
||||||
|
|
||||||
|
@ -252,6 +253,10 @@ cdef class Token:
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return self.string
|
return self.string
|
||||||
|
|
||||||
|
cpdef bint check_flag(self, attr_id_t flag_id) except -1:
|
||||||
|
return check_flag(self.c.lex, flag_id)
|
||||||
|
|
||||||
|
|
||||||
cdef int take_ownership_of_c_data(self) except -1:
|
cdef int take_ownership_of_c_data(self) except -1:
|
||||||
owned_data = <TokenC*>PyMem_Malloc(sizeof(TokenC) * self.array_len)
|
owned_data = <TokenC*>PyMem_Malloc(sizeof(TokenC) * self.array_len)
|
||||||
memcpy(owned_data, self.c, sizeof(TokenC) * self.array_len)
|
memcpy(owned_data, self.c, sizeof(TokenC) * self.array_len)
|
||||||
|
|
31
tests/test_token_api.py
Normal file
31
tests/test_token_api.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
from spacy.en import English
|
||||||
|
from spacy.en.attrs import IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT
|
||||||
|
from spacy.en.attrs import IS_SPACE, IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM
|
||||||
|
from spacy.en.attrs import IS_STOP
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def token():
|
||||||
|
nlp = English()
|
||||||
|
tokens = nlp(u'Give it back! He pleaded.')
|
||||||
|
return tokens[0]
|
||||||
|
|
||||||
|
|
||||||
|
def test_strings(token):
|
||||||
|
assert token.orth_ == 'Give'
|
||||||
|
assert token.lower_ == 'give'
|
||||||
|
assert token.shape_ == 'Xxxx'
|
||||||
|
assert token.prefix_ == 'G'
|
||||||
|
assert token.suffix_ == 'ive'
|
||||||
|
assert token.lemma_ == 'give'
|
||||||
|
assert token.pos_ == 'VERB'
|
||||||
|
assert token.tag_ == 'VB'
|
||||||
|
assert token.dep_ == 'ROOT'
|
||||||
|
|
||||||
|
|
||||||
|
def test_flags(token):
|
||||||
|
assert token.check_flag(IS_ALPHA)
|
||||||
|
assert not token.check_flag(IS_DIGIT)
|
||||||
|
# TODO: Test more of these, esp. if a bug is found
|
Loading…
Reference in New Issue
Block a user