mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 02:36:32 +03:00
* Add length property
This commit is contained in:
parent
18fb76b2c4
commit
6319ff0f22
|
@ -16,6 +16,7 @@ cdef struct Orthography:
|
|||
StringHash shape
|
||||
StringHash norm
|
||||
|
||||
size_t length
|
||||
Py_UNICODE first
|
||||
Bits8 flags
|
||||
|
||||
|
@ -45,6 +46,7 @@ cdef enum StringAttr:
|
|||
NORM
|
||||
SHAPE
|
||||
LAST3
|
||||
LENGTH
|
||||
|
||||
|
||||
cpdef StringHash attr_of(size_t lex_id, StringAttr attr) except 0
|
||||
|
@ -54,3 +56,4 @@ cpdef StringHash lex_of(size_t lex_id) except 0
|
|||
cpdef StringHash norm_of(size_t lex_id) except 0
|
||||
cpdef StringHash shape_of(size_t lex_id) except 0
|
||||
cpdef StringHash last3_of(size_t lex_id) except 0
|
||||
# Declared without `except 0`, unlike the accessors above: length_of returns 0
# as an ordinary value (e.g. for the empty string), so 0 cannot double as its
# error sentinel.
cpdef StringHash length_of(size_t lex_id)
|
||||
|
|
|
@ -32,6 +32,8 @@ cpdef StringHash attr_of(size_t lex_id, StringAttr attr) except 0:
|
|||
return shape_of(lex_id)
|
||||
elif attr == LAST3:
|
||||
return last3_of(lex_id)
|
||||
elif attr == LENGTH:
|
||||
return length_of(lex_id)
|
||||
else:
|
||||
raise StandardError
|
||||
|
||||
|
@ -118,9 +120,20 @@ cpdef Py_UNICODE first_of(size_t lex_id):
|
|||
>>> unhash(first_of(lex_id))
|
||||
u'H'
|
||||
'''
|
||||
if (<Lexeme*>lex_id).orth == NULL:
|
||||
return 0
|
||||
return (<Lexeme*>lex_id).orth.first
|
||||
|
||||
|
||||
cpdef StringHash length_of(size_t lex_id):
    '''Access the `length' field of the Lexeme pointed to by lex_id, which stores
    the length of the string hashed by lex_of.

    Returns 0 when the Lexeme has no orthography record (its orth pointer is
    NULL).
    '''
    # Cast the opaque id back to a Lexeme pointer once and reuse it, instead of
    # repeating the cast at every access.
    cdef Lexeme* word = <Lexeme*>lex_id
    if word.orth == NULL:
        return 0
    return word.orth.length
|
||||
|
||||
|
||||
cpdef double prob_of(size_t lex_id):
|
||||
'''Access the `prob' field of the Lexeme pointed to by lex_id, which stores
|
||||
the smoothed unigram log probability of the word, as estimated from a large
|
||||
|
|
|
@ -4,7 +4,7 @@ import pytest
|
|||
|
||||
from spacy.en import lookup, unhash
|
||||
|
||||
from spacy.lexeme import sic_of, lex_of, norm_of, shape_of, first_of
|
||||
from spacy.lexeme import sic_of, lex_of, norm_of, shape_of, first_of, length_of
|
||||
from spacy.lexeme import shape_of
|
||||
|
||||
@pytest.fixture
|
||||
|
@ -14,3 +14,16 @@ def C3P0():
|
|||
|
||||
def test_shape(C3P0):
    """The shape hash of the C3P0 fixture unhashes to "XdXd"."""
    shape = unhash(shape_of(C3P0))
    assert shape == "XdXd"
|
||||
|
||||
|
||||
def test_length():
    """length_of reports the expected length for each looked-up string."""
    # (string passed to lookup, expected length_of result), checked in order.
    cases = [
        ('the', 3),
        ('', 0),
        ("n't", 3),
        ("'s", 2),
        ('Xxxx', 4),
    ]
    for string, expected in cases:
        assert length_of(lookup(string)) == expected
|
||||
|
|
Loading…
Reference in New Issue
Block a user