mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 17:54:39 +03:00
* Add length property
This commit is contained in:
parent
18fb76b2c4
commit
6319ff0f22
|
@ -16,6 +16,7 @@ cdef struct Orthography:
|
||||||
StringHash shape
|
StringHash shape
|
||||||
StringHash norm
|
StringHash norm
|
||||||
|
|
||||||
|
size_t length
|
||||||
Py_UNICODE first
|
Py_UNICODE first
|
||||||
Bits8 flags
|
Bits8 flags
|
||||||
|
|
||||||
|
@ -45,6 +46,7 @@ cdef enum StringAttr:
|
||||||
NORM
|
NORM
|
||||||
SHAPE
|
SHAPE
|
||||||
LAST3
|
LAST3
|
||||||
|
LENGTH
|
||||||
|
|
||||||
|
|
||||||
cpdef StringHash attr_of(size_t lex_id, StringAttr attr) except 0
|
cpdef StringHash attr_of(size_t lex_id, StringAttr attr) except 0
|
||||||
|
@ -54,3 +56,4 @@ cpdef StringHash lex_of(size_t lex_id) except 0
|
||||||
cpdef StringHash norm_of(size_t lex_id) except 0
|
cpdef StringHash norm_of(size_t lex_id) except 0
|
||||||
cpdef StringHash shape_of(size_t lex_id) except 0
|
cpdef StringHash shape_of(size_t lex_id) except 0
|
||||||
cpdef StringHash last3_of(size_t lex_id) except 0
|
cpdef StringHash last3_of(size_t lex_id) except 0
|
||||||
|
cpdef StringHash length_of(size_t lex_id)
|
||||||
|
|
|
@ -32,6 +32,8 @@ cpdef StringHash attr_of(size_t lex_id, StringAttr attr) except 0:
|
||||||
return shape_of(lex_id)
|
return shape_of(lex_id)
|
||||||
elif attr == LAST3:
|
elif attr == LAST3:
|
||||||
return last3_of(lex_id)
|
return last3_of(lex_id)
|
||||||
|
elif attr == LENGTH:
|
||||||
|
return length_of(lex_id)
|
||||||
else:
|
else:
|
||||||
raise StandardError
|
raise StandardError
|
||||||
|
|
||||||
|
@ -118,9 +120,20 @@ cpdef Py_UNICODE first_of(size_t lex_id):
|
||||||
>>> unhash(first_of(lex_id))
|
>>> unhash(first_of(lex_id))
|
||||||
u'H'
|
u'H'
|
||||||
'''
|
'''
|
||||||
|
if (<Lexeme*>lex_id).orth == NULL:
|
||||||
|
return 0
|
||||||
return (<Lexeme*>lex_id).orth.first
|
return (<Lexeme*>lex_id).orth.first
|
||||||
|
|
||||||
|
|
||||||
|
cpdef StringHash length_of(size_t lex_id):
    '''Access the `length' field of the Lexeme pointed to by lex_id, which stores
    the length of the string hashed by lex_of.

    Returns 0 if the Lexeme's `orth' pointer is NULL.'''
    # lex_id is used as the address of a Lexeme struct: cast it once and reuse
    # the typed pointer instead of repeating the cast on every access.
    cdef Lexeme* word = <Lexeme*>lex_id
    if word.orth == NULL:
        return 0
    return word.orth.length
|
||||||
|
|
||||||
|
|
||||||
cpdef double prob_of(size_t lex_id):
|
cpdef double prob_of(size_t lex_id):
|
||||||
'''Access the `prob' field of the Lexeme pointed to by lex_id, which stores
|
'''Access the `prob' field of the Lexeme pointed to by lex_id, which stores
|
||||||
the smoothed unigram log probability of the word, as estimated from a large
|
the smoothed unigram log probability of the word, as estimated from a large
|
||||||
|
|
|
@ -4,7 +4,7 @@ import pytest
|
||||||
|
|
||||||
from spacy.en import lookup, unhash
|
from spacy.en import lookup, unhash
|
||||||
|
|
||||||
from spacy.lexeme import sic_of, lex_of, norm_of, shape_of, first_of
|
from spacy.lexeme import sic_of, lex_of, norm_of, shape_of, first_of, length_of
|
||||||
from spacy.lexeme import shape_of
|
from spacy.lexeme import shape_of
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
@ -14,3 +14,16 @@ def C3P0():
|
||||||
|
|
||||||
def test_shape(C3P0):
|
def test_shape(C3P0):
|
||||||
assert unhash(shape_of(C3P0)) == "XdXd"
|
assert unhash(shape_of(C3P0)) == "XdXd"
|
||||||
|
|
||||||
|
|
||||||
|
def test_length():
    """Check that length_of reports the expected length for several strings."""
    # (string to look up, expected length), checked in this order.
    expected_lengths = (
        ('the', 3),
        ('', 0),
        ("n't", 3),
        ("'s", 2),
        ('Xxxx', 4),
    )
    for string, n_chars in expected_lengths:
        t = lookup(string)
        assert length_of(t) == n_chars
|
||||||
|
|
Loading…
Reference in New Issue
Block a user