mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* Add length property
This commit is contained in:
		
							parent
							
								
									18fb76b2c4
								
							
						
					
					
						commit
						6319ff0f22
					
				| 
						 | 
				
			
			@ -16,6 +16,7 @@ cdef struct Orthography:
 | 
			
		|||
    StringHash shape
 | 
			
		||||
    StringHash norm
 | 
			
		||||
 | 
			
		||||
    size_t length
 | 
			
		||||
    Py_UNICODE first
 | 
			
		||||
    Bits8 flags
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -45,6 +46,7 @@ cdef enum StringAttr:
 | 
			
		|||
    NORM
 | 
			
		||||
    SHAPE
 | 
			
		||||
    LAST3
 | 
			
		||||
    LENGTH
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cpdef StringHash attr_of(size_t lex_id, StringAttr attr) except 0
 | 
			
		||||
| 
						 | 
				
			
			@ -54,3 +56,4 @@ cpdef StringHash lex_of(size_t lex_id) except 0
 | 
			
		|||
cpdef StringHash norm_of(size_t lex_id) except 0
 | 
			
		||||
cpdef StringHash shape_of(size_t lex_id) except 0
 | 
			
		||||
cpdef StringHash last3_of(size_t lex_id) except 0
 | 
			
		||||
cpdef StringHash length_of(size_t lex_id)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -32,6 +32,8 @@ cpdef StringHash attr_of(size_t lex_id, StringAttr attr) except 0:
 | 
			
		|||
        return shape_of(lex_id)
 | 
			
		||||
    elif attr == LAST3:
 | 
			
		||||
        return last3_of(lex_id)
 | 
			
		||||
    elif attr == LENGTH:
 | 
			
		||||
        return length_of(lex_id)
 | 
			
		||||
    else:
 | 
			
		||||
        raise StandardError
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -118,9 +120,20 @@ cpdef Py_UNICODE first_of(size_t lex_id):
 | 
			
		|||
    >>> unhash(first_of(lex_id))
 | 
			
		||||
    u'H'
 | 
			
		||||
    '''
 | 
			
		||||
    if (<Lexeme*>lex_id).orth == NULL:
 | 
			
		||||
        return 0
 | 
			
		||||
    return (<Lexeme*>lex_id).orth.first
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cpdef StringHash length_of(size_t lex_id):
    '''Access the `length' field of the Lexeme pointed to by lex_id, which stores
    the length of the string hashed by lex_of.

    Returns 0 when the Lexeme has no orthography record attached (its `orth'
    pointer is NULL).'''
    # lex_id is the address of a Lexeme struct; cast it once and reuse the pointer.
    cdef Lexeme* word = <Lexeme*>lex_id
    if word.orth == NULL:
        return 0
    return word.orth.length
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cpdef double prob_of(size_t lex_id):
 | 
			
		||||
    '''Access the `prob' field of the Lexeme pointed to by lex_id, which stores
 | 
			
		||||
    the smoothed unigram log probability of the word, as estimated from a large
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,7 +4,7 @@ import pytest
 | 
			
		|||
 | 
			
		||||
from spacy.en import lookup, unhash
 | 
			
		||||
 | 
			
		||||
from spacy.lexeme import sic_of, lex_of, norm_of, shape_of, first_of
 | 
			
		||||
from spacy.lexeme import sic_of, lex_of, norm_of, shape_of, first_of, length_of
 | 
			
		||||
from spacy.lexeme import shape_of
 | 
			
		||||
 | 
			
		||||
@pytest.fixture
 | 
			
		||||
| 
						 | 
				
			
			@ -14,3 +14,16 @@ def C3P0():
 | 
			
		|||
 | 
			
		||||
def test_shape(C3P0):
 | 
			
		||||
    assert unhash(shape_of(C3P0)) == "XdXd"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_length():
    """length_of should report the number of characters in each looked-up string."""
    # (string passed to lookup, expected length), checked in this order.
    expected_lengths = [
        ('the', 3),
        ('', 0),
        ("n't", 3),
        ("'s", 2),
        ('Xxxx', 4),
    ]
    for string, expected in expected_lengths:
        assert length_of(lookup(string)) == expected
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user