mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
* Restore id attribute to lexeme, and rename pos field to postype, to store clustered tag dictionaries
This commit is contained in:
parent
aaf6953fe0
commit
6c807aa45f
|
@ -24,6 +24,7 @@ cpdef enum:
|
||||||
cdef struct Lexeme:
|
cdef struct Lexeme:
|
||||||
flag_t flags
|
flag_t flags
|
||||||
|
|
||||||
|
id_t id
|
||||||
id_t sic
|
id_t sic
|
||||||
id_t norm
|
id_t norm
|
||||||
id_t shape
|
id_t shape
|
||||||
|
@ -36,7 +37,7 @@ cdef struct Lexeme:
|
||||||
|
|
||||||
len_t length
|
len_t length
|
||||||
tag_t cluster
|
tag_t cluster
|
||||||
tag_t pos
|
tag_t postype
|
||||||
tag_t supersense
|
tag_t supersense
|
||||||
|
|
||||||
|
|
||||||
|
@ -44,7 +45,7 @@ cdef struct Lexeme:
|
||||||
|
|
||||||
cdef Lexeme EMPTY_LEXEME
|
cdef Lexeme EMPTY_LEXEME
|
||||||
|
|
||||||
cpdef Lexeme init(unicode string, hash_t hashed,
|
cpdef Lexeme init(id_t i, unicode string, hash_t hashed,
|
||||||
StringStore store, dict props) except *
|
StringStore store, dict props) except *
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -26,14 +26,15 @@ def get_flags(unicode string, float upper_pc, float title_pc, float lower_pc):
|
||||||
return flags
|
return flags
|
||||||
|
|
||||||
|
|
||||||
cpdef Lexeme init(unicode string, hash_t hashed,
|
cpdef Lexeme init(id_t i, unicode string, hash_t hashed,
|
||||||
StringStore store, dict props) except *:
|
StringStore store, dict props) except *:
|
||||||
cdef Lexeme lex
|
cdef Lexeme lex
|
||||||
|
lex.id = i
|
||||||
lex.length = len(string)
|
lex.length = len(string)
|
||||||
lex.sic = get_string_id(string, store)
|
lex.sic = get_string_id(string, store)
|
||||||
|
|
||||||
lex.cluster = props.get('cluster', 0)
|
lex.cluster = props.get('cluster', 0)
|
||||||
lex.pos = props.get('pos', 0)
|
lex.postype = props.get('postype', 0)
|
||||||
lex.supersense = props.get('supersense', 0)
|
lex.supersense = props.get('supersense', 0)
|
||||||
lex.prob = props.get('prob', 0)
|
lex.prob = props.get('prob', 0)
|
||||||
|
|
||||||
|
@ -55,6 +56,7 @@ cpdef Lexeme init(unicode string, hash_t hashed,
|
||||||
lex.flags = get_flags(string, upper_pc, title_pc, lower_pc)
|
lex.flags = get_flags(string, upper_pc, title_pc, lower_pc)
|
||||||
return lex
|
return lex
|
||||||
|
|
||||||
|
|
||||||
cdef id_t get_string_id(unicode string, StringStore store) except 0:
|
cdef id_t get_string_id(unicode string, StringStore store) except 0:
|
||||||
cdef bytes byte_string = string.encode('utf8')
|
cdef bytes byte_string = string.encode('utf8')
|
||||||
cdef Utf8Str* orig_str = store.intern(<char*>byte_string, len(byte_string))
|
cdef Utf8Str* orig_str = store.intern(<char*>byte_string, len(byte_string))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user