mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
* Remove vocab10k field, and add flags for gazetteers
This commit is contained in:
parent
f1c3e17c80
commit
70ea862703
|
@ -23,6 +23,14 @@ cpdef enum:
|
|||
OFT_TITLE
|
||||
OFT_UPPER
|
||||
|
||||
IN_MALES
|
||||
IN_FEMALES
|
||||
IN_SURNAMES
|
||||
IN_PLACES
|
||||
IN_GAMES
|
||||
IN_CELEBS
|
||||
IN_NAMES
|
||||
|
||||
|
||||
cdef struct Lexeme:
|
||||
flag_t flags
|
||||
|
@ -31,7 +39,6 @@ cdef struct Lexeme:
|
|||
id_t sic
|
||||
id_t norm
|
||||
id_t shape
|
||||
id_t vocab10k
|
||||
id_t asciied
|
||||
id_t prefix
|
||||
id_t suffix
|
||||
|
|
|
@ -54,9 +54,15 @@ cpdef Lexeme init(id_t i, unicode string, hash_t hashed,
|
|||
lex.norm = lex.sic
|
||||
lex.shape = get_string_id(orth.word_shape(string), store)
|
||||
lex.asciied = get_string_id(orth.asciied(string), store)
|
||||
non_sparse = orth.non_sparse(string, lex.prob, lex.cluster, upper_pc, title_pc, lower_pc)
|
||||
lex.vocab10k = get_string_id(non_sparse, store)
|
||||
lex.flags = get_flags(string, upper_pc, title_pc, lower_pc)
|
||||
|
||||
lex.flags |= props.get('in_males', 0) << IN_MALES
|
||||
lex.flags |= props.get('in_females', 0) << IN_FEMALES
|
||||
lex.flags |= props.get('in_surnames', 0) << IN_SURNAMES
|
||||
lex.flags |= props.get('in_places', 0) << IN_PLACES
|
||||
lex.flags |= props.get('in_celebs', 0) << IN_CELEBS
|
||||
lex.flags |= props.get('in_games', 0) << IN_GAMES
|
||||
lex.flags |= props.get('in_names', 0) << IN_NAMES
|
||||
return lex
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user