diff --git a/spacy/lang.pxd b/spacy/lang.pxd index ba9d0a779..e2c7c56e6 100644 --- a/spacy/lang.pxd +++ b/spacy/lang.pxd @@ -32,9 +32,6 @@ cdef class Lexicon: cdef PreshMap _dict - cdef list _string_features - cdef list _flag_features - cdef class Language: cdef Pool _mem diff --git a/spacy/lang.pyx b/spacy/lang.pyx index 9323dc052..5042ff4b2 100644 --- a/spacy/lang.pyx +++ b/spacy/lang.pyx @@ -266,10 +266,10 @@ cdef class Lexicon: cpdef Lexeme lookup(self, unicode uni_string): """Retrieve (or create, if not found) a Lexeme for a string, and return it. - Args + Args string (unicode): The string to be looked up. Must be unicode, not bytes. - Returns: + Returns: lexeme (Lexeme): A reference to a lexical type. """ cdef String string diff --git a/spacy/lexeme.pxd b/spacy/lexeme.pxd index b39a32522..066f05b20 100644 --- a/spacy/lexeme.pxd +++ b/spacy/lexeme.pxd @@ -1,8 +1,7 @@ -from .typedefs cimport hash_t, utf8_t, flag_t, id_t - -from thinc.typedefs cimport atom_t +from .typedefs cimport hash_t, utf8_t, flag_t, id_t, len_t, tag_t from .utf8string cimport StringStore +from libc.stdint cimport uint16_t cpdef flag_t OOV_DIST_FLAGS @@ -23,23 +22,24 @@ cpdef enum: cdef struct Lexeme: - atom_t length + flag_t flags - atom_t sic - atom_t norm - atom_t shape - atom_t vocab10k - atom_t asciied - atom_t prefix - atom_t suffix - - atom_t cluster - atom_t pos - atom_t supersense + id_t sic + id_t norm + id_t shape + id_t vocab10k + id_t asciied + id_t prefix + id_t suffix float prob + + len_t length + tag_t cluster + tag_t pos + tag_t supersense + - flag_t flags cdef Lexeme EMPTY_LEXEME diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 6760b3913..62804621d 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -52,7 +52,7 @@ cpdef Lexeme init(unicode string, hash_t hashed, lex.flags = get_flags(string, upper_pc, title_pc, lower_pc) return lex -cdef atom_t get_string_id(unicode string, StringStore store) except 0: +cdef id_t get_string_id(unicode string, StringStore store) except 0: cdef bytes byte_string = string.encode('utf8') cdef Utf8Str* orig_str = store.intern(byte_string, len(byte_string)) return orig_str.i diff --git a/spacy/typedefs.pxd b/spacy/typedefs.pxd index 34c327069..db6eb42ce 100644 --- a/spacy/typedefs.pxd +++ b/spacy/typedefs.pxd @@ -1,8 +1,10 @@ -from libc.stdint cimport uint64_t, uintptr_t +from libc.stdint cimport uint16_t, uint32_t, uint64_t, uintptr_t ctypedef uint64_t hash_t ctypedef char* utf8_t ctypedef uint64_t flag_t -ctypedef uintptr_t id_t +ctypedef uint32_t id_t +ctypedef uint16_t len_t +ctypedef uint16_t tag_t