mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Work on vectors
This commit is contained in:
parent
6937e311a4
commit
a131981f3b
|
@ -27,7 +27,8 @@ cdef struct _Cached:
|
||||||
cdef class Vocab:
|
cdef class Vocab:
|
||||||
cdef Pool mem
|
cdef Pool mem
|
||||||
cpdef readonly StringStore strings
|
cpdef readonly StringStore strings
|
||||||
cpdef readonly Morphology morphology
|
cpdef public Morphology morphology
|
||||||
|
cpdef public object vectors
|
||||||
cdef readonly int length
|
cdef readonly int length
|
||||||
cdef public object data_dir
|
cdef public object data_dir
|
||||||
cdef public object lex_attr_getters
|
cdef public object lex_attr_getters
|
||||||
|
@ -35,11 +36,10 @@ cdef class Vocab:
|
||||||
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
|
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
|
||||||
cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
|
cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
|
||||||
cdef const TokenC* make_fused_token(self, substrings) except NULL
|
cdef const TokenC* make_fused_token(self, substrings) except NULL
|
||||||
|
|
||||||
cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL
|
cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL
|
||||||
cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1
|
cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1
|
||||||
cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL
|
cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL
|
||||||
|
|
||||||
cdef PreshMap _by_hash
|
cdef PreshMap _by_hash
|
||||||
cdef PreshMap _by_orth
|
cdef PreshMap _by_orth
|
||||||
cdef readonly int vectors_length
|
|
||||||
|
|
|
@ -239,6 +239,16 @@ cdef class Vocab:
|
||||||
Token.set_struct_attr(token, attr_id, value)
|
Token.set_struct_attr(token, attr_id, value)
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
|
@property
|
||||||
|
def vectors_length(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def clear_vectors(self):
|
||||||
|
"""Drop the current vector table. Because all vectors must be the same
|
||||||
|
width, you have to call this to change the size of the vectors.
|
||||||
|
"""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
def get_vector(self, orth):
|
def get_vector(self, orth):
|
||||||
"""Retrieve a vector for a word in the vocabulary.
|
"""Retrieve a vector for a word in the vocabulary.
|
||||||
|
|
||||||
|
@ -253,6 +263,16 @@ cdef class Vocab:
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def set_vector(self, orth, vector):
|
||||||
|
"""Set a vector for a word in the vocabulary.
|
||||||
|
|
||||||
|
Words can be referenced by string or int ID.
|
||||||
|
|
||||||
|
RETURNS:
|
||||||
|
None
|
||||||
|
"""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
def has_vector(self, orth):
|
def has_vector(self, orth):
|
||||||
"""Check whether a word has a vector. Returns False if no
|
"""Check whether a word has a vector. Returns False if no
|
||||||
vectors have been loaded. Words can be looked up by string
|
vectors have been loaded. Words can be looked up by string
|
||||||
|
|
Loading…
Reference in New Issue
Block a user