Work on vectors

This commit is contained in:
Matthew Honnibal 2017-05-30 23:34:50 +02:00
parent 6937e311a4
commit a131981f3b
2 changed files with 23 additions and 3 deletions

View File

@ -27,7 +27,8 @@ cdef struct _Cached:
cdef class Vocab:
cdef Pool mem
cpdef readonly StringStore strings
cpdef readonly Morphology morphology
cpdef public Morphology morphology
cpdef public object vectors
cdef readonly int length
cdef public object data_dir
cdef public object lex_attr_getters
@ -35,11 +36,10 @@ cdef class Vocab:
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
cdef const TokenC* make_fused_token(self, substrings) except NULL
cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL
cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1
cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL
cdef PreshMap _by_hash
cdef PreshMap _by_orth
cdef readonly int vectors_length

View File

@ -239,6 +239,16 @@ cdef class Vocab:
Token.set_struct_attr(token, attr_id, value)
return tokens
# Placeholder property: no implementation is visible in this revision, so
# reading `vectors_length` always raises NotImplementedError.
@property
def vectors_length(self):
raise NotImplementedError
def clear_vectors(self):
"""Drop the current vector table. Because all vectors must be the same
width, you have to call this to change the size of the vectors.

NOTE: this is currently a stub; calling it always raises
NotImplementedError.
"""
raise NotImplementedError
def get_vector(self, orth):
"""Retrieve a vector for a word in the vocabulary.
@ -253,6 +263,16 @@ cdef class Vocab:
"""
raise NotImplementedError
def set_vector(self, orth, vector):
"""Set a vector for a word in the vocabulary.
Words can be referenced by string or int ID.

orth: The word to set the vector for, given as a string or an int ID.
vector: The vector to store for the word (expected type is not
    visible here -- TODO confirm).
RETURNS:
None

NOTE: this is currently a stub; calling it always raises
NotImplementedError.
"""
raise NotImplementedError
def has_vector(self, orth):
"""Check whether a word has a vector. Returns False if no
vectors have been loaded. Words can be looked up by string