Work on vectors

This commit is contained in:
Matthew Honnibal 2017-05-30 23:34:50 +02:00
parent 6937e311a4
commit a131981f3b
2 changed files with 23 additions and 3 deletions

View File

@ -27,7 +27,8 @@ cdef struct _Cached:
cdef class Vocab: cdef class Vocab:
cdef Pool mem cdef Pool mem
cpdef readonly StringStore strings cpdef readonly StringStore strings
cpdef readonly Morphology morphology cpdef public Morphology morphology
cpdef public object vectors
cdef readonly int length cdef readonly int length
cdef public object data_dir cdef public object data_dir
cdef public object lex_attr_getters cdef public object lex_attr_getters
@ -35,11 +36,10 @@ cdef class Vocab:
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
cdef const TokenC* make_fused_token(self, substrings) except NULL cdef const TokenC* make_fused_token(self, substrings) except NULL
cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL
cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1 cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1
cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL
cdef PreshMap _by_hash cdef PreshMap _by_hash
cdef PreshMap _by_orth cdef PreshMap _by_orth
cdef readonly int vectors_length

View File

@ -239,6 +239,16 @@ cdef class Vocab:
Token.set_struct_attr(token, attr_id, value) Token.set_struct_attr(token, attr_id, value)
return tokens return tokens
@property
def vectors_length(self):
    """Placeholder property: not implemented yet.

    NOTE(review): presumably meant to report the width shared by all
    vectors in the table -- confirm once an implementation lands.

    RAISES:
        NotImplementedError: on every access.
    """
    raise NotImplementedError()
def clear_vectors(self):
    """Discard the vector table currently held by the vocabulary.

    Every vector has to share one width, so the table must be dropped
    through this method before vectors of a different size can be used.

    RAISES:
        NotImplementedError: always; this is still a stub.
    """
    raise NotImplementedError()
def get_vector(self, orth): def get_vector(self, orth):
"""Retrieve a vector for a word in the vocabulary. """Retrieve a vector for a word in the vocabulary.
@ -253,6 +263,16 @@ cdef class Vocab:
""" """
raise NotImplementedError raise NotImplementedError
def set_vector(self, orth, vector):
    """Assign a vector to a word in the vocabulary.

    The word may be given either as its string or as its integer ID.

    orth: The word to update, as a string or an int ID.
    vector: The vector to store for that word.
        NOTE(review): expected type and shape are not shown here -- confirm.
    RETURNS:
        None

    RAISES:
        NotImplementedError: always; this is still a stub.
    """
    raise NotImplementedError()
def has_vector(self, orth): def has_vector(self, orth):
"""Check whether a word has a vector. Returns False if no """Check whether a word has a vector. Returns False if no
vectors have been loaded. Words can be looked up by string vectors have been loaded. Words can be looked up by string