From a131981f3b87cd6049340e9f29f05033ca796ce7 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 30 May 2017 23:34:50 +0200 Subject: [PATCH] Work on vectors --- spacy/vocab.pxd | 6 +++--- spacy/vocab.pyx | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/spacy/vocab.pxd b/spacy/vocab.pxd index 3c31a8f8f..8005cbf06 100644 --- a/spacy/vocab.pxd +++ b/spacy/vocab.pxd @@ -27,7 +27,8 @@ cdef struct _Cached: cdef class Vocab: cdef Pool mem cpdef readonly StringStore strings - cpdef readonly Morphology morphology + cpdef public Morphology morphology + cpdef public object vectors cdef readonly int length cdef public object data_dir cdef public object lex_attr_getters @@ -35,11 +36,10 @@ cdef class Vocab: cdef const LexemeC* get(self, Pool mem, unicode string) except NULL cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL cdef const TokenC* make_fused_token(self, substrings) except NULL - + cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1 cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL cdef PreshMap _by_hash cdef PreshMap _by_orth - cdef readonly int vectors_length diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 45c9e1a07..f11d3a6ef 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -239,6 +239,16 @@ cdef class Vocab: Token.set_struct_attr(token, attr_id, value) return tokens + @property + def vectors_length(self): + raise NotImplementedError + + def clear_vectors(self): + """Drop the current vector table. Because all vectors must be the same + width, you have to call this to change the size of the vectors. + """ + raise NotImplementedError + def get_vector(self, orth): """Retrieve a vector for a word in the vocabulary. @@ -253,6 +263,16 @@ cdef class Vocab: """ raise NotImplementedError + def set_vector(self, orth, vector): + """Set a vector for a word in the vocabulary. + + Words can be referenced by string or int ID. + + RETURNS: + None + """ + raise NotImplementedError + def has_vector(self, orth): """Check whether a word has a vector. Returns False if no vectors have been loaded. Words can be looked up by string