* Add missing __contains__ method to vocab

This commit is contained in:
Matthew Honnibal 2016-03-08 15:49:10 +00:00
parent 478aa21cb0
commit 963fe5258e
3 changed files with 15 additions and 0 deletions

View File

@ -118,6 +118,11 @@ cdef class StringStore:
else:
raise TypeError(type(string_or_id))
def __contains__(self, unicode string):
    """Return True if `string` has an entry in this store, else False.

    The string is hashed with hash_string() and the hash is looked up in
    self._map; the string counts as present when that lookup yields a
    non-NULL pointer.
    """
    cdef hash_t string_hash = hash_string(string)
    cdef Utf8Str* entry = <Utf8Str*>self._map.get(string_hash)
    # The pointer comparison already produces the boolean result.
    return entry is not NULL
def __iter__(self):
cdef int i
for i in range(self.size):

View File

@ -45,6 +45,11 @@ def test_symbols(en_vocab):
assert en_vocab.strings['PROB'] == PROB
def test_contains(en_vocab):
    """Check the `in` operator on the vocab fixture.

    'Hello' must be reported as a member, and a long random-looking
    string must be reported as absent.
    """
    expected_present = 'Hello'
    expected_absent = 'LKsdjvlsakdvlaksdvlkasjdvljasdlkfvm'
    assert expected_present in en_vocab
    assert expected_absent not in en_vocab
@pytest.mark.xfail
def test_pickle_vocab(en_vocab):
file_ = io.BytesIO()

View File

@ -172,6 +172,11 @@ cdef class Vocab:
self._by_orth.set(lex.orth, <void*>lex)
self.length += 1
def __contains__(self, unicode string):
    """Return True if `string` has an entry in this vocab, else False.

    The string is hashed with hash_string() and the hash is looked up in
    self._by_hash; the string counts as present when that lookup yields a
    non-NULL result. Only the hash is compared here, not the text itself.
    """
    string_hash = hash_string(string)
    entry = self._by_hash.get(string_hash)
    # The NULL comparison already produces the boolean result.
    return entry is not NULL
def __iter__(self):
cdef attr_t orth
cdef size_t addr