* Add missing __contains__ method to vocab

This commit is contained in:
Matthew Honnibal 2016-03-08 15:49:10 +00:00
parent 478aa21cb0
commit 963fe5258e
3 changed files with 15 additions and 0 deletions

View File

@ -118,6 +118,11 @@ cdef class StringStore:
else: else:
raise TypeError(type(string_or_id)) raise TypeError(type(string_or_id))
def __contains__(self, unicode string):
    """Report whether ``string`` has an entry in this store.

    The string is hashed with ``hash_string`` and the hash is looked up
    in ``self._map``.

    Returns True if the lookup yields a non-NULL pointer, False otherwise.
    """
    cdef hash_t string_hash = hash_string(string)
    cdef Utf8Str* entry = <Utf8Str*>self._map.get(string_hash)
    # A NULL pointer from the lookup is treated as "not present".
    return entry is not NULL
def __iter__(self): def __iter__(self):
cdef int i cdef int i
for i in range(self.size): for i in range(self.size):

View File

@ -43,6 +43,11 @@ def test_symbols(en_vocab):
assert en_vocab.strings['LEMMA'] == LEMMA assert en_vocab.strings['LEMMA'] == LEMMA
assert en_vocab.strings['ORTH'] == ORTH assert en_vocab.strings['ORTH'] == ORTH
assert en_vocab.strings['PROB'] == PROB assert en_vocab.strings['PROB'] == PROB
def test_contains(en_vocab):
    """Check the ``in`` operator on the vocab for one expected hit and one miss."""
    expected_present = 'Hello'
    expected_absent = 'LKsdjvlsakdvlaksdvlkasjdvljasdlkfvm'
    assert expected_present in en_vocab
    assert expected_absent not in en_vocab
@pytest.mark.xfail @pytest.mark.xfail

View File

@ -172,6 +172,11 @@ cdef class Vocab:
self._by_orth.set(lex.orth, <void*>lex) self._by_orth.set(lex.orth, <void*>lex)
self.length += 1 self.length += 1
def __contains__(self, unicode string):
    """Report whether ``string`` has an entry in this vocab.

    The string is hashed with ``hash_string`` and the hash is looked up
    in ``self._by_hash``.

    Returns True if the lookup yields a non-NULL pointer, False otherwise.
    """
    # A NULL pointer from the lookup is treated as "not present".
    return self._by_hash.get(hash_string(string)) is not NULL
def __iter__(self): def __iter__(self):
cdef attr_t orth cdef attr_t orth
cdef size_t addr cdef size_t addr