Revert "Fix StringStore after symbols changes"

This reverts commit bcfe3bd312.
This commit is contained in:
Matthew Honnibal 2019-03-07 12:52:15 +01:00
parent bcfe3bd312
commit c773b5011c

View File

@@ -11,15 +11,11 @@ import srsly
from .compat import basestring_
from .symbols import IDS as SYMBOLS_BY_STR
from . import symbols
from .symbols import NAMES as SYMBOLS_BY_INT
from .typedefs cimport hash_t
from .errors import Errors
from . import util
SYMBOLS_BY_INT = {}
for name in symbols.NAMES:
SYMBOLS_BY_INT[SYMBOLS_BY_STR[name]] = name
print(SYMBOLS_BY_INT[6005])
def get_string_id(key):
"""Get a string ID, handling the reserved symbols correctly. If the key is
@@ -120,8 +116,6 @@ cdef class StringStore:
return u''
elif string_or_id in SYMBOLS_BY_STR:
return SYMBOLS_BY_STR[string_or_id]
elif string_or_id in SYMBOLS_BY_INT:
return SYMBOLS_BY_INT[string_or_id]
cdef hash_t key
if isinstance(string_or_id, unicode):
key = hash_string(string_or_id)
@@ -129,6 +123,8 @@ cdef class StringStore:
elif isinstance(string_or_id, bytes):
key = hash_utf8(string_or_id, len(string_or_id))
return key
elif string_or_id < len(SYMBOLS_BY_INT):
return SYMBOLS_BY_INT[string_or_id]
else:
key = string_or_id
self.hits.insert(key)
@@ -185,14 +181,11 @@ cdef class StringStore:
string (unicode): The string to check.
RETURNS (bool): Whether the store contains the string.
"""
global SYMBOLS_BY_INT
cdef hash_t key
if isinstance(string, int) or isinstance(string, long):
if string == 0:
return True
key = string
if key in SYMBOLS_BY_INT:
return True
elif len(string) == 0:
return True
elif string in SYMBOLS_BY_STR:
@@ -202,8 +195,11 @@ cdef class StringStore:
else:
string = string.encode('utf8')
key = hash_utf8(string, len(string))
self.hits.insert(key)
return self._map.get(key) is not NULL
if key < len(SYMBOLS_BY_INT):
return True
else:
self.hits.insert(key)
return self._map.get(key) is not NULL
def __iter__(self):
"""Iterate over the strings in the store, in order.