mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Simplify lookup hashing
Just use get_string_id, which already does everything ensure_hash was supposed to do: hash string keys and pass non-string (already hashed) keys through unchanged.
This commit is contained in:
parent
dd1810f05a
commit
f2c8b1e362
|
@ -7,16 +7,9 @@ from preshed.bloom import BloomFilter
|
||||||
|
|
||||||
from .errors import Errors
|
from .errors import Errors
|
||||||
from .util import SimpleFrozenDict, ensure_path
|
from .util import SimpleFrozenDict, ensure_path
|
||||||
from .compat import basestring_
|
|
||||||
from .strings import get_string_id
|
from .strings import get_string_id
|
||||||
|
|
||||||
|
|
||||||
def ensure_hash(key):
|
|
||||||
if isinstance(key, basestring_):
|
|
||||||
return get_string_id(key)
|
|
||||||
return key
|
|
||||||
|
|
||||||
|
|
||||||
class Lookups(object):
|
class Lookups(object):
|
||||||
"""Container for large lookup tables and dictionaries, e.g. lemmatization
|
"""Container for large lookup tables and dictionaries, e.g. lemmatization
|
||||||
data or tokenizer exception lists. Lookups are available via vocab.lookups,
|
data or tokenizer exception lists. Lookups are available via vocab.lookups,
|
||||||
|
@ -202,7 +195,7 @@ class Table(OrderedDict):
|
||||||
key (unicode / int): The key to set.
|
key (unicode / int): The key to set.
|
||||||
value: The value to set.
|
value: The value to set.
|
||||||
"""
|
"""
|
||||||
key = ensure_hash(key)
|
key = get_string_id(key)
|
||||||
OrderedDict.__setitem__(self, key, value)
|
OrderedDict.__setitem__(self, key, value)
|
||||||
self.bloom.add(key)
|
self.bloom.add(key)
|
||||||
|
|
||||||
|
@ -221,7 +214,7 @@ class Table(OrderedDict):
|
||||||
key (unicode / int): The key to get.
|
key (unicode / int): The key to get.
|
||||||
RETURNS: The value.
|
RETURNS: The value.
|
||||||
"""
|
"""
|
||||||
key = ensure_hash(key)
|
key = get_string_id(key)
|
||||||
return OrderedDict.__getitem__(self, key)
|
return OrderedDict.__getitem__(self, key)
|
||||||
|
|
||||||
def get(self, key, default=None):
|
def get(self, key, default=None):
|
||||||
|
@ -231,7 +224,7 @@ class Table(OrderedDict):
|
||||||
default: The default value to return.
|
default: The default value to return.
|
||||||
RETURNS: The value.
|
RETURNS: The value.
|
||||||
"""
|
"""
|
||||||
key = ensure_hash(key)
|
key = get_string_id(key)
|
||||||
return OrderedDict.get(self, key, default)
|
return OrderedDict.get(self, key, default)
|
||||||
|
|
||||||
def __contains__(self, key):
|
def __contains__(self, key):
|
||||||
|
@ -240,7 +233,7 @@ class Table(OrderedDict):
|
||||||
key (unicode / int): The key to check.
|
key (unicode / int): The key to check.
|
||||||
RETURNS (bool): Whether the key is in the table.
|
RETURNS (bool): Whether the key is in the table.
|
||||||
"""
|
"""
|
||||||
key = ensure_hash(key)
|
key = get_string_id(key)
|
||||||
# This can give a false positive, so we need to check it after
|
# This can give a false positive, so we need to check it after
|
||||||
if key not in self.bloom:
|
if key not in self.bloom:
|
||||||
return False
|
return False
|
||||||
|
|
|
@ -2,7 +2,8 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from spacy.lookups import Lookups, Table, ensure_hash
|
from spacy.lookups import Lookups, Table
|
||||||
|
from spacy.strings import get_string_id
|
||||||
from spacy.vocab import Vocab
|
from spacy.vocab import Vocab
|
||||||
|
|
||||||
from ..util import make_tempdir
|
from ..util import make_tempdir
|
||||||
|
@ -45,17 +46,17 @@ def test_table_api():
|
||||||
table = Table(name="table", data=data)
|
table = Table(name="table", data=data)
|
||||||
assert len(table) == len(data)
|
assert len(table) == len(data)
|
||||||
assert "foo" in table
|
assert "foo" in table
|
||||||
assert ensure_hash("foo") in table
|
assert get_string_id("foo") in table
|
||||||
assert table["foo"] == "bar"
|
assert table["foo"] == "bar"
|
||||||
assert table[ensure_hash("foo")] == "bar"
|
assert table[get_string_id("foo")] == "bar"
|
||||||
assert table.get("foo") == "bar"
|
assert table.get("foo") == "bar"
|
||||||
assert table.get("abc") is None
|
assert table.get("abc") is None
|
||||||
table["abc"] = 123
|
table["abc"] = 123
|
||||||
assert table["abc"] == 123
|
assert table["abc"] == 123
|
||||||
assert table[ensure_hash("abc")] == 123
|
assert table[get_string_id("abc")] == 123
|
||||||
table.set("def", 456)
|
table.set("def", 456)
|
||||||
assert table["def"] == 456
|
assert table["def"] == 456
|
||||||
assert table[ensure_hash("def")] == 456
|
assert table[get_string_id("def")] == 456
|
||||||
|
|
||||||
|
|
||||||
def test_table_api_to_from_bytes():
|
def test_table_api_to_from_bytes():
|
||||||
|
@ -66,7 +67,7 @@ def test_table_api_to_from_bytes():
|
||||||
assert new_table.name == "table"
|
assert new_table.name == "table"
|
||||||
assert len(new_table) == 3
|
assert len(new_table) == 3
|
||||||
assert new_table["foo"] == "bar"
|
assert new_table["foo"] == "bar"
|
||||||
assert new_table[ensure_hash("foo")] == "bar"
|
assert new_table[get_string_id("foo")] == "bar"
|
||||||
new_table2 = Table(data={"def": 456})
|
new_table2 = Table(data={"def": 456})
|
||||||
new_table2.from_bytes(table_bytes)
|
new_table2.from_bytes(table_bytes)
|
||||||
assert len(new_table2) == 3
|
assert len(new_table2) == 3
|
||||||
|
|
Loading…
Reference in New Issue
Block a user