mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Simplify lookup hashing
Just use `get_string_id`, which already does everything `ensure_hash` was supposed to do.
This commit is contained in:
		
							parent
							
								
									dd1810f05a
								
							
						
					
					
						commit
						f2c8b1e362
					
				|  | @ -7,16 +7,9 @@ from preshed.bloom import BloomFilter | ||||||
| 
 | 
 | ||||||
| from .errors import Errors | from .errors import Errors | ||||||
| from .util import SimpleFrozenDict, ensure_path | from .util import SimpleFrozenDict, ensure_path | ||||||
| from .compat import basestring_ |  | ||||||
| from .strings import get_string_id | from .strings import get_string_id | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def ensure_hash(key): |  | ||||||
|     if isinstance(key, basestring_): |  | ||||||
|         return get_string_id(key) |  | ||||||
|     return key |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class Lookups(object): | class Lookups(object): | ||||||
|     """Container for large lookup tables and dictionaries, e.g. lemmatization |     """Container for large lookup tables and dictionaries, e.g. lemmatization | ||||||
|     data or tokenizer exception lists. Lookups are available via vocab.lookups, |     data or tokenizer exception lists. Lookups are available via vocab.lookups, | ||||||
|  | @ -202,7 +195,7 @@ class Table(OrderedDict): | ||||||
|         key (unicode / int): The key to set. |         key (unicode / int): The key to set. | ||||||
|         value: The value to set. |         value: The value to set. | ||||||
|         """ |         """ | ||||||
|         key = ensure_hash(key) |         key = get_string_id(key) | ||||||
|         OrderedDict.__setitem__(self, key, value) |         OrderedDict.__setitem__(self, key, value) | ||||||
|         self.bloom.add(key) |         self.bloom.add(key) | ||||||
| 
 | 
 | ||||||
|  | @ -221,7 +214,7 @@ class Table(OrderedDict): | ||||||
|         key (unicode / int): The key to get. |         key (unicode / int): The key to get. | ||||||
|         RETURNS: The value. |         RETURNS: The value. | ||||||
|         """ |         """ | ||||||
|         key = ensure_hash(key) |         key = get_string_id(key) | ||||||
|         return OrderedDict.__getitem__(self, key) |         return OrderedDict.__getitem__(self, key) | ||||||
| 
 | 
 | ||||||
|     def get(self, key, default=None): |     def get(self, key, default=None): | ||||||
|  | @ -231,7 +224,7 @@ class Table(OrderedDict): | ||||||
|         default: The default value to return. |         default: The default value to return. | ||||||
|         RETURNS: The value. |         RETURNS: The value. | ||||||
|         """ |         """ | ||||||
|         key = ensure_hash(key) |         key = get_string_id(key) | ||||||
|         return OrderedDict.get(self, key, default) |         return OrderedDict.get(self, key, default) | ||||||
| 
 | 
 | ||||||
|     def __contains__(self, key): |     def __contains__(self, key): | ||||||
|  | @ -240,7 +233,7 @@ class Table(OrderedDict): | ||||||
|         key (unicode / int): The key to check. |         key (unicode / int): The key to check. | ||||||
|         RETURNS (bool): Whether the key is in the table. |         RETURNS (bool): Whether the key is in the table. | ||||||
|         """ |         """ | ||||||
|         key = ensure_hash(key) |         key = get_string_id(key) | ||||||
|         # This can give a false positive, so we need to check it after |         # This can give a false positive, so we need to check it after | ||||||
|         if key not in self.bloom: |         if key not in self.bloom: | ||||||
|             return False |             return False | ||||||
|  |  | ||||||
|  | @ -2,7 +2,8 @@ | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
| 
 | 
 | ||||||
| import pytest | import pytest | ||||||
| from spacy.lookups import Lookups, Table, ensure_hash | from spacy.lookups import Lookups, Table | ||||||
|  | from spacy.strings import get_string_id | ||||||
| from spacy.vocab import Vocab | from spacy.vocab import Vocab | ||||||
| 
 | 
 | ||||||
| from ..util import make_tempdir | from ..util import make_tempdir | ||||||
|  | @ -45,17 +46,17 @@ def test_table_api(): | ||||||
|     table = Table(name="table", data=data) |     table = Table(name="table", data=data) | ||||||
|     assert len(table) == len(data) |     assert len(table) == len(data) | ||||||
|     assert "foo" in table |     assert "foo" in table | ||||||
|     assert ensure_hash("foo") in table |     assert get_string_id("foo") in table | ||||||
|     assert table["foo"] == "bar" |     assert table["foo"] == "bar" | ||||||
|     assert table[ensure_hash("foo")] == "bar" |     assert table[get_string_id("foo")] == "bar" | ||||||
|     assert table.get("foo") == "bar" |     assert table.get("foo") == "bar" | ||||||
|     assert table.get("abc") is None |     assert table.get("abc") is None | ||||||
|     table["abc"] = 123 |     table["abc"] = 123 | ||||||
|     assert table["abc"] == 123 |     assert table["abc"] == 123 | ||||||
|     assert table[ensure_hash("abc")] == 123 |     assert table[get_string_id("abc")] == 123 | ||||||
|     table.set("def", 456) |     table.set("def", 456) | ||||||
|     assert table["def"] == 456 |     assert table["def"] == 456 | ||||||
|     assert table[ensure_hash("def")] == 456 |     assert table[get_string_id("def")] == 456 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_table_api_to_from_bytes(): | def test_table_api_to_from_bytes(): | ||||||
|  | @ -66,7 +67,7 @@ def test_table_api_to_from_bytes(): | ||||||
|     assert new_table.name == "table" |     assert new_table.name == "table" | ||||||
|     assert len(new_table) == 3 |     assert len(new_table) == 3 | ||||||
|     assert new_table["foo"] == "bar" |     assert new_table["foo"] == "bar" | ||||||
|     assert new_table[ensure_hash("foo")] == "bar" |     assert new_table[get_string_id("foo")] == "bar" | ||||||
|     new_table2 = Table(data={"def": 456}) |     new_table2 = Table(data={"def": 456}) | ||||||
|     new_table2.from_bytes(table_bytes) |     new_table2.from_bytes(table_bytes) | ||||||
|     assert len(new_table2) == 3 |     assert len(new_table2) == 3 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user