	Fix _SP/POS=SPACE in strings serialization tests
commit 17ee9ab53a
parent 631e20d0c6
@@ -8,6 +8,7 @@ from ..util import make_tempdir
 
 test_strings = [([], []), (["rats", "are", "cute"], ["i", "like", "rats"])]
 test_strings_attrs = [(["rats", "are", "cute"], "Hello")]
+default_strings = ("_SP", "POS=SPACE")
 
 
 @pytest.mark.xfail
@@ -34,8 +35,8 @@ def test_serialize_vocab_roundtrip_bytes(strings1, strings2):
     assert vocab1.to_bytes() == vocab1_b
     new_vocab1 = Vocab().from_bytes(vocab1_b)
     assert new_vocab1.to_bytes() == vocab1_b
-    assert len(new_vocab1.strings) == len(strings1) + 1  # adds _SP
-    assert sorted([s for s in new_vocab1.strings]) == sorted(strings1 + ["_SP"])
+    assert len(new_vocab1.strings) == len(strings1) + 2  # adds _SP and POS=SPACE
+    assert sorted([s for s in new_vocab1.strings]) == sorted(strings1 + list(default_strings))
 
 
 @pytest.mark.parametrize("strings1,strings2", test_strings)
@@ -50,15 +51,15 @@ def test_serialize_vocab_roundtrip_disk(strings1, strings2):
         vocab1_d = Vocab().from_disk(file_path1)
         vocab2_d = Vocab().from_disk(file_path2)
         # check strings rather than lexemes, which are only reloaded on demand
-        assert strings1 == [s for s in vocab1_d.strings if s != "_SP"]
-        assert strings2 == [s for s in vocab2_d.strings if s != "_SP"]
+        assert strings1 == [s for s in vocab1_d.strings if s not in default_strings]
+        assert strings2 == [s for s in vocab2_d.strings if s not in default_strings]
         if strings1 == strings2:
-            assert [s for s in vocab1_d.strings if s != "_SP"] == [
-                s for s in vocab2_d.strings if s != "_SP"
+            assert [s for s in vocab1_d.strings if s not in default_strings] == [
+                s for s in vocab2_d.strings if s not in default_strings
             ]
         else:
-            assert [s for s in vocab1_d.strings if s != "_SP"] != [
-                s for s in vocab2_d.strings if s != "_SP"
+            assert [s for s in vocab1_d.strings if s not in default_strings] != [
+                s for s in vocab2_d.strings if s not in default_strings
             ]
 
 
@@ -78,7 +79,7 @@ def test_deserialize_vocab_seen_entries(strings, lex_attr):
     # Reported in #2153
     vocab = Vocab(strings=strings)
     vocab.from_bytes(vocab.to_bytes())
-    assert len(vocab.strings) == len(strings) + 1  # adds _SP
+    assert len(vocab.strings) == len(strings) + 2  # adds _SP and POS=SPACE
 
 
 @pytest.mark.parametrize("strings,lex_attr", test_strings_attrs)
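
Below is a short sketch of the serialization behavior these assertions encode. It is an assumption based on this diff (a Vocab round-trip repopulating the defaults "_SP" and "POS=SPACE"), not part of the committed test file:

# Sketch only, assuming the spaCy version this commit targets: a Vocab
# round-tripped through to_bytes()/from_bytes() ends up holding the
# caller's strings plus the two default strings "_SP" and "POS=SPACE".
from spacy.vocab import Vocab

default_strings = ("_SP", "POS=SPACE")

strings = ["rats", "are", "cute"]
restored = Vocab().from_bytes(Vocab(strings=strings).to_bytes())

# Both defaults come back after deserialization, which is why the tests
# now count len(strings) + 2 and filter with `s not in default_strings`.
assert sorted(restored.strings) == sorted(strings + list(default_strings))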