mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Fix vocab deserialization when loading already present lexemes (#3383)
* Fix vocab deserialization bug. Closes #2153
* Un-xfail test for #2153
This commit is contained in:
		
							parent
							
								
									d6eaa71afc
								
							
						
					
					
						commit
						27dd820753
					
				|  | @ -68,7 +68,6 @@ def test_serialize_vocab_lex_attrs_bytes(strings, lex_attr): | ||||||
|     assert vocab2[strings[0]].norm_ == lex_attr |     assert vocab2[strings[0]].norm_ == lex_attr | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @pytest.mark.xfail |  | ||||||
| @pytest.mark.parametrize("strings,lex_attr", test_strings_attrs) | @pytest.mark.parametrize("strings,lex_attr", test_strings_attrs) | ||||||
| def test_deserialize_vocab_seen_entries(strings, lex_attr): | def test_deserialize_vocab_seen_entries(strings, lex_attr): | ||||||
|     # Reported in #2153 |     # Reported in #2153 | ||||||
|  |  | ||||||
|  | @ -1,6 +1,7 @@ | ||||||
| # coding: utf8 | # coding: utf8 | ||||||
| # cython: profile=True | # cython: profile=True | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  | from libc.string cimport memcpy | ||||||
| 
 | 
 | ||||||
| import numpy | import numpy | ||||||
| import srsly | import srsly | ||||||
|  | @ -518,7 +519,10 @@ cdef class Vocab: | ||||||
|             for j in range(sizeof(lex_data.data)): |             for j in range(sizeof(lex_data.data)): | ||||||
|                 lex_data.data[j] = bytes_ptr[i+j] |                 lex_data.data[j] = bytes_ptr[i+j] | ||||||
|             Lexeme.c_from_bytes(lexeme, lex_data) |             Lexeme.c_from_bytes(lexeme, lex_data) | ||||||
| 
 |             prev_entry = self._by_orth.get(lexeme.orth) | ||||||
|  |             if prev_entry != NULL: | ||||||
|  |                 memcpy(prev_entry, lexeme, sizeof(LexemeC)) | ||||||
|  |                 continue | ||||||
|             ptr = self.strings._map.get(lexeme.orth) |             ptr = self.strings._map.get(lexeme.orth) | ||||||
|             if ptr == NULL: |             if ptr == NULL: | ||||||
|                 continue |                 continue | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user