mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	Fix vocab deserialization when loading already present lexemes (#3383)
* Fix vocab deserialization bug. Closes #2153
* Un-xfail test for #2153
This commit is contained in:
		
							parent
							
								
									d6eaa71afc
								
							
						
					
					
						commit
						27dd820753
					
				|  | @ -68,7 +68,6 @@ def test_serialize_vocab_lex_attrs_bytes(strings, lex_attr): | |||
|     assert vocab2[strings[0]].norm_ == lex_attr | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.xfail | ||||
| @pytest.mark.parametrize("strings,lex_attr", test_strings_attrs) | ||||
| def test_deserialize_vocab_seen_entries(strings, lex_attr): | ||||
|     # Reported in #2153 | ||||
|  |  | |||
|  | @ -1,6 +1,7 @@ | |||
| # coding: utf8 | ||||
| # cython: profile=True | ||||
| from __future__ import unicode_literals | ||||
| from libc.string cimport memcpy | ||||
| 
 | ||||
| import numpy | ||||
| import srsly | ||||
|  | @ -518,7 +519,10 @@ cdef class Vocab: | |||
|             for j in range(sizeof(lex_data.data)): | ||||
|                 lex_data.data[j] = bytes_ptr[i+j] | ||||
|             Lexeme.c_from_bytes(lexeme, lex_data) | ||||
| 
 | ||||
|             prev_entry = self._by_orth.get(lexeme.orth) | ||||
|             if prev_entry != NULL: | ||||
|                 memcpy(prev_entry, lexeme, sizeof(LexemeC)) | ||||
|                 continue | ||||
|             ptr = self.strings._map.get(lexeme.orth) | ||||
|             if ptr == NULL: | ||||
|                 continue | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user