mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
Fix vocab deserialization when loading already present lexemes (#3383)
* Fix vocab deserialization bug. Closes #2153
* Un-xfail test for #2153
This commit is contained in:
parent
d6eaa71afc
commit
27dd820753
|
@ -68,7 +68,6 @@ def test_serialize_vocab_lex_attrs_bytes(strings, lex_attr):
|
||||||
assert vocab2[strings[0]].norm_ == lex_attr
|
assert vocab2[strings[0]].norm_ == lex_attr
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.xfail
|
|
||||||
@pytest.mark.parametrize("strings,lex_attr", test_strings_attrs)
|
@pytest.mark.parametrize("strings,lex_attr", test_strings_attrs)
|
||||||
def test_deserialize_vocab_seen_entries(strings, lex_attr):
|
def test_deserialize_vocab_seen_entries(strings, lex_attr):
|
||||||
# Reported in #2153
|
# Reported in #2153
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
# cython: profile=True
|
# cython: profile=True
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
from libc.string cimport memcpy
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
import srsly
|
import srsly
|
||||||
|
@ -518,7 +519,10 @@ cdef class Vocab:
|
||||||
for j in range(sizeof(lex_data.data)):
|
for j in range(sizeof(lex_data.data)):
|
||||||
lex_data.data[j] = bytes_ptr[i+j]
|
lex_data.data[j] = bytes_ptr[i+j]
|
||||||
Lexeme.c_from_bytes(lexeme, lex_data)
|
Lexeme.c_from_bytes(lexeme, lex_data)
|
||||||
|
prev_entry = self._by_orth.get(lexeme.orth)
|
||||||
|
if prev_entry != NULL:
|
||||||
|
memcpy(prev_entry, lexeme, sizeof(LexemeC))
|
||||||
|
continue
|
||||||
ptr = self.strings._map.get(lexeme.orth)
|
ptr = self.strings._map.get(lexeme.orth)
|
||||||
if ptr == NULL:
|
if ptr == NULL:
|
||||||
continue
|
continue
|
||||||
|
|
Loading…
Reference in New Issue
Block a user