fix: lexeme.pyx

This commit is contained in:
Basile Dura 2023-06-02 09:43:28 +02:00
parent 0210f0de26
commit e7ea284ff1
No known key found for this signature in database

View File

@ -1,6 +1,5 @@
# cython: embedsignature=True # cython: embedsignature=True
# Compiler crashes on memory view coercion without this. Should report bug. # Compiler crashes on memory view coercion without this. Should report bug.
from cython.view cimport array as cvarray
from libc.string cimport memset from libc.string cimport memset
cimport numpy as np cimport numpy as np
np.import_array() np.import_array()
@ -19,7 +18,7 @@ from .attrs import intify_attrs
from .errors import Errors, Warnings from .errors import Errors, Warnings
OOV_RANK = 0xffffffffffffffff # UINT64_MAX OOV_RANK = 0xffffffffffffffff # UINT64_MAX
memset(&EMPTY_LEXEME, 0, sizeof(LexemeC)) memset(&EMPTY_LEXEME, 0, sizeof(LexemeC))
EMPTY_LEXEME.id = OOV_RANK EMPTY_LEXEME.id = OOV_RANK
@ -89,7 +88,7 @@ cdef class Lexeme:
if isinstance(value, float): if isinstance(value, float):
continue continue
elif isinstance(value, (int, long)): elif isinstance(value, (int, long)):
Lexeme.set_struct_attr(self.c, attr, value) Lexeme.set_struct_attr(self.c, attr, value)
else: else:
Lexeme.set_struct_attr(self.c, attr, self.vocab.strings.add(value)) Lexeme.set_struct_attr(self.c, attr, self.vocab.strings.add(value))
@ -121,10 +120,12 @@ cdef class Lexeme:
if hasattr(other, "orth"): if hasattr(other, "orth"):
if self.c.orth == other.orth: if self.c.orth == other.orth:
return 1.0 return 1.0
elif hasattr(other, "__len__") and len(other) == 1 \ elif (
and hasattr(other[0], "orth"): hasattr(other, "__len__") and len(other) == 1
if self.c.orth == other[0].orth: and hasattr(other[0], "orth")
return 1.0 and self.c.orth == other[0].orth
):
return 1.0
if self.vector_norm == 0 or other.vector_norm == 0: if self.vector_norm == 0 or other.vector_norm == 0:
warnings.warn(Warnings.W008.format(obj="Lexeme")) warnings.warn(Warnings.W008.format(obj="Lexeme"))
return 0.0 return 0.0