fix: lexeme.pyx

This commit is contained in:
Basile Dura 2023-06-02 09:43:28 +02:00
parent 0210f0de26
commit e7ea284ff1
No known key found for this signature in database

View File

@@ -1,6 +1,5 @@
# cython: embedsignature=True
# Compiler crashes on memory view coercion without this. Should report bug.
from cython.view cimport array as cvarray
from libc.string cimport memset
cimport numpy as np
np.import_array()
@@ -19,7 +18,7 @@ from .attrs import intify_attrs
from .errors import Errors, Warnings
OOV_RANK = 0xffffffffffffffff # UINT64_MAX
OOV_RANK = 0xffffffffffffffff # UINT64_MAX
memset(&EMPTY_LEXEME, 0, sizeof(LexemeC))
EMPTY_LEXEME.id = OOV_RANK
@@ -89,7 +88,7 @@ cdef class Lexeme:
if isinstance(value, float):
continue
elif isinstance(value, (int, long)):
Lexeme.set_struct_attr(self.c, attr, value)
Lexeme.set_struct_attr(self.c, attr, value)
else:
Lexeme.set_struct_attr(self.c, attr, self.vocab.strings.add(value))
@@ -121,10 +120,12 @@ cdef class Lexeme:
if hasattr(other, "orth"):
if self.c.orth == other.orth:
return 1.0
elif hasattr(other, "__len__") and len(other) == 1 \
and hasattr(other[0], "orth"):
if self.c.orth == other[0].orth:
return 1.0
elif (
hasattr(other, "__len__") and len(other) == 1
and hasattr(other[0], "orth")
and self.c.orth == other[0].orth
):
return 1.0
if self.vector_norm == 0 or other.vector_norm == 0:
warnings.warn(Warnings.W008.format(obj="Lexeme"))
return 0.0
@@ -133,7 +134,7 @@ cdef class Lexeme:
result = xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
# ensure we get a scalar back (numpy does this automatically but cupy doesn't)
return result.item()
@property
def has_vector(self):
"""RETURNS (bool): Whether a word vector is associated with the object.