mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-10 16:22:29 +03:00
Fix kb.
This commit is contained in:
parent
a0bf50661b
commit
77e9e4ddad
|
@ -4,7 +4,8 @@ from ..typedefs cimport hash_t
|
||||||
from .kb cimport KnowledgeBase
|
from .kb cimport KnowledgeBase
|
||||||
|
|
||||||
|
|
||||||
# Object used by the Entity Linker that summarizes one entity-alias candidate combination.
|
# Object used by the Entity Linker that summarizes one entity-alias candidate
|
||||||
|
# combination.
|
||||||
cdef class Candidate:
|
cdef class Candidate:
|
||||||
cdef readonly KnowledgeBase kb
|
cdef readonly KnowledgeBase kb
|
||||||
cdef hash_t entity_hash
|
cdef hash_t entity_hash
|
||||||
|
|
|
@ -55,23 +55,28 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
||||||
# optional data, we can let users configure a DB as the backend for this.
|
# optional data, we can let users configure a DB as the backend for this.
|
||||||
cdef object _features_table
|
cdef object _features_table
|
||||||
|
|
||||||
|
|
||||||
cdef inline int64_t c_add_vector(self, vector[float] entity_vector) nogil:
|
cdef inline int64_t c_add_vector(self, vector[float] entity_vector) nogil:
|
||||||
"""Add an entity vector to the vectors table."""
|
"""Add an entity vector to the vectors table."""
|
||||||
cdef int64_t new_index = self._vectors_table.size()
|
cdef int64_t new_index = self._vectors_table.size()
|
||||||
self._vectors_table.push_back(entity_vector)
|
self._vectors_table.push_back(entity_vector)
|
||||||
return new_index
|
return new_index
|
||||||
|
|
||||||
|
cdef inline int64_t c_add_entity(
|
||||||
cdef inline int64_t c_add_entity(self, hash_t entity_hash, float freq,
|
self,
|
||||||
int32_t vector_index, int feats_row) nogil:
|
hash_t entity_hash,
|
||||||
|
float freq,
|
||||||
|
int32_t vector_index,
|
||||||
|
int feats_row
|
||||||
|
) nogil:
|
||||||
"""Add an entry to the vector of entries.
|
"""Add an entry to the vector of entries.
|
||||||
After calling this method, make sure to update also the _entry_index using the return value"""
|
After calling this method, make sure to update also the _entry_index
|
||||||
|
using the return value"""
|
||||||
# This is what we'll map the entity hash key to. It's where the entry will sit
|
# This is what we'll map the entity hash key to. It's where the entry will sit
|
||||||
# in the vector of entries, so we can get it later.
|
# in the vector of entries, so we can get it later.
|
||||||
cdef int64_t new_index = self._entries.size()
|
cdef int64_t new_index = self._entries.size()
|
||||||
|
|
||||||
# Avoid struct initializer to enable nogil, cf https://github.com/cython/cython/issues/1642
|
# Avoid struct initializer to enable nogil, cf.
|
||||||
|
# https://github.com/cython/cython/issues/1642
|
||||||
cdef KBEntryC entry
|
cdef KBEntryC entry
|
||||||
entry.entity_hash = entity_hash
|
entry.entity_hash = entity_hash
|
||||||
entry.vector_index = vector_index
|
entry.vector_index = vector_index
|
||||||
|
@ -81,11 +86,17 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
||||||
self._entries.push_back(entry)
|
self._entries.push_back(entry)
|
||||||
return new_index
|
return new_index
|
||||||
|
|
||||||
cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs) nogil:
|
cdef inline int64_t c_add_aliases(
|
||||||
"""Connect a mention to a list of potential entities with their prior probabilities .
|
self,
|
||||||
After calling this method, make sure to update also the _alias_index using the return value"""
|
hash_t alias_hash,
|
||||||
# This is what we'll map the alias hash key to. It's where the alias will be defined
|
vector[int64_t] entry_indices,
|
||||||
# in the vector of aliases.
|
vector[float] probs
|
||||||
|
) nogil:
|
||||||
|
"""Connect a mention to a list of potential entities with their prior
|
||||||
|
probabilities. After calling this method, make sure to update also the
|
||||||
|
_alias_index using the return value"""
|
||||||
|
# This is what we'll map the alias hash key to. It's where the alias will be
|
||||||
|
# defined in the vector of aliases.
|
||||||
cdef int64_t new_index = self._aliases_table.size()
|
cdef int64_t new_index = self._aliases_table.size()
|
||||||
|
|
||||||
# Avoid struct initializer to enable nogil
|
# Avoid struct initializer to enable nogil
|
||||||
|
@ -98,8 +109,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
||||||
|
|
||||||
cdef inline void _create_empty_vectors(self, hash_t dummy_hash) nogil:
|
cdef inline void _create_empty_vectors(self, hash_t dummy_hash) nogil:
|
||||||
"""
|
"""
|
||||||
Initializing the vectors and making sure the first element of each vector is a dummy,
|
Initializing the vectors and making sure the first element of each vector is a
|
||||||
because the PreshMap maps pointing to indices in these vectors can not contain 0 as value
|
dummy, because the PreshMap maps pointing to indices in these vectors can not
|
||||||
|
contain 0 as value.
|
||||||
cf. https://github.com/explosion/preshed/issues/17
|
cf. https://github.com/explosion/preshed/issues/17
|
||||||
"""
|
"""
|
||||||
cdef int32_t dummy_value = 0
|
cdef int32_t dummy_value = 0
|
||||||
|
@ -130,12 +142,18 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
||||||
cdef class Writer:
|
cdef class Writer:
|
||||||
cdef FILE* _fp
|
cdef FILE* _fp
|
||||||
|
|
||||||
cdef int write_header(self, int64_t nr_entries, int64_t entity_vector_length) except -1
|
cdef int write_header(
|
||||||
|
self, int64_t nr_entries, int64_t entity_vector_length
|
||||||
|
) except -1
|
||||||
cdef int write_vector_element(self, float element) except -1
|
cdef int write_vector_element(self, float element) except -1
|
||||||
cdef int write_entry(self, hash_t entry_hash, float entry_freq, int32_t vector_index) except -1
|
cdef int write_entry(
|
||||||
|
self, hash_t entry_hash, float entry_freq, int32_t vector_index
|
||||||
|
) except -1
|
||||||
|
|
||||||
cdef int write_alias_length(self, int64_t alias_length) except -1
|
cdef int write_alias_length(self, int64_t alias_length) except -1
|
||||||
cdef int write_alias_header(self, hash_t alias_hash, int64_t candidate_length) except -1
|
cdef int write_alias_header(
|
||||||
|
self, hash_t alias_hash, int64_t candidate_length
|
||||||
|
) except -1
|
||||||
cdef int write_alias(self, int64_t entry_index, float prob) except -1
|
cdef int write_alias(self, int64_t entry_index, float prob) except -1
|
||||||
|
|
||||||
cdef int _write(self, void* value, size_t size) except -1
|
cdef int _write(self, void* value, size_t size) except -1
|
||||||
|
@ -143,12 +161,18 @@ cdef class Writer:
|
||||||
cdef class Reader:
|
cdef class Reader:
|
||||||
cdef FILE* _fp
|
cdef FILE* _fp
|
||||||
|
|
||||||
cdef int read_header(self, int64_t* nr_entries, int64_t* entity_vector_length) except -1
|
cdef int read_header(
|
||||||
|
self, int64_t* nr_entries, int64_t* entity_vector_length
|
||||||
|
) except -1
|
||||||
cdef int read_vector_element(self, float* element) except -1
|
cdef int read_vector_element(self, float* element) except -1
|
||||||
cdef int read_entry(self, hash_t* entity_hash, float* freq, int32_t* vector_index) except -1
|
cdef int read_entry(
|
||||||
|
self, hash_t* entity_hash, float* freq, int32_t* vector_index
|
||||||
|
) except -1
|
||||||
|
|
||||||
cdef int read_alias_length(self, int64_t* alias_length) except -1
|
cdef int read_alias_length(self, int64_t* alias_length) except -1
|
||||||
cdef int read_alias_header(self, hash_t* alias_hash, int64_t* candidate_length) except -1
|
cdef int read_alias_header(
|
||||||
|
self, hash_t* alias_hash, int64_t* candidate_length
|
||||||
|
) except -1
|
||||||
cdef int read_alias(self, int64_t* entry_index, float* prob) except -1
|
cdef int read_alias(self, int64_t* entry_index, float* prob) except -1
|
||||||
|
|
||||||
cdef int _read(self, void* value, size_t size) except -1
|
cdef int _read(self, void* value, size_t size) except -1
|
||||||
|
|
Loading…
Reference in New Issue
Block a user