mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
little fixes
This commit is contained in:
parent
6ae3b5699e
commit
61a33f55d2
12
spacy/kb.pxd
12
spacy/kb.pxd
|
@ -13,7 +13,7 @@ from .typedefs cimport hash_t
|
|||
# of bits we need to keep track of the answers.
|
||||
cdef struct _EntryC:
|
||||
|
||||
# The hash of this entry's unique ID and name in the kB
|
||||
# The hash of this entry's unique ID/name in the KB
|
||||
hash_t entity_hash
|
||||
|
||||
# Allows retrieval of one or more vectors.
|
||||
|
@ -99,7 +99,7 @@ cdef class KnowledgeBase:
|
|||
cdef inline int64_t c_add_entity(self, hash_t entity_hash, float prob,
|
||||
int32_t* vector_rows, int feats_row):
|
||||
"""Add an entry to the knowledge base."""
|
||||
# This is what we'll map the hash key to. It's where the entry will sit
|
||||
# This is what we'll map the entity hash key to. It's where the entry will sit
|
||||
# in the vector of entries, so we can get it later.
|
||||
cdef int64_t new_index = self._entries.size()
|
||||
self._entries.push_back(
|
||||
|
@ -114,6 +114,8 @@ cdef class KnowledgeBase:
|
|||
|
||||
cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs):
|
||||
"""Connect a mention to a list of potential entities with their prior probabilities."""
|
||||
# This is what we'll map the alias hash key to. It's where the alias will be defined
|
||||
# in the vector of aliases.
|
||||
cdef int64_t new_index = self._aliases_table.size()
|
||||
|
||||
self._aliases_table.push_back(
|
||||
|
@ -126,12 +128,14 @@ cdef class KnowledgeBase:
|
|||
|
||||
cdef inline _create_empty_vectors(self):
|
||||
"""
|
||||
Making sure the first element of each vector is a dummy,
|
||||
Initializing the vectors and making sure the first element of each vector is a dummy,
|
||||
because the PreshMap maps pointing to indices in these vectors cannot contain 0 as a value
|
||||
cf. https://github.com/explosion/preshed/issues/17
|
||||
"""
|
||||
cdef int32_t dummy_value = 0
|
||||
self.vocab.strings.add("")
|
||||
|
||||
self._entry_index = PreshMap()
|
||||
self._entries.push_back(
|
||||
_EntryC(
|
||||
entity_hash=self.vocab.strings[""],
|
||||
|
@ -139,6 +143,8 @@ cdef class KnowledgeBase:
|
|||
feats_row=dummy_value,
|
||||
prob=dummy_value
|
||||
))
|
||||
|
||||
self._alias_index = PreshMap()
|
||||
self._aliases_table.push_back(
|
||||
_AliasC(
|
||||
entry_indices=[dummy_value],
|
||||
|
|
13
spacy/kb.pyx
13
spacy/kb.pyx
|
@ -1,3 +1,4 @@
|
|||
# cython: infer_types=True
|
||||
# cython: profile=True
|
||||
# coding: utf8
|
||||
from spacy.errors import Errors, Warnings, user_warning
|
||||
|
@ -19,7 +20,7 @@ cdef class Candidate:
|
|||
@property
|
||||
def entity_(self):
|
||||
"""RETURNS (unicode): ID/name of this entity in the KB"""
|
||||
return self.kb.vocab.strings[self.entity]
|
||||
return self.kb.vocab.strings[self.entity_hash]
|
||||
|
||||
@property
|
||||
def alias(self):
|
||||
|
@ -29,7 +30,7 @@ cdef class Candidate:
|
|||
@property
|
||||
def alias_(self):
|
||||
"""RETURNS (unicode): ID of the original alias"""
|
||||
return self.kb.vocab.strings[self.alias]
|
||||
return self.kb.vocab.strings[self.alias_hash]
|
||||
|
||||
@property
|
||||
def prior_prob(self):
|
||||
|
@ -40,8 +41,6 @@ cdef class KnowledgeBase:
|
|||
|
||||
def __init__(self, Vocab vocab):
|
||||
self.vocab = vocab
|
||||
self._entry_index = PreshMap()
|
||||
self._alias_index = PreshMap()
|
||||
self.mem = Pool()
|
||||
self._create_empty_vectors()
|
||||
|
||||
|
@ -56,8 +55,8 @@ cdef class KnowledgeBase:
|
|||
|
||||
def add_entity(self, unicode entity, float prob=0.5, vectors=None, features=None):
|
||||
"""
|
||||
Add an entity to the KB.
|
||||
Return the hash of the entity ID at the end
|
||||
Add an entity to the KB, optionally specifying its log probability based on corpus frequency
|
||||
Return the hash of the entity ID/name at the end
|
||||
"""
|
||||
cdef hash_t entity_hash = self.vocab.strings.add(entity)
|
||||
|
||||
|
@ -98,8 +97,6 @@ cdef class KnowledgeBase:
|
|||
user_warning(Warnings.W017.format(alias=alias))
|
||||
return
|
||||
|
||||
cdef hash_t entity_hash
|
||||
|
||||
cdef vector[int64_t] entry_indices
|
||||
cdef vector[float] probs
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user