mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
little fixes
This commit is contained in:
parent
6ae3b5699e
commit
61a33f55d2
12
spacy/kb.pxd
12
spacy/kb.pxd
|
@ -13,7 +13,7 @@ from .typedefs cimport hash_t
|
||||||
# of bits we need to keep track of the answers.
|
# of bits we need to keep track of the answers.
|
||||||
cdef struct _EntryC:
|
cdef struct _EntryC:
|
||||||
|
|
||||||
# The hash of this entry's unique ID and name in the KB
|
# The hash of this entry's unique ID/name in the KB
|
||||||
hash_t entity_hash
|
hash_t entity_hash
|
||||||
|
|
||||||
# Allows retrieval of one or more vectors.
|
# Allows retrieval of one or more vectors.
|
||||||
|
@ -99,7 +99,7 @@ cdef class KnowledgeBase:
|
||||||
cdef inline int64_t c_add_entity(self, hash_t entity_hash, float prob,
|
cdef inline int64_t c_add_entity(self, hash_t entity_hash, float prob,
|
||||||
int32_t* vector_rows, int feats_row):
|
int32_t* vector_rows, int feats_row):
|
||||||
"""Add an entry to the knowledge base."""
|
"""Add an entry to the knowledge base."""
|
||||||
# This is what we'll map the hash key to. It's where the entry will sit
|
# This is what we'll map the entity hash key to. It's where the entry will sit
|
||||||
# in the vector of entries, so we can get it later.
|
# in the vector of entries, so we can get it later.
|
||||||
cdef int64_t new_index = self._entries.size()
|
cdef int64_t new_index = self._entries.size()
|
||||||
self._entries.push_back(
|
self._entries.push_back(
|
||||||
|
@ -114,6 +114,8 @@ cdef class KnowledgeBase:
|
||||||
|
|
||||||
cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs):
|
cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs):
|
||||||
"""Connect a mention to a list of potential entities with their prior probabilities."""
|
"""Connect a mention to a list of potential entities with their prior probabilities."""
|
||||||
|
# This is what we'll map the alias hash key to. It's where the alias will be defined
|
||||||
|
# in the vector of aliases.
|
||||||
cdef int64_t new_index = self._aliases_table.size()
|
cdef int64_t new_index = self._aliases_table.size()
|
||||||
|
|
||||||
self._aliases_table.push_back(
|
self._aliases_table.push_back(
|
||||||
|
@ -126,12 +128,14 @@ cdef class KnowledgeBase:
|
||||||
|
|
||||||
cdef inline _create_empty_vectors(self):
|
cdef inline _create_empty_vectors(self):
|
||||||
"""
|
"""
|
||||||
Making sure the first element of each vector is a dummy,
|
Initializing the vectors and making sure the first element of each vector is a dummy,
|
||||||
because the PreshMap maps pointing to indices in these vectors cannot contain 0 as a value
|
because the PreshMap maps pointing to indices in these vectors cannot contain 0 as a value
|
||||||
cf. https://github.com/explosion/preshed/issues/17
|
cf. https://github.com/explosion/preshed/issues/17
|
||||||
"""
|
"""
|
||||||
cdef int32_t dummy_value = 0
|
cdef int32_t dummy_value = 0
|
||||||
self.vocab.strings.add("")
|
self.vocab.strings.add("")
|
||||||
|
|
||||||
|
self._entry_index = PreshMap()
|
||||||
self._entries.push_back(
|
self._entries.push_back(
|
||||||
_EntryC(
|
_EntryC(
|
||||||
entity_hash=self.vocab.strings[""],
|
entity_hash=self.vocab.strings[""],
|
||||||
|
@ -139,6 +143,8 @@ cdef class KnowledgeBase:
|
||||||
feats_row=dummy_value,
|
feats_row=dummy_value,
|
||||||
prob=dummy_value
|
prob=dummy_value
|
||||||
))
|
))
|
||||||
|
|
||||||
|
self._alias_index = PreshMap()
|
||||||
self._aliases_table.push_back(
|
self._aliases_table.push_back(
|
||||||
_AliasC(
|
_AliasC(
|
||||||
entry_indices=[dummy_value],
|
entry_indices=[dummy_value],
|
||||||
|
|
13
spacy/kb.pyx
13
spacy/kb.pyx
|
@ -1,3 +1,4 @@
|
||||||
|
# cython: infer_types=True
|
||||||
# cython: profile=True
|
# cython: profile=True
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
from spacy.errors import Errors, Warnings, user_warning
|
from spacy.errors import Errors, Warnings, user_warning
|
||||||
|
@ -19,7 +20,7 @@ cdef class Candidate:
|
||||||
@property
|
@property
|
||||||
def entity_(self):
|
def entity_(self):
|
||||||
"""RETURNS (unicode): ID/name of this entity in the KB"""
|
"""RETURNS (unicode): ID/name of this entity in the KB"""
|
||||||
return self.kb.vocab.strings[self.entity]
|
return self.kb.vocab.strings[self.entity_hash]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def alias(self):
|
def alias(self):
|
||||||
|
@ -29,7 +30,7 @@ cdef class Candidate:
|
||||||
@property
|
@property
|
||||||
def alias_(self):
|
def alias_(self):
|
||||||
"""RETURNS (unicode): ID of the original alias"""
|
"""RETURNS (unicode): ID of the original alias"""
|
||||||
return self.kb.vocab.strings[self.alias]
|
return self.kb.vocab.strings[self.alias_hash]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def prior_prob(self):
|
def prior_prob(self):
|
||||||
|
@ -40,8 +41,6 @@ cdef class KnowledgeBase:
|
||||||
|
|
||||||
def __init__(self, Vocab vocab):
|
def __init__(self, Vocab vocab):
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self._entry_index = PreshMap()
|
|
||||||
self._alias_index = PreshMap()
|
|
||||||
self.mem = Pool()
|
self.mem = Pool()
|
||||||
self._create_empty_vectors()
|
self._create_empty_vectors()
|
||||||
|
|
||||||
|
@ -56,8 +55,8 @@ cdef class KnowledgeBase:
|
||||||
|
|
||||||
def add_entity(self, unicode entity, float prob=0.5, vectors=None, features=None):
|
def add_entity(self, unicode entity, float prob=0.5, vectors=None, features=None):
|
||||||
"""
|
"""
|
||||||
Add an entity to the KB.
|
Add an entity to the KB, optionally specifying its log probability based on corpus frequency
|
||||||
Return the hash of the entity ID at the end
|
Return the hash of the entity ID/name at the end
|
||||||
"""
|
"""
|
||||||
cdef hash_t entity_hash = self.vocab.strings.add(entity)
|
cdef hash_t entity_hash = self.vocab.strings.add(entity)
|
||||||
|
|
||||||
|
@ -98,8 +97,6 @@ cdef class KnowledgeBase:
|
||||||
user_warning(Warnings.W017.format(alias=alias))
|
user_warning(Warnings.W017.format(alias=alias))
|
||||||
return
|
return
|
||||||
|
|
||||||
cdef hash_t entity_hash
|
|
||||||
|
|
||||||
cdef vector[int64_t] entry_indices
|
cdef vector[int64_t] entry_indices
|
||||||
cdef vector[float] probs
|
cdef vector[float] probs
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user