mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-13 05:07:03 +03:00
enable nogil for cython functions in kb.pxd
This commit is contained in:
parent
61a33f55d2
commit
9a7d534b1b
|
@ -28,7 +28,7 @@ def create_kb(vocab):
|
||||||
print()
|
print()
|
||||||
alias_0 = "Douglas"
|
alias_0 = "Douglas"
|
||||||
print("adding alias", alias_0)
|
print("adding alias", alias_0)
|
||||||
kb.add_alias(alias=alias_0, entities=[entity_0, entity_1, entity_2], probabilities=[0.1, 0.6, 0.2])
|
kb.add_alias(alias=alias_0, entities=[entity_0, entity_1, entity_2], probabilities=[0.6, 0.1, 0.2])
|
||||||
|
|
||||||
alias_1 = "Douglas Adams"
|
alias_1 = "Douglas Adams"
|
||||||
print("adding alias", alias_1)
|
print("adding alias", alias_1)
|
||||||
|
|
74
spacy/kb.pxd
74
spacy/kb.pxd
|
@ -97,58 +97,64 @@ cdef class KnowledgeBase:
|
||||||
cdef object _features_table
|
cdef object _features_table
|
||||||
|
|
||||||
cdef inline int64_t c_add_entity(self, hash_t entity_hash, float prob,
|
cdef inline int64_t c_add_entity(self, hash_t entity_hash, float prob,
|
||||||
int32_t* vector_rows, int feats_row):
|
int32_t* vector_rows, int feats_row) nogil:
|
||||||
"""Add an entry to the knowledge base."""
|
"""Add an entry to the vector of entries.
|
||||||
|
After calling this method, make sure to also update the _entry_index using the return value."""
|
||||||
# This is what we'll map the entity hash key to. It's where the entry will sit
|
# This is what we'll map the entity hash key to. It's where the entry will sit
|
||||||
# in the vector of entries, so we can get it later.
|
# in the vector of entries, so we can get it later.
|
||||||
cdef int64_t new_index = self._entries.size()
|
cdef int64_t new_index = self._entries.size()
|
||||||
self._entries.push_back(
|
|
||||||
_EntryC(
|
# Avoid struct initializer to enable nogil, cf https://github.com/cython/cython/issues/1642
|
||||||
entity_hash=entity_hash,
|
cdef _EntryC entry
|
||||||
vector_rows=vector_rows,
|
entry.entity_hash = entity_hash
|
||||||
feats_row=feats_row,
|
entry.vector_rows = vector_rows
|
||||||
prob=prob
|
entry.feats_row = feats_row
|
||||||
))
|
entry.prob = prob
|
||||||
self._entry_index[entity_hash] = new_index
|
|
||||||
|
self._entries.push_back(entry)
|
||||||
return new_index
|
return new_index
|
||||||
|
|
||||||
cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs):
|
cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs) nogil:
|
||||||
"""Connect a mention to a list of potential entities with their prior probabilities ."""
|
"""Connect a mention to a list of potential entities with their prior probabilities.
|
||||||
|
After calling this method, make sure to also update the _alias_index using the return value."""
|
||||||
# This is what we'll map the alias hash key to. It's where the alias will be defined
|
# This is what we'll map the alias hash key to. It's where the alias will be defined
|
||||||
# in the vector of aliases.
|
# in the vector of aliases.
|
||||||
cdef int64_t new_index = self._aliases_table.size()
|
cdef int64_t new_index = self._aliases_table.size()
|
||||||
|
|
||||||
self._aliases_table.push_back(
|
# Avoid struct initializer to enable nogil
|
||||||
_AliasC(
|
cdef _AliasC alias
|
||||||
entry_indices=entry_indices,
|
alias.entry_indices = entry_indices
|
||||||
probs=probs
|
alias.probs = probs
|
||||||
))
|
|
||||||
self._alias_index[alias_hash] = new_index
|
self._aliases_table.push_back(alias)
|
||||||
return new_index
|
return new_index
|
||||||
|
|
||||||
cdef inline _create_empty_vectors(self):
|
cdef inline void _create_empty_vectors(self, hash_t dummy_hash) nogil:
|
||||||
"""
|
"""
|
||||||
Initializing the vectors and making sure the first element of each vector is a dummy,
|
Initializing the vectors and making sure the first element of each vector is a dummy,
|
||||||
because the PreshMap maps pointing to indices in these vectors cannot contain 0 as a value
|
because the PreshMap maps pointing to indices in these vectors cannot contain 0 as a value
|
||||||
cf. https://github.com/explosion/preshed/issues/17
|
cf. https://github.com/explosion/preshed/issues/17
|
||||||
"""
|
"""
|
||||||
cdef int32_t dummy_value = 0
|
cdef int32_t dummy_value = 0
|
||||||
self.vocab.strings.add("")
|
|
||||||
|
|
||||||
self._entry_index = PreshMap()
|
# Avoid struct initializer to enable nogil
|
||||||
self._entries.push_back(
|
cdef _EntryC entry
|
||||||
_EntryC(
|
entry.entity_hash = dummy_hash
|
||||||
entity_hash=self.vocab.strings[""],
|
entry.vector_rows = &dummy_value
|
||||||
vector_rows=&dummy_value,
|
entry.feats_row = dummy_value
|
||||||
feats_row=dummy_value,
|
entry.prob = dummy_value
|
||||||
prob=dummy_value
|
|
||||||
))
|
|
||||||
|
|
||||||
self._alias_index = PreshMap()
|
# Avoid struct initializer to enable nogil
|
||||||
self._aliases_table.push_back(
|
cdef vector[int64_t] dummy_entry_indices
|
||||||
_AliasC(
|
dummy_entry_indices.push_back(0)
|
||||||
entry_indices=[dummy_value],
|
cdef vector[float] dummy_probs
|
||||||
probs=[dummy_value]
|
dummy_probs.push_back(0)
|
||||||
))
|
|
||||||
|
cdef _AliasC alias
|
||||||
|
alias.entry_indices = dummy_entry_indices
|
||||||
|
alias.probs = dummy_probs
|
||||||
|
|
||||||
|
self._entries.push_back(entry)
|
||||||
|
self._aliases_table.push_back(alias)
|
||||||
|
|
||||||
|
|
||||||
|
|
15
spacy/kb.pyx
15
spacy/kb.pyx
|
@ -42,7 +42,11 @@ cdef class KnowledgeBase:
|
||||||
def __init__(self, Vocab vocab):
|
def __init__(self, Vocab vocab):
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.mem = Pool()
|
self.mem = Pool()
|
||||||
self._create_empty_vectors()
|
self._entry_index = PreshMap()
|
||||||
|
self._alias_index = PreshMap()
|
||||||
|
|
||||||
|
self.vocab.strings.add("")
|
||||||
|
self._create_empty_vectors(dummy_hash=self.vocab.strings[""])
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return self.get_size_entities()
|
return self.get_size_entities()
|
||||||
|
@ -66,8 +70,10 @@ cdef class KnowledgeBase:
|
||||||
return
|
return
|
||||||
|
|
||||||
cdef int32_t dummy_value = 342
|
cdef int32_t dummy_value = 342
|
||||||
self.c_add_entity(entity_hash=entity_hash, prob=prob,
|
new_index = self.c_add_entity(entity_hash=entity_hash, prob=prob,
|
||||||
vector_rows=&dummy_value, feats_row=dummy_value)
|
vector_rows=&dummy_value, feats_row=dummy_value)
|
||||||
|
self._entry_index[entity_hash] = new_index
|
||||||
|
|
||||||
# TODO self._vectors_table.get_pointer(vectors),
|
# TODO self._vectors_table.get_pointer(vectors),
|
||||||
# self._features_table.get(features))
|
# self._features_table.get(features))
|
||||||
|
|
||||||
|
@ -109,7 +115,8 @@ cdef class KnowledgeBase:
|
||||||
entry_indices.push_back(int(entry_index))
|
entry_indices.push_back(int(entry_index))
|
||||||
probs.push_back(float(prob))
|
probs.push_back(float(prob))
|
||||||
|
|
||||||
self.c_add_aliases(alias_hash=alias_hash, entry_indices=entry_indices, probs=probs)
|
new_index = self.c_add_aliases(alias_hash=alias_hash, entry_indices=entry_indices, probs=probs)
|
||||||
|
self._alias_index[alias_hash] = new_index
|
||||||
|
|
||||||
return alias_hash
|
return alias_hash
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user