mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
rename to KBEntryC
This commit is contained in:
parent
1de61f68d6
commit
dbc53b9870
|
@ -61,23 +61,23 @@ def run_pipeline():
|
|||
to_create_kb = False
|
||||
|
||||
# read KB back in from file
|
||||
to_read_kb = False
|
||||
to_read_kb = True
|
||||
to_test_kb = False
|
||||
|
||||
# create training dataset
|
||||
create_wp_training = False
|
||||
|
||||
# train the EL pipe
|
||||
train_pipe = False
|
||||
measure_performance = False
|
||||
train_pipe = True
|
||||
measure_performance = True
|
||||
|
||||
# test the EL pipe on a simple example
|
||||
to_test_pipeline = False
|
||||
to_test_pipeline = True
|
||||
|
||||
# write the NLP object, read back in and test again
|
||||
to_write_nlp = False
|
||||
to_write_nlp = True
|
||||
to_read_nlp = False
|
||||
test_from_file = True
|
||||
test_from_file = False
|
||||
|
||||
# STEP 1 : create prior probabilities from WP (run only once)
|
||||
if to_create_prior_probs:
|
||||
|
@ -149,7 +149,7 @@ def run_pipeline():
|
|||
print("STEP 6: training Entity Linking pipe", datetime.datetime.now())
|
||||
# define the size (nr of entities) of training and dev set
|
||||
train_limit = 5000
|
||||
dev_limit = 10000
|
||||
dev_limit = 5000
|
||||
|
||||
train_data = training_set_creator.read_training(nlp=nlp_2,
|
||||
training_dir=TRAINING_DIR,
|
||||
|
@ -285,9 +285,7 @@ def _measure_accuracy(data, el_pipe=None):
|
|||
|
||||
docs = [d for d, g in data if len(d) > 0]
|
||||
if el_pipe is not None:
|
||||
print("applying el_pipe", datetime.datetime.now())
|
||||
docs = list(el_pipe.pipe(docs, batch_size=10000000000))
|
||||
print("done applying el_pipe", datetime.datetime.now())
|
||||
docs = list(el_pipe.pipe(docs))
|
||||
golds = [g for d, g in data if len(d) > 0]
|
||||
|
||||
for doc, gold in zip(docs, golds):
|
||||
|
|
10
spacy/kb.pxd
10
spacy/kb.pxd
|
@ -9,8 +9,8 @@ from libc.stdio cimport FILE
|
|||
from spacy.vocab cimport Vocab
|
||||
from .typedefs cimport hash_t
|
||||
|
||||
from .structs cimport EntryC, AliasC
|
||||
ctypedef vector[EntryC] entry_vec
|
||||
from .structs cimport KBEntryC, AliasC
|
||||
ctypedef vector[KBEntryC] entry_vec
|
||||
ctypedef vector[AliasC] alias_vec
|
||||
ctypedef vector[float] float_vec
|
||||
ctypedef vector[float_vec] float_matrix
|
||||
|
@ -32,7 +32,7 @@ cdef class KnowledgeBase:
|
|||
cdef int64_t entity_vector_length
|
||||
|
||||
# This maps 64bit keys (hash of unique entity string)
|
||||
# to 64bit values (position of the _EntryC struct in the _entries vector).
|
||||
# to 64bit values (position of the KBEntryC struct in the _entries vector).
|
||||
# The PreshMap is pretty space efficient, as it uses open addressing. So
|
||||
# the only overhead is the vacancy rate, which is approximately 30%.
|
||||
cdef PreshMap _entry_index
|
||||
|
@ -88,7 +88,7 @@ cdef class KnowledgeBase:
|
|||
cdef int64_t new_index = self._entries.size()
|
||||
|
||||
# Avoid struct initializer to enable nogil, cf https://github.com/cython/cython/issues/1642
|
||||
cdef EntryC entry
|
||||
cdef KBEntryC entry
|
||||
entry.entity_hash = entity_hash
|
||||
entry.vector_index = vector_index
|
||||
entry.feats_row = feats_row
|
||||
|
@ -121,7 +121,7 @@ cdef class KnowledgeBase:
|
|||
cdef int32_t dummy_value = 0
|
||||
|
||||
# Avoid struct initializer to enable nogil
|
||||
cdef EntryC entry
|
||||
cdef KBEntryC entry
|
||||
entry.entity_hash = dummy_hash
|
||||
entry.vector_index = dummy_value
|
||||
entry.feats_row = dummy_value
|
||||
|
|
|
@ -129,7 +129,7 @@ cdef class KnowledgeBase:
|
|||
self._entries = entry_vec(nr_entities+1)
|
||||
|
||||
i = 0
|
||||
cdef EntryC entry
|
||||
cdef KBEntryC entry
|
||||
while i < nr_entities:
|
||||
entity_vector = vector_list[i]
|
||||
if len(entity_vector) != self.entity_vector_length:
|
||||
|
@ -250,7 +250,7 @@ cdef class KnowledgeBase:
|
|||
cdef int64_t entry_index
|
||||
cdef float prob
|
||||
cdef int32_t vector_index
|
||||
cdef EntryC entry
|
||||
cdef KBEntryC entry
|
||||
cdef AliasC alias
|
||||
cdef float vector_element
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ cdef struct TokenC:
|
|||
|
||||
|
||||
# Internal struct, for storage and disambiguation of entities.
|
||||
cdef struct EntryC:
|
||||
cdef struct KBEntryC:
|
||||
|
||||
# The hash of this entry's unique ID/name in the KB
|
||||
hash_t entity_hash
|
||||
|
|
Loading…
Reference in New Issue
Block a user