mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 18:56:36 +03:00
* Remove cruft from conll.pyx --- unused stuff about evaluation, which now lives in spacy.scorer
This commit is contained in:
parent
bfeb29ebd1
commit
acd1245ad4
|
@ -18,10 +18,12 @@ cdef class GoldParse:
|
||||||
cdef readonly list ents
|
cdef readonly list ents
|
||||||
cdef readonly dict brackets
|
cdef readonly dict brackets
|
||||||
|
|
||||||
|
cdef readonly list cand_to_gold
|
||||||
|
cdef readonly list gold_to_cand
|
||||||
|
cdef readonly list orig_annot
|
||||||
|
|
||||||
cdef int* c_tags
|
cdef int* c_tags
|
||||||
cdef int* c_heads
|
cdef int* c_heads
|
||||||
cdef int* c_labels
|
cdef int* c_labels
|
||||||
cdef int** c_brackets
|
cdef int** c_brackets
|
||||||
cdef Transition* c_ner
|
cdef Transition* c_ner
|
||||||
|
|
||||||
cdef int heads_correct(self, TokenC* tokens, bint score_punct=?) except -1
|
|
||||||
|
|
|
@ -162,18 +162,20 @@ cdef class GoldParse:
|
||||||
self.labels = [''] * len(tokens)
|
self.labels = [''] * len(tokens)
|
||||||
self.ner = ['-'] * len(tokens)
|
self.ner = ['-'] * len(tokens)
|
||||||
|
|
||||||
cand_to_gold = align([t.orth_ for t in tokens], annot_tuples[1])
|
self.cand_to_gold = align([t.orth_ for t in tokens], annot_tuples[1])
|
||||||
gold_to_cand = align(annot_tuples[1], [t.orth_ for t in tokens])
|
self.gold_to_cand = align(annot_tuples[1], [t.orth_ for t in tokens])
|
||||||
|
|
||||||
|
self.orig_annot = zip(*annot_tuples)
|
||||||
|
|
||||||
self.ents = []
|
self.ents = []
|
||||||
|
|
||||||
for i, gold_i in enumerate(cand_to_gold):
|
for i, gold_i in enumerate(self.cand_to_gold):
|
||||||
if gold_i is None:
|
if gold_i is None:
|
||||||
# TODO: What do we do for missing values again?
|
# TODO: What do we do for missing values again?
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
self.tags[i] = annot_tuples[2][gold_i]
|
self.tags[i] = annot_tuples[2][gold_i]
|
||||||
self.heads[i] = gold_to_cand[annot_tuples[3][gold_i]]
|
self.heads[i] = self.gold_to_cand[annot_tuples[3][gold_i]]
|
||||||
self.labels[i] = annot_tuples[4][gold_i]
|
self.labels[i] = annot_tuples[4][gold_i]
|
||||||
# TODO: Declare NER information MISSING if tokenization incorrect
|
# TODO: Declare NER information MISSING if tokenization incorrect
|
||||||
for start, end, label in self.ents:
|
for start, end, label in self.ents:
|
||||||
|
@ -187,8 +189,8 @@ cdef class GoldParse:
|
||||||
|
|
||||||
self.brackets = {}
|
self.brackets = {}
|
||||||
for (gold_start, gold_end, label_str) in brackets:
|
for (gold_start, gold_end, label_str) in brackets:
|
||||||
start = gold_to_cand[gold_start]
|
start = self.gold_to_cand[gold_start]
|
||||||
end = gold_to_cand[gold_end]
|
end = self.gold_to_cand[gold_end]
|
||||||
if start is not None and end is not None:
|
if start is not None and end is not None:
|
||||||
self.brackets.setdefault(start, {}).setdefault(end, set())
|
self.brackets.setdefault(start, {}).setdefault(end, set())
|
||||||
self.brackets[end][start].add(label)
|
self.brackets[end][start].add(label)
|
||||||
|
@ -196,33 +198,6 @@ cdef class GoldParse:
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return self.length
|
return self.length
|
||||||
|
|
||||||
@property
|
|
||||||
def n_non_punct(self):
|
|
||||||
return len([l for l in self.labels if l not in ('P', 'punct')])
|
|
||||||
|
|
||||||
cdef int heads_correct(self, TokenC* tokens, bint score_punct=False) except -1:
|
|
||||||
n = 0
|
|
||||||
for i in range(self.length):
|
|
||||||
if not score_punct and self.labels_[i] not in ('P', 'punct'):
|
|
||||||
continue
|
|
||||||
if self.heads[i] == -1:
|
|
||||||
continue
|
|
||||||
n += (i + tokens[i].head) == self.heads[i]
|
|
||||||
return n
|
|
||||||
|
|
||||||
def is_correct(self, i, head):
|
|
||||||
return head == self.c_heads[i]
|
|
||||||
|
|
||||||
|
|
||||||
def is_punct_label(label):
|
def is_punct_label(label):
|
||||||
return label == 'P' or label.lower() == 'punct'
|
return label == 'P' or label.lower() == 'punct'
|
||||||
|
|
||||||
|
|
||||||
def _map_indices_to_tokens(ids, heads):
|
|
||||||
mapped = []
|
|
||||||
for head in heads:
|
|
||||||
if head not in ids:
|
|
||||||
mapped.append(None)
|
|
||||||
else:
|
|
||||||
mapped.append(ids.index(head))
|
|
||||||
return mapped
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user