mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Fix scorer bug for NER, related to ambiguity between missing annotations and misaligned tokens
This commit is contained in:
parent
3d0833c3df
commit
2611ac2a89
|
@@ -272,8 +272,8 @@ cdef class GoldParse:
|
||||||
self.words = [None] * len(doc)
|
self.words = [None] * len(doc)
|
||||||
self.tags = [None] * len(doc)
|
self.tags = [None] * len(doc)
|
||||||
self.heads = [None] * len(doc)
|
self.heads = [None] * len(doc)
|
||||||
self.labels = [''] * len(doc)
|
self.labels = [None] * len(doc)
|
||||||
self.ner = ['-'] * len(doc)
|
self.ner = [None] * len(doc)
|
||||||
|
|
||||||
self.cand_to_gold = align([t.orth_ for t in doc], words)
|
self.cand_to_gold = align([t.orth_ for t in doc], words)
|
||||||
self.gold_to_cand = align(words, [t.orth_ for t in doc])
|
self.gold_to_cand = align(words, [t.orth_ for t in doc])
|
||||||
|
|
|
@@ -87,7 +87,7 @@ class Scorer(object):
|
||||||
gold_ents = set(tags_to_entities([annot[-1] for annot in gold.orig_annot]))
|
gold_ents = set(tags_to_entities([annot[-1] for annot in gold.orig_annot]))
|
||||||
for id_, word, tag, head, dep, ner in gold.orig_annot:
|
for id_, word, tag, head, dep, ner in gold.orig_annot:
|
||||||
gold_tags.add((id_, tag))
|
gold_tags.add((id_, tag))
|
||||||
if dep is not None and dep.lower() not in punct_labels:
|
if dep not in (None, "") and dep.lower() not in punct_labels:
|
||||||
gold_deps.add((id_, head, dep.lower()))
|
gold_deps.add((id_, head, dep.lower()))
|
||||||
cand_deps = set()
|
cand_deps = set()
|
||||||
cand_tags = set()
|
cand_tags = set()
|
||||||
|
|
|
@@ -106,7 +106,7 @@ cdef class BiluoPushDown(TransitionSystem):
|
||||||
self.freqs[ENT_TYPE][0] += 1
|
self.freqs[ENT_TYPE][0] += 1
|
||||||
|
|
||||||
cdef Transition lookup_transition(self, object name) except *:
|
cdef Transition lookup_transition(self, object name) except *:
|
||||||
if name == '-':
|
if name == '-' or name == None:
|
||||||
move_str = 'M'
|
move_str = 'M'
|
||||||
label = 0
|
label = 0
|
||||||
elif '-' in name:
|
elif '-' in name:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user