mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Fix scorer bug for NER, related to ambiguity between missing annotations and misaligned tokens
This commit is contained in:
parent
3d0833c3df
commit
2611ac2a89
|
@@ -272,8 +272,8 @@ cdef class GoldParse:
|
|||
self.words = [None] * len(doc)
|
||||
self.tags = [None] * len(doc)
|
||||
self.heads = [None] * len(doc)
|
||||
self.labels = [''] * len(doc)
|
||||
self.ner = ['-'] * len(doc)
|
||||
self.labels = [None] * len(doc)
|
||||
self.ner = [None] * len(doc)
|
||||
|
||||
self.cand_to_gold = align([t.orth_ for t in doc], words)
|
||||
self.gold_to_cand = align(words, [t.orth_ for t in doc])
|
||||
|
|
|
@@ -87,7 +87,7 @@ class Scorer(object):
|
|||
gold_ents = set(tags_to_entities([annot[-1] for annot in gold.orig_annot]))
|
||||
for id_, word, tag, head, dep, ner in gold.orig_annot:
|
||||
gold_tags.add((id_, tag))
|
||||
if dep is not None and dep.lower() not in punct_labels:
|
||||
if dep not in (None, "") and dep.lower() not in punct_labels:
|
||||
gold_deps.add((id_, head, dep.lower()))
|
||||
cand_deps = set()
|
||||
cand_tags = set()
|
||||
|
|
|
@@ -106,7 +106,7 @@ cdef class BiluoPushDown(TransitionSystem):
|
|||
self.freqs[ENT_TYPE][0] += 1
|
||||
|
||||
cdef Transition lookup_transition(self, object name) except *:
|
||||
if name == '-':
|
||||
if name == '-' or name == None:
|
||||
move_str = 'M'
|
||||
label = 0
|
||||
elif '-' in name:
|
||||
|
|
Loading…
Reference in New Issue
Block a user