From 2611ac2a89a1b83c63b52c869bb6066d0089b1e4 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 16 Mar 2017 09:38:28 -0500
Subject: [PATCH] Fix scorer bug for NER, related to ambiguity between missing annotations and misaligned tokens

---
 spacy/gold.pyx       | 4 ++--
 spacy/scorer.py      | 2 +-
 spacy/syntax/ner.pyx | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index b12d2c09b..358412fab 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -272,8 +272,8 @@ cdef class GoldParse:
         self.words = [None] * len(doc)
         self.tags = [None] * len(doc)
         self.heads = [None] * len(doc)
-        self.labels = [''] * len(doc)
-        self.ner = ['-'] * len(doc)
+        self.labels = [None] * len(doc)
+        self.ner = [None] * len(doc)
 
         self.cand_to_gold = align([t.orth_ for t in doc], words)
         self.gold_to_cand = align(words, [t.orth_ for t in doc])
diff --git a/spacy/scorer.py b/spacy/scorer.py
index 3f8d73e6a..f9265f373 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -87,7 +87,7 @@ class Scorer(object):
         gold_ents = set(tags_to_entities([annot[-1] for annot in gold.orig_annot]))
         for id_, word, tag, head, dep, ner in gold.orig_annot:
             gold_tags.add((id_, tag))
-            if dep is not None and dep.lower() not in punct_labels:
+            if dep not in (None, "") and dep.lower() not in punct_labels:
                 gold_deps.add((id_, head, dep.lower()))
         cand_deps = set()
         cand_tags = set()
diff --git a/spacy/syntax/ner.pyx b/spacy/syntax/ner.pyx
index dcd53f694..736cc0039 100644
--- a/spacy/syntax/ner.pyx
+++ b/spacy/syntax/ner.pyx
@@ -106,7 +106,7 @@ cdef class BiluoPushDown(TransitionSystem):
                 self.freqs[ENT_TYPE][0] += 1
 
     cdef Transition lookup_transition(self, object name) except *:
-        if name == '-':
+        if name == '-' or name == None:
             move_str = 'M'
             label = 0
         elif '-' in name: