From 0c6f1f38910de791233faaeddb9f7455ffbac779 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 18 Jun 2020 13:00:03 +0200 Subject: [PATCH] fix BiluoPushDown parsing entities --- spacy/gold/example.pyx | 2 +- spacy/syntax/ner.pyx | 5 ++--- spacy/tests/regression/test_issue1501-2000.py | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx index 8dfeffe98..663c8cc6d 100644 --- a/spacy/gold/example.pyx +++ b/spacy/gold/example.pyx @@ -117,7 +117,7 @@ cdef class Example: i = j2i_multi[j] if output[i] is None: output[i] = gold_values[j] - if as_string: + if as_string and field not in ["ENT_IOB"]: output = [vocab.strings[o] if o is not None else o for o in output] return output diff --git a/spacy/syntax/ner.pyx b/spacy/syntax/ner.pyx index a79a784be..31f89ef88 100644 --- a/spacy/syntax/ner.pyx +++ b/spacy/syntax/ner.pyx @@ -72,11 +72,10 @@ cdef class BiluoPushDown(TransitionSystem): actions[action][entity_type] = 1 moves = ('M', 'B', 'I', 'L', 'U') for example in kwargs.get('gold_parses', []): - for i, ner_tag in enumerate(example.token_annotation.entities): + for ner_tag in example.get_aligned("ENT_TYPE", as_string=True): if ner_tag != 'O' and ner_tag != '-': - _, label = ner_tag.split('-', 1) for action in (BEGIN, IN, LAST, UNIT): - actions[action][label] += 1 + actions[action][ner_tag] += 1 return actions @property diff --git a/spacy/tests/regression/test_issue1501-2000.py b/spacy/tests/regression/test_issue1501-2000.py index bb89be711..129c00d99 100644 --- a/spacy/tests/regression/test_issue1501-2000.py +++ b/spacy/tests/regression/test_issue1501-2000.py @@ -268,7 +268,6 @@ def test_issue1963(en_tokenizer): assert doc.tensor.shape == (3, 128) -# TODO: fix @pytest.mark.parametrize("label", ["U-JOB-NAME"]) def test_issue1967(label): config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} @@ -284,7 +283,7 @@ def test_issue1967(label): "entities": [label] } ) - ner.moves.get_actions(gold_parses=[example]) + assert "JOB-NAME" in ner.moves.get_actions(gold_parses=[example])[1] def test_issue1971(en_vocab):