fix BiluoPushDown parsing entities

svlandeg 2020-06-18 13:00:03 +02:00
parent cd790aaa2a
commit 0c6f1f3891
3 changed files with 4 additions and 6 deletions

View File

@@ -117,7 +117,7 @@ cdef class Example:
                 i = j2i_multi[j]
                 if output[i] is None:
                     output[i] = gold_values[j]
-        if as_string:
+        if as_string and field not in ["ENT_IOB"]:
             output = [vocab.strings[o] if o is not None else o for o in output]
         return output
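
For context (not part of the diff): a minimal sketch of why ENT_IOB is excluded from the string lookup. In spaCy, ENT_IOB values are small integer codes rather than StringStore hashes, so resolving them through vocab.strings would be wrong, whereas ENT_TYPE values do resolve to label strings. The sentence and label below are invented for illustration and assume a standard spaCy install.

import spacy

nlp = spacy.blank("en")
doc = nlp("Alice moved to Berlin")
doc.ents = [doc.char_span(0, 5, label="PERSON")]

token = doc[0]
print(token.ent_iob)                      # 3 -> plain int code (B), not a StringStore hash
print(token.ent_type)                     # uint64 hash of the entity label
print(nlp.vocab.strings[token.ent_type])  # "PERSON" -> resolvable via the StringStore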

View File

@@ -72,11 +72,10 @@ cdef class BiluoPushDown(TransitionSystem):
                     actions[action][entity_type] = 1
         moves = ('M', 'B', 'I', 'L', 'U')
         for example in kwargs.get('gold_parses', []):
-            for i, ner_tag in enumerate(example.token_annotation.entities):
+            for ner_tag in example.get_aligned("ENT_TYPE", as_string=True):
                 if ner_tag != 'O' and ner_tag != '-':
-                    _, label = ner_tag.split('-', 1)
                     for action in (BEGIN, IN, LAST, UNIT):
-                        actions[action][label] += 1
+                        actions[action][ner_tag] += 1
         return actions

     @property
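
As an aside (not part of the diff), a small sketch of the data shapes behind the change: the old loop walked BILUO-tagged strings from token_annotation.entities and had to split off the prefix, while get_aligned("ENT_TYPE", as_string=True) already yields a bare entity label per token. The values below are invented, and the empty string standing in for tokens outside an entity is an assumption of this sketch.

# Hypothetical per-token annotations for a five-token sentence.
biluo_tags = ["U-PERSON", "O", "O", "B-GPE", "L-GPE"]   # old source: BILUO tags
ent_types = ["PERSON", "", "", "GPE", "GPE"]            # new source: aligned ENT_TYPE strings

# Old behaviour: strip the BILUO prefix before registering the label.
old_labels = [tag.split("-", 1)[1] for tag in biluo_tags if tag not in ("O", "-")]

# New behaviour: the aligned ENT_TYPE values are already plain labels.
new_labels = [tag for tag in ent_types if tag not in ("O", "-", "")]

assert old_labels == new_labels == ["PERSON", "GPE", "GPE"]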

View File

@@ -268,7 +268,6 @@ def test_issue1963(en_tokenizer):
     assert doc.tensor.shape == (3, 128)


-# TODO: fix
 @pytest.mark.parametrize("label", ["U-JOB-NAME"])
 def test_issue1967(label):
     config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
@@ -284,7 +283,7 @@ def test_issue1967(label):
             "entities": [label]
         }
     )
-    ner.moves.get_actions(gold_parses=[example])
+    assert "JOB-NAME" in ner.moves.get_actions(gold_parses=[example])[1]


 def test_issue1971(en_vocab):
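
A short note on the strengthened test: get_actions returns a mapping from transition ids to per-label counts, and index 1 corresponds to the BEGIN move in spaCy's NER move set (MISSING, BEGIN, IN, LAST, UNIT, OUT, as ordered at the time), so the assertion checks that the hyphenated label "JOB-NAME" is registered intact rather than mangled by the old split on '-'. The return shape below is a hypothetical sketch with invented counts, not quoted from the library.

# Rough shape of get_actions(...) for a single "U-JOB-NAME" entity.
actions = {
    0: {},                  # MISSING
    1: {"JOB-NAME": 1},     # BEGIN
    2: {"JOB-NAME": 1},     # IN
    3: {"JOB-NAME": 1},     # LAST
    4: {"JOB-NAME": 1},     # UNIT
    5: {},                  # OUT
}
assert "JOB-NAME" in actions[1]   # the condition the updated test now asserts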