From 0702a1d3fba15176c51125d07996390eed5fe19b Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Mon, 15 Jun 2020 23:10:47 +0200
Subject: [PATCH] fix test for misaligned

---
 spacy/tests/test_gold.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_gold.py
index f604f4b53..f7d7d70bb 100644
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@@ -176,10 +176,10 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
     spaces = [True, True, True, False, False]
     doc = Doc(en_vocab, words=words, spaces=spaces)
     entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")]
-    gp = GoldParse(
-        doc, words=["I", "flew to", "San", "Francisco Valley", "."], entities=entities,
-    )
-    assert gp.ner == ["O", "O", "B-LOC", "L-LOC", "O"]
+    gold_words = ["I", "flew to", "San", "Francisco Valley", "."]
+    example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
+    assert example.get_aligned("ENT_IOB") == [2, 2, 3, 1, 2]
+    assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "LOC", "LOC", ""]
 
     # additional whitespace tokens in GoldParse words
     words, spaces = get_words_and_spaces(