Fix test for misaligned tokenization

This commit is contained in:
svlandeg 2020-06-15 23:10:47 +02:00
parent a28f8f369e
commit 0702a1d3fb

View File

@ -176,10 +176,10 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
spaces = [True, True, True, False, False]
doc = Doc(en_vocab, words=words, spaces=spaces)
entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")]
gp = GoldParse(
doc, words=["I", "flew to", "San", "Francisco Valley", "."], entities=entities,
)
assert gp.ner == ["O", "O", "B-LOC", "L-LOC", "O"]
gold_words = ["I", "flew to", "San", "Francisco Valley", "."]
example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
assert example.get_aligned("ENT_IOB") == [2, 2, 3, 1, 2]
assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "LOC", "LOC", ""]
# additional whitespace tokens in GoldParse words
words, spaces = get_words_and_spaces(