mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-12 07:15:48 +03:00
fix test for misaligned
This commit is contained in:
parent
a28f8f369e
commit
0702a1d3fb
|
@ -176,10 +176,10 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
|
||||||
spaces = [True, True, True, False, False]
|
spaces = [True, True, True, False, False]
|
||||||
doc = Doc(en_vocab, words=words, spaces=spaces)
|
doc = Doc(en_vocab, words=words, spaces=spaces)
|
||||||
entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")]
|
entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")]
|
||||||
gp = GoldParse(
|
gold_words = ["I", "flew to", "San", "Francisco Valley", "."]
|
||||||
doc, words=["I", "flew to", "San", "Francisco Valley", "."], entities=entities,
|
example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
|
||||||
)
|
assert example.get_aligned("ENT_IOB") == [2, 2, 3, 1, 2]
|
||||||
assert gp.ner == ["O", "O", "B-LOC", "L-LOC", "O"]
|
assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "LOC", "LOC", ""]
|
||||||
|
|
||||||
# additional whitespace tokens in GoldParse words
|
# additional whitespace tokens in GoldParse words
|
||||||
words, spaces = get_words_and_spaces(
|
words, spaces = get_words_and_spaces(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user