Fix setting empty entities in Example.from_dict (#8426)

This commit is contained in:
Adriane Boyd 2021-06-18 10:41:50 +02:00 committed by GitHub
parent 59da26ddad
commit 30d4eb506a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 1 deletions

View File

@ -182,6 +182,27 @@ def test_Example_from_dict_with_entities(annots):
assert example.reference[5].ent_type_ == "LOC" assert example.reference[5].ent_type_ == "LOC"
def test_Example_from_dict_with_empty_entities():
    """Check that an empty "entities" list differs from a missing one."""
    words = ["I", "like", "New", "York", "and", "Berlin", "."]
    annots = {"words": words, "entities": []}
    predicted = Doc(Vocab(), words=annots["words"])

    # An explicit empty list counts as annotation: no spans, every token "O".
    reference = Example.from_dict(predicted, annots).reference
    assert reference.has_annotation("ENT_IOB")
    assert len(list(reference.ents)) == 0
    assert all(token.ent_iob_ == "O" for token in reference)

    # A None value leaves the entity annotation unset ...
    annots["entities"] = None
    reference = Example.from_dict(predicted, annots).reference
    assert not reference.has_annotation("ENT_IOB")

    # ... and so does removing the key from the annotations altogether.
    annots.pop("entities", None)
    reference = Example.from_dict(predicted, annots).reference
    assert not reference.has_annotation("ENT_IOB")
@pytest.mark.parametrize( @pytest.mark.parametrize(
"annots", "annots",
[ [

View File

@ -420,7 +420,7 @@ def _fix_legacy_dict_data(example_dict):
token_dict = example_dict.get("token_annotation", {}) token_dict = example_dict.get("token_annotation", {})
doc_dict = example_dict.get("doc_annotation", {}) doc_dict = example_dict.get("doc_annotation", {})
for key, value in example_dict.items(): for key, value in example_dict.items():
if value: if value is not None:
if key in ("token_annotation", "doc_annotation"): if key in ("token_annotation", "doc_annotation"):
pass pass
elif key == "ids": elif key == "ids":