diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index 37d092395..a9156c1a5 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -834,7 +834,7 @@ cdef class Example:
         if merge:
             t = self.token_annotation
             doc = self.doc
-            if not self.doc:
+            if self.doc is None:
                 if not vocab:
                     raise ValueError(Errors.E998)
                 doc = Doc(vocab, words=t.words)
@@ -993,7 +993,10 @@ cdef class GoldParse:
         self.links = {} if links is None else dict(links)
 
         # avoid allocating memory if the doc does not contain any tokens
-        if self.length > 0:
+        if self.length == 0:
+            # set a minimal orig so that the scorer can score an empty doc
+            self.orig = TokenAnnotation(ids=[])
+        else:
             if not words:
                 words = [token.text for token in doc]
             if not tags:
diff --git a/spacy/tests/regression/test_issue4924.py b/spacy/tests/regression/test_issue4924.py
index 1eb6afcf0..b240f6d4a 100644
--- a/spacy/tests/regression/test_issue4924.py
+++ b/spacy/tests/regression/test_issue4924.py
@@ -5,5 +5,4 @@ from spacy.language import Language
 def test_issue4924():
     nlp = Language()
     docs_golds = [("", {})]
-    with pytest.raises(ValueError):
-        nlp.evaluate(docs_golds)
+    nlp.evaluate(docs_golds)
diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_gold.py
index 7fe8aab73..0754fb5bc 100644
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@@ -480,3 +480,10 @@ def test_tuples_to_example(merged_dict):
     assert ex_dict["token_annotation"]["tags"] == merged_dict["tags"]
     assert ex_dict["token_annotation"]["sent_starts"] == merged_dict["sent_starts"]
     assert ex_dict["doc_annotation"]["cats"] == cats
+
+
+def test_empty_example_goldparse():
+    nlp = English()
+    doc = nlp("")
+    example = Example(doc=doc)
+    assert len(example.get_gold_parses()) == 1