mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Check whether doc is instantiated in Example.get_gold_parses() (#5167)
* Check whether doc is instantiated

  When creating docs to pair with gold parses, modify the test to check whether a doc is unset rather than whether it contains tokens.

* Restore test of evaluate on an empty doc

* Set a minimal gold.orig for the scorer

  Without a minimal gold.orig the scorer can't evaluate empty docs.

This is the v3 equivalent of #4925.
This commit is contained in:
		
							parent
							
								
									d6d95674c1
								
							
						
					
					
						commit
						ce0e538068
					
				|  | @ -834,7 +834,7 @@ cdef class Example: | |||
|         if merge: | ||||
|             t = self.token_annotation | ||||
|             doc = self.doc | ||||
|             if not self.doc: | ||||
|             if self.doc is None: | ||||
|                 if not vocab: | ||||
|                     raise ValueError(Errors.E998) | ||||
|                 doc = Doc(vocab, words=t.words) | ||||
|  | @ -993,7 +993,10 @@ cdef class GoldParse: | |||
|         self.links = {} if links is None else dict(links) | ||||
| 
 | ||||
|         # avoid allocating memory if the doc does not contain any tokens | ||||
|         if self.length > 0: | ||||
|         if self.length == 0: | ||||
|             # set a minimal orig so that the scorer can score an empty doc | ||||
|             self.orig = TokenAnnotation(ids=[]) | ||||
|         else: | ||||
|             if not words: | ||||
|                 words = [token.text for token in doc] | ||||
|             if not tags: | ||||
|  |  | |||
|  | @ -5,5 +5,4 @@ from spacy.language import Language | |||
| def test_issue4924(): | ||||
|     nlp = Language() | ||||
|     docs_golds = [("", {})] | ||||
|     with pytest.raises(ValueError): | ||||
|         nlp.evaluate(docs_golds) | ||||
|     nlp.evaluate(docs_golds) | ||||
|  |  | |||
|  | @ -480,3 +480,10 @@ def test_tuples_to_example(merged_dict): | |||
|     assert ex_dict["token_annotation"]["tags"] == merged_dict["tags"] | ||||
|     assert ex_dict["token_annotation"]["sent_starts"] == merged_dict["sent_starts"] | ||||
|     assert ex_dict["doc_annotation"]["cats"] == cats | ||||
| 
 | ||||
| 
 | ||||
| def test_empty_example_goldparse(): | ||||
|     nlp = English() | ||||
|     doc = nlp("") | ||||
|     example = Example(doc=doc) | ||||
|     assert len(example.get_gold_parses()) == 1 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user