mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Adding tests for the new Example class (some are still failing - WIP)
This commit is contained in:
		
							parent
							
								
									488727aee0
								
							
						
					
					
						commit
						6a67a11682
					
				|  | @ -146,6 +146,8 @@ def _fix_legacy_dict_data(predicted, example_dict): | |||
|             ent_iobs, ent_types = _parse_ner_tags(predicted, words, value) | ||||
|             token_dict["ENT_IOB"] = ent_iobs | ||||
|             token_dict["ENT_TYPE"] = ent_types | ||||
|         else: | ||||
|             raise ValueError(f"Unknown attr: {key}") | ||||
|     return { | ||||
|         "token_annotation": token_dict, | ||||
|         "doc_annotation": doc_dict | ||||
|  |  | |||
|  | @ -531,4 +531,4 @@ def test_tuples_to_example(vocab, merged_dict): | |||
|     assert tags == merged_dict["tags"] | ||||
|     sent_starts = [token.is_sent_start for token in ex.reference] | ||||
|     assert sent_starts == [bool(v) for v in merged_dict["sent_starts"]] | ||||
|     example.reference.cats == cats | ||||
|     ex.reference.cats == cats | ||||
|  |  | |||
|  | @ -14,21 +14,25 @@ def test_Example_init_requires_doc_objects(): | |||
|         eg = Example(None, Doc(vocab, words=["hi"])) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| def test_Example_from_dict_basic(): | ||||
|     eg = Example.from_dict( | ||||
|         Doc(Vocab(), words=["hello", "world"]), | ||||
|         { | ||||
|             "words": ["hello", "world"] | ||||
|         } | ||||
|         Doc(Vocab(), words=["hello", "world"]), {"words": ["hello", "world"]} | ||||
|     ) | ||||
|     assert isinstance(eg.x, Doc) | ||||
|     assert isinstance(eg.y, Doc) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize("annots", [ | ||||
|     {"words": ["ice", "cream"], "tags": ["NN", "NN"]}, | ||||
| ]) | ||||
| @pytest.mark.parametrize( | ||||
|     "annots", [{"words": ["ice", "cream"], "weirdannots": ["something", "such"]}] | ||||
| ) | ||||
| def test_Example_from_dict_invalid(annots): | ||||
|     vocab = Vocab() | ||||
|     predicted = Doc(vocab, words=annots["words"]) | ||||
|     with pytest.raises(ValueError): | ||||
|         eg = Example.from_dict(predicted, annots) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize("annots", [{"words": ["ice", "cream"], "tags": ["NN", "NN"]}]) | ||||
| def test_Example_from_dict_with_tags(annots): | ||||
|     vocab = Vocab() | ||||
|     predicted = Doc(vocab, words=annots["words"]) | ||||
|  | @ -37,28 +41,117 @@ def test_Example_from_dict_with_tags(annots): | |||
|         assert token.tag_ == annots["tags"][i] | ||||
| 
 | ||||
| 
 | ||||
| """ | ||||
| @pytest.mark.xfail(reason="TODO - fix") | ||||
| @pytest.mark.parametrize( | ||||
|     "annots", | ||||
|     [ | ||||
|         { | ||||
|             "words": ["I", "like", "London", "and", "Berlin", "."], | ||||
|             "entities": [(7, 13, "LOC"), (18, 24, "LOC")], | ||||
|         } | ||||
|     ], | ||||
| ) | ||||
| def test_Example_from_dict_with_entities(annots): | ||||
|     # TODO | ||||
|     pass | ||||
|     vocab = Vocab() | ||||
|     predicted = Doc(vocab, words=annots["words"]) | ||||
|     eg = Example.from_dict(predicted, annots) | ||||
|     assert len(list(eg.reference.ents)) == 2 | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize( | ||||
|     "annots", | ||||
|     [ | ||||
|         { | ||||
|             "words": ["I", "like", "London", "and", "Berlin", "."], | ||||
|             "deps": ["nsubj", "ROOT", "dobj", "cc", "conj", "punct"], | ||||
|             "heads": [1, 1, 1, 2, 2, 1], | ||||
|         } | ||||
|     ], | ||||
| ) | ||||
| def test_Example_from_dict_with_parse(annots): | ||||
|     # TODO | ||||
|     pass | ||||
|     vocab = Vocab() | ||||
|     predicted = Doc(vocab, words=annots["words"]) | ||||
|     eg = Example.from_dict(predicted, annots) | ||||
|     for i, token in enumerate(eg.reference): | ||||
|         assert token.dep_ == annots["deps"][i] | ||||
|         assert token.head.i == annots["heads"][i] | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.xfail(reason="TODO - fix") | ||||
| @pytest.mark.parametrize( | ||||
|     "annots", | ||||
|     [ | ||||
|         { | ||||
|             "words": ["Sarah", "'s", "sister", "flew"], | ||||
|             "morphs": [ | ||||
|                 "NounType=prop|Number=sing", | ||||
|                 "Poss=yes", | ||||
|                 "Number=sing", | ||||
|                 "Tense=past|VerbForm=fin", | ||||
|             ], | ||||
|         } | ||||
|     ], | ||||
| ) | ||||
| def test_Example_from_dict_with_morphology(annots): | ||||
|     # TODO | ||||
|     pass | ||||
|     vocab = Vocab() | ||||
|     predicted = Doc(vocab, words=annots["words"]) | ||||
|     eg = Example.from_dict(predicted, annots) | ||||
|     for i, token in enumerate(eg.reference): | ||||
|         assert token.morph_ == annots["morphs"][i] | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize( | ||||
|     "annots", | ||||
|     [ | ||||
|         { | ||||
|             "words": ["This", "is", "one", "sentence", "this", "is", "another"], | ||||
|             "sent_starts": [1, 0, 0, 0, 1, 0, 0], | ||||
|         } | ||||
|     ], | ||||
| ) | ||||
| def test_Example_from_dict_with_sent_start(annots): | ||||
|     # TODO | ||||
|     pass | ||||
|     vocab = Vocab() | ||||
|     predicted = Doc(vocab, words=annots["words"]) | ||||
|     eg = Example.from_dict(predicted, annots) | ||||
|     assert len(list(eg.reference.sents)) == 2 | ||||
|     for i, token in enumerate(eg.reference): | ||||
|         assert bool(token.is_sent_start) == bool(annots["sent_starts"][i]) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize( | ||||
|     "annots", | ||||
|     [ | ||||
|         { | ||||
|             "words": ["This", "is", "a", "sentence"], | ||||
|             "cats": {"cat1": 1.0, "cat2": 0.0, "cat3": 0.5}, | ||||
|         } | ||||
|     ], | ||||
| ) | ||||
| def test_Example_from_dict_with_cats(annots): | ||||
|     # TODO | ||||
|     pass | ||||
|     vocab = Vocab() | ||||
|     predicted = Doc(vocab, words=annots["words"]) | ||||
|     eg = Example.from_dict(predicted, annots) | ||||
|     assert len(list(eg.reference.cats)) == 3 | ||||
|     assert eg.reference.cats["cat1"] == 1.0 | ||||
|     assert eg.reference.cats["cat2"] == 0.0 | ||||
|     assert eg.reference.cats["cat3"] == 0.5 | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.xfail(reason="TODO - fix") | ||||
| @pytest.mark.parametrize( | ||||
|     "annots", | ||||
|     [ | ||||
|         { | ||||
|             "words": ["Russ", "Cochran", "made", "reprints"], | ||||
|             "links": {(0, 12): {"Q7381115": 1.0, "Q2146908": 0.0}}, | ||||
|         } | ||||
|     ], | ||||
| ) | ||||
| def test_Example_from_dict_with_links(annots): | ||||
|     # TODO | ||||
|     pass | ||||
| """ | ||||
|     vocab = Vocab() | ||||
|     predicted = Doc(vocab, words=annots["words"]) | ||||
|     eg = Example.from_dict(predicted, annots) | ||||
|     assert eg.reference[0].ent_kb_id_ == "Q7381115" | ||||
|     assert eg.reference[1].ent_kb_id_ == "Q7381115" | ||||
|     assert eg.reference[2].ent_kb_id_ == "" | ||||
|     assert eg.reference[3].ent_kb_id_ == "" | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user