mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	adding tests for new example class (some still failing - WIP)
This commit is contained in:
		
							parent
							
								
									488727aee0
								
							
						
					
					
						commit
						6a67a11682
					
				| 
						 | 
				
			
			@ -146,6 +146,8 @@ def _fix_legacy_dict_data(predicted, example_dict):
 | 
			
		|||
            ent_iobs, ent_types = _parse_ner_tags(predicted, words, value)
 | 
			
		||||
            token_dict["ENT_IOB"] = ent_iobs
 | 
			
		||||
            token_dict["ENT_TYPE"] = ent_types
 | 
			
		||||
        else:
 | 
			
		||||
            raise ValueError(f"Unknown attr: {key}")
 | 
			
		||||
    return {
 | 
			
		||||
        "token_annotation": token_dict,
 | 
			
		||||
        "doc_annotation": doc_dict
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -531,4 +531,4 @@ def test_tuples_to_example(vocab, merged_dict):
 | 
			
		|||
    assert tags == merged_dict["tags"]
 | 
			
		||||
    sent_starts = [token.is_sent_start for token in ex.reference]
 | 
			
		||||
    assert sent_starts == [bool(v) for v in merged_dict["sent_starts"]]
 | 
			
		||||
    example.reference.cats == cats
 | 
			
		||||
    ex.reference.cats == cats
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -14,21 +14,25 @@ def test_Example_init_requires_doc_objects():
 | 
			
		|||
        eg = Example(None, Doc(vocab, words=["hi"]))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_Example_from_dict_basic():
 | 
			
		||||
    eg = Example.from_dict(
 | 
			
		||||
        Doc(Vocab(), words=["hello", "world"]),
 | 
			
		||||
        {
 | 
			
		||||
            "words": ["hello", "world"]
 | 
			
		||||
        }
 | 
			
		||||
        Doc(Vocab(), words=["hello", "world"]), {"words": ["hello", "world"]}
 | 
			
		||||
    )
 | 
			
		||||
    assert isinstance(eg.x, Doc)
 | 
			
		||||
    assert isinstance(eg.y, Doc)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("annots", [
 | 
			
		||||
    {"words": ["ice", "cream"], "tags": ["NN", "NN"]},
 | 
			
		||||
])
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "annots", [{"words": ["ice", "cream"], "weirdannots": ["something", "such"]}]
 | 
			
		||||
)
 | 
			
		||||
def test_Example_from_dict_invalid(annots):
 | 
			
		||||
    vocab = Vocab()
 | 
			
		||||
    predicted = Doc(vocab, words=annots["words"])
 | 
			
		||||
    with pytest.raises(ValueError):
 | 
			
		||||
        eg = Example.from_dict(predicted, annots)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("annots", [{"words": ["ice", "cream"], "tags": ["NN", "NN"]}])
 | 
			
		||||
def test_Example_from_dict_with_tags(annots):
 | 
			
		||||
    vocab = Vocab()
 | 
			
		||||
    predicted = Doc(vocab, words=annots["words"])
 | 
			
		||||
| 
						 | 
				
			
			@ -37,28 +41,117 @@ def test_Example_from_dict_with_tags(annots):
 | 
			
		|||
        assert token.tag_ == annots["tags"][i]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
@pytest.mark.xfail(reason="TODO - fix")
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "annots",
 | 
			
		||||
    [
 | 
			
		||||
        {
 | 
			
		||||
            "words": ["I", "like", "London", "and", "Berlin", "."],
 | 
			
		||||
            "entities": [(7, 13, "LOC"), (18, 24, "LOC")],
 | 
			
		||||
        }
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_Example_from_dict_with_entities(annots):
 | 
			
		||||
    # TODO
 | 
			
		||||
    pass
 | 
			
		||||
    vocab = Vocab()
 | 
			
		||||
    predicted = Doc(vocab, words=annots["words"])
 | 
			
		||||
    eg = Example.from_dict(predicted, annots)
 | 
			
		||||
    assert len(list(eg.reference.ents)) == 2
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "annots",
 | 
			
		||||
    [
 | 
			
		||||
        {
 | 
			
		||||
            "words": ["I", "like", "London", "and", "Berlin", "."],
 | 
			
		||||
            "deps": ["nsubj", "ROOT", "dobj", "cc", "conj", "punct"],
 | 
			
		||||
            "heads": [1, 1, 1, 2, 2, 1],
 | 
			
		||||
        }
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_Example_from_dict_with_parse(annots):
 | 
			
		||||
    # TODO
 | 
			
		||||
    pass
 | 
			
		||||
    vocab = Vocab()
 | 
			
		||||
    predicted = Doc(vocab, words=annots["words"])
 | 
			
		||||
    eg = Example.from_dict(predicted, annots)
 | 
			
		||||
    for i, token in enumerate(eg.reference):
 | 
			
		||||
        assert token.dep_ == annots["deps"][i]
 | 
			
		||||
        assert token.head.i == annots["heads"][i]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.xfail(reason="TODO - fix")
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "annots",
 | 
			
		||||
    [
 | 
			
		||||
        {
 | 
			
		||||
            "words": ["Sarah", "'s", "sister", "flew"],
 | 
			
		||||
            "morphs": [
 | 
			
		||||
                "NounType=prop|Number=sing",
 | 
			
		||||
                "Poss=yes",
 | 
			
		||||
                "Number=sing",
 | 
			
		||||
                "Tense=past|VerbForm=fin",
 | 
			
		||||
            ],
 | 
			
		||||
        }
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_Example_from_dict_with_morphology(annots):
 | 
			
		||||
    # TODO
 | 
			
		||||
    pass
 | 
			
		||||
    vocab = Vocab()
 | 
			
		||||
    predicted = Doc(vocab, words=annots["words"])
 | 
			
		||||
    eg = Example.from_dict(predicted, annots)
 | 
			
		||||
    for i, token in enumerate(eg.reference):
 | 
			
		||||
        assert token.morph_ == annots["morphs"][i]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "annots",
 | 
			
		||||
    [
 | 
			
		||||
        {
 | 
			
		||||
            "words": ["This", "is", "one", "sentence", "this", "is", "another"],
 | 
			
		||||
            "sent_starts": [1, 0, 0, 0, 1, 0, 0],
 | 
			
		||||
        }
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_Example_from_dict_with_sent_start(annots):
 | 
			
		||||
    # TODO
 | 
			
		||||
    pass
 | 
			
		||||
    vocab = Vocab()
 | 
			
		||||
    predicted = Doc(vocab, words=annots["words"])
 | 
			
		||||
    eg = Example.from_dict(predicted, annots)
 | 
			
		||||
    assert len(list(eg.reference.sents)) == 2
 | 
			
		||||
    for i, token in enumerate(eg.reference):
 | 
			
		||||
        assert bool(token.is_sent_start) == bool(annots["sent_starts"][i])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "annots",
 | 
			
		||||
    [
 | 
			
		||||
        {
 | 
			
		||||
            "words": ["This", "is", "a", "sentence"],
 | 
			
		||||
            "cats": {"cat1": 1.0, "cat2": 0.0, "cat3": 0.5},
 | 
			
		||||
        }
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_Example_from_dict_with_cats(annots):
 | 
			
		||||
    # TODO
 | 
			
		||||
    pass
 | 
			
		||||
    vocab = Vocab()
 | 
			
		||||
    predicted = Doc(vocab, words=annots["words"])
 | 
			
		||||
    eg = Example.from_dict(predicted, annots)
 | 
			
		||||
    assert len(list(eg.reference.cats)) == 3
 | 
			
		||||
    assert eg.reference.cats["cat1"] == 1.0
 | 
			
		||||
    assert eg.reference.cats["cat2"] == 0.0
 | 
			
		||||
    assert eg.reference.cats["cat3"] == 0.5
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.xfail(reason="TODO - fix")
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "annots",
 | 
			
		||||
    [
 | 
			
		||||
        {
 | 
			
		||||
            "words": ["Russ", "Cochran", "made", "reprints"],
 | 
			
		||||
            "links": {(0, 12): {"Q7381115": 1.0, "Q2146908": 0.0}},
 | 
			
		||||
        }
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_Example_from_dict_with_links(annots):
 | 
			
		||||
    # TODO
 | 
			
		||||
    pass
 | 
			
		||||
"""
 | 
			
		||||
    vocab = Vocab()
 | 
			
		||||
    predicted = Doc(vocab, words=annots["words"])
 | 
			
		||||
    eg = Example.from_dict(predicted, annots)
 | 
			
		||||
    assert eg.reference[0].ent_kb_id_ == "Q7381115"
 | 
			
		||||
    assert eg.reference[1].ent_kb_id_ == "Q7381115"
 | 
			
		||||
    assert eg.reference[2].ent_kb_id_ == ""
 | 
			
		||||
    assert eg.reference[3].ent_kb_id_ == ""
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user