mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Update tests
This commit is contained in:
		
							parent
							
								
									6a75992af6
								
							
						
					
					
						commit
						792bf9476c
					
				| 
						 | 
					@ -349,6 +349,8 @@ def test_iob_to_biluo():
 | 
				
			||||||
        iob_to_biluo(bad_iob)
 | 
					        iob_to_biluo(bad_iob)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# This test is outdated as we use DocBin now. It should probably be removed?
 | 
				
			||||||
 | 
					@pytest.mark.xfail(reason="Outdated")
 | 
				
			||||||
def test_roundtrip_docs_to_json(doc):
 | 
					def test_roundtrip_docs_to_json(doc):
 | 
				
			||||||
    nlp = English()
 | 
					    nlp = English()
 | 
				
			||||||
    text = doc.text
 | 
					    text = doc.text
 | 
				
			||||||
| 
						 | 
					@ -366,7 +368,7 @@ def test_roundtrip_docs_to_json(doc):
 | 
				
			||||||
    with make_tempdir() as tmpdir:
 | 
					    with make_tempdir() as tmpdir:
 | 
				
			||||||
        json_file = tmpdir / "roundtrip.json"
 | 
					        json_file = tmpdir / "roundtrip.json"
 | 
				
			||||||
        srsly.write_json(json_file, [docs_to_json(doc)])
 | 
					        srsly.write_json(json_file, [docs_to_json(doc)])
 | 
				
			||||||
        goldcorpus = Corpus(train=str(json_file), dev=str(json_file))
 | 
					        goldcorpus = Corpus(str(json_file), str(json_file))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        reloaded_example = next(goldcorpus.dev_dataset(nlp=nlp))
 | 
					        reloaded_example = next(goldcorpus.dev_dataset(nlp=nlp))
 | 
				
			||||||
        assert len(doc) == goldcorpus.count_train()
 | 
					        assert len(doc) == goldcorpus.count_train()
 | 
				
			||||||
| 
						 | 
					@ -387,39 +389,10 @@ def test_roundtrip_docs_to_json(doc):
 | 
				
			||||||
    assert cats["BAKING"] == reloaded_example.reference.cats["BAKING"]
 | 
					    assert cats["BAKING"] == reloaded_example.reference.cats["BAKING"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@pytest.mark.xfail  # TODO do we need to do the projectivity differently?
 | 
					 | 
				
			||||||
def test_projective_train_vs_nonprojective_dev(doc):
 | 
					 | 
				
			||||||
    nlp = English()
 | 
					 | 
				
			||||||
    deps = [t.dep_ for t in doc]
 | 
					 | 
				
			||||||
    heads = [t.head.i for t in doc]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    with make_tempdir() as tmpdir:
 | 
					 | 
				
			||||||
        json_file = tmpdir / "test.json"
 | 
					 | 
				
			||||||
        # write to JSON train dicts
 | 
					 | 
				
			||||||
        srsly.write_json(json_file, [docs_to_json(doc)])
 | 
					 | 
				
			||||||
        goldcorpus = Corpus(str(json_file), str(json_file))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        train_reloaded_example = next(goldcorpus.train_dataset(nlp))
 | 
					 | 
				
			||||||
        train_goldparse = get_parses_from_example(train_reloaded_example)[0][1]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        dev_reloaded_example = next(goldcorpus.dev_dataset(nlp))
 | 
					 | 
				
			||||||
        dev_goldparse = get_parses_from_example(dev_reloaded_example)[0][1]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    assert is_nonproj_tree([t.head.i for t in doc]) is True
 | 
					 | 
				
			||||||
    assert is_nonproj_tree(train_goldparse.heads) is False
 | 
					 | 
				
			||||||
    assert heads[:-1] == train_goldparse.heads[:-1]
 | 
					 | 
				
			||||||
    assert heads[-1] != train_goldparse.heads[-1]
 | 
					 | 
				
			||||||
    assert deps[:-1] == train_goldparse.labels[:-1]
 | 
					 | 
				
			||||||
    assert deps[-1] != train_goldparse.labels[-1]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    assert heads == dev_goldparse.heads
 | 
					 | 
				
			||||||
    assert deps == dev_goldparse.labels
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Hm, not sure where misalignment check would be handled? In the components too?
 | 
					# Hm, not sure where misalignment check would be handled? In the components too?
 | 
				
			||||||
# I guess that does make sense. A text categorizer doesn't care if it's
 | 
					# I guess that does make sense. A text categorizer doesn't care if it's
 | 
				
			||||||
# misaligned...
 | 
					# misaligned...
 | 
				
			||||||
@pytest.mark.xfail  # TODO
 | 
					@pytest.mark.xfail(reason="Outdated")
 | 
				
			||||||
def test_ignore_misaligned(doc):
 | 
					def test_ignore_misaligned(doc):
 | 
				
			||||||
    nlp = English()
 | 
					    nlp = English()
 | 
				
			||||||
    text = doc.text
 | 
					    text = doc.text
 | 
				
			||||||
| 
						 | 
					@ -450,6 +423,9 @@ def test_ignore_misaligned(doc):
 | 
				
			||||||
        assert len(train_reloaded_example) == 0
 | 
					        assert len(train_reloaded_example) == 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# We probably want the orth variant logic back, but this test won't be quite
 | 
				
			||||||
 | 
					# right -- we need to go from DocBin.
 | 
				
			||||||
 | 
					@pytest.mark.xfail(reason="Outdated")
 | 
				
			||||||
def test_make_orth_variants(doc):
 | 
					def test_make_orth_variants(doc):
 | 
				
			||||||
    nlp = English()
 | 
					    nlp = English()
 | 
				
			||||||
    with make_tempdir() as tmpdir:
 | 
					    with make_tempdir() as tmpdir:
 | 
				
			||||||
| 
						 | 
					@ -594,19 +570,3 @@ def test_split_sents(merged_dict):
 | 
				
			||||||
    assert token_annotation_2["words"] == ["It", "is", "just", "me"]
 | 
					    assert token_annotation_2["words"] == ["It", "is", "just", "me"]
 | 
				
			||||||
    assert token_annotation_2["tags"] == ["PRON", "AUX", "ADV", "PRON"]
 | 
					    assert token_annotation_2["tags"] == ["PRON", "AUX", "ADV", "PRON"]
 | 
				
			||||||
    assert token_annotation_2["sent_starts"] == [1, 0, 0, 0]
 | 
					    assert token_annotation_2["sent_starts"] == [1, 0, 0, 0]
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# This fails on some None value? Need to look into that.
 | 
					 | 
				
			||||||
@pytest.mark.xfail  # TODO
 | 
					 | 
				
			||||||
def test_tuples_to_example(vocab, merged_dict):
 | 
					 | 
				
			||||||
    cats = {"TRAVEL": 1.0, "BAKING": 0.0}
 | 
					 | 
				
			||||||
    merged_dict = dict(merged_dict)
 | 
					 | 
				
			||||||
    merged_dict["cats"] = cats
 | 
					 | 
				
			||||||
    ex = Example.from_dict(Doc(vocab, words=merged_dict["words"]), merged_dict)
 | 
					 | 
				
			||||||
    words = [token.text for token in ex.reference]
 | 
					 | 
				
			||||||
    assert words == merged_dict["words"]
 | 
					 | 
				
			||||||
    tags = [token.tag_ for token in ex.reference]
 | 
					 | 
				
			||||||
    assert tags == merged_dict["tags"]
 | 
					 | 
				
			||||||
    sent_starts = [token.is_sent_start for token in ex.reference]
 | 
					 | 
				
			||||||
    assert sent_starts == [bool(v) for v in merged_dict["sent_starts"]]
 | 
					 | 
				
			||||||
    ex.reference.cats == cats
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user