mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-01 00:17:44 +03:00 
			
		
		
		
	delete outdated tests
This commit is contained in:
		
							parent
							
								
									ba2e491cc4
								
							
						
					
					
						commit
						2e3c0e2256
					
				|  | @ -1,180 +0,0 @@ | |||
| import pytest | ||||
| import spacy | ||||
| from spacy.matcher import PhraseMatcher | ||||
| from spacy.training import Example | ||||
| from spacy.lang.en import English | ||||
| from spacy.tests.util import make_tempdir | ||||
| from spacy.tokens import Doc | ||||
| from spacy.pipeline.coref import DEFAULT_CLUSTERS_PREFIX | ||||
| from spacy.pipeline.coref_er import DEFAULT_MENTIONS | ||||
| 
 | ||||
| 
 | ||||
| # fmt: off | ||||
| TRAIN_DATA = [ | ||||
|     ( | ||||
|         "John Smith told Laura that he was running late and asked her whether she could pick up their kids.", | ||||
|         { | ||||
|             "spans": { | ||||
|                 DEFAULT_MENTIONS: [ | ||||
|                     (0, 10, "MENTION"), | ||||
|                     (16, 21, "MENTION"), | ||||
|                     (27, 29, "MENTION"), | ||||
|                     (57, 60, "MENTION"), | ||||
|                     (69, 72, "MENTION"), | ||||
|                     (87, 92, "MENTION"), | ||||
|                     (87, 97, "MENTION"), | ||||
|                 ], | ||||
|                 f"{DEFAULT_CLUSTERS_PREFIX}_1": [ | ||||
|                     (0, 10, "MENTION"),     # John | ||||
|                     (27, 29, "MENTION"), | ||||
|                     (87, 92, "MENTION"),    # 'their' refers to John and Laur | ||||
|                 ], | ||||
|                 f"{DEFAULT_CLUSTERS_PREFIX}_2": [ | ||||
|                     (16, 21, "MENTION"),     # Laura | ||||
|                     (57, 60, "MENTION"), | ||||
|                     (69, 72, "MENTION"), | ||||
|                     (87, 92, "MENTION"),     # 'their' refers to John and Laura | ||||
|                 ], | ||||
|             } | ||||
|         }, | ||||
|     ), | ||||
|     ( | ||||
|         "Yes, I noticed that many friends around me received it. It seems that almost everyone received this SMS.", | ||||
|         { | ||||
|             "spans": { | ||||
|                 DEFAULT_MENTIONS: [ | ||||
|                     (5, 6, "MENTION"), | ||||
|                     (40, 42, "MENTION"), | ||||
|                     (52, 54, "MENTION"), | ||||
|                     (95, 103, "MENTION"), | ||||
|                 ], | ||||
|                 f"{DEFAULT_CLUSTERS_PREFIX}_1": [ | ||||
|                     (5, 6, "MENTION"),      # I | ||||
|                     (40, 42, "MENTION"), | ||||
| 
 | ||||
|                 ], | ||||
|                 f"{DEFAULT_CLUSTERS_PREFIX}_2": [ | ||||
|                     (52, 54, "MENTION"),     # SMS | ||||
|                     (95, 103, "MENTION"), | ||||
|                 ] | ||||
|             } | ||||
|         }, | ||||
|     ), | ||||
| ] | ||||
| # fmt: on | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture | ||||
| def nlp(): | ||||
|     return English() | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture | ||||
| def examples(nlp): | ||||
|     examples = [] | ||||
|     for text, annot in TRAIN_DATA: | ||||
|         # eg = Example.from_dict(nlp.make_doc(text), annot) | ||||
|         # if PR #7197 is merged, replace below with above line | ||||
|         ref_doc = nlp.make_doc(text) | ||||
|         for key, span_list in annot["spans"].items(): | ||||
|             spans = [] | ||||
|             for span_tuple in span_list: | ||||
|                 start_char = span_tuple[0] | ||||
|                 end_char = span_tuple[1] | ||||
|                 label = span_tuple[2] | ||||
|                 span = ref_doc.char_span(start_char, end_char, label=label) | ||||
|                 spans.append(span) | ||||
|             ref_doc.spans[key] = spans | ||||
|         eg = Example(nlp.make_doc(text), ref_doc) | ||||
|         examples.append(eg) | ||||
|     return examples | ||||
| 
 | ||||
| 
 | ||||
| def test_coref_er_no_POS(nlp): | ||||
|     doc = nlp("The police woman talked to him.") | ||||
|     coref_er = nlp.add_pipe("coref_er", last=True) | ||||
|     with pytest.raises(ValueError): | ||||
|         coref_er(doc) | ||||
| 
 | ||||
| 
 | ||||
| def test_coref_er_with_POS(nlp): | ||||
|     words = ["The", "police", "woman", "talked", "to", "him", "."] | ||||
|     pos = ["DET", "NOUN", "NOUN", "VERB", "ADP", "PRON", "PUNCT"] | ||||
|     doc = Doc(nlp.vocab, words=words, pos=pos) | ||||
|     coref_er = nlp.add_pipe("coref_er", last=True) | ||||
|     coref_er(doc) | ||||
|     assert len(doc.spans[coref_er.span_mentions]) == 1 | ||||
|     mention = doc.spans[coref_er.span_mentions][0] | ||||
|     assert (mention.text, mention.start, mention.end) == ("him", 5, 6) | ||||
| 
 | ||||
| 
 | ||||
| def test_coref_er_custom_POS(nlp): | ||||
|     words = ["The", "police", "woman", "talked", "to", "him", "."] | ||||
|     pos = ["DET", "NOUN", "NOUN", "VERB", "ADP", "PRON", "PUNCT"] | ||||
|     doc = Doc(nlp.vocab, words=words, pos=pos) | ||||
|     config = {"matcher_key": "POS", "matcher_values": ["NOUN"]} | ||||
|     coref_er = nlp.add_pipe("coref_er", last=True, config=config) | ||||
|     coref_er(doc) | ||||
|     assert len(doc.spans[coref_er.span_mentions]) == 1 | ||||
|     mention = doc.spans[coref_er.span_mentions][0] | ||||
|     assert (mention.text, mention.start, mention.end) == ("police woman", 1, 3) | ||||
| 
 | ||||
| 
 | ||||
| def test_coref_clusters(nlp, examples): | ||||
|     coref_er = nlp.add_pipe("coref_er", last=True) | ||||
|     coref = nlp.add_pipe("coref", last=True) | ||||
|     coref.initialize(lambda: examples) | ||||
|     words = ["Laura", "walked", "her", "dog", "."] | ||||
|     pos = ["PROPN", "VERB", "PRON", "NOUN", "PUNCT"] | ||||
|     doc = Doc(nlp.vocab, words=words, pos=pos) | ||||
|     coref_er(doc) | ||||
|     coref(doc) | ||||
|     assert len(doc.spans[coref_er.span_mentions]) > 0 | ||||
|     found_clusters = 0 | ||||
|     for name, spans in doc.spans.items(): | ||||
|         if name.startswith(coref.span_cluster_prefix): | ||||
|             found_clusters += 1 | ||||
|     assert found_clusters > 0 | ||||
| 
 | ||||
| 
 | ||||
| def test_coref_er_score(nlp, examples): | ||||
|     config = {"matcher_key": "POS", "matcher_values": []} | ||||
|     coref_er = nlp.add_pipe("coref_er", last=True, config=config) | ||||
|     coref = nlp.add_pipe("coref", last=True) | ||||
|     coref.initialize(lambda: examples) | ||||
|     mentions_key = coref_er.span_mentions | ||||
|     cluster_prefix_key = coref.span_cluster_prefix | ||||
|     matcher = PhraseMatcher(nlp.vocab) | ||||
|     terms_1 = ["Laura", "her", "she"] | ||||
|     terms_2 = ["it", "this SMS"] | ||||
|     matcher.add("A", [nlp.make_doc(text) for text in terms_1]) | ||||
|     matcher.add("B", [nlp.make_doc(text) for text in terms_2]) | ||||
|     for eg in examples: | ||||
|         pred = eg.predicted | ||||
|         matches = matcher(pred, as_spans=True) | ||||
|         pred.set_ents(matches) | ||||
|         coref_er(pred) | ||||
|         coref(pred) | ||||
|         eg.predicted = pred | ||||
|         # TODO: if #7209 is merged, experiment with 'include_label' | ||||
|         scores = coref_er.score([eg]) | ||||
|         assert f"{mentions_key}_f" in scores | ||||
|         scores = coref.score([eg]) | ||||
|         assert f"{cluster_prefix_key}_f" in scores | ||||
| 
 | ||||
| 
 | ||||
| def test_coref_serialization(nlp): | ||||
|     # Test that the coref component can be serialized | ||||
|     config_er = {"matcher_key": "TAG", "matcher_values": ["NN"]} | ||||
|     nlp.add_pipe("coref_er", last=True, config=config_er) | ||||
|     nlp.add_pipe("coref", last=True) | ||||
|     assert "coref_er" in nlp.pipe_names | ||||
|     assert "coref" in nlp.pipe_names | ||||
| 
 | ||||
|     with make_tempdir() as tmp_dir: | ||||
|         nlp.to_disk(tmp_dir) | ||||
|         nlp2 = spacy.load(tmp_dir) | ||||
|         assert "coref_er" in nlp2.pipe_names | ||||
|         assert "coref" in nlp2.pipe_names | ||||
|         coref_er_2 = nlp2.get_pipe("coref_er") | ||||
|         assert coref_er_2.matcher_key == "TAG" | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user