mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	Remove _spans_to_offsets
Basically the same as get_clusters_from_doc
This commit is contained in:
		
							parent
							
								
									8f598d7b01
								
							
						
					
					
						commit
						6f5cf838ec
					
				|  | @ -203,17 +203,3 @@ def create_gold_scores( | |||
| 
 | ||||
|     # caller needs to convert to array, and add placeholder | ||||
|     return out | ||||
| 
 | ||||
| 
 | ||||
| def _spans_to_offsets(doc: Doc) -> List[List[Tuple[int, int]]]: | ||||
|     """Convert doc.spans to nested list of ints for comparison. | ||||
|     The ints are character indices, and the spans groups are sorted by key first. | ||||
| 
 | ||||
|     This is useful for checking consistency of predictions. | ||||
|     """ | ||||
|     out = [] | ||||
|     keys = sorted([key for key in doc.spans]) | ||||
|     for key in keys: | ||||
|         cluster = doc.spans[key] | ||||
|         out.append([(ss.start_char, ss.end_char) for ss in cluster]) | ||||
|     return out | ||||
|  |  | |||
|  | @ -9,7 +9,7 @@ from spacy.ml.models.coref_util import ( | |||
|     DEFAULT_CLUSTER_PREFIX, | ||||
|     select_non_crossing_spans, | ||||
|     get_sentence_ids, | ||||
|     _spans_to_offsets, | ||||
|     get_clusters_from_doc, | ||||
| ) | ||||
| 
 | ||||
| from thinc.util import has_torch | ||||
|  | @ -101,7 +101,7 @@ def test_coref_serialization(nlp): | |||
|         assert nlp2.pipe_names == ["coref"] | ||||
|         doc2 = nlp2(text) | ||||
| 
 | ||||
|         assert _spans_to_offsets(doc) == _spans_to_offsets(doc2) | ||||
|         assert get_clusters_from_doc(doc) == get_clusters_from_doc(doc2) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.skipif(not has_torch, reason="Torch not available") | ||||
|  | @ -140,8 +140,8 @@ def test_overfitting_IO(nlp): | |||
|     docs1 = list(nlp.pipe(texts)) | ||||
|     docs2 = list(nlp.pipe(texts)) | ||||
|     docs3 = [nlp(text) for text in texts] | ||||
|     assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs2[0]) | ||||
|     assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs3[0]) | ||||
|     assert get_clusters_from_doc(docs1[0]) == get_clusters_from_doc(docs2[0]) | ||||
|     assert get_clusters_from_doc(docs1[0]) == get_clusters_from_doc(docs3[0]) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.skipif(not has_torch, reason="Torch not available") | ||||
|  | @ -196,8 +196,8 @@ def test_tokenization_mismatch(nlp): | |||
|     docs1 = list(nlp.pipe(texts)) | ||||
|     docs2 = list(nlp.pipe(texts)) | ||||
|     docs3 = [nlp(text) for text in texts] | ||||
|     assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs2[0]) | ||||
|     assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs3[0]) | ||||
|     assert get_clusters_from_doc(docs1[0]) == get_clusters_from_doc(docs2[0]) | ||||
|     assert get_clusters_from_doc(docs1[0]) == get_clusters_from_doc(docs3[0]) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.skipif(not has_torch, reason="Torch not available") | ||||
|  |  | |||
|  | @ -9,7 +9,7 @@ from spacy.ml.models.coref_util import ( | |||
|     DEFAULT_CLUSTER_PREFIX, | ||||
|     select_non_crossing_spans, | ||||
|     get_sentence_ids, | ||||
|     _spans_to_offsets, | ||||
|     get_clusters_from_doc, | ||||
| ) | ||||
| 
 | ||||
| from thinc.util import has_torch | ||||
|  | @ -88,7 +88,7 @@ def test_span_predictor_serialization(nlp): | |||
|         assert nlp2.pipe_names == ["span_predictor"] | ||||
|         doc2 = nlp2(text) | ||||
| 
 | ||||
|         assert _spans_to_offsets(doc) == _spans_to_offsets(doc2) | ||||
|         assert get_clusters_from_doc(doc) == get_clusters_from_doc(doc2) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.skipif(not has_torch, reason="Torch not available") | ||||
|  | @ -122,7 +122,7 @@ def test_overfitting_IO(nlp): | |||
|     # test the trained model, using the pred since it has heads | ||||
|     doc = nlp(train_examples[0].predicted) | ||||
|     # XXX This actually tests that it can overfit | ||||
|     assert _spans_to_offsets(doc) == _spans_to_offsets(train_examples[0].reference) | ||||
|     assert get_clusters_from_doc(doc) == get_clusters_from_doc(train_examples[0].reference) | ||||
| 
 | ||||
|     # Also test the results are still the same after IO | ||||
|     with make_tempdir() as tmp_dir: | ||||
|  | @ -140,8 +140,8 @@ def test_overfitting_IO(nlp): | |||
|     docs1 = list(nlp.pipe(texts)) | ||||
|     docs2 = list(nlp.pipe(texts)) | ||||
|     docs3 = [nlp(text) for text in texts] | ||||
|     assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs2[0]) | ||||
|     assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs3[0]) | ||||
|     assert get_clusters_from_doc(docs1[0]) == get_clusters_from_doc(docs2[0]) | ||||
|     assert get_clusters_from_doc(docs1[0]) == get_clusters_from_doc(docs3[0]) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.skipif(not has_torch, reason="Torch not available") | ||||
|  | @ -187,7 +187,7 @@ def test_tokenization_mismatch(nlp): | |||
|     test_doc = train_examples[0].predicted | ||||
|     doc = nlp(test_doc) | ||||
|     # XXX This actually tests that it can overfit | ||||
|     assert _spans_to_offsets(doc) == _spans_to_offsets(train_examples[0].reference) | ||||
|     assert get_clusters_from_doc(doc) == get_clusters_from_doc(train_examples[0].reference) | ||||
| 
 | ||||
|     # Also test the results are still the same after IO | ||||
|     with make_tempdir() as tmp_dir: | ||||
|  | @ -206,8 +206,8 @@ def test_tokenization_mismatch(nlp): | |||
|     docs1 = list(nlp.pipe(texts)) | ||||
|     docs2 = list(nlp.pipe(texts)) | ||||
|     docs3 = [nlp(text) for text in texts] | ||||
|     assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs2[0]) | ||||
|     assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs3[0]) | ||||
|     assert get_clusters_from_doc(docs1[0]) == get_clusters_from_doc(docs2[0]) | ||||
|     assert get_clusters_from_doc(docs1[0]) == get_clusters_from_doc(docs3[0]) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.skipif(not has_torch, reason="Torch not available") | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user