mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	Example.get_aligned_parse: Handle unit and zero length vectors correctly (#11026)
				
					
				
			* `Example.get_aligned_parse`: Do not squeeze gold token idx vector

			  Correctly handle zero-size vectors passed to `np.vectorize`
			* Add tests
			* Use `Doc` ctor to initialize attributes
			* Remove unintended change

			  Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
			* Remove unused import

			  Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
		
							parent
							
								
									a9559e7435
								
							
						
					
					
						commit
						1d5cad0b42
					
				|  | @ -679,6 +679,31 @@ def test_projectivize(en_tokenizer): | ||||||
|     assert proj_heads == [3, 2, 3, 3, 3] |     assert proj_heads == [3, 2, 3, 3, 3] | ||||||
|     assert nonproj_heads == [3, 2, 3, 3, 2] |     assert nonproj_heads == [3, 2, 3, 3, 2] | ||||||
| 
 | 
 | ||||||
|  |     # Test single token documents | ||||||
|  |     doc = en_tokenizer("Conrail") | ||||||
|  |     heads = [0] | ||||||
|  |     deps = ["dep"] | ||||||
|  |     example = Example.from_dict(doc, {"heads": heads, "deps": deps}) | ||||||
|  |     proj_heads, proj_labels = example.get_aligned_parse(projectivize=True) | ||||||
|  |     assert proj_heads == heads | ||||||
|  |     assert proj_labels == deps | ||||||
|  | 
 | ||||||
|  |     # Test documents with no alignments | ||||||
|  |     doc_a = Doc( | ||||||
|  |         doc.vocab, words=["Double-Jointed"], spaces=[False], deps=["ROOT"], heads=[0] | ||||||
|  |     ) | ||||||
|  |     doc_b = Doc( | ||||||
|  |         doc.vocab, | ||||||
|  |         words=["Double", "-", "Jointed"], | ||||||
|  |         spaces=[True, True, True], | ||||||
|  |         deps=["amod", "punct", "ROOT"], | ||||||
|  |         heads=[2, 2, 2], | ||||||
|  |     ) | ||||||
|  |     example = Example(doc_a, doc_b) | ||||||
|  |     proj_heads, proj_deps = example.get_aligned_parse(projectivize=True) | ||||||
|  |     assert proj_heads == [None] | ||||||
|  |     assert proj_deps == [None] | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| def test_iob_to_biluo(): | def test_iob_to_biluo(): | ||||||
|     good_iob = ["O", "O", "B-LOC", "I-LOC", "O", "B-PERSON"] |     good_iob = ["O", "O", "B-LOC", "I-LOC", "O", "B-PERSON"] | ||||||
|  |  | ||||||
|  | @ -249,9 +249,9 @@ cdef class Example: | ||||||
|         # Fetch all aligned gold token indices. |         # Fetch all aligned gold token indices. | ||||||
|         if c2g_single_toks.shape == cand_to_gold.lengths.shape: |         if c2g_single_toks.shape == cand_to_gold.lengths.shape: | ||||||
|             # This is the most likely case. |             # This is the most likely case. | ||||||
|             gold_i = cand_to_gold[:].squeeze() |             gold_i = cand_to_gold[:] | ||||||
|         else: |         else: | ||||||
|             gold_i = numpy.vectorize(lambda x: cand_to_gold[int(x)][0])(c2g_single_toks).squeeze() |             gold_i = numpy.vectorize(lambda x: cand_to_gold[int(x)][0], otypes='i')(c2g_single_toks) | ||||||
| 
 | 
 | ||||||
|         # Fetch indices of all gold heads for the aligned gold tokens. |         # Fetch indices of all gold heads for the aligned gold tokens. | ||||||
|         heads = numpy.asarray(heads, dtype='i') |         heads = numpy.asarray(heads, dtype='i') | ||||||
|  | @ -261,7 +261,7 @@ cdef class Example: | ||||||
|         # gold tokens (and are aligned to a single candidate token). |         # gold tokens (and are aligned to a single candidate token). | ||||||
|         g2c_len_heads = gold_to_cand.lengths[gold_head_i] |         g2c_len_heads = gold_to_cand.lengths[gold_head_i] | ||||||
|         g2c_len_heads = numpy.where(g2c_len_heads == 1)[0] |         g2c_len_heads = numpy.where(g2c_len_heads == 1)[0] | ||||||
|         g2c_i = numpy.vectorize(lambda x: gold_to_cand[int(x)][0])(gold_head_i[g2c_len_heads]).squeeze() |         g2c_i = numpy.vectorize(lambda x: gold_to_cand[int(x)][0], otypes='i')(gold_head_i[g2c_len_heads]).squeeze() | ||||||
| 
 | 
 | ||||||
|         # Update head/dep alignments with the above. |         # Update head/dep alignments with the above. | ||||||
|         aligned_heads = numpy.full((self.x.length), None) |         aligned_heads = numpy.full((self.x.length), None) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user