mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Add docstring for spacy.gold.align
This commit is contained in:
		
							parent
							
								
									fe0e1873a3
								
							
						
					
					
						commit
						394e4d8058
					
				|  | @ -71,6 +71,23 @@ def merge_sents(sents): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def align(cand_words, gold_words): | def align(cand_words, gold_words): | ||||||
|  |     """Calculate alignment tables between two tokenizations, using the Levenshtein | ||||||
|  |     algorithm. The alignment is case-insensitive. | ||||||
|  | 
 | ||||||
|  |     cand_words (List[str]): The candidate tokenization. | ||||||
|  |     gold_words (List[str]): The reference tokenization. | ||||||
|  |     RETURNS: (tuple): A 5-tuple consisting of the following information: | ||||||
|  |       * cost (int): The number of misaligned tokens. | ||||||
|  |       * a2b (List[int]): Mapping of indices in `cand_words` to indices in `gold_words`. | ||||||
|  |             For instance, if `a2b[4] == 6`, that means that `cand_words[4]` aligns | ||||||
|  |             to `gold_words[6]`. If there's no one-to-one alignment for a token, | ||||||
|  |             it has the value -1. | ||||||
|  |       * b2a (List[int]): The same as `a2b`, but mapping the other direction. | ||||||
|  |       * a2b_multi (Dict[int, int]): A dictionary mapping indices in `cand_words` to | ||||||
|  |             indices in `gold_words`, where multiple tokens of `cand_words` align to | ||||||
|  |             the same token of `gold_words`. | ||||||
|  |       * b2a_multi (Dict[int, int]): As with `a2b_multi`, but mapping the other | ||||||
|  |             direction. | ||||||
|  |     """ | ||||||
|     if cand_words == gold_words: |     if cand_words == gold_words: | ||||||
|         alignment = numpy.arange(len(cand_words)) |         alignment = numpy.arange(len(cand_words)) | ||||||
|         return 0, alignment, alignment, {}, {} |         return 0, alignment, alignment, {}, {} | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user