mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-26 21:51:24 +03:00 
			
		
		
		
	* Fix unicode regex problem for non-English locales in gold standard
This commit is contained in:
		
							parent
							
								
									588026fe93
								
							
						
					
					
						commit
						7c37f45e9f
					
				|  | @ -1,3 +1,4 @@ | |||
| from __future__ import unicode_literals | ||||
| import numpy | ||||
| import io | ||||
| import json | ||||
|  | @ -42,7 +43,6 @@ def tags_to_entities(tags): | |||
|     return entities | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| def align(cand_words, gold_words): | ||||
|     cost, edit_path = _min_edit_path(cand_words, gold_words) | ||||
|     alignment = [] | ||||
|  | @ -63,7 +63,7 @@ def align(cand_words, gold_words): | |||
|     return alignment | ||||
| 
 | ||||
| 
 | ||||
| punct_re = re.compile(r'\W') | ||||
| punct_re = re.compile(r'\W', re.UNICODE) | ||||
| def _min_edit_path(cand_words, gold_words): | ||||
|     cdef: | ||||
|         Pool mem | ||||
|  | @ -264,13 +264,3 @@ cdef class GoldParse: | |||
| 
 | ||||
| def is_punct_label(label): | ||||
|     return label == 'P' or label.lower() == 'punct' | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user