mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	* fix: gold pyx * remove print * skip test in Python 2 * Add unicode declarations and don't skip test on Python 2
This commit is contained in:
		
							parent
							
								
									bddfbc7e1b
								
							
						
					
					
						commit
						fcd25db033
					
				|  | @ -636,9 +636,9 @@ cdef class GoldParse: | ||||||
|             if morphology is None: |             if morphology is None: | ||||||
|                 morphology = [None for _ in words] |                 morphology = [None for _ in words] | ||||||
|             if entities is None: |             if entities is None: | ||||||
|                 entities = ["-" for _ in doc] |                 entities = ["-" for _ in words] | ||||||
|             elif len(entities) == 0: |             elif len(entities) == 0: | ||||||
|                 entities = ["O" for _ in doc] |                 entities = ["O" for _ in words] | ||||||
|             else: |             else: | ||||||
|                 # Translate the None values to '-', to make processing easier. |                 # Translate the None values to '-', to make processing easier. | ||||||
|                 # See Issue #2603 |                 # See Issue #2603 | ||||||
|  | @ -701,7 +701,9 @@ cdef class GoldParse: | ||||||
|                             self.heads[i] = i+1 |                             self.heads[i] = i+1 | ||||||
|                             self.labels[i] = "subtok" |                             self.labels[i] = "subtok" | ||||||
|                         else: |                         else: | ||||||
|                             self.heads[i] = self.gold_to_cand[heads[i2j_multi[i]]] |                             head_i = heads[i2j_multi[i]] | ||||||
|  |                             if head_i: | ||||||
|  |                                 self.heads[i] = self.gold_to_cand[head_i] | ||||||
|                             self.labels[i] = deps[i2j_multi[i]] |                             self.labels[i] = deps[i2j_multi[i]] | ||||||
|                         # Now set NER... This is annoying because if we've |                         # Now set NER... This is annoying because if we've | ||||||
|                         # got an entity word split into two, we need to adjust the |                         # got an entity word split into two, we need to adjust the | ||||||
|  |  | ||||||
							
								
								
									
										13
									
								
								spacy/tests/regression/test_issue4529.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								spacy/tests/regression/test_issue4529.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,13 @@ | ||||||
|  | # coding: utf8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  | 
 | ||||||
|  | import pytest | ||||||
|  | from spacy.gold import GoldParse | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @pytest.mark.parametrize( | ||||||
|  |     "text,words", [("A'B C", ["A", "'", "B", "C"]), ("A-B", ["A-B"])] | ||||||
|  | ) | ||||||
|  | def test_gold_misaligned(en_tokenizer, text, words): | ||||||
|  |     doc = en_tokenizer(text) | ||||||
|  |     GoldParse(doc, words=words) | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user