mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			30 lines
		
	
	
		
			1.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			30 lines
		
	
	
		
			1.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from spacy.lang.en import English
 | |
| from spacy.pipeline import merge_entities
 | |
| import pytest
 | |
| 
 | |
| 
 | |
def test_issue5918():
    """Regression test for issue #5918: an edge case when merging entities.

    Three ORG patterns are registered with an entity ruler, so the sample
    text yields three entities. A token's head is then re-attached before
    ``merge_entities`` runs, and the entity count must still be three.
    """
    org_names = (
        "Digicon Inc",
        "Rotan Mosle Inc's",
        "Rotan Mosle Technology Partners Ltd",
    )

    nlp = English()
    entity_ruler = nlp.add_pipe("entity_ruler")
    entity_ruler.add_patterns(
        [{"label": "ORG", "pattern": name} for name in org_names]
    )

    # NOTE: the whitespace inside this literal is significant; the token
    # indices used further down are tied to this exact layout.
    text = """
        Digicon Inc said it has completed the previously-announced disposition
        of its computer systems division to an investment group led by
        Rotan Mosle Inc's Rotan Mosle Technology Partners Ltd affiliate.
        """
    parsed = nlp(text)
    assert len(parsed.ents) == 3

    # Point the head of the third span at a token inside the entity
    # (ent_iob=I). Bug #5918 wrongly propagated that "I" to the whole merged
    # entity, leaving 2 final ents instead of 3.
    with pytest.warns(UserWarning):
        parsed[29].head = parsed[33]

    merged = merge_entities(parsed)
    assert len(merged.ents) == 3
 |