mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* remove duplicate unit test * unit test (currently failing) for issue 4267 * bugfix: ensure doc.ents preserves kb_id annotations * fix in setting doc.ents with empty label * rename * test for presetting an entity to a certain type * allow overwriting Outside + blocking presets * fix actions when previous label needs to be kept * fix default ent_iob in set entities * cleaner solution with U- action * remove debugging print statements * unit tests with explicit transitions and is_valid testing * remove U- from move_names explicitly * remove unit tests with pre-trained models that don't work * remove (working) unit tests with pre-trained models * clean up unit tests * move unit tests * small fixes * remove two TODOs from doc.ents comments
		
			
				
	
	
		
			43 lines
		
	
	
		
			1.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			43 lines
		
	
	
		
			1.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# coding: utf8
 | 
						|
from __future__ import unicode_literals
 | 
						|
 | 
						|
import pytest
 | 
						|
 | 
						|
import spacy
 | 
						|
 | 
						|
from spacy.lang.en import English
 | 
						|
from spacy.pipeline import EntityRuler
 | 
						|
from spacy.tokens import Span
 | 
						|
 | 
						|
 | 
						|
def test_issue4267():
    """Regression test for issue 4267.

    Runs a blank English pipeline containing only an ``ner`` component,
    then appends an ``entity_ruler`` and runs the same text again. In both
    runs the doc must report ``is_nered`` and every token must carry
    ``ent_iob == 2``.
    """

    def check_iob_annotations(doc):
        # The doc must be flagged as NER-processed and every token must
        # have ent_iob == 2 (presumably the "O"/outside code in spaCy's
        # IOB scheme -- see the Token API docs).
        assert doc.is_nered
        for tok in doc:
            assert tok.ent_iob == 2

    # Pipeline with a single ner component that knows one label.
    nlp = English()
    ner_pipe = nlp.create_pipe("ner")
    ner_pipe.add_label("PEOPLE")
    nlp.add_pipe(ner_pipe)
    nlp.begin_training()
    assert "ner" in nlp.pipe_names

    # First pass: ner only.
    check_iob_annotations(nlp("hi"))

    # Append an entity ruler whose only pattern does not occur in the text.
    entity_ruler = EntityRuler(nlp)
    entity_ruler.add_patterns([{"label": "SOFTWARE", "pattern": "spacy"}])
    nlp.add_pipe(entity_ruler)
    for pipe_name in ("entity_ruler", "ner"):
        assert pipe_name in nlp.pipe_names

    # Second pass: ner followed by the ruler must give the same annotations.
    check_iob_annotations(nlp("hi"))
 |