diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index 587d1fff1..efc7ebc1b 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -131,6 +131,41 @@ def test_negative_sample_key_is_in_config(vocab, entity_types):
     assert tsys.cfg["neg_key"] == "non_entities"
 
 
+@pytest.mark.filterwarnings("ignore::UserWarning")
+def test_get_oracle_moves_negative_entities(tsys, doc, entity_annots):
+    entity_annots = [(s, e, "!" + label) for s, e, label in entity_annots]
+    example = Example.from_dict(doc, {"entities": entity_annots})
+    ex_dict = example.to_dict()
+    # Overwrite each "L-!GPE" tag with "-", then rebuild the example from the dict.
+    for i, tag in enumerate(ex_dict["doc_annotation"]["entities"]):
+        if tag == "L-!GPE":
+            ex_dict["doc_annotation"]["entities"][i] = "-"
+    example = Example.from_dict(doc, ex_dict)
+    # Only checks that the oracle returns a non-empty sequence of actions.
+    act_classes = tsys.get_oracle_sequence(example)
+    names = [tsys.get_class_name(act) for act in act_classes]
+    assert names
+
+
+def test_get_oracle_moves_negative_entities2(tsys, vocab):
+    doc = Doc(vocab, words=["A", "B", "C", "D"])
+    entity_annots = ["B-!PERSON", "L-!PERSON", "B-!PERSON", "L-!PERSON"]
+    example = Example.from_dict(doc, {"entities": entity_annots})
+    act_classes = tsys.get_oracle_sequence(example)
+    names = [tsys.get_class_name(act) for act in act_classes]
+    assert names  # passes as long as at least one action name was produced
+
+
+@pytest.mark.skip(reason="Maybe outdated? Unsure")
+def test_get_oracle_moves_negative_O(tsys, vocab):
+    doc = Doc(vocab, words=["A", "B", "C", "D"])
+    entity_annots = ["O", "!O", "O", "!O"]  # alternates "O" and "!O" tags
+    example = Example.from_dict(doc, {"entities": entity_annots})
+    act_classes = tsys.get_oracle_sequence(example)
+    names = [tsys.get_class_name(act) for act in act_classes]
+    assert names  # passes as long as at least one action name was produced
+
+
 # We can't easily represent this on a Doc object. Not sure what the best solution
 # would be, but I don't think it's an important use case?
@pytest.mark.skip(reason="No longer supported") @@ -242,7 +277,7 @@ def test_train_empty(): train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1])) ner = nlp.add_pipe("ner", last=True) ner.add_label("PERSON") - nlp.initialize() + nlp.initialize(get_examples=lambda: train_examples) for itn in range(2): losses = {} batches = util.minibatch(train_examples, size=8) @@ -372,7 +407,7 @@ def test_block_ner(): def test_overfitting_IO(): # Simple test to try and quickly overfit the NER component nlp = English() - ner = nlp.add_pipe("ner") + ner = nlp.add_pipe("ner", config={"model": {}}) train_examples = [] for text, annotations in TRAIN_DATA: train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))