Merge regression tests

This commit is contained in:
Ines Montani 2019-02-24 21:03:39 +01:00
parent 8f6c193a4d
commit a48deb4081
2 changed files with 21 additions and 25 deletions

View File

@ -13,6 +13,7 @@ from spacy.vocab import Vocab
from spacy.compat import pickle
from spacy._ml import link_vectors_to_models
import numpy
import random
from ..util import get_doc
@ -138,6 +139,26 @@ def test_issue2782(text, lang_cls):
assert doc[0].like_num
def test_issue2800():
    """Regression test for issue #2800: NER training with a very large label set.

    Adding too many labels to the NER model used to cause a segfault, so the
    test only needs to run through to the end; it makes no assertions.
    """
    # A single training example that carries no entity annotations.
    examples = [("One sentence", {"entities": []})]
    nlp = English()
    ner = nlp.create_pipe("ner")
    nlp.add_pipe(ner)
    # Register 1000 distinct labels named "0" through "999".
    for label in map(str, range(1000)):
        ner.add_label(label)
    optimizer = nlp.begin_training()
    for _ in range(20):
        losses = {}
        random.shuffle(examples)
        for text, annotations in examples:
            nlp.update(
                [text],
                [annotations],
                sgd=optimizer,
                losses=losses,
                drop=0.5,
            )
def test_issue2822(it_tokenizer):
"""Test that the abbreviation of poco is kept as one word."""
doc = it_tokenizer("Vuoi un po' di zucchero?")

View File

@ -1,25 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import random
from spacy.lang.en import English
def test_train_with_many_entity_types():
    """Exercise NER training after a very large number of labels is added.

    NB: per the original note on this test, this currently causes a segfault.
    The test makes no assertions; it only has to run to completion.
    """
    # A single training example that carries no entity annotations.
    examples = [("One sentence", {"entities": []})]
    nlp = English(pipeline=[])
    ner = nlp.create_pipe("ner")
    nlp.add_pipe(ner)
    # Register 1000 distinct labels named "0" through "999".
    for label in map(str, range(1000)):
        ner.add_label(label)
    optimizer = nlp.begin_training()
    for _ in range(20):
        losses = {}
        random.shuffle(examples)
        for text, annotations in examples:
            nlp.update(
                [text],
                [annotations],
                sgd=optimizer,
                losses=losses,
                drop=0.5,
            )