mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
Merge regression tests
This commit is contained in:
parent
8f6c193a4d
commit
a48deb4081
|
@ -13,6 +13,7 @@ from spacy.vocab import Vocab
|
|||
from spacy.compat import pickle
|
||||
from spacy._ml import link_vectors_to_models
|
||||
import numpy
|
||||
import random
|
||||
|
||||
from ..util import get_doc
|
||||
|
||||
|
@ -138,6 +139,26 @@ def test_issue2782(text, lang_cls):
|
|||
assert doc[0].like_num
|
||||
|
||||
|
||||
def test_issue2800():
    """Test issue that arises when too many labels are added to NER model.

    Used to cause segfault. The test makes no assertions: it passes as long
    as adding 1000 labels and running 20 update passes completes.
    """
    # A single example with no entity annotations is enough for this test.
    train_data = [("One sentence", {"entities": []})]
    entity_types = [str(i) for i in range(1000)]
    nlp = English()
    ner = nlp.create_pipe("ner")
    nlp.add_pipe(ner)
    for entity_type in entity_types:
        ner.add_label(entity_type)
    optimizer = nlp.begin_training()
    for _ in range(20):
        losses = {}
        random.shuffle(train_data)
        for statement, annotations in train_data:
            nlp.update(
                [statement], [annotations], sgd=optimizer, losses=losses, drop=0.5
            )
|
||||
|
||||
|
||||
def test_issue2822(it_tokenizer):
|
||||
"""Test that the abbreviation of poco is kept as one word."""
|
||||
doc = it_tokenizer("Vuoi un po' di zucchero?")
|
||||
|
|
|
@ -1,25 +0,0 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
from spacy.lang.en import English
|
||||
|
||||
|
||||
def test_train_with_many_entity_types():
    """Test issue that arises when too many labels are added to NER model.

    NB: currently causes segfault!

    The test makes no assertions: it only adds 1000 labels to the NER pipe
    and runs 20 update passes over a single example.
    """
    # A single example with no entity annotations.
    train_data = [("One sentence", {"entities": []})]
    entity_types = [str(i) for i in range(1000)]
    nlp = English(pipeline=[])
    ner = nlp.create_pipe("ner")
    nlp.add_pipe(ner)
    for entity_type in entity_types:
        ner.add_label(entity_type)
    optimizer = nlp.begin_training()
    for _ in range(20):
        losses = {}
        random.shuffle(train_data)
        for statement, annotations in train_data:
            nlp.update(
                [statement], [annotations], sgd=optimizer, losses=losses, drop=0.5
            )
|
Loading…
Reference in New Issue
Block a user