Merge regression tests

2025-11-08 11:57:39 +03:00 · 2019-02-24 21:03:39 +01:00 · 2019-02-24 21:03:39 +01:00 · a48deb4081
commit a48deb4081
parent 8f6c193a4d
2 changed files with 21 additions and 25 deletions
--- a/spacy/tests/regression/test_issue2501-3000.py
+++ b/spacy/tests/regression/test_issue2501-3000.py
@ -13,6 +13,7 @@ from spacy.vocab import Vocab
 from spacy.compat import pickle
 from spacy._ml import link_vectors_to_models
 import numpy
+import random

 from ..util import get_doc

@ -138,6 +139,26 @@ def test_issue2782(text, lang_cls):
    assert doc[0].like_num


+def test_issue2800():
+    """Regression test for issue #2800: train an NER model with 1000 labels.
+    Used to cause segfault. No assertions: passes if training runs to the end.
+    """
+    train_data = []
+    train_data.extend([("One sentence", {"entities": []})])  # one text, no entities
+    entity_types = [str(i) for i in range(1000)]  # label names "0" .. "999"
+    nlp = English()
+    ner = nlp.create_pipe("ner")
+    nlp.add_pipe(ner)
+    for entity_type in list(entity_types):
+        ner.add_label(entity_type)
+    optimizer = nlp.begin_training()
+    for i in range(20):  # 20 passes over the single training example
+        losses = {}
+        random.shuffle(train_data)
+        for statement, entities in train_data:
+            nlp.update([statement], [entities], sgd=optimizer, losses=losses, drop=0.5)
+
+
 def test_issue2822(it_tokenizer):
    """Test that the abbreviation of poco is kept as one word."""
    doc = it_tokenizer("Vuoi un po' di zucchero?")
--- a/spacy/tests/regression/test_issue2800.py
+++ b/spacy/tests/regression/test_issue2800.py
@ -1,25 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import random
-from spacy.lang.en import English
-
-
-def test_train_with_many_entity_types():
-    """Test issue that arises when too many labels are added to NER model.
-    NB: currently causes segfault!
-    """
-    train_data = []
-    train_data.extend([("One sentence", {"entities": []})])
-    entity_types = [str(i) for i in range(1000)]
-    nlp = English(pipeline=[])
-    ner = nlp.create_pipe("ner")
-    nlp.add_pipe(ner)
-    for entity_type in list(entity_types):
-        ner.add_label(entity_type)
-    optimizer = nlp.begin_training()
-    for i in range(20):
-        losses = {}
-        random.shuffle(train_data)
-        for statement, entities in train_data:
-            nlp.update([statement], [entities], sgd=optimizer, losses=losses, drop=0.5)