mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
9d3ce7cba2
* unit test for previously resolved unflatten issue * prevent a batch of empty docs from causing problems
24 lines
687 B
Python
24 lines
687 B
Python
# coding: utf8
|
|
from __future__ import unicode_literals
|
|
|
|
from spacy.lang.en import English
|
|
from spacy.util import minibatch, compounding
|
|
|
|
|
|
def test_issue4348():
    """Regression test for issue 4348: updating the tagger on batches made up
    solely of empty texts must complete without raising."""
    # Two examples, each an empty text paired with an empty tag list.
    empty_examples = [("", {"tags": []}), ("", {"tags": []})]

    # Blank English pipeline with a freshly created tagger as its only pipe.
    nlp = English()
    nlp.add_pipe(nlp.create_pipe("tagger"))
    sgd = nlp.begin_training()

    # Run a handful of epochs; the test passes if no update call raises.
    for _epoch in range(5):
        epoch_losses = {}
        # A new compounding batch-size schedule is built for every epoch.
        batch_sizes = compounding(4.0, 32.0, 1.001)
        for examples in minibatch(empty_examples, size=batch_sizes):
            # Split the (text, annotation) pairs into two parallel tuples.
            docs, golds = zip(*examples)
            nlp.update(docs, golds, sgd=sgd, losses=epoch_losses)
|