💫 Support simple training format in nlp.evaluate and add tests (#4033)

* Support simple training format in nlp.evaluate and add tests

* Update docs [ci skip]
This commit is contained in:
Ines Montani 2019-07-27 17:30:18 +02:00 committed by GitHub
parent a3723f439c
commit fc69da0acb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 67 additions and 8 deletions

View File

@ -618,7 +618,7 @@ class Language(object):
if component_cfg is None:
component_cfg = {}
docs, golds = zip(*docs_golds)
docs = list(docs)
docs = [self.make_doc(doc) if isinstance(doc, basestring_) else doc for doc in docs]
golds = list(golds)
for name, pipe in self.pipeline:
kwargs = component_cfg.get(name, {})
@ -628,6 +628,8 @@ class Language(object):
else:
docs = pipe.pipe(docs, **kwargs)
for doc, gold in zip(docs, golds):
if not isinstance(gold, GoldParse):
gold = GoldParse(doc, **gold)
if verbose:
print(doc)
kwargs = component_cfg.get("scorer", {})

View File

@ -0,0 +1,57 @@
# coding: utf-8
from __future__ import unicode_literals
import pytest
from spacy.vocab import Vocab
from spacy.language import Language
from spacy.tokens import Doc
from spacy.gold import GoldParse
@pytest.fixture
def nlp():
    """Build a blank pipeline with a two-label text classifier.

    The returned ``Language`` object has a single "textcat" component with
    the labels "POSITIVE" and "NEGATIVE", and ``begin_training`` has already
    been called on it.
    """
    pipeline = Language(Vocab())
    classifier = pipeline.create_pipe("textcat")
    for category in ("POSITIVE", "NEGATIVE"):
        classifier.add_label(category)
    pipeline.add_pipe(classifier)
    pipeline.begin_training()
    return pipeline
def test_language_update(nlp):
    """Exercise ``nlp.update`` with each supported input pairing.

    Inputs may be ``Doc`` objects or raw text, and annotations may be
    ``GoldParse`` objects or plain dicts. Batches whose docs and golds differ
    in length are expected to raise ``IndexError``.
    """
    raw_text = "hello world"
    annotations = {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
    doc = Doc(nlp.vocab, words=raw_text.split(" "))
    gold = GoldParse(doc, **annotations)
    # Order matters only in that the calls run sequentially on the same
    # pipeline: doc + gold, text + dict, doc + dict, text + gold.
    valid_batches = (
        ([doc], [gold]),
        ([raw_text], [annotations]),
        ([doc], [annotations]),
        ([raw_text], [gold]),
    )
    for docs, golds in valid_batches:
        nlp.update(docs, golds)
    # Update badly: one side of the batch is empty.
    mismatched_batches = (
        ([doc], []),
        ([], [gold]),
    )
    for docs, golds in mismatched_batches:
        with pytest.raises(IndexError):
            nlp.update(docs, golds)
def test_language_evaluate(nlp):
    """Exercise ``nlp.evaluate`` with each supported input pairing.

    Each example is a 2-tuple whose first item is a ``Doc`` or raw text and
    whose second item is a ``GoldParse`` or an annotations dict. A flat list
    that is not made of such tuples is expected to raise.
    """
    raw_text = "hello world"
    annotations = {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
    doc = Doc(nlp.vocab, words=raw_text.split(" "))
    gold = GoldParse(doc, **annotations)
    # Evaluated one example at a time, in this order:
    # doc + gold, text + dict, doc + dict, text + gold.
    valid_examples = (
        (doc, gold),
        (raw_text, annotations),
        (doc, annotations),
        (raw_text, gold),
    )
    for example in valid_examples:
        nlp.evaluate([example])
    # Evaluate badly: the items are passed flat instead of as one tuple.
    with pytest.raises(Exception):
        nlp.evaluate([raw_text, gold])

View File

@ -133,13 +133,13 @@ Evaluate a model's pipeline components.
> print(scorer.scores)
> ```
| Name | Type | Description |
| -------------------------------------------- | -------- | ------------------------------------------------------------------------------------- |
| `docs_golds` | iterable | Tuples of `Doc` and `GoldParse` objects. |
| `verbose` | bool | Print debugging information. |
| `batch_size` | int | The batch size to use. |
| `scorer` | `Scorer` | Optional [`Scorer`](/api/scorer) to use. If not passed in, a new one will be created. |
| `component_cfg` <Tag variant="new">2.1</Tag> | dict | Config parameters for specific pipeline components, keyed by component name. |
| Name | Type | Description |
| -------------------------------------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `docs_golds` | iterable | Tuples of `Doc` and `GoldParse` objects, or `(text, annotations)` tuples of raw text and a dict (see [simple training style](/usage/training#training-simple-style)). |
| `verbose` | bool | Print debugging information. |
| `batch_size` | int | The batch size to use. |
| `scorer` | `Scorer` | Optional [`Scorer`](/api/scorer) to use. If not passed in, a new one will be created. |
| `component_cfg` <Tag variant="new">2.1</Tag> | dict | Config parameters for specific pipeline components, keyed by component name. |
## Language.begin_training {#begin_training tag="method"}