mirror of
https://github.com/explosion/spaCy.git
synced 2025-05-03 15:23:41 +03:00
💫 Support simple training format in nlp.evaluate and add tests (#4033)
* Support simple training format in nlp.evaluate and add tests
* Update docs [ci skip]
This commit is contained in:
parent
a3723f439c
commit
fc69da0acb
|
@ -618,7 +618,7 @@ class Language(object):
|
||||||
if component_cfg is None:
|
if component_cfg is None:
|
||||||
component_cfg = {}
|
component_cfg = {}
|
||||||
docs, golds = zip(*docs_golds)
|
docs, golds = zip(*docs_golds)
|
||||||
docs = list(docs)
|
docs = [self.make_doc(doc) if isinstance(doc, basestring_) else doc for doc in docs]
|
||||||
golds = list(golds)
|
golds = list(golds)
|
||||||
for name, pipe in self.pipeline:
|
for name, pipe in self.pipeline:
|
||||||
kwargs = component_cfg.get(name, {})
|
kwargs = component_cfg.get(name, {})
|
||||||
|
@ -628,6 +628,8 @@ class Language(object):
|
||||||
else:
|
else:
|
||||||
docs = pipe.pipe(docs, **kwargs)
|
docs = pipe.pipe(docs, **kwargs)
|
||||||
for doc, gold in zip(docs, golds):
|
for doc, gold in zip(docs, golds):
|
||||||
|
if not isinstance(gold, GoldParse):
|
||||||
|
gold = GoldParse(doc, **gold)
|
||||||
if verbose:
|
if verbose:
|
||||||
print(doc)
|
print(doc)
|
||||||
kwargs = component_cfg.get("scorer", {})
|
kwargs = component_cfg.get("scorer", {})
|
||||||
|
|
57
spacy/tests/test_language.py
Normal file
57
spacy/tests/test_language.py
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from spacy.vocab import Vocab
|
||||||
|
from spacy.language import Language
|
||||||
|
from spacy.tokens import Doc
|
||||||
|
from spacy.gold import GoldParse
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def nlp():
|
||||||
|
nlp = Language(Vocab())
|
||||||
|
textcat = nlp.create_pipe("textcat")
|
||||||
|
for label in ("POSITIVE", "NEGATIVE"):
|
||||||
|
textcat.add_label(label)
|
||||||
|
nlp.add_pipe(textcat)
|
||||||
|
nlp.begin_training()
|
||||||
|
return nlp
|
||||||
|
|
||||||
|
|
||||||
|
def test_language_update(nlp):
|
||||||
|
text = "hello world"
|
||||||
|
annots = {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
|
||||||
|
doc = Doc(nlp.vocab, words=text.split(" "))
|
||||||
|
gold = GoldParse(doc, **annots)
|
||||||
|
# Update with doc and gold objects
|
||||||
|
nlp.update([doc], [gold])
|
||||||
|
# Update with text and dict
|
||||||
|
nlp.update([text], [annots])
|
||||||
|
# Update with doc object and dict
|
||||||
|
nlp.update([doc], [annots])
|
||||||
|
# Update with text and gold object
|
||||||
|
nlp.update([text], [gold])
|
||||||
|
# Update badly
|
||||||
|
with pytest.raises(IndexError):
|
||||||
|
nlp.update([doc], [])
|
||||||
|
with pytest.raises(IndexError):
|
||||||
|
nlp.update([], [gold])
|
||||||
|
|
||||||
|
|
||||||
|
def test_language_evaluate(nlp):
|
||||||
|
text = "hello world"
|
||||||
|
annots = {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
|
||||||
|
doc = Doc(nlp.vocab, words=text.split(" "))
|
||||||
|
gold = GoldParse(doc, **annots)
|
||||||
|
# Evaluate with doc and gold objects
|
||||||
|
nlp.evaluate([(doc, gold)])
|
||||||
|
# Evaluate with text and dict
|
||||||
|
nlp.evaluate([(text, annots)])
|
||||||
|
# Evaluate with doc object and dict
|
||||||
|
nlp.evaluate([(doc, annots)])
|
||||||
|
# Evaluate with text and gold object
|
||||||
|
nlp.evaluate([(text, gold)])
|
||||||
|
# Evaluate badly
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
nlp.evaluate([text, gold])
|
|
@ -134,8 +134,8 @@ Evaluate a model's pipeline components.
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Type | Description |
|
| Name | Type | Description |
|
||||||
| -------------------------------------------- | -------- | ------------------------------------------------------------------------------------- |
|
| -------------------------------------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `docs_golds` | iterable | Tuples of `Doc` and `GoldParse` objects. |
|
| `docs_golds` | iterable | Tuples of `Doc` and `GoldParse` objects, or `(text, annotations)` tuples of raw text and a dict of annotations (see [simple training style](/usage/training#training-simple-style)). |
|
||||||
| `verbose` | bool | Print debugging information. |
|
| `verbose` | bool | Print debugging information. |
|
||||||
| `batch_size` | int | The batch size to use. |
|
| `batch_size` | int | The batch size to use. |
|
||||||
| `scorer` | `Scorer` | Optional [`Scorer`](/api/scorer) to use. If not passed in, a new one will be created. |
|
| `scorer` | `Scorer` | Optional [`Scorer`](/api/scorer) to use. If not passed in, a new one will be created. |
|
||||||
|
|
Loading…
Reference in New Issue
Block a user