💫 Support simple training format in nlp.evaluate and add tests (#4033)

* Support simple training format in nlp.evaluate and add tests
* Update docs [ci skip]
2025-10-30 23:47:31 +03:00 · 2019-07-27 17:30:18 +02:00 · 2019-07-27 17:30:18 +02:00 · fc69da0acb
commit fc69da0acb
parent a3723f439c
3 changed files with 67 additions and 8 deletions
--- a/spacy/language.py
+++ b/spacy/language.py
@ -618,7 +618,7 @@ class Language(object):
        if component_cfg is None:
            component_cfg = {}
        docs, golds = zip(*docs_golds)
-        docs = list(docs)
+        docs = [self.make_doc(doc) if isinstance(doc, basestring_) else doc for doc in docs]
        golds = list(golds)
        for name, pipe in self.pipeline:
            kwargs = component_cfg.get(name, {})
@ -628,6 +628,8 @@ class Language(object):
            else:
                docs = pipe.pipe(docs, **kwargs)
        for doc, gold in zip(docs, golds):
            if not isinstance(gold, GoldParse):
                gold = GoldParse(doc, **gold)
            if verbose:
                print(doc)
            kwargs = component_cfg.get("scorer", {})
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@ -0,0 +1,57 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import pytest
 from spacy.vocab import Vocab
 from spacy.language import Language
 from spacy.tokens import Doc
 from spacy.gold import GoldParse
@pytest.fixture
 def nlp():
    # Shared fixture for the tests below: builds a Language object on an empty
    # Vocab whose only pipeline component is a "textcat" pipe.
    nlp = Language(Vocab())
    textcat = nlp.create_pipe("textcat")
    # Register the two labels used by the "cats" annotations in the tests.
    for label in ("POSITIVE", "NEGATIVE"):
        textcat.add_label(label)
    nlp.add_pipe(textcat)
    # NOTE(review): presumably initializes the component's model so that
    # update()/evaluate() can be called right away - confirm.
    nlp.begin_training()
    return nlp
 def test_language_update(nlp):
    # Checks that nlp.update accepts every combination of Doc-or-raw-text
    # inputs and GoldParse-or-annotation-dict targets, and that it raises
    # IndexError when the two lists differ in length.
    text = "hello world"
    annots = {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
    # Build the Doc/GoldParse equivalents of the raw text and dict above.
    doc = Doc(nlp.vocab, words=text.split(" "))
    gold = GoldParse(doc, **annots)
    # Update with doc and gold objects
    nlp.update([doc], [gold])
    # Update with text and dict
    nlp.update([text], [annots])
    # Update with doc object and dict
    nlp.update([doc], [annots])
    # Update with text and gold object
    nlp.update([text], [gold])
    # Update badly
    # (one doc but no golds, then one gold but no docs)
    with pytest.raises(IndexError):
        nlp.update([doc], [])
    with pytest.raises(IndexError):
        nlp.update([], [gold])
 def test_language_evaluate(nlp):
    # Checks that nlp.evaluate accepts (doc-or-text, gold-or-dict) tuples in
    # every combination, and that a malformed input raises an exception.
    text = "hello world"
    annots = {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
    # Build the Doc/GoldParse equivalents of the raw text and dict above.
    doc = Doc(nlp.vocab, words=text.split(" "))
    gold = GoldParse(doc, **annots)
    # Evaluate with doc and gold objects
    nlp.evaluate([(doc, gold)])
    # Evaluate with text and dict
    nlp.evaluate([(text, annots)])
    # Evaluate with doc object and dict
    nlp.evaluate([(doc, annots)])
    # Evaluate with text and gold object
    nlp.evaluate([(text, gold)])
    # Evaluate badly
    # (a flat [text, gold] list is passed instead of a list of tuples; only
    # the generic Exception type is asserted here)
    with pytest.raises(Exception):
        nlp.evaluate([text, gold])
--- a/website/docs/api/language.md
+++ b/website/docs/api/language.md
@ -134,8 +134,8 @@ Evaluate a model's pipeline components.
 > ```
 | Name                                         | Type     | Description                                                                                                                                                   |
-| -------------------------------------------- | -------- | ------------------------------------------------------------------------------------- |
+| -------------------------------------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `docs_golds`                                 | iterable | Tuples of `Doc` and `GoldParse` objects.                                              |
+| `docs_golds`                                 | iterable | Tuples of `Doc` and `GoldParse` objects or `(text, annotations)` of raw text and a dict (see [simple training style](/usage/training#training-simple-style)). |
 | `verbose`                                    | bool     | Print debugging information.                                                                                                                                  |
 | `batch_size`                                 | int      | The batch size to use.                                                                                                                                        |
 | `scorer`                                     | `Scorer` | Optional [`Scorer`](/api/scorer) to use. If not passed in, a new one will be created.                                                                         |