Update after removing GoldCorpus

2026-03-03 19:31:35 +03:00 · 2020-06-20 22:21:24 +02:00 · 2020-06-20 22:21:24 +02:00 · 4bbc277758
commit 4bbc277758
parent 64d00520e2
4 changed files with 10 additions and 10 deletions
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -1,6 +1,6 @@
 # fmt: off
 __title__ = "spacy"
-__version__ = "3.0.0.dev9"
+__version__ = "3.0.0"
 __release__ = True
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
--- a/spacy/gold/__init__.py
+++ b/spacy/gold/__init__.py
@@ -1,4 +1,4 @@
-from .corpus import GoldCorpus
+from .corpus_docbin import Corpus
 from .example import Example
 from .align import align

--- a/spacy/tests/regression/test_issue4402.py
+++ b/spacy/tests/regression/test_issue4402.py
@@ -1,5 +1,5 @@
 import srsly
-from spacy.gold import GoldCorpus
+from spacy.gold import Corpus
 from spacy.lang.en import English

 from ..util import make_tempdir
@@ -11,7 +11,7 @@ def test_issue4402():
        json_path = tmpdir / "test4402.json"
        srsly.write_json(json_path, json_data)

-        corpus = GoldCorpus(str(json_path), str(json_path))
+        corpus = Corpus(str(json_path), str(json_path))

        train_data = list(corpus.train_dataset(nlp, gold_preproc=True, max_length=0))
        # assert that the data got split into 4 sentences
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@@ -1,7 +1,7 @@
 from spacy.errors import AlignmentError
 from spacy.gold import biluo_tags_from_offsets, offsets_from_biluo_tags
 from spacy.gold import spans_from_biluo_tags, iob_to_biluo, align
-from spacy.gold import GoldCorpus, docs_to_json
+from spacy.gold import Corpus, docs_to_json
 from spacy.gold.example import Example
 from spacy.lang.en import English
 from spacy.syntax.nonproj import is_nonproj_tree
@@ -299,7 +299,7 @@ def test_roundtrip_docs_to_json(doc):
    with make_tempdir() as tmpdir:
        json_file = tmpdir / "roundtrip.json"
        srsly.write_json(json_file, [docs_to_json(doc)])
-        goldcorpus = GoldCorpus(train=str(json_file), dev=str(json_file))
+        goldcorpus = Corpus(train=str(json_file), dev=str(json_file))

        reloaded_example = next(goldcorpus.dev_dataset(nlp=nlp))
        assert len(doc) == goldcorpus.count_train()
@@ -328,7 +328,7 @@ def test_projective_train_vs_nonprojective_dev(doc):
        json_file = tmpdir / "test.json"
        # write to JSON train dicts
        srsly.write_json(json_file, [docs_to_json(doc)])
-        goldcorpus = GoldCorpus(str(json_file), str(json_file))
+        goldcorpus = Corpus(str(json_file), str(json_file))

        train_reloaded_example = next(goldcorpus.train_dataset(nlp))
        train_goldparse = get_parses_from_example(train_reloaded_example)[0][1]
@@ -360,7 +360,7 @@ def test_ignore_misaligned(doc):
        data[0]["paragraphs"][0]["raw"] = text.replace("Sarah", "Jane")
        # write to JSON train dicts
        srsly.write_json(json_file, data)
-        goldcorpus = GoldCorpus(str(json_file), str(json_file))
+        goldcorpus = Corpus(str(json_file), str(json_file))

        with pytest.raises(AlignmentError):
            train_reloaded_example = next(goldcorpus.train_dataset(nlp))
@@ -371,7 +371,7 @@ def test_ignore_misaligned(doc):
        data[0]["paragraphs"][0]["raw"] = text.replace("Sarah", "Jane")
        # write to JSON train dicts
        srsly.write_json(json_file, data)
-        goldcorpus = GoldCorpus(str(json_file), str(json_file))
+        goldcorpus = Corpus(str(json_file), str(json_file))

        # doesn't raise an AlignmentError, but there is nothing to iterate over
        # because the only example can't be aligned
@@ -385,7 +385,7 @@ def test_make_orth_variants(doc):
        json_file = tmpdir / "test.json"
        # write to JSON train dicts
        srsly.write_json(json_file, [docs_to_json(doc)])
-        goldcorpus = GoldCorpus(str(json_file), str(json_file))
+        goldcorpus = Corpus(str(json_file), str(json_file))

        # due to randomness, test only that this runs with no errors for now
        train_example = next(goldcorpus.train_dataset(nlp))