Update after removing GoldCorpus

This commit is contained in:
Matthew Honnibal 2020-06-20 22:21:24 +02:00
parent 64d00520e2
commit 4bbc277758
4 changed files with 10 additions and 10 deletions

View File

@ -1,6 +1,6 @@
# fmt: off # fmt: off
__title__ = "spacy" __title__ = "spacy"
__version__ = "3.0.0.dev9" __version__ = "3.0.0"
__release__ = True __release__ = True
__download_url__ = "https://github.com/explosion/spacy-models/releases/download" __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"

View File

@ -1,4 +1,4 @@
from .corpus import GoldCorpus from .corpus_docbin import Corpus
from .example import Example from .example import Example
from .align import align from .align import align

View File

@ -1,5 +1,5 @@
import srsly import srsly
from spacy.gold import GoldCorpus from spacy.gold import Corpus
from spacy.lang.en import English from spacy.lang.en import English
from ..util import make_tempdir from ..util import make_tempdir
@ -11,7 +11,7 @@ def test_issue4402():
json_path = tmpdir / "test4402.json" json_path = tmpdir / "test4402.json"
srsly.write_json(json_path, json_data) srsly.write_json(json_path, json_data)
corpus = GoldCorpus(str(json_path), str(json_path)) corpus = Corpus(str(json_path), str(json_path))
train_data = list(corpus.train_dataset(nlp, gold_preproc=True, max_length=0)) train_data = list(corpus.train_dataset(nlp, gold_preproc=True, max_length=0))
# assert that the data got split into 4 sentences # assert that the data got split into 4 sentences

View File

@ -1,7 +1,7 @@
from spacy.errors import AlignmentError from spacy.errors import AlignmentError
from spacy.gold import biluo_tags_from_offsets, offsets_from_biluo_tags from spacy.gold import biluo_tags_from_offsets, offsets_from_biluo_tags
from spacy.gold import spans_from_biluo_tags, iob_to_biluo, align from spacy.gold import spans_from_biluo_tags, iob_to_biluo, align
from spacy.gold import GoldCorpus, docs_to_json from spacy.gold import Corpus, docs_to_json
from spacy.gold.example import Example from spacy.gold.example import Example
from spacy.lang.en import English from spacy.lang.en import English
from spacy.syntax.nonproj import is_nonproj_tree from spacy.syntax.nonproj import is_nonproj_tree
@ -299,7 +299,7 @@ def test_roundtrip_docs_to_json(doc):
with make_tempdir() as tmpdir: with make_tempdir() as tmpdir:
json_file = tmpdir / "roundtrip.json" json_file = tmpdir / "roundtrip.json"
srsly.write_json(json_file, [docs_to_json(doc)]) srsly.write_json(json_file, [docs_to_json(doc)])
goldcorpus = GoldCorpus(train=str(json_file), dev=str(json_file)) goldcorpus = Corpus(train=str(json_file), dev=str(json_file))
reloaded_example = next(goldcorpus.dev_dataset(nlp=nlp)) reloaded_example = next(goldcorpus.dev_dataset(nlp=nlp))
assert len(doc) == goldcorpus.count_train() assert len(doc) == goldcorpus.count_train()
@ -328,7 +328,7 @@ def test_projective_train_vs_nonprojective_dev(doc):
json_file = tmpdir / "test.json" json_file = tmpdir / "test.json"
# write to JSON train dicts # write to JSON train dicts
srsly.write_json(json_file, [docs_to_json(doc)]) srsly.write_json(json_file, [docs_to_json(doc)])
goldcorpus = GoldCorpus(str(json_file), str(json_file)) goldcorpus = Corpus(str(json_file), str(json_file))
train_reloaded_example = next(goldcorpus.train_dataset(nlp)) train_reloaded_example = next(goldcorpus.train_dataset(nlp))
train_goldparse = get_parses_from_example(train_reloaded_example)[0][1] train_goldparse = get_parses_from_example(train_reloaded_example)[0][1]
@ -360,7 +360,7 @@ def test_ignore_misaligned(doc):
data[0]["paragraphs"][0]["raw"] = text.replace("Sarah", "Jane") data[0]["paragraphs"][0]["raw"] = text.replace("Sarah", "Jane")
# write to JSON train dicts # write to JSON train dicts
srsly.write_json(json_file, data) srsly.write_json(json_file, data)
goldcorpus = GoldCorpus(str(json_file), str(json_file)) goldcorpus = Corpus(str(json_file), str(json_file))
with pytest.raises(AlignmentError): with pytest.raises(AlignmentError):
train_reloaded_example = next(goldcorpus.train_dataset(nlp)) train_reloaded_example = next(goldcorpus.train_dataset(nlp))
@ -371,7 +371,7 @@ def test_ignore_misaligned(doc):
data[0]["paragraphs"][0]["raw"] = text.replace("Sarah", "Jane") data[0]["paragraphs"][0]["raw"] = text.replace("Sarah", "Jane")
# write to JSON train dicts # write to JSON train dicts
srsly.write_json(json_file, data) srsly.write_json(json_file, data)
goldcorpus = GoldCorpus(str(json_file), str(json_file)) goldcorpus = Corpus(str(json_file), str(json_file))
# doesn't raise an AlignmentError, but there is nothing to iterate over # doesn't raise an AlignmentError, but there is nothing to iterate over
# because the only example can't be aligned # because the only example can't be aligned
@ -385,7 +385,7 @@ def test_make_orth_variants(doc):
json_file = tmpdir / "test.json" json_file = tmpdir / "test.json"
# write to JSON train dicts # write to JSON train dicts
srsly.write_json(json_file, [docs_to_json(doc)]) srsly.write_json(json_file, [docs_to_json(doc)])
goldcorpus = GoldCorpus(str(json_file), str(json_file)) goldcorpus = Corpus(str(json_file), str(json_file))
# due to randomness, test only that this runs with no errors for now # due to randomness, test only that this runs with no errors for now
train_example = next(goldcorpus.train_dataset(nlp)) train_example = next(goldcorpus.train_dataset(nlp))