mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-11 09:00:36 +03:00
fix test_roundtrip_docs_to_docbin
This commit is contained in:
parent
0b3985d307
commit
bb87e8c2b1
|
@ -5,7 +5,7 @@ from spacy.gold import Corpus, docs_to_json
|
||||||
from spacy.gold.example import Example
|
from spacy.gold.example import Example
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.syntax.nonproj import is_nonproj_tree
|
from spacy.syntax.nonproj import is_nonproj_tree
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc, DocBin
|
||||||
from spacy.util import get_words_and_spaces, compounding, minibatch
|
from spacy.util import get_words_and_spaces, compounding, minibatch
|
||||||
import pytest
|
import pytest
|
||||||
import srsly
|
import srsly
|
||||||
|
@ -349,7 +349,7 @@ def test_iob_to_biluo():
|
||||||
iob_to_biluo(bad_iob)
|
iob_to_biluo(bad_iob)
|
||||||
|
|
||||||
|
|
||||||
def test_roundtrip_docs_to_json(doc):
|
def test_roundtrip_docs_to_docbin(doc):
|
||||||
nlp = English()
|
nlp = English()
|
||||||
text = doc.text
|
text = doc.text
|
||||||
idx = [t.idx for t in doc]
|
idx = [t.idx for t in doc]
|
||||||
|
@ -362,14 +362,16 @@ def test_roundtrip_docs_to_json(doc):
|
||||||
cats = doc.cats
|
cats = doc.cats
|
||||||
ents = [(e.start_char, e.end_char, e.label_) for e in doc.ents]
|
ents = [(e.start_char, e.end_char, e.label_) for e in doc.ents]
|
||||||
|
|
||||||
# roundtrip to JSON
|
# roundtrip to DocBin
|
||||||
with make_tempdir() as tmpdir:
|
with make_tempdir() as tmpdir:
|
||||||
json_file = tmpdir / "roundtrip.json"
|
output_file = tmpdir / "roundtrip.spacy"
|
||||||
srsly.write_json(json_file, [docs_to_json(doc)])
|
data = DocBin(docs=[doc]).to_bytes()
|
||||||
goldcorpus = Corpus(train=str(json_file), dev=str(json_file))
|
with output_file.open("wb") as file_:
|
||||||
|
file_.write(data)
|
||||||
|
goldcorpus = Corpus(train_loc=str(output_file), dev_loc=str(output_file))
|
||||||
|
|
||||||
reloaded_example = next(goldcorpus.dev_dataset(nlp=nlp))
|
reloaded_example = next(goldcorpus.dev_dataset(nlp=nlp))
|
||||||
assert len(doc) == goldcorpus.count_train()
|
assert len(doc) == goldcorpus.count_train(nlp)
|
||||||
assert text == reloaded_example.reference.text
|
assert text == reloaded_example.reference.text
|
||||||
assert idx == [t.idx for t in reloaded_example.reference]
|
assert idx == [t.idx for t in reloaded_example.reference]
|
||||||
assert tags == [t.tag_ for t in reloaded_example.reference]
|
assert tags == [t.tag_ for t in reloaded_example.reference]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user