avoid writing to /tmp (not cross-platform compatible)

This commit is contained in:
Henning Peters 2015-12-16 19:53:06 +01:00
parent 976259010e
commit 4ada39f472
2 changed files with 18 additions and 11 deletions

View File

@ -4,6 +4,9 @@ from spacy.serialize.packer import Packer
from spacy.attrs import ORTH, SPACY from spacy.attrs import ORTH, SPACY
from spacy.tokens import Doc from spacy.tokens import Doc
import math import math
import tempfile
import shutil
import os
@pytest.mark.models @pytest.mark.models
@ -11,17 +14,21 @@ def test_read_write(EN):
doc1 = EN(u'This is a simple test. With a couple of sentences.') doc1 = EN(u'This is a simple test. With a couple of sentences.')
doc2 = EN(u'This is another test document.') doc2 = EN(u'This is another test document.')
with open('/tmp/spacy_docs.bin', 'wb') as file_: try:
file_.write(doc1.to_bytes()) tmp_dir = tempfile.mkdtemp()
file_.write(doc2.to_bytes()) with open(os.path.join(tmp_dir, 'spacy_docs.bin'), 'wb') as file_:
file_.write(doc1.to_bytes())
file_.write(doc2.to_bytes())
with open('/tmp/spacy_docs.bin', 'rb') as file_: with open(os.path.join(tmp_dir, 'spacy_docs.bin'), 'rb') as file_:
bytes1, bytes2 = Doc.read_bytes(file_) bytes1, bytes2 = Doc.read_bytes(file_)
r1 = Doc(EN.vocab).from_bytes(bytes1) r1 = Doc(EN.vocab).from_bytes(bytes1)
r2 = Doc(EN.vocab).from_bytes(bytes2) r2 = Doc(EN.vocab).from_bytes(bytes2)
assert r1.string == doc1.string assert r1.string == doc1.string
assert r2.string == doc2.string assert r2.string == doc2.string
finally:
shutil.rmtree(tmp_dir)
@pytest.mark.models @pytest.mark.models

View File

@ -154,9 +154,9 @@ def test_efficient_binary_serialization(doc):
from spacy.tokens.doc import Doc from spacy.tokens.doc import Doc
byte_string = doc.to_bytes() byte_string = doc.to_bytes()
open('/tmp/moby_dick.bin', 'wb').write(byte_string) open('moby_dick.bin', 'wb').write(byte_string)
nlp = spacy.en.English() nlp = spacy.en.English()
for byte_string in Doc.read_bytes(open('/tmp/moby_dick.bin', 'rb')): for byte_string in Doc.read_bytes(open('moby_dick.bin', 'rb')):
doc = Doc(nlp.vocab) doc = Doc(nlp.vocab)
doc.from_bytes(byte_string) doc.from_bytes(byte_string)