Merge pull request #210 from henningpeters/tmpdir

Avoid writing test files to the hard-coded /tmp path (not cross-platform compatible)
This commit is contained in:
Matthew Honnibal 2015-12-17 11:34:44 +01:00
commit 96638f68f1
3 changed files with 19 additions and 12 deletions

View File

@@ -4,6 +4,9 @@ from spacy.serialize.packer import Packer
from spacy.attrs import ORTH, SPACY from spacy.attrs import ORTH, SPACY
from spacy.tokens import Doc from spacy.tokens import Doc
import math import math
import tempfile
import shutil
import os
@pytest.mark.models @pytest.mark.models
@@ -11,17 +14,21 @@ def test_read_write(EN):
doc1 = EN(u'This is a simple test. With a couple of sentences.') doc1 = EN(u'This is a simple test. With a couple of sentences.')
doc2 = EN(u'This is another test document.') doc2 = EN(u'This is another test document.')
with open('/tmp/spacy_docs.bin', 'wb') as file_: try:
file_.write(doc1.to_bytes()) tmp_dir = tempfile.mkdtemp()
file_.write(doc2.to_bytes()) with open(os.path.join(tmp_dir, 'spacy_docs.bin'), 'wb') as file_:
file_.write(doc1.to_bytes())
file_.write(doc2.to_bytes())
with open('/tmp/spacy_docs.bin', 'rb') as file_: with open(os.path.join(tmp_dir, 'spacy_docs.bin'), 'rb') as file_:
bytes1, bytes2 = Doc.read_bytes(file_) bytes1, bytes2 = Doc.read_bytes(file_)
r1 = Doc(EN.vocab).from_bytes(bytes1) r1 = Doc(EN.vocab).from_bytes(bytes1)
r2 = Doc(EN.vocab).from_bytes(bytes2) r2 = Doc(EN.vocab).from_bytes(bytes2)
assert r1.string == doc1.string assert r1.string == doc1.string
assert r2.string == doc2.string assert r2.string == doc2.string
finally:
shutil.rmtree(tmp_dir)
@pytest.mark.models @pytest.mark.models

View File

@@ -75,7 +75,7 @@ def test_count_by(nlp):
@pytest.mark.models @pytest.mark.models
def test_read_bytes(nlp): def test_read_bytes(nlp):
from spacy.tokens.doc import Doc from spacy.tokens.doc import Doc
loc = '/tmp/test_serialize.bin' loc = 'test_serialize.bin'
with open(loc, 'wb') as file_: with open(loc, 'wb') as file_:
file_.write(nlp(u'This is a document.').to_bytes()) file_.write(nlp(u'This is a document.').to_bytes())
file_.write(nlp(u'This is another.').to_bytes()) file_.write(nlp(u'This is another.').to_bytes())

View File

@@ -154,9 +154,9 @@ def test_efficient_binary_serialization(doc):
from spacy.tokens.doc import Doc from spacy.tokens.doc import Doc
byte_string = doc.to_bytes() byte_string = doc.to_bytes()
open('/tmp/moby_dick.bin', 'wb').write(byte_string) open('moby_dick.bin', 'wb').write(byte_string)
nlp = spacy.en.English() nlp = spacy.en.English()
for byte_string in Doc.read_bytes(open('/tmp/moby_dick.bin', 'rb')): for byte_string in Doc.read_bytes(open('moby_dick.bin', 'rb')):
doc = Doc(nlp.vocab) doc = Doc(nlp.vocab)
doc.from_bytes(byte_string) doc.from_bytes(byte_string)