mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-14 11:36:24 +03:00
Merge pull request #210 from henningpeters/tmpdir
avoid writing to /tmp (not cross-platform compatible)
This commit is contained in:
commit
96638f68f1
|
@@ -4,6 +4,9 @@ from spacy.serialize.packer import Packer
|
||||||
from spacy.attrs import ORTH, SPACY
|
from spacy.attrs import ORTH, SPACY
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc
|
||||||
import math
|
import math
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.models
|
@pytest.mark.models
|
||||||
|
@@ -11,17 +14,21 @@ def test_read_write(EN):
|
||||||
doc1 = EN(u'This is a simple test. With a couple of sentences.')
|
doc1 = EN(u'This is a simple test. With a couple of sentences.')
|
||||||
doc2 = EN(u'This is another test document.')
|
doc2 = EN(u'This is another test document.')
|
||||||
|
|
||||||
with open('/tmp/spacy_docs.bin', 'wb') as file_:
|
try:
|
||||||
|
tmp_dir = tempfile.mkdtemp()
|
||||||
|
with open(os.path.join(tmp_dir, 'spacy_docs.bin'), 'wb') as file_:
|
||||||
file_.write(doc1.to_bytes())
|
file_.write(doc1.to_bytes())
|
||||||
file_.write(doc2.to_bytes())
|
file_.write(doc2.to_bytes())
|
||||||
|
|
||||||
with open('/tmp/spacy_docs.bin', 'rb') as file_:
|
with open(os.path.join(tmp_dir, 'spacy_docs.bin'), 'rb') as file_:
|
||||||
bytes1, bytes2 = Doc.read_bytes(file_)
|
bytes1, bytes2 = Doc.read_bytes(file_)
|
||||||
r1 = Doc(EN.vocab).from_bytes(bytes1)
|
r1 = Doc(EN.vocab).from_bytes(bytes1)
|
||||||
r2 = Doc(EN.vocab).from_bytes(bytes2)
|
r2 = Doc(EN.vocab).from_bytes(bytes2)
|
||||||
|
|
||||||
assert r1.string == doc1.string
|
assert r1.string == doc1.string
|
||||||
assert r2.string == doc2.string
|
assert r2.string == doc2.string
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(tmp_dir)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.models
|
@pytest.mark.models
|
||||||
|
|
|
@@ -75,7 +75,7 @@ def test_count_by(nlp):
|
||||||
@pytest.mark.models
|
@pytest.mark.models
|
||||||
def test_read_bytes(nlp):
|
def test_read_bytes(nlp):
|
||||||
from spacy.tokens.doc import Doc
|
from spacy.tokens.doc import Doc
|
||||||
loc = '/tmp/test_serialize.bin'
|
loc = 'test_serialize.bin'
|
||||||
with open(loc, 'wb') as file_:
|
with open(loc, 'wb') as file_:
|
||||||
file_.write(nlp(u'This is a document.').to_bytes())
|
file_.write(nlp(u'This is a document.').to_bytes())
|
||||||
file_.write(nlp(u'This is another.').to_bytes())
|
file_.write(nlp(u'This is another.').to_bytes())
|
||||||
|
|
|
@@ -154,9 +154,9 @@ def test_efficient_binary_serialization(doc):
|
||||||
from spacy.tokens.doc import Doc
|
from spacy.tokens.doc import Doc
|
||||||
|
|
||||||
byte_string = doc.to_bytes()
|
byte_string = doc.to_bytes()
|
||||||
open('/tmp/moby_dick.bin', 'wb').write(byte_string)
|
open('moby_dick.bin', 'wb').write(byte_string)
|
||||||
|
|
||||||
nlp = spacy.en.English()
|
nlp = spacy.en.English()
|
||||||
for byte_string in Doc.read_bytes(open('/tmp/moby_dick.bin', 'rb')):
|
for byte_string in Doc.read_bytes(open('moby_dick.bin', 'rb')):
|
||||||
doc = Doc(nlp.vocab)
|
doc = Doc(nlp.vocab)
|
||||||
doc.from_bytes(byte_string)
|
doc.from_bytes(byte_string)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user