mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Ensure path in Doc.to_disk/from_disk (resolves #1521)
Also add Doc serialization tests with both Path and string path options
This commit is contained in:
parent
49fd5a646f
commit
1c218397f6
34
spacy/tests/serialize/test_serialize_doc.py
Normal file
34
spacy/tests/serialize/test_serialize_doc.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from ..util import make_tempdir, get_doc
|
||||||
|
from ...tokens import Doc
|
||||||
|
from ...compat import path2str
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def test_serialize_doc_roundtrip_bytes(en_vocab):
    """Check that a Doc survives a to_bytes/from_bytes round trip.

    A two-word Doc is serialized to bytes, loaded into a fresh Doc that
    shares the same vocab, and re-serialized; both byte strings must match.
    """
    original = get_doc(en_vocab, words=['hello', 'world'])
    payload = original.to_bytes()
    restored = Doc(en_vocab).from_bytes(payload)
    # Compare serialized forms rather than the Doc objects themselves.
    assert restored.to_bytes() == payload
|
||||||
|
|
||||||
|
|
||||||
|
def test_serialize_doc_roundtrip_disk(en_vocab):
    """Check that a Doc survives a to_disk/from_disk round trip.

    The location passed to to_disk/from_disk is the object produced by
    joining the temporary directory with 'doc' (no string conversion).
    """
    original = get_doc(en_vocab, words=['hello', 'world'])
    with make_tempdir() as tmp_dir:
        target = tmp_dir / 'doc'
        original.to_disk(target)
        restored = Doc(en_vocab).from_disk(target)
        # Both docs must serialize to identical bytes after the round trip.
        assert original.to_bytes() == restored.to_bytes()
|
||||||
|
|
||||||
|
|
||||||
|
def test_serialize_doc_roundtrip_disk_str_path(en_vocab):
    """Check the to_disk/from_disk round trip when the path is converted.

    Same as the disk round-trip test, except the location is first passed
    through path2str before being handed to to_disk/from_disk.
    """
    original = get_doc(en_vocab, words=['hello', 'world'])
    with make_tempdir() as tmp_dir:
        # Convert the joined path with path2str so the converted form is
        # what to_disk/from_disk receive.
        target = path2str(tmp_dir / 'doc')
        original.to_disk(target)
        restored = Doc(en_vocab).from_disk(target)
        assert original.to_bytes() == restored.to_bytes()
|
|
@ -737,6 +737,7 @@ cdef class Doc:
|
||||||
path (unicode or Path): A path to a directory, which will be created if
|
path (unicode or Path): A path to a directory, which will be created if
|
||||||
it doesn't exist. Paths may be either strings or Path-like objects.
|
it doesn't exist. Paths may be either strings or Path-like objects.
|
||||||
"""
|
"""
|
||||||
|
path = util.ensure_path(path)
|
||||||
with path.open('wb') as file_:
|
with path.open('wb') as file_:
|
||||||
file_.write(self.to_bytes(**exclude))
|
file_.write(self.to_bytes(**exclude))
|
||||||
|
|
||||||
|
@ -748,6 +749,7 @@ cdef class Doc:
|
||||||
strings or `Path`-like objects.
|
strings or `Path`-like objects.
|
||||||
RETURNS (Doc): The modified `Doc` object.
|
RETURNS (Doc): The modified `Doc` object.
|
||||||
"""
|
"""
|
||||||
|
path = util.ensure_path(path)
|
||||||
with path.open('rb') as file_:
|
with path.open('rb') as file_:
|
||||||
bytes_data = file_.read()
|
bytes_data = file_.read()
|
||||||
return self.from_bytes(bytes_data, **exclude)
|
return self.from_bytes(bytes_data, **exclude)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user