Ensure path in Doc.to_disk/from_disk (resolves #1521)

Also add Doc serialization tests with both Path and string path options
This commit is contained in:
ines 2017-11-09 02:29:03 +01:00
parent 49fd5a646f
commit 1c218397f6
2 changed files with 36 additions and 0 deletions

View File

@ -0,0 +1,34 @@
# coding: utf-8
from __future__ import unicode_literals
from ..util import make_tempdir, get_doc
from ...tokens import Doc
from ...compat import path2str
import pytest
def test_serialize_doc_roundtrip_bytes(en_vocab):
    """Check that a Doc rebuilt from its own bytes serializes identically.

    en_vocab: vocabulary object handed to both `get_doc` and `Doc`
        (presumably supplied as a pytest fixture -- confirm in conftest).
    """
    original = get_doc(en_vocab, words=['hello', 'world'])
    payload = original.to_bytes()
    # Load the payload into a fresh Doc that shares the same vocab.
    restored = Doc(en_vocab).from_bytes(payload)
    assert restored.to_bytes() == payload
def test_serialize_doc_roundtrip_disk(en_vocab):
    """Check that to_disk/from_disk round-trips a Doc via a joined path object.

    The location is built with the `/` operator on whatever `make_tempdir`
    yields and is passed to `to_disk`/`from_disk` without conversion.

    en_vocab: vocabulary object handed to both `get_doc` and `Doc`
        (presumably supplied as a pytest fixture -- confirm in conftest).
    """
    original = get_doc(en_vocab, words=['hello', 'world'])
    with make_tempdir() as tmp_dir:
        target = tmp_dir / 'doc'
        original.to_disk(target)
        restored = Doc(en_vocab).from_disk(target)
        # Compare serialized forms: both Docs must produce the same bytes.
        assert original.to_bytes() == restored.to_bytes()
def test_serialize_doc_roundtrip_disk_str_path(en_vocab):
    """Check that to_disk/from_disk round-trips a Doc via a converted path.

    Same as the plain disk round-trip, except the location is passed through
    `path2str` before being given to `to_disk`/`from_disk`.

    en_vocab: vocabulary object handed to both `get_doc` and `Doc`
        (presumably supplied as a pytest fixture -- confirm in conftest).
    """
    original = get_doc(en_vocab, words=['hello', 'world'])
    with make_tempdir() as tmp_dir:
        # Convert up front so both calls receive the path2str result.
        target = path2str(tmp_dir / 'doc')
        original.to_disk(target)
        restored = Doc(en_vocab).from_disk(target)
        # Compare serialized forms: both Docs must produce the same bytes.
        assert original.to_bytes() == restored.to_bytes()

View File

@ -737,6 +737,7 @@ cdef class Doc:
path (unicode or Path): A path to a directory, which will be created if
it doesn't exist. Paths may be either strings or Path-like objects.
"""
path = util.ensure_path(path)
with path.open('wb') as file_:
file_.write(self.to_bytes(**exclude))
@ -748,6 +749,7 @@ cdef class Doc:
strings or `Path`-like objects.
RETURNS (Doc): The modified `Doc` object.
"""
path = util.ensure_path(path)
with path.open('rb') as file_:
bytes_data = file_.read()
return self.from_bytes(bytes_data, **exclude)