mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Ensure path in Doc.to_disk/from_disk (resolves #1521)
Also add Doc serialization tests with both Path and string path options
This commit is contained in:
parent
49fd5a646f
commit
1c218397f6
34
spacy/tests/serialize/test_serialize_doc.py
Normal file
34
spacy/tests/serialize/test_serialize_doc.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ..util import make_tempdir, get_doc
|
||||
from ...tokens import Doc
|
||||
from ...compat import path2str
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_serialize_doc_roundtrip_bytes(en_vocab):
    """Serialize a Doc to bytes, load it into a fresh Doc, and check that
    re-serializing the loaded Doc yields the same bytes."""
    original = get_doc(en_vocab, words=['hello', 'world'])
    serialized = original.to_bytes()
    restored = Doc(en_vocab).from_bytes(serialized)
    assert restored.to_bytes() == serialized
|
||||
|
||||
|
||||
def test_serialize_doc_roundtrip_disk(en_vocab):
    """Write a Doc to disk and read it back, passing the path object built
    from the temporary directory; both Docs must serialize to equal bytes."""
    doc = get_doc(en_vocab, words=['hello', 'world'])
    with make_tempdir() as tmp_dir:
        target = tmp_dir / 'doc'
        doc.to_disk(target)
        reloaded = Doc(en_vocab).from_disk(target)
        # Compare while the temporary directory is still in scope.
        assert doc.to_bytes() == reloaded.to_bytes()
|
||||
|
||||
|
||||
def test_serialize_doc_roundtrip_disk_str_path(en_vocab):
    """Same disk round-trip as the Path-based test, but the path is first
    passed through path2str so to_disk/from_disk receive the string form."""
    doc = get_doc(en_vocab, words=['hello', 'world'])
    with make_tempdir() as tmp_dir:
        target = path2str(tmp_dir / 'doc')
        doc.to_disk(target)
        reloaded = Doc(en_vocab).from_disk(target)
        # Compare while the temporary directory is still in scope.
        assert doc.to_bytes() == reloaded.to_bytes()
|
|
@ -737,6 +737,7 @@ cdef class Doc:
|
|||
path (unicode or Path): A path to a directory, which will be created if
|
||||
it doesn't exist. Paths may be either strings or Path-like objects.
|
||||
"""
|
||||
path = util.ensure_path(path)
|
||||
with path.open('wb') as file_:
|
||||
file_.write(self.to_bytes(**exclude))
|
||||
|
||||
|
@ -748,6 +749,7 @@ cdef class Doc:
|
|||
strings or `Path`-like objects.
|
||||
RETURNS (Doc): The modified `Doc` object.
|
||||
"""
|
||||
path = util.ensure_path(path)
|
||||
with path.open('rb') as file_:
|
||||
bytes_data = file_.read()
|
||||
return self.from_bytes(bytes_data, **exclude)
|
||||
|
|
Loading…
Reference in New Issue
Block a user