From e28de12cbdeb932de0ec4fff901a3bc469bb90c3 Mon Sep 17 00:00:00 2001 From: Claudiu-Vlad Ursache Date: Tue, 13 Feb 2018 20:44:33 +0100 Subject: [PATCH] Ensure files opened in `from_disk` are closed Fixes [issue 1706](https://github.com/explosion/spaCy/issues/1706). --- spacy/language.py | 2 +- spacy/pipeline.pyx | 6 ++-- spacy/syntax/nn_parser.pyx | 2 +- .../serialize/test_serialize_language.py | 28 +++++++++++++++++++ spacy/vectors.pyx | 3 +- 5 files changed, 36 insertions(+), 5 deletions(-) create mode 100644 spacy/tests/serialize/test_serialize_language.py diff --git a/spacy/language.py b/spacy/language.py index a2b945c49..a61b6b09f 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -624,7 +624,7 @@ class Language(object): deserializers = OrderedDict(( ('vocab', lambda p: self.vocab.from_disk(p)), ('tokenizer', lambda p: self.tokenizer.from_disk(p, vocab=False)), - ('meta.json', lambda p: self.meta.update(ujson.load(p.open('r')))) + ('meta.json', lambda p: self.meta.update(util.read_json(p))) )) for name, proc in self.pipeline: if name in disable: diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx index c5f8065de..dae21941e 100644 --- a/spacy/pipeline.pyx +++ b/spacy/pipeline.pyx @@ -214,7 +214,8 @@ class Pipe(object): def _load_cfg(path): if path.exists(): - return ujson.load(path.open()) + with path.open() as file_: + return ujson.load(file_) else: return {} @@ -580,7 +581,8 @@ class Tagger(Pipe): def load_model(p): if self.model is True: self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg) - self.model.from_bytes(p.open('rb').read()) + with p.open('rb') as file_: + self.model.from_bytes(file_.read()) def load_tag_map(p): with p.open('rb') as file_: diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index fa91c697e..f192e3b96 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -887,7 +887,7 @@ cdef class Parser: deserializers = { 'vocab': lambda p: self.vocab.from_disk(p), 'moves': lambda p: self.moves.from_disk(p, strings=False), - 'cfg': lambda p: self.cfg.update(ujson.load(p.open())), + 'cfg': lambda p: self.cfg.update(util.read_json(p)), 'model': lambda p: None } util.from_disk(path, deserializers, exclude) diff --git a/spacy/tests/serialize/test_serialize_language.py b/spacy/tests/serialize/test_serialize_language.py new file mode 100644 index 000000000..1fcf8ef18 --- /dev/null +++ b/spacy/tests/serialize/test_serialize_language.py @@ -0,0 +1,28 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from ..util import make_tempdir +from ...language import Language + +import pytest + + +@pytest.fixture +def meta_data(): + return { + 'name': 'name-in-fixture', + 'version': 'version-in-fixture', + 'description': 'description-in-fixture', + 'author': 'author-in-fixture', + 'email': 'email-in-fixture', + 'url': 'url-in-fixture', + 'license': 'license-in-fixture', + } + + +def test_serialize_language_meta_disk(meta_data): + language = Language(meta=meta_data) + with make_tempdir() as d: + language.to_disk(d) + new_language = Language().from_disk(d) + assert new_language.meta == language.meta diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index 079f6fc84..7daebabe6 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -347,7 +347,8 @@ cdef class Vectors: """ def load_key2row(path): if path.exists(): - self.key2row = msgpack.load(path.open('rb')) + with path.open('rb') as file_: + self.key2row = msgpack.load(file_) for key, row in self.key2row.items(): if row in self._unset: self._unset.remove(row)