From e367864e59ed366adb8f1b416f91828c05eac3a0 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Tue, 10 Sep 2019 11:14:46 +0200 Subject: [PATCH 1/2] Update Ukrainian create_lemmatizer kwargs (#4266) Allow Ukrainian create_lemmatizer to accept lookups kwarg. --- spacy/lang/uk/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/lang/uk/__init__.py b/spacy/lang/uk/__init__.py index d152c08a4..6a4ed546d 100644 --- a/spacy/lang/uk/__init__.py +++ b/spacy/lang/uk/__init__.py @@ -24,7 +24,7 @@ class UkrainianDefaults(Language.Defaults): stop_words = STOP_WORDS @classmethod - def create_lemmatizer(cls, nlp=None): + def create_lemmatizer(cls, nlp=None, **kwargs): return UkrainianLemmatizer() From 669a7d37ce898c0c29f0c6872171a3f604c92d76 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 10 Sep 2019 19:45:16 +0200 Subject: [PATCH 2/2] Exclude vocab when testing to_bytes --- .../serialize/test_serialize_pipeline.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/spacy/tests/serialize/test_serialize_pipeline.py b/spacy/tests/serialize/test_serialize_pipeline.py index a5a3f5069..efa7ef625 100644 --- a/spacy/tests/serialize/test_serialize_pipeline.py +++ b/spacy/tests/serialize/test_serialize_pipeline.py @@ -41,8 +41,8 @@ def test_serialize_parser_roundtrip_bytes(en_vocab, Parser): parser.model, _ = parser.Model(10) new_parser = Parser(en_vocab) new_parser.model, _ = new_parser.Model(10) - new_parser = new_parser.from_bytes(parser.to_bytes()) - assert new_parser.to_bytes() == parser.to_bytes() + new_parser = new_parser.from_bytes(parser.to_bytes(exclude=["vocab"])) + assert new_parser.to_bytes(exclude=["vocab"]) == parser.to_bytes(exclude=["vocab"]) @pytest.mark.parametrize("Parser", test_parsers) @@ -55,8 +55,8 @@ def test_serialize_parser_roundtrip_disk(en_vocab, Parser): parser_d = Parser(en_vocab) parser_d.model, _ = parser_d.Model(0) parser_d = parser_d.from_disk(file_path) - parser_bytes = parser.to_bytes(exclude=["model"]) - parser_d_bytes = parser_d.to_bytes(exclude=["model"]) + parser_bytes = parser.to_bytes(exclude=["model", "vocab"]) + parser_d_bytes = parser_d.to_bytes(exclude=["model", "vocab"]) assert parser_bytes == parser_d_bytes @@ -64,7 +64,7 @@ def test_to_from_bytes(parser, blank_parser): assert parser.model is not True assert blank_parser.model is True assert blank_parser.moves.n_moves != parser.moves.n_moves - bytes_data = parser.to_bytes() + bytes_data = parser.to_bytes(exclude=["vocab"]) blank_parser.from_bytes(bytes_data) assert blank_parser.model is not True assert blank_parser.moves.n_moves == parser.moves.n_moves @@ -94,15 +94,12 @@ def test_serialize_tagger_roundtrip_disk(en_vocab, taggers): assert tagger1_d.to_bytes() == tagger2_d.to_bytes() -# I can't get this to work with the lookup tables for 3.5 :(. Something to do -# with the dict ordering -@pytest.mark.xfail def test_serialize_tensorizer_roundtrip_bytes(en_vocab): tensorizer = Tensorizer(en_vocab) tensorizer.model = tensorizer.Model() - tensorizer_b = tensorizer.to_bytes() + tensorizer_b = tensorizer.to_bytes(exclude=["vocab"]) new_tensorizer = Tensorizer(en_vocab).from_bytes(tensorizer_b) - assert new_tensorizer.to_bytes() == tensorizer_b + assert new_tensorizer.to_bytes(exclude=["vocab"]) == tensorizer_b def test_serialize_tensorizer_roundtrip_disk(en_vocab): @@ -112,16 +109,15 @@ def test_serialize_tensorizer_roundtrip_disk(en_vocab): file_path = d / "tensorizer" tensorizer.to_disk(file_path) tensorizer_d = Tensorizer(en_vocab).from_disk(file_path) - assert tensorizer.to_bytes() == tensorizer_d.to_bytes() + assert tensorizer.to_bytes(exclude=["vocab"]) == tensorizer_d.to_bytes( + exclude=["vocab"] + ) -# I can't get this to work with the lookup tables for 3.5 :(. Something to do -# with the dict ordering -@pytest.mark.xfail def test_serialize_textcat_empty(en_vocab): # See issue #1105 textcat = TextCategorizer(en_vocab, labels=["ENTITY", "ACTION", "MODIFIER"]) - textcat.to_bytes() + textcat.to_bytes(exclude=["vocab"]) @pytest.mark.parametrize("Parser", test_parsers) @@ -134,13 +130,17 @@ def test_serialize_pipe_exclude(en_vocab, Parser): parser = Parser(en_vocab) parser.model, _ = parser.Model(0) parser.cfg["foo"] = "bar" - new_parser = get_new_parser().from_bytes(parser.to_bytes()) + new_parser = get_new_parser().from_bytes(parser.to_bytes(exclude=["vocab"])) assert "foo" in new_parser.cfg - new_parser = get_new_parser().from_bytes(parser.to_bytes(), exclude=["cfg"]) + new_parser = get_new_parser().from_bytes( + parser.to_bytes(exclude=["vocab"]), exclude=["cfg"] + ) assert "foo" not in new_parser.cfg - new_parser = get_new_parser().from_bytes(parser.to_bytes(exclude=["cfg"])) + new_parser = get_new_parser().from_bytes( + parser.to_bytes(exclude=["cfg"]), exclude=["vocab"] + ) assert "foo" not in new_parser.cfg with pytest.raises(ValueError): - parser.to_bytes(cfg=False) + parser.to_bytes(cfg=False, exclude=["vocab"]) with pytest.raises(ValueError): - get_new_parser().from_bytes(parser.to_bytes(), cfg=False) + get_new_parser().from_bytes(parser.to_bytes(exclude=["vocab"]), cfg=False)