mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Add logger warning when serializing user hooks (#6595)
Add a warning that user hooks are lost on serialization. Add a `user_hooks` exclude key so that pickling, which saves the hooks separately, can skip the warning.
This commit is contained in:
parent
cabd4ae5b1
commit
5ca57d8221
|
@ -123,6 +123,8 @@ class Warnings:
|
||||||
"token '{text}'. Check that your pipeline includes components that "
|
"token '{text}'. Check that your pipeline includes components that "
|
||||||
"assign token.pos, typically 'tagger'+'attribute_ruler' or "
|
"assign token.pos, typically 'tagger'+'attribute_ruler' or "
|
||||||
"'morphologizer'.")
|
"'morphologizer'.")
|
||||||
|
W109 = ("Unable to save user hooks while serializing the doc. Re-add any "
|
||||||
|
"required user hooks to the doc after processing.")
|
||||||
|
|
||||||
|
|
||||||
@add_codes
|
@add_codes
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
import pytest
|
import pytest
|
||||||
import numpy
|
import numpy
|
||||||
|
import logging
|
||||||
|
import mock
|
||||||
from spacy.tokens import Doc, Span
|
from spacy.tokens import Doc, Span
|
||||||
from spacy.vocab import Vocab
|
from spacy.vocab import Vocab
|
||||||
from spacy.lexeme import Lexeme
|
from spacy.lexeme import Lexeme
|
||||||
|
@ -147,6 +149,17 @@ def test_doc_api_serialize(en_tokenizer, text):
|
||||||
assert [t.text for t in tokens] == [t.text for t in new_tokens]
|
assert [t.text for t in tokens] == [t.text for t in new_tokens]
|
||||||
assert [t.orth for t in tokens] == [t.orth for t in new_tokens]
|
assert [t.orth for t in tokens] == [t.orth for t in new_tokens]
|
||||||
|
|
||||||
|
def inner_func(d1, d2):
|
||||||
|
return "hello!"
|
||||||
|
|
||||||
|
logger = logging.getLogger("spacy")
|
||||||
|
with mock.patch.object(logger, "warning") as mock_warning:
|
||||||
|
_ = tokens.to_bytes()
|
||||||
|
mock_warning.assert_not_called()
|
||||||
|
tokens.user_hooks["similarity"] = inner_func
|
||||||
|
_ = tokens.to_bytes()
|
||||||
|
mock_warning.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
def test_doc_api_set_ents(en_tokenizer):
|
def test_doc_api_set_ents(en_tokenizer):
|
||||||
text = "I use goggle chrone to surf the web"
|
text = "I use goggle chrone to surf the web"
|
||||||
|
|
|
@ -1273,6 +1273,8 @@ cdef class Doc:
|
||||||
serializers["user_data_keys"] = lambda: srsly.msgpack_dumps(user_data_keys)
|
serializers["user_data_keys"] = lambda: srsly.msgpack_dumps(user_data_keys)
|
||||||
if "user_data_values" not in exclude:
|
if "user_data_values" not in exclude:
|
||||||
serializers["user_data_values"] = lambda: srsly.msgpack_dumps(user_data_values)
|
serializers["user_data_values"] = lambda: srsly.msgpack_dumps(user_data_values)
|
||||||
|
if "user_hooks" not in exclude and any((self.user_hooks, self.user_token_hooks, self.user_span_hooks)):
|
||||||
|
util.logger.warning(Warnings.W109)
|
||||||
return util.to_dict(serializers, exclude)
|
return util.to_dict(serializers, exclude)
|
||||||
|
|
||||||
def from_dict(self, msg, *, exclude=tuple()):
|
def from_dict(self, msg, *, exclude=tuple()):
|
||||||
|
@ -1649,7 +1651,7 @@ cdef int [:,:] _get_lca_matrix(Doc doc, int start, int end):
|
||||||
|
|
||||||
|
|
||||||
def pickle_doc(doc):
|
def pickle_doc(doc):
|
||||||
bytes_data = doc.to_bytes(exclude=["vocab", "user_data"])
|
bytes_data = doc.to_bytes(exclude=["vocab", "user_data", "user_hooks"])
|
||||||
hooks_and_data = (doc.user_data, doc.user_hooks, doc.user_span_hooks,
|
hooks_and_data = (doc.user_data, doc.user_hooks, doc.user_span_hooks,
|
||||||
doc.user_token_hooks)
|
doc.user_token_hooks)
|
||||||
return (unpickle_doc, (doc.vocab, srsly.pickle_dumps(hooks_and_data), bytes_data))
|
return (unpickle_doc, (doc.vocab, srsly.pickle_dumps(hooks_and_data), bytes_data))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user