Merge pull request #8285 from adrianeboyd/feature/refactor-logger-warnings

Refactor warnings
Adriane Boyd 2021-06-11 10:20:02 +02:00 committed by GitHub
commit dbbeab2506
13 changed files with 82 additions and 27 deletions

View File: spacy/__init__.py

@@ -1,10 +1,10 @@
 from typing import Union, Iterable, Dict, Any
 from pathlib import Path
-import warnings
 import sys

-warnings.filterwarnings("ignore", message="numpy.dtype size changed")  # noqa
-warnings.filterwarnings("ignore", message="numpy.ufunc size changed")  # noqa
+# set library-specific custom warning handling before doing anything else
+from .errors import setup_default_warnings
+
+setup_default_warnings()

 # These are imported as part of the API
 from thinc.api import prefer_gpu, require_gpu, require_cpu  # noqa: F401
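
Note: `setup_default_warnings()` has to run at the very top of the package `__init__` because `warnings.filterwarnings` only affects warnings raised after the filter is registered. A minimal stdlib-only sketch of that ordering constraint, reusing the numpy message from the diff above:

    import warnings

    # Emitted before any filter is registered: handled by the default
    # filters and shown to the user.
    warnings.warn("numpy.dtype size changed")

    # A filter registered afterwards only suppresses later occurrences.
    warnings.filterwarnings("ignore", message="numpy.dtype size changed")
    warnings.warn("numpy.dtype size changed")  # silently dropped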

View File: spacy/errors.py

@@ -1,3 +1,6 @@
+import warnings
+
+
 def add_codes(err_cls):
     """Add error codes to string messages via class attribute names."""
@@ -12,6 +15,33 @@ def add_codes(err_cls):
     return ErrorsWithCodes()


+def setup_default_warnings():
+    # ignore certain numpy warnings
+    filter_warning("ignore", error_msg="numpy.dtype size changed")  # noqa
+    filter_warning("ignore", error_msg="numpy.ufunc size changed")  # noqa
+
+    # warn about entity_ruler & matcher having no patterns only once
+    for pipe in ["matcher", "entity_ruler"]:
+        filter_warning("once", error_msg=Warnings.W036.format(name=pipe))
+
+    # warn once about lemmatizer without required POS
+    filter_warning("once", error_msg="[W108]")
+
+
+def filter_warning(action: str, error_msg: str):
+    """Customize how spaCy should handle a certain warning.
+
+    error_msg (str): e.g. "W006", or a full error message
+    action (str): "default", "error", "ignore", "always", "module" or "once"
+    """
+    warnings.filterwarnings(action, message=_escape_warning_msg(error_msg))
+
+
+def _escape_warning_msg(msg):
+    """To filter with warnings.filterwarnings, the [] brackets need to be escaped"""
+    return msg.replace("[", "\\[").replace("]", "\\]")
+
+
 # fmt: off
 @add_codes
@@ -80,8 +110,9 @@ class Warnings:
             "@misc = \"spacy.LookupsDataLoader.v1\"\n"
             "lang = ${{nlp.lang}}\n"
             "tables = [\"lexeme_norm\"]\n")
-    W035 = ('Discarding subpattern "{pattern}" due to an unrecognized '
+    W035 = ("Discarding subpattern '{pattern}' due to an unrecognized "
             "attribute or operator.")
+    W036 = ("The component '{name}' does not have any patterns defined.")

     # New warnings added in v3.x
     W086 = ("Component '{listener}' will be (re)trained, but it needs the component "

View File: spacy/language.py

@@ -689,7 +689,7 @@ class Language:
         if self.vocab.vectors.shape != source.vocab.vectors.shape or \
                 self.vocab.vectors.key2row != source.vocab.vectors.key2row or \
                 self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes():
-            util.logger.warning(Warnings.W113.format(name=source_name))
+            warnings.warn(Warnings.W113.format(name=source_name))
         if not source_name in source.component_names:
             raise KeyError(
                 Errors.E944.format(

View File: spacy/matcher/dependencymatcher.pyx

@@ -4,6 +4,7 @@ from collections import defaultdict
 from itertools import product

 import numpy
+import warnings

 from .matcher cimport Matcher
 from ..vocab cimport Vocab
@@ -11,7 +12,6 @@ from ..tokens.doc cimport Doc
 from ..errors import Errors, Warnings
 from ..tokens import Span
-from ..util import logger

 DELIMITER = "||"
@@ -282,7 +282,7 @@ cdef class DependencyMatcher:
         keys_to_position_maps = defaultdict(lambda: defaultdict(list))
         for match_id, start, end in self._matcher(doc):
             if start + 1 != end:
-                logger.warning(Warnings.W110.format(tokens=[t.text for t in doc[start:end]], pattern=self._matcher.get(match_id)[1][0][0]))
+                warnings.warn(Warnings.W110.format(tokens=[t.text for t in doc[start:end]], pattern=self._matcher.get(match_id)[1][0][0]))
             token = doc[start]
             root = ([token] + list(token.ancestors))[-1]
             keys_to_position_maps[root.i][match_id].append(start)
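
Note: with W110 going through `warnings.warn` instead of the spaCy logger, callers can promote it to a hard error while debugging dependency patterns. A stdlib-only sketch (the message text is illustrative):

    import warnings

    # The "error" action turns matching warnings into raised exceptions.
    warnings.filterwarnings("error", message="\\[W110\\]")
    try:
        warnings.warn("[W110] pattern matched a span of more than one token")
    except UserWarning as err:
        print(f"W110 escalated to an exception: {err}")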

View File: spacy/matcher/matcher.pyx

@@ -138,6 +138,11 @@ cdef class Matcher:
         self._filter[key] = greedy
         self._patterns[key].extend(patterns)

+    def _require_patterns(self) -> None:
+        """Raise a warning if this component has no patterns defined."""
+        if len(self) == 0:
+            warnings.warn(Warnings.W036.format(name="matcher"))
+
     def remove(self, key):
         """Remove a rule from the matcher. A KeyError is raised if the key does
         not exist.
@@ -215,6 +220,7 @@ cdef class Matcher:
         If with_alignments is set to True and as_spans is set to False,
         A list of `(match_id, start, end, alignments)` tuples is returned.
         """
+        self._require_patterns()
         if isinstance(doclike, Doc):
             doc = doclike
             length = len(doc)
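
Note: `setup_default_warnings()` registers W036 with the "once" action, so a patternless `Matcher` warns on the first call only, and because the filter is registered per formatted message, the "matcher" and "entity_ruler" variants are deduplicated independently. A stdlib-only sketch of the "once" behaviour:

    import warnings

    warnings.filterwarnings("once", message="\\[W036\\]")
    for _ in range(3):
        # With "once", each distinct message text is reported a single
        # time, no matter how often or from where it is emitted.
        warnings.warn("[W036] The component 'matcher' does not have any patterns defined.")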

View File: spacy/pipeline/entityruler.py

@@ -1,3 +1,4 @@
+import warnings
 from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable, Sequence
 from collections import defaultdict
 from pathlib import Path
@@ -6,7 +7,7 @@ import srsly
 from .pipe import Pipe
 from ..training import Example
 from ..language import Language
-from ..errors import Errors
+from ..errors import Errors, Warnings
 from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList
 from ..tokens import Doc, Span
 from ..matcher import Matcher, PhraseMatcher
@@ -139,6 +140,7 @@ class EntityRuler(Pipe):
             error_handler(self.name, self, [doc], e)

     def match(self, doc: Doc):
+        self._require_patterns()
         matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
         matches = set(
             [(m_id, start, end) for m_id, start, end in matches if start != end]
@@ -327,6 +329,11 @@ class EntityRuler(Pipe):
             self.nlp.vocab, attr=self.phrase_matcher_attr, validate=self._validate
         )

+    def _require_patterns(self) -> None:
+        """Raise a warning if this component has no patterns defined."""
+        if len(self) == 0:
+            warnings.warn(Warnings.W036.format(name=self.name))
+
     def _split_label(self, label: str) -> Tuple[str, str]:
         """Split Entity label into ent_label and ent_id if it contains self.ent_id_sep

View File: spacy/pipeline/lemmatizer.py

@@ -2,6 +2,8 @@ from typing import Optional, List, Dict, Any, Callable, Iterable, Union, Tuple
 from thinc.api import Model
 from pathlib import Path

+import warnings
+
 from .pipe import Pipe
 from ..errors import Errors, Warnings
 from ..language import Language
@@ -182,7 +184,7 @@ class Lemmatizer(Pipe):
         univ_pos = token.pos_.lower()
         if univ_pos in ("", "eol", "space"):
             if univ_pos == "":
-                logger.warning(Warnings.W108.format(text=string))
+                warnings.warn(Warnings.W108.format(text=string))
             return [string.lower()]
         # See Issue #435 for example of where this logic is requied.
         if self.is_base_form(token):

View File: spacy/tests/doc/test_doc_api.py

@@ -2,8 +2,6 @@ import weakref
 import pytest
 import numpy
-import logging
-import mock

 from spacy.lang.xx import MultiLanguage
 from spacy.tokens import Doc, Span, Token
@@ -158,13 +156,10 @@ def test_doc_api_serialize(en_tokenizer, text):
     def inner_func(d1, d2):
         return "hello!"

-    logger = logging.getLogger("spacy")
-    with mock.patch.object(logger, "warning") as mock_warning:
-        _ = tokens.to_bytes()  # noqa: F841
-        mock_warning.assert_not_called()
-        tokens.user_hooks["similarity"] = inner_func
-        _ = tokens.to_bytes()  # noqa: F841
-        mock_warning.assert_called_once()
+    _ = tokens.to_bytes()  # noqa: F841
+    with pytest.warns(UserWarning):
+        tokens.user_hooks["similarity"] = inner_func
+        _ = tokens.to_bytes()  # noqa: F841


 def test_doc_api_set_ents(en_tokenizer):
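
Note: `pytest.warns(UserWarning)` replaces the logger mock for the positive case, but it has no direct counterpart to `mock_warning.assert_not_called()`, which is why the first `to_bytes()` call now simply runs outside the context manager. If an explicit "no warning emitted" assertion is ever wanted, one stdlib-only option (a sketch, not part of this PR) is:

    import warnings

    def assert_no_warnings(fn, *args, **kwargs):
        # Record everything, re-enabling warnings that filters would hide.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            result = fn(*args, **kwargs)
        assert caught == [], f"unexpected warnings: {caught}"
        return result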

View File: spacy/tests/matcher/test_matcher_api.py

@@ -33,6 +33,15 @@ def test_matcher_from_api_docs(en_vocab):
     assert len(patterns[0])


+def test_matcher_empty_patterns_warns(en_vocab):
+    matcher = Matcher(en_vocab)
+    assert len(matcher) == 0
+    doc = Doc(en_vocab, words=["This", "is", "quite", "something"])
+    with pytest.warns(UserWarning):
+        matcher(doc)
+    assert len(doc.ents) == 0
+
+
 def test_matcher_from_usage_docs(en_vocab):
     text = "Wow 😀 This is really cool! 😂 😂"
     doc = Doc(en_vocab, words=text.split(" "))

View File: spacy/tests/pipeline/test_entity_ruler.py

@@ -46,6 +46,17 @@ def test_entity_ruler_init(nlp, patterns):
     assert doc.ents[1].label_ == "BYE"


+def test_entity_ruler_no_patterns_warns(nlp):
+    ruler = EntityRuler(nlp)
+    assert len(ruler) == 0
+    assert len(ruler.labels) == 0
+    nlp.add_pipe("entity_ruler")
+    assert nlp.pipe_names == ["entity_ruler"]
+    with pytest.warns(UserWarning):
+        doc = nlp("hello world bye bye")
+    assert len(doc.ents) == 0
+
+
 def test_entity_ruler_init_patterns(nlp, patterns):
     # initialize with patterns
     ruler = nlp.add_pipe("entity_ruler")

View File: spacy/tests/pipeline/test_lemmatizer.py

@@ -1,6 +1,4 @@
 import pytest
-import logging
-import mock
 import pickle
 from spacy import util, registry
 from spacy.lang.en import English
@@ -59,10 +57,10 @@ def test_lemmatizer_config(nlp):
     # warning if no POS assigned
     doc = nlp.make_doc("coping")
-    logger = logging.getLogger("spacy")
-    with mock.patch.object(logger, "warning") as mock_warning:
+    with pytest.warns(UserWarning):
         doc = lemmatizer(doc)
-        mock_warning.assert_called_once()
+    # warns once by default
+    doc = lemmatizer(doc)

     # works with POS
     doc = nlp.make_doc("coping")

View File: spacy/tests/pipeline/test_pipe_factories.py

@@ -1,6 +1,4 @@
 import pytest
-import mock
-import logging
 from spacy.language import Language
 from spacy.lang.en import English
 from spacy.lang.de import German
@@ -437,10 +435,8 @@ def test_pipe_factories_from_source_language_subclass():
     nlp = English()
     nlp.vocab.vectors.resize((1, 4))
     nlp.vocab.vectors.add("cat", vector=[1, 2, 3, 4])
-    logger = logging.getLogger("spacy")
-    with mock.patch.object(logger, "warning") as mock_warning:
+    with pytest.warns(UserWarning):
         nlp.add_pipe("tagger", source=source_nlp)
-        mock_warning.assert_called()


 def test_pipe_factories_from_source_custom():

View File: spacy/tokens/doc.pyx

@@ -1318,7 +1318,7 @@ cdef class Doc:
         if "user_data_values" not in exclude:
             serializers["user_data_values"] = lambda: srsly.msgpack_dumps(user_data_values)
         if "user_hooks" not in exclude and any((self.user_hooks, self.user_token_hooks, self.user_span_hooks)):
-            util.logger.warning(Warnings.W109)
+            warnings.warn(Warnings.W109)
         return util.to_dict(serializers, exclude)

     def from_dict(self, msg, *, exclude=tuple()):
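
Note: for end users, the visible effect of this last hunk is that serializing a `Doc` with user hooks now emits W109 as a `UserWarning` rather than a log record, so it can be captured or silenced with the stdlib instead of reconfiguring the "spacy" logger. A hedged usage sketch (assumes a spaCy install that includes this change; the blank pipeline and hook are illustrative):

    import warnings
    import spacy

    nlp = spacy.blank("en")
    doc = nlp("hello world")
    doc.user_hooks["similarity"] = lambda d1, d2: 1.0  # hooks are not serialized

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        data = doc.to_bytes()
    print([str(w.message) for w in caught])  # includes the [W109] message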