Merge pull request #8285 from adrianeboyd/feature/refactor-logger-warnings

Refactor warnings
Adriane Boyd 2021-06-11 10:20:02 +02:00 committed by GitHub
commit dbbeab2506
13 changed files with 82 additions and 27 deletions

View File: spacy/__init__.py

@@ -1,10 +1,10 @@
 from typing import Union, Iterable, Dict, Any
 from pathlib import Path
-import warnings
 import sys

-warnings.filterwarnings("ignore", message="numpy.dtype size changed")  # noqa
-warnings.filterwarnings("ignore", message="numpy.ufunc size changed")  # noqa
+# set library-specific custom warning handling before doing anything else
+from .errors import setup_default_warnings
+
+setup_default_warnings()

 # These are imported as part of the API
 from thinc.api import prefer_gpu, require_gpu, require_cpu  # noqa: F401
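
Note: `setup_default_warnings()` has to run at the very top of the package `__init__` because `warnings.filterwarnings` only affects warnings raised after the filter is registered. A minimal stdlib-only sketch of that ordering constraint, reusing the numpy message from the diff above:

    import warnings

    # Emitted before any filter is registered: handled by the default
    # filters and shown to the user.
    warnings.warn("numpy.dtype size changed")

    # A filter registered afterwards only suppresses later occurrences.
    warnings.filterwarnings("ignore", message="numpy.dtype size changed")
    warnings.warn("numpy.dtype size changed")  # silently dropped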

View File: spacy/errors.py

@@ -1,3 +1,6 @@
+import warnings
+
+
 def add_codes(err_cls):
     """Add error codes to string messages via class attribute names."""
@@ -12,6 +15,33 @@ def add_codes(err_cls):
     return ErrorsWithCodes()


+def setup_default_warnings():
+    # ignore certain numpy warnings
+    filter_warning("ignore", error_msg="numpy.dtype size changed")  # noqa
+    filter_warning("ignore", error_msg="numpy.ufunc size changed")  # noqa
+
+    # warn about entity_ruler & matcher having no patterns only once
+    for pipe in ["matcher", "entity_ruler"]:
+        filter_warning("once", error_msg=Warnings.W036.format(name=pipe))
+
+    # warn once about lemmatizer without required POS
+    filter_warning("once", error_msg="[W108]")
+
+
+def filter_warning(action: str, error_msg: str):
+    """Customize how spaCy should handle a certain warning.
+
+    error_msg (str): e.g. "W006", or a full error message
+    action (str): "default", "error", "ignore", "always", "module" or "once"
+    """
+    warnings.filterwarnings(action, message=_escape_warning_msg(error_msg))
+
+
+def _escape_warning_msg(msg):
+    """To filter with warnings.filterwarnings, the [] brackets need to be escaped"""
+    return msg.replace("[", "\\[").replace("]", "\\]")
+
+
 # fmt: off
 @add_codes
@@ -80,8 +110,9 @@ class Warnings:
             "@misc = \"spacy.LookupsDataLoader.v1\"\n"
             "lang = ${{nlp.lang}}\n"
             "tables = [\"lexeme_norm\"]\n")
-    W035 = ('Discarding subpattern "{pattern}" due to an unrecognized '
+    W035 = ("Discarding subpattern '{pattern}' due to an unrecognized "
             "attribute or operator.")
+    W036 = ("The component '{name}' does not have any patterns defined.")

     # New warnings added in v3.x
     W086 = ("Component '{listener}' will be (re)trained, but it needs the component "

View File: spacy/language.py

@@ -689,7 +689,7 @@ class Language:
         if self.vocab.vectors.shape != source.vocab.vectors.shape or \
                 self.vocab.vectors.key2row != source.vocab.vectors.key2row or \
                 self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes():
-            util.logger.warning(Warnings.W113.format(name=source_name))
+            warnings.warn(Warnings.W113.format(name=source_name))
         if not source_name in source.component_names:
             raise KeyError(
                 Errors.E944.format(

View File: spacy/matcher/dependencymatcher.pyx

@@ -4,6 +4,7 @@ from collections import defaultdict
 from itertools import product

 import numpy
+import warnings

 from .matcher cimport Matcher
 from ..vocab cimport Vocab
@@ -11,7 +12,6 @@ from ..tokens.doc cimport Doc
 from ..errors import Errors, Warnings
 from ..tokens import Span
-from ..util import logger

 DELIMITER = "||"
@@ -282,7 +282,7 @@ cdef class DependencyMatcher:
         keys_to_position_maps = defaultdict(lambda: defaultdict(list))
         for match_id, start, end in self._matcher(doc):
             if start + 1 != end:
-                logger.warning(Warnings.W110.format(tokens=[t.text for t in doc[start:end]], pattern=self._matcher.get(match_id)[1][0][0]))
+                warnings.warn(Warnings.W110.format(tokens=[t.text for t in doc[start:end]], pattern=self._matcher.get(match_id)[1][0][0]))
             token = doc[start]
             root = ([token] + list(token.ancestors))[-1]
             keys_to_position_maps[root.i][match_id].append(start)
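
Note: with W110 going through `warnings.warn` instead of the spaCy logger, callers can promote it to a hard error while debugging dependency patterns. A stdlib-only sketch (the message text is illustrative):

    import warnings

    # The "error" action turns matching warnings into raised exceptions.
    warnings.filterwarnings("error", message="\\[W110\\]")
    try:
        warnings.warn("[W110] pattern matched a span of more than one token")
    except UserWarning as err:
        print(f"W110 escalated to an exception: {err}")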

View File: spacy/matcher/matcher.pyx

@@ -138,6 +138,11 @@ cdef class Matcher:
         self._filter[key] = greedy
         self._patterns[key].extend(patterns)

+    def _require_patterns(self) -> None:
+        """Raise a warning if this component has no patterns defined."""
+        if len(self) == 0:
+            warnings.warn(Warnings.W036.format(name="matcher"))
+
     def remove(self, key):
         """Remove a rule from the matcher. A KeyError is raised if the key does
         not exist.
@@ -215,6 +220,7 @@ cdef class Matcher:
         If with_alignments is set to True and as_spans is set to False,
         A list of `(match_id, start, end, alignments)` tuples is returned.
         """
+        self._require_patterns()
         if isinstance(doclike, Doc):
             doc = doclike
             length = len(doc)
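
Note: `setup_default_warnings()` registers W036 with the "once" action, so a patternless `Matcher` warns on the first call only, and because the filter is registered per formatted message, the "matcher" and "entity_ruler" variants are deduplicated independently. A stdlib-only sketch of the "once" behaviour:

    import warnings

    warnings.filterwarnings("once", message="\\[W036\\]")
    for _ in range(3):
        # With "once", each distinct message text is reported a single
        # time, no matter how often or from where it is emitted.
        warnings.warn("[W036] The component 'matcher' does not have any patterns defined.")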

View File: spacy/pipeline/entityruler.py

@@ -1,3 +1,4 @@
+import warnings
 from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable, Sequence
 from collections import defaultdict
 from pathlib import Path
@@ -6,7 +7,7 @@ import srsly
 from .pipe import Pipe
 from ..training import Example
 from ..language import Language
-from ..errors import Errors
+from ..errors import Errors, Warnings
 from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList
 from ..tokens import Doc, Span
 from ..matcher import Matcher, PhraseMatcher
@@ -139,6 +140,7 @@ class EntityRuler(Pipe):
             error_handler(self.name, self, [doc], e)

     def match(self, doc: Doc):
+        self._require_patterns()
         matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
         matches = set(
             [(m_id, start, end) for m_id, start, end in matches if start != end]
@@ -327,6 +329,11 @@ class EntityRuler(Pipe):
             self.nlp.vocab, attr=self.phrase_matcher_attr, validate=self._validate
         )

+    def _require_patterns(self) -> None:
+        """Raise a warning if this component has no patterns defined."""
+        if len(self) == 0:
+            warnings.warn(Warnings.W036.format(name=self.name))
+
     def _split_label(self, label: str) -> Tuple[str, str]:
         """Split Entity label into ent_label and ent_id if it contains self.ent_id_sep

View File: spacy/pipeline/lemmatizer.py

@@ -2,6 +2,8 @@ from typing import Optional, List, Dict, Any, Callable, Iterable, Union, Tuple
 from thinc.api import Model
 from pathlib import Path

+import warnings
+
 from .pipe import Pipe
 from ..errors import Errors, Warnings
 from ..language import Language
@@ -182,7 +184,7 @@ class Lemmatizer(Pipe):
         univ_pos = token.pos_.lower()
         if univ_pos in ("", "eol", "space"):
             if univ_pos == "":
-                logger.warning(Warnings.W108.format(text=string))
+                warnings.warn(Warnings.W108.format(text=string))
             return [string.lower()]
         # See Issue #435 for example of where this logic is requied.
         if self.is_base_form(token):

View File: spacy/tests/doc/test_doc_api.py

@@ -2,8 +2,6 @@ import weakref
 import pytest
 import numpy
-import logging
-import mock

 from spacy.lang.xx import MultiLanguage
 from spacy.tokens import Doc, Span, Token
@@ -158,13 +156,10 @@ def test_doc_api_serialize(en_tokenizer, text):
     def inner_func(d1, d2):
         return "hello!"

-    logger = logging.getLogger("spacy")
-    with mock.patch.object(logger, "warning") as mock_warning:
-        _ = tokens.to_bytes()  # noqa: F841
-        mock_warning.assert_not_called()
-        tokens.user_hooks["similarity"] = inner_func
-        _ = tokens.to_bytes()  # noqa: F841
-        mock_warning.assert_called_once()
+    _ = tokens.to_bytes()  # noqa: F841
+    with pytest.warns(UserWarning):
+        tokens.user_hooks["similarity"] = inner_func
+        _ = tokens.to_bytes()  # noqa: F841


 def test_doc_api_set_ents(en_tokenizer):
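
Note: `pytest.warns(UserWarning)` replaces the logger mock for the positive case, but it has no direct counterpart to `mock_warning.assert_not_called()`, which is why the first `to_bytes()` call now simply runs outside the context manager. If an explicit "no warning emitted" assertion is ever wanted, one stdlib-only option (a sketch, not part of this PR) is:

    import warnings

    def assert_no_warnings(fn, *args, **kwargs):
        # Record everything, re-enabling warnings that filters would hide.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            result = fn(*args, **kwargs)
        assert caught == [], f"unexpected warnings: {caught}"
        return result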

View File: spacy/tests/matcher/test_matcher_api.py

@@ -33,6 +33,15 @@ def test_matcher_from_api_docs(en_vocab):
     assert len(patterns[0])


+def test_matcher_empty_patterns_warns(en_vocab):
+    matcher = Matcher(en_vocab)
+    assert len(matcher) == 0
+    doc = Doc(en_vocab, words=["This", "is", "quite", "something"])
+    with pytest.warns(UserWarning):
+        matcher(doc)
+    assert len(doc.ents) == 0
+
+
 def test_matcher_from_usage_docs(en_vocab):
     text = "Wow 😀 This is really cool! 😂 😂"
     doc = Doc(en_vocab, words=text.split(" "))

View File: spacy/tests/pipeline/test_entity_ruler.py

@@ -46,6 +46,17 @@ def test_entity_ruler_init(nlp, patterns):
     assert doc.ents[1].label_ == "BYE"


+def test_entity_ruler_no_patterns_warns(nlp):
+    ruler = EntityRuler(nlp)
+    assert len(ruler) == 0
+    assert len(ruler.labels) == 0
+    nlp.add_pipe("entity_ruler")
+    assert nlp.pipe_names == ["entity_ruler"]
+    with pytest.warns(UserWarning):
+        doc = nlp("hello world bye bye")
+    assert len(doc.ents) == 0
+
+
 def test_entity_ruler_init_patterns(nlp, patterns):
     # initialize with patterns
     ruler = nlp.add_pipe("entity_ruler")

View File: spacy/tests/pipeline/test_lemmatizer.py

@@ -1,6 +1,4 @@
 import pytest
-import logging
-import mock
 import pickle
 from spacy import util, registry
 from spacy.lang.en import English
@@ -59,10 +57,10 @@ def test_lemmatizer_config(nlp):
     # warning if no POS assigned
     doc = nlp.make_doc("coping")
-    logger = logging.getLogger("spacy")
-    with mock.patch.object(logger, "warning") as mock_warning:
+    with pytest.warns(UserWarning):
         doc = lemmatizer(doc)
-        mock_warning.assert_called_once()
+    # warns once by default
+    doc = lemmatizer(doc)

     # works with POS
     doc = nlp.make_doc("coping")

View File: spacy/tests/pipeline/test_pipe_factories.py

@@ -1,6 +1,4 @@
 import pytest
-import mock
-import logging
 from spacy.language import Language
 from spacy.lang.en import English
 from spacy.lang.de import German
@@ -437,10 +435,8 @@ def test_pipe_factories_from_source_language_subclass():
     nlp = English()
     nlp.vocab.vectors.resize((1, 4))
     nlp.vocab.vectors.add("cat", vector=[1, 2, 3, 4])
-    logger = logging.getLogger("spacy")
-    with mock.patch.object(logger, "warning") as mock_warning:
+    with pytest.warns(UserWarning):
         nlp.add_pipe("tagger", source=source_nlp)
-        mock_warning.assert_called()


 def test_pipe_factories_from_source_custom():

View File: spacy/tokens/doc.pyx

@@ -1318,7 +1318,7 @@ cdef class Doc:
         if "user_data_values" not in exclude:
             serializers["user_data_values"] = lambda: srsly.msgpack_dumps(user_data_values)
         if "user_hooks" not in exclude and any((self.user_hooks, self.user_token_hooks, self.user_span_hooks)):
-            util.logger.warning(Warnings.W109)
+            warnings.warn(Warnings.W109)
         return util.to_dict(serializers, exclude)

     def from_dict(self, msg, *, exclude=tuple()):
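
Note: for end users, the visible effect of this last hunk is that serializing a `Doc` with user hooks now emits W109 as a `UserWarning` rather than a log record, so it can be captured or silenced with the stdlib instead of reconfiguring the "spacy" logger. A hedged usage sketch (assumes a spaCy install that includes this change; the blank pipeline and hook are illustrative):

    import warnings
    import spacy

    nlp = spacy.blank("en")
    doc = nlp("hello world")
    doc.user_hooks["similarity"] = lambda d1, d2: 1.0  # hooks are not serialized

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        data = doc.to_bytes()
    print([str(w.message) for w in caught])  # includes the [W109] message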