Mirror of https://github.com/explosion/spaCy.git (synced 2025-02-03)
Merge pull request #8285 from adrianeboyd/feature/refactor-logger-warnings

Refactor warnings

Commit dbbeab2506
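This PR replaces spaCy's user-facing logger.warning calls with the standard
warnings.warn, and centralizes the default filters in a new
setup_default_warnings() helper in spacy/errors.py. A practical consequence,
sketched below, is that spaCy warnings can now be managed with Python's
standard warnings machinery instead of by muting the "spacy" logger. The
sketch assumes the usual bracketed code prefix ("[W036]", "[W108]", ...) at
the start of each message, which is what the new filters in this diff match
against.

import warnings

import spacy

# Promote the "component has no patterns" warning (W036) to a hard error.
# warnings.filterwarnings treats `message` as a regex matched against the
# start of the warning text, hence the escaped brackets.
warnings.filterwarnings("error", message=r"\[W036\]")

nlp = spacy.blank("en")
nlp.add_pipe("entity_ruler")  # no patterns added
# nlp("hello")  # with the filter above, W036 would now raise an exception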
spacy/__init__.py
@@ -1,10 +1,10 @@
 from typing import Union, Iterable, Dict, Any
 from pathlib import Path
-import warnings
 import sys

-warnings.filterwarnings("ignore", message="numpy.dtype size changed")  # noqa
-warnings.filterwarnings("ignore", message="numpy.ufunc size changed")  # noqa
+# set library-specific custom warning handling before doing anything else
+from .errors import setup_default_warnings
+setup_default_warnings()

 # These are imported as part of the API
 from thinc.api import prefer_gpu, require_gpu, require_cpu  # noqa: F401
spacy/errors.py
@@ -1,3 +1,6 @@
+import warnings
+
+
 def add_codes(err_cls):
     """Add error codes to string messages via class attribute names."""

@@ -12,6 +15,33 @@ def add_codes(err_cls):
     return ErrorsWithCodes()


+def setup_default_warnings():
+    # ignore certain numpy warnings
+    filter_warning("ignore", error_msg="numpy.dtype size changed")  # noqa
+    filter_warning("ignore", error_msg="numpy.ufunc size changed")  # noqa
+
+    # warn about entity_ruler & matcher having no patterns only once
+    for pipe in ["matcher", "entity_ruler"]:
+        filter_warning("once", error_msg=Warnings.W036.format(name=pipe))
+
+    # warn once about lemmatizer without required POS
+    filter_warning("once", error_msg="[W108]")
+
+
+def filter_warning(action: str, error_msg: str):
+    """Customize how spaCy should handle a certain warning.
+
+    error_msg (str): e.g. "W006", or a full error message
+    action (str): "default", "error", "ignore", "always", "module" or "once"
+    """
+    warnings.filterwarnings(action, message=_escape_warning_msg(error_msg))
+
+
+def _escape_warning_msg(msg):
+    """To filter with warnings.filterwarnings, the [] brackets need to be escaped"""
+    return msg.replace("[", "\\[").replace("]", "\\]")
+
+
 # fmt: off

 @add_codes
@@ -80,8 +110,9 @@ class Warnings:
             "@misc = \"spacy.LookupsDataLoader.v1\"\n"
             "lang = ${{nlp.lang}}\n"
             "tables = [\"lexeme_norm\"]\n")
-    W035 = ('Discarding subpattern "{pattern}" due to an unrecognized '
+    W035 = ("Discarding subpattern '{pattern}' due to an unrecognized "
             "attribute or operator.")
+    W036 = ("The component '{name}' does not have any patterns defined.")

     # New warnings added in v3.x
     W086 = ("Component '{listener}' will be (re)trained, but it needs the component "
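Why the escaping helper matters: warnings.filterwarnings compiles its message
argument as a regular expression and matches it against the start of the
warning text, so an unescaped code such as "[W108]" would be parsed as a
character class rather than as a literal prefix. A minimal standalone sketch
of the difference:

import re

# As a regex, "[W108]" is a character class matching one of W, 1, 0, 8,
# so it does not match the literal "[" that starts the warning text:
assert re.compile(r"[W108]").match("[W108] some warning text") is None

# The escaped form produced by _escape_warning_msg matches literally:
assert re.compile(r"\[W108\]").match("[W108] some warning text")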
spacy/language.py
@@ -689,7 +689,7 @@ class Language:
         if self.vocab.vectors.shape != source.vocab.vectors.shape or \
                 self.vocab.vectors.key2row != source.vocab.vectors.key2row or \
                 self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes():
-            util.logger.warning(Warnings.W113.format(name=source_name))
+            warnings.warn(Warnings.W113.format(name=source_name))
         if not source_name in source.component_names:
             raise KeyError(
                 Errors.E944.format(
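Because W113 is now routed through warnings.warn, the filter_warning helper
added in spacy/errors.py can change how it is handled. A hypothetical strict
setup that promotes the vectors-mismatch warning to an exception:

from spacy.errors import filter_warning

# "error" is one of the standard filter actions listed in filter_warning's
# docstring; the helper escapes the brackets before registering the filter.
filter_warning("error", error_msg="[W113]")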
spacy/matcher/dependencymatcher.pyx
@@ -4,6 +4,7 @@ from collections import defaultdict
 from itertools import product

 import numpy
+import warnings

 from .matcher cimport Matcher
 from ..vocab cimport Vocab
@@ -11,7 +12,6 @@ from ..tokens.doc cimport Doc

 from ..errors import Errors, Warnings
 from ..tokens import Span
-from ..util import logger


 DELIMITER = "||"
@@ -282,7 +282,7 @@ cdef class DependencyMatcher:
         keys_to_position_maps = defaultdict(lambda: defaultdict(list))
         for match_id, start, end in self._matcher(doc):
             if start + 1 != end:
-                logger.warning(Warnings.W110.format(tokens=[t.text for t in doc[start:end]], pattern=self._matcher.get(match_id)[1][0][0]))
+                warnings.warn(Warnings.W110.format(tokens=[t.text for t in doc[start:end]], pattern=self._matcher.get(match_id)[1][0][0]))
             token = doc[start]
             root = ([token] + list(token.ancestors))[-1]
             keys_to_position_maps[root.i][match_id].append(start)
spacy/matcher/matcher.pyx
@@ -138,6 +138,11 @@ cdef class Matcher:
         self._filter[key] = greedy
         self._patterns[key].extend(patterns)

+    def _require_patterns(self) -> None:
+        """Raise a warning if this component has no patterns defined."""
+        if len(self) == 0:
+            warnings.warn(Warnings.W036.format(name="matcher"))
+
     def remove(self, key):
         """Remove a rule from the matcher. A KeyError is raised if the key does
         not exist.
@@ -215,6 +220,7 @@ cdef class Matcher:
         If with_alignments is set to True and as_spans is set to False,
         a list of `(match_id, start, end, alignments)` tuples is returned.
         """
+        self._require_patterns()
         if isinstance(doclike, Doc):
             doc = doclike
             length = len(doc)
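The effect of the new _require_patterns check, as a short usage sketch (not
part of the diff; a blank English pipeline is assumed):

import spacy
from spacy.matcher import Matcher

nlp = spacy.blank("en")
matcher = Matcher(nlp.vocab)  # no patterns added yet

doc = nlp("This is quite something")
# Emits W036 as a UserWarning (once per process by default) instead of
# silently returning nothing:
matches = matcher(doc)
assert matches == []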
spacy/pipeline/entityruler.py
@@ -1,3 +1,4 @@
+import warnings
 from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable, Sequence
 from collections import defaultdict
 from pathlib import Path
@@ -6,7 +7,7 @@ import srsly
 from .pipe import Pipe
 from ..training import Example
 from ..language import Language
-from ..errors import Errors
+from ..errors import Errors, Warnings
 from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList
 from ..tokens import Doc, Span
 from ..matcher import Matcher, PhraseMatcher
@@ -139,6 +140,7 @@ class EntityRuler(Pipe):
             error_handler(self.name, self, [doc], e)

     def match(self, doc: Doc):
+        self._require_patterns()
         matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
         matches = set(
             [(m_id, start, end) for m_id, start, end in matches if start != end]
@@ -327,6 +329,11 @@ class EntityRuler(Pipe):
             self.nlp.vocab, attr=self.phrase_matcher_attr, validate=self._validate
         )

+    def _require_patterns(self) -> None:
+        """Raise a warning if this component has no patterns defined."""
+        if len(self) == 0:
+            warnings.warn(Warnings.W036.format(name=self.name))
+
     def _split_label(self, label: str) -> Tuple[str, str]:
         """Split Entity label into ent_label and ent_id if it contains self.ent_id_sep
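Since the EntityRuler warning goes through the standard machinery, a caller
that intentionally adds the component before loading its patterns can silence
it locally. A sketch, assuming the patterns arrive later (e.g. from disk):

import warnings

import spacy

nlp = spacy.blank("en")
ruler = nlp.add_pipe("entity_ruler")  # patterns to be loaded later
assert len(ruler) == 0

with warnings.catch_warnings():
    # Ignore W036 only inside this block; brackets are regex-escaped.
    warnings.filterwarnings("ignore", message=r"\[W036\]")
    doc = nlp("hello world")
assert len(doc.ents) == 0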
spacy/pipeline/lemmatizer.py
@@ -2,6 +2,8 @@ from typing import Optional, List, Dict, Any, Callable, Iterable, Union, Tuple
 from thinc.api import Model
 from pathlib import Path

+import warnings
+
 from .pipe import Pipe
 from ..errors import Errors, Warnings
 from ..language import Language
@@ -182,7 +184,7 @@ class Lemmatizer(Pipe):
         univ_pos = token.pos_.lower()
         if univ_pos in ("", "eol", "space"):
             if univ_pos == "":
-                logger.warning(Warnings.W108.format(text=string))
+                warnings.warn(Warnings.W108.format(text=string))
             return [string.lower()]
         # See Issue #435 for an example of where this logic is required.
         if self.is_base_form(token):
spacy/tests/doc/test_doc_api.py
@@ -2,8 +2,6 @@ import weakref

 import pytest
 import numpy
-import logging
-import mock

 from spacy.lang.xx import MultiLanguage
 from spacy.tokens import Doc, Span, Token
@@ -158,13 +156,10 @@ def test_doc_api_serialize(en_tokenizer, text):
     def inner_func(d1, d2):
         return "hello!"

-    logger = logging.getLogger("spacy")
-    with mock.patch.object(logger, "warning") as mock_warning:
-        _ = tokens.to_bytes()  # noqa: F841
-        mock_warning.assert_not_called()
+    _ = tokens.to_bytes()  # noqa: F841
+    with pytest.warns(UserWarning):
         tokens.user_hooks["similarity"] = inner_func
         _ = tokens.to_bytes()  # noqa: F841
-        mock_warning.assert_called_once()


 def test_doc_api_set_ents(en_tokenizer):
spacy/tests/matcher/test_matcher_api.py
@@ -33,6 +33,15 @@ def test_matcher_from_api_docs(en_vocab):
     assert len(patterns[0])


+def test_matcher_empty_patterns_warns(en_vocab):
+    matcher = Matcher(en_vocab)
+    assert len(matcher) == 0
+    doc = Doc(en_vocab, words=["This", "is", "quite", "something"])
+    with pytest.warns(UserWarning):
+        matcher(doc)
+    assert len(doc.ents) == 0
+
+
 def test_matcher_from_usage_docs(en_vocab):
     text = "Wow 😀 This is really cool! 😂 😂"
     doc = Doc(en_vocab, words=text.split(" "))
spacy/tests/pipeline/test_entity_ruler.py
@@ -46,6 +46,17 @@ def test_entity_ruler_init(nlp, patterns):
     assert doc.ents[1].label_ == "BYE"


+def test_entity_ruler_no_patterns_warns(nlp):
+    ruler = EntityRuler(nlp)
+    assert len(ruler) == 0
+    assert len(ruler.labels) == 0
+    nlp.add_pipe("entity_ruler")
+    assert nlp.pipe_names == ["entity_ruler"]
+    with pytest.warns(UserWarning):
+        doc = nlp("hello world bye bye")
+    assert len(doc.ents) == 0
+
+
 def test_entity_ruler_init_patterns(nlp, patterns):
     # initialize with patterns
     ruler = nlp.add_pipe("entity_ruler")
spacy/tests/pipeline/test_lemmatizer.py
@@ -1,6 +1,4 @@
 import pytest
-import logging
-import mock
 import pickle
 from spacy import util, registry
 from spacy.lang.en import English
@@ -59,10 +57,10 @@ def test_lemmatizer_config(nlp):

     # warning if no POS assigned
     doc = nlp.make_doc("coping")
-    logger = logging.getLogger("spacy")
-    with mock.patch.object(logger, "warning") as mock_warning:
+    with pytest.warns(UserWarning):
         doc = lemmatizer(doc)
-        mock_warning.assert_called_once()
+    # warns once by default
+    doc = lemmatizer(doc)

     # works with POS
     doc = nlp.make_doc("coping")
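The "# warns once by default" step relies on setup_default_warnings
registering W108 with the "once" action, and on Python's "once" filter
suppressing later warnings that carry the same message text. A standalone
sketch of that behavior (the message below is illustrative, not spaCy's
exact W108 text):

import warnings

with warnings.catch_warnings(record=True) as caught:
    # The narrow filter is prepended, so it matches before the defaults.
    warnings.filterwarnings("once", message=r"\[W108\]")
    warnings.warn("[W108] illustrative lemmatizer message")
    warnings.warn("[W108] illustrative lemmatizer message")  # suppressed

assert len(caught) == 1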
spacy/tests/pipeline/test_pipe_factories.py
@@ -1,6 +1,4 @@
 import pytest
-import mock
-import logging
 from spacy.language import Language
 from spacy.lang.en import English
 from spacy.lang.de import German
@@ -437,10 +435,8 @@ def test_pipe_factories_from_source_language_subclass():
     nlp = English()
     nlp.vocab.vectors.resize((1, 4))
     nlp.vocab.vectors.add("cat", vector=[1, 2, 3, 4])
-    logger = logging.getLogger("spacy")
-    with mock.patch.object(logger, "warning") as mock_warning:
+    with pytest.warns(UserWarning):
         nlp.add_pipe("tagger", source=source_nlp)
-        mock_warning.assert_called()


 def test_pipe_factories_from_source_custom():
spacy/tokens/doc.pyx
@@ -1318,7 +1318,7 @@ cdef class Doc:
         if "user_data_values" not in exclude:
             serializers["user_data_values"] = lambda: srsly.msgpack_dumps(user_data_values)
         if "user_hooks" not in exclude and any((self.user_hooks, self.user_token_hooks, self.user_span_hooks)):
-            util.logger.warning(Warnings.W109)
+            warnings.warn(Warnings.W109)
         return util.to_dict(serializers, exclude)

     def from_dict(self, msg, *, exclude=tuple()):
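For callers that knowingly serialize a Doc with user hooks attached, W109 can
now be silenced at the call site rather than by muting the "spacy" logger. A
self-contained sketch:

import warnings

import spacy

nlp = spacy.blank("en")
doc = nlp("hello world")
doc.user_hooks["similarity"] = lambda d1, d2: 1.0  # hooks are not serialized

with warnings.catch_warnings():
    # W109 warns that user hooks are lost during serialization.
    warnings.filterwarnings("ignore", message=r"\[W109\]")
    data = doc.to_bytes()  # no W109 reaches the user here
assert data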