mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Show warning if entity_ruler runs without patterns (#7807)
* Show warning if entity_ruler runs without patterns
* Show warning if matcher runs without patterns
* fix wording
* unit test for warning once (WIP)
* warn W036 only once
* cleanup
* create filter_warning helper
This commit is contained in:
parent
d959603d51
commit
f0277bdeab
|
@ -1,10 +1,10 @@
|
|||
from typing import Union, Iterable, Dict, Any
|
||||
from pathlib import Path
|
||||
import warnings
|
||||
import sys
|
||||
|
||||
warnings.filterwarnings("ignore", message="numpy.dtype size changed") # noqa
|
||||
warnings.filterwarnings("ignore", message="numpy.ufunc size changed") # noqa
|
||||
# set library-specific custom warning handling before doing anything else
|
||||
from .errors import setup_default_warnings
|
||||
setup_default_warnings()
|
||||
|
||||
# These are imported as part of the API
|
||||
from thinc.api import prefer_gpu, require_gpu, require_cpu # noqa: F401
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
import warnings
|
||||
|
||||
|
||||
def add_codes(err_cls):
|
||||
"""Add error codes to string messages via class attribute names."""
|
||||
|
||||
|
@ -12,6 +15,30 @@ def add_codes(err_cls):
|
|||
return ErrorsWithCodes()
|
||||
|
||||
|
||||
def setup_default_warnings():
    """Install the warning filters spaCy applies by default.

    Two numpy "size changed" messages are ignored, and warning W036 is
    registered with the "once" action for the "matcher" and "entity_ruler"
    component names.
    """
    # numpy "size changed" notices are silenced
    for numpy_notice in ("numpy.dtype size changed", "numpy.ufunc size changed"):
        filter_warning("ignore", error_msg=numpy_notice)  # noqa

    # the "no patterns defined" warning is registered with action "once"
    # for each of these component names
    for component_name in ("matcher", "entity_ruler"):
        no_patterns_msg = Warnings.W036.format(name=component_name)
        filter_warning("once", error_msg=no_patterns_msg)
|
||||
|
||||
|
||||
def filter_warning(action: str, error_msg: str):
    """Register a warning filter for a specific spaCy warning message.

    The message is passed through _escape_warning_msg before being handed to
    warnings.filterwarnings as the `message` argument.

    action (str): "default", "error", "ignore", "always", "module" or "once"
    error_msg (str): e.g. "W006", or a full error message
    """
    escaped_msg = _escape_warning_msg(error_msg)
    warnings.filterwarnings(action, message=escaped_msg)
|
||||
|
||||
|
||||
def _escape_warning_msg(msg):
|
||||
"""To filter with warnings.filterwarnings, the [] brackets need to be escaped"""
|
||||
return msg.replace("[", "\\[").replace("]", "\\]")
|
||||
|
||||
|
||||
# fmt: off
|
||||
|
||||
@add_codes
|
||||
|
@ -80,8 +107,9 @@ class Warnings:
|
|||
"@misc = \"spacy.LookupsDataLoader.v1\"\n"
|
||||
"lang = ${{nlp.lang}}\n"
|
||||
"tables = [\"lexeme_norm\"]\n")
|
||||
W035 = ('Discarding subpattern "{pattern}" due to an unrecognized '
|
||||
W035 = ("Discarding subpattern '{pattern}' due to an unrecognized "
|
||||
"attribute or operator.")
|
||||
W036 = ("The component '{name}' does not have any patterns defined.")
|
||||
|
||||
# New warnings added in v3.x
|
||||
W086 = ("Component '{listener}' will be (re)trained, but it needs the component "
|
||||
|
|
|
@ -138,6 +138,11 @@ cdef class Matcher:
|
|||
self._filter[key] = greedy
|
||||
self._patterns[key].extend(patterns)
|
||||
|
||||
def _require_patterns(self) -> None:
    """Emit warning W036 (formatted with name "matcher") if len(self) is 0."""
    if len(self) > 0:
        # At least one entry is present: nothing to report.
        return
    warnings.warn(Warnings.W036.format(name="matcher"))
|
||||
|
||||
def remove(self, key):
|
||||
"""Remove a rule from the matcher. A KeyError is raised if the key does
|
||||
not exist.
|
||||
|
@ -215,6 +220,7 @@ cdef class Matcher:
|
|||
If with_alignments is set to True and as_spans is set to False,
|
||||
A list of `(match_id, start, end, alignments)` tuples is returned.
|
||||
"""
|
||||
self._require_patterns()
|
||||
if isinstance(doclike, Doc):
|
||||
doc = doclike
|
||||
length = len(doc)
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import warnings
|
||||
from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable, Sequence
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
@ -6,7 +7,7 @@ import srsly
|
|||
from .pipe import Pipe
|
||||
from ..training import Example
|
||||
from ..language import Language
|
||||
from ..errors import Errors
|
||||
from ..errors import Errors, Warnings
|
||||
from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList
|
||||
from ..tokens import Doc, Span
|
||||
from ..matcher import Matcher, PhraseMatcher
|
||||
|
@ -139,6 +140,7 @@ class EntityRuler(Pipe):
|
|||
error_handler(self.name, self, [doc], e)
|
||||
|
||||
def match(self, doc: Doc):
|
||||
self._require_patterns()
|
||||
matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
|
||||
matches = set(
|
||||
[(m_id, start, end) for m_id, start, end in matches if start != end]
|
||||
|
@ -327,6 +329,11 @@ class EntityRuler(Pipe):
|
|||
self.nlp.vocab, attr=self.phrase_matcher_attr, validate=self._validate
|
||||
)
|
||||
|
||||
def _require_patterns(self) -> None:
    """Emit warning W036, formatted with this component's name, if len(self) is 0."""
    is_empty = len(self) == 0
    if is_empty:
        no_patterns_msg = Warnings.W036.format(name=self.name)
        warnings.warn(no_patterns_msg)
|
||||
|
||||
def _split_label(self, label: str) -> Tuple[str, str]:
|
||||
"""Split Entity label into ent_label and ent_id if it contains self.ent_id_sep
|
||||
|
||||
|
|
|
@ -33,6 +33,15 @@ def test_matcher_from_api_docs(en_vocab):
|
|||
assert len(patterns[0])
|
||||
|
||||
|
||||
def test_matcher_empty_patterns_warns(en_vocab):
    """Calling a Matcher with no patterns on a Doc must emit a UserWarning."""
    empty_matcher = Matcher(en_vocab)
    assert len(empty_matcher) == 0
    words = ["This", "is", "quite", "something"]
    doc = Doc(en_vocab, words=words)
    # The warning is expected from the call itself, not from construction.
    with pytest.warns(UserWarning):
        empty_matcher(doc)
    assert len(doc.ents) == 0
|
||||
|
||||
|
||||
def test_matcher_from_usage_docs(en_vocab):
|
||||
text = "Wow 😀 This is really cool! 😂 😂"
|
||||
doc = Doc(en_vocab, words=text.split(" "))
|
||||
|
|
|
@ -46,6 +46,17 @@ def test_entity_ruler_init(nlp, patterns):
|
|||
assert doc.ents[1].label_ == "BYE"
|
||||
|
||||
|
||||
def test_entity_ruler_no_patterns_warns(nlp):
    """Running a pipeline whose entity_ruler has no patterns must emit a UserWarning."""
    # A directly constructed ruler starts out with no patterns and no labels.
    standalone_ruler = EntityRuler(nlp)
    assert len(standalone_ruler) == 0
    assert len(standalone_ruler.labels) == 0
    # The pipeline gets its own entity_ruler component via add_pipe.
    nlp.add_pipe("entity_ruler")
    assert nlp.pipe_names == ["entity_ruler"]
    with pytest.warns(UserWarning):
        processed = nlp("hello world bye bye")
    assert len(processed.ents) == 0
|
||||
|
||||
|
||||
def test_entity_ruler_init_patterns(nlp, patterns):
|
||||
# initialize with patterns
|
||||
ruler = nlp.add_pipe("entity_ruler")
|
||||
|
|
Loading…
Reference in New Issue
Block a user