Filter W036 for entity ruler, etc. (#8424)

This commit is contained in:
Adriane Boyd 2021-06-21 09:34:29 +02:00 committed by GitHub
parent e39d1bd4ab
commit ec71a6b572
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 11 additions and 8 deletions

View File

@@ -3,6 +3,7 @@ from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable,
from collections import defaultdict from collections import defaultdict
from pathlib import Path from pathlib import Path
import srsly import srsly
import warnings
from .pipe import Pipe from .pipe import Pipe
from ..training import Example from ..training import Example
@@ -141,6 +142,8 @@ class EntityRuler(Pipe):
def match(self, doc: Doc): def match(self, doc: Doc):
self._require_patterns() self._require_patterns()
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="\\[W036")
matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc)) matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
matches = set( matches = set(
[(m_id, start, end) for m_id, start, end in matches if start != end] [(m_id, start, end) for m_id, start, end in matches if start != end]
@@ -276,7 +279,7 @@ class EntityRuler(Pipe):
current_index = i current_index = i
break break
subsequent_pipes = [ subsequent_pipes = [
pipe for pipe in self.nlp.pipe_names[current_index + 1 :] pipe for pipe in self.nlp.pipe_names[current_index :]
] ]
except ValueError: except ValueError:
subsequent_pipes = [] subsequent_pipes = []
@@ -334,11 +337,6 @@ class EntityRuler(Pipe):
if len(self) == 0: if len(self) == 0:
warnings.warn(Warnings.W036.format(name=self.name)) warnings.warn(Warnings.W036.format(name=self.name))
def _require_patterns(self) -> None:
"""Raise a warning if this component has no patterns defined."""
if len(self) == 0:
warnings.warn(Warnings.W036.format(name=self.name))
def _split_label(self, label: str) -> Tuple[str, str]: def _split_label(self, label: str) -> Tuple[str, str]:
"""Split Entity label into ent_label and ent_id if it contains self.ent_id_sep """Split Entity label into ent_label and ent_id if it contains self.ent_id_sep

View File

@@ -481,6 +481,7 @@ def test_matcher_schema_token_attributes(en_vocab, pattern, text):
assert len(matches) == 1 assert len(matches) == 1
@pytest.mark.filterwarnings("ignore:\\[W036")
def test_matcher_valid_callback(en_vocab): def test_matcher_valid_callback(en_vocab):
"""Test that on_match can only be None or callable.""" """Test that on_match can only be None or callable."""
matcher = Matcher(en_vocab) matcher = Matcher(en_vocab)

View File

@@ -180,6 +180,7 @@ def test_matcher_sets_return_correct_tokens(en_vocab):
assert texts == ["zero", "one", "two"] assert texts == ["zero", "one", "two"]
@pytest.mark.filterwarnings("ignore:\\[W036")
def test_matcher_remove(): def test_matcher_remove():
nlp = English() nlp = English()
matcher = Matcher(nlp.vocab) matcher = Matcher(nlp.vocab)

View File

@@ -324,6 +324,7 @@ def test_append_alias(nlp):
assert len(mykb.get_alias_candidates("douglas")) == 3 assert len(mykb.get_alias_candidates("douglas")) == 3
@pytest.mark.filterwarnings("ignore:\\[W036")
def test_append_invalid_alias(nlp): def test_append_invalid_alias(nlp):
"""Test that append an alias will throw an error if prior probs are exceeding 1""" """Test that append an alias will throw an error if prior probs are exceeding 1"""
mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1) mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
@@ -342,6 +343,7 @@ def test_append_invalid_alias(nlp):
mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.2) mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.2)
@pytest.mark.filterwarnings("ignore:\\[W036")
def test_preserving_links_asdoc(nlp): def test_preserving_links_asdoc(nlp):
"""Test that Span.as_doc preserves the existing entity links""" """Test that Span.as_doc preserves the existing entity links"""
vector_length = 1 vector_length = 1

View File

@@ -98,6 +98,7 @@ def test_entity_ruler_clear(nlp, patterns):
assert len(doc.ents) == 1 assert len(doc.ents) == 1
ruler.clear() ruler.clear()
assert len(ruler.labels) == 0 assert len(ruler.labels) == 0
with pytest.warns(UserWarning):
doc = nlp("hello world") doc = nlp("hello world")
assert len(doc.ents) == 0 assert len(doc.ents) == 0