Filter W036 for entity ruler, etc. (#8424)

This commit is contained in:
Adriane Boyd 2021-06-21 09:34:29 +02:00 committed by GitHub
parent e39d1bd4ab
commit ec71a6b572
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 11 additions and 8 deletions

View File

@ -3,6 +3,7 @@ from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable,
from collections import defaultdict
from pathlib import Path
import srsly
import warnings
from .pipe import Pipe
from ..training import Example
@ -141,6 +142,8 @@ class EntityRuler(Pipe):
def match(self, doc: Doc):
self._require_patterns()
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="\\[W036")
matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
matches = set(
[(m_id, start, end) for m_id, start, end in matches if start != end]
@ -276,7 +279,7 @@ class EntityRuler(Pipe):
current_index = i
break
subsequent_pipes = [
pipe for pipe in self.nlp.pipe_names[current_index + 1 :]
pipe for pipe in self.nlp.pipe_names[current_index :]
]
except ValueError:
subsequent_pipes = []
@ -334,11 +337,6 @@ class EntityRuler(Pipe):
if len(self) == 0:
warnings.warn(Warnings.W036.format(name=self.name))
def _require_patterns(self) -> None:
"""Raise a warning if this component has no patterns defined."""
if len(self) == 0:
warnings.warn(Warnings.W036.format(name=self.name))
def _split_label(self, label: str) -> Tuple[str, str]:
"""Split Entity label into ent_label and ent_id if it contains self.ent_id_sep

View File

@ -481,6 +481,7 @@ def test_matcher_schema_token_attributes(en_vocab, pattern, text):
assert len(matches) == 1
@pytest.mark.filterwarnings("ignore:\\[W036")
def test_matcher_valid_callback(en_vocab):
"""Test that on_match can only be None or callable."""
matcher = Matcher(en_vocab)

View File

@ -180,6 +180,7 @@ def test_matcher_sets_return_correct_tokens(en_vocab):
assert texts == ["zero", "one", "two"]
@pytest.mark.filterwarnings("ignore:\\[W036")
def test_matcher_remove():
nlp = English()
matcher = Matcher(nlp.vocab)

View File

@ -324,6 +324,7 @@ def test_append_alias(nlp):
assert len(mykb.get_alias_candidates("douglas")) == 3
@pytest.mark.filterwarnings("ignore:\\[W036")
def test_append_invalid_alias(nlp):
"""Test that append an alias will throw an error if prior probs are exceeding 1"""
mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
@ -342,6 +343,7 @@ def test_append_invalid_alias(nlp):
mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.2)
@pytest.mark.filterwarnings("ignore:\\[W036")
def test_preserving_links_asdoc(nlp):
"""Test that Span.as_doc preserves the existing entity links"""
vector_length = 1

View File

@ -98,6 +98,7 @@ def test_entity_ruler_clear(nlp, patterns):
assert len(doc.ents) == 1
ruler.clear()
assert len(ruler.labels) == 0
with pytest.warns(UserWarning):
doc = nlp("hello world")
assert len(doc.ents) == 0