mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
Filter W036 for entity ruler, etc. (#8424)
This commit is contained in:
parent
e39d1bd4ab
commit
ec71a6b572
|
@ -3,6 +3,7 @@ from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable,
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import srsly
|
import srsly
|
||||||
|
import warnings
|
||||||
|
|
||||||
from .pipe import Pipe
|
from .pipe import Pipe
|
||||||
from ..training import Example
|
from ..training import Example
|
||||||
|
@ -141,7 +142,9 @@ class EntityRuler(Pipe):
|
||||||
|
|
||||||
def match(self, doc: Doc):
|
def match(self, doc: Doc):
|
||||||
self._require_patterns()
|
self._require_patterns()
|
||||||
matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
|
with warnings.catch_warnings():
|
||||||
|
warnings.filterwarnings("ignore", message="\\[W036")
|
||||||
|
matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
|
||||||
matches = set(
|
matches = set(
|
||||||
[(m_id, start, end) for m_id, start, end in matches if start != end]
|
[(m_id, start, end) for m_id, start, end in matches if start != end]
|
||||||
)
|
)
|
||||||
|
@ -276,7 +279,7 @@ class EntityRuler(Pipe):
|
||||||
current_index = i
|
current_index = i
|
||||||
break
|
break
|
||||||
subsequent_pipes = [
|
subsequent_pipes = [
|
||||||
pipe for pipe in self.nlp.pipe_names[current_index + 1 :]
|
pipe for pipe in self.nlp.pipe_names[current_index :]
|
||||||
]
|
]
|
||||||
except ValueError:
|
except ValueError:
|
||||||
subsequent_pipes = []
|
subsequent_pipes = []
|
||||||
|
@ -334,11 +337,6 @@ class EntityRuler(Pipe):
|
||||||
if len(self) == 0:
|
if len(self) == 0:
|
||||||
warnings.warn(Warnings.W036.format(name=self.name))
|
warnings.warn(Warnings.W036.format(name=self.name))
|
||||||
|
|
||||||
def _require_patterns(self) -> None:
|
|
||||||
"""Raise a warning if this component has no patterns defined."""
|
|
||||||
if len(self) == 0:
|
|
||||||
warnings.warn(Warnings.W036.format(name=self.name))
|
|
||||||
|
|
||||||
def _split_label(self, label: str) -> Tuple[str, str]:
|
def _split_label(self, label: str) -> Tuple[str, str]:
|
||||||
"""Split Entity label into ent_label and ent_id if it contains self.ent_id_sep
|
"""Split Entity label into ent_label and ent_id if it contains self.ent_id_sep
|
||||||
|
|
||||||
|
|
|
@ -481,6 +481,7 @@ def test_matcher_schema_token_attributes(en_vocab, pattern, text):
|
||||||
assert len(matches) == 1
|
assert len(matches) == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.filterwarnings("ignore:\\[W036")
|
||||||
def test_matcher_valid_callback(en_vocab):
|
def test_matcher_valid_callback(en_vocab):
|
||||||
"""Test that on_match can only be None or callable."""
|
"""Test that on_match can only be None or callable."""
|
||||||
matcher = Matcher(en_vocab)
|
matcher = Matcher(en_vocab)
|
||||||
|
|
|
@ -180,6 +180,7 @@ def test_matcher_sets_return_correct_tokens(en_vocab):
|
||||||
assert texts == ["zero", "one", "two"]
|
assert texts == ["zero", "one", "two"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.filterwarnings("ignore:\\[W036")
|
||||||
def test_matcher_remove():
|
def test_matcher_remove():
|
||||||
nlp = English()
|
nlp = English()
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
|
|
|
@ -324,6 +324,7 @@ def test_append_alias(nlp):
|
||||||
assert len(mykb.get_alias_candidates("douglas")) == 3
|
assert len(mykb.get_alias_candidates("douglas")) == 3
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.filterwarnings("ignore:\\[W036")
|
||||||
def test_append_invalid_alias(nlp):
|
def test_append_invalid_alias(nlp):
|
||||||
"""Test that append an alias will throw an error if prior probs are exceeding 1"""
|
"""Test that append an alias will throw an error if prior probs are exceeding 1"""
|
||||||
mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
|
mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
|
||||||
|
@ -342,6 +343,7 @@ def test_append_invalid_alias(nlp):
|
||||||
mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.2)
|
mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.2)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.filterwarnings("ignore:\\[W036")
|
||||||
def test_preserving_links_asdoc(nlp):
|
def test_preserving_links_asdoc(nlp):
|
||||||
"""Test that Span.as_doc preserves the existing entity links"""
|
"""Test that Span.as_doc preserves the existing entity links"""
|
||||||
vector_length = 1
|
vector_length = 1
|
||||||
|
|
|
@ -98,7 +98,8 @@ def test_entity_ruler_clear(nlp, patterns):
|
||||||
assert len(doc.ents) == 1
|
assert len(doc.ents) == 1
|
||||||
ruler.clear()
|
ruler.clear()
|
||||||
assert len(ruler.labels) == 0
|
assert len(ruler.labels) == 0
|
||||||
doc = nlp("hello world")
|
with pytest.warns(UserWarning):
|
||||||
|
doc = nlp("hello world")
|
||||||
assert len(doc.ents) == 0
|
assert len(doc.ents) == 0
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user