Replace EntityRuler with SpanRuler implementation (#11320)

* Replace EntityRuler with SpanRuler implementation

Remove `EntityRuler` and rename the `SpanRuler`-based
`future_entity_ruler` to `entity_ruler`.

Main changes:

* It is no longer possible to load patterns on init as with
`EntityRuler(patterns=)`.
* The older serialization formats (`patterns.jsonl`) are no longer
supported and the related tests are removed.
* The config settings are only stored in the config, not in the
serialized component (in particular the `phrase_matcher_attr` and
overwrite settings).
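
As a quick illustration of the new usage (a minimal sketch, not part of this
commit, assuming a blank English pipeline), patterns are now always added
after the component is created:

```python
import spacy

nlp = spacy.blank("en")
# the "entity_ruler" factory is now backed by the SpanRuler implementation
ruler = nlp.add_pipe("entity_ruler", config={"phrase_matcher_attr": "LOWER"})
# patterns can no longer be passed on init; add them explicitly
ruler.add_patterns([{"label": "ORG", "pattern": "Apple"}])

doc = nlp("apple announced new products")
print([(ent.text, ent.label_) for ent in doc.ents])  # [('apple', 'ORG')]
```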

* Add migration guide to EntityRuler API docs

* docs update

* Minor edit

Co-authored-by: svlandeg <svlandeg@github.com>
Adriane Boyd 2022-10-24 09:11:35 +02:00 committed by GitHub
parent a4bd890f32
commit cae4589f5a
15 changed files with 233 additions and 1072 deletions

View File

@@ -460,13 +460,13 @@ class Errors(metaclass=ErrorsWithCodes):
             "same, but found '{nlp}' and '{vocab}' respectively.")
     E152 = ("The attribute {attr} is not supported for token patterns. "
             "Please use the option `validate=True` with the Matcher, PhraseMatcher, "
-            "EntityRuler or AttributeRuler for more details.")
+            "SpanRuler or AttributeRuler for more details.")
     E153 = ("The value type {vtype} is not supported for token patterns. "
             "Please use the option validate=True with Matcher, PhraseMatcher, "
-            "EntityRuler or AttributeRuler for more details.")
+            "SpanRuler or AttributeRuler for more details.")
     E154 = ("One of the attributes or values is not supported for token "
             "patterns. Please use the option `validate=True` with the Matcher, "
-            "PhraseMatcher, or EntityRuler for more details.")
+            "PhraseMatcher, or SpanRuler for more details.")
     E155 = ("The pipeline needs to include a {pipe} in order to use "
             "Matcher or PhraseMatcher with the attribute {attr}. "
             "Try using `nlp()` instead of `nlp.make_doc()` or `list(nlp.pipe())` "
@@ -917,8 +917,6 @@ class Errors(metaclass=ErrorsWithCodes):
     E1021 = ("`pos` value \"{pp}\" is not a valid Universal Dependencies tag. "
              "Non-UD tags should use the `tag` property.")
     E1022 = ("Words must be of type str or int, but input is of type '{wtype}'")
-    E1023 = ("Couldn't read EntityRuler from the {path}. This file doesn't "
-             "exist.")
     E1024 = ("A pattern with {attr_type} '{label}' is not present in "
              "'{component}' patterns.")
     E1025 = ("Cannot intify the value '{value}' as an IOB string. The only "

View File

@@ -3,7 +3,6 @@ from .dep_parser import DependencyParser
 from .edit_tree_lemmatizer import EditTreeLemmatizer
 from .entity_linker import EntityLinker
 from .ner import EntityRecognizer
-from .entity_ruler import EntityRuler
 from .lemmatizer import Lemmatizer
 from .morphologizer import Morphologizer
 from .pipe import Pipe
@@ -23,7 +22,6 @@ __all__ = [
     "DependencyParser",
     "EntityLinker",
     "EntityRecognizer",
-    "EntityRuler",
     "Morphologizer",
     "Lemmatizer",
     "MultiLabel_TextCategorizer",

View File

@@ -1,525 +0,0 @@
from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable, Sequence
import warnings
from collections import defaultdict
from pathlib import Path
import srsly

from .pipe import Pipe
from ..training import Example
from ..language import Language
from ..errors import Errors, Warnings
from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList, registry
from ..tokens import Doc, Span
from ..matcher import Matcher, PhraseMatcher
from ..scorer import get_ner_prf

DEFAULT_ENT_ID_SEP = "||"
PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]


@Language.factory(
    "entity_ruler",
    assigns=["doc.ents", "token.ent_type", "token.ent_iob"],
    default_config={
        "phrase_matcher_attr": None,
        "validate": False,
        "overwrite_ents": False,
        "ent_id_sep": DEFAULT_ENT_ID_SEP,
        "scorer": {"@scorers": "spacy.entity_ruler_scorer.v1"},
    },
    default_score_weights={
        "ents_f": 1.0,
        "ents_p": 0.0,
        "ents_r": 0.0,
        "ents_per_type": None,
    },
)
def make_entity_ruler(
    nlp: Language,
    name: str,
    phrase_matcher_attr: Optional[Union[int, str]],
    validate: bool,
    overwrite_ents: bool,
    ent_id_sep: str,
    scorer: Optional[Callable],
):
    return EntityRuler(
        nlp,
        name,
        phrase_matcher_attr=phrase_matcher_attr,
        validate=validate,
        overwrite_ents=overwrite_ents,
        ent_id_sep=ent_id_sep,
        scorer=scorer,
    )


def entity_ruler_score(examples, **kwargs):
    return get_ner_prf(examples)


@registry.scorers("spacy.entity_ruler_scorer.v1")
def make_entity_ruler_scorer():
    return entity_ruler_score


class EntityRuler(Pipe):
    """The EntityRuler lets you add spans to the `Doc.ents` using token-based
    rules or exact phrase matches. It can be combined with the statistical
    `EntityRecognizer` to boost accuracy, or used on its own to implement a
    purely rule-based entity recognition system. After initialization, the
    component is typically added to the pipeline using `nlp.add_pipe`.

    DOCS: https://spacy.io/api/entityruler
    USAGE: https://spacy.io/usage/rule-based-matching#entityruler
    """

    def __init__(
        self,
        nlp: Language,
        name: str = "entity_ruler",
        *,
        phrase_matcher_attr: Optional[Union[int, str]] = None,
        validate: bool = False,
        overwrite_ents: bool = False,
        ent_id_sep: str = DEFAULT_ENT_ID_SEP,
        patterns: Optional[List[PatternType]] = None,
        scorer: Optional[Callable] = entity_ruler_score,
    ) -> None:
        """Initialize the entity ruler. If patterns are supplied here, they
        need to be a list of dictionaries with a `"label"` and `"pattern"`
        key. A pattern can either be a token pattern (list) or a phrase pattern
        (string). For example: `{'label': 'ORG', 'pattern': 'Apple'}`.

        nlp (Language): The shared nlp object to pass the vocab to the matchers
            and process phrase patterns.
        name (str): Instance name of the current pipeline component. Typically
            passed in automatically from the factory when the component is
            added. Used to disable the current entity ruler while creating
            phrase patterns with the nlp object.
        phrase_matcher_attr (int / str): Token attribute to match on, passed
            to the internal PhraseMatcher as `attr`.
        validate (bool): Whether patterns should be validated, passed to
            Matcher and PhraseMatcher as `validate`.
        patterns (iterable): Optional patterns to load in.
        overwrite_ents (bool): If existing entities are present, e.g. entities
            added by the model, overwrite them by matches if necessary.
        ent_id_sep (str): Separator used internally for entity IDs.
        scorer (Optional[Callable]): The scoring method. Defaults to
            spacy.scorer.get_ner_prf.

        DOCS: https://spacy.io/api/entityruler#init
        """
        self.nlp = nlp
        self.name = name
        self.overwrite = overwrite_ents
        self.token_patterns = defaultdict(list)  # type: ignore
        self.phrase_patterns = defaultdict(list)  # type: ignore
        self._validate = validate
        self.matcher = Matcher(nlp.vocab, validate=validate)
        self.phrase_matcher_attr = phrase_matcher_attr
        self.phrase_matcher = PhraseMatcher(
            nlp.vocab, attr=self.phrase_matcher_attr, validate=validate
        )
        self.ent_id_sep = ent_id_sep
        self._ent_ids = defaultdict(tuple)  # type: ignore
        if patterns is not None:
            self.add_patterns(patterns)
        self.scorer = scorer

    def __len__(self) -> int:
        """The number of all patterns added to the entity ruler."""
        n_token_patterns = sum(len(p) for p in self.token_patterns.values())
        n_phrase_patterns = sum(len(p) for p in self.phrase_patterns.values())
        return n_token_patterns + n_phrase_patterns

    def __contains__(self, label: str) -> bool:
        """Whether a label is present in the patterns."""
        return label in self.token_patterns or label in self.phrase_patterns

    def __call__(self, doc: Doc) -> Doc:
        """Find matches in document and add them as entities.

        doc (Doc): The Doc object in the pipeline.
        RETURNS (Doc): The Doc with added entities, if available.

        DOCS: https://spacy.io/api/entityruler#call
        """
        error_handler = self.get_error_handler()
        try:
            matches = self.match(doc)
            self.set_annotations(doc, matches)
            return doc
        except Exception as e:
            return error_handler(self.name, self, [doc], e)

    def match(self, doc: Doc):
        self._require_patterns()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message="\\[W036")
            matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))

        final_matches = set(
            [(m_id, start, end) for m_id, start, end in matches if start != end]
        )
        get_sort_key = lambda m: (m[2] - m[1], -m[1])
        final_matches = sorted(final_matches, key=get_sort_key, reverse=True)
        return final_matches

    def set_annotations(self, doc, matches):
        """Modify the document in place."""
        entities = list(doc.ents)
        new_entities = []
        seen_tokens = set()
        for match_id, start, end in matches:
            if any(t.ent_type for t in doc[start:end]) and not self.overwrite:
                continue
            # check for end - 1 here because boundaries are inclusive
            if start not in seen_tokens and end - 1 not in seen_tokens:
                if match_id in self._ent_ids:
                    label, ent_id = self._ent_ids[match_id]
                    span = Span(doc, start, end, label=label, span_id=ent_id)
                else:
                    span = Span(doc, start, end, label=match_id)
                new_entities.append(span)
                entities = [
                    e for e in entities if not (e.start < end and e.end > start)
                ]
                seen_tokens.update(range(start, end))
        doc.ents = entities + new_entities

    @property
    def labels(self) -> Tuple[str, ...]:
        """All labels present in the match patterns.

        RETURNS (set): The string labels.

        DOCS: https://spacy.io/api/entityruler#labels
        """
        keys = set(self.token_patterns.keys())
        keys.update(self.phrase_patterns.keys())
        all_labels = set()
        for l in keys:
            if self.ent_id_sep in l:
                label, _ = self._split_label(l)
                all_labels.add(label)
            else:
                all_labels.add(l)
        return tuple(sorted(all_labels))

    def initialize(
        self,
        get_examples: Callable[[], Iterable[Example]],
        *,
        nlp: Optional[Language] = None,
        patterns: Optional[Sequence[PatternType]] = None,
    ):
        """Initialize the pipe for training.

        get_examples (Callable[[], Iterable[Example]]): Function that
            returns a representative sample of gold-standard Example objects.
        nlp (Language): The current nlp object the component is part of.
        patterns (Optional[Iterable[PatternType]]): The list of patterns.

        DOCS: https://spacy.io/api/entityruler#initialize
        """
        self.clear()
        if patterns:
            self.add_patterns(patterns)  # type: ignore[arg-type]

    @property
    def ent_ids(self) -> Tuple[Optional[str], ...]:
        """All entity IDs present in the match patterns' `id` properties.

        RETURNS (set): The string entity ids.

        DOCS: https://spacy.io/api/entityruler#ent_ids
        """
        keys = set(self.token_patterns.keys())
        keys.update(self.phrase_patterns.keys())
        all_ent_ids = set()
        for l in keys:
            if self.ent_id_sep in l:
                _, ent_id = self._split_label(l)
                all_ent_ids.add(ent_id)
        return tuple(all_ent_ids)

    @property
    def patterns(self) -> List[PatternType]:
        """Get all patterns that were added to the entity ruler.

        RETURNS (list): The original patterns, one dictionary per pattern.

        DOCS: https://spacy.io/api/entityruler#patterns
        """
        all_patterns = []
        for label, patterns in self.token_patterns.items():
            for pattern in patterns:
                ent_label, ent_id = self._split_label(label)
                p = {"label": ent_label, "pattern": pattern}
                if ent_id:
                    p["id"] = ent_id
                all_patterns.append(p)
        for label, patterns in self.phrase_patterns.items():
            for pattern in patterns:
                ent_label, ent_id = self._split_label(label)
                p = {"label": ent_label, "pattern": pattern.text}
                if ent_id:
                    p["id"] = ent_id
                all_patterns.append(p)
        return all_patterns

    def add_patterns(self, patterns: List[PatternType]) -> None:
        """Add patterns to the entity ruler. A pattern can either be a token
        pattern (list of dicts) or a phrase pattern (string). For example:
        {'label': 'ORG', 'pattern': 'Apple'}
        {'label': 'GPE', 'pattern': [{'lower': 'san'}, {'lower': 'francisco'}]}

        patterns (list): The patterns to add.

        DOCS: https://spacy.io/api/entityruler#add_patterns
        """
        # disable the nlp components after this one in case they hadn't been
        # initialized / deserialised yet
        try:
            current_index = -1
            for i, (name, pipe) in enumerate(self.nlp.pipeline):
                if self == pipe:
                    current_index = i
                    break
            subsequent_pipes = [pipe for pipe in self.nlp.pipe_names[current_index:]]
        except ValueError:
            subsequent_pipes = []
        with self.nlp.select_pipes(disable=subsequent_pipes):
            token_patterns = []
            phrase_pattern_labels = []
            phrase_pattern_texts = []
            phrase_pattern_ids = []
            for entry in patterns:
                if isinstance(entry["pattern"], str):
                    phrase_pattern_labels.append(entry["label"])
                    phrase_pattern_texts.append(entry["pattern"])
                    phrase_pattern_ids.append(entry.get("id"))
                elif isinstance(entry["pattern"], list):
                    token_patterns.append(entry)
            phrase_patterns = []
            for label, pattern, ent_id in zip(
                phrase_pattern_labels,
                self.nlp.pipe(phrase_pattern_texts),
                phrase_pattern_ids,
            ):
                phrase_pattern = {"label": label, "pattern": pattern}
                if ent_id:
                    phrase_pattern["id"] = ent_id
                phrase_patterns.append(phrase_pattern)
            for entry in token_patterns + phrase_patterns:  # type: ignore[operator]
                label = entry["label"]  # type: ignore
                if "id" in entry:
                    ent_label = label
                    label = self._create_label(label, entry["id"])
                    key = self.matcher._normalize_key(label)
                    self._ent_ids[key] = (ent_label, entry["id"])
                pattern = entry["pattern"]  # type: ignore
                if isinstance(pattern, Doc):
                    self.phrase_patterns[label].append(pattern)
                    self.phrase_matcher.add(label, [pattern])  # type: ignore
                elif isinstance(pattern, list):
                    self.token_patterns[label].append(pattern)
                    self.matcher.add(label, [pattern])
                else:
                    raise ValueError(Errors.E097.format(pattern=pattern))

    def clear(self) -> None:
        """Reset all patterns."""
        self.token_patterns = defaultdict(list)
        self.phrase_patterns = defaultdict(list)
        self._ent_ids = defaultdict(tuple)
        self.matcher = Matcher(self.nlp.vocab, validate=self._validate)
        self.phrase_matcher = PhraseMatcher(
            self.nlp.vocab, attr=self.phrase_matcher_attr, validate=self._validate
        )

    def remove(self, ent_id: str) -> None:
        """Remove a pattern by its ent_id if a pattern with this ent_id was
        added before.

        ent_id (str): id of the pattern to be removed.
        RETURNS: None

        DOCS: https://spacy.io/api/entityruler#remove
        """
        label_id_pairs = [
            (label, eid) for (label, eid) in self._ent_ids.values() if eid == ent_id
        ]
        if not label_id_pairs:
            raise ValueError(
                Errors.E1024.format(attr_type="ID", label=ent_id, component=self.name)
            )
        created_labels = [
            self._create_label(label, eid) for (label, eid) in label_id_pairs
        ]
        # remove the patterns from self.phrase_patterns
        self.phrase_patterns = defaultdict(
            list,
            {
                label: val
                for (label, val) in self.phrase_patterns.items()
                if label not in created_labels
            },
        )
        # remove the patterns from self.token_patterns
        self.token_patterns = defaultdict(
            list,
            {
                label: val
                for (label, val) in self.token_patterns.items()
                if label not in created_labels
            },
        )
        # remove the patterns from the matchers
        for label in created_labels:
            if label in self.phrase_matcher:
                self.phrase_matcher.remove(label)
            else:
                self.matcher.remove(label)

    def _require_patterns(self) -> None:
        """Raise a warning if this component has no patterns defined."""
        if len(self) == 0:
            warnings.warn(Warnings.W036.format(name=self.name))

    def _split_label(self, label: str) -> Tuple[str, Optional[str]]:
        """Split entity label into ent_label and ent_id if it contains
        self.ent_id_sep.

        label (str): The value of label in a pattern entry
        RETURNS (tuple): ent_label, ent_id
        """
        if self.ent_id_sep in label:
            ent_label, ent_id = label.rsplit(self.ent_id_sep, 1)
        else:
            ent_label = label
            ent_id = None  # type: ignore
        return ent_label, ent_id

    def _create_label(self, label: Any, ent_id: Any) -> str:
        """Join entity label with ent_id if the pattern has an `id` attribute.
        If ent_id is not a string, the label is returned as is.

        label (str): The label to set for ent.label_
        ent_id (str): The ent_id to attach to the label
        RETURNS (str): The ent_label joined with configured `ent_id_sep`
        """
        if isinstance(ent_id, str):
            label = f"{label}{self.ent_id_sep}{ent_id}"
        return label

    def from_bytes(
        self, patterns_bytes: bytes, *, exclude: Iterable[str] = SimpleFrozenList()
    ) -> "EntityRuler":
        """Load the entity ruler from a bytestring.

        patterns_bytes (bytes): The bytestring to load.
        RETURNS (EntityRuler): The loaded entity ruler.

        DOCS: https://spacy.io/api/entityruler#from_bytes
        """
        cfg = srsly.msgpack_loads(patterns_bytes)
        self.clear()
        if isinstance(cfg, dict):
            self.add_patterns(cfg.get("patterns", cfg))
            self.overwrite = cfg.get("overwrite", False)
            self.phrase_matcher_attr = cfg.get("phrase_matcher_attr", None)
            self.phrase_matcher = PhraseMatcher(
                self.nlp.vocab, attr=self.phrase_matcher_attr
            )
            self.ent_id_sep = cfg.get("ent_id_sep", DEFAULT_ENT_ID_SEP)
        else:
            self.add_patterns(cfg)
        return self

    def to_bytes(self, *, exclude: Iterable[str] = SimpleFrozenList()) -> bytes:
        """Serialize the entity ruler patterns to a bytestring.

        RETURNS (bytes): The serialized patterns.

        DOCS: https://spacy.io/api/entityruler#to_bytes
        """
        serial = {
            "overwrite": self.overwrite,
            "ent_id_sep": self.ent_id_sep,
            "phrase_matcher_attr": self.phrase_matcher_attr,
            "patterns": self.patterns,
        }
        return srsly.msgpack_dumps(serial)

    def from_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
    ) -> "EntityRuler":
        """Load the entity ruler from a file. Expects a file containing
        newline-delimited JSON (JSONL) with one entry per line.

        path (str / Path): The JSONL file to load.
        RETURNS (EntityRuler): The loaded entity ruler.

        DOCS: https://spacy.io/api/entityruler#from_disk
        """
        path = ensure_path(path)
        self.clear()
        depr_patterns_path = path.with_suffix(".jsonl")
        if path.suffix == ".jsonl":  # user provides a jsonl
            if path.is_file():
                patterns = srsly.read_jsonl(path)
                self.add_patterns(patterns)
            else:
                raise ValueError(Errors.E1023.format(path=path))
        elif depr_patterns_path.is_file():
            patterns = srsly.read_jsonl(depr_patterns_path)
            self.add_patterns(patterns)
        elif path.is_dir():  # path is a valid directory
            cfg = {}
            deserializers_patterns = {
                "patterns": lambda p: self.add_patterns(
                    srsly.read_jsonl(p.with_suffix(".jsonl"))
                )
            }
            deserializers_cfg = {"cfg": lambda p: cfg.update(srsly.read_json(p))}
            from_disk(path, deserializers_cfg, {})
            self.overwrite = cfg.get("overwrite", False)
            self.phrase_matcher_attr = cfg.get("phrase_matcher_attr")
            self.ent_id_sep = cfg.get("ent_id_sep", DEFAULT_ENT_ID_SEP)
            self.phrase_matcher = PhraseMatcher(
                self.nlp.vocab, attr=self.phrase_matcher_attr
            )
            from_disk(path, deserializers_patterns, {})
        else:  # path is not a valid directory or file
            raise ValueError(Errors.E146.format(path=path))
        return self

    def to_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
    ) -> None:
        """Save the entity ruler patterns to a directory. The patterns will be
        saved as newline-delimited JSON (JSONL).

        path (str / Path): The JSONL file to save.

        DOCS: https://spacy.io/api/entityruler#to_disk
        """
        path = ensure_path(path)
        cfg = {
            "overwrite": self.overwrite,
            "phrase_matcher_attr": self.phrase_matcher_attr,
            "ent_id_sep": self.ent_id_sep,
        }
        serializers = {
            "patterns": lambda p: srsly.write_jsonl(
                p.with_suffix(".jsonl"), self.patterns
            ),
            "cfg": lambda p: srsly.write_json(p, cfg),
        }
        if path.suffix == ".jsonl":  # user wants to save only JSONL
            srsly.write_jsonl(path, self.patterns)
        else:
            to_disk(path, serializers, {})

View File

@@ -11,7 +11,7 @@ from ..language import Language
 from ..errors import Errors, Warnings
 from ..util import ensure_path, SimpleFrozenList, registry
 from ..tokens import Doc, Span
-from ..scorer import Scorer
+from ..scorer import Scorer, get_ner_prf
 from ..matcher import Matcher, PhraseMatcher
 from .. import util
@@ -20,7 +20,7 @@ DEFAULT_SPANS_KEY = "ruler"

 @Language.factory(
-    "future_entity_ruler",
+    "entity_ruler",
     assigns=["doc.ents"],
     default_config={
         "phrase_matcher_attr": None,
@@ -63,6 +63,15 @@ def make_entity_ruler(
     )

+def entity_ruler_score(examples, **kwargs):
+    return get_ner_prf(examples)
+
+
+@registry.scorers("spacy.entity_ruler_scorer.v1")
+def make_entity_ruler_scorer():
+    return entity_ruler_score
+
+
 @Language.factory(
     "span_ruler",
     assigns=["doc.spans"],
@@ -117,7 +126,7 @@ def prioritize_new_ents_filter(
 ) -> List[Span]:
     """Merge entities and spans into one list without overlaps by allowing
     spans to overwrite any entities that they overlap with. Intended to
-    replicate the overwrite_ents=True behavior from the EntityRuler.
+    replicate the overwrite_ents=True behavior from the v3 EntityRuler.

     entities (Iterable[Span]): The entities, already filtered for overlaps.
     spans (Iterable[Span]): The spans to merge, may contain overlaps.
@@ -148,7 +157,7 @@ def prioritize_existing_ents_filter(
 ) -> List[Span]:
     """Merge entities and spans into one list without overlaps by prioritizing
     existing entities. Intended to replicate the overwrite_ents=False behavior
-    from the EntityRuler.
+    from the v3 EntityRuler.

     entities (Iterable[Span]): The entities, already filtered for overlaps.
     spans (Iterable[Span]): The spans to merge, may contain overlaps.

View File

@@ -87,14 +87,15 @@ def test_issue4373():

 @pytest.mark.issue(4651)
 def test_issue4651_with_phrase_matcher_attr():
-    """Test that the EntityRuler PhraseMatcher is deserialized correctly using
-    the method from_disk when the EntityRuler argument phrase_matcher_attr is
+    """Test that the entity_ruler PhraseMatcher is deserialized correctly using
+    the method from_disk when the entity_ruler argument phrase_matcher_attr is
     specified.
     """
     text = "Spacy is a python library for nlp"
     nlp = English()
     patterns = [{"label": "PYTHON_LIB", "pattern": "spacy", "id": "spaCy"}]
-    ruler = nlp.add_pipe("entity_ruler", config={"phrase_matcher_attr": "LOWER"})
+    config = {"phrase_matcher_attr": "LOWER"}
+    ruler = nlp.add_pipe("entity_ruler", config=config)
     ruler.add_patterns(patterns)
     doc = nlp(text)
     res = [(ent.text, ent.label_, ent.ent_id_) for ent in doc.ents]
@@ -102,7 +103,7 @@ def test_issue4651_with_phrase_matcher_attr():
     with make_tempdir() as d:
         file_path = d / "entityruler"
         ruler.to_disk(file_path)
-        nlp_reloaded.add_pipe("entity_ruler").from_disk(file_path)
+        nlp_reloaded.add_pipe("entity_ruler", config=config).from_disk(file_path)
         doc_reloaded = nlp_reloaded(text)
         res_reloaded = [(ent.text, ent.label_, ent.ent_id_) for ent in doc_reloaded.ents]
         assert res == res_reloaded

View File

@@ -4,7 +4,7 @@ from spacy import registry
 from spacy.tokens import Doc, Span
 from spacy.language import Language
 from spacy.lang.en import English
-from spacy.pipeline import EntityRuler, EntityRecognizer, merge_entities
+from spacy.pipeline import EntityRecognizer, merge_entities
 from spacy.pipeline import SpanRuler
 from spacy.pipeline.ner import DEFAULT_NER_MODEL
 from spacy.errors import MatchPatternError
@@ -12,8 +12,6 @@ from spacy.tests.util import make_tempdir
 from thinc.api import NumpyOps, get_current_ops

-ENTITY_RULERS = ["entity_ruler", "future_entity_ruler"]
-

 @pytest.fixture
 def nlp():
@@ -40,13 +38,12 @@ def add_ent_component(doc):

 @pytest.mark.issue(3345)
-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_issue3345(entity_ruler_factory):
+def test_issue3345():
     """Test case where preset entity crosses sentence boundary."""
     nlp = English()
     doc = Doc(nlp.vocab, words=["I", "live", "in", "New", "York"])
     doc[4].is_sent_start = True
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+    ruler = nlp.add_pipe("entity_ruler")
     ruler.add_patterns([{"label": "GPE", "pattern": "New York"}])
     cfg = {"model": DEFAULT_NER_MODEL}
     model = registry.resolve(cfg, validate=True)["model"]
@@ -65,15 +62,14 @@ def test_issue3345(entity_ruler_factory):

 @pytest.mark.issue(4849)
-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_issue4849(entity_ruler_factory):
+def test_issue4849():
     nlp = English()
     patterns = [
         {"label": "PERSON", "pattern": "joe biden", "id": "joe-biden"},
         {"label": "PERSON", "pattern": "bernie sanders", "id": "bernie-sanders"},
     ]
     ruler = nlp.add_pipe(
-        entity_ruler_factory,
+        "entity_ruler",
         name="entity_ruler",
         config={"phrase_matcher_attr": "LOWER"},
     )
@@ -96,11 +92,10 @@ def test_issue4849(entity_ruler_factory):

 @pytest.mark.issue(5918)
-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_issue5918(entity_ruler_factory):
+def test_issue5918():
     # Test edge case when merging entities.
     nlp = English()
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+    ruler = nlp.add_pipe("entity_ruler")
     patterns = [
         {"label": "ORG", "pattern": "Digicon Inc"},
         {"label": "ORG", "pattern": "Rotan Mosle Inc's"},
@@ -125,10 +120,9 @@ def test_issue5918(entity_ruler_factory):

 @pytest.mark.issue(8168)
-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_issue8168(entity_ruler_factory):
+def test_issue8168():
     nlp = English()
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+    ruler = nlp.add_pipe("entity_ruler")
     patterns = [
         {"label": "ORG", "pattern": "Apple"},
         {
@@ -148,12 +142,9 @@ def test_issue8168(entity_ruler_factory):

 @pytest.mark.issue(8216)
-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_fix8216(nlp, patterns, entity_ruler_factory):
+def test_entity_ruler_fix8216(nlp, patterns):
     """Test that patterns don't get added excessively."""
-    ruler = nlp.add_pipe(
-        entity_ruler_factory, name="entity_ruler", config={"validate": True}
-    )
+    ruler = nlp.add_pipe("entity_ruler", config={"validate": True})
     ruler.add_patterns(patterns)
     pattern_count = sum(len(mm) for mm in ruler.matcher._patterns.values())
     assert pattern_count > 0
@@ -162,16 +153,15 @@ def test_entity_ruler_fix8216(nlp, patterns, entity_ruler_factory):
     assert after_count == pattern_count

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_init(nlp, patterns, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+def test_entity_ruler_init(nlp, patterns):
+    ruler = nlp.add_pipe("entity_ruler")
     ruler.add_patterns(patterns)
     assert len(ruler) == len(patterns)
     assert len(ruler.labels) == 4
     assert "HELLO" in ruler
     assert "BYE" in ruler
     nlp.remove_pipe("entity_ruler")
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+    ruler = nlp.add_pipe("entity_ruler")
     ruler.add_patterns(patterns)
     doc = nlp("hello world bye bye")
     assert len(doc.ents) == 2
@@ -179,23 +169,21 @@ def test_entity_ruler_init(nlp, patterns, entity_ruler_factory):
     assert doc.ents[1].label_ == "BYE"

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_no_patterns_warns(nlp, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+def test_entity_ruler_no_patterns_warns(nlp):
+    ruler = nlp.add_pipe("entity_ruler")
     assert len(ruler) == 0
     assert len(ruler.labels) == 0
     nlp.remove_pipe("entity_ruler")
-    nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+    nlp.add_pipe("entity_ruler")
     assert nlp.pipe_names == ["entity_ruler"]
     with pytest.warns(UserWarning):
         doc = nlp("hello world bye bye")
     assert len(doc.ents) == 0

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_init_patterns(nlp, patterns, entity_ruler_factory):
+def test_entity_ruler_init_patterns(nlp, patterns):
     # initialize with patterns
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+    ruler = nlp.add_pipe("entity_ruler")
     assert len(ruler.labels) == 0
     ruler.initialize(lambda: [], patterns=patterns)
     assert len(ruler.labels) == 4
@@ -207,7 +195,7 @@ def test_entity_ruler_init_patterns(nlp, patterns, entity_ruler_factory):
     nlp.config["initialize"]["components"]["entity_ruler"] = {
         "patterns": {"@misc": "entity_ruler_patterns"}
     }
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+    ruler = nlp.add_pipe("entity_ruler")
     assert len(ruler.labels) == 0
     nlp.initialize()
     assert len(ruler.labels) == 4
@@ -216,20 +204,18 @@ def test_entity_ruler_init_patterns(nlp, patterns, entity_ruler_factory):
     assert doc.ents[1].label_ == "BYE"

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_init_clear(nlp, patterns, entity_ruler_factory):
+def test_entity_ruler_init_clear(nlp, patterns):
     """Test that initialization clears patterns."""
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+    ruler = nlp.add_pipe("entity_ruler")
     ruler.add_patterns(patterns)
     assert len(ruler.labels) == 4
     ruler.initialize(lambda: [])
     assert len(ruler.labels) == 0

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_clear(nlp, patterns, entity_ruler_factory):
+def test_entity_ruler_clear(nlp, patterns):
     """Test that initialization clears patterns."""
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+    ruler = nlp.add_pipe("entity_ruler")
     ruler.add_patterns(patterns)
     assert len(ruler.labels) == 4
     doc = nlp("hello world")
@@ -241,9 +227,8 @@ def test_entity_ruler_clear(nlp, patterns, entity_ruler_factory):
     assert len(doc.ents) == 0

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_existing(nlp, patterns, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+def test_entity_ruler_existing(nlp, patterns):
+    ruler = nlp.add_pipe("entity_ruler")
     ruler.add_patterns(patterns)
     nlp.add_pipe("add_ent", before="entity_ruler")
     doc = nlp("OH HELLO WORLD bye bye")
@@ -252,11 +237,8 @@ def test_entity_ruler_existing(nlp, patterns, entity_ruler_factory):
     assert doc.ents[1].label_ == "BYE"

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_existing_overwrite(nlp, patterns, entity_ruler_factory):
-    ruler = nlp.add_pipe(
-        entity_ruler_factory, name="entity_ruler", config={"overwrite_ents": True}
-    )
+def test_entity_ruler_existing_overwrite(nlp, patterns):
+    ruler = nlp.add_pipe("entity_ruler", config={"overwrite_ents": True})
     ruler.add_patterns(patterns)
     nlp.add_pipe("add_ent", before="entity_ruler")
     doc = nlp("OH HELLO WORLD bye bye")
@@ -266,11 +248,8 @@ def test_entity_ruler_existing_overwrite(nlp, patterns, entity_ruler_factory):
     assert doc.ents[1].label_ == "BYE"

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_existing_complex(nlp, patterns, entity_ruler_factory):
-    ruler = nlp.add_pipe(
-        entity_ruler_factory, name="entity_ruler", config={"overwrite_ents": True}
-    )
+def test_entity_ruler_existing_complex(nlp, patterns):
+    ruler = nlp.add_pipe("entity_ruler", config={"overwrite_ents": True})
     ruler.add_patterns(patterns)
     nlp.add_pipe("add_ent", before="entity_ruler")
     doc = nlp("foo foo bye bye")
@@ -281,11 +260,8 @@ def test_entity_ruler_existing_complex(nlp, patterns, entity_ruler_factory):
     assert len(doc.ents[1]) == 2

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_entity_id(nlp, patterns, entity_ruler_factory):
-    ruler = nlp.add_pipe(
-        entity_ruler_factory, name="entity_ruler", config={"overwrite_ents": True}
-    )
+def test_entity_ruler_entity_id(nlp, patterns):
+    ruler = nlp.add_pipe("entity_ruler", config={"overwrite_ents": True})
     ruler.add_patterns(patterns)
     doc = nlp("Apple is a technology company")
     assert len(doc.ents) == 1
@@ -293,26 +269,23 @@ def test_entity_ruler_entity_id(nlp, patterns, entity_ruler_factory):
     assert doc.ents[0].ent_id_ == "a1"

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_cfg_ent_id_sep(nlp, patterns, entity_ruler_factory):
+def test_entity_ruler_cfg_ent_id_sep(nlp, patterns):
     config = {"overwrite_ents": True, "ent_id_sep": "**"}
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler", config=config)
+    ruler = nlp.add_pipe("entity_ruler", config=config)
     ruler.add_patterns(patterns)
     doc = nlp("Apple is a technology company")
-    if isinstance(ruler, EntityRuler):
-        assert "TECH_ORG**a1" in ruler.phrase_patterns
     assert len(doc.ents) == 1
     assert doc.ents[0].label_ == "TECH_ORG"
     assert doc.ents[0].ent_id_ == "a1"

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_serialize_bytes(nlp, patterns, entity_ruler_factory):
-    ruler = EntityRuler(nlp, patterns=patterns)
+def test_entity_ruler_serialize_bytes(nlp, patterns):
+    ruler = nlp.add_pipe("entity_ruler")
+    ruler.add_patterns(patterns)
     assert len(ruler) == len(patterns)
     assert len(ruler.labels) == 4
     ruler_bytes = ruler.to_bytes()
-    new_ruler = EntityRuler(nlp)
+    new_ruler = nlp.add_pipe("entity_ruler", name="new_ruler")
     assert len(new_ruler) == 0
     assert len(new_ruler.labels) == 0
     new_ruler = new_ruler.from_bytes(ruler_bytes)
@@ -324,28 +297,27 @@ def test_entity_ruler_serialize_bytes(nlp, patterns, entity_ruler_factory):
     assert sorted(new_ruler.labels) == sorted(ruler.labels)

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_serialize_phrase_matcher_attr_bytes(
-    nlp, patterns, entity_ruler_factory
-):
-    ruler = EntityRuler(nlp, phrase_matcher_attr="LOWER", patterns=patterns)
+def test_entity_ruler_serialize_phrase_matcher_attr_bytes(nlp, patterns):
+    ruler = nlp.add_pipe("entity_ruler", config={"phrase_matcher_attr": "LOWER"})
+    ruler.add_patterns(patterns)
     assert len(ruler) == len(patterns)
     assert len(ruler.labels) == 4
     ruler_bytes = ruler.to_bytes()
-    new_ruler = EntityRuler(nlp)
+    new_ruler = nlp.add_pipe(
+        "entity_ruler", name="new_ruler", config={"phrase_matcher_attr": "LOWER"}
+    )
     assert len(new_ruler) == 0
     assert len(new_ruler.labels) == 0
-    assert new_ruler.phrase_matcher_attr is None
     new_ruler = new_ruler.from_bytes(ruler_bytes)
     assert len(new_ruler) == len(patterns)
     assert len(new_ruler.labels) == 4
-    assert new_ruler.phrase_matcher_attr == "LOWER"

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_validate(nlp, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
-    validated_ruler = EntityRuler(nlp, validate=True)
+def test_entity_ruler_validate(nlp):
+    ruler = nlp.add_pipe("entity_ruler")
+    validated_ruler = nlp.add_pipe(
+        "entity_ruler", name="validated_ruler", config={"validate": True}
+    )
     valid_pattern = {"label": "HELLO", "pattern": [{"LOWER": "HELLO"}]}
     invalid_pattern = {"label": "HELLO", "pattern": [{"ASDF": "HELLO"}]}
@@ -362,16 +334,15 @@ def test_entity_ruler_validate(nlp, entity_ruler_factory):
         validated_ruler.add_patterns([invalid_pattern])

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_properties(nlp, patterns, entity_ruler_factory):
-    ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True)
+def test_entity_ruler_properties(nlp, patterns):
+    ruler = nlp.add_pipe("entity_ruler", config={"overwrite_ents": True})
+    ruler.add_patterns(patterns)
     assert sorted(ruler.labels) == sorted(["HELLO", "BYE", "COMPLEX", "TECH_ORG"])
-    assert sorted(ruler.ent_ids) == ["a1", "a2"]
+    assert sorted(ruler.ids) == ["a1", "a2"]

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_overlapping_spans(nlp, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+def test_entity_ruler_overlapping_spans(nlp):
+    ruler = nlp.add_pipe("entity_ruler")
     patterns = [
         {"label": "FOOBAR", "pattern": "foo bar"},
         {"label": "BARBAZ", "pattern": "bar baz"},
@@ -383,14 +354,13 @@ def test_entity_ruler_overlapping_spans(nlp, entity_ruler_factory):

 @pytest.mark.parametrize("n_process", [1, 2])
-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_multiprocessing(nlp, n_process, entity_ruler_factory):
+def test_entity_ruler_multiprocessing(nlp, n_process):
     if isinstance(get_current_ops, NumpyOps) or n_process < 2:
         texts = ["I enjoy eating Pizza Hut pizza."]
         patterns = [{"label": "FASTFOOD", "pattern": "Pizza Hut", "id": "1234"}]
-        ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+        ruler = nlp.add_pipe("entity_ruler")
         ruler.add_patterns(patterns)

         for doc in nlp.pipe(texts, n_process=2):
@@ -398,9 +368,8 @@ def test_entity_ruler_multiprocessing(nlp, n_process, entity_ruler_factory):
             assert ent.ent_id_ == "1234"

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_serialize_jsonl(nlp, patterns, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+def test_entity_ruler_serialize_jsonl(nlp, patterns):
+    ruler = nlp.add_pipe("entity_ruler")
     ruler.add_patterns(patterns)
     with make_tempdir() as d:
         ruler.to_disk(d / "test_ruler.jsonl")
@@ -409,9 +378,8 @@ def test_entity_ruler_serialize_jsonl(nlp, patterns, entity_ruler_factory):
         ruler.from_disk(d / "non_existing.jsonl")  # read from a bad jsonl file

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_serialize_dir(nlp, patterns, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+def test_entity_ruler_serialize_dir(nlp, patterns):
+    ruler = nlp.add_pipe("entity_ruler")
     ruler.add_patterns(patterns)
     with make_tempdir() as d:
         ruler.to_disk(d / "test_ruler")
@@ -420,9 +388,8 @@ def test_entity_ruler_serialize_dir(nlp, patterns, entity_ruler_factory):
         ruler.from_disk(d / "non_existing_dir")  # read from a bad directory

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_remove_basic(nlp, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+def test_entity_ruler_remove_basic(nlp):
+    ruler = nlp.add_pipe("entity_ruler")
     patterns = [
         {"label": "PERSON", "pattern": "Dina", "id": "dina"},
         {"label": "ORG", "pattern": "ACME", "id": "acme"},
@@ -432,24 +399,16 @@ def test_entity_ruler_remove_basic(nlp, entity_ruler_factory):
     doc = nlp("Dina went to school")
     assert len(ruler.patterns) == 3
     assert len(doc.ents) == 1
-    if isinstance(ruler, EntityRuler):
-        assert "PERSON||dina" in ruler.phrase_matcher
     assert doc.ents[0].label_ == "PERSON"
     assert doc.ents[0].text == "Dina"
-    if isinstance(ruler, EntityRuler):
-        ruler.remove("dina")
-    else:
-        ruler.remove_by_id("dina")
+    ruler.remove_by_id("dina")
     doc = nlp("Dina went to school")
     assert len(doc.ents) == 0
-    if isinstance(ruler, EntityRuler):
-        assert "PERSON||dina" not in ruler.phrase_matcher
     assert len(ruler.patterns) == 2

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_remove_same_id_multiple_patterns(nlp, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+def test_entity_ruler_remove_same_id_multiple_patterns(nlp):
+    ruler = nlp.add_pipe("entity_ruler")
     patterns = [
         {"label": "PERSON", "pattern": "Dina", "id": "dina"},
         {"label": "ORG", "pattern": "DinaCorp", "id": "dina"},
@@ -458,25 +417,15 @@ def test_entity_ruler_remove_same_id_multiple_patterns(nlp, entity_ruler_factory):
     ruler.add_patterns(patterns)
     doc = nlp("Dina founded DinaCorp and ACME.")
     assert len(ruler.patterns) == 3
-    if isinstance(ruler, EntityRuler):
-        assert "PERSON||dina" in ruler.phrase_matcher
-        assert "ORG||dina" in ruler.phrase_matcher
     assert len(doc.ents) == 3
-    if isinstance(ruler, EntityRuler):
-        ruler.remove("dina")
-    else:
-        ruler.remove_by_id("dina")
+    ruler.remove_by_id("dina")
     doc = nlp("Dina founded DinaCorp and ACME.")
     assert len(ruler.patterns) == 1
-    if isinstance(ruler, EntityRuler):
-        assert "PERSON||dina" not in ruler.phrase_matcher
-        assert "ORG||dina" not in ruler.phrase_matcher
     assert len(doc.ents) == 1

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_remove_nonexisting_pattern(nlp, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+def test_entity_ruler_remove_nonexisting_pattern(nlp):
+    ruler = nlp.add_pipe("entity_ruler")
     patterns = [
         {"label": "PERSON", "pattern": "Dina", "id": "dina"},
         {"label": "ORG", "pattern": "ACME", "id": "acme"},
@@ -491,9 +440,8 @@ def test_entity_ruler_remove_nonexisting_pattern(nlp, entity_ruler_factory):
         ruler.remove_by_id("nepattern")

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_remove_several_patterns(nlp, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+def test_entity_ruler_remove_several_patterns(nlp):
+    ruler = nlp.add_pipe("entity_ruler")
     patterns = [
         {"label": "PERSON", "pattern": "Dina", "id": "dina"},
         {"label": "ORG", "pattern": "ACME", "id": "acme"},
@@ -507,27 +455,20 @@ def test_entity_ruler_remove_several_patterns(nlp, entity_ruler_factory):
     assert doc.ents[0].text == "Dina"
     assert doc.ents[1].label_ == "ORG"
     assert doc.ents[1].text == "ACME"
-    if isinstance(ruler, EntityRuler):
-        ruler.remove("dina")
-    else:
-        ruler.remove_by_id("dina")
+    ruler.remove_by_id("dina")
     doc = nlp("Dina founded her company ACME")
     assert len(ruler.patterns) == 2
     assert len(doc.ents) == 1
     assert doc.ents[0].label_ == "ORG"
     assert doc.ents[0].text == "ACME"
-    if isinstance(ruler, EntityRuler):
-        ruler.remove("acme")
-    else:
-        ruler.remove_by_id("acme")
+    ruler.remove_by_id("acme")
     doc = nlp("Dina founded her company ACME")
     assert len(ruler.patterns) == 1
     assert len(doc.ents) == 0

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_remove_patterns_in_a_row(nlp, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+def test_entity_ruler_remove_patterns_in_a_row(nlp):
+    ruler = nlp.add_pipe("entity_ruler")
     patterns = [
         {"label": "PERSON", "pattern": "Dina", "id": "dina"},
         {"label": "ORG", "pattern": "ACME", "id": "acme"},
@@ -543,11 +484,6 @@ def test_entity_ruler_remove_patterns_in_a_row(nlp, entity_ruler_factory):
     assert doc.ents[1].text == "ACME"
     assert doc.ents[2].label_ == "DATE"
     assert doc.ents[2].text == "her birthday"
-    if isinstance(ruler, EntityRuler):
-        ruler.remove("dina")
-        ruler.remove("acme")
-        ruler.remove("bday")
-    else:
-        ruler.remove_by_id("dina")
-        ruler.remove_by_id("acme")
-        ruler.remove_by_id("bday")
+    ruler.remove_by_id("dina")
+    ruler.remove_by_id("acme")
+    ruler.remove_by_id("bday")
@@ -555,9 +491,8 @@ def test_entity_ruler_remove_patterns_in_a_row(nlp, entity_ruler_factory):
     assert len(doc.ents) == 0

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_remove_all_patterns(nlp, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+def test_entity_ruler_remove_all_patterns(nlp):
+    ruler = nlp.add_pipe("entity_ruler")
     patterns = [
         {"label": "PERSON", "pattern": "Dina", "id": "dina"},
         {"label": "ORG", "pattern": "ACME", "id": "acme"},
@@ -565,19 +500,10 @@ def test_entity_ruler_remove_all_patterns(nlp, entity_ruler_factory):
     ]
     ruler.add_patterns(patterns)
     assert len(ruler.patterns) == 3
-    if isinstance(ruler, EntityRuler):
-        ruler.remove("dina")
-    else:
-        ruler.remove_by_id("dina")
+    ruler.remove_by_id("dina")
     assert len(ruler.patterns) == 2
-    if isinstance(ruler, EntityRuler):
-        ruler.remove("acme")
-    else:
-        ruler.remove_by_id("acme")
+    ruler.remove_by_id("acme")
     assert len(ruler.patterns) == 1
-    if isinstance(ruler, EntityRuler):
-        ruler.remove("bday")
-    else:
-        ruler.remove_by_id("bday")
+    ruler.remove_by_id("bday")
     assert len(ruler.patterns) == 0
     with pytest.warns(UserWarning):
@@ -585,9 +511,8 @@ def test_entity_ruler_remove_all_patterns(nlp, entity_ruler_factory):
     assert len(doc.ents) == 0

-@pytest.mark.parametrize("entity_ruler_factory", ENTITY_RULERS)
-def test_entity_ruler_remove_and_add(nlp, entity_ruler_factory):
-    ruler = nlp.add_pipe(entity_ruler_factory, name="entity_ruler")
+def test_entity_ruler_remove_and_add(nlp):
+    ruler = nlp.add_pipe("entity_ruler")
     patterns = [{"label": "DATE", "pattern": "last time"}]
     ruler.add_patterns(patterns)
     doc = ruler(
@@ -608,9 +533,6 @@ def test_entity_ruler_remove_and_add(nlp, entity_ruler_factory):
     assert doc.ents[0].text == "last time"
     assert doc.ents[1].label_ == "DATE"
     assert doc.ents[1].text == "this time"
-    if isinstance(ruler, EntityRuler):
-        ruler.remove("ttime")
-    else:
-        ruler.remove_by_id("ttime")
+    ruler.remove_by_id("ttime")
     doc = ruler(
         nlp.make_doc("I saw him last time we met, this time he brought some flowers")
@@ -634,9 +556,6 @@ def test_entity_ruler_remove_and_add(nlp, entity_ruler_factory):
     )
     assert len(ruler.patterns) == 3
     assert len(doc.ents) == 3
-    if isinstance(ruler, EntityRuler):
-        ruler.remove("ttime")
-    else:
-        ruler.remove_by_id("ttime")
+    ruler.remove_by_id("ttime")
     doc = ruler(
         nlp.make_doc(

View File

@@ -8,7 +8,7 @@ import spacy
 from spacy import Vocab, load, registry
 from spacy.lang.en import English
 from spacy.language import Language
-from spacy.pipeline import DependencyParser, EntityRecognizer, EntityRuler
+from spacy.pipeline import DependencyParser, EntityRecognizer
 from spacy.pipeline import SentenceRecognizer, Tagger, TextCategorizer
 from spacy.pipeline import TrainablePipe
 from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
@@ -85,58 +85,17 @@ def test_issue_3526_1(en_vocab):
         {"label": "TECH_ORG", "pattern": "Apple", "id": "a1"},
     ]
     nlp = Language(vocab=en_vocab)
-    ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True)
+    ruler = nlp.add_pipe("entity_ruler", config={"overwrite_ents": True})
+    ruler.add_patterns(patterns)
     ruler_bytes = ruler.to_bytes()
     assert len(ruler) == len(patterns)
     assert len(ruler.labels) == 4
-    assert ruler.overwrite
-    new_ruler = EntityRuler(nlp)
+    new_ruler = nlp.add_pipe(
+        "entity_ruler", name="new_ruler", config={"overwrite_ents": True}
+    )
     new_ruler = new_ruler.from_bytes(ruler_bytes)
     assert len(new_ruler) == len(ruler)
     assert len(new_ruler.labels) == 4
-    assert new_ruler.overwrite == ruler.overwrite
-    assert new_ruler.ent_id_sep == ruler.ent_id_sep
-
-
-@pytest.mark.issue(3526)
-def test_issue_3526_2(en_vocab):
-    patterns = [
-        {"label": "HELLO", "pattern": "hello world"},
-        {"label": "BYE", "pattern": [{"LOWER": "bye"}, {"LOWER": "bye"}]},
-        {"label": "HELLO", "pattern": [{"ORTH": "HELLO"}]},
-        {"label": "COMPLEX", "pattern": [{"ORTH": "foo", "OP": "*"}]},
-        {"label": "TECH_ORG", "pattern": "Apple", "id": "a1"},
-    ]
-    nlp = Language(vocab=en_vocab)
-    ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True)
-    bytes_old_style = srsly.msgpack_dumps(ruler.patterns)
-    new_ruler = EntityRuler(nlp)
-    new_ruler = new_ruler.from_bytes(bytes_old_style)
-    assert len(new_ruler) == len(ruler)
-    for pattern in ruler.patterns:
-        assert pattern in new_ruler.patterns
-    assert new_ruler.overwrite is not ruler.overwrite
-
-
-@pytest.mark.issue(3526)
-def test_issue_3526_3(en_vocab):
-    patterns = [
-        {"label": "HELLO", "pattern": "hello world"},
-        {"label": "BYE", "pattern": [{"LOWER": "bye"}, {"LOWER": "bye"}]},
-        {"label": "HELLO", "pattern": [{"ORTH": "HELLO"}]},
-        {"label": "COMPLEX", "pattern": [{"ORTH": "foo", "OP": "*"}]},
-        {"label": "TECH_ORG", "pattern": "Apple", "id": "a1"},
-    ]
-    nlp = Language(vocab=en_vocab)
-    ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True)
-    with make_tempdir() as tmpdir:
-        out_file = tmpdir / "entity_ruler"
-        srsly.write_jsonl(out_file.with_suffix(".jsonl"), ruler.patterns)
-        new_ruler = EntityRuler(nlp).from_disk(out_file)
-        for pattern in ruler.patterns:
-            assert pattern in new_ruler.patterns
-        assert len(new_ruler) == len(ruler)
-        assert new_ruler.overwrite is not ruler.overwrite

 @pytest.mark.issue(3526)
@@ -150,16 +109,14 @@ def test_issue_3526_4(en_vocab):
         nlp.to_disk(tmpdir)
         ruler = nlp.get_pipe("entity_ruler")
         assert ruler.patterns == [{"label": "ORG", "pattern": "Apple"}]
-        assert ruler.overwrite is True
         nlp2 = load(tmpdir)
         new_ruler = nlp2.get_pipe("entity_ruler")
         assert new_ruler.patterns == [{"label": "ORG", "pattern": "Apple"}]
-        assert new_ruler.overwrite is True

 @pytest.mark.issue(4042)
 def test_issue4042():
-    """Test that serialization of an EntityRuler before NER works fine."""
+    """Test that serialization of an entity_ruler before NER works fine."""
     nlp = English()
     # add ner pipe
     ner = nlp.add_pipe("ner")

View File

@ -1,13 +1,24 @@
--- ---
title: EntityRuler title: EntityRuler
tag: class
source: spacy/pipeline/entity_ruler.py
new: 2.1 new: 2.1
teaser: 'Pipeline component for rule-based named entity recognition' teaser: 'Pipeline component for rule-based named entity recognition'
api_string_name: entity_ruler api_string_name: entity_ruler
api_trainable: false api_trainable: false
--- ---
<Infobox title="New in v4" variant="warning">
As of spaCy v4, there is no separate `EntityRuler` class. The entity ruler is
implemented as a special case of the `SpanRuler` component.
See the [migration guide](#migrating) below for differences between the v3
`EntityRuler` and v4 `SpanRuler` implementations of the `entity_ruler`
component.
See the [`SpanRuler`](/api/spanruler) API docs for the full API.
</Infobox>
The entity ruler lets you add spans to the [`Doc.ents`](/api/doc#ents) using The entity ruler lets you add spans to the [`Doc.ents`](/api/doc#ents) using
token-based rules or exact phrase matches. It can be combined with the token-based rules or exact phrase matches. It can be combined with the
statistical [`EntityRecognizer`](/api/entityrecognizer) to boost accuracy, or statistical [`EntityRecognizer`](/api/entityrecognizer) to boost accuracy, or
@ -63,271 +74,51 @@ how the component should be configured. You can override its settings via the
| `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~ | | `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~ |
| `scorer` | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~ | | `scorer` | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~ |
```python ## Migrating from v3 {#migrating}
%%GITHUB_SPACY/spacy/pipeline/entity_ruler.py
### Loading patterns
Unlike the v3 `EntityRuler`, the `SpanRuler` cannot load patterns on
initialization with `SpanRuler(patterns=patterns)` or directly from a JSONL file
path with `SpanRuler.from_disk(jsonl_path)`. Patterns should be loaded from the
JSONL file separately and then added through
[`SpanRuler.initialize`](/api/spanruler#initialize) or
[`SpanRuler.add_patterns`](/api/spanruler#add_patterns).
```diff
ruler = nlp.get_pipe("entity_ruler")
- ruler.from_disk("patterns.jsonl")
+ import srsly
+ patterns = srsly.read_jsonl("patterns.jsonl")
+ ruler.add_patterns(patterns)
``` ```
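Put together, a minimal sketch of the v4 loading flow, assuming a `patterns.jsonl` file with one pattern dict per line (file name is illustrative):

```python
import spacy
import srsly

nlp = spacy.blank("en")
ruler = nlp.add_pipe("entity_ruler")
# Each JSONL line is a dict such as {"label": "ORG", "pattern": "Apple"}
patterns = list(srsly.read_jsonl("patterns.jsonl"))
ruler.add_patterns(patterns)
```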
## EntityRuler.\_\_init\_\_ {#init tag="method"} ### Saving patterns
Initialize the entity ruler. If patterns are supplied here, they need to be a `SpanRuler.to_disk` always saves the full component data to a directory and does
list of dictionaries with a `"label"` and `"pattern"` key. A pattern can either not include an option to save the patterns to a single JSONL file.
be a token pattern (list) or a phrase pattern (string). For example:
`{"label": "ORG", "pattern": "Apple"}`.
> #### Example ```diff
> ruler = nlp.get_pipe("entity_ruler")
> ```python - ruler.to_disk("patterns.jsonl")
> # Construction via add_pipe + import srsly
> ruler = nlp.add_pipe("entity_ruler") + srsly.write_jsonl("patterns.jsonl", ruler.patterns)
> ```
> # Construction from class
> from spacy.pipeline import EntityRuler
> ruler = EntityRuler(nlp, overwrite_ents=True)
> ```
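A short sketch of both saving options in v4, assuming an `nlp` pipeline with an `entity_ruler` component (paths are illustrative):

```python
import srsly

ruler = nlp.get_pipe("entity_ruler")
# Save the full component data to a directory
ruler.to_disk("entity_ruler_data")
# Or write only the patterns to a JSONL file yourself
srsly.write_jsonl("patterns.jsonl", ruler.patterns)
```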
| Name | Description | ### Accessing token and phrase patterns
| --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `nlp` | The shared nlp object to pass the vocab to the matchers and process phrase patterns. ~~Language~~ |
| `name` <Tag variant="new">3</Tag> | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. Used to disable the current entity ruler while creating phrase patterns with the nlp object. ~~str~~ |
| _keyword-only_ | |
| `phrase_matcher_attr` | Optional attribute name match on for the internal [`PhraseMatcher`](/api/phrasematcher), e.g. `LOWER` to match on the lowercase token text. Defaults to `None`. ~~Optional[Union[int, str]]~~ |
| `validate` | Whether patterns should be validated, passed to Matcher and PhraseMatcher as `validate`. Defaults to `False`. ~~bool~~ |
| `overwrite_ents` | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`. ~~bool~~ |
| `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~ |
| `patterns` | Optional patterns to load in on initialization. ~~Optional[List[Dict[str, Union[str, List[dict]]]]]~~ |
## EntityRuler.initialize {#initialize tag="method" new="3"} The separate token patterns and phrase patterns are no longer accessible under
`ruler.token_patterns` or `ruler.phrase_patterns`. You can access the combined
patterns in their original format using the property
[`SpanRuler.patterns`](/api/spanruler#patterns).
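For example, a quick sketch of inspecting the combined patterns (labels and patterns are illustrative):

```python
ruler = nlp.get_pipe("entity_ruler")
ruler.add_patterns([
    # phrase pattern
    {"label": "ORG", "pattern": "Apple"},
    # token pattern
    {"label": "GPE", "pattern": [{"LOWER": "san"}, {"LOWER": "francisco"}]},
])
# Both kinds are returned together in their original dict format
print(ruler.patterns)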
Initialize the component with data and used before training to load in rules ### Removing patterns by ID
from a [pattern file](/usage/rule-based-matching/#entityruler-files). This method
is typically called by [`Language.initialize`](/api/language#initialize) and
lets you customize arguments it receives via the
[`[initialize.components]`](/api/data-formats#config-initialize) block in the
config.
> #### Example [`SpanRuler.remove`](/api/spanruler#remove) removes by label rather than ID. To
> remove by ID, use [`SpanRuler.remove_by_id`](/api/spanruler#remove_by_id):
> ```python
> entity_ruler = nlp.add_pipe("entity_ruler")
> entity_ruler.initialize(lambda: [], nlp=nlp, patterns=patterns)
> ```
>
> ```ini
> ### config.cfg
> [initialize.components.entity_ruler]
>
> [initialize.components.entity_ruler.patterns]
> @readers = "srsly.read_jsonl.v1"
> path = "corpus/entity_ruler_patterns.jsonl"
> ```
| Name | Description | ```diff
| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ruler = nlp.get_pipe("entity_ruler")
| `get_examples` | Function that returns gold-standard annotations in the form of [`Example`](/api/example) objects. Not used by the `EntityRuler`. ~~Callable[[], Iterable[Example]]~~ | - ruler.remove("id")
| _keyword-only_ | | + ruler.remove_by_id("id")
| `nlp` | The current `nlp` object. Defaults to `None`. ~~Optional[Language]~~ | ```
| `patterns` | The list of patterns. Defaults to `None`. ~~Optional[Sequence[Dict[str, Union[str, List[Dict[str, Any]]]]]]~~ |
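As a sketch, assuming a pattern added with `"id": "apple"`:

```python
ruler = nlp.get_pipe("entity_ruler")
ruler.add_patterns([{"label": "ORG", "pattern": "Apple", "id": "apple"}])
# In v4, SpanRuler.remove removes all patterns with a given label:
# ruler.remove("ORG")
# To remove by ID as the v3 EntityRuler did, use remove_by_id:
ruler.remove_by_id("apple")
```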
## EntityRuler.\_\_len\_\_ {#len tag="method"}
The number of all patterns added to the entity ruler.
> #### Example
>
> ```python
> ruler = nlp.add_pipe("entity_ruler")
> assert len(ruler) == 0
> ruler.add_patterns([{"label": "ORG", "pattern": "Apple"}])
> assert len(ruler) == 1
> ```
| Name | Description |
| ----------- | ------------------------------- |
| **RETURNS** | The number of patterns. ~~int~~ |
## EntityRuler.\_\_contains\_\_ {#contains tag="method"}
Whether a label is present in the patterns.
> #### Example
>
> ```python
> ruler = nlp.add_pipe("entity_ruler")
> ruler.add_patterns([{"label": "ORG", "pattern": "Apple"}])
> assert "ORG" in ruler
> assert "PERSON" not in ruler
> ```
| Name | Description |
| ----------- | ----------------------------------------------------- |
| `label` | The label to check. ~~str~~ |
| **RETURNS** | Whether the entity ruler contains the label. ~~bool~~ |
## EntityRuler.\_\_call\_\_ {#call tag="method"}
Find matches in the `Doc` and add them to the `doc.ents`. Typically, this
happens automatically after the component has been added to the pipeline using
[`nlp.add_pipe`](/api/language#add_pipe). If the entity ruler was initialized
with `overwrite_ents=True`, existing entities will be replaced if they overlap
with the matches. When matches overlap in a Doc, the entity ruler prioritizes
longer patterns over shorter, and if equal the match occurring first in the Doc
is chosen.
> #### Example
>
> ```python
> ruler = nlp.add_pipe("entity_ruler")
> ruler.add_patterns([{"label": "ORG", "pattern": "Apple"}])
>
> doc = nlp("A text about Apple.")
> ents = [(ent.text, ent.label_) for ent in doc.ents]
> assert ents == [("Apple", "ORG")]
> ```
| Name | Description |
| ----------- | -------------------------------------------------------------------- |
| `doc` | The `Doc` object to process, e.g. the `Doc` in the pipeline. ~~Doc~~ |
| **RETURNS** | The modified `Doc` with added entities, if available. ~~Doc~~ |
## EntityRuler.add_patterns {#add_patterns tag="method"}
Add patterns to the entity ruler. A pattern can either be a token pattern (list
of dicts) or a phrase pattern (string). For more details, see the usage guide on
[rule-based matching](/usage/rule-based-matching).
> #### Example
>
> ```python
> patterns = [
> {"label": "ORG", "pattern": "Apple"},
> {"label": "GPE", "pattern": [{"lower": "san"}, {"lower": "francisco"}]}
> ]
> ruler = nlp.add_pipe("entity_ruler")
> ruler.add_patterns(patterns)
> ```
| Name | Description |
| ---------- | ---------------------------------------------------------------- |
| `patterns` | The patterns to add. ~~List[Dict[str, Union[str, List[dict]]]]~~ |
## EntityRuler.remove {#remove tag="method" new="3.2.1"}
Remove a pattern by its ID from the entity ruler. A `ValueError` is raised if the ID does not exist.
> #### Example
>
> ```python
> patterns = [{"label": "ORG", "pattern": "Apple", "id": "apple"}]
> ruler = nlp.add_pipe("entity_ruler")
> ruler.add_patterns(patterns)
> ruler.remove("apple")
> ```
| Name | Description |
| ---------- | ---------------------------------------------------------------- |
| `id` | The ID of the pattern rule. ~~str~~ |
## EntityRuler.to_disk {#to_disk tag="method"}
Save the entity ruler patterns to a directory. The patterns will be saved as
newline-delimited JSON (JSONL). If a file with the suffix `.jsonl` is provided,
only the patterns are saved as JSONL. If a directory name is provided, a
`patterns.jsonl` and `cfg` file with the component configuration is exported.
> #### Example
>
> ```python
> ruler = nlp.add_pipe("entity_ruler")
> ruler.to_disk("/path/to/patterns.jsonl") # saves patterns only
> ruler.to_disk("/path/to/entity_ruler") # saves patterns and config
> ```
| Name | Description |
| ------ | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `path` | A path to a JSONL file or directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. ~~Union[str, Path]~~ |
## EntityRuler.from_disk {#from_disk tag="method"}
Load the entity ruler from a path. Expects either a file containing
newline-delimited JSON (JSONL) with one entry per line, or a directory
containing a `patterns.jsonl` file and a `cfg` file with the component
configuration.
> #### Example
>
> ```python
> ruler = nlp.add_pipe("entity_ruler")
> ruler.from_disk("/path/to/patterns.jsonl") # loads patterns only
> ruler.from_disk("/path/to/entity_ruler") # loads patterns and config
> ```
| Name | Description |
| ----------- | ------------------------------------------------------------------------------------------------------------- |
| `path` | A path to a JSONL file or directory. Paths may be either strings or `Path`-like objects. ~~Union[str, Path]~~ |
| **RETURNS** | The modified `EntityRuler` object. ~~EntityRuler~~ |
## EntityRuler.to_bytes {#to_bytes tag="method"}
Serialize the entity ruler patterns to a bytestring.
> #### Example
>
> ```python
> ruler = nlp.add_pipe("entity_ruler")
> ruler_bytes = ruler.to_bytes()
> ```
| Name | Description |
| ----------- | ---------------------------------- |
| **RETURNS** | The serialized patterns. ~~bytes~~ |
## EntityRuler.from_bytes {#from_bytes tag="method"}
Load the pipe from a bytestring. Modifies the object in place and returns it.
> #### Example
>
> ```python
> ruler_bytes = ruler.to_bytes()
> ruler = nlp.add_pipe("entity_ruler")
> ruler.from_bytes(ruler_bytes)
> ```
| Name | Description |
| ------------ | -------------------------------------------------- |
| `bytes_data` | The bytestring to load. ~~bytes~~ |
| **RETURNS** | The modified `EntityRuler` object. ~~EntityRuler~~ |
## EntityRuler.labels {#labels tag="property"}
All labels present in the match patterns.
| Name | Description |
| ----------- | -------------------------------------- |
| **RETURNS** | The string labels. ~~Tuple[str, ...]~~ |
## EntityRuler.ent_ids {#ent_ids tag="property" new="2.2.2"}
All entity IDs present in the `id` properties of the match patterns.
| Name | Description |
| ----------- | ----------------------------------- |
| **RETURNS** | The string IDs. ~~Tuple[str, ...]~~ |
## EntityRuler.patterns {#patterns tag="property"}
Get all patterns that were added to the entity ruler.
| Name | Description |
| ----------- | ---------------------------------------------------------------------------------------- |
| **RETURNS** | The original patterns, one dictionary per pattern. ~~List[Dict[str, Union[str, dict]]]~~ |
## Attributes {#attributes}
| Name | Description |
| ----------------- | --------------------------------------------------------------------------------------------------------------------- |
| `matcher` | The underlying matcher used to process token patterns. ~~Matcher~~ |
| `phrase_matcher` | The underlying phrase matcher used to process phrase patterns. ~~PhraseMatcher~~ |
| `token_patterns` | The token patterns present in the entity ruler, keyed by label. ~~Dict[str, List[Dict[str, Union[str, List[dict]]]]]~~ |
| `phrase_patterns` | The phrase patterns present in the entity ruler, keyed by label. ~~Dict[str, List[Doc]]~~ |

View File

@ -13,6 +13,17 @@ The span ruler lets you add spans to [`Doc.spans`](/api/doc#spans) and/or
usage examples, see the docs on usage examples, see the docs on
[rule-based span matching](/usage/rule-based-matching#spanruler). [rule-based span matching](/usage/rule-based-matching#spanruler).
<Infobox title="Replacement of the EntityRuler" variant="warning">
As of spaCy v4, there is no separate `EntityRuler` class. The entity ruler is
implemented as a special case of the `SpanRuler` component.
See the [migration guide](/api/entityruler#migrating) for differences between
the v3 `EntityRuler` and v4 `SpanRuler` implementations of the `entity_ruler`
component.
</Infobox>
## Assigned Attributes {#assigned-attributes} ## Assigned Attributes {#assigned-attributes}
Matches will be saved to `Doc.spans[spans_key]` as a Matches will be saved to `Doc.spans[spans_key]` as a

View File

@ -41,25 +41,27 @@ components for different language processing tasks and also allows adding
![The processing pipeline](../../images/pipeline.svg) ![The processing pipeline](../../images/pipeline.svg)
| Name | Description | | Component name | Component class | Description |
| ----------------------------------------------- | ------------------------------------------------------------------------------------------- | | ---------------------- | ---------------------------------------------------- | ------------------------------------------------------------------------------------------- |
| [`AttributeRuler`](/api/attributeruler) | Set token attributes using matcher rules. | | `attribute_ruler` | [`AttributeRuler`](/api/attributeruler) | Set token attributes using matcher rules. |
| [`DependencyParser`](/api/dependencyparser) | Predict syntactic dependencies. | | `entity_linker` | [`EntityLinker`](/api/entitylinker) | Disambiguate named entities to nodes in a knowledge base. |
| [`EditTreeLemmatizer`](/api/edittreelemmatizer) | Predict base forms of words. | | `entity_ruler` | [`SpanRuler`](/api/spanruler) | Add entity spans to the `Doc` using token-based rules or exact phrase matches. |
| [`EntityLinker`](/api/entitylinker) | Disambiguate named entities to nodes in a knowledge base. | | `lemmatizer` | [`Lemmatizer`](/api/lemmatizer) | Determine the base forms of words using rules and lookups. |
| [`EntityRecognizer`](/api/entityrecognizer) | Predict named entities, e.g. persons or products. | | `morphologizer` | [`Morphologizer`](/api/morphologizer) | Predict morphological features and coarse-grained part-of-speech tags. |
| [`EntityRuler`](/api/entityruler) | Add entity spans to the `Doc` using token-based rules or exact phrase matches. | | `ner` | [`EntityRecognizer`](/api/entityrecognizer) | Predict named entities, e.g. persons or products. |
| [`Lemmatizer`](/api/lemmatizer) | Determine the base forms of words using rules and lookups. | | `parser` | [`DependencyParser`](/api/dependencyparser) | Predict syntactic dependencies. |
| [`Morphologizer`](/api/morphologizer) | Predict morphological features and coarse-grained part-of-speech tags. | | `senter` | [`SentenceRecognizer`](/api/sentencerecognizer) | Predict sentence boundaries. |
| [`SentenceRecognizer`](/api/sentencerecognizer) | Predict sentence boundaries. | | `sentencizer` | [`Sentencizer`](/api/sentencizer) | Implement rule-based sentence boundary detection that doesn't require the dependency parse. |
| [`Sentencizer`](/api/sentencizer) | Implement rule-based sentence boundary detection that doesn't require the dependency parse. | | `span_ruler` | [`SpanRuler`](/api/spanruler) | Add spans to the `Doc` using token-based rules or exact phrase matches. |
| [`Tagger`](/api/tagger) | Predict part-of-speech tags. | | `tagger` | [`Tagger`](/api/tagger) | Predict part-of-speech tags. |
| [`TextCategorizer`](/api/textcategorizer) | Predict categories or labels over the whole document. | | `textcat` | [`TextCategorizer`](/api/textcategorizer) | Predict exactly one category or label over a whole document. |
| [`Tok2Vec`](/api/tok2vec) | Apply a "token-to-vector" model and set its outputs. | | `textcat_multilabel` | [`MultiLabel_TextCategorizer`](/api/textcategorizer) | Predict 0, 1 or more categories or labels over a whole document. |
| [`Tokenizer`](/api/tokenizer) | Segment raw text and create `Doc` objects from the words. | | `tok2vec` | [`Tok2Vec`](/api/tok2vec) | Apply a "token-to-vector" model and set its outputs. |
| [`TrainablePipe`](/api/pipe) | Class that all trainable pipeline components inherit from. | | `tokenizer` | [`Tokenizer`](/api/tokenizer) | Segment raw text and create `Doc` objects from the words. |
| [`Transformer`](/api/transformer) | Use a transformer model and set its outputs. | | `trainable_lemmatizer` | [`EditTreeLemmatizer`](/api/edittreelemmatizer) | Predict base forms of words. |
| [Other functions](/api/pipeline-functions) | Automatically apply something to the `Doc`, e.g. to merge spans of tokens. | | `transformer` | [`Transformer`](/api/transformer) | Use a transformer model and set its outputs. |
| - | [`TrainablePipe`](/api/pipe) | Class that all trainable pipeline components inherit from. |
| - | [Other functions](/api/pipeline-functions) | Automatically apply something to the `Doc`, e.g. to merge spans of tokens. |
### Matchers {#architecture-matchers} ### Matchers {#architecture-matchers}

View File

@ -53,9 +53,9 @@ example, a custom lemmatizer may need the part-of-speech tags assigned, so it'll
only work if it's added after the tagger. The parser will respect pre-defined only work if it's added after the tagger. The parser will respect pre-defined
sentence boundaries, so if a previous component in the pipeline sets them, its sentence boundaries, so if a previous component in the pipeline sets them, its
dependency predictions may be different. Similarly, it matters if you add the dependency predictions may be different. Similarly, it matters if you add the
[`EntityRuler`](/api/entityruler) before or after the statistical entity [`SpanRuler`](/api/spanruler) before or after the statistical entity recognizer:
recognizer: if it's added before, the entity recognizer will take the existing if it's added before and it is writing to `doc.ents`, then the entity recognizer
entities into account when making predictions. The will take those existing entities into account when making predictions. The
[`EntityLinker`](/api/entitylinker), which resolves named entities to knowledge [`EntityLinker`](/api/entitylinker), which resolves named entities to knowledge
base IDs, should be preceded by a pipeline component that recognizes entities base IDs, should be preceded by a pipeline component that recognizes entities
such as the [`EntityRecognizer`](/api/entityrecognizer). such as the [`EntityRecognizer`](/api/entityrecognizer).

View File

@ -303,13 +303,14 @@ available pipeline components and component functions.
> ruler = nlp.add_pipe("entity_ruler") > ruler = nlp.add_pipe("entity_ruler")
> ``` > ```
| String name | Component | Description | | Component name | Component class | Description |
| ---------------------- | ---------------------------------------------------- | ----------------------------------------------------------------------------------------- | | ---------------------- | ---------------------------------------------------- | ----------------------------------------------------------------------------------------- |
| `tagger` | [`Tagger`](/api/tagger) | Assign part-of-speech-tags. | | `tagger` | [`Tagger`](/api/tagger) | Assign part-of-speech-tags. |
| `parser` | [`DependencyParser`](/api/dependencyparser) | Assign dependency labels. | | `parser` | [`DependencyParser`](/api/dependencyparser) | Assign dependency labels. |
| `ner` | [`EntityRecognizer`](/api/entityrecognizer) | Assign named entities. | | `ner` | [`EntityRecognizer`](/api/entityrecognizer) | Assign named entities. |
| `entity_linker` | [`EntityLinker`](/api/entitylinker) | Assign knowledge base IDs to named entities. Should be added after the entity recognizer. | | `entity_linker` | [`EntityLinker`](/api/entitylinker) | Assign knowledge base IDs to named entities. Should be added after the entity recognizer. |
| `entity_ruler` | [`EntityRuler`](/api/entityruler) | Assign named entities based on pattern rules and dictionaries. | | `span_ruler` | [`SpanRuler`](/api/spanruler) | Assign spans based on pattern rules and dictionaries. |
| `entity_ruler` | [`SpanRuler`](/api/spanruler) | Assign named entities based on pattern rules and dictionaries. |
| `textcat` | [`TextCategorizer`](/api/textcategorizer) | Assign text categories: exactly one category is predicted per document. | | `textcat` | [`TextCategorizer`](/api/textcategorizer) | Assign text categories: exactly one category is predicted per document. |
| `textcat_multilabel` | [`MultiLabel_TextCategorizer`](/api/textcategorizer) | Assign text categories in a multi-label setting: zero, one or more labels per document. | | `textcat_multilabel` | [`MultiLabel_TextCategorizer`](/api/textcategorizer) | Assign text categories in a multi-label setting: zero, one or more labels per document. |
| `lemmatizer` | [`Lemmatizer`](/api/lemmatizer) | Assign base forms to words using rules and lookups. | | `lemmatizer` | [`Lemmatizer`](/api/lemmatizer) | Assign base forms to words using rules and lookups. |

View File

@ -375,7 +375,7 @@ scoped quantifiers instead, you can build those behaviors with `on_match`
callbacks. callbacks.
| OP | Description | | OP | Description |
|---------|------------------------------------------------------------------------| | ------- | ---------------------------------------------------------------------- |
| `!` | Negate the pattern, by requiring it to match exactly 0 times. | | `!` | Negate the pattern, by requiring it to match exactly 0 times. |
| `?` | Make the pattern optional, by allowing it to match 0 or 1 times. | | `?` | Make the pattern optional, by allowing it to match 0 or 1 times. |
| `+` | Require the pattern to match 1 or more times. | | `+` | Require the pattern to match 1 or more times. |
@ -471,7 +471,7 @@ matches = matcher(doc)
``` ```
A very similar logic has been implemented in the built-in A very similar logic has been implemented in the built-in
[`EntityRuler`](/api/entityruler) by the way. It also takes care of handling [`entity_ruler`](/api/entityruler) by the way. It also takes care of handling
overlapping matches, which you would otherwise have to take care of yourself. overlapping matches, which you would otherwise have to take care of yourself.
> #### Tip: Visualizing matches > #### Tip: Visualizing matches
@ -1270,7 +1270,7 @@ of patterns such as `{}` that match any token in the sentence.
## Rule-based entity recognition {#entityruler new="2.1"} ## Rule-based entity recognition {#entityruler new="2.1"}
The [`EntityRuler`](/api/entityruler) is a component that lets you add named The [`entity_ruler`](/api/entityruler) is a component that lets you add named
entities based on pattern dictionaries, which makes it easy to combine entities based on pattern dictionaries, which makes it easy to combine
rule-based and statistical named entity recognition for even more powerful rule-based and statistical named entity recognition for even more powerful
pipelines. pipelines.
@ -1295,13 +1295,12 @@ pattern. The entity ruler accepts two types of patterns:
### Using the entity ruler {#entityruler-usage} ### Using the entity ruler {#entityruler-usage}
The [`EntityRuler`](/api/entityruler) is a pipeline component that's typically The `entity_ruler` is a pipeline component that's typically added via
added via [`nlp.add_pipe`](/api/language#add_pipe). When the `nlp` object is [`nlp.add_pipe`](/api/language#add_pipe). When the `nlp` object is called on a
called on a text, it will find matches in the `doc` and add them as entities to text, it will find matches in the `doc` and add them as entities to `doc.ents`,
the `doc.ents`, using the specified pattern label as the entity label. If any using the specified pattern label as the entity label. If any matches were to
matches were to overlap, the pattern matching most tokens takes priority. If overlap, the pattern matching most tokens takes priority. If they also happen to
they also happen to be equally long, then the match occurring first in the `Doc` be equally long, then the match occurring first in the `Doc` is chosen.
is chosen.
```python ```python
### {executable="true"} ### {executable="true"}
@ -1339,7 +1338,7 @@ doc = nlp("MyCorp Inc. is a company in the U.S.")
print([(ent.text, ent.label_) for ent in doc.ents]) print([(ent.text, ent.label_) for ent in doc.ents])
``` ```
#### Validating and debugging EntityRuler patterns {#entityruler-pattern-validation new="2.1.8"} #### Validating and debugging entity ruler patterns {#entityruler-pattern-validation new="2.1.8"}
The entity ruler can validate patterns against a JSON schema with the config The entity ruler can validate patterns against a JSON schema with the config
setting `"validate"`. See details under setting `"validate"`. See details under
@ -1351,9 +1350,9 @@ ruler = nlp.add_pipe("entity_ruler", config={"validate": True})
### Adding IDs to patterns {#entityruler-ent-ids new="2.2.2"} ### Adding IDs to patterns {#entityruler-ent-ids new="2.2.2"}
The [`EntityRuler`](/api/entityruler) can also accept an `id` attribute for each The [`entity_ruler`](/api/entityruler) can also accept an `id` attribute for
pattern. Using the `id` attribute allows multiple patterns to be associated with each pattern. Using the `id` attribute allows multiple patterns to be associated
the same entity. with the same entity.
```python ```python
### {executable="true"} ### {executable="true"}
@ -1373,10 +1372,10 @@ doc2 = nlp("Apple is opening its first big office in San Fran.")
print([(ent.text, ent.label_, ent.id_) for ent in doc2.ents]) print([(ent.text, ent.label_, ent.id_) for ent in doc2.ents])
``` ```
If the `id` attribute is included in the [`EntityRuler`](/api/entityruler) If the `id` attribute is included in the [`entity_ruler`](/api/entityruler)
patterns, the `id_` property of the matched entity is set to the `id` given patterns, the `id_` property of the matched entity is set to the `id` given in
in the patterns. So in the example above it's easy to identify that "San the patterns. So in the example above it's easy to identify that "San Francisco"
Francisco" and "San Fran" are both the same entity. and "San Fran" are both the same entity.
### Using pattern files {#entityruler-files} ### Using pattern files {#entityruler-files}
@ -1400,13 +1399,13 @@ new_ruler = nlp.add_pipe("entity_ruler").from_disk("./patterns.jsonl")
If you're using the [Prodigy](https://prodi.gy) annotation tool, you might If you're using the [Prodigy](https://prodi.gy) annotation tool, you might
recognize these pattern files from bootstrapping your named entity and text recognize these pattern files from bootstrapping your named entity and text
classification labelling. The patterns for the `EntityRuler` follow the same classification labelling. The patterns for the `entity_ruler` follow the same
syntax, so you can use your existing Prodigy pattern files in spaCy, and vice syntax, so you can use your existing Prodigy pattern files in spaCy, and vice
versa. versa.
</Infobox> </Infobox>
When you save out an `nlp` object that has an `EntityRuler` added to its When you save out an `nlp` object that has an `entity_ruler` added to its
pipeline, its patterns are automatically exported to the pipeline directory: pipeline, its patterns are automatically exported to the pipeline directory:
```python ```python
@ -1429,9 +1428,9 @@ rules included!
When using a large amount of **phrase patterns** (roughly > 10000) it's useful When using a large amount of **phrase patterns** (roughly > 10000) it's useful
to understand how the `add_patterns` function of the entity ruler works. For to understand how the `add_patterns` function of the entity ruler works. For
each **phrase pattern**, the EntityRuler calls the nlp object to construct a doc each **phrase pattern**, the entity ruler calls the nlp object to construct a
object. This happens in case you try to add the EntityRuler at the end of an doc object. This happens in case you try to add the entity ruler at the end of
existing pipeline with, for example, a POS tagger and want to extract matches an existing pipeline with, for example, a POS tagger and want to extract matches
based on the pattern's POS signature. In this case you would pass a config value based on the pattern's POS signature. In this case you would pass a config value
of `"phrase_matcher_attr": "POS"` for the entity ruler. of `"phrase_matcher_attr": "POS"` for the entity ruler.

View File

@ -193,13 +193,13 @@ the data to and from a JSON file.
> #### Real-world example > #### Real-world example
> >
> To see custom serialization methods in action, check out the new > To see custom serialization methods in action, check out the
> [`EntityRuler`](/api/entityruler) component and its > [`SpanRuler`](/api/spanruler) component and its
> [source](%%GITHUB_SPACY/spacy/pipeline/entity_ruler.py). Patterns added to the > [source](%%GITHUB_SPACY/spacy/pipeline/span_ruler.py). Patterns added to the
> component will be saved to a `.jsonl` file if the pipeline is serialized to > component will be saved to a `.jsonl` file if the pipeline is serialized to
> disk, and to a bytestring if the pipeline is serialized to bytes. This allows > disk, and to a bytestring if the pipeline is serialized to bytes. This allows
> saving out a pipeline with a rule-based entity recognizer and including all > saving out a pipeline with rule-based components _with_ all the component
> rules _with_ the component data. > data.
```python ```python
### {highlight="16-23,25-30"} ### {highlight="16-23,25-30"}

View File

@ -424,7 +424,7 @@ your components during training, and the most common scenarios are:
2. Update an existing **trained component** with more examples. 2. Update an existing **trained component** with more examples.
3. Include an existing trained component without updating it. 3. Include an existing trained component without updating it.
4. Include a non-trainable component, like a rule-based 4. Include a non-trainable component, like a rule-based
[`EntityRuler`](/api/entityruler) or [`Sentencizer`](/api/sentencizer), or a [`SpanRuler`](/api/spanruler) or [`Sentencizer`](/api/sentencizer), or a
fully [custom component](/usage/processing-pipelines#custom-components). fully [custom component](/usage/processing-pipelines#custom-components).
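As a sketch, such a non-trainable component might look like this in the config (component name and pattern path are illustrative):

```ini
### config.cfg (excerpt)
[components.entity_ruler]
factory = "entity_ruler"

[initialize.components.entity_ruler]

[initialize.components.entity_ruler.patterns]
@readers = "srsly.read_jsonl.v1"
path = "corpus/entity_ruler_patterns.jsonl"
```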
If a component block defines a `factory`, spaCy will look it up in the If a component block defines a `factory`, spaCy will look it up in the