Tidy up, tests and docs

2025-11-06 19:07:30 +03:00 · 2020-10-04 13:54:05 +02:00 · 2020-10-04 13:54:05 +02:00 · 11347f34da
commit 11347f34da
parent 96b636c2d3
5 changed files with 193 additions and 152 deletions
--- a/spacy/pipeline/attributeruler.py
+++ b/spacy/pipeline/attributeruler.py
@ -1,10 +1,11 @@
+from typing import List, Dict, Union, Iterable, Any, Optional, Callable, Iterator
+from typing import Tuple
 import srsly
-from typing import List, Dict, Union, Iterable, Any, Optional
 from pathlib import Path

 from .pipe import Pipe
 from ..errors import Errors
-from ..training import validate_examples
+from ..training import validate_examples, Example
 from ..language import Language
 from ..matcher import Matcher
 from ..scorer import Scorer
@ -22,17 +23,9 @@ TagMapType = Dict[str, Dict[Union[int, str], Union[int, str]]]
 MorphRulesType = Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]


-@Language.factory(
-    "attribute_ruler", default_config={"validate": False}
-)
-def make_attribute_ruler(
-    nlp: Language,
-    name: str,
-    validate: bool,
-):
-    return AttributeRuler(
-        nlp.vocab, name, pattern_dicts=pattern_dicts, validate=validate
-    )
+@Language.factory("attribute_ruler", default_config={"validate": False})
+def make_attribute_ruler(nlp: Language, name: str, validate: bool):
+    return AttributeRuler(nlp.vocab, name, validate=validate)


 class AttributeRuler(Pipe):
@ -43,12 +36,7 @@ class AttributeRuler(Pipe):
    """

    def __init__(
-        self,
-        vocab: Vocab,
-        name: str = "attribute_ruler",
-        *,
-        pattern_dicts: Optional[Iterable[AttributeRulerPatternType]] = None,
-        validate: bool = False,
+        self, vocab: Vocab, name: str = "attribute_ruler", *, validate: bool = False
    ) -> None:
        """Create the AttributeRuler. After creation, you can add patterns
        with the `.initialize()` or `.add_patterns()` methods, or load patterns
@ -71,12 +59,12 @@ class AttributeRuler(Pipe):

    def initialize(
        self,
-        get_examples: Optional[Callable[[], Iterable[Example]]] = None,
+        get_examples: Optional[Callable[[], Iterable[Example]]],
        *,
        nlp: Optional[Language] = None,
        patterns: Optional[Iterable[AttributeRulerPatternType]] = None,
-        tag_map: Optional[TagMapType]=None,
-        morph_rules: Optional[MorphRulesType]=None
+        tag_map: Optional[TagMapType] = None,
+        morph_rules: Optional[MorphRulesType] = None,
    ):
        """Initialize the attribute ruler by adding zero or more patterns.

@ -126,7 +114,7 @@ class AttributeRuler(Pipe):
            set_token_attrs(span[index], attrs)
        return doc

-    def pipe(self, stream, *, batch_size=128):
+    def pipe(self, stream: Iterable[Doc], *, batch_size: int = 128) -> Iterator[Doc]:
        """Apply the pipe to a stream of documents. This usually happens under
        the hood when the nlp object is called on a text and all components are
        applied to the Doc.
@ -210,16 +198,16 @@ class AttributeRuler(Pipe):
        self.attrs.append(attrs)
        self.indices.append(index)

-    def add_patterns(self, pattern_dicts: Iterable[AttributeRulerPatternType]) -> None:
+    def add_patterns(self, patterns: Iterable[AttributeRulerPatternType]) -> None:
        """Add patterns from a list of pattern dicts with the keys as the
        arguments to AttributeRuler.add.
-        pattern_dicts (Iterable[dict]): A list of pattern dicts with the keys
+        patterns (Iterable[dict]): A list of pattern dicts with the keys
            as the arguments to AttributeRuler.add (patterns/attrs/index) to
            add as patterns.

        DOCS: https://nightly.spacy.io/api/attributeruler#add_patterns
        """
-        for p in pattern_dicts:
+        for p in patterns:
            self.add(**p)

    @property
@ -234,7 +222,7 @@ class AttributeRuler(Pipe):
            all_patterns.append(p)
        return all_patterns

-    def score(self, examples, **kwargs):
+    def score(self, examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
        """Score a batch of examples.

        examples (Iterable[Example]): The examples to score.
@ -275,7 +263,7 @@ class AttributeRuler(Pipe):

    def from_bytes(
        self, bytes_data: bytes, exclude: Iterable[str] = SimpleFrozenList()
-    ):
+    ) -> "AttributeRuler":
        """Load the AttributeRuler from a bytestring.

        bytes_data (bytes): The data to load.
@ -293,7 +281,6 @@ class AttributeRuler(Pipe):
            "patterns": load_patterns,
        }
        util.from_bytes(bytes_data, deserialize, exclude)
-
        return self

    def to_disk(
@ -303,6 +290,7 @@ class AttributeRuler(Pipe):

        path (Union[Path, str]): A path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.
+
        DOCS: https://nightly.spacy.io/api/attributeruler#to_disk
        """
        serialize = {
@ -313,11 +301,13 @@ class AttributeRuler(Pipe):

    def from_disk(
        self, path: Union[Path, str], exclude: Iterable[str] = SimpleFrozenList()
-    ) -> None:
+    ) -> "AttributeRuler":
        """Load the AttributeRuler from disk.

        path (Union[Path, str]): A path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.
+        RETURNS (AttributeRuler): The loaded object.
+
        DOCS: https://nightly.spacy.io/api/attributeruler#from_disk
        """

@ -329,11 +319,10 @@ class AttributeRuler(Pipe):
            "patterns": load_patterns,
        }
        util.from_disk(path, deserialize, exclude)
-
        return self


-def _split_morph_attrs(attrs):
+def _split_morph_attrs(attrs: dict) -> Tuple[dict, dict]:
    """Split entries from a tag map or morph rules dict into to two dicts, one
    with the token-level features (POS, LEMMA) and one with the remaining
    features, which are presumed to be individual MORPH features."""
--- a/spacy/tests/pipeline/test_attributeruler.py
+++ b/spacy/tests/pipeline/test_attributeruler.py
@ -63,6 +63,39 @@ def morph_rules():
    return {"DT": {"the": {"POS": "DET", "LEMMA": "a", "Case": "Nom"}}}


+def check_tag_map(ruler):
+    doc = Doc(
+        ruler.vocab,
+        words=["This", "is", "a", "test", "."],
+        tags=["DT", "VBZ", "DT", "NN", "."],
+    )
+    doc = ruler(doc)
+    for i in range(len(doc)):
+        if i == 4:
+            assert doc[i].pos_ == "PUNCT"
+            assert str(doc[i].morph) == "PunctType=peri"
+        else:
+            assert doc[i].pos_ == ""
+            assert str(doc[i].morph) == ""
+
+
+def check_morph_rules(ruler):
+    doc = Doc(
+        ruler.vocab,
+        words=["This", "is", "the", "test", "."],
+        tags=["DT", "VBZ", "DT", "NN", "."],
+    )
+    doc = ruler(doc)
+    for i in range(len(doc)):
+        if i != 2:
+            assert doc[i].pos_ == ""
+            assert str(doc[i].morph) == ""
+        else:
+            assert doc[2].pos_ == "DET"
+            assert doc[2].lemma_ == "a"
+            assert str(doc[2].morph) == "Case=Nom"
+
+
 def test_attributeruler_init(nlp, pattern_dicts):
    a = nlp.add_pipe("attribute_ruler")
    for p in pattern_dicts:
@ -78,7 +111,8 @@ def test_attributeruler_init(nlp, pattern_dicts):

 def test_attributeruler_init_patterns(nlp, pattern_dicts):
    # initialize with patterns
-    nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
+    ruler = nlp.add_pipe("attribute_ruler")
+    ruler.initialize(lambda: [], patterns=pattern_dicts)
    doc = nlp("This is a test.")
    assert doc[2].lemma_ == "the"
    assert str(doc[2].morph) == "Case=Nom|Number=Plur"
@ -88,10 +122,11 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
    assert doc.has_annotation("MORPH")
    nlp.remove_pipe("attribute_ruler")
    # initialize with patterns from asset
-    nlp.add_pipe(
-        "attribute_ruler",
-        config={"pattern_dicts": {"@misc": "attribute_ruler_patterns"}},
-    )
+    nlp.config["initialize"]["components"]["attribute_ruler"] = {
+        "patterns": {"@misc": "attribute_ruler_patterns"}
+    }
+    nlp.add_pipe("attribute_ruler")
+    nlp.initialize()
    doc = nlp("This is a test.")
    assert doc[2].lemma_ == "the"
    assert str(doc[2].morph) == "Case=Nom|Number=Plur"
@ -103,18 +138,15 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):

 def test_attributeruler_score(nlp, pattern_dicts):
    # initialize with patterns
-    nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
+    ruler = nlp.add_pipe("attribute_ruler")
+    ruler.initialize(lambda: [], patterns=pattern_dicts)
    doc = nlp("This is a test.")
    assert doc[2].lemma_ == "the"
    assert str(doc[2].morph) == "Case=Nom|Number=Plur"
    assert doc[3].lemma_ == "cat"
    assert str(doc[3].morph) == "Case=Nom|Number=Sing"
-
-    dev_examples = [
-        Example.from_dict(
-            nlp.make_doc("This is a test."), {"lemmas": ["this", "is", "a", "cat", "."]}
-        )
-    ]
+    doc = nlp.make_doc("This is a test.")
+    dev_examples = [Example.from_dict(doc, {"lemmas": ["this", "is", "a", "cat", "."]})]
    scores = nlp.evaluate(dev_examples)
    # "cat" is the only correct lemma
    assert scores["lemma_acc"] == pytest.approx(0.2)
@ -139,40 +171,27 @@ def test_attributeruler_rule_order(nlp):


 def test_attributeruler_tag_map(nlp, tag_map):
-    a = AttributeRuler(nlp.vocab)
-    a.load_from_tag_map(tag_map)
-    doc = Doc(
-        nlp.vocab,
-        words=["This", "is", "a", "test", "."],
-        tags=["DT", "VBZ", "DT", "NN", "."],
-    )
-    doc = a(doc)
-    for i in range(len(doc)):
-        if i == 4:
-            assert doc[i].pos_ == "PUNCT"
-            assert str(doc[i].morph) == "PunctType=peri"
-        else:
-            assert doc[i].pos_ == ""
-            assert str(doc[i].morph) == ""
+    ruler = AttributeRuler(nlp.vocab)
+    ruler.load_from_tag_map(tag_map)
+    check_tag_map(ruler)
+
+
+def test_attributeruler_tag_map_initialize(nlp, tag_map):
+    ruler = nlp.add_pipe("attribute_ruler")
+    ruler.initialize(lambda: [], tag_map=tag_map)
+    check_tag_map(ruler)


 def test_attributeruler_morph_rules(nlp, morph_rules):
-    a = AttributeRuler(nlp.vocab)
-    a.load_from_morph_rules(morph_rules)
-    doc = Doc(
-        nlp.vocab,
-        words=["This", "is", "the", "test", "."],
-        tags=["DT", "VBZ", "DT", "NN", "."],
-    )
-    doc = a(doc)
-    for i in range(len(doc)):
-        if i != 2:
-            assert doc[i].pos_ == ""
-            assert str(doc[i].morph) == ""
-        else:
-            assert doc[2].pos_ == "DET"
-            assert doc[2].lemma_ == "a"
-            assert str(doc[2].morph) == "Case=Nom"
+    ruler = AttributeRuler(nlp.vocab)
+    ruler.load_from_morph_rules(morph_rules)
+    check_morph_rules(ruler)
+
+
+def test_attributeruler_morph_rules_initialize(nlp, morph_rules):
+    ruler = nlp.add_pipe("attribute_ruler")
+    ruler.initialize(lambda: [], morph_rules=morph_rules)
+    check_morph_rules(ruler)


 def test_attributeruler_indices(nlp):
--- a/website/docs/api/attributeruler.md
+++ b/website/docs/api/attributeruler.md
@ -4,6 +4,7 @@ tag: class
 source: spacy/pipeline/attributeruler.py
 new: 3
 teaser: 'Pipeline component for rule-based token attribute assignment'
+api_base_class: /api/pipe
 api_string_name: attribute_ruler
 api_trainable: false
 ---
@ -25,17 +26,13 @@ how the component should be configured. You can override its settings via the
 > #### Example
 >
 > ```python
-> config = {
->    "pattern_dicts": None,
->    "validate": True,
-> }
+> config = {"validate": True}
 > nlp.add_pipe("attribute_ruler", config=config)
 > ```

-| Setting         | Description                                                                                                                                                                                                                                    |
-| --------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `pattern_dicts` | A list of pattern dicts with the keys as the arguments to [`AttributeRuler.add`](/api/attributeruler#add) (`patterns`/`attrs`/`index`) to add as patterns. Defaults to `None`. ~~Optional[Iterable[Dict[str, Union[List[dict], dict, int]]]]~~ |
-| `validate`      | Whether patterns should be validated (passed to the `Matcher`). Defaults to `False`. ~~bool~~                                                                                                                                                  |
+| Setting    | Description                                                                                   |
+| ---------- | --------------------------------------------------------------------------------------------- |
+| `validate` | Whether patterns should be validated (passed to the `Matcher`). Defaults to `False`. ~~bool~~ |

 ```python
 %%GITHUB_SPACY/spacy/pipeline/attributeruler.py
@ -43,36 +40,26 @@ how the component should be configured. You can override its settings via the

 ## AttributeRuler.\_\_init\_\_ {#init tag="method"}

-Initialize the attribute ruler. If pattern dicts are supplied here, they need to
-be a list of dictionaries with `"patterns"`, `"attrs"`, and optional `"index"`
-keys, e.g.:
-
-```python
-pattern_dicts = [
-    {"patterns": [[{"TAG": "VB"}]], "attrs": {"POS": "VERB"}},
-    {"patterns": [[{"LOWER": "an"}]], "attrs": {"LEMMA": "a"}},
-]
-```
+Initialize the attribute ruler.

 > #### Example
 >
 > ```python
 > # Construction via add_pipe
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
+> ruler = nlp.add_pipe("attribute_ruler")
 > ```

-| Name            | Description                                                                                                                              |
-| --------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
-| `vocab`         | The shared vocabulary to pass to the matcher. ~~Vocab~~                                                                                  |
-| `name`          | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. ~~str~~ |
-| _keyword-only_  |                                                                                                                                          |
-| `pattern_dicts` | Optional patterns to load in on initialization. Defaults to `None`. ~~Optional[Iterable[Dict[str, Union[List[dict], dict, int]]]]~~      |
-| `validate`      | Whether patterns should be validated (passed to the [`Matcher`](/api/matcher#init)). Defaults to `False`. ~~bool~~                       |
+| Name           | Description                                                                                                                              |
+| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
+| `vocab`        | The shared vocabulary to pass to the matcher. ~~Vocab~~                                                                                  |
+| `name`         | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. ~~str~~ |
+| _keyword-only_ |                                                                                                                                          |
+| `validate`     | Whether patterns should be validated (passed to the [`Matcher`](/api/matcher#init)). Defaults to `False`. ~~bool~~                       |

 ## AttributeRuler.\_\_call\_\_ {#call tag="method"}

-Apply the attribute ruler to a `Doc`, setting token attributes for tokens matched
-by the provided patterns.
+Apply the attribute ruler to a `Doc`, setting token attributes for tokens
+matched by the provided patterns.

 | Name        | Description                      |
 | ----------- | -------------------------------- |
@ -90,10 +77,10 @@ may be negative to index from the end of the span.
 > #### Example
 >
 > ```python
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
+> ruler = nlp.add_pipe("attribute_ruler")
 > patterns = [[{"TAG": "VB"}]]
 > attrs = {"POS": "VERB"}
-> attribute_ruler.add(patterns=patterns, attrs=attrs)
+> ruler.add(patterns=patterns, attrs=attrs)
 > ```

 | Name       | Description                                                                                                                       |
@ -107,11 +94,10 @@ may be negative to index from the end of the span.
 > #### Example
 >
 > ```python
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
-> pattern_dicts = [
+> ruler = nlp.add_pipe("attribute_ruler")
+> patterns = [
 >   {
->     "patterns": [[{"TAG": "VB"}]],
->     "attrs": {"POS": "VERB"}
+>     "patterns": [[{"TAG": "VB"}]], "attrs": {"POS": "VERB"}
 >   },
 >   {
 >     "patterns": [[{"LOWER": "two"}, {"LOWER": "apples"}]],
@ -119,15 +105,16 @@ may be negative to index from the end of the span.
 >     "index": -1
 >   },
 > ]
-> attribute_ruler.add_patterns(pattern_dicts)
+> ruler.add_patterns(patterns)
 > ```

-Add patterns from a list of pattern dicts with the keys as the arguments to
+Add patterns from a list of pattern dicts. Each pattern dict can specify the
+keys `"patterns"`, `"attrs"` and `"index"`, which match the arguments of
 [`AttributeRuler.add`](/api/attributeruler#add).

-| Name            | Description                                                                |
-| --------------- | -------------------------------------------------------------------------- |
-| `pattern_dicts` | The patterns to add. ~~Iterable[Dict[str, Union[List[dict], dict, int]]]~~ |
+| Name       | Description                                                                |
+| ---------- | -------------------------------------------------------------------------- |
+| `patterns` | The patterns to add. ~~Iterable[Dict[str, Union[List[dict], dict, int]]]~~ |

 ## AttributeRuler.patterns {#patterns tag="property"}

@ -139,20 +126,39 @@ Get all patterns that have been added to the attribute ruler in the
 | ----------- | -------------------------------------------------------------------------------------------- |
 | **RETURNS** | The patterns added to the attribute ruler. ~~List[Dict[str, Union[List[dict], dict, int]]]~~ |

-## AttributeRuler.score {#score tag="method" new="3"}
+## AttributeRuler.initialize {#initialize tag="method"}

-Score a batch of examples.
+Initialize the component with data. It is usually called before training to
+load in rules from a file. This method is typically called by
+[`Language.initialize`](/api/language#initialize) and lets you customize
+arguments it receives via the
+[`[initialize.components]`](/api/data-formats#config-initialize) block in the
+config.

 > #### Example
 >
 > ```python
-> scores = attribute_ruler.score(examples)
+> ruler = nlp.add_pipe("attribute_ruler")
+> ruler.initialize(lambda: [], nlp=nlp, patterns=patterns)
+> ```
+>
+> ```ini
+> ### config.cfg
+> [initialize.components.attribute_ruler]
+>
+> [initialize.components.attribute_ruler.patterns]
+> @readers = "srsly.read_json.v1"
+> path = "corpus/attribute_ruler_patterns.json"
 > ```

-| Name        | Description                                                                                                                                                                                                           |
-| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                                                                                                          |
-| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
+| Name           | Description                                                                                                                                                                                                                                    |
+| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `get_examples` | Function that returns gold-standard annotations in the form of [`Example`](/api/example) objects (the training data). Not used by this component. ~~Callable[[], Iterable[Example]]~~                                                          |
+| _keyword-only_ |                                                                                                                                                                                                                                                |
+| `nlp`          | The current `nlp` object. Defaults to `None`. ~~Optional[Language]~~                                                                                                                                                                           |
+| `patterns`     | A list of pattern dicts with the keys as the arguments to [`AttributeRuler.add`](/api/attributeruler#add) (`patterns`/`attrs`/`index`) to add as patterns. Defaults to `None`. ~~Optional[Iterable[Dict[str, Union[List[dict], dict, int]]]]~~ |
+| `tag_map`      | The tag map that maps fine-grained tags to coarse-grained tags and morphological features. Defaults to `None`. ~~Optional[Dict[str, Dict[Union[int, str], Union[int, str]]]]~~                                                                 |
+| `morph_rules`  | The morph rules that map token text and fine-grained tags to coarse-grained tags, lemmas and morphological features. Defaults to `None`. ~~Optional[Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]]~~                            |

 ## AttributeRuler.load_from_tag_map {#load_from_tag_map tag="method"}

@ -170,6 +176,21 @@ Load attribute ruler patterns from morph rules.
 | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `morph_rules` | The morph rules that map token text and fine-grained tags to coarse-grained tags, lemmas and morphological features. ~~Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]~~ |

+## AttributeRuler.score {#score tag="method" new="3"}
+
+Score a batch of examples.
+
+> #### Example
+>
+> ```python
+> scores = ruler.score(examples)
+> ```
+
+| Name        | Description                                                                                                                                                                                                           |
+| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `examples`  | The examples to score. ~~Iterable[Example]~~                                                                                                                                                                          |
+| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
+
 ## AttributeRuler.to_disk {#to_disk tag="method"}

 Serialize the pipe to disk.
@ -177,8 +198,8 @@ Serialize the pipe to disk.
 > #### Example
 >
 > ```python
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
-> attribute_ruler.to_disk("/path/to/attribute_ruler")
+> ruler = nlp.add_pipe("attribute_ruler")
+> ruler.to_disk("/path/to/attribute_ruler")
 > ```

 | Name           | Description                                                                                                                                |
@ -194,8 +215,8 @@ Load the pipe from disk. Modifies the object in place and returns it.
 > #### Example
 >
 > ```python
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
-> attribute_ruler.from_disk("/path/to/attribute_ruler")
+> ruler = nlp.add_pipe("attribute_ruler")
+> ruler.from_disk("/path/to/attribute_ruler")
 > ```

 | Name           | Description                                                                                     |
@ -210,8 +231,8 @@ Load the pipe from disk. Modifies the object in place and returns it.
 > #### Example
 >
 > ```python
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
-> attribute_ruler_bytes = attribute_ruler.to_bytes()
+> ruler = nlp.add_pipe("attribute_ruler")
+> ruler_bytes = ruler.to_bytes()
 > ```

 Serialize the pipe to a bytestring.
@ -229,9 +250,9 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
 > #### Example
 >
 > ```python
-> attribute_ruler_bytes = attribute_ruler.to_bytes()
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
-> attribute_ruler.from_bytes(attribute_ruler_bytes)
+> ruler_bytes = ruler.to_bytes()
+> ruler = nlp.add_pipe("attribute_ruler")
+> ruler.from_bytes(ruler_bytes)
 > ```

 | Name           | Description                                                                                 |
@ -250,12 +271,12 @@ serialization by passing in the string names via the `exclude` argument.
 > #### Example
 >
 > ```python
-> data = attribute_ruler.to_disk("/path", exclude=["vocab"])
+> ruler.to_disk("/path", exclude=["vocab"])
 > ```

-| Name       | Description                                                    |
-| ---------- | -------------------------------------------------------------- |
-| `vocab`    | The shared [`Vocab`](/api/vocab).                              |
-| `patterns` | The `Matcher` patterns. You usually don't want to exclude this.  |
-| `attrs`    | The attributes to set. You usually don't want to exclude this. |
-| `indices`  | The token indices. You usually don't want to exclude this.     |
+| Name       | Description                                                     |
+| ---------- | --------------------------------------------------------------- |
+| `vocab`    | The shared [`Vocab`](/api/vocab).                               |
+| `patterns` | The `Matcher` patterns. You usually don't want to exclude this. |
+| `attrs`    | The attributes to set. You usually don't want to exclude this.  |
+| `indices`  | The token indices. You usually don't want to exclude this.      |
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@ -1801,17 +1801,7 @@ print(doc2[5].tag_, doc2[5].pos_)  # WP PRON

 <Infobox variant="warning" title="Migrating from spaCy v2.x">

-For easy migration from from spaCy v2 to v3, the
-[`AttributeRuler`](/api/attributeruler) can import a **tag map and morph rules**
-in the v2 format with the methods
-[`load_from_tag_map`](/api/attributeruler#load_from_tag_map) and
-[`load_from_morph_rules`](/api/attributeruler#load_from_morph_rules).
-
-```diff
-nlp = spacy.blank("en")
-+ ruler = nlp.add_pipe("attribute_ruler")
-+ ruler.load_from_tag_map(YOUR_TAG_MAP)
-```
+The [`AttributeRuler`](/api/attributeruler) can import a **tag map and morph rules** in the v2.x format via its built-in methods or when the component is initialized before training. See the [migration guide](/usage/v3#migrating-training-mappings-exceptions) for details.

 </Infobox>

--- a/website/docs/usage/v3.md
+++ b/website/docs/usage/v3.md
@ -804,8 +804,30 @@ nlp = spacy.blank("en")
 Instead of defining a `tag_map` and `morph_rules` in the language data, spaCy
 v3.0 now manages mappings and exceptions with a separate and more flexible
 pipeline component, the [`AttributeRuler`](/api/attributeruler). See the
-[usage guide](/usage/linguistic-features#mappings-exceptions) for examples. The
-`AttributeRuler` provides two handy helper methods
+[usage guide](/usage/linguistic-features#mappings-exceptions) for examples. If
+you have tag maps and morph rules in the v2.x format, you can load them into the
+attribute ruler before training using the `[initialize]` block of your config.
+
+> #### What does the initialization do?
+>
+> The `[initialize]` block is used when
+> [`nlp.initialize`](/api/language#initialize) is called (usually right before
+> training). It lets you define data resources for initializing the pipeline in
+> your `config.cfg`. After training, the rules are saved to disk with the
+> exported pipeline, so your runtime model doesn't depend on local data. For
+> details see the [config lifecycle](/usage/training/#config-lifecycle) and
+> [initialization](/usage/training/#initialization) docs.
+
+```ini
+### config.cfg (excerpt)
+[initialize.components.attribute_ruler]
+
+[initialize.components.attribute_ruler.tag_map]
+@readers = "srsly.read_json.v1"
+path = "./corpus/tag_map.json"
+```
+
+The `AttributeRuler` also provides two handy helper methods
 [`load_from_tag_map`](/api/attributeruler#load_from_tag_map) and
 [`load_from_morph_rules`](/api/attributeruler#load_from_morph_rules) that let
 you load in your existing tag map or morph rules: